namei.c 86.2 KB
Newer Older
1
/*
2
 *  linux/fs/ext4/namei.c
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/namei.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *  Directory entry file type support and forward compatibility hooks
 *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
 *  Hash Tree Directory indexing (c)
 *	Daniel Phillips, 2001
 *  Hash Tree Directory indexing porting
 *	Christopher Li, 2002
 *  Hash Tree Directory indexing cleanup
 *	Theodore Ts'o, 2002
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
29
#include <linux/jbd2.h>
30
31
32
33
34
35
36
#include <linux/time.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
37
38
#include "ext4.h"
#include "ext4_jbd2.h"
39
40
41
42

#include "xattr.h"
#include "acl.h"

43
#include <trace/events/ext4.h>
44
45
46
47
48
/*
 * define how far ahead to read directories while searching them.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
Dave Kleikamp's avatar
Dave Kleikamp committed
49
#define NAMEI_RA_SIZE	     (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
50
51
#define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))

52
static struct buffer_head *ext4_append(handle_t *handle,
53
					struct inode *inode,
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
54
					ext4_lblk_t *block, int *err)
55
56
57
{
	struct buffer_head *bh;

58
59
60
61
62
63
64
	if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
		     ((inode->i_size >> 10) >=
		      EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) {
		*err = -ENOSPC;
		return NULL;
	}

65
66
	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;

67
68
	bh = ext4_bread(handle, inode, *block, 1, err);
	if (bh) {
69
		inode->i_size += inode->i_sb->s_blocksize;
70
		EXT4_I(inode)->i_disksize = inode->i_size;
71
72
73
74
75
		*err = ext4_journal_get_write_access(handle, bh);
		if (*err) {
			brelse(bh);
			bh = NULL;
		}
76
	}
Carlos Maiolino's avatar
Carlos Maiolino committed
77
78
79
80
81
82
	if (!bh && !(*err)) {
		*err = -EIO;
		ext4_error(inode->i_sb,
			   "Directory hole detected on inode %lu\n",
			   inode->i_ino);
	}
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
	return bh;
}

#ifndef assert
#define assert(test) J_ASSERT(test)
#endif

#ifdef DX_DEBUG
#define dxtrace(command) command
#else
#define dxtrace(command)
#endif

struct fake_dirent
{
	__le32 inode;
	__le16 rec_len;
	u8 name_len;
	u8 file_type;
};

struct dx_countlimit
{
	__le16 limit;
	__le16 count;
};

struct dx_entry
{
	__le32 hash;
	__le32 block;
};

/*
 * dx_root_info is laid out so that if it should somehow get overlaid by a
 * dirent the two low bits of the hash version will be zero.  Therefore, the
 * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
 */

struct dx_root
{
	struct fake_dirent dot;
	char dot_name[4];
	struct fake_dirent dotdot;
	char dotdot_name[4];
	struct dx_root_info
	{
		__le32 reserved_zero;
		u8 hash_version;
		u8 info_length; /* 8 */
		u8 indirect_levels;
		u8 unused_flags;
	}
	info;
	struct dx_entry	entries[0];
};

struct dx_node
{
	struct fake_dirent fake;
	struct dx_entry	entries[0];
};


struct dx_frame
{
	struct buffer_head *bh;
	struct dx_entry *entries;
	struct dx_entry *at;
};

struct dx_map_entry
{
	u32 hash;
157
158
	u16 offs;
	u16 size;
159
160
};

161
162
163
164
165
166
167
168
/*
 * This goes at the end of each htree block.
 */
struct dx_tail {
	u32 dt_reserved;
	__le32 dt_checksum;	/* crc32c(uuid+inum+dirblock) */
};

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
169
170
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
171
172
173
174
175
176
177
178
static inline unsigned dx_get_hash(struct dx_entry *entry);
static void dx_set_hash(struct dx_entry *entry, unsigned value);
static unsigned dx_get_count(struct dx_entry *entries);
static unsigned dx_get_limit(struct dx_entry *entries);
static void dx_set_count(struct dx_entry *entries, unsigned value);
static void dx_set_limit(struct dx_entry *entries, unsigned value);
static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
static unsigned dx_node_limit(struct inode *dir);
179
static struct dx_frame *dx_probe(const struct qstr *d_name,
180
181
182
183
				 struct inode *dir,
				 struct dx_hash_info *hinfo,
				 struct dx_frame *frame,
				 int *err);
184
static void dx_release(struct dx_frame *frames);
185
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
186
		       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
187
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
188
static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
189
		struct dx_map_entry *offsets, int count, unsigned blocksize);
190
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
191
192
static void dx_insert_block(struct dx_frame *frame,
					u32 hash, ext4_lblk_t block);
193
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
194
195
196
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash);
197
198
199
200
static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
		const struct qstr *d_name,
		struct ext4_dir_entry_2 **res_dir,
		int *err);
201
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
202
203
			     struct inode *inode);

204
/* checksumming functions */
205
206
void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
			    unsigned int blocksize)
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
{
	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
	t->det_rec_len = ext4_rec_len_to_disk(
			sizeof(struct ext4_dir_entry_tail), blocksize);
	t->det_reserved_ft = EXT4_FT_DIR_CSUM;
}

/* Walk through a dirent block to find a checksum "dirent" at the tail */
static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
						   struct ext4_dir_entry *de)
{
	struct ext4_dir_entry_tail *t;

#ifdef PARANOID
	struct ext4_dir_entry *d, *top;

	d = de;
	top = (struct ext4_dir_entry *)(((void *)de) +
		(EXT4_BLOCK_SIZE(inode->i_sb) -
		sizeof(struct ext4_dir_entry_tail)));
	while (d < top && d->rec_len)
		d = (struct ext4_dir_entry *)(((void *)d) +
		    le16_to_cpu(d->rec_len));

	if (d != top)
		return NULL;

	t = (struct ext4_dir_entry_tail *)d;
#else
	t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
#endif

	if (t->det_reserved_zero1 ||
	    le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
	    t->det_reserved_zero2 ||
	    t->det_reserved_ft != EXT4_FT_DIR_CSUM)
		return NULL;

	return t;
}

static __le32 ext4_dirent_csum(struct inode *inode,
			       struct ext4_dir_entry *dirent, int size)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	__u32 csum;

	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	return cpu_to_le32(csum);
}

259
260
261
262
263
264
static void warn_no_space_for_csum(struct inode *inode)
{
	ext4_warning(inode->i_sb, "no space in directory inode %lu leaf for "
		     "checksum.  Please run e2fsck -D.", inode->i_ino);
}

265
266
267
268
269
270
271
272
273
274
int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
275
		warn_no_space_for_csum(inode);
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
		return 0;
	}

	if (t->det_checksum != ext4_dirent_csum(inode, dirent,
						(void *)t - (void *)dirent))
		return 0;

	return 1;
}

static void ext4_dirent_csum_set(struct inode *inode,
				 struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
297
		warn_no_space_for_csum(inode);
298
299
300
301
302
303
304
		return;
	}

	t->det_checksum = ext4_dirent_csum(inode, dirent,
					   (void *)t - (void *)dirent);
}

305
306
307
int ext4_handle_dirty_dirent_node(handle_t *handle,
				  struct inode *inode,
				  struct buffer_head *bh)
308
309
310
311
312
{
	ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
					       struct ext4_dir_entry *dirent,
					       int *offset)
{
	struct ext4_dir_entry *dp;
	struct dx_root_info *root;
	int count_offset;

	if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
		count_offset = 8;
	else if (le16_to_cpu(dirent->rec_len) == 12) {
		dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
		if (le16_to_cpu(dp->rec_len) !=
		    EXT4_BLOCK_SIZE(inode->i_sb) - 12)
			return NULL;
		root = (struct dx_root_info *)(((void *)dp + 12));
		if (root->reserved_zero ||
		    root->info_length != sizeof(struct dx_root_info))
			return NULL;
		count_offset = 32;
	} else
		return NULL;

	if (offset)
		*offset = count_offset;
	return (struct dx_countlimit *)(((void *)dirent) + count_offset);
}

static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
			   int count_offset, int count, struct dx_tail *t)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	__u32 csum, old_csum;
	int size;

	size = count_offset + (count * sizeof(struct dx_entry));
	old_csum = t->dt_checksum;
	t->dt_checksum = 0;
	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
	t->dt_checksum = old_csum;

	return cpu_to_le32(csum);
}

static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return 1;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
379
		warn_no_space_for_csum(inode);
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
		return 1;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
					    count, t))
		return 0;
	return 1;
}

static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
409
		warn_no_space_for_csum(inode);
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
		return;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
}

static inline int ext4_handle_dirty_dx_node(handle_t *handle,
					    struct inode *inode,
					    struct buffer_head *bh)
{
	ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

425
426
427
428
/*
 * p is at least 6 bytes before the end of page
 */
static inline struct ext4_dir_entry_2 *
429
ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
430
431
{
	return (struct ext4_dir_entry_2 *)((char *)p +
432
		ext4_rec_len_from_disk(p->rec_len, blocksize));
433
434
}

435
436
437
438
439
/*
 * Future: use high four bits of block for coalesce-on-delete flags
 * Mask them off for now.
 */

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
440
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
441
442
443
444
{
	return le32_to_cpu(entry->block) & 0x00ffffff;
}

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
445
static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
446
447
448
449
{
	entry->block = cpu_to_le32(value);
}

450
static inline unsigned dx_get_hash(struct dx_entry *entry)
451
452
453
454
{
	return le32_to_cpu(entry->hash);
}

455
static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
456
457
458
459
{
	entry->hash = cpu_to_le32(value);
}

460
static inline unsigned dx_get_count(struct dx_entry *entries)
461
462
463
464
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->count);
}

465
static inline unsigned dx_get_limit(struct dx_entry *entries)
466
467
468
469
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
}

470
static inline void dx_set_count(struct dx_entry *entries, unsigned value)
471
472
473
474
{
	((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
}

475
static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
476
477
478
479
{
	((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
}

480
static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
481
{
482
483
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
		EXT4_DIR_REC_LEN(2) - infosize;
484
485
486
487

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
488
	return entry_space / sizeof(struct dx_entry);
489
490
}

491
static inline unsigned dx_node_limit(struct inode *dir)
492
{
493
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
494
495
496
497

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
498
	return entry_space / sizeof(struct dx_entry);
499
500
501
502
503
504
}

/*
 * Debug
 */
#ifdef DX_DEBUG
505
static void dx_show_index(char * label, struct dx_entry *entries)
506
{
507
	int i, n = dx_get_count (entries);
508
	printk(KERN_DEBUG "%s index ", label);
509
	for (i = 0; i < n; i++) {
510
		printk("%x->%lu ", i ? dx_get_hash(entries + i) :
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
511
				0, (unsigned long)dx_get_block(entries + i));
512
513
	}
	printk("\n");
514
515
516
517
518
519
520
521
522
}

struct stats
{
	unsigned names;
	unsigned space;
	unsigned bcount;
};

523
static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
				 int size, int show_names)
{
	unsigned names = 0, space = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

	printk("names: ");
	while ((char *) de < base + size)
	{
		if (de->inode)
		{
			if (show_names)
			{
				int len = de->name_len;
				char *name = de->name;
				while (len--) printk("%c", *name++);
540
				ext4fs_dirhash(de->name, de->name_len, &h);
541
				printk(":%x.%u ", h.hash,
542
				       (unsigned) ((char *) de - base));
543
			}
544
			space += EXT4_DIR_REC_LEN(de->name_len);
545
546
			names++;
		}
547
		de = ext4_next_entry(de, size);
548
549
550
551
552
553
554
555
556
	}
	printk("(%i)\n", names);
	return (struct stats) { names, space, 1 };
}

struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
			     struct dx_entry *entries, int levels)
{
	unsigned blocksize = dir->i_sb->s_blocksize;
557
	unsigned count = dx_get_count(entries), names = 0, space = 0, i;
558
559
560
561
562
563
	unsigned bcount = 0;
	struct buffer_head *bh;
	int err;
	printk("%i indexed blocks...\n", count);
	for (i = 0; i < count; i++, entries++)
	{
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
564
565
		ext4_lblk_t block = dx_get_block(entries);
		ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
566
567
568
		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
		struct stats stats;
		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
569
		if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
570
571
		stats = levels?
		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
572
		   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
573
574
575
		names += stats.names;
		space += stats.space;
		bcount += stats.bcount;
576
		brelse(bh);
577
578
	}
	if (bcount)
579
		printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
580
581
		       levels ? "" : "   ", names, space/bcount,
		       (space/bcount)*100/blocksize);
582
583
584
585
586
587
588
589
590
591
592
593
594
595
	return (struct stats) { names, space, bcount};
}
#endif /* DX_DEBUG */

/*
 * Probe for a directory leaf block to search.
 *
 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
 * error in the directory index, and the caller should fall back to
 * searching the directory normally.  The callers of dx_probe **MUST**
 * check for this error code, and make sure it never gets reflected
 * back to userspace.
 */
static struct dx_frame *
596
dx_probe(const struct qstr *d_name, struct inode *dir,
597
598
599
600
601
602
603
604
605
606
	 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
{
	unsigned count, indirect;
	struct dx_entry *at, *entries, *p, *q, *m;
	struct dx_root *root;
	struct buffer_head *bh;
	struct dx_frame *frame = frame_in;
	u32 hash;

	frame->bh = NULL;
Carlos Maiolino's avatar
Carlos Maiolino committed
607
608
609
	if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) {
		if (*err == 0)
			*err = ERR_BAD_DX_DIR;
610
		goto fail;
Carlos Maiolino's avatar
Carlos Maiolino committed
611
	}
612
613
614
615
	root = (struct dx_root *) bh->b_data;
	if (root->info.hash_version != DX_HASH_TEA &&
	    root->info.hash_version != DX_HASH_HALF_MD4 &&
	    root->info.hash_version != DX_HASH_LEGACY) {
616
		ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
617
618
619
620
621
622
			     root->info.hash_version);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}
	hinfo->hash_version = root->info.hash_version;
623
624
	if (hinfo->hash_version <= DX_HASH_TEA)
		hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
625
	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
626
627
	if (d_name)
		ext4fs_dirhash(d_name->name, d_name->len, hinfo);
628
629
630
	hash = hinfo->hash;

	if (root->info.unused_flags & 1) {
631
		ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
632
633
634
635
636
637
638
			     root->info.unused_flags);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	if ((indirect = root->info.indirect_levels) > 1) {
639
		ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
640
641
642
643
644
645
			     root->info.indirect_levels);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

646
647
648
649
650
651
652
653
654
	if (!buffer_verified(bh) &&
	    !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) {
		ext4_warning(dir->i_sb, "Root failed checksum");
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}
	set_buffer_verified(bh);

655
656
	entries = (struct dx_entry *) (((char *)&root->info) +
				       root->info.info_length);
657
658
659

	if (dx_get_limit(entries) != dx_root_limit(dir,
						   root->info.info_length)) {
660
		ext4_warning(dir->i_sb, "dx entry: limit != root limit");
661
662
663
664
665
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

666
	dxtrace(printk("Look up %x", hash));
667
668
669
	while (1)
	{
		count = dx_get_count(entries);
670
		if (!count || count > dx_get_limit(entries)) {
671
			ext4_warning(dir->i_sb,
672
673
674
675
676
677
				     "dx entry: no count or count > limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}

678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
		p = entries + 1;
		q = entries + count - 1;
		while (p <= q)
		{
			m = p + (q - p)/2;
			dxtrace(printk("."));
			if (dx_get_hash(m) > hash)
				q = m - 1;
			else
				p = m + 1;
		}

		if (0) // linear search cross check
		{
			unsigned n = count - 1;
			at = entries;
			while (n--)
			{
				dxtrace(printk(","));
				if (dx_get_hash(++at) > hash)
				{
					at--;
					break;
				}
			}
			assert (at == p - 1);
		}

		at = p - 1;
		dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
		frame->bh = bh;
		frame->entries = entries;
		frame->at = at;
		if (!indirect--) return frame;
Carlos Maiolino's avatar
Carlos Maiolino committed
712
713
714
		if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) {
			if (!(*err))
				*err = ERR_BAD_DX_DIR;
715
			goto fail2;
Carlos Maiolino's avatar
Carlos Maiolino committed
716
		}
717
		at = entries = ((struct dx_node *) bh->b_data)->entries;
718
719
720
721
722
723
724
725
726
727
728

		if (!buffer_verified(bh) &&
		    !ext4_dx_csum_verify(dir,
					 (struct ext4_dir_entry *)bh->b_data)) {
			ext4_warning(dir->i_sb, "Node failed checksum");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail;
		}
		set_buffer_verified(bh);

729
		if (dx_get_limit(entries) != dx_node_limit (dir)) {
730
			ext4_warning(dir->i_sb,
731
732
733
734
735
				     "dx entry: limit != node limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}
736
		frame++;
737
		frame->bh = NULL;
738
739
740
741
742
743
744
	}
fail2:
	while (frame >= frame_in) {
		brelse(frame->bh);
		frame--;
	}
fail:
745
	if (*err == ERR_BAD_DX_DIR)
746
		ext4_warning(dir->i_sb,
Zheng Liu's avatar
Zheng Liu committed
747
			     "Corrupt dir inode %lu, running e2fsck is "
748
			     "recommended.", dir->i_ino);
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
	return NULL;
}

static void dx_release (struct dx_frame *frames)
{
	if (frames[0].bh == NULL)
		return;

	if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
		brelse(frames[1].bh);
	brelse(frames[0].bh);
}

/*
 * This function increments the frame pointer to search the next leaf
 * block, and reads in the necessary intervening nodes if the search
 * should be necessary.  Whether or not the search is necessary is
 * controlled by the hash parameter.  If the hash value is even, then
 * the search is only continued if the next block starts with that
 * hash value.  This is used if we are searching for a specific file.
 *
 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 *
 * This function returns 1 if the caller should continue to search,
 * or 0 if it should not.  If there is an error reading one of the
 * index blocks, it will a negative error code.
 *
 * If start_hash is non-null, it will be filled in with the starting
 * hash of the next page.
 */
779
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash)
{
	struct dx_frame *p;
	struct buffer_head *bh;
	int err, num_frames = 0;
	__u32 bhash;

	p = frame;
	/*
	 * Find the next leaf page by incrementing the frame pointer.
	 * If we run out of entries in the interior node, loop around and
	 * increment pointer in the parent node.  When we break out of
	 * this loop, num_frames indicates the number of interior
	 * nodes need to be read.
	 */
	while (1) {
		if (++(p->at) < p->entries + dx_get_count(p->entries))
			break;
		if (p == frames)
			return 0;
		num_frames++;
		p--;
	}

	/*
	 * If the hash is 1, then continue only if the next page has a
	 * continuation hash of any value.  This is used for readdir
	 * handling.  Otherwise, check to see if the hash matches the
	 * desired contiuation hash.  If it doesn't, return since
	 * there's no point to read in the successive index pages.
	 */
	bhash = dx_get_hash(p->at);
	if (start_hash)
		*start_hash = bhash;
	if ((hash & 1) == 0) {
		if ((bhash & ~1) != hash)
			return 0;
	}
	/*
	 * If the hash is HASH_NB_ALWAYS, we always go to the next
	 * block so no check is necessary
	 */
	while (num_frames--) {
825
		if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
Carlos Maiolino's avatar
Carlos Maiolino committed
826
827
828
829
830
831
832
				      0, &err))) {
			if (!err) {
				ext4_error(dir->i_sb,
					   "Directory hole detected on inode %lu\n",
					   dir->i_ino);
				return -EIO;
			}
833
			return err; /* Failure */
Carlos Maiolino's avatar
Carlos Maiolino committed
834
		}
835
836
837
838
839
840
841
842
843

		if (!buffer_verified(bh) &&
		    !ext4_dx_csum_verify(dir,
					 (struct ext4_dir_entry *)bh->b_data)) {
			ext4_warning(dir->i_sb, "Node failed checksum");
			return -EIO;
		}
		set_buffer_verified(bh);

844
		p++;
845
		brelse(p->bh);
846
847
848
849
850
851
852
853
854
855
856
857
858
		p->bh = bh;
		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
	}
	return 1;
}


/*
 * This function fills a red-black tree with information from a
 * directory block.  It returns the number directory entries loaded
 * into the tree.  If there is an error it is returned in err.
 */
static int htree_dirblock_to_tree(struct file *dir_file,
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
859
				  struct inode *dir, ext4_lblk_t block,
860
861
862
863
				  struct dx_hash_info *hinfo,
				  __u32 start_hash, __u32 start_minor_hash)
{
	struct buffer_head *bh;
864
	struct ext4_dir_entry_2 *de, *top;
865
	int err = 0, count = 0;
866

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
867
868
	dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
							(unsigned long)block));
Carlos Maiolino's avatar
Carlos Maiolino committed
869
870
871
872
873
874
875
	if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) {
		if (!err) {
			err = -EIO;
			ext4_error(dir->i_sb,
				   "Directory hole detected on inode %lu\n",
				   dir->i_ino);
		}
876
		return err;
Carlos Maiolino's avatar
Carlos Maiolino committed
877
	}
878

879
880
881
882
883
	if (!buffer_verified(bh) &&
	    !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
		return -EIO;
	set_buffer_verified(bh);

884
885
	de = (struct ext4_dir_entry_2 *) bh->b_data;
	top = (struct ext4_dir_entry_2 *) ((char *) de +
886
					   dir->i_sb->s_blocksize -
887
					   EXT4_DIR_REC_LEN(0));
888
	for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
889
		if (ext4_check_dir_entry(dir, NULL, de, bh,
890
				bh->b_data, bh->b_size,
891
892
				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
					 + ((char *)de - bh->b_data))) {
893
894
895
			/* On error, skip the f_pos to the next block. */
			dir_file->f_pos = (dir_file->f_pos |
					(dir->i_sb->s_blocksize - 1)) + 1;
896
			brelse(bh);
897
898
			return count;
		}
899
		ext4fs_dirhash(de->name, de->name_len, hinfo);
900
901
902
903
904
905
		if ((hinfo->hash < start_hash) ||
		    ((hinfo->hash == start_hash) &&
		     (hinfo->minor_hash < start_minor_hash)))
			continue;
		if (de->inode == 0)
			continue;
906
		if ((err = ext4_htree_store_dirent(dir_file,
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
				   hinfo->hash, hinfo->minor_hash, de)) != 0) {
			brelse(bh);
			return err;
		}
		count++;
	}
	brelse(bh);
	return count;
}


/*
 * This function fills a red-black tree with information from a
 * directory.  We start scanning the directory in hash order, starting
 * at start_hash and start_minor_hash.
 *
 * This function returns the number of entries inserted into the tree,
 * or a negative error code.
 */
926
int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
927
928
929
			 __u32 start_minor_hash, __u32 *next_hash)
{
	struct dx_hash_info hinfo;
930
	struct ext4_dir_entry_2 *de;
931
932
	struct dx_frame frames[2], *frame;
	struct inode *dir;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
933
	ext4_lblk_t block;
934
	int count = 0;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
935
	int ret, err;
936
937
	__u32 hashval;

938
	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
939
		       start_hash, start_minor_hash));
940
	dir = dir_file->f_path.dentry->d_inode;
941
	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
942
		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
943
944
945
		if (hinfo.hash_version <= DX_HASH_TEA)
			hinfo.hash_version +=
				EXT4_SB(dir->i_sb)->s_hash_unsigned;
946
		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
947
948
949
950
951
952
953
		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
					       start_hash, start_minor_hash);
		*next_hash = ~0;
		return count;
	}
	hinfo.hash = start_hash;
	hinfo.minor_hash = 0;
954
	frame = dx_probe(NULL, dir, &hinfo, frames, &err);
955
956
957
958
959
	if (!frame)
		return err;

	/* Add '.' and '..' from the htree header */
	if (!start_hash && !start_minor_hash) {
960
961
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
		if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
962
963
964
965
			goto errout;
		count++;
	}
	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
966
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
967
		de = ext4_next_entry(de, dir->i_sb->s_blocksize);
968
		if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
969
970
971
972
973
974
975
976
977
978
979
980
981
982
			goto errout;
		count++;
	}

	while (1) {
		block = dx_get_block(frame->at);
		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
					     start_hash, start_minor_hash);
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		count += ret;
		hashval = ~0;
983
		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
					    frame, frames, &hashval);
		*next_hash = hashval;
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		/*
		 * Stop if:  (a) there are no more entries, or
		 * (b) we have inserted at least one entry and the
		 * next hash value is not a continuation
		 */
		if ((ret == 0) ||
		    (count && ((hashval & 1) == 0)))
			break;
	}
	dx_release(frames);
1000
1001
	dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
		       "next hash: %x\n", count, *next_hash));
1002
1003
1004
1005
1006
1007
	return count;
errout:
	dx_release(frames);
	return (err);
}

1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
static inline int search_dirblock(struct buffer_head *bh,
				  struct inode *dir,
				  const struct qstr *d_name,
				  unsigned int offset,
				  struct ext4_dir_entry_2 **res_dir)
{
	return search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
			  d_name, offset, res_dir);
}

1018
1019
1020
1021
/*
 * Directory block splitting, compacting
 */

1022
1023
1024
1025
/*
 * Create map of hash values, offsets, and sizes, stored at end of block.
 * Returns number of entries mapped.
 */
1026
1027
1028
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
		       struct dx_hash_info *hinfo,
		       struct dx_map_entry *map_tail)
1029
1030
1031
1032
1033
{
	int count = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

1034
	while ((char *) de < base + blocksize) {
1035
		if (de->name_len && de->inode) {
1036
			ext4fs_dirhash(de->name, de->name_len, &h);
1037
1038
			map_tail--;
			map_tail->hash = h.hash;
1039
			map_tail->offs = ((char *) de - base)>>2;
1040
			map_tail->size = le16_to_cpu(de->rec_len);
1041
1042
1043
1044
			count++;
			cond_resched();
		}
		/* XXX: do we need to check rec_len == 0 case? -Chris */
1045
		de = ext4_next_entry(de, blocksize);
1046
1047
1048
1049
	}
	return count;
}

1050
/* Sort map by hash value */
1051
1052
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
	struct dx_map_entry *p, *q, *top = map + count - 1;
	int more;
	/* Combsort until bubble sort doesn't suck */
	while (count > 2) {
		count = count*10/13;
		if (count - 9 < 2) /* 9, 10 -> 11 */
			count = 11;
		for (p = top, q = p - count; q >= map; p--, q--)
			if (p->hash < q->hash)
				swap(*p, *q);
	}
	/* Garden variety bubble sort */
	do {
		more = 0;
		q = top;
		while (q-- > map) {
			if (q[1].hash >= q[0].hash)
1070
				continue;
1071
1072
			swap(*(q+1), *q);
			more = 1;
1073
1074
1075
1076
		}
	} while(more);
}

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1077
static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
{
	struct dx_entry *entries = frame->entries;
	struct dx_entry *old = frame->at, *new = old + 1;
	int count = dx_get_count(entries);

	assert(count < dx_get_limit(entries));
	assert(old < entries + count);
	memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
	dx_set_hash(new, hash);
	dx_set_block(new, block);
	dx_set_count(entries, count + 1);
}

/*
1092
 * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
1093
 *
1094
 * `len <= EXT4_NAME_LEN' is guaranteed by caller.
1095
1096
 * `de != NULL' is guaranteed by caller.
 */
1097
1098
static inline int ext4_match (int len, const char * const name,
			      struct ext4_dir_entry_2 * de)
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
{
	if (len != de->name_len)
		return 0;
	if (!de->inode)
		return 0;
	return !memcmp(name, de->name, len);
}

/*
 * Returns 0 if not found, -1 on failure, and 1 on success
 */
1110
1111
1112
1113
1114
1115
1116
int search_dir(struct buffer_head *bh,
	       char *search_buf,
	       int buf_size,
	       struct inode *dir,
	       const struct qstr *d_name,
	       unsigned int offset,
	       struct ext4_dir_entry_2 **res_dir)
1117
{
1118
	struct ext4_dir_entry_2 * de;
1119
1120
	char * dlimit;
	int de_len;
1121
1122
	const char *name = d_name->name;
	int namelen = d_name->len;
1123

1124
1125
	de = (struct ext4_dir_entry_2 *)search_buf;
	dlimit = search_buf + buf_size;
1126
1127
1128
1129
1130
	while ((char *) de < dlimit) {
		/* this code is executed quadratically often */
		/* do minimal checking `by hand' */

		if ((char *) de + namelen <= dlimit &&
1131
		    ext4_match (namelen, name, de)) {
1132
			/* found a match - just to be sure, do a full check */
1133
1134
			if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
						 bh->b_size, offset))
1135
1136
1137
1138
1139
				return -1;
			*res_dir = de;
			return 1;
		}
		/* prevent looping on a bad block */
1140
1141
		de_len = ext4_rec_len_from_disk(de->rec_len,
						dir->i_sb->s_blocksize);
1142
1143
1144
		if (de_len <= 0)
			return -1;
		offset += de_len;
1145
		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1146
1147
1148
1149
	}
	return 0;
}

1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
			       struct ext4_dir_entry *de)
{
	struct super_block *sb = dir->i_sb;

	if (!is_dx(dir))
		return 0;
	if (block == 0)
		return 1;
	if (de->inode == 0 &&
	    ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
			sb->s_blocksize)
		return 1;
	return 0;
}
1165
1166

/*
1167
 *	ext4_find_entry()
1168
1169
1170
1171
1172
1173
1174
1175
1176
 *
 * finds an entry in the specified directory with the wanted name. It
 * returns the cache buffer in which the entry was found, and the entry
 * itself (as a parameter - res_dir). It does NOT read the inode of the
 * entry - you'll have to do that yourself if you want to.
 *
 * The returned buffer_head has ->b_count elevated.  The caller is expected
 * to brelse() it when appropriate.
 */
1177
1178
static struct buffer_head * ext4_find_entry (struct inode *dir,
					const struct qstr *d_name,
1179
1180
					struct ext4_dir_entry_2 **res_dir,
					int *inlined)
1181
{
1182
1183
1184
	struct super_block *sb;
	struct buffer_head *bh_use[NAMEI_RA_SIZE];
	struct buffer_head *bh, *ret = NULL;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1185
	ext4_lblk_t start, block, b;
1186
	const u8 *name = d_name->name;
1187
1188
1189
1190
1191
	int ra_max = 0;		/* Number of bh's in the readahead
				   buffer, bh_use[] */
	int ra_ptr = 0;		/* Current index into readahead
				   buffer */
	int num = 0;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1192
1193
	ext4_lblk_t  nblocks;
	int i, err;
1194
1195
1196
1197
	int namelen;

	*res_dir = NULL;
	sb = dir->i_sb;
1198
	namelen = d_name->len;
1199
	if (namelen > EXT4_NAME_LEN)
1200
		return NULL;
1201
1202
1203
1204
1205

	if (ext4_has_inline_data(dir)) {
		int has_inline_data = 1;
		ret = ext4_find_inline_entry(dir, d_name, res_dir,
					     &has_inline_data);
1206
1207
1208
		if (has_inline_data) {
			if (inlined)
				*inlined = 1;
1209
			return ret;
1210
		}
1211
1212
	}

1213
	if ((namelen <= 2) && (name[0] == '.') &&
1214
	    (name[1] == '.' || name[1] == '\0')) {
1215
1216
1217
1218
1219
1220
1221
1222
		/*
		 * "." or ".." will only be in the first block
		 * NFS may look up ".."; "." should be handled by the VFS
		 */
		block = start = 0;
		nblocks = 1;
		goto restart;
	}
1223
	if (is_dx(dir)) {
1224
		bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
1225
1226
1227
1228
1229
1230
1231
		/*
		 * On success, or if the error was file not found,
		 * return.  Otherwise, fall back to doing a search the
		 * old fashioned way.
		 */
		if (bh || (err != ERR_BAD_DX_DIR))
			return bh;
1232
1233
		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
			       "falling back\n"));
1234
	}
1235
1236
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
	start = EXT4_I(dir)->i_dir_start_lookup;
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
	if (start >= nblocks)
		start = 0;
	block = start;
restart:
	do {
		/*
		 * We deal with the read-ahead logic here.
		 */
		if (ra_ptr >= ra_max) {
			/* Refill the readahead buffer */
			ra_ptr = 0;
			b = block;
			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				/*
				 * Terminate if we reach the end of the
				 * directory and must wrap, or if our
				 * search has finished at this block.
				 */
				if (b >= nblocks || (num && block == start)) {
					bh_use[ra_max] = NULL;
					break;
				}
				num++;
1260
				bh = ext4_getblk(NULL, dir, b++, 0, &err);
1261
1262
				bh_use[ra_max] = bh;
				if (bh)
1263
1264
					ll_rw_block(READ | REQ_META | REQ_PRIO,
						    1, &bh);
1265
1266
1267
1268
1269
1270
1271
			}
		}
		if ((bh = bh_use[ra_ptr++]) == NULL)
			goto next;
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			/* read error, skip block & hope for the best */
1272
1273
			EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
					 (unsigned long) block);
1274
1275
1276
			brelse(bh);
			goto next;
		}
1277
		if (!buffer_verified(bh) &&
1278
1279
		    !is_dx_internal_node(dir, block,
					 (struct ext4_dir_entry *)bh->b_data) &&
1280
1281
1282
1283
1284
1285
1286
1287
		    !ext4_dirent_csum_verify(dir,
				(struct ext4_dir_entry *)bh->b_data)) {
			EXT4_ERROR_INODE(dir, "checksumming directory "
					 "block %lu", (unsigned long)block);
			brelse(bh);
			goto next;
		}
		set_buffer_verified(bh);
1288
		i = search_dirblock(bh, dir, d_name,
1289
			    block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
1290
		if (i == 1) {
1291
			EXT4_I(dir)->i_dir_start_lookup = block;
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
			ret = bh;
			goto cleanup_and_exit;
		} else {
			brelse(bh);
			if (i < 0)
				goto cleanup_and_exit;
		}
	next:
		if (++block >= nblocks)
			block = 0;
	} while (block != start);

	/*
	 * If the directory has grown while we were searching, then
	 * search the last part of the directory before giving up.
	 */
	block = nblocks;
1309
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1310
1311
1312
1313
1314
1315
1316
1317
	if (block < nblocks) {
		start = 0;
		goto restart;
	}

cleanup_and_exit:
	/* Clean up the read-ahead blocks */
	for (; ra_ptr < ra_max; ra_ptr++)
1318
		brelse(bh_use[ra_ptr]);
1319
1320
1321
	return ret;
}

1322
static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
1323
		       struct ext4_dir_entry_2 **res_dir, int *err)
1324
{
1325
	struct super_block * sb = dir->i_sb;
1326
1327
1328
	struct dx_hash_info	hinfo;
	struct dx_frame frames[2], *frame;
	struct buffer_head *bh;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1329
	ext4_lblk_t block;
1330
1331
	int retval;

1332
1333
	if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
		return NULL;
1334
1335
	do {
		block = dx_get_block(frame->at);
Carlos Maiolino's avatar
Carlos Maiolino committed
1336
1337
1338
1339
1340
1341
1342
		if (!(bh = ext4_bread(NULL, dir, block, 0, err))) {
			if (!(*err)) {
				*err = -EIO;
				ext4_error(dir->i_sb,
					   "Directory hole detected on inode %lu\n",
					   dir->i_ino);
			}
1343
			goto errout;
Carlos Maiolino's avatar
Carlos Maiolino committed
1344
		}
1345

1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
		if (!buffer_verified(bh) &&
		    !ext4_dirent_csum_verify(dir,
				(struct ext4_dir_entry *)bh->b_data)) {
			EXT4_ERROR_INODE(dir, "checksumming directory "
					 "block %lu", (unsigned long)block);
			brelse(bh);
			*err = -EIO;
			goto errout;
		}
		set_buffer_verified(bh);
1356
1357
1358
1359
1360
1361
		retval = search_dirblock(bh, dir, d_name,
					 block << EXT4_BLOCK_SIZE_BITS(sb),
					 res_dir);
		if (retval == 1) { 	/* Success! */
			dx_release(frames);
			return bh;
1362
		}
1363
		brelse(bh);
1364
1365
1366
1367
1368
		if (retval == -1) {
			*err = ERR_BAD_DX_DIR;
			goto errout;
		}

1369
		/* Check to see if we should continue to search */
1370
		retval = ext4_htree_next_block(dir, hinfo.hash, frame,
1371
1372
					       frames, NULL);
		if (retval < 0) {
1373
			ext4_warning(sb,
1374
1375
1376
1377
1378
1379
1380
1381
1382
			     "error reading index page in directory #%lu",
			     dir->i_ino);
			*err = retval;
			goto errout;
		}
	} while (retval == 1);

	*err = -ENOENT;
errout:
1383
	dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
1384
1385
1386
1387
	dx_release (frames);
	return NULL;
}

Al Viro's avatar
Al Viro committed
1388
static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1389
{
1390
1391
1392
	struct inode *inode;
	struct ext4_dir_entry_2 *de;
	struct buffer_head *bh;
1393

1394
	if (dentry->d_name.len > EXT4_NAME_LEN)
1395
1396
		return ERR_PTR(-ENAMETOOLONG);

1397
	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
1398
1399
	inode = NULL;
	if (bh) {
1400
		__u32 ino = le32_to_cpu(de->inode);
1401
		brelse(bh);