namei.c 85.9 KB
Newer Older
1
/*
2
 *  linux/fs/ext4/namei.c
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/namei.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *  Directory entry file type support and forward compatibility hooks
 *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
 *  Hash Tree Directory indexing (c)
 *	Daniel Phillips, 2001
 *  Hash Tree Directory indexing porting
 *	Christopher Li, 2002
 *  Hash Tree Directory indexing cleanup
 *	Theodore Ts'o, 2002
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
29
#include <linux/jbd2.h>
30
31
32
33
34
35
36
#include <linux/time.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
37
38
#include "ext4.h"
#include "ext4_jbd2.h"
39
40
41
42

#include "xattr.h"
#include "acl.h"

43
#include <trace/events/ext4.h>
44
45
46
47
48
/*
 * define how far ahead to read directories while searching them.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
Dave Kleikamp's avatar
Dave Kleikamp committed
49
#define NAMEI_RA_SIZE	     (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
50

51
static struct buffer_head *ext4_append(handle_t *handle,
52
					struct inode *inode,
53
					ext4_lblk_t *block)
54
55
{
	struct buffer_head *bh;
56
	int err = 0;
57

58
59
	if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
		     ((inode->i_size >> 10) >=
60
61
		      EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
		return ERR_PTR(-ENOSPC);
62

63
64
	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;

65
66
67
68
69
70
71
72
73
74
	bh = ext4_bread(handle, inode, *block, 1, &err);
	if (!bh)
		return ERR_PTR(err);
	inode->i_size += inode->i_sb->s_blocksize;
	EXT4_I(inode)->i_disksize = inode->i_size;
	err = ext4_journal_get_write_access(handle, bh);
	if (err) {
		brelse(bh);
		ext4_std_error(inode->i_sb, err);
		return ERR_PTR(err);
Carlos Maiolino's avatar
Carlos Maiolino committed
75
	}
76
77
78
	return bh;
}

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent);

typedef enum {
	EITHER, INDEX, DIRENT
} dirblock_type_t;

#define ext4_read_dirblock(inode, block, type) \
	__ext4_read_dirblock((inode), (block), (type), __LINE__)

static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
					      ext4_lblk_t block,
					      dirblock_type_t type,
					      unsigned int line)
{
	struct buffer_head *bh;
	struct ext4_dir_entry *dirent;
	int err = 0, is_dx_block = 0;

	bh = ext4_bread(NULL, inode, block, 0, &err);
	if (!bh) {
		if (err == 0) {
			ext4_error_inode(inode, __func__, line, block,
					       "Directory hole found");
			return ERR_PTR(-EIO);
		}
		__ext4_warning(inode->i_sb, __func__, line,
			       "error reading directory block "
			       "(ino %lu, block %lu)", inode->i_ino,
			       (unsigned long) block);
		return ERR_PTR(err);
	}
	dirent = (struct ext4_dir_entry *) bh->b_data;
	/* Determine whether or not we have an index block */
	if (is_dx(inode)) {
		if (block == 0)
			is_dx_block = 1;
		else if (ext4_rec_len_from_disk(dirent->rec_len,
						inode->i_sb->s_blocksize) ==
			 inode->i_sb->s_blocksize)
			is_dx_block = 1;
	}
	if (!is_dx_block && type == INDEX) {
		ext4_error_inode(inode, __func__, line, block,
		       "directory leaf block found instead of index block");
		return ERR_PTR(-EIO);
	}
	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
	    buffer_verified(bh))
		return bh;

	/*
	 * An empty leaf block can get mistaken for a index block; for
	 * this reason, we can only check the index checksum when the
	 * caller is sure it should be an index block.
	 */
	if (is_dx_block && type == INDEX) {
		if (ext4_dx_csum_verify(inode, dirent))
			set_buffer_verified(bh);
		else {
			ext4_error_inode(inode, __func__, line, block,
				"Directory index failed checksum");
142
			brelse(bh);
143
			return ERR_PTR(-EIO);
144
		}
145
	}
146
147
148
149
150
151
152
153
154
	if (!is_dx_block) {
		if (ext4_dirent_csum_verify(inode, dirent))
			set_buffer_verified(bh);
		else {
			ext4_error_inode(inode, __func__, line, block,
				"Directory block failed checksum");
			brelse(bh);
			return ERR_PTR(-EIO);
		}
Carlos Maiolino's avatar
Carlos Maiolino committed
155
	}
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
	return bh;
}

#ifndef assert
#define assert(test) J_ASSERT(test)
#endif

#ifdef DX_DEBUG
#define dxtrace(command) command
#else
#define dxtrace(command)
#endif

struct fake_dirent
{
	__le32 inode;
	__le16 rec_len;
	u8 name_len;
	u8 file_type;
};

struct dx_countlimit
{
	__le16 limit;
	__le16 count;
};

struct dx_entry
{
	__le32 hash;
	__le32 block;
};

/*
 * dx_root_info is laid out so that if it should somehow get overlaid by a
 * dirent the two low bits of the hash version will be zero.  Therefore, the
 * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
 */

struct dx_root
{
	struct fake_dirent dot;
	char dot_name[4];
	struct fake_dirent dotdot;
	char dotdot_name[4];
	struct dx_root_info
	{
		__le32 reserved_zero;
		u8 hash_version;
		u8 info_length; /* 8 */
		u8 indirect_levels;
		u8 unused_flags;
	}
	info;
	struct dx_entry	entries[0];
};

struct dx_node
{
	struct fake_dirent fake;
	struct dx_entry	entries[0];
};


struct dx_frame
{
	struct buffer_head *bh;
	struct dx_entry *entries;
	struct dx_entry *at;
};

struct dx_map_entry
{
	u32 hash;
230
231
	u16 offs;
	u16 size;
232
233
};

234
235
236
237
238
239
240
241
/*
 * This goes at the end of each htree block.
 */
struct dx_tail {
	u32 dt_reserved;
	__le32 dt_checksum;	/* crc32c(uuid+inum+dirblock) */
};

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
242
243
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
244
245
246
247
248
249
250
251
static inline unsigned dx_get_hash(struct dx_entry *entry);
static void dx_set_hash(struct dx_entry *entry, unsigned value);
static unsigned dx_get_count(struct dx_entry *entries);
static unsigned dx_get_limit(struct dx_entry *entries);
static void dx_set_count(struct dx_entry *entries, unsigned value);
static void dx_set_limit(struct dx_entry *entries, unsigned value);
static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
static unsigned dx_node_limit(struct inode *dir);
252
static struct dx_frame *dx_probe(const struct qstr *d_name,
253
254
255
256
				 struct inode *dir,
				 struct dx_hash_info *hinfo,
				 struct dx_frame *frame,
				 int *err);
257
static void dx_release(struct dx_frame *frames);
258
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
259
		       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
260
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
261
static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
262
		struct dx_map_entry *offsets, int count, unsigned blocksize);
263
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
264
265
static void dx_insert_block(struct dx_frame *frame,
					u32 hash, ext4_lblk_t block);
266
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
267
268
269
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash);
270
271
272
273
static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
		const struct qstr *d_name,
		struct ext4_dir_entry_2 **res_dir,
		int *err);
274
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
275
276
			     struct inode *inode);

277
/* checksumming functions */
278
279
void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
			    unsigned int blocksize)
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
{
	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
	t->det_rec_len = ext4_rec_len_to_disk(
			sizeof(struct ext4_dir_entry_tail), blocksize);
	t->det_reserved_ft = EXT4_FT_DIR_CSUM;
}

/* Walk through a dirent block to find a checksum "dirent" at the tail */
static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
						   struct ext4_dir_entry *de)
{
	struct ext4_dir_entry_tail *t;

#ifdef PARANOID
	struct ext4_dir_entry *d, *top;

	d = de;
	top = (struct ext4_dir_entry *)(((void *)de) +
		(EXT4_BLOCK_SIZE(inode->i_sb) -
		sizeof(struct ext4_dir_entry_tail)));
	while (d < top && d->rec_len)
		d = (struct ext4_dir_entry *)(((void *)d) +
		    le16_to_cpu(d->rec_len));

	if (d != top)
		return NULL;

	t = (struct ext4_dir_entry_tail *)d;
#else
	t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
#endif

	if (t->det_reserved_zero1 ||
	    le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
	    t->det_reserved_zero2 ||
	    t->det_reserved_ft != EXT4_FT_DIR_CSUM)
		return NULL;

	return t;
}

static __le32 ext4_dirent_csum(struct inode *inode,
			       struct ext4_dir_entry *dirent, int size)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	__u32 csum;

	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	return cpu_to_le32(csum);
}

332
333
334
335
336
337
static void warn_no_space_for_csum(struct inode *inode)
{
	ext4_warning(inode->i_sb, "no space in directory inode %lu leaf for "
		     "checksum.  Please run e2fsck -D.", inode->i_ino);
}

338
339
340
341
342
343
344
345
346
347
int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
348
		warn_no_space_for_csum(inode);
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
		return 0;
	}

	if (t->det_checksum != ext4_dirent_csum(inode, dirent,
						(void *)t - (void *)dirent))
		return 0;

	return 1;
}

static void ext4_dirent_csum_set(struct inode *inode,
				 struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
370
		warn_no_space_for_csum(inode);
371
372
373
374
375
376
377
		return;
	}

	t->det_checksum = ext4_dirent_csum(inode, dirent,
					   (void *)t - (void *)dirent);
}

378
379
380
int ext4_handle_dirty_dirent_node(handle_t *handle,
				  struct inode *inode,
				  struct buffer_head *bh)
381
382
383
384
385
{
	ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
					       struct ext4_dir_entry *dirent,
					       int *offset)
{
	struct ext4_dir_entry *dp;
	struct dx_root_info *root;
	int count_offset;

	if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
		count_offset = 8;
	else if (le16_to_cpu(dirent->rec_len) == 12) {
		dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
		if (le16_to_cpu(dp->rec_len) !=
		    EXT4_BLOCK_SIZE(inode->i_sb) - 12)
			return NULL;
		root = (struct dx_root_info *)(((void *)dp + 12));
		if (root->reserved_zero ||
		    root->info_length != sizeof(struct dx_root_info))
			return NULL;
		count_offset = 32;
	} else
		return NULL;

	if (offset)
		*offset = count_offset;
	return (struct dx_countlimit *)(((void *)dirent) + count_offset);
}

static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
			   int count_offset, int count, struct dx_tail *t)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
419
420
	__u32 csum;
	__le32 save_csum;
421
422
423
	int size;

	size = count_offset + (count * sizeof(struct dx_entry));
424
	save_csum = t->dt_checksum;
425
426
427
	t->dt_checksum = 0;
	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
428
	t->dt_checksum = save_csum;
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452

	return cpu_to_le32(csum);
}

static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return 1;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
453
		warn_no_space_for_csum(inode);
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
		return 1;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
					    count, t))
		return 0;
	return 1;
}

static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
483
		warn_no_space_for_csum(inode);
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
		return;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
}

static inline int ext4_handle_dirty_dx_node(handle_t *handle,
					    struct inode *inode,
					    struct buffer_head *bh)
{
	ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

499
500
501
502
/*
 * p is at least 6 bytes before the end of page
 */
static inline struct ext4_dir_entry_2 *
503
ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
504
505
{
	return (struct ext4_dir_entry_2 *)((char *)p +
506
		ext4_rec_len_from_disk(p->rec_len, blocksize));
507
508
}

509
510
511
512
513
/*
 * Future: use high four bits of block for coalesce-on-delete flags
 * Mask them off for now.
 */

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
514
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
515
516
517
518
{
	return le32_to_cpu(entry->block) & 0x00ffffff;
}

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
519
static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
520
521
522
523
{
	entry->block = cpu_to_le32(value);
}

524
static inline unsigned dx_get_hash(struct dx_entry *entry)
525
526
527
528
{
	return le32_to_cpu(entry->hash);
}

529
static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
530
531
532
533
{
	entry->hash = cpu_to_le32(value);
}

534
static inline unsigned dx_get_count(struct dx_entry *entries)
535
536
537
538
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->count);
}

539
static inline unsigned dx_get_limit(struct dx_entry *entries)
540
541
542
543
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
}

544
static inline void dx_set_count(struct dx_entry *entries, unsigned value)
545
546
547
548
{
	((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
}

549
static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
550
551
552
553
{
	((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
}

554
static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
555
{
556
557
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
		EXT4_DIR_REC_LEN(2) - infosize;
558
559
560
561

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
562
	return entry_space / sizeof(struct dx_entry);
563
564
}

565
static inline unsigned dx_node_limit(struct inode *dir)
566
{
567
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
568
569
570
571

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
572
	return entry_space / sizeof(struct dx_entry);
573
574
575
576
577
578
}

/*
 * Debug
 */
#ifdef DX_DEBUG
579
static void dx_show_index(char * label, struct dx_entry *entries)
580
{
581
	int i, n = dx_get_count (entries);
582
	printk(KERN_DEBUG "%s index ", label);
583
	for (i = 0; i < n; i++) {
584
		printk("%x->%lu ", i ? dx_get_hash(entries + i) :
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
585
				0, (unsigned long)dx_get_block(entries + i));
586
587
	}
	printk("\n");
588
589
590
591
592
593
594
595
596
}

struct stats
{
	unsigned names;
	unsigned space;
	unsigned bcount;
};

597
static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
				 int size, int show_names)
{
	unsigned names = 0, space = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

	printk("names: ");
	while ((char *) de < base + size)
	{
		if (de->inode)
		{
			if (show_names)
			{
				int len = de->name_len;
				char *name = de->name;
				while (len--) printk("%c", *name++);
614
				ext4fs_dirhash(de->name, de->name_len, &h);
615
				printk(":%x.%u ", h.hash,
616
				       (unsigned) ((char *) de - base));
617
			}
618
			space += EXT4_DIR_REC_LEN(de->name_len);
619
620
			names++;
		}
621
		de = ext4_next_entry(de, size);
622
623
624
625
626
627
628
629
630
	}
	printk("(%i)\n", names);
	return (struct stats) { names, space, 1 };
}

struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
			     struct dx_entry *entries, int levels)
{
	unsigned blocksize = dir->i_sb->s_blocksize;
631
	unsigned count = dx_get_count(entries), names = 0, space = 0, i;
632
633
634
635
636
637
	unsigned bcount = 0;
	struct buffer_head *bh;
	int err;
	printk("%i indexed blocks...\n", count);
	for (i = 0; i < count; i++, entries++)
	{
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
638
639
		ext4_lblk_t block = dx_get_block(entries);
		ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
640
641
642
		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
		struct stats stats;
		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
643
		if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
644
645
		stats = levels?
		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
646
		   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
647
648
649
		names += stats.names;
		space += stats.space;
		bcount += stats.bcount;
650
		brelse(bh);
651
652
	}
	if (bcount)
653
		printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
654
655
		       levels ? "" : "   ", names, space/bcount,
		       (space/bcount)*100/blocksize);
656
657
658
659
660
661
662
663
664
665
666
667
668
669
	return (struct stats) { names, space, bcount};
}
#endif /* DX_DEBUG */

/*
 * Probe for a directory leaf block to search.
 *
 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
 * error in the directory index, and the caller should fall back to
 * searching the directory normally.  The callers of dx_probe **MUST**
 * check for this error code, and make sure it never gets reflected
 * back to userspace.
 */
static struct dx_frame *
670
dx_probe(const struct qstr *d_name, struct inode *dir,
671
672
673
674
675
676
677
678
679
680
	 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
{
	unsigned count, indirect;
	struct dx_entry *at, *entries, *p, *q, *m;
	struct dx_root *root;
	struct buffer_head *bh;
	struct dx_frame *frame = frame_in;
	u32 hash;

	frame->bh = NULL;
681
682
683
	bh = ext4_read_dirblock(dir, 0, INDEX);
	if (IS_ERR(bh)) {
		*err = PTR_ERR(bh);
684
		goto fail;
Carlos Maiolino's avatar
Carlos Maiolino committed
685
	}
686
687
688
689
	root = (struct dx_root *) bh->b_data;
	if (root->info.hash_version != DX_HASH_TEA &&
	    root->info.hash_version != DX_HASH_HALF_MD4 &&
	    root->info.hash_version != DX_HASH_LEGACY) {
690
		ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
691
692
693
694
695
696
			     root->info.hash_version);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}
	hinfo->hash_version = root->info.hash_version;
697
698
	if (hinfo->hash_version <= DX_HASH_TEA)
		hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
699
	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
700
701
	if (d_name)
		ext4fs_dirhash(d_name->name, d_name->len, hinfo);
702
703
704
	hash = hinfo->hash;

	if (root->info.unused_flags & 1) {
705
		ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
706
707
708
709
710
711
712
			     root->info.unused_flags);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	if ((indirect = root->info.indirect_levels) > 1) {
713
		ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
714
715
716
717
718
719
720
721
			     root->info.indirect_levels);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	entries = (struct dx_entry *) (((char *)&root->info) +
				       root->info.info_length);
722
723
724

	if (dx_get_limit(entries) != dx_root_limit(dir,
						   root->info.info_length)) {
725
		ext4_warning(dir->i_sb, "dx entry: limit != root limit");
726
727
728
729
730
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

731
	dxtrace(printk("Look up %x", hash));
732
733
734
	while (1)
	{
		count = dx_get_count(entries);
735
		if (!count || count > dx_get_limit(entries)) {
736
			ext4_warning(dir->i_sb,
737
738
739
740
741
742
				     "dx entry: no count or count > limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}

743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
		p = entries + 1;
		q = entries + count - 1;
		while (p <= q)
		{
			m = p + (q - p)/2;
			dxtrace(printk("."));
			if (dx_get_hash(m) > hash)
				q = m - 1;
			else
				p = m + 1;
		}

		if (0) // linear search cross check
		{
			unsigned n = count - 1;
			at = entries;
			while (n--)
			{
				dxtrace(printk(","));
				if (dx_get_hash(++at) > hash)
				{
					at--;
					break;
				}
			}
			assert (at == p - 1);
		}

		at = p - 1;
		dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
		frame->bh = bh;
		frame->entries = entries;
		frame->at = at;
		if (!indirect--) return frame;
777
778
779
		bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
		if (IS_ERR(bh)) {
			*err = PTR_ERR(bh);
780
			goto fail2;
781
		}
782
		entries = ((struct dx_node *) bh->b_data)->entries;
783

784
		if (dx_get_limit(entries) != dx_node_limit (dir)) {
785
			ext4_warning(dir->i_sb,
786
787
788
789
790
				     "dx entry: limit != node limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}
791
		frame++;
792
		frame->bh = NULL;
793
794
795
796
797
798
799
	}
fail2:
	while (frame >= frame_in) {
		brelse(frame->bh);
		frame--;
	}
fail:
800
	if (*err == ERR_BAD_DX_DIR)
801
		ext4_warning(dir->i_sb,
Zheng Liu's avatar
Zheng Liu committed
802
			     "Corrupt dir inode %lu, running e2fsck is "
803
			     "recommended.", dir->i_ino);
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
	return NULL;
}

static void dx_release (struct dx_frame *frames)
{
	if (frames[0].bh == NULL)
		return;

	if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
		brelse(frames[1].bh);
	brelse(frames[0].bh);
}

/*
 * This function increments the frame pointer to search the next leaf
 * block, and reads in the necessary intervening nodes if the search
 * should be necessary.  Whether or not the search is necessary is
 * controlled by the hash parameter.  If the hash value is even, then
 * the search is only continued if the next block starts with that
 * hash value.  This is used if we are searching for a specific file.
 *
 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 *
 * This function returns 1 if the caller should continue to search,
 * or 0 if it should not.  If there is an error reading one of the
 * index blocks, it will a negative error code.
 *
 * If start_hash is non-null, it will be filled in with the starting
 * hash of the next page.
 */
834
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
835
836
837
838
839
840
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash)
{
	struct dx_frame *p;
	struct buffer_head *bh;
841
	int num_frames = 0;
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
	__u32 bhash;

	p = frame;
	/*
	 * Find the next leaf page by incrementing the frame pointer.
	 * If we run out of entries in the interior node, loop around and
	 * increment pointer in the parent node.  When we break out of
	 * this loop, num_frames indicates the number of interior
	 * nodes need to be read.
	 */
	while (1) {
		if (++(p->at) < p->entries + dx_get_count(p->entries))
			break;
		if (p == frames)
			return 0;
		num_frames++;
		p--;
	}

	/*
	 * If the hash is 1, then continue only if the next page has a
	 * continuation hash of any value.  This is used for readdir
	 * handling.  Otherwise, check to see if the hash matches the
	 * desired contiuation hash.  If it doesn't, return since
	 * there's no point to read in the successive index pages.
	 */
	bhash = dx_get_hash(p->at);
	if (start_hash)
		*start_hash = bhash;
	if ((hash & 1) == 0) {
		if ((bhash & ~1) != hash)
			return 0;
	}
	/*
	 * If the hash is HASH_NB_ALWAYS, we always go to the next
	 * block so no check is necessary
	 */
	while (num_frames--) {
880
881
882
		bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
		if (IS_ERR(bh))
			return PTR_ERR(bh);
883
		p++;
884
		brelse(p->bh);
885
886
887
888
889
890
891
892
893
894
895
896
897
		p->bh = bh;
		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
	}
	return 1;
}


/*
 * This function fills a red-black tree with information from a
 * directory block.  It returns the number directory entries loaded
 * into the tree.  If there is an error it is returned in err.
 */
static int htree_dirblock_to_tree(struct file *dir_file,
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
898
				  struct inode *dir, ext4_lblk_t block,
899
900
901
902
				  struct dx_hash_info *hinfo,
				  __u32 start_hash, __u32 start_minor_hash)
{
	struct buffer_head *bh;
903
	struct ext4_dir_entry_2 *de, *top;
904
	int err = 0, count = 0;
905

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
906
907
	dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
							(unsigned long)block));
908
909
910
	bh = ext4_read_dirblock(dir, block, DIRENT);
	if (IS_ERR(bh))
		return PTR_ERR(bh);
911

912
913
	de = (struct ext4_dir_entry_2 *) bh->b_data;
	top = (struct ext4_dir_entry_2 *) ((char *) de +
914
					   dir->i_sb->s_blocksize -
915
					   EXT4_DIR_REC_LEN(0));
916
	for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
917
		if (ext4_check_dir_entry(dir, NULL, de, bh,
918
				bh->b_data, bh->b_size,
919
920
				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
					 + ((char *)de - bh->b_data))) {
921
922
			/* silently ignore the rest of the block */
			break;
923
		}
924
		ext4fs_dirhash(de->name, de->name_len, hinfo);
925
926
927
928
929
930
		if ((hinfo->hash < start_hash) ||
		    ((hinfo->hash == start_hash) &&
		     (hinfo->minor_hash < start_minor_hash)))
			continue;
		if (de->inode == 0)
			continue;
931
		if ((err = ext4_htree_store_dirent(dir_file,
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
				   hinfo->hash, hinfo->minor_hash, de)) != 0) {
			brelse(bh);
			return err;
		}
		count++;
	}
	brelse(bh);
	return count;
}


/*
 * This function fills a red-black tree with information from a
 * directory.  We start scanning the directory in hash order, starting
 * at start_hash and start_minor_hash.
 *
 * This function returns the number of entries inserted into the tree,
 * or a negative error code.
 */
951
int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
952
953
954
			 __u32 start_minor_hash, __u32 *next_hash)
{
	struct dx_hash_info hinfo;
955
	struct ext4_dir_entry_2 *de;
956
957
	struct dx_frame frames[2], *frame;
	struct inode *dir;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
958
	ext4_lblk_t block;
959
	int count = 0;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
960
	int ret, err;
961
962
	__u32 hashval;

963
	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
964
		       start_hash, start_minor_hash));
Al Viro's avatar
Al Viro committed
965
	dir = file_inode(dir_file);
966
	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
967
		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
968
969
970
		if (hinfo.hash_version <= DX_HASH_TEA)
			hinfo.hash_version +=
				EXT4_SB(dir->i_sb)->s_hash_unsigned;
971
		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
972
973
974
975
976
977
978
979
980
981
982
		if (ext4_has_inline_data(dir)) {
			int has_inline_data = 1;
			count = htree_inlinedir_to_tree(dir_file, dir, 0,
							&hinfo, start_hash,
							start_minor_hash,
							&has_inline_data);
			if (has_inline_data) {
				*next_hash = ~0;
				return count;
			}
		}
983
984
985
986
987
988
989
		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
					       start_hash, start_minor_hash);
		*next_hash = ~0;
		return count;
	}
	hinfo.hash = start_hash;
	hinfo.minor_hash = 0;
990
	frame = dx_probe(NULL, dir, &hinfo, frames, &err);
991
992
993
994
995
	if (!frame)
		return err;

	/* Add '.' and '..' from the htree header */
	if (!start_hash && !start_minor_hash) {
996
997
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
		if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
998
999
1000
1001
			goto errout;
		count++;
	}
	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1002
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1003
		de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1004
		if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
			goto errout;
		count++;
	}

	while (1) {
		block = dx_get_block(frame->at);
		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
					     start_hash, start_minor_hash);
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		count += ret;
		hashval = ~0;
1019
		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
					    frame, frames, &hashval);
		*next_hash = hashval;
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		/*
		 * Stop if:  (a) there are no more entries, or
		 * (b) we have inserted at least one entry and the
		 * next hash value is not a continuation
		 */
		if ((ret == 0) ||
		    (count && ((hashval & 1) == 0)))
			break;
	}
	dx_release(frames);
1036
1037
	dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
		       "next hash: %x\n", count, *next_hash));
1038
1039
1040
1041
1042
1043
	return count;
errout:
	dx_release(frames);
	return (err);
}

1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
static inline int search_dirblock(struct buffer_head *bh,
				  struct inode *dir,
				  const struct qstr *d_name,
				  unsigned int offset,
				  struct ext4_dir_entry_2 **res_dir)
{
	return search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
			  d_name, offset, res_dir);
}

1054
1055
1056
1057
/*
 * Directory block splitting, compacting
 */

1058
1059
1060
1061
/*
 * Create map of hash values, offsets, and sizes, stored at end of block.
 * Returns number of entries mapped.
 */
1062
1063
1064
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
		       struct dx_hash_info *hinfo,
		       struct dx_map_entry *map_tail)
1065
1066
1067
1068
1069
{
	int count = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

1070
	while ((char *) de < base + blocksize) {
1071
		if (de->name_len && de->inode) {
1072
			ext4fs_dirhash(de->name, de->name_len, &h);
1073
1074
			map_tail--;
			map_tail->hash = h.hash;
1075
			map_tail->offs = ((char *) de - base)>>2;
1076
			map_tail->size = le16_to_cpu(de->rec_len);
1077
1078
1079
1080
			count++;
			cond_resched();
		}
		/* XXX: do we need to check rec_len == 0 case? -Chris */
1081
		de = ext4_next_entry(de, blocksize);
1082
1083
1084
1085
	}
	return count;
}

1086
/* Sort map by hash value */
1087
1088
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
	struct dx_map_entry *p, *q, *top = map + count - 1;
	int more;
	/* Combsort until bubble sort doesn't suck */
	while (count > 2) {
		count = count*10/13;
		if (count - 9 < 2) /* 9, 10 -> 11 */
			count = 11;
		for (p = top, q = p - count; q >= map; p--, q--)
			if (p->hash < q->hash)
				swap(*p, *q);
	}
	/* Garden variety bubble sort */
	do {
		more = 0;
		q = top;
		while (q-- > map) {
			if (q[1].hash >= q[0].hash)
1106
				continue;
1107
1108
			swap(*(q+1), *q);
			more = 1;
1109
1110
1111
1112
		}
	} while(more);
}

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1113
static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
{
	struct dx_entry *entries = frame->entries;
	struct dx_entry *old = frame->at, *new = old + 1;
	int count = dx_get_count(entries);

	assert(count < dx_get_limit(entries));
	assert(old < entries + count);
	memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
	dx_set_hash(new, hash);
	dx_set_block(new, block);
	dx_set_count(entries, count + 1);
}

/*
1128
 * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
1129
 *
1130
 * `len <= EXT4_NAME_LEN' is guaranteed by caller.
1131
1132
 * `de != NULL' is guaranteed by caller.
 */
1133
1134
static inline int ext4_match (int len, const char * const name,
			      struct ext4_dir_entry_2 * de)
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
{
	if (len != de->name_len)
		return 0;
	if (!de->inode)
		return 0;
	return !memcmp(name, de->name, len);
}

/*
 * Returns 0 if not found, -1 on failure, and 1 on success
 */
1146
1147
1148
1149
1150
1151
1152
int search_dir(struct buffer_head *bh,
	       char *search_buf,
	       int buf_size,
	       struct inode *dir,
	       const struct qstr *d_name,
	       unsigned int offset,
	       struct ext4_dir_entry_2 **res_dir)
1153
{
1154
	struct ext4_dir_entry_2 * de;
1155
1156
	char * dlimit;
	int de_len;
1157
1158
	const char *name = d_name->name;
	int namelen = d_name->len;
1159

1160
1161
	de = (struct ext4_dir_entry_2 *)search_buf;
	dlimit = search_buf + buf_size;
1162
1163
1164
1165
1166
	while ((char *) de < dlimit) {
		/* this code is executed quadratically often */
		/* do minimal checking `by hand' */

		if ((char *) de + namelen <= dlimit &&
1167
		    ext4_match (namelen, name, de)) {
1168
			/* found a match - just to be sure, do a full check */
1169
1170
			if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
						 bh->b_size, offset))
1171
1172
1173
1174
1175
				return -1;
			*res_dir = de;
			return 1;
		}
		/* prevent looping on a bad block */
1176
1177
		de_len = ext4_rec_len_from_disk(de->rec_len,
						dir->i_sb->s_blocksize);
1178
1179
1180
		if (de_len <= 0)
			return -1;
		offset += de_len;
1181
		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1182
1183
1184
1185
	}
	return 0;
}

1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
			       struct ext4_dir_entry *de)
{
	struct super_block *sb = dir->i_sb;

	if (!is_dx(dir))
		return 0;
	if (block == 0)
		return 1;
	if (de->inode == 0 &&
	    ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
			sb->s_blocksize)
		return 1;
	return 0;
}
1201
1202

/*
1203
 *	ext4_find_entry()
1204
1205
1206
1207
1208
1209
1210
1211
1212
 *
 * finds an entry in the specified directory with the wanted name. It
 * returns the cache buffer in which the entry was found, and the entry
 * itself (as a parameter - res_dir). It does NOT read the inode of the
 * entry - you'll have to do that yourself if you want to.
 *
 * The returned buffer_head has ->b_count elevated.  The caller is expected
 * to brelse() it when appropriate.
 */
1213
1214
static struct buffer_head * ext4_find_entry (struct inode *dir,
					const struct qstr *d_name,
1215
1216
					struct ext4_dir_entry_2 **res_dir,
					int *inlined)
1217
{
1218
1219
1220
	struct super_block *sb;
	struct buffer_head *bh_use[NAMEI_RA_SIZE];
	struct buffer_head *bh, *ret = NULL;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1221
	ext4_lblk_t start, block, b;
1222
	const u8 *name = d_name->name;
1223
1224
1225
1226
1227
	int ra_max = 0;		/* Number of bh's in the readahead
				   buffer, bh_use[] */
	int ra_ptr = 0;		/* Current index into readahead
				   buffer */
	int num = 0;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1228
1229
	ext4_lblk_t  nblocks;
	int i, err;
1230
1231
1232
1233
	int namelen;

	*res_dir = NULL;
	sb = dir->i_sb;
1234
	namelen = d_name->len;
1235
	if (namelen > EXT4_NAME_LEN)
1236
		return NULL;
1237
1238
1239
1240
1241

	if (ext4_has_inline_data(dir)) {
		int has_inline_data = 1;
		ret = ext4_find_inline_entry(dir, d_name, res_dir,
					     &has_inline_data);
1242
1243
1244
		if (has_inline_data) {
			if (inlined)
				*inlined = 1;
1245
			return ret;
1246
		}
1247
1248
	}

1249
	if ((namelen <= 2) && (name[0] == '.') &&
1250
	    (name[1] == '.' || name[1] == '\0')) {
1251
1252
1253
1254
1255
1256
1257
1258
		/*
		 * "." or ".." will only be in the first block
		 * NFS may look up ".."; "." should be handled by the VFS
		 */
		block = start = 0;
		nblocks = 1;
		goto restart;
	}
1259
	if (is_dx(dir)) {
1260
		bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
1261
1262
1263
1264
1265
1266
1267
		/*
		 * On success, or if the error was file not found,
		 * return.  Otherwise, fall back to doing a search the
		 * old fashioned way.
		 */
		if (bh || (err != ERR_BAD_DX_DIR))
			return bh;
1268
1269
		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
			       "falling back\n"));
1270
	}
1271
1272
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
	start = EXT4_I(dir)->i_dir_start_lookup;
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
	if (start >= nblocks)
		start = 0;
	block = start;
restart:
	do {
		/*
		 * We deal with the read-ahead logic here.
		 */
		if (ra_ptr >= ra_max) {
			/* Refill the readahead buffer */
			ra_ptr = 0;
			b = block;
			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				/*
				 * Terminate if we reach the end of the
				 * directory and must wrap, or if our
				 * search has finished at this block.
				 */
				if (b >= nblocks || (num && block == start)) {
					bh_use[ra_max] = NULL;
					break;
				}
				num++;
1296
				bh = ext4_getblk(NULL, dir, b++, 0, &err);
1297
1298
				bh_use[ra_max] = bh;
				if (bh)
1299
1300
					ll_rw_block(READ | REQ_META | REQ_PRIO,
						    1, &bh);
1301
1302
1303
1304
1305
1306
1307
			}
		}
		if ((bh = bh_use[ra_ptr++]) == NULL)
			goto next;
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			/* read error, skip block & hope for the best */
1308
1309
			EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
					 (unsigned long) block);
1310
1311
1312
			brelse(bh);
			goto next;
		}
1313
		if (!buffer_verified(bh) &&
1314
1315
		    !is_dx_internal_node(dir, block,
					 (struct ext4_dir_entry *)bh->b_data) &&
1316
1317
1318
1319
1320
1321
1322
1323
		    !ext4_dirent_csum_verify(dir,
				(struct ext4_dir_entry *)bh->b_data)) {
			EXT4_ERROR_INODE(dir, "checksumming directory "
					 "block %lu", (unsigned long)block);
			brelse(bh);
			goto next;
		}
		set_buffer_verified(bh);
1324
		i = search_dirblock(bh, dir, d_name,
1325
			    block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
1326
		if (i == 1) {
1327
			EXT4_I(dir)->i_dir_start_lookup = block;
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
			ret = bh;
			goto cleanup_and_exit;
		} else {
			brelse(bh);
			if (i < 0)
				goto cleanup_and_exit;
		}
	next:
		if (++block >= nblocks)
			block = 0;
	} while (block != start);

	/*
	 * If the directory has grown while we were searching, then
	 * search the last part of the directory before giving up.
	 */
	block = nblocks;
1345
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1346
1347
1348
1349
1350
1351
1352
1353
	if (block < nblocks) {
		start = 0;
		goto restart;
	}

cleanup_and_exit:
	/* Clean up the read-ahead blocks */
	for (; ra_ptr < ra_max; ra_ptr++)
1354
		brelse(bh_use[ra_ptr]);
1355
1356
1357
	return ret;
}

1358
static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
1359
		       struct ext4_dir_entry_2 **res_dir, int *err)
1360
{
1361
	struct super_block * sb = dir->i_sb;
1362
1363
1364
	struct dx_hash_info	hinfo;
	struct dx_frame frames[2], *frame;
	struct buffer_head *bh;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1365
	ext4_lblk_t block;
1366
1367
	int retval;

1368
1369
	if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
		return NULL;
1370
1371
	do {
		block = dx_get_block(frame->at);
1372
1373
1374
		bh = ext4_read_dirblock(dir, block, DIRENT);
		if (IS_ERR(bh)) {
			*err = PTR_ERR(bh);
1375
1376
			goto errout;
		}
1377
1378
1379
1380
1381
1382
		retval = search_dirblock(bh, dir, d_name,
					 block << EXT4_BLOCK_SIZE_BITS(sb),
					 res_dir);
		if (retval == 1) { 	/* Success! */
			dx_release(frames);
			return bh;
1383
		}
1384
		brelse(bh);
1385
1386
1387
1388
1389
		if (retval == -1) {
			*err = ERR_BAD_DX_DIR;
			goto errout;
		}

1390
		/* Check to see if we should continue to search */
1391
		retval = ext4_htree_next_block(dir, hinfo.hash, frame,
1392
1393
					       frames, NULL);
		if (retval < 0) {
1394
			ext4_warning(sb,
1395
1396
1397
1398
1399
1400
1401
1402
1403
			     "error reading index page in directory #%lu",
			     dir->i_ino);
			*err = retval;
			goto errout;
		}
	} while (retval == 1);

	*err = -ENOENT;
errout:
1404
	dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
1405
1406
1407
1408
	dx_release (frames);
	return NULL;
}

Al Viro's avatar
Al Viro committed
1409
static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1410
{
1411
1412
1413
	struct inode *inode;
	struct ext4_dir_entry_2 *de;
	struct buffer_head *bh;