/*
 *  linux/fs/ext4/namei.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/namei.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *  Directory entry file type support and forward compatibility hooks
 *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
 *  Hash Tree Directory indexing (c)
 *	Daniel Phillips, 2001
 *  Hash Tree Directory indexing porting
 *	Christopher Li, 2002
 *  Hash Tree Directory indexing cleanup
 *	Theodore Ts'o, 2002
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
29
#include <linux/jbd2.h>
30
31
32
33
34
35
36
#include <linux/time.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
37
38
#include "ext4.h"
#include "ext4_jbd2.h"
39
40
41
42

#include "xattr.h"
#include "acl.h"

43
#include <trace/events/ext4.h>
44
45
46
47
48
/*
 * define how far ahead to read directories while searching them.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
Dave Kleikamp's avatar
Dave Kleikamp committed
49
#define NAMEI_RA_SIZE	     (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
50

51
static struct buffer_head *ext4_append(handle_t *handle,
52
					struct inode *inode,
53
					ext4_lblk_t *block)
54
55
{
	struct buffer_head *bh;
56
	int err = 0;
57

58
59
	if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
		     ((inode->i_size >> 10) >=
60
61
		      EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
		return ERR_PTR(-ENOSPC);
62

63
64
	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;

65
66
67
68
69
70
71
72
73
74
	bh = ext4_bread(handle, inode, *block, 1, &err);
	if (!bh)
		return ERR_PTR(err);
	inode->i_size += inode->i_sb->s_blocksize;
	EXT4_I(inode)->i_disksize = inode->i_size;
	err = ext4_journal_get_write_access(handle, bh);
	if (err) {
		brelse(bh);
		ext4_std_error(inode->i_sb, err);
		return ERR_PTR(err);
Carlos Maiolino's avatar
Carlos Maiolino committed
75
	}
76
77
78
	return bh;
}

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent);

/*
 * The kind of directory block a caller of ext4_read_dirblock() expects.
 * In __ext4_read_dirblock() only INDEX changes behaviour: a block that
 * does not look like an index block is rejected, and the index checksum
 * is verified only for that type.
 */
typedef enum {
	EITHER, INDEX, DIRENT
} dirblock_type_t;

#define ext4_read_dirblock(inode, block, type) \
	__ext4_read_dirblock((inode), (block), (type), __LINE__)

/*
 * Read logical block @block of directory @inode and sanity-check it.
 *
 * @type: what the caller expects the block to be (see dirblock_type_t)
 * @line: caller's source line, passed through to the error reports
 *
 * Returns the buffer head with a reference held, or an ERR_PTR():
 * -EIO for a directory hole, for a leaf block where an index block was
 * required, or for a checksum failure; otherwise the error reported by
 * ext4_bread().  No buffer reference is held on any error return.
 */
static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
					      ext4_lblk_t block,
					      dirblock_type_t type,
					      unsigned int line)
{
	struct buffer_head *bh;
	struct ext4_dir_entry *dirent;
	int err = 0, is_dx_block = 0;

	bh = ext4_bread(NULL, inode, block, 0, &err);
	if (!bh) {
		/* No buffer and no error code: the block is a hole. */
		if (err == 0) {
			ext4_error_inode(inode, __func__, line, block,
					       "Directory hole found");
			return ERR_PTR(-EIO);
		}
		__ext4_warning(inode->i_sb, __func__, line,
			       "error reading directory block "
			       "(ino %lu, block %lu)", inode->i_ino,
			       (unsigned long) block);
		return ERR_PTR(err);
	}
	dirent = (struct ext4_dir_entry *) bh->b_data;
	/* Determine whether or not we have an index block */
	if (is_dx(inode)) {
		if (block == 0)
			is_dx_block = 1;
		else if (ext4_rec_len_from_disk(dirent->rec_len,
						inode->i_sb->s_blocksize) ==
			 inode->i_sb->s_blocksize)
			is_dx_block = 1;
	}
	if (!is_dx_block && type == INDEX) {
		ext4_error_inode(inode, __func__, line, block,
		       "directory leaf block found instead of index block");
		/* Drop the reference taken by ext4_bread() before bailing. */
		brelse(bh);
		return ERR_PTR(-EIO);
	}
	/* Nothing to verify without metadata_csum, or if already verified. */
	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
	    buffer_verified(bh))
		return bh;

	/*
	 * An empty leaf block can get mistaken for an index block; for
	 * this reason, we can only check the index checksum when the
	 * caller is sure it should be an index block.
	 */
	if (is_dx_block && type == INDEX) {
		if (ext4_dx_csum_verify(inode, dirent))
			set_buffer_verified(bh);
		else {
			ext4_error_inode(inode, __func__, line, block,
				"Directory index failed checksum");
			brelse(bh);
			return ERR_PTR(-EIO);
		}
	}
	if (!is_dx_block) {
		if (ext4_dirent_csum_verify(inode, dirent))
			set_buffer_verified(bh);
		else {
			ext4_error_inode(inode, __func__, line, block,
				"Directory block failed checksum");
			brelse(bh);
			return ERR_PTR(-EIO);
		}
	}
	return bh;
}

#ifndef assert
#define assert(test) J_ASSERT(test)
#endif

#ifdef DX_DEBUG
#define dxtrace(command) command
#else
#define dxtrace(command)
#endif

/*
 * Dirent-shaped header (inode, rec_len, name_len, file_type) embedded in
 * struct dx_root and struct dx_node ahead of the index data.
 */
struct fake_dirent
{
	__le32 inode;
	__le16 rec_len;
	u8 name_len;
	u8 file_type;
};

/*
 * Entry count and capacity of an index block.  The dx_get_count() /
 * dx_get_limit() family reaches it by casting the start of the
 * struct dx_entry array, so it overlays the first entry slot.
 */
struct dx_countlimit
{
	__le16 limit;	/* capacity of the entry array */
	__le16 count;	/* number of slots currently in use */
};

/*
 * One index entry: a hash value and the directory block it points to.
 * Both fields are little-endian on disk; use the dx_get_*() / dx_set_*()
 * accessors rather than touching them directly.
 */
struct dx_entry
{
	__le32 hash;
	__le32 block;
};

/*
 * dx_root_info is laid out so that if it should somehow get overlaid by a
 * dirent the two low bits of the hash version will be zero.  Therefore, the
 * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
 */

/*
 * View of directory block 0 as used by dx_probe(): dirent-shaped headers
 * for the first two entries, the root info, then the index entries.
 * Note that dx_probe() locates the entries at &info + info.info_length
 * rather than through the entries[] member.
 */
struct dx_root
{
	struct fake_dirent dot;
	char dot_name[4];
	struct fake_dirent dotdot;
	char dotdot_name[4];
	struct dx_root_info
	{
		__le32 reserved_zero;
		u8 hash_version;	/* one of the DX_HASH_* values checked by dx_probe() */
		u8 info_length; /* 8 */
		u8 indirect_levels;	/* dx_probe() rejects values above 1 */
		u8 unused_flags;	/* dx_probe() rejects the block if bit 0 is set */
	}
	info;
	struct dx_entry	entries[0];
};

/*
 * View of a non-root index block: a dirent-shaped header followed
 * directly by the index entries.
 */
struct dx_node
{
	struct fake_dirent fake;
	struct dx_entry	entries[0];
};


/*
 * Cursor for one level of the index tree, as filled in by dx_probe().
 */
struct dx_frame
{
	struct buffer_head *bh;		/* buffer holding this level's index block */
	struct dx_entry *entries;	/* start of the entry array inside bh */
	struct dx_entry *at;		/* entry currently selected at this level */
};

struct dx_map_entry
{
	u32 hash;
230
231
	u16 offs;
	u16 size;
232
233
};

234
235
236
237
238
239
240
241
/*
 * This goes at the end of each htree block.
 *
 * The checksum code in this file locates it immediately after the
 * `limit` dx_entry slots of the block.
 */
struct dx_tail {
	u32 dt_reserved;
	__le32 dt_checksum;	/* crc32c(uuid+inum+dirblock) */
};

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
242
243
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
244
245
246
247
248
249
250
251
static inline unsigned dx_get_hash(struct dx_entry *entry);
static void dx_set_hash(struct dx_entry *entry, unsigned value);
static unsigned dx_get_count(struct dx_entry *entries);
static unsigned dx_get_limit(struct dx_entry *entries);
static void dx_set_count(struct dx_entry *entries, unsigned value);
static void dx_set_limit(struct dx_entry *entries, unsigned value);
static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
static unsigned dx_node_limit(struct inode *dir);
252
static struct dx_frame *dx_probe(const struct qstr *d_name,
253
254
255
256
				 struct inode *dir,
				 struct dx_hash_info *hinfo,
				 struct dx_frame *frame,
				 int *err);
257
static void dx_release(struct dx_frame *frames);
258
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
259
		       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
260
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
261
static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
262
		struct dx_map_entry *offsets, int count, unsigned blocksize);
263
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
264
265
static void dx_insert_block(struct dx_frame *frame,
					u32 hash, ext4_lblk_t block);
266
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
267
268
269
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash);
270
271
272
273
static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
		const struct qstr *d_name,
		struct ext4_dir_entry_2 **res_dir,
		int *err);
274
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
275
276
			     struct inode *inode);

277
/* checksumming functions */
278
279
void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
			    unsigned int blocksize)
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
{
	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
	t->det_rec_len = ext4_rec_len_to_disk(
			sizeof(struct ext4_dir_entry_tail), blocksize);
	t->det_reserved_ft = EXT4_FT_DIR_CSUM;
}

/*
 * Find the checksum "dirent" at the tail of the directory block that
 * starts at @de.
 *
 * Returns the tail, or NULL if the candidate does not carry the expected
 * tail signature (zeroed reserved fields, rec_len equal to the size of
 * the tail structure, and the EXT4_FT_DIR_CSUM marker).
 */
static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
						   struct ext4_dir_entry *de)
{
	struct ext4_dir_entry_tail *t;

#ifdef PARANOID
	struct ext4_dir_entry *d, *top;

	/*
	 * Follow the rec_len chain from the first entry; it must land
	 * exactly on the position where the tail would start.
	 */
	d = de;
	top = (struct ext4_dir_entry *)(((void *)de) +
		(EXT4_BLOCK_SIZE(inode->i_sb) -
		sizeof(struct ext4_dir_entry_tail)));
	while (d < top && d->rec_len)
		d = (struct ext4_dir_entry *)(((void *)d) +
		    le16_to_cpu(d->rec_len));

	if (d != top)
		return NULL;

	t = (struct ext4_dir_entry_tail *)d;
#else
	/* Take the tail position straight from the block size. */
	t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
#endif

	/* Reject anything that does not look like a checksum tail. */
	if (t->det_reserved_zero1 ||
	    le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
	    t->det_reserved_zero2 ||
	    t->det_reserved_ft != EXT4_FT_DIR_CSUM)
		return NULL;

	return t;
}

static __le32 ext4_dirent_csum(struct inode *inode,
			       struct ext4_dir_entry *dirent, int size)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	__u32 csum;

	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	return cpu_to_le32(csum);
}

332
333
334
335
336
337
/*
 * Warn that a block of directory @inode has no room for a checksum and
 * point the administrator at e2fsck -D.
 */
static void warn_no_space_for_csum(struct inode *inode)
{
	ext4_warning(inode->i_sb, "no space in directory inode %lu leaf for "
		     "checksum.  Please run e2fsck -D.", inode->i_ino);
}

338
339
340
341
342
343
344
345
346
347
int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
348
		warn_no_space_for_csum(inode);
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
		return 0;
	}

	if (t->det_checksum != ext4_dirent_csum(inode, dirent,
						(void *)t - (void *)dirent))
		return 0;

	return 1;
}

/*
 * Recompute and store the checksum of the leaf directory block starting
 * at @dirent.  Does nothing when metadata_csum is off; only warns when
 * the block has no checksum tail.
 */
static void ext4_dirent_csum_set(struct inode *inode,
				 struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *tail;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	tail = get_dirent_tail(inode, dirent);
	if (tail) {
		/* The checksum covers everything up to the tail. */
		tail->det_checksum = ext4_dirent_csum(inode, dirent,
					(void *)tail - (void *)dirent);
		return;
	}

	warn_no_space_for_csum(inode);
}

378
379
380
/*
 * Refresh the leaf-block checksum in @bh, then hand the buffer to
 * ext4_handle_dirty_metadata() and return its result.
 */
int ext4_handle_dirty_dirent_node(handle_t *handle,
				  struct inode *inode,
				  struct buffer_head *bh)
{
	struct ext4_dir_entry *first = (struct ext4_dir_entry *)bh->b_data;

	ext4_dirent_csum_set(inode, first);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
/*
 * Locate the count/limit header of the index block starting at @dirent.
 *
 * Two layouts are recognised:
 *  - the first record spans the whole block: the header sits at byte
 *    offset 8;
 *  - the first record is 12 bytes long and the second one covers the
 *    rest of the block (the shape of struct dx_root): the header sits at
 *    byte offset 32, provided the root info has a zero reserved field
 *    and the expected info_length.
 *
 * Record lengths are decoded with ext4_rec_len_from_disk(), the same
 * helper the rest of this file uses, instead of a raw le16_to_cpu().
 *
 * Returns the header and, if @offset is non-NULL, stores its byte offset
 * there; returns NULL if the block matches neither layout.
 */
static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
					       struct ext4_dir_entry *dirent,
					       int *offset)
{
	struct ext4_dir_entry *dp;
	struct dx_root_info *root;
	int count_offset;
	int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
	unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize);

	if (rlen == blocksize)
		count_offset = 8;
	else if (rlen == 12) {
		dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
		if (ext4_rec_len_from_disk(dp->rec_len, blocksize) !=
		    blocksize - 12)
			return NULL;
		root = (struct dx_root_info *)(((void *)dp + 12));
		if (root->reserved_zero ||
		    root->info_length != sizeof(struct dx_root_info))
			return NULL;
		count_offset = 32;
	} else
		return NULL;

	if (offset)
		*offset = count_offset;
	return (struct dx_countlimit *)(((void *)dirent) + count_offset);
}

static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
			   int count_offset, int count, struct dx_tail *t)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
419
420
	__u32 csum;
	__le32 save_csum;
421
422
423
	int size;

	size = count_offset + (count * sizeof(struct dx_entry));
424
	save_csum = t->dt_checksum;
425
426
427
	t->dt_checksum = 0;
	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
428
	t->dt_checksum = save_csum;
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452

	return cpu_to_le32(csum);
}

/*
 * Verify the checksum of the index block starting at @dirent.
 *
 * Returns 1 if the block is acceptable and 0 if its checksum is wrong.
 * As before, a block whose count/limit header cannot be located, or
 * which has no room for a checksum tail, is reported but still accepted
 * (returns 1).
 *
 * An on-disk count larger than the limit is treated as a checksum
 * failure: using it would make the checksum run past the entry array,
 * and for a large enough value past the end of the block.
 */
static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return 1;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
		warn_no_space_for_csum(inode);
		return 1;
	}
	/*
	 * The check above bounds `limit` entries to the block; bounding
	 * count by limit keeps the checksummed range inside it as well.
	 */
	if (count > limit)
		return 0;
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
					    count, t))
		return 0;
	return 1;
}

/*
 * Recompute and store the checksum of the index block starting at
 * @dirent.  Does nothing when metadata_csum is off.  If the count/limit
 * header cannot be found, or the block has no room for a checksum tail,
 * the problem is reported and the block is left untouched.
 */
static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *cl;
	struct dx_tail *tail;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	cl = get_dx_countlimit(inode, dirent, &count_offset);
	if (!cl) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return;
	}

	limit = le16_to_cpu(cl->limit);
	count = le16_to_cpu(cl->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
		warn_no_space_for_csum(inode);
		return;
	}

	/* The tail sits right after the last of the `limit` entry slots. */
	tail = (struct dx_tail *)(((struct dx_entry *)cl) + limit);
	tail->dt_checksum = ext4_dx_csum(inode, dirent, count_offset,
					 count, tail);
}

/*
 * Refresh the index-block checksum in @bh, then hand the buffer to
 * ext4_handle_dirty_metadata() and return its result.
 */
static inline int ext4_handle_dirty_dx_node(handle_t *handle,
					    struct inode *inode,
					    struct buffer_head *bh)
{
	struct ext4_dir_entry *first = (struct ext4_dir_entry *)bh->b_data;

	ext4_dx_csum_set(inode, first);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

499
500
501
502
/*
 * p is at least 6 bytes before the end of page
 */
static inline struct ext4_dir_entry_2 *
503
ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
504
505
{
	return (struct ext4_dir_entry_2 *)((char *)p +
506
		ext4_rec_len_from_disk(p->rec_len, blocksize));
507
508
}

509
510
511
512
513
/*
 * Future: use high four bits of block for coalesce-on-delete flags
 * Mask them off for now.
 */

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
514
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
515
516
517
518
{
	return le32_to_cpu(entry->block) & 0x00ffffff;
}

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
519
/* Store block number @value in index entry @entry, little-endian. */
static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
{
	__le32 disk_block = cpu_to_le32(value);

	entry->block = disk_block;
}

524
static inline unsigned dx_get_hash(struct dx_entry *entry)
525
526
527
528
{
	return le32_to_cpu(entry->hash);
}

529
static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
530
531
532
533
{
	entry->hash = cpu_to_le32(value);
}

534
static inline unsigned dx_get_count(struct dx_entry *entries)
535
536
537
538
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->count);
}

539
static inline unsigned dx_get_limit(struct dx_entry *entries)
540
541
542
543
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
}

544
static inline void dx_set_count(struct dx_entry *entries, unsigned value)
545
546
547
548
{
	((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
}

549
static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
550
551
552
553
{
	((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
}

554
static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
555
{
556
557
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
		EXT4_DIR_REC_LEN(2) - infosize;
558
559
560
561

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
562
	return entry_space / sizeof(struct dx_entry);
563
564
}

565
static inline unsigned dx_node_limit(struct inode *dir)
566
{
567
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
568
569
570
571

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
572
	return entry_space / sizeof(struct dx_entry);
573
574
575
576
577
578
}

/*
 * Debug
 */
#ifdef DX_DEBUG
579
static void dx_show_index(char * label, struct dx_entry *entries)
580
{
581
	int i, n = dx_get_count (entries);
582
	printk(KERN_DEBUG "%s index ", label);
583
	for (i = 0; i < n; i++) {
584
		printk("%x->%lu ", i ? dx_get_hash(entries + i) :
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
585
				0, (unsigned long)dx_get_block(entries + i));
586
587
	}
	printk("\n");
588
589
590
591
592
593
594
595
596
}

/* Totals gathered by the dx_show_*() debug helpers. */
struct stats
{
	unsigned names;		/* live directory entries seen */
	unsigned space;		/* sum of EXT4_DIR_REC_LEN() over those entries */
	unsigned bcount;	/* leaf blocks visited */
};

597
static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
				 int size, int show_names)
{
	unsigned names = 0, space = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

	printk("names: ");
	while ((char *) de < base + size)
	{
		if (de->inode)
		{
			if (show_names)
			{
				int len = de->name_len;
				char *name = de->name;
				while (len--) printk("%c", *name++);
614
				ext4fs_dirhash(de->name, de->name_len, &h);
615
				printk(":%x.%u ", h.hash,
616
				       (unsigned) ((char *) de - base));
617
			}
618
			space += EXT4_DIR_REC_LEN(de->name_len);
619
620
			names++;
		}
621
		de = ext4_next_entry(de, size);
622
623
624
625
626
627
628
629
630
	}
	printk("(%i)\n", names);
	return (struct stats) { names, space, 1 };
}

struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
			     struct dx_entry *entries, int levels)
{
	unsigned blocksize = dir->i_sb->s_blocksize;
631
	unsigned count = dx_get_count(entries), names = 0, space = 0, i;
632
633
634
635
636
637
	unsigned bcount = 0;
	struct buffer_head *bh;
	int err;
	printk("%i indexed blocks...\n", count);
	for (i = 0; i < count; i++, entries++)
	{
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
638
639
		ext4_lblk_t block = dx_get_block(entries);
		ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
640
641
642
		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
		struct stats stats;
		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
643
		if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
644
645
		stats = levels?
		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
646
		   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
647
648
649
		names += stats.names;
		space += stats.space;
		bcount += stats.bcount;
650
		brelse(bh);
651
652
	}
	if (bcount)
653
		printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
654
655
		       levels ? "" : "   ", names, space/bcount,
		       (space/bcount)*100/blocksize);
656
657
658
659
660
661
662
663
664
665
666
667
668
669
	return (struct stats) { names, space, bcount};
}
#endif /* DX_DEBUG */

/*
 * Probe for a directory leaf block to search.
 *
 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
 * error in the directory index, and the caller should fall back to
 * searching the directory normally.  The callers of dx_probe **MUST**
 * check for this error code, and make sure it never gets reflected
 * back to userspace.
 */
static struct dx_frame *
670
dx_probe(const struct qstr *d_name, struct inode *dir,
671
672
673
674
675
676
677
678
679
680
	 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
{
	unsigned count, indirect;
	struct dx_entry *at, *entries, *p, *q, *m;
	struct dx_root *root;
	struct buffer_head *bh;
	struct dx_frame *frame = frame_in;
	u32 hash;

	frame->bh = NULL;
681
682
683
	bh = ext4_read_dirblock(dir, 0, INDEX);
	if (IS_ERR(bh)) {
		*err = PTR_ERR(bh);
684
		goto fail;
Carlos Maiolino's avatar
Carlos Maiolino committed
685
	}
686
687
688
689
	root = (struct dx_root *) bh->b_data;
	if (root->info.hash_version != DX_HASH_TEA &&
	    root->info.hash_version != DX_HASH_HALF_MD4 &&
	    root->info.hash_version != DX_HASH_LEGACY) {
690
		ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
691
692
693
694
695
696
			     root->info.hash_version);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}
	hinfo->hash_version = root->info.hash_version;
697
698
	if (hinfo->hash_version <= DX_HASH_TEA)
		hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
699
	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
700
701
	if (d_name)
		ext4fs_dirhash(d_name->name, d_name->len, hinfo);
702
703
704
	hash = hinfo->hash;

	if (root->info.unused_flags & 1) {
705
		ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
706
707
708
709
710
711
712
			     root->info.unused_flags);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	if ((indirect = root->info.indirect_levels) > 1) {
713
		ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
714
715
716
717
718
719
720
721
			     root->info.indirect_levels);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	entries = (struct dx_entry *) (((char *)&root->info) +
				       root->info.info_length);
722
723
724

	if (dx_get_limit(entries) != dx_root_limit(dir,
						   root->info.info_length)) {
725
		ext4_warning(dir->i_sb, "dx entry: limit != root limit");
726
727
728
729
730
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

731
	dxtrace(printk("Look up %x", hash));
732
733
734
	while (1)
	{
		count = dx_get_count(entries);
735
		if (!count || count > dx_get_limit(entries)) {
736
			ext4_warning(dir->i_sb,
737
738
739
740
741
742
				     "dx entry: no count or count > limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}

743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
		p = entries + 1;
		q = entries + count - 1;
		while (p <= q)
		{
			m = p + (q - p)/2;
			dxtrace(printk("."));
			if (dx_get_hash(m) > hash)
				q = m - 1;
			else
				p = m + 1;
		}

		if (0) // linear search cross check
		{
			unsigned n = count - 1;
			at = entries;
			while (n--)
			{
				dxtrace(printk(","));
				if (dx_get_hash(++at) > hash)
				{
					at--;
					break;
				}
			}
			assert (at == p - 1);
		}

		at = p - 1;
		dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
		frame->bh = bh;
		frame->entries = entries;
		frame->at = at;
		if (!indirect--) return frame;
777
778
779
		bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
		if (IS_ERR(bh)) {
			*err = PTR_ERR(bh);
780
			goto fail2;
781
		}
782
		entries = ((struct dx_node *) bh->b_data)->entries;
783

784
		if (dx_get_limit(entries) != dx_node_limit (dir)) {
785
			ext4_warning(dir->i_sb,
786
787
788
789
790
				     "dx entry: limit != node limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}
791
		frame++;
792
		frame->bh = NULL;
793
794
795
796
797
798
799
	}
fail2:
	while (frame >= frame_in) {
		brelse(frame->bh);
		frame--;
	}
fail:
800
	if (*err == ERR_BAD_DX_DIR)
801
		ext4_warning(dir->i_sb,
Zheng Liu's avatar
Zheng Liu committed
802
			     "Corrupt dir inode %lu, running e2fsck is "
803
			     "recommended.", dir->i_ino);
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
	return NULL;
}

/*
 * Drop the buffer references held by the frame array @frames.
 *
 * Nothing is done if frames[0] holds no buffer.  frames[1] is released
 * only when the root block recorded in frames[0] reports a non-zero
 * indirect_levels, i.e. when a second level was in use.
 */
static void dx_release (struct dx_frame *frames)
{
	if (frames[0].bh == NULL)
		return;

	/* The root block itself tells us whether frames[1] was used. */
	if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
		brelse(frames[1].bh);
	brelse(frames[0].bh);
}

/*
 * This function increments the frame pointer to search the next leaf
 * block, and reads in the necessary intervening nodes if the search
 * should be necessary.  Whether or not the search is necessary is
 * controlled by the hash parameter.  If the hash value is even, then
 * the search is only continued if the next block starts with that
 * hash value.  This is used if we are searching for a specific file.
 *
 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 *
 * This function returns 1 if the caller should continue to search,
 * or 0 if it should not.  If there is an error reading one of the
 * index blocks, it will a negative error code.
 *
 * If start_hash is non-null, it will be filled in with the starting
 * hash of the next page.
 */
834
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
835
836
837
838
839
840
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash)
{
	struct dx_frame *p;
	struct buffer_head *bh;
841
	int num_frames = 0;
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
	__u32 bhash;

	p = frame;
	/*
	 * Find the next leaf page by incrementing the frame pointer.
	 * If we run out of entries in the interior node, loop around and
	 * increment pointer in the parent node.  When we break out of
	 * this loop, num_frames indicates the number of interior
	 * nodes need to be read.
	 */
	while (1) {
		if (++(p->at) < p->entries + dx_get_count(p->entries))
			break;
		if (p == frames)
			return 0;
		num_frames++;
		p--;
	}

	/*
	 * If the hash is 1, then continue only if the next page has a
	 * continuation hash of any value.  This is used for readdir
	 * handling.  Otherwise, check to see if the hash matches the
	 * desired contiuation hash.  If it doesn't, return since
	 * there's no point to read in the successive index pages.
	 */
	bhash = dx_get_hash(p->at);
	if (start_hash)
		*start_hash = bhash;
	if ((hash & 1) == 0) {
		if ((bhash & ~1) != hash)
			return 0;
	}
	/*
	 * If the hash is HASH_NB_ALWAYS, we always go to the next
	 * block so no check is necessary
	 */
	while (num_frames--) {
880
881
882
		bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
		if (IS_ERR(bh))
			return PTR_ERR(bh);
883
		p++;
884
		brelse(p->bh);
885
886
887
888
889
890
891
892
893
894
895
896
897
		p->bh = bh;
		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
	}
	return 1;
}


/*
 * This function fills a red-black tree with information from a
 * directory block.  It returns the number of directory entries loaded
 * into the tree, or a negative error code if the block cannot be read
 * or an entry cannot be stored.
 *
 * Entries whose (hash, minor hash) sorts before (@start_hash,
 * @start_minor_hash) and entries with a zero inode are skipped.  If an
 * entry fails ext4_check_dir_entry(), the file position is moved to the
 * start of the next block and the count so far is returned.
 */
static int htree_dirblock_to_tree(struct file *dir_file,
				  struct inode *dir, ext4_lblk_t block,
				  struct dx_hash_info *hinfo,
				  __u32 start_hash, __u32 start_minor_hash)
{
	struct ext4_dir_entry_2 *de, *limit;
	struct buffer_head *bh;
	int rc = 0, stored = 0;

	dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
							(unsigned long)block));
	bh = ext4_read_dirblock(dir, block, DIRENT);
	if (IS_ERR(bh))
		return PTR_ERR(bh);

	de = (struct ext4_dir_entry_2 *) bh->b_data;
	/* Stop once a minimal entry header no longer fits in the block. */
	limit = (struct ext4_dir_entry_2 *) ((char *) de +
					     dir->i_sb->s_blocksize -
					     EXT4_DIR_REC_LEN(0));
	for (; de < limit; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
		if (ext4_check_dir_entry(dir, NULL, de, bh,
				bh->b_data, bh->b_size,
				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
					 + ((char *)de - bh->b_data))) {
			/* On error, skip the f_pos to the next block. */
			dir_file->f_pos = (dir_file->f_pos |
					(dir->i_sb->s_blocksize - 1)) + 1;
			break;
		}

		ext4fs_dirhash(de->name, de->name_len, hinfo);
		if (hinfo->hash < start_hash)
			continue;
		if (hinfo->hash == start_hash &&
		    hinfo->minor_hash < start_minor_hash)
			continue;
		if (de->inode == 0)
			continue;

		rc = ext4_htree_store_dirent(dir_file, hinfo->hash,
					     hinfo->minor_hash, de);
		if (rc != 0) {
			brelse(bh);
			return rc;
		}
		stored++;
	}
	brelse(bh);
	return stored;
}


/*
 * This function fills a red-black tree with information from a
 * directory.  We start scanning the directory in hash order, starting
 * at start_hash and start_minor_hash.
 *
 * This function returns the number of entries inserted into the tree,
 * or a negative error code.
 */
954
int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
955
956
957
			 __u32 start_minor_hash, __u32 *next_hash)
{
	struct dx_hash_info hinfo;
958
	struct ext4_dir_entry_2 *de;
959
960
	struct dx_frame frames[2], *frame;
	struct inode *dir;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
961
	ext4_lblk_t block;
962
	int count = 0;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
963
	int ret, err;
964
965
	__u32 hashval;

966
	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
967
		       start_hash, start_minor_hash));
Al Viro's avatar
Al Viro committed
968
	dir = file_inode(dir_file);
969
	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
970
		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
971
972
973
		if (hinfo.hash_version <= DX_HASH_TEA)
			hinfo.hash_version +=
				EXT4_SB(dir->i_sb)->s_hash_unsigned;
974
		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
975
976
977
978
979
980
981
982
983
984
985
		if (ext4_has_inline_data(dir)) {
			int has_inline_data = 1;
			count = htree_inlinedir_to_tree(dir_file, dir, 0,
							&hinfo, start_hash,
							start_minor_hash,
							&has_inline_data);
			if (has_inline_data) {
				*next_hash = ~0;
				return count;
			}
		}
986
987
988
989
990
991
992
		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
					       start_hash, start_minor_hash);
		*next_hash = ~0;
		return count;
	}
	hinfo.hash = start_hash;
	hinfo.minor_hash = 0;
993
	frame = dx_probe(NULL, dir, &hinfo, frames, &err);
994
995
996
997
998
	if (!frame)
		return err;

	/* Add '.' and '..' from the htree header */
	if (!start_hash && !start_minor_hash) {
999
1000
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
		if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
1001
1002
1003
1004
			goto errout;
		count++;
	}
	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1005
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1006
		de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1007
		if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
			goto errout;
		count++;
	}

	while (1) {
		block = dx_get_block(frame->at);
		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
					     start_hash, start_minor_hash);
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		count += ret;
		hashval = ~0;
1022
		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
					    frame, frames, &hashval);
		*next_hash = hashval;
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		/*
		 * Stop if:  (a) there are no more entries, or
		 * (b) we have inserted at least one entry and the
		 * next hash value is not a continuation
		 */
		if ((ret == 0) ||
		    (count && ((hashval & 1) == 0)))
			break;
	}
	dx_release(frames);
1039
1040
	dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
		       "next hash: %x\n", count, *next_hash));
1041
1042
1043
1044
1045
1046
	return count;
errout:
	dx_release(frames);
	return (err);
}

1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
/*
 * Search one directory block for d_name.
 *
 * Thin wrapper around search_dir(): the area searched is the buffer's
 * data (bh->b_data) for one filesystem block (dir->i_sb->s_blocksize).
 * `offset' and `res_dir' are passed through untouched and the result of
 * search_dir() is returned as is.
 */
static inline int search_dirblock(struct buffer_head *bh,
				  struct inode *dir,
				  const struct qstr *d_name,
				  unsigned int offset,
				  struct ext4_dir_entry_2 **res_dir)
{
	char *block_data = bh->b_data;
	int block_len = dir->i_sb->s_blocksize;

	return search_dir(bh, block_data, block_len, dir, d_name, offset,
			  res_dir);
}

1057
1058
1059
1060
/*
 * Directory block splitting, compacting
 */

1061
1062
1063
1064
/*
 * Create map of hash values, offsets, and sizes, stored at end of block.
 * Returns number of entries mapped.
 */
1065
1066
1067
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
		       struct dx_hash_info *hinfo,
		       struct dx_map_entry *map_tail)
1068
1069
1070
1071
1072
{
	int count = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

1073
	while ((char *) de < base + blocksize) {
1074
		if (de->name_len && de->inode) {
1075
			ext4fs_dirhash(de->name, de->name_len, &h);
1076
1077
			map_tail--;
			map_tail->hash = h.hash;
1078
			map_tail->offs = ((char *) de - base)>>2;
1079
			map_tail->size = le16_to_cpu(de->rec_len);
1080
1081
1082
1083
			count++;
			cond_resched();
		}
		/* XXX: do we need to check rec_len == 0 case? -Chris */
1084
		de = ext4_next_entry(de, blocksize);
1085
1086
1087
1088
	}
	return count;
}

1089
/* Sort map by hash value */
1090
1091
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
	struct dx_map_entry *p, *q, *top = map + count - 1;
	int more;
	/* Combsort until bubble sort doesn't suck */
	while (count > 2) {
		count = count*10/13;
		if (count - 9 < 2) /* 9, 10 -> 11 */
			count = 11;
		for (p = top, q = p - count; q >= map; p--, q--)
			if (p->hash < q->hash)
				swap(*p, *q);
	}
	/* Garden variety bubble sort */
	do {
		more = 0;
		q = top;
		while (q-- > map) {
			if (q[1].hash >= q[0].hash)
1109
				continue;
1110
1111
			swap(*(q+1), *q);
			more = 1;
1112
1113
1114
1115
		}
	} while(more);
}

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1116
/*
 * Insert a new index entry (hash, block) immediately after frame->at.
 *
 * The entries following frame->at are shifted up by one slot and the
 * entry count stored in frame->entries is incremented.  The asserts
 * require that there is room for one more entry and that frame->at lies
 * within the entries currently in use.
 */
static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
{
	struct dx_entry *entries = frame->entries;
	struct dx_entry *old = frame->at;
	struct dx_entry *slot = old + 1;
	int count = dx_get_count(entries);
	char *used_end;

	assert(count < dx_get_limit(entries));
	assert(old < entries + count);

	/* open a gap at `slot' by moving everything from there one up */
	used_end = (char *)(entries + count);
	memmove(slot + 1, slot, used_end - (char *)slot);

	dx_set_hash(slot, hash);
	dx_set_block(slot, block);
	dx_set_count(entries, count + 1);
}

/*
1131
 * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
1132
 *
1133
 * `len <= EXT4_NAME_LEN' is guaranteed by caller.
1134
1135
 * `de != NULL' is guaranteed by caller.
 */
1136
1137
static inline int ext4_match (int len, const char * const name,
			      struct ext4_dir_entry_2 * de)
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
{
	if (len != de->name_len)
		return 0;
	if (!de->inode)
		return 0;
	return !memcmp(name, de->name, len);
}

/*
 * Returns 0 if not found, -1 on failure, and 1 on success
 */
1149
1150
1151
1152
1153
1154
1155
int search_dir(struct buffer_head *bh,
	       char *search_buf,
	       int buf_size,
	       struct inode *dir,
	       const struct qstr *d_name,
	       unsigned int offset,
	       struct ext4_dir_entry_2 **res_dir)
1156
{
1157
	struct ext4_dir_entry_2 * de;
1158
1159
	char * dlimit;
	int de_len;
1160
1161
	const char *name = d_name->name;
	int namelen = d_name->len;
1162

1163
1164
	de = (struct ext4_dir_entry_2 *)search_buf;
	dlimit = search_buf + buf_size;
1165
1166
1167
1168
1169
	while ((char *) de < dlimit) {
		/* this code is executed quadratically often */
		/* do minimal checking `by hand' */

		if ((char *) de + namelen <= dlimit &&
1170
		    ext4_match (namelen, name, de)) {
1171
			/* found a match - just to be sure, do a full check */
1172
1173
			if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
						 bh->b_size, offset))
1174
1175
1176
1177
1178
				return -1;
			*res_dir = de;
			return 1;
		}
		/* prevent looping on a bad block */
1179
1180
		de_len = ext4_rec_len_from_disk(de->rec_len,
						dir->i_sb->s_blocksize);
1181
1182
1183
		if (de_len <= 0)
			return -1;
		offset += de_len;
1184
		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1185
1186
1187
1188
	}
	return 0;
}

1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
/*
 * Decide whether directory block `block', whose data starts with `de',
 * is treated as an htree index block.
 *
 * Returns 0 when the directory is not indexed.  For an indexed directory
 * it returns 1 for block 0, and for any other block whose first entry is
 * unused (inode 0) with a record length covering the whole block;
 * otherwise 0.
 */
static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
			       struct ext4_dir_entry *de)
{
	struct super_block *sb;

	if (!is_dx(dir))
		return 0;
	if (block == 0)
		return 1;
	if (de->inode != 0)
		return 0;

	sb = dir->i_sb;
	return ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
			sb->s_blocksize;
}
1204
1205

/*
1206
 *	ext4_find_entry()
1207
1208
1209
1210
1211
1212
1213
1214
1215
 *
 * finds an entry in the specified directory with the wanted name. It
 * returns the cache buffer in which the entry was found, and the entry
 * itself (as a parameter - res_dir). It does NOT read the inode of the
 * entry - you'll have to do that yourself if you want to.
 *
 * The returned buffer_head has ->b_count elevated.  The caller is expected
 * to brelse() it when appropriate.
 */
1216
1217
static struct buffer_head * ext4_find_entry (struct inode *dir,
					const struct qstr *d_name,
1218
1219
					struct ext4_dir_entry_2 **res_dir,
					int *inlined)
1220
{
1221
1222
1223
	struct super_block *sb;
	struct buffer_head *bh_use[NAMEI_RA_SIZE];
	struct buffer_head *bh, *ret = NULL;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1224
	ext4_lblk_t start, block, b;
1225
	const u8 *name = d_name->name;
1226
1227
1228
1229
1230
	int ra_max = 0;		/* Number of bh's in the readahead
				   buffer, bh_use[] */
	int ra_ptr = 0;		/* Current index into readahead
				   buffer */
	int num = 0;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1231
1232
	ext4_lblk_t  nblocks;
	int i, err;
1233
1234
1235
1236
	int namelen;

	*res_dir = NULL;
	sb = dir->i_sb;
1237
	namelen = d_name->len;
1238
	if (namelen > EXT4_NAME_LEN)
1239
		return NULL;
1240
1241
1242
1243
1244

	if (ext4_has_inline_data(dir)) {
		int has_inline_data = 1;
		ret = ext4_find_inline_entry(dir, d_name, res_dir,
					     &has_inline_data);
1245
1246
1247
		if (has_inline_data) {
			if (inlined)
				*inlined = 1;
1248
			return ret;
1249
		}
1250
1251
	}

1252
	if ((namelen <= 2) && (name[0] == '.') &&
1253
	    (name[1] == '.' || name[1] == '\0')) {
1254
1255
1256
1257
1258
1259
1260
1261
		/*
		 * "." or ".." will only be in the first block
		 * NFS may look up ".."; "." should be handled by the VFS
		 */
		block = start = 0;
		nblocks = 1;
		goto restart;
	}
1262
	if (is_dx(dir)) {
1263
		bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
1264
1265
1266
1267
1268
1269
1270
		/*
		 * On success, or if the error was file not found,
		 * return.  Otherwise, fall back to doing a search the
		 * old fashioned way.
		 */
		if (bh || (err != ERR_BAD_DX_DIR))
			return bh;
1271
1272
		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
			       "falling back\n"));
1273
	}
1274
1275
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
	start = EXT4_I(dir)->i_dir_start_lookup;
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
	if (start >= nblocks)
		start = 0;
	block = start;
restart:
	do {
		/*
		 * We deal with the read-ahead logic here.
		 */
		if (ra_ptr >= ra_max) {
			/* Refill the readahead buffer */
			ra_ptr = 0;
			b = block;
			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				/*
				 * Terminate if we reach the end of the
				 * directory and must wrap, or if our
				 * search has finished at this block.
				 */
				if (b >= nblocks || (num && block == start)) {
					bh_use[ra_max] = NULL;
					break;
				}
				num++;
1299
				bh = ext4_getblk(NULL, dir, b++, 0, &err);
1300
1301
				bh_use[ra_max] = bh;
				if (bh)
1302
1303
					ll_rw_block(READ | REQ_META | REQ_PRIO,
						    1, &bh);
1304
1305
1306
1307
1308
1309
1310
			}
		}
		if ((bh = bh_use[ra_ptr++]) == NULL)
			goto next;
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			/* read error, skip block & hope for the best */
1311
1312
			EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
					 (unsigned long) block);
1313
1314
1315
			brelse(bh);
			goto next;
		}
1316
		if (!buffer_verified(bh) &&
1317
1318
		    !is_dx_internal_node(dir, block,
					 (struct ext4_dir_entry *)bh->b_data) &&
1319
1320
1321
1322
1323
1324
1325
1326
		    !ext4_dirent_csum_verify(dir,
				(struct ext4_dir_entry *)bh->b_data)) {
			EXT4_ERROR_INODE(dir, "checksumming directory "
					 "block %lu", (unsigned long)block);
			brelse(bh);
			goto next;
		}
		set_buffer_verified(bh);
1327
		i = search_dirblock(bh, dir, d_name,
1328
			    block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
1329
		if (i == 1) {
1330
			EXT4_I(dir)->i_dir_start_lookup = block;
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
			ret = bh;
			goto cleanup_and_exit;
		} else {
			brelse(bh);
			if (i < 0)
				goto cleanup_and_exit;
		}
	next:
		if (++block >= nblocks)
			block = 0;
	} while (block != start);

	/*
	 * If the directory has grown while we were searching, then
	 * search the last part of the directory before giving up.
	 */
	block = nblocks;
1348
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1349
1350
1351
1352
1353
1354
1355
1356
	if (block < nblocks) {
		start = 0;
		goto restart;
	}

cleanup_and_exit:
	/* Clean up the read-ahead blocks */
	for (; ra_ptr < ra_max; ra_ptr++)
1357
		brelse(bh_use[ra_ptr]);
1358
1359
1360
	return ret;
}

1361
static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
1362
		       struct ext4_dir_entry_2 **res_dir, int *err)
1363
{
1364
	struct super_block * sb = dir->i_sb;
1365
1366
1367
	struct dx_hash_info	hinfo;
	struct dx_frame frames[2], *frame;
	struct buffer_head *bh;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1368
	ext4_lblk_t block;
1369
1370
	int retval;

1371
1372
	if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
		return NULL;
1373
1374
	do {
		block = dx_get_block(frame->at);
1375
1376
1377
		bh = ext4_read_dirblock(dir, block, DIRENT);
		if (IS_ERR(bh)) {
			*err = PTR_ERR(bh);
1378
1379
			goto errout;
		}
1380
1381
1382
1383
1384
1385
		retval = search_dirblock(bh, dir, d_name,
					 block << EXT4_BLOCK_SIZE_BITS(sb),
					 res_dir);
		if (retval == 1) { 	/* Success! */
			dx_release(frames);
			return bh;
1386
		}
1387
		brelse(bh);
1388
1389
1390
1391
1392
		if (retval == -1) {
			*err = ERR_BAD_DX_DIR;
			goto errout;
		}

1393
		/* Check to see if we should continue to search */
1394
		retval = ext4_htree_next_block(dir, hinfo.hash, frame,
1395
1396
					       frames, NULL);
		if (retval < 0) {
1397
			ext4_warning(sb,
1398
1399
1400
1401
1402
1403
1404
1405
1406
			     "error reading index page in directory #%lu",
			     dir->i_ino);
			*err = retval;
			goto errout;
		}
	} while (retval == 1);

	*err = -ENOENT;
errout:
1407
	dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
1408
1409
1410
1411
	dx_release (frames);
	return NULL;
}

Al Viro's avatar
Al Viro committed
1412
static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1413
{