/*
 *  linux/fs/ext4/namei.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/namei.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *  Directory entry file type support and forward compatibility hooks
 *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
 *  Hash Tree Directory indexing (c)
 *	Daniel Phillips, 2001
 *  Hash Tree Directory indexing porting
 *	Christopher Li, 2002
 *  Hash Tree Directory indexing cleanup
 *	Theodore Ts'o, 2002
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/jbd2.h>
#include <linux/time.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
#include "ext4.h"
#include "ext4_jbd2.h"

#include "xattr.h"
#include "acl.h"

#include <trace/events/ext4.h>
/*
 * define how far ahead to read directories while searching them.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
#define NAMEI_RA_SIZE	     (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
#define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))

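/*
 * ext4_append() appends a new block to the directory: it extends
 * i_size by one block, maps the new block with ext4_bread(), and
 * obtains journal write access on it so the caller can fill it in.
 */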
static struct buffer_head *ext4_append(handle_t *handle,
					struct inode *inode,
					ext4_lblk_t *block, int *err)
{
	struct buffer_head *bh;

	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;

	bh = ext4_bread(handle, inode, *block, 1, err);
	if (bh) {
		inode->i_size += inode->i_sb->s_blocksize;
		EXT4_I(inode)->i_disksize = inode->i_size;
		*err = ext4_journal_get_write_access(handle, bh);
		if (*err) {
			brelse(bh);
			bh = NULL;
		}
	}
	return bh;
}

#ifndef assert
#define assert(test) J_ASSERT(test)
#endif

#ifdef DX_DEBUG
#define dxtrace(command) command
#else
#define dxtrace(command)
#endif

struct fake_dirent
{
	__le32 inode;
	__le16 rec_len;
	u8 name_len;
	u8 file_type;
};

struct dx_countlimit
{
	__le16 limit;
	__le16 count;
};

struct dx_entry
{
	__le32 hash;
	__le32 block;
};

/*
 * dx_root_info is laid out so that if it should somehow get overlaid by a
 * dirent the two low bits of the hash version will be zero.  Therefore, the
 * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
 */

struct dx_root
{
	struct fake_dirent dot;
	char dot_name[4];
	struct fake_dirent dotdot;
	char dotdot_name[4];
	struct dx_root_info
	{
		__le32 reserved_zero;
		u8 hash_version;
		u8 info_length; /* 8 */
		u8 indirect_levels;
		u8 unused_flags;
	}
	info;
	struct dx_entry	entries[0];
};

struct dx_node
{
	struct fake_dirent fake;
	struct dx_entry	entries[0];
};


struct dx_frame
{
	struct buffer_head *bh;
	struct dx_entry *entries;
	struct dx_entry *at;
};

struct dx_map_entry
{
	u32 hash;
	u16 offs;
	u16 size;
};

/*
 * This goes at the end of each htree block.
 */
struct dx_tail {
	u32 dt_reserved;
	__le32 dt_checksum;	/* crc32c(uuid+inum+dirblock) */
};

static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
static inline unsigned dx_get_hash(struct dx_entry *entry);
static void dx_set_hash(struct dx_entry *entry, unsigned value);
static unsigned dx_get_count(struct dx_entry *entries);
static unsigned dx_get_limit(struct dx_entry *entries);
static void dx_set_count(struct dx_entry *entries, unsigned value);
static void dx_set_limit(struct dx_entry *entries, unsigned value);
static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
static unsigned dx_node_limit(struct inode *dir);
static struct dx_frame *dx_probe(const struct qstr *d_name,
				 struct inode *dir,
				 struct dx_hash_info *hinfo,
				 struct dx_frame *frame,
				 int *err);
static void dx_release(struct dx_frame *frames);
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
		       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
		struct dx_map_entry *offsets, int count, unsigned blocksize);
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
static void dx_insert_block(struct dx_frame *frame,
					u32 hash, ext4_lblk_t block);
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash);
static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
		const struct qstr *d_name,
		struct ext4_dir_entry_2 **res_dir,
		int *err);
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
			     struct inode *inode);

/* checksumming functions */
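
/*
 * EXT4_DIRENT_TAIL locates the struct ext4_dir_entry_tail stored in the
 * last bytes of a directory leaf block; it carries the block checksum
 * when the metadata_csum feature is enabled.
 */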
#define EXT4_DIRENT_TAIL(block, blocksize) \
	((struct ext4_dir_entry_tail *)(((void *)(block)) + \
					((blocksize) - \
					 sizeof(struct ext4_dir_entry_tail))))

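/*
 * Initialize the fake dirent at the end of a leaf block that reserves
 * space for the directory-block checksum.
 */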
static void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
				   unsigned int blocksize)
{
	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
	t->det_rec_len = ext4_rec_len_to_disk(
			sizeof(struct ext4_dir_entry_tail), blocksize);
	t->det_reserved_ft = EXT4_FT_DIR_CSUM;
}

/* Walk through a dirent block to find a checksum "dirent" at the tail */
static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
						   struct ext4_dir_entry *de)
{
	struct ext4_dir_entry_tail *t;

#ifdef PARANOID
	struct ext4_dir_entry *d, *top;

	d = de;
	top = (struct ext4_dir_entry *)(((void *)de) +
		(EXT4_BLOCK_SIZE(inode->i_sb) -
		sizeof(struct ext4_dir_entry_tail)));
	while (d < top && d->rec_len)
		d = (struct ext4_dir_entry *)(((void *)d) +
		    le16_to_cpu(d->rec_len));

	if (d != top)
		return NULL;

	t = (struct ext4_dir_entry_tail *)d;
#else
	t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
#endif

	if (t->det_reserved_zero1 ||
	    le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
	    t->det_reserved_zero2 ||
	    t->det_reserved_ft != EXT4_FT_DIR_CSUM)
		return NULL;

	return t;
}

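/*
 * Compute the checksum of a directory leaf block: crc32c of the first
 * @size bytes of the block, seeded with the inode's checksum seed.
 */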
static __le32 ext4_dirent_csum(struct inode *inode,
			       struct ext4_dir_entry *dirent, int size)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	__u32 csum;

	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	return cpu_to_le32(csum);
}

int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
		EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir "
				 "leaf for checksum.  Please run e2fsck -D.");
		return 0;
	}

	if (t->det_checksum != ext4_dirent_csum(inode, dirent,
						(void *)t - (void *)dirent))
		return 0;

	return 1;
}

static void ext4_dirent_csum_set(struct inode *inode,
				 struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
		EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir "
				 "leaf for checksum.  Please run e2fsck -D.");
		return;
	}

	t->det_checksum = ext4_dirent_csum(inode, dirent,
					   (void *)t - (void *)dirent);
}

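/* Recompute the leaf-block checksum before marking the buffer dirty. */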
static inline int ext4_handle_dirty_dirent_node(handle_t *handle,
						struct inode *inode,
						struct buffer_head *bh)
{
	ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

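/*
 * Find the dx_countlimit structure in an htree root or node block and
 * optionally return its byte offset within the block.  Returns NULL if
 * the block does not look like a valid index block.
 */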
static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
					       struct ext4_dir_entry *dirent,
					       int *offset)
{
	struct ext4_dir_entry *dp;
	struct dx_root_info *root;
	int count_offset;

	if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
		count_offset = 8;
	else if (le16_to_cpu(dirent->rec_len) == 12) {
		dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
		if (le16_to_cpu(dp->rec_len) !=
		    EXT4_BLOCK_SIZE(inode->i_sb) - 12)
			return NULL;
		root = (struct dx_root_info *)(((void *)dp + 12));
		if (root->reserved_zero ||
		    root->info_length != sizeof(struct dx_root_info))
			return NULL;
		count_offset = 32;
	} else
		return NULL;

	if (offset)
		*offset = count_offset;
	return (struct dx_countlimit *)(((void *)dirent) + count_offset);
}

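/*
 * Compute the checksum of an htree index block: crc32c over the block
 * header and the in-use dx_entries, then over the dx_tail with its
 * checksum field temporarily zeroed.
 */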
static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
			   int count_offset, int count, struct dx_tail *t)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	__u32 csum, old_csum;
	int size;

	size = count_offset + (count * sizeof(struct dx_entry));
	old_csum = t->dt_checksum;
	t->dt_checksum = 0;
	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
	t->dt_checksum = old_csum;

	return cpu_to_le32(csum);
}

static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return 1;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
		EXT4_ERROR_INODE(inode, "metadata_csum set but no space for "
				 "tree checksum found.  Run e2fsck -D.");
		return 1;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
					    count, t))
		return 0;
	return 1;
}

static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
		EXT4_ERROR_INODE(inode, "metadata_csum set but no space for "
				 "tree checksum.  Run e2fsck -D.");
		return;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
}

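/* Recompute the index-block checksum before marking the buffer dirty. */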
static inline int ext4_handle_dirty_dx_node(handle_t *handle,
					    struct inode *inode,
					    struct buffer_head *bh)
{
	ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

/*
 * p is at least 6 bytes before the end of page
 */
static inline struct ext4_dir_entry_2 *
ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
{
	return (struct ext4_dir_entry_2 *)((char *)p +
		ext4_rec_len_from_disk(p->rec_len, blocksize));
}

/*
 * Future: use high four bits of block for coalesce-on-delete flags
 * Mask them off for now.
 */

static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
{
	return le32_to_cpu(entry->block) & 0x00ffffff;
}

static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
{
	entry->block = cpu_to_le32(value);
}

static inline unsigned dx_get_hash(struct dx_entry *entry)
{
	return le32_to_cpu(entry->hash);
}

static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
{
	entry->hash = cpu_to_le32(value);
}

static inline unsigned dx_get_count(struct dx_entry *entries)
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->count);
}

static inline unsigned dx_get_limit(struct dx_entry *entries)
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
}

static inline void dx_set_count(struct dx_entry *entries, unsigned value)
{
	((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
}

static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
{
	((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
}

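/*
 * Maximum number of dx_entries that fit in the root block: the block
 * minus the "." and ".." entries and the dx_root_info header, less the
 * dx_tail when metadata checksums are enabled.
 */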
static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
{
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
		EXT4_DIR_REC_LEN(2) - infosize;

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
	return entry_space / sizeof(struct dx_entry);
}

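/*
 * Maximum number of dx_entries that fit in a non-root index block.
 */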
static inline unsigned dx_node_limit(struct inode *dir)
{
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
	return entry_space / sizeof(struct dx_entry);
}

/*
 * Debug
 */
#ifdef DX_DEBUG
static void dx_show_index(char * label, struct dx_entry *entries)
{
	int i, n = dx_get_count (entries);
	printk(KERN_DEBUG "%s index ", label);
	for (i = 0; i < n; i++) {
		printk("%x->%lu ", i ? dx_get_hash(entries + i) :
				0, (unsigned long)dx_get_block(entries + i));
	}
	printk("\n");
}

struct stats
{
	unsigned names;
	unsigned space;
	unsigned bcount;
};

static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
				 int size, int show_names)
{
	unsigned names = 0, space = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

	printk("names: ");
	while ((char *) de < base + size)
	{
		if (de->inode)
		{
			if (show_names)
			{
				int len = de->name_len;
				char *name = de->name;
				while (len--) printk("%c", *name++);
				ext4fs_dirhash(de->name, de->name_len, &h);
				printk(":%x.%u ", h.hash,
				       (unsigned) ((char *) de - base));
			}
			space += EXT4_DIR_REC_LEN(de->name_len);
			names++;
		}
		de = ext4_next_entry(de, size);
	}
	printk("(%i)\n", names);
	return (struct stats) { names, space, 1 };
}

struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
			     struct dx_entry *entries, int levels)
{
	unsigned blocksize = dir->i_sb->s_blocksize;
	unsigned count = dx_get_count(entries), names = 0, space = 0, i;
	unsigned bcount = 0;
	struct buffer_head *bh;
	int err;
	printk("%i indexed blocks...\n", count);
	for (i = 0; i < count; i++, entries++)
	{
		ext4_lblk_t block = dx_get_block(entries);
		ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
		struct stats stats;
		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
		if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
		stats = levels?
		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
		   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
		names += stats.names;
		space += stats.space;
		bcount += stats.bcount;
		brelse(bh);
	}
	if (bcount)
		printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
		       levels ? "" : "   ", names, space/bcount,
		       (space/bcount)*100/blocksize);
	return (struct stats) { names, space, bcount};
}
#endif /* DX_DEBUG */

/*
 * Probe for a directory leaf block to search.
 *
 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
 * error in the directory index, and the caller should fall back to
 * searching the directory normally.  The callers of dx_probe **MUST**
 * check for this error code, and make sure it never gets reflected
 * back to userspace.
 */
static struct dx_frame *
dx_probe(const struct qstr *d_name, struct inode *dir,
	 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
{
	unsigned count, indirect;
	struct dx_entry *at, *entries, *p, *q, *m;
	struct dx_root *root;
	struct buffer_head *bh;
	struct dx_frame *frame = frame_in;
	u32 hash;

	frame->bh = NULL;
	if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
		goto fail;
	root = (struct dx_root *) bh->b_data;
	if (root->info.hash_version != DX_HASH_TEA &&
	    root->info.hash_version != DX_HASH_HALF_MD4 &&
	    root->info.hash_version != DX_HASH_LEGACY) {
		ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
			     root->info.hash_version);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}
	hinfo->hash_version = root->info.hash_version;
	if (hinfo->hash_version <= DX_HASH_TEA)
		hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
	if (d_name)
		ext4fs_dirhash(d_name->name, d_name->len, hinfo);
	hash = hinfo->hash;

	if (root->info.unused_flags & 1) {
		ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
			     root->info.unused_flags);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	if ((indirect = root->info.indirect_levels) > 1) {
		ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
			     root->info.indirect_levels);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	if (!buffer_verified(bh) &&
	    !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) {
		ext4_warning(dir->i_sb, "Root failed checksum");
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}
	set_buffer_verified(bh);

	entries = (struct dx_entry *) (((char *)&root->info) +
				       root->info.info_length);

	if (dx_get_limit(entries) != dx_root_limit(dir,
						   root->info.info_length)) {
		ext4_warning(dir->i_sb, "dx entry: limit != root limit");
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	dxtrace(printk("Look up %x", hash));
	while (1)
	{
		count = dx_get_count(entries);
		if (!count || count > dx_get_limit(entries)) {
			ext4_warning(dir->i_sb,
				     "dx entry: no count or count > limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}

		p = entries + 1;
		q = entries + count - 1;
		while (p <= q)
		{
			m = p + (q - p)/2;
			dxtrace(printk("."));
			if (dx_get_hash(m) > hash)
				q = m - 1;
			else
				p = m + 1;
		}

		if (0) // linear search cross check
		{
			unsigned n = count - 1;
			at = entries;
			while (n--)
			{
				dxtrace(printk(","));
				if (dx_get_hash(++at) > hash)
				{
					at--;
					break;
				}
			}
			assert (at == p - 1);
		}

		at = p - 1;
		dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
		frame->bh = bh;
		frame->entries = entries;
		frame->at = at;
		if (!indirect--) return frame;
		if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
			goto fail2;
		at = entries = ((struct dx_node *) bh->b_data)->entries;

		if (!buffer_verified(bh) &&
		    !ext4_dx_csum_verify(dir,
					 (struct ext4_dir_entry *)bh->b_data)) {
			ext4_warning(dir->i_sb, "Node failed checksum");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail;
		}
		set_buffer_verified(bh);

		if (dx_get_limit(entries) != dx_node_limit (dir)) {
			ext4_warning(dir->i_sb,
				     "dx entry: limit != node limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}
		frame++;
		frame->bh = NULL;
	}
fail2:
	while (frame >= frame_in) {
		brelse(frame->bh);
		frame--;
	}
fail:
	if (*err == ERR_BAD_DX_DIR)
		ext4_warning(dir->i_sb,
			     "Corrupt dir inode %lu, running e2fsck is "
			     "recommended.", dir->i_ino);
	return NULL;
}

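/* Release the index-block buffers pinned by dx_probe(). */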
static void dx_release (struct dx_frame *frames)
{
	if (frames[0].bh == NULL)
		return;

	if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
		brelse(frames[1].bh);
	brelse(frames[0].bh);
}

/*
 * This function increments the frame pointer to search the next leaf
 * block, and reads in the necessary intervening nodes if the search
 * should be necessary.  Whether or not the search is necessary is
 * controlled by the hash parameter.  If the hash value is even, then
 * the search is only continued if the next block starts with that
 * hash value.  This is used if we are searching for a specific file.
 *
 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 *
 * This function returns 1 if the caller should continue to search,
 * or 0 if it should not.  If there is an error reading one of the
 * index blocks, it will return a negative error code.
 *
 * If start_hash is non-null, it will be filled in with the starting
 * hash of the next page.
 */
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash)
{
	struct dx_frame *p;
	struct buffer_head *bh;
	int err, num_frames = 0;
	__u32 bhash;

	p = frame;
	/*
	 * Find the next leaf page by incrementing the frame pointer.
	 * If we run out of entries in the interior node, loop around and
	 * increment pointer in the parent node.  When we break out of
	 * this loop, num_frames indicates the number of interior
	 * nodes that need to be read.
	 */
	while (1) {
		if (++(p->at) < p->entries + dx_get_count(p->entries))
			break;
		if (p == frames)
			return 0;
		num_frames++;
		p--;
	}

	/*
	 * If the hash is 1, then continue only if the next page has a
	 * continuation hash of any value.  This is used for readdir
	 * handling.  Otherwise, check to see if the hash matches the
	 * desired continuation hash.  If it doesn't, return since
	 * there's no point in reading the successive index pages.
	 */
	bhash = dx_get_hash(p->at);
	if (start_hash)
		*start_hash = bhash;
	if ((hash & 1) == 0) {
		if ((bhash & ~1) != hash)
			return 0;
	}
	/*
	 * If the hash is HASH_NB_ALWAYS, we always go to the next
	 * block so no check is necessary
	 */
	while (num_frames--) {
		if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
				      0, &err)))
			return err; /* Failure */

		if (!buffer_verified(bh) &&
		    !ext4_dx_csum_verify(dir,
					 (struct ext4_dir_entry *)bh->b_data)) {
			ext4_warning(dir->i_sb, "Node failed checksum");
			return -EIO;
		}
		set_buffer_verified(bh);

		p++;
		brelse(p->bh);
		p->bh = bh;
		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
	}
	return 1;
}


/*
 * This function fills a red-black tree with information from a
 * directory block.  It returns the number of directory entries loaded
 * into the tree.  If there is an error it is returned in err.
 */
static int htree_dirblock_to_tree(struct file *dir_file,
				  struct inode *dir, ext4_lblk_t block,
				  struct dx_hash_info *hinfo,
				  __u32 start_hash, __u32 start_minor_hash)
{
	struct buffer_head *bh;
	struct ext4_dir_entry_2 *de, *top;
	int err, count = 0;

	dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
							(unsigned long)block));
	if (!(bh = ext4_bread (NULL, dir, block, 0, &err)))
		return err;

	if (!buffer_verified(bh) &&
	    !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
		return -EIO;
	set_buffer_verified(bh);

	de = (struct ext4_dir_entry_2 *) bh->b_data;
	top = (struct ext4_dir_entry_2 *) ((char *) de +
					   dir->i_sb->s_blocksize -
					   EXT4_DIR_REC_LEN(0));
	for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
		if (ext4_check_dir_entry(dir, NULL, de, bh,
				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
					 + ((char *)de - bh->b_data))) {
			/* On error, skip the f_pos to the next block. */
			dir_file->f_pos = (dir_file->f_pos |
					(dir->i_sb->s_blocksize - 1)) + 1;
			brelse(bh);
			return count;
		}
		ext4fs_dirhash(de->name, de->name_len, hinfo);
		if ((hinfo->hash < start_hash) ||
		    ((hinfo->hash == start_hash) &&
		     (hinfo->minor_hash < start_minor_hash)))
			continue;
		if (de->inode == 0)
			continue;
		if ((err = ext4_htree_store_dirent(dir_file,
				   hinfo->hash, hinfo->minor_hash, de)) != 0) {
			brelse(bh);
			return err;
		}
		count++;
	}
	brelse(bh);
	return count;
}


/*
 * This function fills a red-black tree with information from a
 * directory.  We start scanning the directory in hash order, starting
 * at start_hash and start_minor_hash.
 *
 * This function returns the number of entries inserted into the tree,
 * or a negative error code.
 */
int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
			 __u32 start_minor_hash, __u32 *next_hash)
{
	struct dx_hash_info hinfo;
	struct ext4_dir_entry_2 *de;
	struct dx_frame frames[2], *frame;
	struct inode *dir;
	ext4_lblk_t block;
	int count = 0;
	int ret, err;
	__u32 hashval;

	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
		       start_hash, start_minor_hash));
	dir = dir_file->f_path.dentry->d_inode;
	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
		if (hinfo.hash_version <= DX_HASH_TEA)
			hinfo.hash_version +=
				EXT4_SB(dir->i_sb)->s_hash_unsigned;
		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
					       start_hash, start_minor_hash);
		*next_hash = ~0;
		return count;
	}
	hinfo.hash = start_hash;
	hinfo.minor_hash = 0;
	frame = dx_probe(NULL, dir, &hinfo, frames, &err);
	if (!frame)
		return err;

	/* Add '.' and '..' from the htree header */
	if (!start_hash && !start_minor_hash) {
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
		if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
			goto errout;
		count++;
	}
	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
		de = ext4_next_entry(de, dir->i_sb->s_blocksize);
		if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
			goto errout;
		count++;
	}

	while (1) {
		block = dx_get_block(frame->at);
		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
					     start_hash, start_minor_hash);
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		count += ret;
		hashval = ~0;
		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
					    frame, frames, &hashval);
		*next_hash = hashval;
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		/*
		 * Stop if:  (a) there are no more entries, or
		 * (b) we have inserted at least one entry and the
		 * next hash value is not a continuation
		 */
		if ((ret == 0) ||
		    (count && ((hashval & 1) == 0)))
			break;
	}
	dx_release(frames);
	dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
		       "next hash: %x\n", count, *next_hash));
	return count;
errout:
	dx_release(frames);
	return (err);
}


/*
 * Directory block splitting, compacting
 */

/*
 * Create map of hash values, offsets, and sizes, stored at end of block.
 * Returns number of entries mapped.
 */
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
		       struct dx_hash_info *hinfo,
		       struct dx_map_entry *map_tail)
{
	int count = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

	while ((char *) de < base + blocksize) {
		if (de->name_len && de->inode) {
			ext4fs_dirhash(de->name, de->name_len, &h);
			map_tail--;
			map_tail->hash = h.hash;
			map_tail->offs = ((char *) de - base)>>2;
			map_tail->size = le16_to_cpu(de->rec_len);