super.c 134 KB
Newer Older
1
/*
 *  linux/fs/ext4/super.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/time.h>
23
#include <linux/vmalloc.h>
24
#include <linux/jbd2.h>
25
26
27
28
29
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
#include <linux/buffer_head.h>
30
#include <linux/exportfs.h>
31
32
33
34
35
36
#include <linux/vfs.h>
#include <linux/random.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
37
#include <linux/proc_fs.h>
Theodore Ts'o's avatar
Theodore Ts'o committed
38
#include <linux/ctype.h>
Vignesh Babu's avatar
Vignesh Babu committed
39
#include <linux/log2.h>
40
#include <linux/crc16.h>
41
42
#include <asm/uaccess.h>

43
44
45
#include <linux/kthread.h>
#include <linux/freezer.h>

46
47
#include "ext4.h"
#include "ext4_jbd2.h"
48
49
#include "xattr.h"
#include "acl.h"
50
#include "mballoc.h"
51

52
53
54
#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>

55
/* Parent proc directory passed to remove_proc_entry() in ext4_put_super(). */
static struct proc_dir_entry *ext4_proc_root;

/*
 * NOTE(review): the objects below are not referenced in the part of the
 * file reviewed here; their roles are only suggested by their names
 * (sysfs kset, lazy-init state and its mutex, feature set) - confirm
 * against their users.
 */
static struct kset *ext4_kset;
struct mutex ext4_li_mtx;
struct ext4_lazy_init *ext4_li_info;
struct ext4_features *ext4_feat;
60

61
/*
 * Forward declarations of file-local functions, grouped by area so that
 * they can be referenced ahead of their definitions.
 */

/* Journal setup and recovery. */
static int ext4_load_journal(struct super_block *,
			     struct ext4_super_block *,
			     unsigned long journal_devnum);
static void ext4_mark_recovery_complete(struct super_block *sb,
					struct ext4_super_block *es);
static void ext4_clear_journal_err(struct super_block *sb,
				   struct ext4_super_block *es);

/* Superblock-level operations. */
static int ext4_commit_super(struct super_block *sb, int sync);
static int ext4_sync_fs(struct super_block *sb, int wait);
static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static int ext4_unfreeze(struct super_block *sb);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
				 const char *dev_name, void *data);

/* Error reporting. */
static const char *ext4_decode_error(struct super_block *sb, int errno,
				     char nbuf[16]);

/* Lazy-init request handling. */
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
81

82
83
84
85
/*
 * If ext3 itself is not configured (neither built in nor as a module) but
 * CONFIG_EXT4_USE_FOR_EXT23 is set, define an "ext3" file_system_type whose
 * mount hook is ext4_mount().  IS_EXT3_SB() then tests whether the
 * superblock's block device is held by that type; in every other
 * configuration it is constant 0.
 */
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && \
	defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
	.name		= "ext3",
	.owner		= THIS_MODULE,
	.mount		= ext4_mount,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};
#define IS_EXT3_SB(sb)	((sb)->s_bdev->bd_holder == &ext3_fs_type)
#else
#define IS_EXT3_SB(sb)	(0)
#endif
Laurent Vivier's avatar
Laurent Vivier committed
94

95
96
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
			       struct ext4_group_desc *bg)
Laurent Vivier's avatar
Laurent Vivier committed
97
{
98
	return le32_to_cpu(bg->bg_block_bitmap_lo) |
99
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
100
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
Laurent Vivier's avatar
Laurent Vivier committed
101
102
}

103
104
ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
			       struct ext4_group_desc *bg)
Laurent Vivier's avatar
Laurent Vivier committed
105
{
106
	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
107
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
108
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
Laurent Vivier's avatar
Laurent Vivier committed
109
110
}

111
112
ext4_fsblk_t ext4_inode_table(struct super_block *sb,
			      struct ext4_group_desc *bg)
Laurent Vivier's avatar
Laurent Vivier committed
113
{
114
	return le32_to_cpu(bg->bg_inode_table_lo) |
115
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
116
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
Laurent Vivier's avatar
Laurent Vivier committed
117
118
}

119
120
121
122
123
__u32 ext4_free_blks_count(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
124
		 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
125
126
127
128
129
130
131
}

__u32 ext4_free_inodes_count(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
132
		 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
133
134
135
136
137
138
139
}

__u32 ext4_used_dirs_count(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
140
		 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
141
142
143
144
145
146
147
}

__u32 ext4_itable_unused_count(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_itable_unused_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
148
		 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
149
150
}

151
152
/*
 * Store @blk into bg_block_bitmap_lo of @bg and, when the descriptor size
 * of @sb is at least EXT4_MIN_DESC_SIZE_64BIT, its upper 32 bits into
 * bg_block_bitmap_hi.  Smaller descriptors leave the _hi field untouched.
 */
void ext4_block_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
}

159
160
/*
 * Store @blk into bg_inode_bitmap_lo of @bg and, when the descriptor size
 * of @sb is at least EXT4_MIN_DESC_SIZE_64BIT, its upper 32 bits into
 * bg_inode_bitmap_hi.  Smaller descriptors leave the _hi field untouched.
 */
void ext4_inode_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
}

167
168
/*
 * Store @blk into bg_inode_table_lo of @bg and, when the descriptor size
 * of @sb is at least EXT4_MIN_DESC_SIZE_64BIT, its upper 32 bits into
 * bg_inode_table_hi.  Smaller descriptors leave the _hi field untouched.
 */
void ext4_inode_table_set(struct super_block *sb,
			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}

175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/*
 * Store the low 16 bits of @count into bg_free_blocks_count_lo and, when
 * the descriptor size of @sb is at least EXT4_MIN_DESC_SIZE_64BIT, the
 * upper 16 bits into bg_free_blocks_count_hi.
 */
void ext4_free_blks_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
}

/*
 * Store the low 16 bits of @count into bg_free_inodes_count_lo and, when
 * the descriptor size of @sb is at least EXT4_MIN_DESC_SIZE_64BIT, the
 * upper 16 bits into bg_free_inodes_count_hi.
 */
void ext4_free_inodes_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
}

/*
 * Store the low 16 bits of @count into bg_used_dirs_count_lo and, when
 * the descriptor size of @sb is at least EXT4_MIN_DESC_SIZE_64BIT, the
 * upper 16 bits into bg_used_dirs_count_hi.
 */
void ext4_used_dirs_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
}

/*
 * Store the low 16 bits of @count into bg_itable_unused_lo and, when the
 * descriptor size of @sb is at least EXT4_MIN_DESC_SIZE_64BIT, the upper
 * 16 bits into bg_itable_unused_hi.
 */
void ext4_itable_unused_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}

207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236

/* Just increment the non-pointer handle value */
static handle_t *ext4_get_nojournal(void)
{
	handle_t *handle = current->journal_info;
	unsigned long ref_cnt = (unsigned long)handle;

	BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);

	ref_cnt++;
	handle = (handle_t *)ref_cnt;

	current->journal_info = handle;
	return handle;
}


/*
 * Counterpart of ext4_get_nojournal(): interpret @handle as a counter,
 * subtract one and store the result in current->journal_info.  BUG()s if
 * the counter is already zero.
 */
static void ext4_put_nojournal(handle_t *handle)
{
	unsigned long nesting = (unsigned long)handle;

	BUG_ON(nesting == 0);

	current->journal_info = (handle_t *)(nesting - 1);
}

237
/*
238
 * Wrappers for jbd2_journal_start/end.
239
240
241
242
243
244
 *
 * The only special thing we need to do here is to make sure that all
 * journal_end calls result in the superblock being marked dirty, so
 * that sync() will call the filesystem's write_super callback if
 * appropriate.
 */
245
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
246
247
248
249
250
251
{
	journal_t *journal;

	if (sb->s_flags & MS_RDONLY)
		return ERR_PTR(-EROFS);

252
	vfs_check_frozen(sb, SB_FREEZE_TRANS);
253
254
255
	/* Special case here: if the journal has aborted behind our
	 * backs (eg. EIO in the commit thread), then we still need to
	 * take the FS itself readonly cleanly. */
256
	journal = EXT4_SB(sb)->s_journal;
257
258
	if (journal) {
		if (is_journal_aborted(journal)) {
259
			ext4_abort(sb, "Detected aborted journal");
260
261
262
			return ERR_PTR(-EROFS);
		}
		return jbd2_journal_start(journal, nblocks);
263
	}
264
	return ext4_get_nojournal();
265
266
267
268
}

/*
 * The only special thing we need to do here is to make sure that all
269
 * jbd2_journal_stop calls result in the superblock being marked dirty, so
270
271
272
 * that sync() will call the filesystem's write_super callback if
 * appropriate.
 */
273
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
274
275
276
277
278
{
	struct super_block *sb;
	int err;
	int rc;

279
	if (!ext4_handle_valid(handle)) {
280
		ext4_put_nojournal(handle);
281
282
		return 0;
	}
283
284
	sb = handle->h_transaction->t_journal->j_private;
	err = handle->h_err;
285
	rc = jbd2_journal_stop(handle);
286
287
288
289

	if (!err)
		err = rc;
	if (err)
290
		__ext4_std_error(sb, where, line, err);
291
292
293
	return err;
}

294
295
296
/*
 * Abort @handle because @err_fn failed with @err.
 *
 * Requires a valid handle (BUG otherwise).  @err is stored in the handle
 * only if no earlier error is recorded there.  If the handle has not been
 * aborted yet, a message naming @caller/@line is logged and
 * jbd2_journal_abort_handle() is called.  @bh may be NULL; it is only used
 * for BUFFER_TRACE().
 */
void ext4_journal_abort_handle(const char *caller, unsigned int line,
			       const char *err_fn, struct buffer_head *bh,
			       handle_t *handle, int err)
{
	char nbuf[16];
	const char *errstr;

	errstr = ext4_decode_error(NULL, err, nbuf);

	BUG_ON(!ext4_handle_valid(handle));

	if (bh)
		BUFFER_TRACE(bh, "abort");

	/* Keep the first error seen on this handle. */
	if (!handle->h_err)
		handle->h_err = err;

	if (!is_handle_aborted(handle)) {
		printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n",
		       caller, line, errstr, err_fn);
		jbd2_journal_abort_handle(handle);
	}
}

318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
static void __save_error_info(struct super_block *sb, const char *func,
			    unsigned int line)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
	es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
	es->s_last_error_time = cpu_to_le32(get_seconds());
	strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
	es->s_last_error_line = cpu_to_le32(line);
	if (!es->s_first_error_time) {
		es->s_first_error_time = es->s_last_error_time;
		strncpy(es->s_first_error_func, func,
			sizeof(es->s_first_error_func));
		es->s_first_error_line = cpu_to_le32(line);
		es->s_first_error_ino = es->s_last_error_ino;
		es->s_first_error_block = es->s_last_error_block;
	}
336
337
338
339
340
341
	/*
	 * Start the daily error reporting function if it hasn't been
	 * started already
	 */
	if (!es->s_error_count)
		mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
342
343
344
345
346
347
348
349
350
351
352
	es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
}

/*
 * Record an error via __save_error_info() and then commit the superblock
 * with ext4_commit_super(sb, 1).
 */
static void save_error_info(struct super_block *sb, const char *func,
			    unsigned int line)
{
	__save_error_info(sb, func, line);

	ext4_commit_super(sb, 1);
}


353
354
355
356
/* Deal with the reporting of failure conditions on a filesystem such as
 * inconsistencies detected or read IO failures.
 *
 * On ext2, we can store the error state of the filesystem in the
357
 * superblock.  That is not possible on ext4, because we may have other
358
359
360
361
362
 * write ordering constraints on the superblock which prevent us from
 * writing it out straight away; and given that the journal is about to
 * be aborted, we can't rely on the current, or future, transactions to
 * write out the superblock safely.
 *
363
 * We'll just use the jbd2_journal_abort() error code to record an error in
364
 * the journal instead.  On recovery, the journal will complain about
365
366
367
 * that error until we've noted it down and cleared it.
 */

368
static void ext4_handle_error(struct super_block *sb)
369
370
371
372
{
	if (sb->s_flags & MS_RDONLY)
		return;

373
	if (!test_opt(sb, ERRORS_CONT)) {
374
		journal_t *journal = EXT4_SB(sb)->s_journal;
375

376
		EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
377
		if (journal)
378
			jbd2_journal_abort(journal, -EIO);
379
	}
380
	if (test_opt(sb, ERRORS_RO)) {
381
		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
382
383
384
		sb->s_flags |= MS_RDONLY;
	}
	if (test_opt(sb, ERRORS_PANIC))
385
		panic("EXT4-fs (device %s): panic forced after error\n",
386
387
388
			sb->s_id);
}

389
void __ext4_error(struct super_block *sb, const char *function,
390
		  unsigned int line, const char *fmt, ...)
391
{
Joe Perches's avatar
Joe Perches committed
392
	struct va_format vaf;
393
394
395
	va_list args;

	va_start(args, fmt);
Joe Perches's avatar
Joe Perches committed
396
397
398
399
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
	       sb->s_id, function, line, current->comm, &vaf);
400
401
	va_end(args);

402
	ext4_handle_error(sb);
403
404
}

405
406
/*
 * Report a filesystem error attributed to @inode and, optionally, @block.
 *
 * The inode number and block are stored in the superblock's last-error
 * fields before save_error_info() records and commits the error.  The log
 * line includes the block only when @block is non-zero.  Finishes by
 * applying the error policy through ext4_handle_error().
 */
void ext4_error_inode(struct inode *inode, const char *function,
		      unsigned int line, ext4_fsblk_t block,
		      const char *fmt, ...)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct va_format vaf;
	va_list args;

	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
	es->s_last_error_block = cpu_to_le64(block);
	save_error_info(sb, function, line);

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
	       sb->s_id, function, line, inode->i_ino);
	if (block)
		printk(KERN_CONT "block %llu: ", block);
	printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf);
	va_end(args);

	ext4_handle_error(sb);
}

429
/*
 * Report a filesystem error attributed to an open @file and, optionally,
 * @block.
 *
 * The inode number and block are stored in the superblock's last-error
 * fields before save_error_info() records and commits the error.  The log
 * line contains the file's path as produced by d_path() into an 80-byte
 * buffer, or "(unknown)" if d_path() fails, and the block only when
 * @block is non-zero.  Finishes by applying the error policy through
 * ext4_handle_error().
 */
void ext4_error_file(struct file *file, const char *function,
		     unsigned int line, ext4_fsblk_t block,
		     const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;
	struct ext4_super_block *es;
	struct inode *inode = file->f_dentry->d_inode;
	char pathname[80], *path;

	es = EXT4_SB(inode->i_sb)->s_es;
	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
	/*
	 * Record the block as ext4_error_inode() does, so that a block
	 * number left over from an earlier error is not attributed to
	 * this one.
	 */
	es->s_last_error_block = cpu_to_le64(block);
	save_error_info(inode->i_sb, function, line);

	path = d_path(&(file->f_path), pathname, sizeof(pathname));
	if (IS_ERR(path))
		path = "(unknown)";

	printk(KERN_CRIT
	       "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
	       inode->i_sb->s_id, function, line, inode->i_ino);
	if (block)
		printk(KERN_CONT "block %llu: ", block);
	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf);
	va_end(args);

	ext4_handle_error(inode->i_sb);
}

459
static const char *ext4_decode_error(struct super_block *sb, int errno,
460
461
462
463
464
465
466
467
468
469
470
471
				     char nbuf[16])
{
	char *errstr = NULL;

	switch (errno) {
	case -EIO:
		errstr = "IO failure";
		break;
	case -ENOMEM:
		errstr = "Out of memory";
		break;
	case -EROFS:
472
473
		if (!sb || (EXT4_SB(sb)->s_journal &&
			    EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
			errstr = "Journal has aborted";
		else
			errstr = "Readonly filesystem";
		break;
	default:
		/* If the caller passed in an extra buffer for unknown
		 * errors, textualise them now.  Else we just return
		 * NULL. */
		if (nbuf) {
			/* Check for truncated error codes... */
			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				errstr = nbuf;
		}
		break;
	}

	return errstr;
}

493
/* __ext4_std_error decodes expected errors from journaling functions
494
495
 * automatically and invokes the appropriate error response.  */

496
497
void __ext4_std_error(struct super_block *sb, const char *function,
		      unsigned int line, int errno)
498
499
500
501
502
503
504
505
506
507
508
{
	char nbuf[16];
	const char *errstr;

	/* Special case: if the error is EROFS, and we're not already
	 * inside a transaction, then there's really no point in logging
	 * an error. */
	if (errno == -EROFS && journal_current_handle() == NULL &&
	    (sb->s_flags & MS_RDONLY))
		return;

509
	errstr = ext4_decode_error(sb, errno, nbuf);
510
511
	printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
	       sb->s_id, function, line, errstr);
512
	save_error_info(sb, function, line);
513

514
	ext4_handle_error(sb);
515
516
517
}

/*
518
 * ext4_abort is a much stronger failure handler than ext4_error.  The
519
520
521
522
523
524
525
526
 * abort function may be used to deal with unrecoverable failures such
 * as journal IO errors or ENOMEM at a critical moment in log management.
 *
 * We unconditionally force the filesystem into an ABORT|READONLY state,
 * unless the error response on the fs has been set to panic in which
 * case we take the easy way out and panic immediately.
 */

527
void __ext4_abort(struct super_block *sb, const char *function,
528
		unsigned int line, const char *fmt, ...)
529
530
531
{
	va_list args;

532
	save_error_info(sb, function, line);
533
	va_start(args, fmt);
534
535
	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
	       function, line);
536
537
538
539
	vprintk(fmt, args);
	printk("\n");
	va_end(args);

540
541
542
543
544
545
546
547
	if ((sb->s_flags & MS_RDONLY) == 0) {
		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
		sb->s_flags |= MS_RDONLY;
		EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
		if (EXT4_SB(sb)->s_journal)
			jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
		save_error_info(sb, function, line);
	}
548
	if (test_opt(sb, ERRORS_PANIC))
549
		panic("EXT4-fs panic from previous error\n");
550
551
}

Joe Perches's avatar
Joe Perches committed
552
void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
553
{
Joe Perches's avatar
Joe Perches committed
554
	struct va_format vaf;
555
556
557
	va_list args;

	va_start(args, fmt);
Joe Perches's avatar
Joe Perches committed
558
559
560
	vaf.fmt = fmt;
	vaf.va = &args;
	printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
561
562
563
	va_end(args);
}

564
void __ext4_warning(struct super_block *sb, const char *function,
565
		    unsigned int line, const char *fmt, ...)
566
{
Joe Perches's avatar
Joe Perches committed
567
	struct va_format vaf;
568
569
570
	va_list args;

	va_start(args, fmt);
Joe Perches's avatar
Joe Perches committed
571
572
573
574
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
	       sb->s_id, function, line, &vaf);
575
576
577
	va_end(args);
}

578
579
580
581
/*
 * Report an error while the caller holds the lock of block group @grp.
 *
 * @function, @line: location of the report
 * @sb:              superblock the error was detected on
 * @grp:             block group whose lock is held by the caller
 * @ino:             inode involved, or 0 if none
 * @block:           block involved, or 0 if none
 * @fmt:             printf-style description of the problem
 *
 * The error is recorded in the superblock buffer with __save_error_info()
 * (no synchronous commit) and logged.  With errors=continue the superblock
 * is committed without waiting and the function returns with the group
 * lock still held.  Otherwise the group lock is dropped around
 * ext4_handle_error() and re-taken before returning.
 */
void __ext4_grp_locked_error(const char *function, unsigned int line,
			     struct super_block *sb, ext4_group_t grp,
			     unsigned long ino, ext4_fsblk_t block,
			     const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	struct va_format vaf;
	va_list args;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	es->s_last_error_ino = cpu_to_le32(ino);
	es->s_last_error_block = cpu_to_le64(block);
	__save_error_info(sb, function, line);

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;
	/*
	 * Each fragment ends with a separator so that the optional inode and
	 * block parts do not run into the group number or the message.
	 */
	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
	       sb->s_id, function, line, grp);
	if (ino)
		printk(KERN_CONT "inode %lu: ", ino);
	if (block)
		printk(KERN_CONT "block %llu: ", (unsigned long long) block);
	printk(KERN_CONT "%pV\n", &vaf);
	va_end(args);

	if (test_opt(sb, ERRORS_CONT)) {
		ext4_commit_super(sb, 0);
		return;
	}

	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb);
	/*
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted.  This function returns nothing, so
	 * callers cannot distinguish between the ERRORS_CONT and
	 * ERRORS_RO cases from here.
	 */
	ext4_lock_group(sb, grp);
}

628
void ext4_update_dynamic_rev(struct super_block *sb)
629
{
630
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
631

632
	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
633
634
		return;

635
	ext4_warning(sb,
636
637
		     "updating to rev %d because of new feature flag, "
		     "running e2fsck is recommended",
638
		     EXT4_DYNAMIC_REV);
639

640
641
642
	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
643
644
645
646
647
648
649
650
651
652
653
654
655
	/* leave es->s_feature_*compat flags alone */
	/* es->s_uuid will be set by e2fsck if empty */

	/*
	 * The rest of the superblock fields should be zero, and if not it
	 * means they are likely already in use, so leave them alone.  We
	 * can leave it up to e2fsck to clean up any inconsistencies there.
	 */
}

/*
 * Open the external journal device
 */
656
static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
657
658
659
660
{
	struct block_device *bdev;
	char b[BDEVNAME_SIZE];

661
	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
662
663
664
665
666
	if (IS_ERR(bdev))
		goto fail;
	return bdev;

fail:
667
	ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
668
669
670
671
672
673
674
			__bdevname(dev, b), PTR_ERR(bdev));
	return NULL;
}

/*
 * Release the journal device
 */
675
static int ext4_blkdev_put(struct block_device *bdev)
676
{
677
	return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
678
679
}

680
static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
681
682
683
684
685
686
{
	struct block_device *bdev;
	int ret = -ENODEV;

	bdev = sbi->journal_bdev;
	if (bdev) {
687
		ret = ext4_blkdev_put(bdev);
688
689
690
691
692
693
694
		sbi->journal_bdev = NULL;
	}
	return ret;
}

static inline struct inode *orphan_list_entry(struct list_head *l)
{
695
	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
696
697
}

698
static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
699
700
701
{
	struct list_head *l;

702
703
	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
		 le32_to_cpu(sbi->s_es->s_last_orphan));
704
705
706
707
708
709
710
711
712
713
714
715

	printk(KERN_ERR "sb_info orphan list:\n");
	list_for_each(l, &sbi->s_orphan) {
		struct inode *inode = orphan_list_entry(l);
		printk(KERN_ERR "  "
		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
		       inode->i_sb->s_id, inode->i_ino, inode,
		       inode->i_mode, inode->i_nlink,
		       NEXT_ORPHAN(inode));
	}
}

716
static void ext4_put_super(struct super_block *sb)
717
{
718
719
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
720
	int i, err;
721

722
	ext4_unregister_li_request(sb);
723
724
	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);

725
726
727
	flush_workqueue(sbi->dio_unwritten_wq);
	destroy_workqueue(sbi->dio_unwritten_wq);

728
	lock_super(sb);
729
	if (sb->s_dirt)
730
		ext4_commit_super(sb, 1);
731

732
733
734
735
	if (sbi->s_journal) {
		err = jbd2_journal_destroy(sbi->s_journal);
		sbi->s_journal = NULL;
		if (err < 0)
736
			ext4_abort(sb, "Couldn't clean up the journal");
737
	}
738

739
	del_timer(&sbi->s_err_report);
740
741
742
743
744
	ext4_release_system_zone(sb);
	ext4_mb_release(sb);
	ext4_ext_release(sb);
	ext4_xattr_put_super(sb);

745
	if (!(sb->s_flags & MS_RDONLY)) {
746
		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
747
		es->s_state = cpu_to_le16(sbi->s_mount_state);
748
		ext4_commit_super(sb, 1);
749
	}
750
	if (sbi->s_proc) {
751
		remove_proc_entry(sb->s_id, ext4_proc_root);
752
	}
Theodore Ts'o's avatar
Theodore Ts'o committed
753
	kobject_del(&sbi->s_kobj);
754
755
756
757

	for (i = 0; i < sbi->s_gdb_count; i++)
		brelse(sbi->s_group_desc[i]);
	kfree(sbi->s_group_desc);
758
759
760
761
	if (is_vmalloc_addr(sbi->s_flex_groups))
		vfree(sbi->s_flex_groups);
	else
		kfree(sbi->s_flex_groups);
762
763
764
	percpu_counter_destroy(&sbi->s_freeblocks_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
765
	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
766
767
768
769
770
771
772
773
774
775
776
777
778
779
	brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		kfree(sbi->s_qf_names[i]);
#endif

	/* Debugging code just in case the in-memory inode orphan list
	 * isn't empty.  The on-disk one can be non-empty if we've
	 * detected an error and taken the fs readonly, but the
	 * in-memory list had better be clean by this point. */
	if (!list_empty(&sbi->s_orphan))
		dump_orphan_list(sb, sbi);
	J_ASSERT(list_empty(&sbi->s_orphan));

780
	invalidate_bdev(sb->s_bdev);
781
782
783
784
785
786
787
	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
		/*
		 * Invalidate the journal device's buffers.  We don't want them
		 * floating about in memory - the physical journal device may
		 * hotswapped, and it breaks the `ro-after' testing code.
		 */
		sync_blockdev(sbi->journal_bdev);
788
		invalidate_bdev(sbi->journal_bdev);
789
		ext4_blkdev_remove(sbi);
790
791
	}
	sb->s_fs_info = NULL;
Theodore Ts'o's avatar
Theodore Ts'o committed
792
793
794
795
796
797
798
	/*
	 * Now that we are completely done shutting down the
	 * superblock, we need to actually destroy the kobject.
	 */
	unlock_super(sb);
	kobject_put(&sbi->s_kobj);
	wait_for_completion(&sbi->s_kobj_unregister);
799
	kfree(sbi->s_blockgroup_lock);
800
801
802
	kfree(sbi);
}

803
static struct kmem_cache *ext4_inode_cachep;
804
805
806
807

/*
 * Called inside transaction, so use GFP_NOFS
 */
808
static struct inode *ext4_alloc_inode(struct super_block *sb)
809
{
810
	struct ext4_inode_info *ei;
811

812
	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
813
814
	if (!ei)
		return NULL;
815

816
	ei->vfs_inode.i_version = 1;
817
	ei->vfs_inode.i_data.writeback_index = 0;
818
	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
819
820
	INIT_LIST_HEAD(&ei->i_prealloc_list);
	spin_lock_init(&ei->i_prealloc_lock);
821
822
823
	ei->i_reserved_data_blocks = 0;
	ei->i_reserved_meta_blocks = 0;
	ei->i_allocated_meta_blocks = 0;
824
	ei->i_da_metadata_calc_len = 0;
825
	spin_lock_init(&(ei->i_block_reservation_lock));
826
827
828
#ifdef CONFIG_QUOTA
	ei->i_reserved_quota = 0;
#endif
829
	ei->jinode = NULL;
830
	INIT_LIST_HEAD(&ei->i_completed_io_list);
831
	spin_lock_init(&ei->i_completed_io_lock);
832
	ei->cur_aio_dio = NULL;
833
834
	ei->i_sync_tid = 0;
	ei->i_datasync_tid = 0;
835
	atomic_set(&ei->i_ioend_count, 0);
836
	atomic_set(&ei->i_aiodio_unwritten, 0);
837

838
839
840
	return &ei->vfs_inode;
}

841
842
843
844
845
846
847
848
/*
 * Defer the drop decision to generic_drop_inode(), emitting a tracepoint
 * with the outcome before returning it.
 */
static int ext4_drop_inode(struct inode *inode)
{
	const int should_drop = generic_drop_inode(inode);

	trace_ext4_drop_inode(inode, should_drop);

	return should_drop;
}

Nick Piggin's avatar
Nick Piggin committed
849
850
851
852
853
854
855
/*
 * RCU callback queued by ext4_destroy_inode(): re-initialise the inode's
 * i_dentry list head and return the containing ext4_inode_info to the
 * inode cache.
 */
static void ext4_i_callback(struct rcu_head *head)
{
	struct inode *inode;

	inode = container_of(head, struct inode, i_rcu);
	INIT_LIST_HEAD(&inode->i_dentry);

	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}

856
static void ext4_destroy_inode(struct inode *inode)
857
{
858
	ext4_ioend_wait(inode);
859
	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
860
861
862
		ext4_msg(inode->i_sb, KERN_ERR,
			 "Inode %lu (%p): orphan list check failed!",
			 inode->i_ino, EXT4_I(inode));
863
864
865
866
867
		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
				EXT4_I(inode), sizeof(struct ext4_inode_info),
				true);
		dump_stack();
	}
Nick Piggin's avatar
Nick Piggin committed
868
	call_rcu(&inode->i_rcu, ext4_i_callback);
869
870
}

871
static void init_once(void *foo)
872
{
873
	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
874

875
	INIT_LIST_HEAD(&ei->i_orphan);
Theodore Ts'o's avatar
Theodore Ts'o committed
876
#ifdef CONFIG_EXT4_FS_XATTR
877
	init_rwsem(&ei->xattr_sem);
878
#endif
879
	init_rwsem(&ei->i_data_sem);
880
	inode_init_once(&ei->vfs_inode);
881
882
883
884
}

static int init_inodecache(void)
{
885
886
	ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
					     sizeof(struct ext4_inode_info),
887
888
					     0, (SLAB_RECLAIM_ACCOUNT|
						SLAB_MEM_SPREAD),
889
					     init_once);
890
	if (ext4_inode_cachep == NULL)
891
892
893
894
895
896
		return -ENOMEM;
	return 0;
}

static void destroy_inodecache(void)
{
897
	kmem_cache_destroy(ext4_inode_cachep);
898
899
}

Al Viro's avatar
Al Viro committed
900
void ext4_clear_inode(struct inode *inode)
901
{
Al Viro's avatar
Al Viro committed
902
903
	invalidate_inode_buffers(inode);
	end_writeback(inode);
904
	dquot_drop(inode);
905
	ext4_discard_preallocations(inode);
906
907
908
909
910
911
	if (EXT4_I(inode)->jinode) {
		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
					       EXT4_I(inode)->jinode);
		jbd2_free_inode(EXT4_I(inode)->jinode);
		EXT4_I(inode)->jinode = NULL;
	}
912
913
}

914
915
/*
 * Append the quota-related mount options of @sb to @seq: the journaled
 * quota format, the user/group journaled quota file names and the plain
 * usrquota/grpquota flags.  Compiles to an empty function without
 * CONFIG_QUOTA.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	const char *usr_file = sbi->s_qf_names[USRQUOTA];
	const char *grp_file = sbi->s_qf_names[GRPQUOTA];

	if (sbi->s_jquota_fmt) {
		/* Unrecognised formats are shown with an empty name. */
		char *fmtname = "";

		switch (sbi->s_jquota_fmt) {
		case QFMT_VFS_OLD:
			fmtname = "vfsold";
			break;
		case QFMT_VFS_V0:
			fmtname = "vfsv0";
			break;
		case QFMT_VFS_V1:
			fmtname = "vfsv1";
			break;
		}
		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	if (usr_file)
		seq_printf(seq, ",usrjquota=%s", usr_file);
	if (grp_file)
		seq_printf(seq, ",grpjquota=%s", grp_file);

	if (test_opt(sb, USRQUOTA))
		seq_puts(seq, ",usrquota");
	if (test_opt(sb, GRPQUOTA))
		seq_puts(seq, ",grpquota");
#endif
}

Miklos Szeredi's avatar
Miklos Szeredi committed
951
952
953
954
955
/*
 * Show an option if
 *  - it's set to a non-default value OR
 *  - if the per-sb default is different from the global default
 */
956
static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
957
{
958
959
	int def_errors;
	unsigned long def_mount_opts;
960
	struct super_block *sb = vfs->mnt_sb;
Miklos Szeredi's avatar
Miklos Szeredi committed
961
962
963
964
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;

	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
965
	def_errors     = le16_to_cpu(es->s_errors);
Miklos Szeredi's avatar
Miklos Szeredi committed
966
967
968
969
970

	if (sbi->s_sb_block != 1)
		seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
	if (test_opt(sb, MINIX_DF))
		seq_puts(seq, ",minixdf");
971
	if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
Miklos Szeredi's avatar
Miklos Szeredi committed
972
973
974
975
976
977
978
979
980
981
982
		seq_puts(seq, ",grpid");
	if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
		seq_puts(seq, ",nogrpid");
	if (sbi->s_resuid != EXT4_DEF_RESUID ||
	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
		seq_printf(seq, ",resuid=%u", sbi->s_resuid);
	}
	if (sbi->s_resgid != EXT4_DEF_RESGID ||
	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
		seq_printf(seq, ",resgid=%u", sbi->s_resgid);
	}
983
	if (test_opt(sb, ERRORS_RO)) {
Miklos Szeredi's avatar
Miklos Szeredi committed
984
		if (def_errors == EXT4_ERRORS_PANIC ||
985
986
		    def_errors == EXT4_ERRORS_CONTINUE) {
			seq_puts(seq, ",errors=remount-ro");
Miklos Szeredi's avatar
Miklos Szeredi committed
987
988
		}
	}
989
	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
990
		seq_puts(seq, ",errors=continue");
991
	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
Miklos Szeredi's avatar
Miklos Szeredi committed
992
		seq_puts(seq, ",errors=panic");
993
	if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
Miklos Szeredi's avatar
Miklos Szeredi committed
994
		seq_puts(seq, ",nouid32");
995
	if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
Miklos Szeredi's avatar
Miklos Szeredi committed
996
997
998
		seq_puts(seq, ",debug");
	if (test_opt(sb, OLDALLOC))
		seq_puts(seq, ",oldalloc");
Theodore Ts'o's avatar
Theodore Ts'o committed
999
#ifdef CONFIG_EXT4_FS_XATTR
1000
1001
	if (test_opt(sb, XATTR_USER) &&
		!(def_mount_opts & EXT4_DEFM_XATTR_USER))
Miklos Szeredi's avatar
Miklos Szeredi committed
1002
1003
1004
1005
1006
1007
		seq_puts(seq, ",user_xattr");
	if (!test_opt(sb, XATTR_USER) &&
	    (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
		seq_puts(seq, ",nouser_xattr");
	}
#endif
Theodore Ts'o's avatar
Theodore Ts'o committed
1008
#ifdef CONFIG_EXT4_FS_POSIX_ACL
1009
	if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
Miklos Szeredi's avatar
Miklos Szeredi committed
1010
1011
1012
1013
		seq_puts(seq, ",acl");
	if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
		seq_puts(seq, ",noacl");
#endif
1014
	if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
Miklos Szeredi's avatar
Miklos Szeredi committed
1015
1016
1017
		seq_printf(seq, ",commit=%u",
			   (unsigned) (sbi->s_commit_interval / HZ));
	}
1018
1019
1020
1021
1022
1023
1024
1025
1026
	if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
		seq_printf(seq, ",min_batch_time=%u",
			   (unsigned) sbi->s_min_batch_time);
	}
	if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
		seq_printf(seq, ",max_batch_time=%u",
			   (unsigned) sbi->s_min_batch_time);
	}

1027
1028
1029
1030
1031
1032
1033
	/*
	 * We're changing the default of barrier mount option, so
	 * let's always display its mount state so it's clear what its
	 * status is.
	 */
	seq_puts(seq, ",barrier=");
	seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
1034
1035
	if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
		seq_puts(seq, ",journal_async_commit");
Jan Kara's avatar
Jan Kara committed
1036
1037
	else if (test_opt(sb, JOURNAL_CHECKSUM))
		seq_puts(seq, ",journal_checksum");
1038
1039
	if (test_opt(sb, I_VERSION))
		seq_puts(seq, ",i_version");
1040
1041
	if (!test_opt(sb, DELALLOC) &&
	    !(def_mount_opts & EXT4_DEFM_NODELALLOC))
1042
1043
		seq_puts(seq, ",nodelalloc");

1044
1045
	if (test_opt(sb, MBLK_IO_SUBMIT))
		seq_puts(seq, ",mblk_io_submit");
1046
1047
	if (sbi->s_stripe)
		seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
1048
1049
1050
1051
	/*
	 * journal mode get enabled in different ways
	 * So just print the value even if we didn't specify it
	 */
1052
	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
1053
		seq_puts(seq, ",data=journal");
1054
	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
1055
		seq_puts(seq, ",data=ordered");
1056
	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
1057
1058
		seq_puts(seq, ",data=writeback");

1059
1060
1061
1062
	if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
		seq_printf(seq, ",inode_readahead_blks=%u",
			   sbi->s_inode_readahead_blks);

1063
1064
1065
	if (test_opt(sb, DATA_ERR_ABORT))
		seq_puts(seq, ",data_err=abort");

1066
	if (test_opt(sb, NO_AUTO_DA_ALLOC))
Theodore Ts'o's avatar
Theodore Ts'o committed
1067
		seq_puts(seq, ",noauto_da_alloc");
1068

1069
	if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD))
1070
1071
		seq_puts(seq, ",discard");

1072
1073
1074
	if (test_opt(sb, NOLOAD))