/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

Yan's avatar
Yan committed
19
#include <linux/blkdev.h>
20
#include <linux/module.h>
Chris Mason's avatar
Chris Mason committed
21
#include <linux/buffer_head.h>
22
23
24
25
26
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
27
#include <linux/seq_file.h>
28
29
#include <linux/string.h>
#include <linux/backing-dev.h>
Yan's avatar
Yan committed
30
#include <linux/mount.h>
Chris Mason's avatar
Chris Mason committed
31
#include <linux/mpage.h>
Chris Mason's avatar
Chris Mason committed
32
33
#include <linux/swap.h>
#include <linux/writeback.h>
Chris Mason's avatar
Chris Mason committed
34
#include <linux/statfs.h>
Chris Mason's avatar
Chris Mason committed
35
#include <linux/compat.h>
36
#include <linux/parser.h>
37
#include <linux/ctype.h>
38
#include <linux/namei.h>
39
#include <linux/miscdevice.h>
40
#include <linux/magic.h>
41
#include <linux/slab.h>
42
#include <linux/cleancache.h>
43
#include <linux/ratelimit.h>
44
#include <linux/btrfs.h>
45
#include "delayed-inode.h"
46
#include "ctree.h"
Chris Mason's avatar
Chris Mason committed
47
#include "disk-io.h"
48
#include "transaction.h"
Chris Mason's avatar
Chris Mason committed
49
#include "btrfs_inode.h"
Chris Mason's avatar
Chris Mason committed
50
#include "print-tree.h"
51
#include "hash.h"
52
#include "props.h"
Josef Bacik's avatar
Josef Bacik committed
53
#include "xattr.h"
54
#include "volumes.h"
Balaji Rao's avatar
Balaji Rao committed
55
#include "export.h"
56
#include "compression.h"
Josef Bacik's avatar
Josef Bacik committed
57
#include "rcu-string.h"
58
#include "dev-replace.h"
59
#include "free-space-cache.h"
60
#include "backref.h"
61
#include "tests/btrfs-tests.h"
62

63
64
65
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>

66
static const struct super_operations btrfs_super_ops;
67
static struct file_system_type btrfs_fs_type;
Chris Mason's avatar
Chris Mason committed
68

69
/*
 * Translate a negative error code into a short description for log messages.
 *
 * @error: negative error code such as -EIO.  The parameter is deliberately
 *         not called "errno" so it cannot clash with the C library macro of
 *         that name.
 *
 * Returns a pointer to a constant string; codes without a dedicated message
 * (including 0 and positive values) yield "unknown".  The string must not be
 * modified or freed by the caller.
 */
static const char *btrfs_decode_error(int error)
{
	switch (error) {
	case -EIO:
		return "IO failure";
	case -ENOMEM:
		return "Out of memory";
	case -EROFS:
		return "Readonly filesystem";
	case -EEXIST:
		return "Object already exists";
	case -ENOSPC:
		return "No space left";
	case -ENOENT:
		return "No such entry";
	default:
		return "unknown";
	}
}

97
static void save_error_info(struct btrfs_fs_info *fs_info)
98
99
100
101
102
{
	/*
	 * today we only save the error info into ram.  Long term we'll
	 * also send it down to the disk
	 */
103
	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
104
105
106
107
108
109
110
111
112
113
}

/* btrfs handle error by forcing the filesystem readonly */
static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
{
	struct super_block *sb = fs_info->sb;

	if (sb->s_flags & MS_RDONLY)
		return;

114
	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
115
		sb->s_flags |= MS_RDONLY;
116
		btrfs_info(fs_info, "forced readonly");
117
118
119
120
121
122
123
124
125
126
		/*
		 * Note that a running device replace operation is not
		 * canceled here although there is no way to update
		 * the progress. It would add the risk of a deadlock,
		 * therefore the canceling is ommited. The only penalty
		 * is that some I/O remains active until the procedure
		 * completes. The next time when the filesystem is
		 * mounted writeable again, the device replace
		 * operation continues.
		 */
127
128
129
	}
}

130
#ifdef CONFIG_PRINTK
131
132
133
134
135
/*
 * __btrfs_std_error decodes expected errors from the caller and
 * invokes the approciate error response.
 */
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
136
		       unsigned int line, int errno, const char *fmt, ...)
137
138
139
140
141
142
143
144
145
{
	struct super_block *sb = fs_info->sb;
	const char *errstr;

	/*
	 * Special case: if the error is EROFS, and we're already
	 * under MS_RDONLY, then it is safe here.
	 */
	if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
146
147
  		return;

148
	errstr = btrfs_decode_error(errno);
149
	if (fmt) {
150
151
152
153
154
155
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
156

157
158
		printk(KERN_CRIT
			"BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
159
			sb->s_id, function, line, errno, errstr, &vaf);
160
		va_end(args);
161
	} else {
162
		printk(KERN_CRIT "BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
163
			sb->s_id, function, line, errno, errstr);
164
	}
165

166
	/* Don't go through full error handling during mount */
167
168
	save_error_info(fs_info);
	if (sb->s_flags & MS_BORN)
169
170
		btrfs_handle_error(fs_info);
}
171

172
/*
 * Human-readable names for the printk levels, indexed by the level digit
 * ('0' -> index 0 ... '7' -> index 7).  btrfs_printk() uses entry 4
 * ("warning") when a message carries no level.
 */
static const char * const logtypes[] = {
	"emergency",
	"alert",
	"critical",
	"error",
	"warning",
	"notice",
	"info",
	"debug",
};

183
void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
184
185
186
187
188
189
{
	struct super_block *sb = fs_info->sb;
	char lvl[4];
	struct va_format vaf;
	va_list args;
	const char *type = logtypes[4];
190
	int kern_level;
191
192
193

	va_start(args, fmt);

194
195
196
197
198
199
200
	kern_level = printk_get_level(fmt);
	if (kern_level) {
		size_t size = printk_skip_level(fmt) - fmt;
		memcpy(lvl, fmt,  size);
		lvl[size] = '\0';
		fmt += size;
		type = logtypes[kern_level - '0'];
201
202
203
204
205
	} else
		*lvl = '\0';

	vaf.fmt = fmt;
	vaf.va = &args;
206

207
	printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230

	va_end(args);
}

#else

/*
 * Variant of __btrfs_std_error for kernels built without CONFIG_PRINTK:
 * same error response as the printing variant, minus the log message.
 *
 * @fs_info:  filesystem the error occurred on
 * @function: unused in this variant
 * @line:     unused in this variant
 * @errno:    negative error code
 * @fmt:      unused in this variant
 */
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
		       unsigned int line, int errno, const char *fmt, ...)
{
	struct super_block *sb = fs_info->sb;

	/*
	 * Special case: if the error is EROFS, and we're already
	 * under MS_RDONLY, then it is safe here.
	 */
	if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
		return;

	/*
	 * Don't go through full error handling during mount, but always
	 * record the error state, exactly as the CONFIG_PRINTK variant does.
	 */
	save_error_info(fs_info);
	if (sb->s_flags & MS_BORN)
		btrfs_handle_error(fs_info);
}
232
#endif
233

234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
/*
 * We only mark the transaction aborted and then set the file system read-only.
 * This will prevent new transactions from starting or trying to join this
 * one.
 *
 * This means that error recovery at the call site is limited to freeing
 * any local memory allocations and passing the error code up without
 * further cleanup. The transaction should complete as it normally would
 * in the call path but will return -EIO.
 *
 * We'll complete the cleanup in btrfs_end_transaction and
 * btrfs_commit_transaction.
 */
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root, const char *function,
			       unsigned int line, int errno)
{
251
252
253
254
255
	/*
	 * Report first abort since mount
	 */
	if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
				&root->fs_info->fs_state)) {
256
		WARN(1, KERN_DEBUG "BTRFS: Transaction aborted (error %d)\n",
257
258
				errno);
	}
259
260
261
262
	trans->aborted = errno;
	/* Nothing used. The other threads that have joined this
	 * transaction may be able to continue. */
	if (!trans->blocks_used) {
263
264
		const char *errstr;

265
		errstr = btrfs_decode_error(errno);
266
267
268
		btrfs_warn(root->fs_info,
		           "%s:%d: Aborting unused transaction(%s).",
		           function, line, errstr);
269
		return;
270
	}
271
	ACCESS_ONCE(trans->transaction->aborted) = errno;
272
273
274
	/* Wake up anybody who may be waiting on this transaction */
	wake_up(&root->fs_info->transaction_wait);
	wake_up(&root->fs_info->transaction_blocked_wait);
275
276
	__btrfs_std_error(root->fs_info, function, line, errno, NULL);
}
Jeff Mahoney's avatar
Jeff Mahoney committed
277
278
279
280
281
282
283
284
285
286
287
/*
 * __btrfs_panic decodes unexpected, fatal errors from the caller,
 * issues an alert, and either panics or BUGs, depending on mount options.
 */
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
		   unsigned int line, int errno, const char *fmt, ...)
{
	char *s_id = "<unknown>";
	const char *errstr;
	struct va_format vaf = { .fmt = fmt };
	va_list args;
288

Jeff Mahoney's avatar
Jeff Mahoney committed
289
290
	if (fs_info)
		s_id = fs_info->sb->s_id;
291

Jeff Mahoney's avatar
Jeff Mahoney committed
292
293
294
	va_start(args, fmt);
	vaf.va = &args;

295
	errstr = btrfs_decode_error(errno);
296
	if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
297
298
		panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
			s_id, function, line, &vaf, errno, errstr);
Jeff Mahoney's avatar
Jeff Mahoney committed
299

300
301
	btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
		   function, line, &vaf, errno, errstr);
Jeff Mahoney's avatar
Jeff Mahoney committed
302
303
	va_end(args);
	/* Caller calls BUG() */
304
305
}

306
static void btrfs_put_super(struct super_block *sb)
Chris Mason's avatar
Chris Mason committed
307
{
308
	(void)close_ctree(btrfs_sb(sb)->tree_root);
Al Viro's avatar
Al Viro committed
309
310
311
312
313
314
	/* FIXME: need to fix VFS to return error? */
	/* AV: return it _where_?  ->put_super() can be triggered by any number
	 * of async events, up to and including delivery of SIGKILL to the
	 * last process that kept it busy.  Or segfault in the aforementioned
	 * process...  Whom would you report that to?
	 */
Chris Mason's avatar
Chris Mason committed
315
316
}

317
/*
 * Token identifiers for the mount options listed in the tokens[] table.
 * Opt_err is what an unrecognized option resolves to.
 */
enum {
	Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
	Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
	Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
	Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
	Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
	Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
	Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
	Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
	Opt_check_integrity, Opt_check_integrity_including_extent_data,
	Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
	Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
	Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
	Opt_datasum, Opt_treelog, Opt_noinode_cache,
	Opt_err,
};

static match_table_t tokens = {
335
	{Opt_degraded, "degraded"},
336
	{Opt_subvol, "subvol=%s"},
337
	{Opt_subvolid, "subvolid=%s"},
338
	{Opt_device, "device=%s"},
339
	{Opt_nodatasum, "nodatasum"},
Qu Wenruo's avatar
Qu Wenruo committed
340
	{Opt_datasum, "datasum"},
341
	{Opt_nodatacow, "nodatacow"},
Qu Wenruo's avatar
Qu Wenruo committed
342
	{Opt_datacow, "datacow"},
343
	{Opt_nobarrier, "nobarrier"},
344
	{Opt_barrier, "barrier"},
345
	{Opt_max_inline, "max_inline=%s"},
346
	{Opt_alloc_start, "alloc_start=%s"},
347
	{Opt_thread_pool, "thread_pool=%d"},
348
	{Opt_compress, "compress"},
349
	{Opt_compress_type, "compress=%s"},
350
	{Opt_compress_force, "compress-force"},
351
	{Opt_compress_force_type, "compress-force=%s"},
352
	{Opt_ssd, "ssd"},
353
	{Opt_ssd_spread, "ssd_spread"},
Chris Mason's avatar
Chris Mason committed
354
	{Opt_nossd, "nossd"},
Qu Wenruo's avatar
Qu Wenruo committed
355
	{Opt_acl, "acl"},
Josef Bacik's avatar
Josef Bacik committed
356
	{Opt_noacl, "noacl"},
Sage Weil's avatar
Sage Weil committed
357
	{Opt_notreelog, "notreelog"},
Qu Wenruo's avatar
Qu Wenruo committed
358
	{Opt_treelog, "treelog"},
359
	{Opt_flushoncommit, "flushoncommit"},
360
	{Opt_noflushoncommit, "noflushoncommit"},
361
	{Opt_ratio, "metadata_ratio=%d"},
362
	{Opt_discard, "discard"},
Qu Wenruo's avatar
Qu Wenruo committed
363
	{Opt_nodiscard, "nodiscard"},
364
	{Opt_space_cache, "space_cache"},
365
	{Opt_clear_cache, "clear_cache"},
366
	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
367
	{Opt_enospc_debug, "enospc_debug"},
368
	{Opt_noenospc_debug, "noenospc_debug"},
369
	{Opt_subvolrootid, "subvolrootid=%d"},
Chris Mason's avatar
Chris Mason committed
370
	{Opt_defrag, "autodefrag"},
371
	{Opt_nodefrag, "noautodefrag"},
Chris Mason's avatar
Chris Mason committed
372
	{Opt_inode_cache, "inode_cache"},
373
	{Opt_noinode_cache, "noinode_cache"},
374
	{Opt_no_space_cache, "nospace_cache"},
375
	{Opt_recovery, "recovery"},
376
	{Opt_skip_balance, "skip_balance"},
377
378
379
	{Opt_check_integrity, "check_int"},
	{Opt_check_integrity_including_extent_data, "check_int_data"},
	{Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
380
	{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
Jeff Mahoney's avatar
Jeff Mahoney committed
381
	{Opt_fatal_errors, "fatal_errors=%s"},
382
	{Opt_commit_interval, "commit=%d"},
Josef Bacik's avatar
Josef Bacik committed
383
	{Opt_err, NULL},
384
385
};

386
387
388
/*
 * Regular mount options parser.  Everything that is needed only when
 * reading in a new superblock is parsed here.
389
 * XXX JDM: This needs to be cleaned up for remount.
390
391
 */
int btrfs_parse_options(struct btrfs_root *root, char *options)
392
{
393
	struct btrfs_fs_info *info = root->fs_info;
394
	substring_t args[MAX_OPT_ARGS];
395
396
	char *p, *num, *orig = NULL;
	u64 cache_gen;
397
	int intarg;
398
	int ret = 0;
399
400
	char *compress_type;
	bool compress_force = false;
401

402
	cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
403
404
405
	if (cache_gen)
		btrfs_set_opt(info->mount_opt, SPACE_CACHE);

406
	if (!options)
407
		goto out;
408

409
410
411
412
413
414
415
416
	/*
	 * strsep changes the string, duplicate it because parse_options
	 * gets called twice
	 */
	options = kstrdup(options, GFP_NOFS);
	if (!options)
		return -ENOMEM;

417
	orig = options;
418

419
	while ((p = strsep(&options, ",")) != NULL) {
420
421
422
423
424
425
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
426
		case Opt_degraded:
427
			btrfs_info(root->fs_info, "allowing degraded mounts");
428
			btrfs_set_opt(info->mount_opt, DEGRADED);
429
			break;
430
		case Opt_subvol:
431
		case Opt_subvolid:
432
		case Opt_subvolrootid:
433
		case Opt_device:
434
			/*
435
			 * These are parsed by btrfs_parse_early_options
436
437
			 * and can be happily ignored here.
			 */
438
439
			break;
		case Opt_nodatasum:
440
			btrfs_info(root->fs_info, "setting nodatasum");
441
			btrfs_set_opt(info->mount_opt, NODATASUM);
442
			break;
Qu Wenruo's avatar
Qu Wenruo committed
443
444
445
446
447
448
449
450
		case Opt_datasum:
			if (btrfs_test_opt(root, NODATACOW))
				btrfs_info(root->fs_info, "setting datasum, datacow enabled");
			else
				btrfs_info(root->fs_info, "setting datasum");
			btrfs_clear_opt(info->mount_opt, NODATACOW);
			btrfs_clear_opt(info->mount_opt, NODATASUM);
			break;
451
		case Opt_nodatacow:
452
453
			if (!btrfs_test_opt(root, COMPRESS) ||
				!btrfs_test_opt(root, FORCE_COMPRESS)) {
454
455
					btrfs_info(root->fs_info,
						"setting nodatacow, compression disabled");
456
			} else {
457
				btrfs_info(root->fs_info, "setting nodatacow");
458
459
460
			}
			btrfs_clear_opt(info->mount_opt, COMPRESS);
			btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
461
462
			btrfs_set_opt(info->mount_opt, NODATACOW);
			btrfs_set_opt(info->mount_opt, NODATASUM);
463
			break;
Qu Wenruo's avatar
Qu Wenruo committed
464
465
466
467
468
		case Opt_datacow:
			if (btrfs_test_opt(root, NODATACOW))
				btrfs_info(root->fs_info, "setting datacow");
			btrfs_clear_opt(info->mount_opt, NODATACOW);
			break;
469
		case Opt_compress_force:
470
471
		case Opt_compress_force_type:
			compress_force = true;
472
			/* Fallthrough */
473
474
475
476
477
478
479
		case Opt_compress:
		case Opt_compress_type:
			if (token == Opt_compress ||
			    token == Opt_compress_force ||
			    strcmp(args[0].from, "zlib") == 0) {
				compress_type = "zlib";
				info->compress_type = BTRFS_COMPRESS_ZLIB;
480
				btrfs_set_opt(info->mount_opt, COMPRESS);
481
482
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
Li Zefan's avatar
Li Zefan committed
483
484
485
			} else if (strcmp(args[0].from, "lzo") == 0) {
				compress_type = "lzo";
				info->compress_type = BTRFS_COMPRESS_LZO;
486
				btrfs_set_opt(info->mount_opt, COMPRESS);
487
488
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
489
				btrfs_set_fs_incompat(info, COMPRESS_LZO);
490
491
492
493
494
			} else if (strncmp(args[0].from, "no", 2) == 0) {
				compress_type = "no";
				btrfs_clear_opt(info->mount_opt, COMPRESS);
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				compress_force = false;
495
496
497
498
499
500
501
			} else {
				ret = -EINVAL;
				goto out;
			}

			if (compress_force) {
				btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
502
				btrfs_info(root->fs_info, "force %s compression",
503
					compress_type);
504
			} else if (btrfs_test_opt(root, COMPRESS)) {
505
506
				pr_info("btrfs: use %s compression\n",
					compress_type);
507
			}
508
			break;
509
		case Opt_ssd:
510
			btrfs_info(root->fs_info, "use ssd allocation scheme");
511
			btrfs_set_opt(info->mount_opt, SSD);
512
			break;
513
		case Opt_ssd_spread:
514
			btrfs_info(root->fs_info, "use spread ssd allocation scheme");
515
516
517
			btrfs_set_opt(info->mount_opt, SSD);
			btrfs_set_opt(info->mount_opt, SSD_SPREAD);
			break;
Chris Mason's avatar
Chris Mason committed
518
		case Opt_nossd:
519
			btrfs_info(root->fs_info, "not using ssd allocation scheme");
Chris Mason's avatar
Chris Mason committed
520
			btrfs_set_opt(info->mount_opt, NOSSD);
Chris Mason's avatar
Chris Mason committed
521
			btrfs_clear_opt(info->mount_opt, SSD);
522
			btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
Chris Mason's avatar
Chris Mason committed
523
			break;
524
525
526
527
528
		case Opt_barrier:
			if (btrfs_test_opt(root, NOBARRIER))
				btrfs_info(root->fs_info, "turning on barriers");
			btrfs_clear_opt(info->mount_opt, NOBARRIER);
			break;
529
		case Opt_nobarrier:
530
			btrfs_info(root->fs_info, "turning off barriers");
531
			btrfs_set_opt(info->mount_opt, NOBARRIER);
532
			break;
533
		case Opt_thread_pool:
534
535
536
537
			ret = match_int(&args[0], &intarg);
			if (ret) {
				goto out;
			} else if (intarg > 0) {
538
				info->thread_pool_size = intarg;
539
540
541
542
			} else {
				ret = -EINVAL;
				goto out;
			}
543
			break;
544
		case Opt_max_inline:
545
546
			num = match_strdup(&args[0]);
			if (num) {
Akinobu Mita's avatar
Akinobu Mita committed
547
				info->max_inline = memparse(num, NULL);
548
549
				kfree(num);

Chris Mason's avatar
Chris Mason committed
550
551
552
553
554
				if (info->max_inline) {
					info->max_inline = max_t(u64,
						info->max_inline,
						root->sectorsize);
				}
555
				btrfs_info(root->fs_info, "max_inline at %llu",
556
					info->max_inline);
557
558
559
			} else {
				ret = -ENOMEM;
				goto out;
560
561
			}
			break;
562
		case Opt_alloc_start:
563
564
			num = match_strdup(&args[0]);
			if (num) {
Miao Xie's avatar
Miao Xie committed
565
				mutex_lock(&info->chunk_mutex);
Akinobu Mita's avatar
Akinobu Mita committed
566
				info->alloc_start = memparse(num, NULL);
Miao Xie's avatar
Miao Xie committed
567
				mutex_unlock(&info->chunk_mutex);
568
				kfree(num);
569
				btrfs_info(root->fs_info, "allocations start at %llu",
570
					info->alloc_start);
571
572
573
			} else {
				ret = -ENOMEM;
				goto out;
574
575
			}
			break;
Qu Wenruo's avatar
Qu Wenruo committed
576
577
578
		case Opt_acl:
			root->fs_info->sb->s_flags |= MS_POSIXACL;
			break;
Josef Bacik's avatar
Josef Bacik committed
579
580
581
		case Opt_noacl:
			root->fs_info->sb->s_flags &= ~MS_POSIXACL;
			break;
Sage Weil's avatar
Sage Weil committed
582
		case Opt_notreelog:
583
			btrfs_info(root->fs_info, "disabling tree log");
Sage Weil's avatar
Sage Weil committed
584
			btrfs_set_opt(info->mount_opt, NOTREELOG);
Qu Wenruo's avatar
Qu Wenruo committed
585
586
587
588
589
			break;
		case Opt_treelog:
			if (btrfs_test_opt(root, NOTREELOG))
				btrfs_info(root->fs_info, "enabling tree log");
			btrfs_clear_opt(info->mount_opt, NOTREELOG);
Sage Weil's avatar
Sage Weil committed
590
			break;
591
		case Opt_flushoncommit:
592
			btrfs_info(root->fs_info, "turning on flush-on-commit");
593
594
			btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
			break;
595
596
597
598
599
		case Opt_noflushoncommit:
			if (btrfs_test_opt(root, FLUSHONCOMMIT))
				btrfs_info(root->fs_info, "turning off flush-on-commit");
			btrfs_clear_opt(info->mount_opt, FLUSHONCOMMIT);
			break;
600
		case Opt_ratio:
601
602
603
604
			ret = match_int(&args[0], &intarg);
			if (ret) {
				goto out;
			} else if (intarg >= 0) {
605
				info->metadata_ratio = intarg;
606
				btrfs_info(root->fs_info, "metadata ratio %d",
607
				       info->metadata_ratio);
608
609
610
			} else {
				ret = -EINVAL;
				goto out;
611
612
			}
			break;
613
614
615
		case Opt_discard:
			btrfs_set_opt(info->mount_opt, DISCARD);
			break;
Qu Wenruo's avatar
Qu Wenruo committed
616
617
618
		case Opt_nodiscard:
			btrfs_clear_opt(info->mount_opt, DISCARD);
			break;
619
620
		case Opt_space_cache:
			btrfs_set_opt(info->mount_opt, SPACE_CACHE);
621
			break;
622
623
624
		case Opt_rescan_uuid_tree:
			btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
			break;
625
		case Opt_no_space_cache:
626
			btrfs_info(root->fs_info, "disabling disk space caching");
627
628
			btrfs_clear_opt(info->mount_opt, SPACE_CACHE);
			break;
Chris Mason's avatar
Chris Mason committed
629
		case Opt_inode_cache:
630
			btrfs_info(root->fs_info, "enabling inode map caching");
631
632
633
634
635
636
			btrfs_set_opt(info->mount_opt, CHANGE_INODE_CACHE);
			break;
		case Opt_noinode_cache:
			if (btrfs_test_opt(root, CHANGE_INODE_CACHE))
				btrfs_info(root->fs_info, "disabling inode map caching");
			btrfs_clear_opt(info->mount_opt, CHANGE_INODE_CACHE);
Chris Mason's avatar
Chris Mason committed
637
			break;
638
		case Opt_clear_cache:
639
			btrfs_info(root->fs_info, "force clearing of disk cache");
640
			btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
641
			break;
642
643
644
		case Opt_user_subvol_rm_allowed:
			btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
			break;
645
646
647
		case Opt_enospc_debug:
			btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
648
649
650
		case Opt_noenospc_debug:
			btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
Chris Mason's avatar
Chris Mason committed
651
		case Opt_defrag:
652
			btrfs_info(root->fs_info, "enabling auto defrag");
Chris Mason's avatar
Chris Mason committed
653
654
			btrfs_set_opt(info->mount_opt, AUTO_DEFRAG);
			break;
655
656
657
658
659
		case Opt_nodefrag:
			if (btrfs_test_opt(root, AUTO_DEFRAG))
				btrfs_info(root->fs_info, "disabling auto defrag");
			btrfs_clear_opt(info->mount_opt, AUTO_DEFRAG);
			break;
660
		case Opt_recovery:
661
			btrfs_info(root->fs_info, "enabling auto recovery");
662
663
			btrfs_set_opt(info->mount_opt, RECOVERY);
			break;
664
665
666
		case Opt_skip_balance:
			btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
			break;
667
668
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
		case Opt_check_integrity_including_extent_data:
669
670
			btrfs_info(root->fs_info,
				   "enabling check integrity including extent data");
671
672
673
674
675
			btrfs_set_opt(info->mount_opt,
				      CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity:
676
			btrfs_info(root->fs_info, "enabling check integrity");
677
678
679
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity_print_mask:
680
681
682
683
			ret = match_int(&args[0], &intarg);
			if (ret) {
				goto out;
			} else if (intarg >= 0) {
684
				info->check_integrity_print_mask = intarg;
685
				btrfs_info(root->fs_info, "check_integrity_print_mask 0x%x",
686
				       info->check_integrity_print_mask);
687
688
689
			} else {
				ret = -EINVAL;
				goto out;
690
691
692
693
694
695
			}
			break;
#else
		case Opt_check_integrity_including_extent_data:
		case Opt_check_integrity:
		case Opt_check_integrity_print_mask:
696
697
			btrfs_err(root->fs_info,
				"support for check_integrity* not compiled in!");
698
699
700
			ret = -EINVAL;
			goto out;
#endif
Jeff Mahoney's avatar
Jeff Mahoney committed
701
702
703
704
705
706
707
708
709
710
711
712
		case Opt_fatal_errors:
			if (strcmp(args[0].from, "panic") == 0)
				btrfs_set_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else if (strcmp(args[0].from, "bug") == 0)
				btrfs_clear_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else {
				ret = -EINVAL;
				goto out;
			}
			break;
713
714
715
716
		case Opt_commit_interval:
			intarg = 0;
			ret = match_int(&args[0], &intarg);
			if (ret < 0) {
717
				btrfs_err(root->fs_info, "invalid commit interval");
718
719
720
721
722
				ret = -EINVAL;
				goto out;
			}
			if (intarg > 0) {
				if (intarg > 300) {
723
					btrfs_warn(root->fs_info, "excessive commit interval %d",
724
725
726
727
							intarg);
				}
				info->commit_interval = intarg;
			} else {
728
				btrfs_info(root->fs_info, "using default commit interval %ds",
729
730
731
732
				    BTRFS_DEFAULT_COMMIT_INTERVAL);
				info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
			}
			break;
733
		case Opt_err:
734
			btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
735
736
			ret = -EINVAL;
			goto out;
737
		default:
738
			break;
739
740
		}
	}
741
out:
742
	if (!ret && btrfs_test_opt(root, SPACE_CACHE))
743
		btrfs_info(root->fs_info, "disk space caching is enabled");
744
	kfree(orig);
745
	return ret;
746
747
748
749
750
751
752
753
}

/*
 * Parse mount options that are required early in the mount process.
 *
 * All other options will be parsed on much later in the mount process and
 * only when we need to allocate a new super block.
 */
754
static int btrfs_parse_early_options(const char *options, fmode_t flags,
755
		void *holder, char **subvol_name, u64 *subvol_objectid,
756
		struct btrfs_fs_devices **fs_devices)
757
758
{
	substring_t args[MAX_OPT_ARGS];
759
	char *device_name, *opts, *orig, *p;
760
	char *num = NULL;
761
762
763
	int error = 0;

	if (!options)
764
		return 0;
765
766
767
768
769
770
771
772

	/*
	 * strsep changes the string, duplicate it because parse_options
	 * gets called twice
	 */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
773
	orig = opts;
774
775
776
777
778
779
780
781
782

	while ((p = strsep(&opts, ",")) != NULL) {
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_subvol:
783
			kfree(*subvol_name);
784
			*subvol_name = match_strdup(&args[0]);
785
786
787
788
			if (!*subvol_name) {
				error = -ENOMEM;
				goto out;
			}
789
			break;
790
		case Opt_subvolid:
791
792
793
794
			num = match_strdup(&args[0]);
			if (num) {
				*subvol_objectid = memparse(num, NULL);
				kfree(num);
795
				/* we want the original fs_tree */
796
				if (!*subvol_objectid)
797
798
					*subvol_objectid =
						BTRFS_FS_TREE_OBJECTID;
799
800
801
			} else {
				error = -EINVAL;
				goto out;
802
			}
803
			break;
804
		case Opt_subvolrootid:
805
			printk(KERN_WARNING
806
807
				"BTRFS: 'subvolrootid' mount option is deprecated and has "
				"no effect\n");
808
			break;
809
		case Opt_device:
810
811
812
813
814
815
			device_name = match_strdup(&args[0]);
			if (!device_name) {
				error = -ENOMEM;
				goto out;
			}
			error = btrfs_scan_one_device(device_name,
816
					flags, holder, fs_devices);
817
			kfree(device_name);
818
			if (error)
819
				goto out;
820
			break;
821
822
823
824
825
		default:
			break;
		}
	}

826
out:
827
	kfree(orig);
828
	return error;
829
830
}

831
832
833
/*
 * Find the dentry to use as the root of the mount.
 *
 * @sb:              super block being mounted
 * @subvol_objectid: object id of the subvolume to mount, or 0 to use the
 *                   "default" dir item recorded in the tree root
 *
 * Returns a referenced dentry on success, or an ERR_PTR() on failure.
 */
static struct dentry *get_default_root(struct super_block *sb,
				       u64 subvol_objectid)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_root *new_root;
	struct btrfs_dir_item *di;
	struct btrfs_path *path;
	struct btrfs_key location;
	struct inode *inode;
	u64 dir_id;
	int new = 0;

	/*
	 * We have a specific subvol we want to mount, just setup location and
	 * go look up the root.
	 */
	if (subvol_objectid) {
		location.objectid = subvol_objectid;
		location.type = BTRFS_ROOT_ITEM_KEY;
		location.offset = (u64)-1;
		goto find_root;
	}

	path = btrfs_alloc_path();
	if (!path)
		return ERR_PTR(-ENOMEM);
	path->leave_spinning = 1;

	/*
	 * Find the "default" dir item which points to the root item that we
	 * will mount by default if we haven't been given a specific subvolume
	 * to mount.
	 */
	dir_id = btrfs_super_root_dir(fs_info->super_copy);
	di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
	if (IS_ERR(di)) {
		btrfs_free_path(path);
		return ERR_CAST(di);
	}
	if (!di) {
		/*
		 * Ok the default dir item isn't there.  This is weird since
		 * it's always been there, but don't freak out, just try and
		 * mount to root most subvolume.
		 */
		btrfs_free_path(path);
		dir_id = BTRFS_FIRST_FREE_OBJECTID;
		new_root = fs_info->fs_root;
		goto setup_root;
	}

	/* Copy the key out of the leaf before the path is released */
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
	btrfs_free_path(path);

find_root:
	new_root = btrfs_read_fs_root_no_name(fs_info, &location);
	if (IS_ERR(new_root))
		return ERR_CAST(new_root);

	dir_id = btrfs_root_dirid(&new_root->root_item);
setup_root:
	location.objectid = dir_id;
	location.type = BTRFS_INODE_ITEM_KEY;
	location.offset = 0;

	inode = btrfs_iget(sb, &location, new_root, &new);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	/*
	 * If we're just mounting the root most subvol put the inode and return
	 * a reference to the dentry.  We will have already gotten a reference
	 * to the inode in btrfs_fill_super so we're good to go.
	 */
	if (!new && sb->s_root->d_inode == inode) {
		iput(inode);
		return dget(sb->s_root);
	}

	return d_obtain_alias(inode);
}

914
static int btrfs_fill_super(struct super_block *sb,
915
			    struct btrfs_fs_devices *fs_devices,
916
			    void *data, int silent)
Chris Mason's avatar
Chris Mason committed
917
{
918
	struct inode *inode;
919
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
920
	struct btrfs_key key;
Chris Mason's avatar
Chris Mason committed
921
	int err;
922

Chris Mason's avatar
Chris Mason committed
923
924
925
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_magic = BTRFS_SUPER_MAGIC;
	sb->s_op = &btrfs_super_ops;
Al Viro's avatar
Al Viro committed
926
	sb->s_d_op = &btrfs_dentry_operations;
Balaji Rao's avatar
Balaji Rao committed
927
	sb->s_export_op = &btrfs_export_ops;
Josef Bacik's avatar
Josef Bacik committed
928
	sb->s_xattr = btrfs_xattr_handlers;
Chris Mason's avatar
Chris Mason committed
929
	sb->s_time_gran = 1;
930
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
Josef Bacik's avatar
Josef Bacik committed
931
	sb->s_flags |= MS_POSIXACL;
932
#endif
933
	sb->s_flags |= MS_I_VERSION;
Al Viro's avatar
Al Viro committed
934
935
	err = open_ctree(sb, fs_devices, (char *)data);
	if (err) {
936
		printk(KERN_ERR "BTRFS: open_ctree failed\n");
Al Viro's avatar
Al Viro committed
937
		return err;
938
939
	}

940
941
942
	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
943
	inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);