// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/resize.c
 *
 * Support for resizing an ext4 filesystem while it is mounted.
 *
 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
 *
 * This could probably be made into a module, because it is not often in use.
 */

13
#define EXT4FS_DEBUG
14
15
16
17

#include <linux/errno.h>
#include <linux/slab.h>

18
#include "ext4_jbd2.h"
19

20
21
/*
 * ext4_resize_begin() - gate entry to an online resize operation.
 *
 * Rejects the request unless the caller has CAP_SYS_RESOURCE, the
 * filesystem is mounted from the primary superblock, and no error state
 * is recorded in s_mount_state.  On success it atomically sets
 * EXT4_FLAGS_RESIZING so that only one resize runs at a time; the flag
 * is released by ext4_resize_end().
 *
 * Returns 0 on success, -EPERM or -EBUSY on failure.
 */
int ext4_resize_begin(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int ret = 0;

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

	/*
	 * If we are not using the primary superblock/GDT copy don't resize,
	 * because the user tools have no way of handling this.  Probably a
	 * bad time to do it anyways.
	 */
	if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) !=
	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
		ext4_warning(sb, "won't resize using backup superblock at %llu",
			(unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
		return -EPERM;
	}

	/*
	 * We are not allowed to do online-resizing on a filesystem mounted
	 * with error, because it can destroy the filesystem easily.
	 */
	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
		ext4_warning(sb, "There are errors in the filesystem, "
			     "so online resizing is not allowed");
		return -EPERM;
	}

	/* Serialize resizers: first caller wins, later ones get -EBUSY. */
	if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
				  &EXT4_SB(sb)->s_ext4_flags))
		ret = -EBUSY;

	return ret;
}

/*
 * ext4_resize_end() - release the resize flag taken by ext4_resize_begin().
 */
void ext4_resize_end(struct super_block *sb)
{
	clear_bit_unlock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags);
	smp_mb__after_atomic();	/* pair a full barrier with the unlock bitop */
}

63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
					     ext4_group_t group) {
	return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) <<
	       EXT4_DESC_PER_BLOCK_BITS(sb);
}

/*
 * Return the first block of the meta_bg group cluster containing @group.
 */
static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb,
					     ext4_group_t group)
{
	return ext4_group_first_block_no(sb,
					 ext4_meta_bg_first_group(sb, group));
}

/*
 * Number of metadata-overhead blocks at the start of @group: its GDT
 * blocks, plus (for groups carrying a superblock backup) the superblock
 * itself and the reserved GDT blocks.
 */
static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb,
						ext4_group_t group)
{
	ext4_grpblk_t overhead = ext4_bg_num_gdb(sb, group);

	if (ext4_bg_has_super(sb, group))
		overhead += 1 +
			  le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
	return overhead;
}

/* Is block @b outside [first, last) / inside [first, last)? */
#define outside(b, first, last)	((b) < (first) || (b) >= (last))
#define inside(b, first, last)	((b) >= (first) && (b) < (last))

/*
 * Sanity-check the layout of a single new group (non-flex resize path)
 * before it is appended at the end of the filesystem.
 *
 * Verifies that @input describes the next group in sequence and that its
 * block bitmap, inode bitmap and inode table all lie inside the new
 * group's block range [start, end) without overlapping each other or the
 * group's superblock/GDT area.  As a side effect it computes
 * input->free_clusters_count.
 *
 * Returns 0 if the group layout is acceptable, -EINVAL otherwise.
 */
static int verify_group_input(struct super_block *sb,
			      struct ext4_new_group_data *input)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	ext4_fsblk_t start = ext4_blocks_count(es);	/* current end of fs */
	ext4_fsblk_t end = start + input->blocks_count;
	ext4_group_t group = input->group;
	ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
	unsigned overhead;
	ext4_fsblk_t metaend;
	struct buffer_head *bh = NULL;
	ext4_grpblk_t free_blocks_count, offset;
	int err = -EINVAL;

	/* Groups can only be appended at the current end of the fs. */
	if (group != sbi->s_groups_count) {
		ext4_warning(sb, "Cannot add at group %u (only %u groups)",
			     input->group, sbi->s_groups_count);
		return -EINVAL;
	}

	overhead = ext4_group_overhead_blocks(sb, group);
	metaend = start + overhead;
	/* "2" accounts for the block bitmap and the inode bitmap */
	input->free_clusters_count = free_blocks_count =
		input->blocks_count - 2 - overhead - sbi->s_itb_per_group;

	if (test_opt(sb, DEBUG))
		printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks "
		       "(%d free, %u reserved)\n",
		       ext4_bg_has_super(sb, input->group) ? "normal" :
		       "no-super", input->group, input->blocks_count,
		       free_blocks_count, input->reserved_blocks);

	/* The last existing group must be full before a new one is added. */
	ext4_get_group_no_and_offset(sb, start, NULL, &offset);
	if (offset != 0)
			ext4_warning(sb, "Last group not full");
	else if (input->reserved_blocks > input->blocks_count / 5)
		ext4_warning(sb, "Reserved blocks too high (%u)",
			     input->reserved_blocks);
	else if (free_blocks_count < 0)
		ext4_warning(sb, "Bad blocks count %u",
			     input->blocks_count);
	/* Reading the last block proves the device is large enough. */
	else if (!(bh = sb_bread(sb, end - 1)))
		ext4_warning(sb, "Cannot read last block (%llu)",
			     end - 1);
	else if (outside(input->block_bitmap, start, end))
		ext4_warning(sb, "Block bitmap not in group (block %llu)",
			     (unsigned long long)input->block_bitmap);
	else if (outside(input->inode_bitmap, start, end))
		ext4_warning(sb, "Inode bitmap not in group (block %llu)",
			     (unsigned long long)input->inode_bitmap);
	else if (outside(input->inode_table, start, end) ||
		 outside(itend - 1, start, end))
		ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)",
			     (unsigned long long)input->inode_table, itend - 1);
	else if (input->inode_bitmap == input->block_bitmap)
		ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)",
			     (unsigned long long)input->block_bitmap);
	else if (inside(input->block_bitmap, input->inode_table, itend))
		ext4_warning(sb, "Block bitmap (%llu) in inode table "
			     "(%llu-%llu)",
			     (unsigned long long)input->block_bitmap,
			     (unsigned long long)input->inode_table, itend - 1);
	else if (inside(input->inode_bitmap, input->inode_table, itend))
		ext4_warning(sb, "Inode bitmap (%llu) in inode table "
			     "(%llu-%llu)",
			     (unsigned long long)input->inode_bitmap,
			     (unsigned long long)input->inode_table, itend - 1);
	else if (inside(input->block_bitmap, start, metaend))
		ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)",
			     (unsigned long long)input->block_bitmap,
			     start, metaend - 1);
	else if (inside(input->inode_bitmap, start, metaend))
		ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)",
			     (unsigned long long)input->inode_bitmap,
			     start, metaend - 1);
	else if (inside(input->inode_table, start, metaend) ||
		 inside(itend - 1, start, metaend))
		ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table "
			     "(%llu-%llu)",
			     (unsigned long long)input->inode_table,
			     itend - 1, start, metaend - 1);
	else
		err = 0;
	brelse(bh);

	return err;
}

/*
 * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex
 * group each time.
 */
struct ext4_new_flex_group_data {
	struct ext4_new_group_data *groups;	/* new_group_data for groups
						   in the flex group */
	__u16 *bg_flags;			/* block group flags of groups
						   in @groups */
	ext4_group_t count;			/* number of groups in @groups
						 */
};

/*
 * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
 * @flexbg_size.
 *
 * Returns NULL on failure otherwise address of the allocated structure.
 */
static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
{
	struct ext4_new_flex_group_data *flex_gd;

	flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
	if (flex_gd == NULL)
		goto out3;

204
	if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
205
		goto out2;
206
207
	flex_gd->count = flexbg_size;

208
209
210
	flex_gd->groups = kmalloc_array(flexbg_size,
					sizeof(struct ext4_new_group_data),
					GFP_NOFS);
211
212
213
	if (flex_gd->groups == NULL)
		goto out2;

214
215
	flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16),
					  GFP_NOFS);
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
	if (flex_gd->bg_flags == NULL)
		goto out1;

	return flex_gd;

out1:
	kfree(flex_gd->groups);
out2:
	kfree(flex_gd);
out3:
	return NULL;
}

/* Release an ext4_new_flex_group_data and its per-group arrays. */
static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
{
	kfree(flex_gd->groups);
	kfree(flex_gd->bg_flags);
	kfree(flex_gd);
}

/*
 * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
 * and inode tables for a flex group.
 *
 * This function is used by 64bit-resize.  Note that this function allocates
 * group tables from the 1st group of groups contained by @flexgd, which may
 * be a partial of a flex group.
 *
 * @sb: super block of fs to which the groups belongs
 * @flex_gd: layout of the new groups being added
 * @flexbg_size: number of groups per flex group
 *
 * Returns 0 on a successful allocation of the metadata blocks in the
 * block group.
 */
static int ext4_alloc_group_tables(struct super_block *sb,
				struct ext4_new_flex_group_data *flex_gd,
				int flexbg_size)
{
	struct ext4_new_group_data *group_data = flex_gd->groups;
	ext4_fsblk_t start_blk;
	ext4_fsblk_t last_blk;
	ext4_group_t src_group;
	ext4_group_t bb_index = 0;
	ext4_group_t ib_index = 0;
	ext4_group_t it_index = 0;
	ext4_group_t group;
	ext4_group_t last_group;
	unsigned overhead;
	/* Groups that receive metadata must drop BLOCK_UNINIT so their
	 * block bitmap is actually written out.
	 */
	__u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0;
	int i;

	BUG_ON(flex_gd->count == 0 || group_data == NULL);

	src_group = group_data[0].group;
	last_group  = src_group + flex_gd->count - 1;

	/* All of the new groups must lie within one flex group. */
	BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) !=
	       (last_group & ~(flexbg_size - 1))));
next_group:
	group = group_data[0].group;
	if (src_group >= group_data[0].group + flex_gd->count)
		return -ENOSPC;
	start_blk = ext4_group_first_block_no(sb, src_group);
	last_blk = start_blk + group_data[src_group - group].blocks_count;

	overhead = ext4_group_overhead_blocks(sb, src_group);

	start_blk += overhead;

	/* We collect contiguous blocks as much as possible. */
	src_group++;
	for (; src_group <= last_group; src_group++) {
		overhead = ext4_group_overhead_blocks(sb, src_group);
		if (overhead == 0)
			last_blk += group_data[src_group - group].blocks_count;
		else
			break;
	}

	/* Allocate block bitmaps */
	for (; bb_index < flex_gd->count; bb_index++) {
		if (start_blk >= last_blk)
			goto next_group;
		group_data[bb_index].block_bitmap = start_blk++;
		/* Charge the block to the group that physically holds it. */
		group = ext4_get_group_number(sb, start_blk - 1);
		group -= group_data[0].group;
		group_data[group].mdata_blocks++;
		flex_gd->bg_flags[group] &= uninit_mask;
	}

	/* Allocate inode bitmaps */
	for (; ib_index < flex_gd->count; ib_index++) {
		if (start_blk >= last_blk)
			goto next_group;
		group_data[ib_index].inode_bitmap = start_blk++;
		group = ext4_get_group_number(sb, start_blk - 1);
		group -= group_data[0].group;
		group_data[group].mdata_blocks++;
		flex_gd->bg_flags[group] &= uninit_mask;
	}

	/* Allocate inode tables */
	for (; it_index < flex_gd->count; it_index++) {
		unsigned int itb = EXT4_SB(sb)->s_itb_per_group;
		ext4_fsblk_t next_group_start;

		if (start_blk + itb > last_blk)
			goto next_group;
		group_data[it_index].inode_table = start_blk;
		group = ext4_get_group_number(sb, start_blk);
		next_group_start = ext4_group_first_block_no(sb, group + 1);
		group -= group_data[0].group;

		if (start_blk + itb > next_group_start) {
			/* The inode table straddles a group boundary:
			 * account each part to the group holding its blocks.
			 */
			flex_gd->bg_flags[group + 1] &= uninit_mask;
			overhead = start_blk + itb - next_group_start;
			group_data[group + 1].mdata_blocks += overhead;
			itb -= overhead;
		}

		group_data[group].mdata_blocks += itb;
		flex_gd->bg_flags[group] &= uninit_mask;
		start_blk += EXT4_SB(sb)->s_itb_per_group;
	}

	/* Update free clusters count to exclude metadata blocks */
	for (i = 0; i < flex_gd->count; i++) {
		group_data[i].free_clusters_count -=
				EXT4_NUM_B2C(EXT4_SB(sb),
					     group_data[i].mdata_blocks);
	}

	if (test_opt(sb, DEBUG)) {
		int i;
		group = group_data[0].group;

		printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
		       "%d groups, flexbg size is %d:\n", flex_gd->count,
		       flexbg_size);

		for (i = 0; i < flex_gd->count; i++) {
			ext4_debug(
			       "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n",
			       ext4_bg_has_super(sb, group + i) ? "normal" :
			       "no-super", group + i,
			       group_data[i].blocks_count,
			       group_data[i].free_clusters_count,
			       group_data[i].mdata_blocks);
		}
	}
	return 0;
}

368
static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
369
				  ext4_fsblk_t blk)
370
371
372
373
374
{
	struct buffer_head *bh;
	int err;

	bh = sb_getblk(sb, blk);
375
	if (unlikely(!bh))
376
		return ERR_PTR(-ENOMEM);
377
	BUFFER_TRACE(bh, "get_write_access");
378
	if ((err = ext4_journal_get_write_access(handle, bh))) {
379
380
381
382
383
384
385
386
387
388
		brelse(bh);
		bh = ERR_PTR(err);
	} else {
		memset(bh->b_data, 0, sb->s_blocksize);
		set_buffer_uptodate(bh);
	}

	return bh;
}

389
390
391
392
393
/*
 * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA.
 * If that fails, restart the transaction & regain write access for the
 * buffer head which is used for block_bitmap modifications.
 */
394
static int extend_or_restart_transaction(handle_t *handle, int thresh)
395
396
397
{
	int err;

398
	if (ext4_handle_has_enough_credits(handle, thresh))
399
400
401
402
403
404
		return 0;

	err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
	if (err < 0)
		return err;
	if (err) {
405
406
		err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA);
		if (err)
407
			return err;
408
	}
409
410
411
412

	return 0;
}

/*
 * set_flexbg_block_bitmap() mark clusters [@first_cluster, @last_cluster]
 * used.
 *
 * Helper function for ext4_setup_new_group_blocks() which sets the bits
 * for the given cluster range in the block bitmap of each group the range
 * touches.
 *
 * @sb: super block
 * @handle: journal handle
 * @flex_gd: flex group data
 */
static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
			struct ext4_new_flex_group_data *flex_gd,
			ext4_fsblk_t first_cluster, ext4_fsblk_t last_cluster)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_group_t count = last_cluster - first_cluster + 1;
	ext4_group_t count2;

	ext4_debug("mark clusters [%llu-%llu] used\n", first_cluster,
		   last_cluster);
	/* Each iteration handles the part of the range that falls inside
	 * one group (count2 clusters), then advances past it.
	 */
	for (count2 = count; count > 0;
	     count -= count2, first_cluster += count2) {
		ext4_fsblk_t start;
		struct buffer_head *bh;
		ext4_group_t group;
		int err;

		group = ext4_get_group_number(sb, EXT4_C2B(sbi, first_cluster));
		start = EXT4_B2C(sbi, ext4_group_first_block_no(sb, group));
		group -= flex_gd->groups[0].group;

		count2 = EXT4_CLUSTERS_PER_GROUP(sb) - (first_cluster - start);
		if (count2 > count)
			count2 = count;

		/* An uninitialized bitmap has nothing to update on disk. */
		if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
			BUG_ON(flex_gd->count > 1);
			continue;
		}

		err = extend_or_restart_transaction(handle, 1);
		if (err)
			return err;

		bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
		if (unlikely(!bh))
			return -ENOMEM;

		BUFFER_TRACE(bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, bh);
		if (err) {
			brelse(bh);
			return err;
		}
		ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n",
			   first_cluster, first_cluster - start, count2);
		ext4_set_bits(bh->b_data, first_cluster - start, count2);

		err = ext4_handle_dirty_metadata(handle, NULL, bh);
		brelse(bh);
		if (unlikely(err))
			return err;
	}

	return 0;
}

/*
 * Set up the block and inode bitmaps, and the inode table for the new groups.
 * This doesn't need to be part of the main transaction, since we are only
 * changing blocks outside the actual filesystem.  We still do journaling to
 * ensure the recovery is correct in case of a failure just after resize.
 * If any part of this fails, we simply abort the resize.
 *
 * setup_new_flex_group_blocks handles a flex group as follow:
 *  1. copy super block and GDT, and initialize group tables if necessary.
 *     In this step, we only set bits in blocks bitmaps for blocks taken by
 *     super block and GDT.
 *  2. allocate group tables in block bitmaps, that is, set bits in block
 *     bitmap for blocks taken by group tables.
 */
static int setup_new_flex_group_blocks(struct super_block *sb,
				struct ext4_new_flex_group_data *flex_gd)
{
	/* blocks per group used by: block bitmap, inode bitmap, inode table */
	int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
	ext4_fsblk_t start;
	ext4_fsblk_t block;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	struct ext4_new_group_data *group_data = flex_gd->groups;
	__u16 *bg_flags = flex_gd->bg_flags;
	handle_t *handle;
	ext4_group_t group, count;
	struct buffer_head *bh = NULL;
	int reserved_gdb, i, j, err = 0, err2;
	int meta_bg;

	BUG_ON(!flex_gd->count || !group_data ||
	       group_data[0].group != sbi->s_groups_count);

	reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
	meta_bg = ext4_has_feature_meta_bg(sb);

	/* This transaction may be extended/restarted along the way */
	handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	group = group_data[0].group;
	for (i = 0; i < flex_gd->count; i++, group++) {
		unsigned long gdblocks;
		ext4_grpblk_t overhead;

		gdblocks = ext4_bg_num_gdb(sb, group);
		start = ext4_group_first_block_no(sb, group);

		/* Groups without a superblock backup carry no sb/GDT copy. */
		if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
			goto handle_itb;

		if (meta_bg == 1) {
			ext4_group_t first_group;
			first_group = ext4_meta_bg_first_group(sb, group);
			if (first_group != group + 1 &&
			    first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
				goto handle_itb;
		}

		block = start + ext4_bg_has_super(sb, group);
		/* Copy all of the GDT blocks into the backup in this group */
		for (j = 0; j < gdblocks; j++, block++) {
			struct buffer_head *gdb;

			ext4_debug("update backup group %#04llx\n", block);
			err = extend_or_restart_transaction(handle, 1);
			if (err)
				goto out;

			gdb = sb_getblk(sb, block);
			if (unlikely(!gdb)) {
				err = -ENOMEM;
				goto out;
			}

			BUFFER_TRACE(gdb, "get_write_access");
			err = ext4_journal_get_write_access(handle, gdb);
			if (err) {
				brelse(gdb);
				goto out;
			}
			memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
			       gdb->b_size);
			set_buffer_uptodate(gdb);

			err = ext4_handle_dirty_metadata(handle, NULL, gdb);
			if (unlikely(err)) {
				brelse(gdb);
				goto out;
			}
			brelse(gdb);
		}

		/* Zero out all of the reserved backup group descriptor
		 * table blocks
		 */
		if (ext4_bg_has_super(sb, group)) {
			err = sb_issue_zeroout(sb, gdblocks + start + 1,
					reserved_gdb, GFP_NOFS);
			if (err)
				goto out;
		}

handle_itb:
		/* Initialize group tables of the group @group */
		if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
			goto handle_bb;

		/* Zero out all of the inode table blocks */
		block = group_data[i].inode_table;
		ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
			   block, sbi->s_itb_per_group);
		err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
				       GFP_NOFS);
		if (err)
			goto out;

handle_bb:
		if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT)
			goto handle_ib;

		/* Initialize block bitmap of the @group */
		block = group_data[i].block_bitmap;
		err = extend_or_restart_transaction(handle, 1);
		if (err)
			goto out;

		bh = bclean(handle, sb, block);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			goto out;
		}
		overhead = ext4_group_overhead_blocks(sb, group);
		if (overhead != 0) {
			ext4_debug("mark backup superblock %#04llx (+0)\n",
				   start);
			ext4_set_bits(bh->b_data, 0,
				      EXT4_NUM_B2C(sbi, overhead));
		}
		ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count),
				     sb->s_blocksize * 8, bh->b_data);
		err = ext4_handle_dirty_metadata(handle, NULL, bh);
		brelse(bh);
		if (err)
			goto out;

handle_ib:
		if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
			continue;

		/* Initialize inode bitmap of the @group */
		block = group_data[i].inode_bitmap;
		err = extend_or_restart_transaction(handle, 1);
		if (err)
			goto out;
		/* Mark unused entries in inode bitmap used */
		bh = bclean(handle, sb, block);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			goto out;
		}

		ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
				     sb->s_blocksize * 8, bh->b_data);
		err = ext4_handle_dirty_metadata(handle, NULL, bh);
		brelse(bh);
		if (err)
			goto out;
	}

	/* Mark group tables in block bitmap */
	for (j = 0; j < GROUP_TABLE_COUNT; j++) {
		count = group_table_count[j];
		start = (&group_data[0].block_bitmap)[j];
		block = start;
		/* Coalesce contiguous table runs across the new groups so
		 * set_flexbg_block_bitmap() is called once per extent.
		 */
		for (i = 1; i < flex_gd->count; i++) {
			block += group_table_count[j];
			if (block == (&group_data[i].block_bitmap)[j]) {
				count += group_table_count[j];
				continue;
			}
			err = set_flexbg_block_bitmap(sb, handle,
						      flex_gd,
						      EXT4_B2C(sbi, start),
						      EXT4_B2C(sbi,
							       start + count
							       - 1));
			if (err)
				goto out;
			count = group_table_count[j];
			start = (&group_data[i].block_bitmap)[j];
			block = start;
		}

		if (count) {
			err = set_flexbg_block_bitmap(sb, handle,
						      flex_gd,
						      EXT4_B2C(sbi, start),
						      EXT4_B2C(sbi,
							       start + count
							       - 1));
			if (err)
				goto out;
		}
	}

out:
	err2 = ext4_journal_stop(handle);
	if (err2 && !err)
		err = err2;

	return err;
}

/*
 * Iterate through the groups which hold BACKUP superblock/GDT copies in an
 * ext4 filesystem.  The counters should be initialized to 1, 5, and 7 before
 * calling this for the first time.  In a sparse filesystem it will be the
 * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
 * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
 */
static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
				  unsigned *five, unsigned *seven)
{
	unsigned *next = three;
	int base = 3;
	unsigned cur;

	if (!ext4_has_feature_sparse_super(sb)) {
		/* Without sparse_super, every group holds a backup. */
		cur = *next;
		*next = cur + 1;
		return cur;
	}

	/* Choose the smallest pending power among the three sequences. */
	if (*five < *next) {
		next = five;
		base = 5;
	}
	if (*seven < *next) {
		next = seven;
		base = 7;
	}

	cur = *next;
	*next = cur * base;

	return cur;
}

/*
 * Check that all of the backup GDT blocks are held in the primary GDT block.
 * It is assumed that they are stored in group order.  Returns the number of
 * groups in current filesystem that have BACKUPS, or -ve error code.
 */
static int verify_reserved_gdb(struct super_block *sb,
			       ext4_group_t end,
			       struct buffer_head *primary)
{
	const ext4_fsblk_t blk = primary->b_blocknr;
	unsigned three = 1;
	unsigned five = 5;
	unsigned seven = 7;
	unsigned grp;
	__le32 *p = (__le32 *)primary->b_data;
	int gdbackups = 0;

	/* Walk every backup group and check its entry points back at the
	 * same relative block in that group.
	 */
	while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
		if (le32_to_cpu(*p++) !=
		    grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
			ext4_warning(sb, "reserved GDT %llu"
				     " missing grp %d (%llu)",
				     blk, grp,
				     grp *
				     (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
				     blk);
			return -EINVAL;
		}
		/* Cannot hold more backup entries than fit in one block. */
		if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb))
			return -EFBIG;
	}

	return gdbackups;
}

/*
 * Called when we need to bring a reserved group descriptor table block into
 * use from the resize inode.  The primary copy of the new GDT block currently
 * is an indirect block (under the double indirect block in the resize inode).
 * The new backup GDT blocks will be stored as leaf blocks in this indirect
 * block, in group order.  Even though we know all the block numbers we need,
 * we check to ensure that the resize inode has actually reserved these blocks.
 *
 * Don't need to update the block bitmaps because the blocks are still in use.
 *
 * We get all of the error cases out of the way, so that we are sure to not
 * fail once we start modifying the data on disk, because JBD has no rollback.
 */
static int add_new_gdb(handle_t *handle, struct inode *inode,
778
		       ext4_group_t group)
779
780
{
	struct super_block *sb = inode->i_sb;
781
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
782
	unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
783
	ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
784
785
	struct buffer_head **o_group_desc, **n_group_desc;
	struct buffer_head *dind;
786
	struct buffer_head *gdb_bh;
787
	int gdbackups;
788
	struct ext4_iloc iloc;
789
790
791
792
793
	__le32 *data;
	int err;

	if (test_opt(sb, DEBUG))
		printk(KERN_DEBUG
794
		       "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
795
796
		       gdb_num);

797
798
	gdb_bh = sb_bread(sb, gdblock);
	if (!gdb_bh)
799
800
		return -EIO;

801
	gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
802
	if (gdbackups < 0) {
803
804
805
806
		err = gdbackups;
		goto exit_bh;
	}

807
	data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
808
809
810
811
812
813
814
	dind = sb_bread(sb, le32_to_cpu(*data));
	if (!dind) {
		err = -EIO;
		goto exit_bh;
	}

	data = (__le32 *)dind->b_data;
815
	if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
816
		ext4_warning(sb, "new group %u GDT block %llu not reserved",
817
			     group, gdblock);
818
819
820
821
		err = -EINVAL;
		goto exit_dind;
	}

822
	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
823
824
	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
	if (unlikely(err))
825
826
		goto exit_dind;

827
	BUFFER_TRACE(gdb_bh, "get_write_access");
828
	err = ext4_journal_get_write_access(handle, gdb_bh);
829
	if (unlikely(err))
830
		goto exit_dind;
831

832
	BUFFER_TRACE(dind, "get_write_access");
833
834
835
	err = ext4_journal_get_write_access(handle, dind);
	if (unlikely(err))
		ext4_std_error(sb, err);
836

837
	/* ext4_reserve_inode_write() gets a reference on the iloc */
838
839
	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (unlikely(err))
840
		goto exit_dind;
841

842
843
844
	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
				     sizeof(struct buffer_head *),
				     GFP_NOFS);
845
846
	if (!n_group_desc) {
		err = -ENOMEM;
847
848
		ext4_warning(sb, "not enough memory for %lu groups",
			     gdb_num + 1);
849
850
851
852
853
854
855
856
857
858
859
860
		goto exit_inode;
	}

	/*
	 * Finally, we have all of the possible failures behind us...
	 *
	 * Remove new GDT block from inode double-indirect block and clear out
	 * the new GDT block for use (which also "frees" the backup GDT blocks
	 * from the reserved inode).  We don't need to change the bitmaps for
	 * these blocks, because they are marked as in-use from being in the
	 * reserved inode, and will become GDT blocks (primary and backup).
	 */
861
	data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
862
863
864
865
866
	err = ext4_handle_dirty_metadata(handle, NULL, dind);
	if (unlikely(err)) {
		ext4_std_error(sb, err);
		goto exit_inode;
	}
867
868
	inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >>
			   (9 - EXT4_SB(sb)->s_cluster_bits);
869
	ext4_mark_iloc_dirty(handle, inode, &iloc);
870
871
	memset(gdb_bh->b_data, 0, sb->s_blocksize);
	err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
872
873
874
875
876
	if (unlikely(err)) {
		ext4_std_error(sb, err);
		goto exit_inode;
	}
	brelse(dind);
877

878
	o_group_desc = EXT4_SB(sb)->s_group_desc;
879
	memcpy(n_group_desc, o_group_desc,
880
	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
881
	n_group_desc[gdb_num] = gdb_bh;
882
883
	EXT4_SB(sb)->s_group_desc = n_group_desc;
	EXT4_SB(sb)->s_gdb_count++;
Al Viro's avatar
Al Viro committed
884
	kvfree(o_group_desc);
885

Marcin Slusarz's avatar
Marcin Slusarz committed
886
	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
887
	err = ext4_handle_dirty_super(handle, sb);
888
889
	if (err)
		ext4_std_error(sb, err);
890

891
	return err;
892
893

exit_inode:
Al Viro's avatar
Al Viro committed
894
	kvfree(n_group_desc);
895
896
897
898
	brelse(iloc.bh);
exit_dind:
	brelse(dind);
exit_bh:
899
	brelse(gdb_bh);
900

901
	ext4_debug("leaving with error %d\n", err);
902
903
904
	return err;
}

905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
/*
 * add_new_gdb_meta_bg is the sister of add_new_gdb.
 */
static int add_new_gdb_meta_bg(struct super_block *sb,
			       handle_t *handle, ext4_group_t group) {
	ext4_fsblk_t gdblock;
	struct buffer_head *gdb_bh;
	struct buffer_head **o_group_desc, **n_group_desc;
	unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
	int err;

	gdblock = ext4_meta_bg_first_block_no(sb, group) +
		   ext4_bg_has_super(sb, group);
	gdb_bh = sb_bread(sb, gdblock);
	if (!gdb_bh)
		return -EIO;
	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
				     sizeof(struct buffer_head *),
				     GFP_NOFS);
	if (!n_group_desc) {
925
		brelse(gdb_bh);
926
927
928
929
930
931
932
933
934
935
936
937
		err = -ENOMEM;
		ext4_warning(sb, "not enough memory for %lu groups",
			     gdb_num + 1);
		return err;
	}

	o_group_desc = EXT4_SB(sb)->s_group_desc;
	memcpy(n_group_desc, o_group_desc,
	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
	n_group_desc[gdb_num] = gdb_bh;
	EXT4_SB(sb)->s_group_desc = n_group_desc;
	EXT4_SB(sb)->s_gdb_count++;
Al Viro's avatar
Al Viro committed
938
	kvfree(o_group_desc);
939
	BUFFER_TRACE(gdb_bh, "get_write_access");
940
941
942
943
	err = ext4_journal_get_write_access(handle, gdb_bh);
	return err;
}

944
945
946
947
948
949
950
951
952
953
954
955
956
957
/*
 * Called when we are adding a new group which has a backup copy of each of
 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
 * We need to add these reserved backup GDT blocks to the resize inode, so
 * that they are kept for future resizing and not allocated to files.
 *
 * Each reserved backup GDT block will go into a different indirect block.
 * The indirect blocks are actually the primary reserved GDT blocks,
 * so we know in advance what their block numbers are.  We only get the
 * double-indirect block to verify it is pointing to the primary reserved
 * GDT blocks so we don't overwrite a data block by accident.  The reserved
 * backup GDT blocks are stored in their reserved primary GDT block.
 */
static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
958
			      ext4_group_t group)
959
960
{
	struct super_block *sb = inode->i_sb;
961
	int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
962
	int cluster_bits = EXT4_SB(sb)->s_cluster_bits;
963
964
	struct buffer_head **primary;
	struct buffer_head *dind;
965
966
	struct ext4_iloc iloc;
	ext4_fsblk_t blk;
967
968
969
970
971
	__le32 *data, *end;
	int gdbackups = 0;
	int res, i;
	int err;

972
	primary = kmalloc_array(reserved_gdb, sizeof(*primary), GFP_NOFS);
973
974
975
	if (!primary)
		return -ENOMEM;

976
	data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
977
978
979
980
981
982
	dind = sb_bread(sb, le32_to_cpu(*data));
	if (!dind) {
		err = -EIO;
		goto exit_free;
	}

983
	blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
Josef Bacik's avatar
Josef Bacik committed
984
985
	data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
					 EXT4_ADDR_PER_BLOCK(sb));
986
	end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
987
988
989
990

	/* Get each reserved primary GDT block and verify it holds backups */
	for (res = 0; res < reserved_gdb; res++, blk++) {
		if (le32_to_cpu(*data) != blk) {
991
			ext4_warning(sb, "reserved block %llu"
992
993
994
995
996
997
998
999
1000
				     " not at offset %ld",
				     blk,
				     (long)(data - (__le32 *)dind->b_data));
			err = -EINVAL;
			goto exit_bh;
		}
		primary[res] = sb_bread(sb, blk);
		if (!primary[res]) {
			err = -EIO;