// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/resize.c
 *
 * Support for resizing an ext4 filesystem while it is mounted.
 *
 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
 *
 * This could probably be made into a module, because it is not often in use.
 */

13
#define EXT4FS_DEBUG
14
15
16
17

#include <linux/errno.h>
#include <linux/slab.h>

18
#include "ext4_jbd2.h"
19

20
21
int ext4_resize_begin(struct super_block *sb)
{
22
	struct ext4_sb_info *sbi = EXT4_SB(sb);
23
24
25
26
27
	int ret = 0;

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

28
29
30
31
32
	/*
	 * If we are not using the primary superblock/GDT copy don't resize,
         * because the user tools have no way of handling this.  Probably a
         * bad time to do it anyways.
         */
33
	if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) !=
34
35
36
37
38
39
	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
		ext4_warning(sb, "won't resize using backup superblock at %llu",
			(unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
		return -EPERM;
	}

40
41
42
43
44
45
	/*
	 * We are not allowed to do online-resizing on a filesystem mounted
	 * with error, because it can destroy the filesystem easily.
	 */
	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
		ext4_warning(sb, "There are errors in the filesystem, "
46
			     "so online resizing is not allowed");
47
48
49
		return -EPERM;
	}

50
51
	if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
				  &EXT4_SB(sb)->s_ext4_flags))
52
53
54
55
56
57
58
		ret = -EBUSY;

	return ret;
}

void ext4_resize_end(struct super_block *sb)
{
59
	clear_bit_unlock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags);
60
	smp_mb__after_atomic();
61
62
}

63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/* Round @group down to the first group of its meta_bg cluster. */
static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
					     ext4_group_t group) {
	unsigned int bits = EXT4_DESC_PER_BLOCK_BITS(sb);

	return (group >> bits) << bits;
}

/* First block number of the meta_bg cluster that contains @group. */
static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb,
					     ext4_group_t group) {
	ext4_group_t first_group = ext4_meta_bg_first_group(sb, group);

	return ext4_group_first_block_no(sb, first_group);
}

/*
 * Number of metadata blocks at the start of @group: its group descriptor
 * blocks, plus - for groups carrying a superblock backup - the superblock
 * copy and the reserved GDT blocks.
 */
static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb,
						ext4_group_t group) {
	ext4_grpblk_t nr = ext4_bg_num_gdb(sb, group);

	if (ext4_bg_has_super(sb, group))
		nr += 1 +
			  le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);

	return nr;
}

85
86
87
88
#define outside(b, first, last)	((b) < (first) || (b) >= (last))
#define inside(b, first, last)	((b) >= (first) && (b) < (last))

/*
 * Sanity-check the geometry of one new group before it is added.
 *
 * The group must be exactly the next group after the current last one,
 * and its metadata (block bitmap, inode bitmap, inode table) must lie
 * inside the new group's block range [start, end) without overlapping
 * each other or the superblock/GDT area at the start of the group.
 *
 * Returns 0 if the layout is acceptable, -EINVAL otherwise (a warning
 * describing the first failed check is logged).
 */
static int verify_group_input(struct super_block *sb,
			      struct ext4_new_group_data *input)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	ext4_fsblk_t start = ext4_blocks_count(es);	/* first block of new group */
	ext4_fsblk_t end = start + input->blocks_count;
	ext4_group_t group = input->group;
	ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
	unsigned overhead;
	ext4_fsblk_t metaend;
	struct buffer_head *bh = NULL;
	ext4_grpblk_t free_blocks_count, offset;
	int err = -EINVAL;

	/* Only appending the very next group is supported. */
	if (group != sbi->s_groups_count) {
		ext4_warning(sb, "Cannot add at group %u (only %u groups)",
			     input->group, sbi->s_groups_count);
		return -EINVAL;
	}

	overhead = ext4_group_overhead_blocks(sb, group);
	metaend = start + overhead;
	/* 2 = block bitmap + inode bitmap */
	input->free_blocks_count = free_blocks_count =
		input->blocks_count - 2 - overhead - sbi->s_itb_per_group;

	if (test_opt(sb, DEBUG))
		printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks "
		       "(%d free, %u reserved)\n",
		       ext4_bg_has_super(sb, input->group) ? "normal" :
		       "no-super", input->group, input->blocks_count,
		       free_blocks_count, input->reserved_blocks);

	/* The current last group must be completely full before growing. */
	ext4_get_group_no_and_offset(sb, start, NULL, &offset);
	if (offset != 0)
			ext4_warning(sb, "Last group not full");
	else if (input->reserved_blocks > input->blocks_count / 5)
		ext4_warning(sb, "Reserved blocks too high (%u)",
			     input->reserved_blocks);
	else if (free_blocks_count < 0)
		ext4_warning(sb, "Bad blocks count %u",
			     input->blocks_count);
	else if (!(bh = sb_bread(sb, end - 1)))
		/* Also proves the device is large enough for the new group. */
		ext4_warning(sb, "Cannot read last block (%llu)",
			     end - 1);
	else if (outside(input->block_bitmap, start, end))
		ext4_warning(sb, "Block bitmap not in group (block %llu)",
			     (unsigned long long)input->block_bitmap);
	else if (outside(input->inode_bitmap, start, end))
		ext4_warning(sb, "Inode bitmap not in group (block %llu)",
			     (unsigned long long)input->inode_bitmap);
	else if (outside(input->inode_table, start, end) ||
		 outside(itend - 1, start, end))
		ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)",
			     (unsigned long long)input->inode_table, itend - 1);
	else if (input->inode_bitmap == input->block_bitmap)
		ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)",
			     (unsigned long long)input->block_bitmap);
	else if (inside(input->block_bitmap, input->inode_table, itend))
		ext4_warning(sb, "Block bitmap (%llu) in inode table "
			     "(%llu-%llu)",
			     (unsigned long long)input->block_bitmap,
			     (unsigned long long)input->inode_table, itend - 1);
	else if (inside(input->inode_bitmap, input->inode_table, itend))
		ext4_warning(sb, "Inode bitmap (%llu) in inode table "
			     "(%llu-%llu)",
			     (unsigned long long)input->inode_bitmap,
			     (unsigned long long)input->inode_table, itend - 1);
	else if (inside(input->block_bitmap, start, metaend))
		ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)",
			     (unsigned long long)input->block_bitmap,
			     start, metaend - 1);
	else if (inside(input->inode_bitmap, start, metaend))
		ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)",
			     (unsigned long long)input->inode_bitmap,
			     start, metaend - 1);
	else if (inside(input->inode_table, start, metaend) ||
		 inside(itend - 1, start, metaend))
		ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table "
			     "(%llu-%llu)",
			     (unsigned long long)input->inode_table,
			     itend - 1, start, metaend - 1);
	else
		err = 0;
	brelse(bh);

	return err;
}

177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
/*
 * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex
 * group each time.
 */
struct ext4_new_flex_group_data {
	struct ext4_new_group_data *groups;	/* new_group_data for groups
						   in the flex group */
	__u16 *bg_flags;			/* block group flags of groups
						   in @groups */
	ext4_group_t count;			/* number of valid entries in
						   @groups and @bg_flags */
};

/*
 * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
 * @flexbg_size.
 *
 * Returns NULL on failure otherwise address of the allocated structure.
 */
static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
{
	struct ext4_new_flex_group_data *flex_gd;

	flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
	if (flex_gd == NULL)
		goto out3;

204
	if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
205
		goto out2;
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
	flex_gd->count = flexbg_size;

	flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) *
				  flexbg_size, GFP_NOFS);
	if (flex_gd->groups == NULL)
		goto out2;

	flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS);
	if (flex_gd->bg_flags == NULL)
		goto out1;

	return flex_gd;

out1:
	kfree(flex_gd->groups);
out2:
	kfree(flex_gd);
out3:
	return NULL;
}

/* Release a descriptor obtained from alloc_flex_gd(). */
static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
{
	kfree(flex_gd->bg_flags);
	kfree(flex_gd->groups);
	kfree(flex_gd);
}

234
235
236
237
238
239
240
241
242
/*
 * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
 * and inode tables for a flex group.
 *
 * This function is used by 64bit-resize.  Note that this function allocates
 * group tables from the 1st group of groups contained by @flexgd, which may
 * be a partial of a flex group.
 *
 * @sb: super block of fs to which the groups belongs
 *
 * Returns 0 on a successful allocation of the metadata blocks in the
 * block group.
 */
static int ext4_alloc_group_tables(struct super_block *sb,
				struct ext4_new_flex_group_data *flex_gd,
				int flexbg_size)
{
	struct ext4_new_group_data *group_data = flex_gd->groups;
	ext4_fsblk_t start_blk;
	ext4_fsblk_t last_blk;
	ext4_group_t src_group;
	ext4_group_t bb_index = 0;
	ext4_group_t ib_index = 0;
	ext4_group_t it_index = 0;
	ext4_group_t group;
	ext4_group_t last_group;
	unsigned overhead;
	/*
	 * With a real flex_bg (flexbg_size > 1) a group that receives another
	 * group's metadata can no longer keep EXT4_BG_BLOCK_UNINIT; with a
	 * single group nothing needs clearing, so the mask is a no-op.
	 */
	__u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0;

	BUG_ON(flex_gd->count == 0 || group_data == NULL);

	src_group = group_data[0].group;
	last_group  = src_group + flex_gd->count - 1;

	/* All groups being added must belong to the same flex group. */
	BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) !=
	       (last_group & ~(flexbg_size - 1))));
next_group:
	group = group_data[0].group;
	if (src_group >= group_data[0].group + flex_gd->count)
		return -ENOSPC;
	start_blk = ext4_group_first_block_no(sb, src_group);
	last_blk = start_blk + group_data[src_group - group].blocks_count;

	overhead = ext4_group_overhead_blocks(sb, src_group);

	/* Skip the superblock/GDT area at the front of the group. */
	start_blk += overhead;

	/* We collect contiguous blocks as much as possible. */
	src_group++;
	for (; src_group <= last_group; src_group++) {
		overhead = ext4_group_overhead_blocks(sb, src_group);
		if (overhead == 0)
			last_blk += group_data[src_group - group].blocks_count;
		else
			break;
	}

	/* Allocate block bitmaps */
	for (; bb_index < flex_gd->count; bb_index++) {
		if (start_blk >= last_blk)
			goto next_group;
		group_data[bb_index].block_bitmap = start_blk++;
		group = ext4_get_group_number(sb, start_blk - 1);
		group -= group_data[0].group;
		group_data[group].free_blocks_count--;
		flex_gd->bg_flags[group] &= uninit_mask;
	}

	/* Allocate inode bitmaps */
	for (; ib_index < flex_gd->count; ib_index++) {
		if (start_blk >= last_blk)
			goto next_group;
		group_data[ib_index].inode_bitmap = start_blk++;
		group = ext4_get_group_number(sb, start_blk - 1);
		group -= group_data[0].group;
		group_data[group].free_blocks_count--;
		flex_gd->bg_flags[group] &= uninit_mask;
	}

	/* Allocate inode tables */
	for (; it_index < flex_gd->count; it_index++) {
		unsigned int itb = EXT4_SB(sb)->s_itb_per_group;
		ext4_fsblk_t next_group_start;

		if (start_blk + itb > last_blk)
			goto next_group;
		group_data[it_index].inode_table = start_blk;
		group = ext4_get_group_number(sb, start_blk);
		next_group_start = ext4_group_first_block_no(sb, group + 1);
		group -= group_data[0].group;

		/*
		 * The inode table may straddle a group boundary; charge the
		 * overflowing blocks to the following group.
		 */
		if (start_blk + itb > next_group_start) {
			flex_gd->bg_flags[group + 1] &= uninit_mask;
			overhead = start_blk + itb - next_group_start;
			group_data[group + 1].free_blocks_count -= overhead;
			itb -= overhead;
		}

		group_data[group].free_blocks_count -= itb;
		flex_gd->bg_flags[group] &= uninit_mask;
		start_blk += EXT4_SB(sb)->s_itb_per_group;
	}

	if (test_opt(sb, DEBUG)) {
		int i;
		group = group_data[0].group;

		printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
		       "%d groups, flexbg size is %d:\n", flex_gd->count,
		       flexbg_size);

		for (i = 0; i < flex_gd->count; i++) {
			printk(KERN_DEBUG "adding %s group %u: %u "
			       "blocks (%d free)\n",
			       ext4_bg_has_super(sb, group + i) ? "normal" :
			       "no-super", group + i,
			       group_data[i].blocks_count,
			       group_data[i].free_blocks_count);
		}
	}
	return 0;
}

357
/*
 * Get buffer @blk, join it to @handle and zero it out.
 *
 * Returns the zeroed, uptodate buffer head on success, or an ERR_PTR on
 * allocation/journal failure.  The caller owns the returned reference.
 */
static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
				  ext4_fsblk_t blk)
{
	struct buffer_head *bh;
	int err;

	bh = sb_getblk(sb, blk);
	if (unlikely(!bh))
		return ERR_PTR(-ENOMEM);
	BUFFER_TRACE(bh, "get_write_access");
	if ((err = ext4_journal_get_write_access(handle, bh))) {
		brelse(bh);
		bh = ERR_PTR(err);
	} else {
		memset(bh->b_data, 0, sb->s_blocksize);
		set_buffer_uptodate(bh);
	}

	return bh;
}

378
379
380
381
382
/*
 * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA.
 * If that fails, restart the transaction & regain write access for the
 * buffer head which is used for block_bitmap modifications.
 *
 * Returns 0 when the handle has at least @thresh credits available,
 * or a negative error code.
 */
static int extend_or_restart_transaction(handle_t *handle, int thresh)
{
	int err;

	if (ext4_handle_has_enough_credits(handle, thresh))
		return 0;

	err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
	if (err < 0)
		return err;
	if (err) {
		/* Could not extend in place; restart the transaction. */
		err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA);
		if (err)
			return err;
	}

	return 0;
}

402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
/*
 * set_flexbg_block_bitmap() mark @count blocks starting from @block used.
 *
 * Helper function for ext4_setup_new_group_blocks() which marks a run of
 * group-table blocks as in-use in the owning groups' block bitmaps.
 *
 * @sb: super block
 * @handle: journal handle
 * @flex_gd: flex group data
 *
 * Returns 0 on success or a negative error code.
 */
static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
			struct ext4_new_flex_group_data *flex_gd,
			ext4_fsblk_t block, ext4_group_t count)
{
	ext4_group_t count2;

	ext4_debug("mark blocks [%llu/%u] used\n", block, count);
	/* Process the run one block group at a time. */
	for (count2 = count; count > 0; count -= count2, block += count2) {
		ext4_fsblk_t start;
		struct buffer_head *bh;
		ext4_group_t group;
		int err;

		group = ext4_get_group_number(sb, block);
		start = ext4_group_first_block_no(sb, group);
		/* Index into flex_gd->groups / bg_flags. */
		group -= flex_gd->groups[0].group;

		/* Clamp to the blocks remaining in this group. */
		count2 = EXT4_BLOCKS_PER_GROUP(sb) - (block - start);
		if (count2 > count)
			count2 = count;

		/* Uninitialized bitmaps need no on-disk bits set. */
		if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
			BUG_ON(flex_gd->count > 1);
			continue;
		}

		err = extend_or_restart_transaction(handle, 1);
		if (err)
			return err;

		bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
		if (unlikely(!bh))
			return -ENOMEM;

		BUFFER_TRACE(bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, bh);
		if (err) {
			brelse(bh);
			return err;
		}
		ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block,
			   block - start, count2);
		ext4_set_bits(bh->b_data, block - start, count2);

		err = ext4_handle_dirty_metadata(handle, NULL, bh);
		brelse(bh);
		if (unlikely(err))
			return err;
	}

	return 0;
}

/*
 * Set up the block and inode bitmaps, and the inode table for the new groups.
 * This doesn't need to be part of the main transaction, since we are only
 * changing blocks outside the actual filesystem.  We still do journaling to
 * ensure the recovery is correct in case of a failure just after resize.
 * If any part of this fails, we simply abort the resize.
 *
 * setup_new_flex_group_blocks handles a flex group as follow:
 *  1. copy super block and GDT, and initialize group tables if necessary.
 *     In this step, we only set bits in blocks bitmaps for blocks taken by
 *     super block and GDT.
 *  2. allocate group tables in block bitmaps, that is, set bits in block
 *     bitmap for blocks taken by group tables.
 */
static int setup_new_flex_group_blocks(struct super_block *sb,
				struct ext4_new_flex_group_data *flex_gd)
{
	/* Per-group counts of: block bitmap, inode bitmap, inode table. */
	int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
	ext4_fsblk_t start;
	ext4_fsblk_t block;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	struct ext4_new_group_data *group_data = flex_gd->groups;
	__u16 *bg_flags = flex_gd->bg_flags;
	handle_t *handle;
	ext4_group_t group, count;
	struct buffer_head *bh = NULL;
	int reserved_gdb, i, j, err = 0, err2;
	int meta_bg;

	BUG_ON(!flex_gd->count || !group_data ||
	       group_data[0].group != sbi->s_groups_count);

	reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
	meta_bg = ext4_has_feature_meta_bg(sb);

	/* This transaction may be extended/restarted along the way */
	handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	group = group_data[0].group;
	for (i = 0; i < flex_gd->count; i++, group++) {
		unsigned long gdblocks;
		ext4_grpblk_t overhead;

		gdblocks = ext4_bg_num_gdb(sb, group);
		start = ext4_group_first_block_no(sb, group);

		if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
			goto handle_itb;

		/* With meta_bg only the backup-holding groups carry a GDT copy. */
		if (meta_bg == 1) {
			ext4_group_t first_group;
			first_group = ext4_meta_bg_first_group(sb, group);
			if (first_group != group + 1 &&
			    first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
				goto handle_itb;
		}

		block = start + ext4_bg_has_super(sb, group);
		/* Copy all of the GDT blocks into the backup in this group */
		for (j = 0; j < gdblocks; j++, block++) {
			struct buffer_head *gdb;

			ext4_debug("update backup group %#04llx\n", block);
			err = extend_or_restart_transaction(handle, 1);
			if (err)
				goto out;

			gdb = sb_getblk(sb, block);
			if (unlikely(!gdb)) {
				err = -ENOMEM;
				goto out;
			}

			BUFFER_TRACE(gdb, "get_write_access");
			err = ext4_journal_get_write_access(handle, gdb);
			if (err) {
				brelse(gdb);
				goto out;
			}
			memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
			       gdb->b_size);
			set_buffer_uptodate(gdb);

			err = ext4_handle_dirty_metadata(handle, NULL, gdb);
			if (unlikely(err)) {
				brelse(gdb);
				goto out;
			}
			brelse(gdb);
		}

		/* Zero out all of the reserved backup group descriptor
		 * table blocks
		 */
		if (ext4_bg_has_super(sb, group)) {
			err = sb_issue_zeroout(sb, gdblocks + start + 1,
					reserved_gdb, GFP_NOFS);
			if (err)
				goto out;
		}

handle_itb:
		/* Initialize group tables of the group @group */
		if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
			goto handle_bb;

		/* Zero out all of the inode table blocks */
		block = group_data[i].inode_table;
		ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
			   block, sbi->s_itb_per_group);
		err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
				       GFP_NOFS);
		if (err)
			goto out;

handle_bb:
		if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT)
			goto handle_ib;

		/* Initialize block bitmap of the @group */
		block = group_data[i].block_bitmap;
		err = extend_or_restart_transaction(handle, 1);
		if (err)
			goto out;

		bh = bclean(handle, sb, block);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			goto out;
		}
		overhead = ext4_group_overhead_blocks(sb, group);
		if (overhead != 0) {
			ext4_debug("mark backup superblock %#04llx (+0)\n",
				   start);
			ext4_set_bits(bh->b_data, 0, overhead);
		}
		ext4_mark_bitmap_end(group_data[i].blocks_count,
				     sb->s_blocksize * 8, bh->b_data);
		err = ext4_handle_dirty_metadata(handle, NULL, bh);
		brelse(bh);
		if (err)
			goto out;

handle_ib:
		if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
			continue;

		/* Initialize inode bitmap of the @group */
		block = group_data[i].inode_bitmap;
		err = extend_or_restart_transaction(handle, 1);
		if (err)
			goto out;
		/* Mark unused entries in inode bitmap used */
		bh = bclean(handle, sb, block);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			goto out;
		}

		ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
				     sb->s_blocksize * 8, bh->b_data);
		err = ext4_handle_dirty_metadata(handle, NULL, bh);
		brelse(bh);
		if (err)
			goto out;
	}

	/* Mark group tables in block bitmap */
	for (j = 0; j < GROUP_TABLE_COUNT; j++) {
		count = group_table_count[j];
		start = (&group_data[0].block_bitmap)[j];
		block = start;
		/* Coalesce contiguous runs before touching the bitmaps. */
		for (i = 1; i < flex_gd->count; i++) {
			block += group_table_count[j];
			if (block == (&group_data[i].block_bitmap)[j]) {
				count += group_table_count[j];
				continue;
			}
			err = set_flexbg_block_bitmap(sb, handle,
						flex_gd, start, count);
			if (err)
				goto out;
			count = group_table_count[j];
			start = (&group_data[i].block_bitmap)[j];
			block = start;
		}

		if (count) {
			err = set_flexbg_block_bitmap(sb, handle,
						flex_gd, start, count);
			if (err)
				goto out;
		}
	}

out:
	err2 = ext4_journal_stop(handle);
	if (err2 && !err)
		err = err2;

	return err;
}

670
671
/*
 * Iterate through the groups which hold BACKUP superblock/GDT copies in an
 * ext4 filesystem.  The counters should be initialized to 1, 5, and 7 before
 * calling this for the first time.  In a sparse filesystem it will be the
 * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
 * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
 */
static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
				  unsigned *five, unsigned *seven)
{
	unsigned *min = three;
	int mult = 3;
	unsigned ret;

	if (!ext4_has_feature_sparse_super(sb)) {
		/* Non-sparse: every group has a backup; *three is the cursor. */
		ret = *min;
		*min += 1;
		return ret;
	}

	/* Pick the smallest of the three power sequences, then advance it. */
	if (*five < *min) {
		min = five;
		mult = 5;
	}
	if (*seven < *min) {
		min = seven;
		mult = 7;
	}

	ret = *min;
	*min *= mult;

	return ret;
}

/*
 * Check that all of the backup GDT blocks are held in the primary GDT block.
 * It is assumed that they are stored in group order.  Returns the number of
 * groups in current filesystem that have BACKUPS, or -ve error code.
 */
static int verify_reserved_gdb(struct super_block *sb,
			       ext4_group_t end,
			       struct buffer_head *primary)
{
	const ext4_fsblk_t blk = primary->b_blocknr;
	unsigned three = 1;
	unsigned five = 5;
	unsigned seven = 7;
	unsigned grp;
	__le32 *p = (__le32 *)primary->b_data;
	int gdbackups = 0;

	while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
		/* Each backup must sit at the same offset within its group. */
		if (le32_to_cpu(*p++) !=
		    grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
			ext4_warning(sb, "reserved GDT %llu"
				     " missing grp %d (%llu)",
				     blk, grp,
				     grp *
				     (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
				     blk);
			return -EINVAL;
		}
		/* Cannot hold more backups than pointers fit in one block. */
		if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb))
			return -EFBIG;
	}

	return gdbackups;
}

/*
 * Called when we need to bring a reserved group descriptor table block into
 * use from the resize inode.  The primary copy of the new GDT block currently
 * is an indirect block (under the double indirect block in the resize inode).
 * The new backup GDT blocks will be stored as leaf blocks in this indirect
 * block, in group order.  Even though we know all the block numbers we need,
 * we check to ensure that the resize inode has actually reserved these blocks.
 *
 * Don't need to update the block bitmaps because the blocks are still in use.
 *
 * We get all of the error cases out of the way, so that we are sure to not
 * fail once we start modifying the data on disk, because JBD has no rollback.
 */
static int add_new_gdb(handle_t *handle, struct inode *inode,
		       ext4_group_t group)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
	ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
	struct buffer_head **o_group_desc, **n_group_desc;
	struct buffer_head *dind;
	struct buffer_head *gdb_bh;
	int gdbackups;
	struct ext4_iloc iloc;
	__le32 *data;
	int err;

	if (test_opt(sb, DEBUG))
		printk(KERN_DEBUG
		       "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
		       gdb_num);

	gdb_bh = sb_bread(sb, gdblock);
	if (!gdb_bh)
		return -EIO;

	gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
	if (gdbackups < 0) {
		err = gdbackups;
		goto exit_bh;
	}

	/* Read the double-indirect block of the resize inode. */
	data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
	dind = sb_bread(sb, le32_to_cpu(*data));
	if (!dind) {
		err = -EIO;
		goto exit_bh;
	}

	data = (__le32 *)dind->b_data;
	if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
		ext4_warning(sb, "new group %u GDT block %llu not reserved",
			     group, gdblock);
		err = -EINVAL;
		goto exit_dind;
	}

	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
	if (unlikely(err))
		goto exit_dind;

	BUFFER_TRACE(gdb_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, gdb_bh);
	if (unlikely(err))
		goto exit_dind;

	BUFFER_TRACE(dind, "get_write_access");
	err = ext4_journal_get_write_access(handle, dind);
	if (unlikely(err))
		/* NOTE(review): only logged, not propagated — intentional
		 * best-effort here?  Confirm against upstream history. */
		ext4_std_error(sb, err);

	/* ext4_reserve_inode_write() gets a reference on the iloc */
	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (unlikely(err))
		goto exit_dind;

	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
				     sizeof(struct buffer_head *),
				     GFP_NOFS);
	if (!n_group_desc) {
		err = -ENOMEM;
		ext4_warning(sb, "not enough memory for %lu groups",
			     gdb_num + 1);
		goto exit_inode;
	}

	/*
	 * Finally, we have all of the possible failures behind us...
	 *
	 * Remove new GDT block from inode double-indirect block and clear out
	 * the new GDT block for use (which also "frees" the backup GDT blocks
	 * from the reserved inode).  We don't need to change the bitmaps for
	 * these blocks, because they are marked as in-use from being in the
	 * reserved inode, and will become GDT blocks (primary and backup).
	 */
	data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
	err = ext4_handle_dirty_metadata(handle, NULL, dind);
	if (unlikely(err)) {
		ext4_std_error(sb, err);
		goto exit_inode;
	}
	/* The GDT block (and its backups) no longer belong to the inode. */
	inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
	ext4_mark_iloc_dirty(handle, inode, &iloc);
	memset(gdb_bh->b_data, 0, sb->s_blocksize);
	err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
	if (unlikely(err)) {
		ext4_std_error(sb, err);
		goto exit_inode;
	}
	brelse(dind);

	/* Publish the enlarged descriptor array. */
	o_group_desc = EXT4_SB(sb)->s_group_desc;
	memcpy(n_group_desc, o_group_desc,
	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
	n_group_desc[gdb_num] = gdb_bh;
	EXT4_SB(sb)->s_group_desc = n_group_desc;
	EXT4_SB(sb)->s_gdb_count++;
	kvfree(o_group_desc);

	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
	err = ext4_handle_dirty_super(handle, sb);
	if (err)
		ext4_std_error(sb, err);

	return err;

exit_inode:
	kvfree(n_group_desc);
	brelse(iloc.bh);
exit_dind:
	brelse(dind);
exit_bh:
	brelse(gdb_bh);

	ext4_debug("leaving with error %d\n", err);
	return err;
}

880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
/*
 * add_new_gdb_meta_bg is the sister of add_new_gdb.
 *
 * Reads the new group's GDT block, joins it to @handle, and publishes an
 * enlarged s_group_desc array.  Returns 0 on success or a negative error.
 */
static int add_new_gdb_meta_bg(struct super_block *sb,
			       handle_t *handle, ext4_group_t group) {
	ext4_fsblk_t gdblock;
	struct buffer_head *gdb_bh;
	struct buffer_head **o_group_desc, **n_group_desc;
	unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
	int err;

	gdblock = ext4_meta_bg_first_block_no(sb, group) +
		   ext4_bg_has_super(sb, group);
	gdb_bh = sb_bread(sb, gdblock);
	if (!gdb_bh)
		return -EIO;
	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
				     sizeof(struct buffer_head *),
				     GFP_NOFS);
	if (!n_group_desc) {
		brelse(gdb_bh);
		err = -ENOMEM;
		ext4_warning(sb, "not enough memory for %lu groups",
			     gdb_num + 1);
		return err;
	}

	/*
	 * Join the GDT block to the journal *before* publishing it in
	 * s_group_desc.  The original code published first and got write
	 * access afterwards; on failure it returned without brelse(), leaking
	 * the buffer reference while the stale pointer stayed visible in the
	 * descriptor array.
	 */
	BUFFER_TRACE(gdb_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, gdb_bh);
	if (err) {
		kvfree(n_group_desc);
		brelse(gdb_bh);
		return err;
	}

	o_group_desc = EXT4_SB(sb)->s_group_desc;
	memcpy(n_group_desc, o_group_desc,
	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
	n_group_desc[gdb_num] = gdb_bh;
	EXT4_SB(sb)->s_group_desc = n_group_desc;
	EXT4_SB(sb)->s_gdb_count++;
	kvfree(o_group_desc);
	return err;
}

919
920
921
922
923
924
925
926
927
928
929
930
931
932
/*
 * Called when we are adding a new group which has a backup copy of each of
 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
 * We need to add these reserved backup GDT blocks to the resize inode, so
 * that they are kept for future resizing and not allocated to files.
 *
 * Each reserved backup GDT block will go into a different indirect block.
 * The indirect blocks are actually the primary reserved GDT blocks,
 * so we know in advance what their block numbers are.  We only get the
 * double-indirect block to verify it is pointing to the primary reserved
 * GDT blocks so we don't overwrite a data block by accident.  The reserved
 * backup GDT blocks are stored in their reserved primary GDT block.
 */
static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
933
			      ext4_group_t group)
934
935
{
	struct super_block *sb = inode->i_sb;
936
	int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
937
938
	struct buffer_head **primary;
	struct buffer_head *dind;
939
940
	struct ext4_iloc iloc;
	ext4_fsblk_t blk;
941
942
943
944
945
	__le32 *data, *end;
	int gdbackups = 0;
	int res, i;
	int err;

946
	primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
947
948
949
	if (!primary)
		return -ENOMEM;

950
	data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
951
952
953
954
955
956
	dind = sb_bread(sb, le32_to_cpu(*data));
	if (!dind) {
		err = -EIO;
		goto exit_free;
	}

957
	blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
Josef Bacik's avatar
Josef Bacik committed
958
959
	data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
					 EXT4_ADDR_PER_BLOCK(sb));
960
	end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
961
962
963
964

	/* Get each reserved primary GDT block and verify it holds backups */
	for (res = 0; res < reserved_gdb; res++, blk++) {
		if (le32_to_cpu(*data) != blk) {
965
			ext4_warning(sb, "reserved block %llu"
966
967
968
969
970
971
972
973
974
975
976
				     " not at offset %ld",
				     blk,
				     (long)(data - (__le32 *)dind->b_data));
			err = -EINVAL;
			goto exit_bh;
		}
		primary[res] = sb_bread(sb, blk);
		if (!primary[res]) {
			err = -EIO;
			goto exit_bh;
		}
977
978
		gdbackups = verify_reserved_gdb(sb, group, primary[res]);
		if (gdbackups < 0) {
979
980
981
982
983
984
985
986
987
			brelse(primary[res]);
			err = gdbackups;
			goto exit_bh;
		}
		if (++data >= end)
			data = (__le32 *)dind->b_data;
	}

	for (i = 0; i < reserved_gdb; i++) {
988
		BUFFER_TRACE(primary[i], "get_write_access");
989
		if ((err = ext4_journal_get_write_access(handle, primary[i])))
990
991
992
			goto exit_bh;
	}

993
	if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
994
995
996
997
998
999
		goto exit_bh;

	/*
	 * Finally we can add each of the reserved backup GDT blocks from
	 * the new group to its reserved primary GDT block.
	 */
1000
	blk = group * EXT4_BLOCKS_PER_GROUP(sb);