/*
 *  gendisk handling
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/genhd.h>
8
#include <linux/kdev_t.h>
Linus Torvalds's avatar
Linus Torvalds committed
9
10
#include <linux/kernel.h>
#include <linux/blkdev.h>
11
#include <linux/backing-dev.h>
Linus Torvalds's avatar
Linus Torvalds committed
12
13
#include <linux/init.h>
#include <linux/spinlock.h>
14
#include <linux/proc_fs.h>
Linus Torvalds's avatar
Linus Torvalds committed
15
16
17
18
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/kobj_map.h>
19
#include <linux/mutex.h>
20
#include <linux/idr.h>
21
#include <linux/log2.h>
22
#include <linux/pm_runtime.h>
23
#include <linux/badblocks.h>
Linus Torvalds's avatar
Linus Torvalds committed
24

25
26
#include "blk.h"

27
28
static DEFINE_MUTEX(block_class_lock);
struct kobject *block_depr;
Linus Torvalds's avatar
Linus Torvalds committed
29

30
/* for extended dynamic devt allocation, currently only one major is used */
31
#define NR_EXT_DEVT		(1 << MINORBITS)
32

33
/* For extended devt allocation.  ext_devt_lock prevents look up
34
35
 * results from going away underneath its user.
 */
36
static DEFINE_SPINLOCK(ext_devt_lock);
37
38
static DEFINE_IDR(ext_devt_idr);

Adrian Bunk's avatar
Adrian Bunk committed
39
40
static struct device_type disk_type;

Derek Basehore's avatar
Derek Basehore committed
41
42
static void disk_check_events(struct disk_events *ev,
			      unsigned int *clearing_ptr);
43
static void disk_alloc_events(struct gendisk *disk);
44
45
46
47
static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
/**
 * disk_get_part - get partition
 * @disk: disk to look partition from
 * @partno: partition number
 *
 * Look for partition @partno from @disk.  If found, increment
 * reference count and return it.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * Pointer to the found partition on success, NULL if not found.
 */
struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
{
64
65
	struct hd_struct *part = NULL;
	struct disk_part_tbl *ptbl;
66

67
	if (unlikely(partno < 0))
68
		return NULL;
69

70
	rcu_read_lock();
71
72
73
74
75
76
77
78

	ptbl = rcu_dereference(disk->part_tbl);
	if (likely(partno < ptbl->len)) {
		part = rcu_dereference(ptbl->part[partno]);
		if (part)
			get_device(part_to_dev(part));
	}

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
	rcu_read_unlock();

	return part;
}
EXPORT_SYMBOL_GPL(disk_get_part);

/**
 * disk_part_iter_init - initialize partition iterator
 * @piter: iterator to initialize
 * @disk: disk to iterate over
 * @flags: DISK_PITER_* flags
 *
 * Initialize @piter so that it iterates over partitions of @disk.
 *
 * CONTEXT:
 * Don't care.
 */
void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
			  unsigned int flags)
{
99
100
101
102
103
	struct disk_part_tbl *ptbl;

	rcu_read_lock();
	ptbl = rcu_dereference(disk->part_tbl);

104
105
106
107
	piter->disk = disk;
	piter->part = NULL;

	if (flags & DISK_PITER_REVERSE)
108
		piter->idx = ptbl->len - 1;
109
	else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
110
		piter->idx = 0;
Tejun Heo's avatar
Tejun Heo committed
111
112
	else
		piter->idx = 1;
113
114

	piter->flags = flags;
115
116

	rcu_read_unlock();
117
118
119
120
121
122
123
124
125
126
127
128
129
130
}
EXPORT_SYMBOL_GPL(disk_part_iter_init);

/**
 * disk_part_iter_next - proceed iterator to the next partition and return it
 * @piter: iterator of interest
 *
 * Proceed @piter to the next partition and return it.
 *
 * CONTEXT:
 * Don't care.
 */
struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
{
131
	struct disk_part_tbl *ptbl;
132
133
134
135
136
137
	int inc, end;

	/* put the last partition */
	disk_put_part(piter->part);
	piter->part = NULL;

138
	/* get part_tbl */
139
	rcu_read_lock();
140
	ptbl = rcu_dereference(piter->disk->part_tbl);
141
142
143
144

	/* determine iteration parameters */
	if (piter->flags & DISK_PITER_REVERSE) {
		inc = -1;
145
146
		if (piter->flags & (DISK_PITER_INCL_PART0 |
				    DISK_PITER_INCL_EMPTY_PART0))
Tejun Heo's avatar
Tejun Heo committed
147
148
149
			end = -1;
		else
			end = 0;
150
151
	} else {
		inc = 1;
152
		end = ptbl->len;
153
154
155
156
157
158
	}

	/* iterate to the next partition */
	for (; piter->idx != end; piter->idx += inc) {
		struct hd_struct *part;

159
		part = rcu_dereference(ptbl->part[piter->idx]);
160
161
		if (!part)
			continue;
162
		if (!part_nr_sects_read(part) &&
163
164
165
		    !(piter->flags & DISK_PITER_INCL_EMPTY) &&
		    !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
		      piter->idx == 0))
166
167
			continue;

168
		get_device(part_to_dev(part));
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
		piter->part = part;
		piter->idx += inc;
		break;
	}

	rcu_read_unlock();

	return piter->part;
}
EXPORT_SYMBOL_GPL(disk_part_iter_next);

/**
 * disk_part_iter_exit - tear down a partition iterator
 * @piter: iterator of interest
 *
 * To be called once iteration is finished.  Drops the reference on the
 * partition the iterator is still holding, if any, and clears it.
 *
 * CONTEXT:
 * Don't care.
 */
void disk_part_iter_exit(struct disk_part_iter *piter)
{
	struct hd_struct *held = piter->part;

	piter->part = NULL;
	disk_put_part(held);
}
EXPORT_SYMBOL_GPL(disk_part_iter_exit);

196
197
198
/*
 * Return non-zero when @sector falls inside @part, i.e. within
 * [start_sect, start_sect + number of sectors).
 */
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
{
	if (sector < part->start_sect)
		return 0;

	return sector < part->start_sect + part_nr_sects_read(part);
}

202
203
204
205
206
207
208
209
210
211
212
213
214
/**
 * disk_map_sector_rcu - map sector to partition
 * @disk: gendisk of interest
 * @sector: sector to map
 *
 * Find out which partition of @disk contains @sector.  This is
 * primarily used for stats accounting.  The most recent hit is cached
 * in the partition table and tried first.
 *
 * CONTEXT:
 * RCU read locked.  The returned partition pointer is valid only
 * while preemption is disabled.
 *
 * RETURNS:
 * Found partition on success, part0 is returned if no partition matches
 */
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
{
	struct disk_part_tbl *tbl = rcu_dereference(disk->part_tbl);
	struct hd_struct *hit;
	int slot;

	/* fast path: same partition as the previous lookup */
	hit = rcu_dereference(tbl->last_lookup);
	if (hit && sector_in_part(hit, sector))
		return hit;

	/* slow path: scan every slot after part0 */
	for (slot = 1; slot < tbl->len; slot++) {
		hit = rcu_dereference(tbl->part[slot]);
		if (!hit || !sector_in_part(hit, sector))
			continue;

		rcu_assign_pointer(tbl->last_lookup, hit);
		return hit;
	}

	return &disk->part0;
}
EXPORT_SYMBOL_GPL(disk_map_sector_rcu);

Linus Torvalds's avatar
Linus Torvalds committed
241
242
243
244
245
246
247
248
/*
 * Can be deleted altogether. Later.
 *
 * One registered block major: its number, its name, and the link to
 * the next entry sharing the same hash bucket.
 */
struct blk_major_name {
	struct blk_major_name *next;
	int major;
	char name[16];
};

/* hash table of registered majors, one singly linked chain per bucket */
static struct blk_major_name *major_names[BLKDEV_MAJOR_HASH_SIZE];
Linus Torvalds's avatar
Linus Torvalds committed
250
251

/* index in the above - for now: assume no multimajor ranges */
252
static inline int major_to_index(unsigned major)
Linus Torvalds's avatar
Linus Torvalds committed
253
{
254
	return major % BLKDEV_MAJOR_HASH_SIZE;
255
256
}

257
#ifdef CONFIG_PROC_FS
258
/*
 * Print every registered major chained in bucket @offset of
 * major_names[] to @seqf, one "major name" line each.  Offsets at or
 * beyond BLKDEV_MAJOR_HASH_SIZE print nothing.
 */
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *entry;

	if (!(offset < BLKDEV_MAJOR_HASH_SIZE))
		return;

	/* the chain may only be walked with block_class_lock held */
	mutex_lock(&block_class_lock);
	entry = major_names[offset];
	while (entry) {
		seq_printf(seqf, "%3d %s\n", entry->major, entry->name);
		entry = entry->next;
	}
	mutex_unlock(&block_class_lock);
}
269
#endif /* CONFIG_PROC_FS */
Linus Torvalds's avatar
Linus Torvalds committed
270

271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
/**
 * register_blkdev - register a new block device
 *
 * @major: the requested major device number [1..255]. If @major=0, try to
 *         allocate any unused major number.
 * @name: the name of the new block device as a zero terminated string
 *
 * The @name must be unique within the system.
 *
 * The return value depends on the @major input parameter.
 *  - if a major device number was requested in range [1..255] then the
 *    function returns zero on success, or a negative error code
 *  - if any unused major number was requested with @major=0 parameter
 *    then the return value is the allocated major number in range
 *    [1..255] or a negative error code otherwise
 */
Linus Torvalds's avatar
Linus Torvalds committed
287
288
289
290
291
int register_blkdev(unsigned int major, const char *name)
{
	struct blk_major_name **n, *p;
	int index, ret = 0;

292
	mutex_lock(&block_class_lock);
Linus Torvalds's avatar
Linus Torvalds committed
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336

	/* temporary */
	if (major == 0) {
		for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
			if (major_names[index] == NULL)
				break;
		}

		if (index == 0) {
			printk("register_blkdev: failed to get major for %s\n",
			       name);
			ret = -EBUSY;
			goto out;
		}
		major = index;
		ret = major;
	}

	p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
	if (p == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	p->major = major;
	strlcpy(p->name, name, sizeof(p->name));
	p->next = NULL;
	index = major_to_index(major);

	for (n = &major_names[index]; *n; n = &(*n)->next) {
		if ((*n)->major == major)
			break;
	}
	if (!*n)
		*n = p;
	else
		ret = -EBUSY;

	if (ret < 0) {
		printk("register_blkdev: cannot get major %d for %s\n",
		       major, name);
		kfree(p);
	}
out:
337
	mutex_unlock(&block_class_lock);
Linus Torvalds's avatar
Linus Torvalds committed
338
339
340
341
342
	return ret;
}

EXPORT_SYMBOL(register_blkdev);

343
void unregister_blkdev(unsigned int major, const char *name)
Linus Torvalds's avatar
Linus Torvalds committed
344
345
346
347
348
{
	struct blk_major_name **n;
	struct blk_major_name *p = NULL;
	int index = major_to_index(major);

349
	mutex_lock(&block_class_lock);
Linus Torvalds's avatar
Linus Torvalds committed
350
351
352
	for (n = &major_names[index]; *n; n = &(*n)->next)
		if ((*n)->major == major)
			break;
353
354
355
	if (!*n || strcmp((*n)->name, name)) {
		WARN_ON(1);
	} else {
Linus Torvalds's avatar
Linus Torvalds committed
356
357
358
		p = *n;
		*n = p->next;
	}
359
	mutex_unlock(&block_class_lock);
Linus Torvalds's avatar
Linus Torvalds committed
360
361
362
363
364
365
366
	kfree(p);
}

EXPORT_SYMBOL(unregister_blkdev);

static struct kobj_map *bdev_map;

367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
/**
 * blk_mangle_minor - scatter minor numbers apart
 * @minor: minor number to mangle
 *
 * When CONFIG_DEBUG_BLOCK_EXT_DEVT is set, swap each bit in the lower
 * half of the MINORBITS-wide value with its mirror bit in the upper
 * half, so consecutively allocated minors end up far apart.  Applying
 * the function twice yields the original value.  Without the config
 * option @minor is returned untouched.
 *
 * RETURNS:
 * Mangled value.
 *
 * CONTEXT:
 * Don't care.
 */
static int blk_mangle_minor(int minor)
{
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
	int lo_pos;

	for (lo_pos = 0; lo_pos < MINORBITS / 2; lo_pos++) {
		int hi_pos = MINORBITS - 1 - lo_pos;
		int gap = hi_pos - lo_pos;
		int lo_bit = minor & (1 << lo_pos);
		int hi_bit = minor & (1 << hi_pos);

		/* drop both bits, then put each one back in the other's place */
		minor ^= lo_bit | hi_bit;
		minor |= (lo_bit << gap) | (hi_bit >> gap);
	}
#endif
	return minor;
}

399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
/**
 * blk_alloc_devt - allocate a dev_t for a partition
 * @part: partition to allocate dev_t for
 * @devt: out parameter for resulting dev_t
 *
 * Partitions whose number lies below the disk's minor count get a
 * dev_t from the disk's own major/minor range.  All others receive an
 * id from the extended devt idr under BLOCK_EXT_MAJOR.
 *
 * RETURNS:
 * 0 on success, allocated dev_t is returned in *@devt.  -errno on
 * failure (-EBUSY when the extended id space is exhausted).
 *
 * CONTEXT:
 * Might sleep.
 */
int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
{
	struct gendisk *disk = part_to_disk(part);
	int id;

	/* fits in the disk's consecutive minor range? */
	if (part->partno < disk->minors) {
		*devt = MKDEV(disk->major, disk->first_minor + part->partno);
		return 0;
	}

	/* otherwise hand out an extended devt */
	idr_preload(GFP_KERNEL);
	spin_lock_bh(&ext_devt_lock);
	id = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
	spin_unlock_bh(&ext_devt_lock);
	idr_preload_end();

	if (id == -ENOSPC)
		return -EBUSY;
	if (id < 0)
		return id;

	*devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(id));
	return 0;
}

/**
 * blk_free_devt - free a dev_t
 * @devt: dev_t to free
 *
 * Release @devt which was allocated using blk_alloc_devt().  Only
 * dev_t values under BLOCK_EXT_MAJOR own an idr entry; any other
 * value, including MKDEV(0, 0), is left alone.
 *
 * CONTEXT:
 * Might sleep.
 */
void blk_free_devt(dev_t devt)
{
	if (devt == MKDEV(0, 0) || MAJOR(devt) != BLOCK_EXT_MAJOR)
		return;

	spin_lock_bh(&ext_devt_lock);
	idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
	spin_unlock_bh(&ext_devt_lock);
}

460
461
462
463
464
465
466
467
468
469
470
471
/*
 * Format @devt into @buf (at least BDEVT_SIZE bytes) and return @buf.
 * When both major and minor fit in eight bits the compact "MMmm" hex
 * form is used, left-justified in a nine character field; otherwise
 * the wide "MMM:mmmmm" hex form is produced.
 */
static char *bdevt_str(dev_t devt, char *buf)
{
	const unsigned int ma = MAJOR(devt);
	const unsigned int mi = MINOR(devt);
	char compact[BDEVT_SIZE];

	if (ma > 0xff || mi > 0xff) {
		snprintf(buf, BDEVT_SIZE, "%03x:%05x", ma, mi);
		return buf;
	}

	snprintf(compact, BDEVT_SIZE, "%02x%02x", ma, mi);
	snprintf(buf, BDEVT_SIZE, "%-9s", compact);
	return buf;
}

Linus Torvalds's avatar
Linus Torvalds committed
472
473
474
475
476
/*
 * Register device numbers dev..(dev+range-1)
 * range must be nonzero
 * The hash chain is sorted on range, so that subranges can override.
 */
477
void blk_register_region(dev_t devt, unsigned long range, struct module *module,
Linus Torvalds's avatar
Linus Torvalds committed
478
479
480
			 struct kobject *(*probe)(dev_t, int *, void *),
			 int (*lock)(dev_t, void *), void *data)
{
481
	kobj_map(bdev_map, devt, range, module, probe, lock, data);
Linus Torvalds's avatar
Linus Torvalds committed
482
483
484
485
}

EXPORT_SYMBOL(blk_register_region);

486
/*
 * Drop device numbers devt..(devt+range-1) from the block device map
 * by handing them to kobj_unmap().
 */
void blk_unregister_region(dev_t devt, unsigned long range)
{
	struct kobj_map *map = bdev_map;

	kobj_unmap(map, devt, range);
}

EXPORT_SYMBOL(blk_unregister_region);

493
/*
 * Probe callback for regions registered by device_add_disk(): @data is
 * the gendisk, and the kobject of its device is returned.  @devt and
 * @partno are not used.
 */
static struct kobject *exact_match(dev_t devt, int *partno, void *data)
{
	struct gendisk *disk = data;
	struct device *ddev = disk_to_dev(disk);

	return &ddev->kobj;
}

500
/*
 * Lock callback for regions registered by device_add_disk(): try to
 * take a reference on the gendisk passed in @data.  Returns 0 when
 * get_disk() succeeds and -1 when it does not.
 */
static int exact_lock(dev_t devt, void *data)
{
	struct gendisk *disk = data;

	return get_disk(disk) ? 0 : -1;
}

509
static void register_disk(struct device *parent, struct gendisk *disk)
510
511
512
513
514
515
516
{
	struct device *ddev = disk_to_dev(disk);
	struct block_device *bdev;
	struct disk_part_iter piter;
	struct hd_struct *part;
	int err;

517
	ddev->parent = parent;
518

519
	dev_set_name(ddev, "%s", disk->disk_name);
520
521
522
523
524
525
526
527
528
529
530
531
532
533

	/* delay uevents, until we scanned partition table */
	dev_set_uevent_suppress(ddev, 1);

	if (device_add(ddev))
		return;
	if (!sysfs_deprecated) {
		err = sysfs_create_link(block_depr, &ddev->kobj,
					kobject_name(&ddev->kobj));
		if (err) {
			device_del(ddev);
			return;
		}
	}
534
535
536
537
538
539
540
541

	/*
	 * avoid probable deadlock caused by allocating memory with
	 * GFP_KERNEL in runtime_resume callback of its all ancestor
	 * devices
	 */
	pm_runtime_set_memalloc_noio(ddev, true);

542
543
544
545
	disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);

	/* No minors to use for partitions */
Tejun Heo's avatar
Tejun Heo committed
546
	if (!disk_part_scan_enabled(disk))
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
		goto exit;

	/* No such device (e.g., media were just removed) */
	if (!get_capacity(disk))
		goto exit;

	bdev = bdget_disk(disk, 0);
	if (!bdev)
		goto exit;

	bdev->bd_invalidated = 1;
	err = blkdev_get(bdev, FMODE_READ, NULL);
	if (err < 0)
		goto exit;
	blkdev_put(bdev, FMODE_READ);

exit:
	/* announce disk after possible partitions are created */
	dev_set_uevent_suppress(ddev, 0);
	kobject_uevent(&ddev->kobj, KOBJ_ADD);

	/* announce possible partitions */
	disk_part_iter_init(&piter, disk, 0);
	while ((part = disk_part_iter_next(&piter)))
		kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
	disk_part_iter_exit(&piter);
}

575
576
577
578
579
580
581
582
583
584
585
586
587
588
/*
 * Drop one reference on @disk_devt (NULL is accepted and ignored).
 * When the count reaches zero the object's release callback runs.
 */
void put_disk_devt(struct disk_devt *disk_devt)
{
	if (!disk_devt)
		return;

	if (atomic_dec_and_test(&disk_devt->count))
		disk_devt->release(disk_devt);
}
EXPORT_SYMBOL(put_disk_devt);

/* Take one reference on @disk_devt; NULL is accepted and ignored. */
void get_disk_devt(struct disk_devt *disk_devt)
{
	if (!disk_devt)
		return;

	atomic_inc(&disk_devt->count);
}
EXPORT_SYMBOL(get_disk_devt);

Linus Torvalds's avatar
Linus Torvalds committed
589
/**
590
591
 * device_add_disk - add partitioning information to kernel list
 * @parent: parent device for the disk
Linus Torvalds's avatar
Linus Torvalds committed
592
593
594
595
 * @disk: per-device partitioning information
 *
 * This function registers the partitioning information in @disk
 * with the kernel.
596
597
 *
 * FIXME: error handling
Linus Torvalds's avatar
Linus Torvalds committed
598
 */
599
void device_add_disk(struct device *parent, struct gendisk *disk)
Linus Torvalds's avatar
Linus Torvalds committed
600
{
601
	struct backing_dev_info *bdi;
602
	dev_t devt;
603
	int retval;
604

605
606
607
608
609
610
611
	/* minors == 0 indicates to use ext devt from part0 and should
	 * be accompanied with EXT_DEVT flag.  Make sure all
	 * parameters make sense.
	 */
	WARN_ON(disk->minors && !(disk->major || disk->first_minor));
	WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));

Linus Torvalds's avatar
Linus Torvalds committed
612
	disk->flags |= GENHD_FL_UP;
613
614
615
616
617
618
619
620
621
622
623
624
625
626

	retval = blk_alloc_devt(&disk->part0, &devt);
	if (retval) {
		WARN_ON(1);
		return;
	}
	disk_to_dev(disk)->devt = devt;

	/* ->major and ->first_minor aren't supposed to be
	 * dereferenced from here on, but set them just in case.
	 */
	disk->major = MAJOR(devt);
	disk->first_minor = MINOR(devt);

627
628
	disk_alloc_events(disk);

629
630
631
632
633
634
635
	/*
	 * Take a reference on the devt and assign it to queue since it
	 * must not be reallocated while the bdi is registered
	 */
	disk->queue->disk_devt = disk->disk_devt;
	get_disk_devt(disk->disk_devt);

636
	/* Register BDI before referencing it from bdev */
637
	bdi = disk->queue->backing_dev_info;
638
	bdi_register_owner(bdi, disk_to_dev(disk));
639

640
641
	blk_register_region(disk_devt(disk), disk->minors, NULL,
			    exact_match, exact_lock, disk);
642
	register_disk(parent, disk);
Linus Torvalds's avatar
Linus Torvalds committed
643
	blk_register_queue(disk);
644

645
646
647
648
	/*
	 * Take an extra ref on queue which will be put on disk_release()
	 * so that it sticks around as long as @disk is there.
	 */
649
	WARN_ON_ONCE(!blk_get_queue(disk->queue));
650

651
652
	retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
				   "bdi");
653
	WARN_ON(retval);
Linus Torvalds's avatar
Linus Torvalds committed
654

655
	disk_add_events(disk);
656
	blk_integrity_add(disk);
Linus Torvalds's avatar
Linus Torvalds committed
657
}
658
EXPORT_SYMBOL(device_add_disk);
Linus Torvalds's avatar
Linus Torvalds committed
659

660
void del_gendisk(struct gendisk *disk)
Linus Torvalds's avatar
Linus Torvalds committed
661
{
662
663
664
	struct disk_part_iter piter;
	struct hd_struct *part;

665
	blk_integrity_del(disk);
666
667
	disk_del_events(disk);

668
669
670
671
672
	/* invalidate stuff */
	disk_part_iter_init(&piter, disk,
			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
	while ((part = disk_part_iter_next(&piter))) {
		invalidate_partition(disk, part->partno);
673
		bdev_unhash_inode(part_devt(part));
674
675
676
677
678
		delete_partition(disk, part->partno);
	}
	disk_part_iter_exit(&piter);

	invalidate_partition(disk, 0);
679
	bdev_unhash_inode(disk_devt(disk));
680
681
682
	set_capacity(disk, 0);
	disk->flags &= ~GENHD_FL_UP;

683
	sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
Linus Torvalds's avatar
Linus Torvalds committed
684
	blk_unregister_queue(disk);
685
	blk_unregister_region(disk_devt(disk), disk->minors);
686
687
688
689
690
691
692
693

	part_stat_set_all(&disk->part0, 0);
	disk->part0.stamp = 0;

	kobject_put(disk->part0.holder_dir);
	kobject_put(disk->slave_dir);
	if (!sysfs_deprecated)
		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
694
	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
695
	device_del(disk_to_dev(disk));
Linus Torvalds's avatar
Linus Torvalds committed
696
}
697
EXPORT_SYMBOL(del_gendisk);
Linus Torvalds's avatar
Linus Torvalds committed
698

699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
/*
 * sysfs access to bad-blocks list.
 *
 * Show handler: a disk without a bad-blocks table yields a single
 * newline; otherwise the table is formatted by badblocks_show().
 */
static ssize_t disk_badblocks_show(struct device *dev,
					struct device_attribute *attr,
					char *page)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (disk->bb)
		return badblocks_show(disk->bb, page, 0);

	return sprintf(page, "\n");
}

/*
 * Store handler for the bad-blocks list: forwards the written text to
 * badblocks_store(), or fails with -ENXIO when the disk has no
 * bad-blocks table.
 */
static ssize_t disk_badblocks_store(struct device *dev,
					struct device_attribute *attr,
					const char *page, size_t len)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (disk->bb)
		return badblocks_store(disk->bb, page, len, 0);

	return -ENXIO;
}

Linus Torvalds's avatar
Linus Torvalds committed
724
725
/**
 * get_gendisk - get partitioning information for a given device
726
 * @devt: device to get partitioning information for
727
 * @partno: returned partition index
Linus Torvalds's avatar
Linus Torvalds committed
728
729
 *
 * This function gets the structure containing partitioning
730
 * information for the given device @devt.
Linus Torvalds's avatar
Linus Torvalds committed
731
 */
732
struct gendisk *get_gendisk(dev_t devt, int *partno)
Linus Torvalds's avatar
Linus Torvalds committed
733
{
734
735
736
737
738
739
740
741
742
743
744
	struct gendisk *disk = NULL;

	if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
		struct kobject *kobj;

		kobj = kobj_lookup(bdev_map, devt, partno);
		if (kobj)
			disk = dev_to_disk(kobj_to_dev(kobj));
	} else {
		struct hd_struct *part;

745
		spin_lock_bh(&ext_devt_lock);
746
		part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
747
748
749
750
		if (part && get_disk(part_to_disk(part))) {
			*partno = part->partno;
			disk = part_to_disk(part);
		}
751
		spin_unlock_bh(&ext_devt_lock);
752
	}
753

754
	return disk;
Linus Torvalds's avatar
Linus Torvalds committed
755
}
756
EXPORT_SYMBOL(get_gendisk);
Linus Torvalds's avatar
Linus Torvalds committed
757

758
759
760
761
762
763
764
765
766
767
768
769
770
/**
 * bdget_disk - do bdget() by gendisk and partition number
 * @disk: gendisk of interest
 * @partno: partition number
 *
 * Find partition @partno from @disk, do bdget() on it.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * Resulting block_device on success, NULL on failure.
 */
771
struct block_device *bdget_disk(struct gendisk *disk, int partno)
772
{
773
774
	struct hd_struct *part;
	struct block_device *bdev = NULL;
775

776
	part = disk_get_part(disk, partno);
777
	if (part)
778
779
		bdev = bdget(part_devt(part));
	disk_put_part(part);
780

781
	return bdev;
782
783
784
}
EXPORT_SYMBOL(bdget_disk);

785
786
787
788
789
790
791
/*
 * print a full list of all partitions - intended for places where the root
 * filesystem can't be mounted and thus to give the victim some idea of what
 * went wrong
 *
 * Every disk of the block class is walked.  For each one the whole
 * disk (part0) and all its non-empty partitions are printed, one per
 * line: device number, size in 1 KiB units, name and partition UUID.
 * The part0 line additionally names the driver of the parent device.
 */
void __init printk_all_partitions(void)
{
	struct class_dev_iter iter;
	struct device *dev;

	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
		struct gendisk *disk = dev_to_disk(dev);
		struct disk_part_iter piter;
		struct hd_struct *part;
		char name_buf[BDEVNAME_SIZE];
		char devt_buf[BDEVT_SIZE];

		/*
		 * Don't show empty devices or things that have been
		 * suppressed
		 */
		if (get_capacity(disk) == 0 ||
		    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
			continue;

		/*
		 * Note, unlike /proc/partitions, I am showing the
		 * numbers in hex - the same format as the root=
		 * option takes.
		 */
		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
		while ((part = disk_part_iter_next(&piter))) {
			bool is_part0 = part == &disk->part0;

			/* first half of the line, deliberately left open */
			printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
			       bdevt_str(part_devt(part), devt_buf),
			       (unsigned long long)part_nr_sects_read(part) >> 1
			       , disk_name(disk, part->partno, name_buf),
			       part->info ? part->info->uuid : "");

			/*
			 * The remainder continues the line started above, so
			 * it must be marked KERN_CONT; otherwise it is logged
			 * as a separate record and the line is split.
			 */
			if (is_part0) {
				if (dev->parent && dev->parent->driver)
					printk(KERN_CONT " driver: %s\n",
					      dev->parent->driver->name);
				else
					printk(KERN_CONT " (driver?)\n");
			} else
				printk(KERN_CONT "\n");
		}
		disk_part_iter_exit(&piter);
	}
	class_dev_iter_exit(&iter);
}

Linus Torvalds's avatar
Linus Torvalds committed
839
840
#ifdef CONFIG_PROC_FS
/* iterator */
841
/*
 * seq_file ->start: allocate a class device iterator over all disks,
 * remember it in @seqf->private and skip ahead *@pos entries.
 *
 * Returns the gendisk at position *@pos, NULL when the list is shorter
 * than that, or ERR_PTR(-ENOMEM) when the iterator cannot be allocated.
 */
static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
{
	struct class_dev_iter *iter;
	struct device *dev;
	loff_t remaining = *pos;

	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return ERR_PTR(-ENOMEM);

	seqf->private = iter;
	class_dev_iter_init(iter, &block_class, NULL, &disk_type);

	for (;;) {
		dev = class_dev_iter_next(iter);
		if (!dev)
			return NULL;
		/* stop once the requested number of entries was skipped */
		if (remaining-- == 0)
			break;
	}

	return dev_to_disk(dev);
}

862
/*
 * seq_file ->next: bump *@pos and return the next disk from the
 * iterator kept in @seqf->private, or NULL at the end of the list.
 */
static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
{
	struct device *dev;

	++*pos;
	dev = class_dev_iter_next(seqf->private);

	return dev ? dev_to_disk(dev) : NULL;
}

874
static void disk_seqf_stop(struct seq_file *seqf, void *v)
875
{
876
	struct class_dev_iter *iter = seqf->private;
877

878
879
880
881
	/* stop is called even after start failed :-( */
	if (iter) {
		class_dev_iter_exit(iter);
		kfree(iter);
882
		seqf->private = NULL;
883
	}
Linus Torvalds's avatar
Linus Torvalds committed
884
885
}

886
/*
 * ->start for the partitions listing: same as disk_seqf_start(), but
 * emits the column header when a valid first entry is returned for
 * position 0.
 */
static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
{
	void *first = disk_seqf_start(seqf, pos);

	if (IS_ERR_OR_NULL(first) || *pos)
		return first;

	seq_puts(seqf, "major minor  #blocks  name\n\n");
	return first;
}

896
static int show_partition(struct seq_file *seqf, void *v)
Linus Torvalds's avatar
Linus Torvalds committed
897
898
{
	struct gendisk *sgp = v;
899
900
	struct disk_part_iter piter;
	struct hd_struct *part;
Linus Torvalds's avatar
Linus Torvalds committed
901
902
903
	char buf[BDEVNAME_SIZE];

	/* Don't show non-partitionable removeable devices or empty devices */
Tejun Heo's avatar
Tejun Heo committed
904
	if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
905
				   (sgp->flags & GENHD_FL_REMOVABLE)))
Linus Torvalds's avatar
Linus Torvalds committed
906
907
908
909
910
		return 0;
	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
		return 0;

	/* show the full disk and all non-0 size partitions of it */
911
	disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
912
	while ((part = disk_part_iter_next(&piter)))
913
		seq_printf(seqf, "%4d  %7d %10llu %s\n",
914
			   MAJOR(part_devt(part)), MINOR(part_devt(part)),
915
			   (unsigned long long)part_nr_sects_read(part) >> 1,
916
			   disk_name(sgp, part->partno, buf));
917
	disk_part_iter_exit(&piter);
Linus Torvalds's avatar
Linus Torvalds committed
918
919
920
921

	return 0;
}

922
static const struct seq_operations partitions_op = {
923
924
925
	.start	= show_partition_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
926
	.show	= show_partition
Linus Torvalds's avatar
Linus Torvalds committed
927
};
928
929
930
931
932
933
934
935
936
937
938
939

/* ->open: attach the partitions seq_file operations to @file */
static int partitions_open(struct inode *inode, struct file *file)
{
	const struct seq_operations *ops = &partitions_op;

	return seq_open(file, ops);
}

/* file operations for the partitions listing; all seq_file based */
static const struct file_operations proc_partitions_operations = {
	.open = partitions_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
Linus Torvalds's avatar
Linus Torvalds committed
940
941
942
#endif


943
/*
 * Default probe of the block device map: ask for the module matching
 * @devt to be loaded, falling back to the major-only alias when the
 * first request returns a positive value.  Always returns NULL.
 */
static struct kobject *base_probe(dev_t devt, int *partno, void *data)
{
	int rc;

	rc = request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt));
	if (rc > 0) {
		/* Make old-style 2.4 aliases work */
		request_module("block-major-%d", MAJOR(devt));
	}

	return NULL;
}

/*
 * Subsystem init: register the block class, create the block device
 * map, initialise the block layer, register the extended major and,
 * unless sysfs_deprecated is set, create the top-level "block"
 * directory.  Returns the class_register() error, or 0.
 */
static int __init genhd_device_init(void)
{
	int rc;

	block_class.dev_kobj = sysfs_dev_block_kobj;
	rc = class_register(&block_class);
	if (unlikely(rc))
		return rc;

	bdev_map = kobj_map_init(base_probe, &block_class_lock);
	blk_dev_init();

	register_blkdev(BLOCK_EXT_MAJOR, "blkext");

	/* create top-level block dir */
	if (!sysfs_deprecated)
		block_depr = kobject_create_and_add("block", NULL);

	return 0;
}

subsys_initcall(genhd_device_init);

972
973
static ssize_t disk_range_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
Linus Torvalds's avatar
Linus Torvalds committed
974
{
975
	struct gendisk *disk = dev_to_disk(dev);
Linus Torvalds's avatar
Linus Torvalds committed
976

977
	return sprintf(buf, "%d\n", disk->minors);
Linus Torvalds's avatar
Linus Torvalds committed
978
979
}

980
981
982
983
984
/* sysfs show handler: print the result of disk_max_parts() in decimal */
static ssize_t disk_ext_range_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int max_parts = disk_max_parts(disk);

	return sprintf(buf, "%d\n", max_parts);
}

988
989
static ssize_t disk_removable_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
990
{
991
	struct gendisk *disk = dev_to_disk(dev);
992

993
994
	return sprintf(buf, "%d\n",
		       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
995
996
}

Kay Sievers's avatar
Kay Sievers committed
997
998
999
1000
static ssize_t disk_ro_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);