/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@suse.de> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>

#include <asm/uaccess.h>

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Decide whether @bio may be merged into @rq.
 *
 * Returns 1 only when all of the following hold, 0 otherwise:
 *  - rq_mergeable() accepts the request,
 *  - bio and request move data in the same direction,
 *  - both target the same disk,
 *  - the request has neither ->waiting nor ->special set.
 */
inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
	/* the request itself must allow merging */
	if (!rq_mergeable(rq))
		return 0;

	/* never mix data directions within one request */
	if (rq_data_dir(rq) != bio_data_dir(bio))
		return 0;

	/* the bio has to be aimed at the disk the request is for */
	if (rq->rq_disk != bio->bi_bdev->bd_disk)
		return 0;

	/* requests carrying a waiter or special data are left alone */
	if (rq->waiting || rq->special)
		return 0;

	return 1;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

67
static inline int elv_try_merge(struct request *__rq, struct bio *bio)
Linus Torvalds's avatar
Linus Torvalds committed
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
{
	int ret = ELEVATOR_NO_MERGE;

	/*
	 * we can merge and sequence is ok, check if it's possible
	 */
	if (elv_rq_merge_ok(__rq, bio)) {
		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
			ret = ELEVATOR_BACK_MERGE;
		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
			ret = ELEVATOR_FRONT_MERGE;
	}

	return ret;
}

/*
 * Look up a registered elevator type by name.
 *
 * Walks elv_list and returns the first entry whose elevator_name equals
 * @name, or NULL when there is none.  This helper neither takes
 * elv_list_lock nor a module reference itself.
 */
static struct elevator_type *elevator_find(const char *name)
{
	struct list_head *pos;

	list_for_each(pos, &elv_list) {
		struct elevator_type *cand =
			list_entry(pos, struct elevator_type, list);

		if (strcmp(cand->elevator_name, name) == 0)
			return cand;
	}

	return NULL;
}

/*
 * Release the module reference on the owner of elevator type @e
 * (the counterpart of the try_module_get() done in elevator_get()).
 */
static void elevator_put(struct elevator_type *e)
{
	struct module *owner = e->elevator_owner;

	module_put(owner);
}

static struct elevator_type *elevator_get(const char *name)
{
110
	struct elevator_type *e;
Linus Torvalds's avatar
Linus Torvalds committed
111

112
113
114
115
116
117
118
	spin_lock_irq(&elv_list_lock);

	e = elevator_find(name);
	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock_irq(&elv_list_lock);
Linus Torvalds's avatar
Linus Torvalds committed
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

	return e;
}

/*
 * Bind elevator type @e to queue @q using the caller-allocated @eq.
 *
 * @eq is zeroed, pointed at the type and its ops, and installed as
 * q->elevator before the type's elevator_init_fn (if any) is invoked.
 * Returns that hook's result, or 0 when the type has no init hook.
 * Note that q->elevator is left pointing at @eq even on failure.
 */
static int elevator_attach(request_queue_t *q, struct elevator_type *e,
			   struct elevator_queue *eq)
{
	memset(eq, 0, sizeof(*eq));
	eq->elevator_type = e;
	eq->ops = &e->ops;

	q->elevator = eq;

	if (!eq->ops->elevator_init_fn)
		return 0;

	return eq->ops->elevator_init_fn(q, eq);
}

static char chosen_elevator[16];

static void elevator_setup_default(void)
{
144
145
	struct elevator_type *e;

Linus Torvalds's avatar
Linus Torvalds committed
146
	/*
147
	 * If default has not been set, use the compiled-in selection.
Linus Torvalds's avatar
Linus Torvalds committed
148
	 */
149
150
151
152
153
154
	if (!chosen_elevator[0])
		strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED);

 	/*
 	 * If the given scheduler is not available, fall back to no-op.
 	 */
155
156
157
 	if ((e = elevator_find(chosen_elevator)))
		elevator_put(e);
	else
158
 		strcpy(chosen_elevator, "noop");
Linus Torvalds's avatar
Linus Torvalds committed
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
}

/*
 * Handler for the "elevator=" boot parameter: remember the requested
 * scheduler name in chosen_elevator.
 *
 * At most sizeof(chosen_elevator) - 1 bytes are copied; the buffer is a
 * zero-initialised static, so its final byte stays NUL.
 *
 * Returns 1 to tell the boot-parameter code that the option has been
 * consumed.  Returning 0 would report it as unhandled.
 */
static int __init elevator_setup(char *str)
{
	strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);

/*
 * Set up the io scheduler for queue @q.
 *
 * @name selects the scheduler; NULL means the boot-time/compiled-in
 * default held in chosen_elevator.  The queue's dispatch list and the
 * merge/boundary bookkeeping are reset first.
 *
 * Returns 0 on success, -EINVAL if the scheduler cannot be obtained,
 * -ENOMEM if the elevator_queue cannot be allocated, or the error
 * returned by the scheduler's init hook.  On failure no module
 * reference is kept and q->elevator does not point at freed memory.
 */
int elevator_init(request_queue_t *q, char *name)
{
	struct elevator_type *e;
	struct elevator_queue *eq;
	int ret;

	INIT_LIST_HEAD(&q->queue_head);
	q->last_merge = NULL;
	q->end_sector = 0;
	q->boundary_rq = NULL;

	elevator_setup_default();

	if (!name)
		name = chosen_elevator;

	e = elevator_get(name);
	if (!e)
		return -EINVAL;

	eq = kmalloc(sizeof(*eq), GFP_KERNEL);
	if (!eq) {
		elevator_put(e);
		return -ENOMEM;
	}

	ret = elevator_attach(q, e, eq);
	if (ret) {
		/*
		 * elevator_attach() already stored eq in q->elevator;
		 * clear it before freeing so nobody follows a dangling
		 * pointer later.
		 */
		q->elevator = NULL;
		kfree(eq);
		elevator_put(e);
	}

	return ret;
}

/*
 * Tear down elevator instance @e: run the scheduler's exit hook if it
 * has one, drop the module reference on its type and free @e.
 */
void elevator_exit(elevator_t *e)
{
	struct elevator_type *type;

	if (e->ops->elevator_exit_fn)
		e->ops->elevator_exit_fn(e);

	type = e->elevator_type;
	e->elevator_type = NULL;
	elevator_put(type);

	kfree(e);
}

214
215
216
217
218
/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  If sort != 0, rq is sort-inserted; otherwise, rq will be
 * appended to the dispatch queue.  To be used by specific elevators.
 */
Jens Axboe's avatar
Jens Axboe committed
219
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
220
221
222
223
{
	sector_t boundary;
	struct list_head *entry;

224
225
	if (q->last_merge == rq)
		q->last_merge = NULL;
226
	q->nr_sorted--;
227

Jens Axboe's avatar
Jens Axboe committed
228
	boundary = q->end_sector;
229

230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
			break;
		if (rq->sector >= boundary) {
			if (pos->sector < boundary)
				continue;
		} else {
			if (pos->sector >= boundary)
				break;
		}
		if (rq->sector >= pos->sector)
			break;
	}

	list_add(&rq->queuelist, entry);
}

Linus Torvalds's avatar
Linus Torvalds committed
249
250
251
/*
 * Find a request that @bio can be merged into.
 *
 * The cached q->last_merge is tried first; on a hit *@req is set to it
 * and the merge type is returned.  Otherwise the decision is delegated
 * to the scheduler's elevator_merge_fn, if present.  Returns
 * ELEVATOR_NO_MERGE when neither yields a candidate.
 */
int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	elevator_t *e = q->elevator;
	struct request *hint = q->last_merge;

	if (hint) {
		int ret = elv_try_merge(hint, bio);

		if (ret != ELEVATOR_NO_MERGE) {
			*req = hint;
			return ret;
		}
	}

	if (!e->ops->elevator_merge_fn)
		return ELEVATOR_NO_MERGE;

	return e->ops->elevator_merge_fn(q, req, bio);
}

/*
 * Notify the scheduler (via elevator_merged_fn, when provided) that a
 * bio has been merged into @rq, and remember @rq as the merge hint.
 */
void elv_merged_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merged_fn != NULL)
		e->ops->elevator_merged_fn(q, rq);

	/* the request just merged into is the best guess for the next merge */
	q->last_merge = rq;
}

/*
 * Tell the scheduler (via elevator_merge_req_fn, when provided) that
 * @next has been merged into @rq.  The sorted-request count drops by
 * one and @rq becomes the merge hint.
 */
void elv_merge_requests(request_queue_t *q, struct request *rq,
			     struct request *next)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merge_req_fn != NULL)
		e->ops->elevator_merge_req_fn(q, rq, next);

	q->nr_sorted--;
	q->last_merge = rq;
}

290
/*
 * Put @rq back on queue @q after it had already been dequeued.
 *
 * For accounted requests the in_flight count taken at dequeue time is
 * given back and, for sorted requests, the scheduler's
 * elevator_deactivate_req_fn is called if present.  REQ_STARTED is
 * cleared and the request is re-inserted with ELEVATOR_INSERT_REQUEUE.
 */
void elv_requeue_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (blk_account_rq(rq)) {
		/* undo the increment done when the request was dequeued */
		q->in_flight--;

		if (blk_sorted_rq(rq)) {
			if (e->ops->elevator_deactivate_req_fn)
				e->ops->elevator_deactivate_req_fn(q, rq);
		}
	}

	rq->flags &= ~REQ_STARTED;

	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0);
}

309
310
311
312
313
314
315
316
317
318
319
320
321
322
/*
 * Force the scheduler to hand over everything it holds by calling its
 * elevator_dispatch_fn with the force argument set until it returns 0.
 *
 * If q->nr_sorted is still non-zero afterwards, an error is logged (for
 * the first ten occurrences only).
 */
static void elv_drain_elevator(request_queue_t *q)
{
	static int printed;

	for (;;) {
		if (!q->elevator->ops->elevator_dispatch_fn(q, 1))
			break;
	}

	if (q->nr_sorted != 0 && printed++ < 10) {
		printk(KERN_ERR "%s: forced dispatching is broken "
		       "(nr_sorted=%u), please report this\n",
		       q->elevator->elevator_type->elevator_name, q->nr_sorted);
	}
}

Linus Torvalds's avatar
Linus Torvalds committed
323
324
325
/*
 * Insert @rq into queue @q.
 *
 * @where is one of ELEVATOR_INSERT_FRONT, ELEVATOR_INSERT_BACK,
 * ELEVATOR_INSERT_SORT or ELEVATOR_INSERT_REQUEUE; any other value is
 * a BUG.  A non-zero @plug plugs the device before insertion.
 *
 * elv_add_request() in this file calls this with q->queue_lock held.
 */
void __elv_add_request(request_queue_t *q, struct request *rq, int where,
		       int plug)
{
	struct list_head *pos;
	unsigned ordseq;

	if (q->ordcolor)
		rq->flags |= REQ_ORDERED_COLOR;

	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
		/*
		 * toggle ordered color
		 */
		q->ordcolor ^= 1;

		/*
		 * barriers implicitly indicate back insertion
		 */
		if (where == ELEVATOR_INSERT_SORT)
			where = ELEVATOR_INSERT_BACK;

		/*
		 * this request is scheduling boundary, update end_sector
		 */
		if (blk_fs_request(rq)) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (where == ELEVATOR_INSERT_SORT &&
		   !(rq->flags & REQ_ELVPRIV)) {
		/* without REQ_ELVPRIV a sort insert is turned into a back insert */
		where = ELEVATOR_INSERT_BACK;
	}

	if (plug)
		blk_plug_device(q);

	rq->q = q;

	switch (where) {
	case ELEVATOR_INSERT_FRONT:
		rq->flags |= REQ_SOFTBARRIER;
		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->flags |= REQ_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and returned them now.  As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything.  There's no point in delaying queue
		 *   processing.
		 */
		blk_remove_plug(q);
		q->request_fn(q);
		break;

	case ELEVATOR_INSERT_SORT:
		BUG_ON(!blk_fs_request(rq));
		rq->flags |= REQ_SORTED;
		q->nr_sorted++;
		if (q->last_merge == NULL && rq_mergeable(rq))
			q->last_merge = rq;
		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
		 */
		q->elevator->ops->elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_REQUEUE:
		/*
		 * If ordered flush isn't in progress, we do front
		 * insertion; otherwise, requests should be requeued
		 * in ordseq order.
		 */
		rq->flags |= REQ_SOFTBARRIER;

		if (q->ordseq == 0) {
			list_add(&rq->queuelist, &q->queue_head);
		} else {
			ordseq = blk_ordered_req_seq(rq);

			/* find the first queued request not earlier in sequence */
			list_for_each(pos, &q->queue_head) {
				struct request *pos_rq = list_entry_rq(pos);

				if (ordseq <= blk_ordered_req_seq(pos_rq))
					break;
			}

			list_add_tail(&rq->queuelist, pos);
		}
		break;

	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __FUNCTION__, where);
		BUG();
	}

	/* unplug once enough requests are pending on a plugged queue */
	if (blk_queue_plugged(q)) {
		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
			- q->in_flight;

		if (nrq >= q->unplug_thresh)
			__generic_unplug_device(q);
	}
}

/*
 * Locked wrapper around __elv_add_request(): takes q->queue_lock with
 * interrupts saved/disabled for the duration of the insertion.
 */
void elv_add_request(request_queue_t *q, struct request *rq, int where,
		     int plug)
{
	unsigned long irq_flags;

	spin_lock_irqsave(q->queue_lock, irq_flags);
	__elv_add_request(q, rq, where, plug);
	spin_unlock_irqrestore(q->queue_lock, irq_flags);
}

/*
 * Fetch the next request from the dispatch queue of @q.
 *
 * While the dispatch queue is non-empty its head is offered to
 * blk_do_ordered(); the request is returned as soon as that call
 * reports non-zero.  Once the dispatch queue is empty the scheduler is
 * asked (non-forced) to dispatch more; NULL is returned when it has
 * nothing to give.
 */
static inline struct request *__elv_next_request(request_queue_t *q)
{
	struct request *rq;

	do {
		while (!list_empty(&q->queue_head)) {
			rq = list_entry_rq(q->queue_head.next);
			if (blk_do_ordered(q, &rq))
				return rq;
		}
	} while (q->elevator->ops->elevator_dispatch_fn(q, 0));

	return NULL;
}

/*
 * Return the next request the driver should work on, or NULL.
 *
 * Each candidate is marked REQ_STARTED on first sight (notifying the
 * scheduler through elevator_activate_req_fn for sorted requests) and,
 * unless flagged REQ_DONTPREP, passed to the queue's prep_rq_fn if one
 * is set:
 *  - BLKPREP_OK:    the request is returned;
 *  - BLKPREP_DEFER: NULL is returned, the request stays queued;
 *  - BLKPREP_KILL:  the request is dequeued and completed quietly, and
 *                   the next candidate is examined;
 *  - anything else: an error is logged and the request is returned.
 */
struct request *elv_next_request(request_queue_t *q)
{
	struct request *rq;
	int ret;

	for (;;) {
		rq = __elv_next_request(q);
		if (rq == NULL)
			return NULL;

		if (!(rq->flags & REQ_STARTED)) {
			elevator_t *e = q->elevator;

			/*
			 * This is the first time the device driver
			 * sees this request (possibly after
			 * requeueing).  Notify IO scheduler.
			 */
			if (blk_sorted_rq(rq) &&
			    e->ops->elevator_activate_req_fn)
				e->ops->elevator_activate_req_fn(q, rq);

			/*
			 * just mark as started even if we don't start
			 * it, a request that has been delayed should
			 * not be passed by new incoming requests
			 */
			rq->flags |= REQ_STARTED;
		}

		/* move the scheduling boundary along with this request */
		if (!q->boundary_rq || q->boundary_rq == rq) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = NULL;
		}

		/* nothing to prepare */
		if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
			return rq;

		ret = q->prep_rq_fn(q, rq);

		if (ret == BLKPREP_OK)
			return rq;

		if (ret == BLKPREP_DEFER) {
			/*
			 * the request may have been (partially) prepped.
			 * we need to keep this request in the front to
			 * avoid resource deadlock.  REQ_STARTED will
			 * prevent other fs requests from passing this one.
			 */
			return NULL;
		}

		if (ret != BLKPREP_KILL) {
			printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
								ret);
			return rq;
		}

		/* BLKPREP_KILL: fail the whole request and look at the next */
		{
			int nr_bytes = rq->hard_nr_sectors << 9;

			if (!nr_bytes)
				nr_bytes = rq->data_len;

			blkdev_dequeue_request(rq);
			rq->flags |= REQ_QUIET;
			end_that_request_chunk(rq, 0, nr_bytes);
			end_that_request_last(rq, 0);
		}
	}
}

529
/*
 * Take @rq off the list it is queued on.  It is a BUG to call this for
 * a request that is not on any list.
 */
void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
	struct list_head *link = &rq->queuelist;

	BUG_ON(list_empty(link));
	list_del_init(link);

	/*
	 * the time frame between a request being removed from the lists
	 * and until it is freed is accounted as io that is in progress at
	 * the driver side.
	 */
	if (blk_account_rq(rq))
		q->in_flight++;
}

/*
 * Report whether queue @q has nothing left to hand out.
 *
 * Returns 0 if the dispatch queue holds requests.  Otherwise the answer
 * comes from the scheduler's elevator_queue_empty_fn when it has one,
 * and is 1 when it has not.
 */
int elv_queue_empty(request_queue_t *q)
{
	elevator_t *e = q->elevator;

	if (list_empty(&q->queue_head)) {
		if (!e->ops->elevator_queue_empty_fn)
			return 1;

		return e->ops->elevator_queue_empty_fn(q);
	}

	return 0;
}

/*
 * Ask the scheduler, through elevator_latter_req_fn, for the request
 * it considers to come after @rq.  Returns NULL when the scheduler
 * does not implement the hook.
 */
struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (!e->ops->elevator_latter_req_fn)
		return NULL;

	return e->ops->elevator_latter_req_fn(q, rq);
}

/*
 * Ask the scheduler, through elevator_former_req_fn, for the request
 * it considers to come before @rq.  Returns NULL when the scheduler
 * does not implement the hook.
 */
struct request *elv_former_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (!e->ops->elevator_former_req_fn)
		return NULL;

	return e->ops->elevator_former_req_fn(q, rq);
}

575
int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
Al Viro's avatar
Al Viro committed
576
		    gfp_t gfp_mask)
Linus Torvalds's avatar
Linus Torvalds committed
577
578
579
580
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_set_req_fn)
581
		return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
Linus Torvalds's avatar
Linus Torvalds committed
582
583
584
585
586
587
588
589
590
591
592
593
594

	rq->elevator_private = NULL;
	return 0;
}

/*
 * Give the scheduler the chance to release what it attached to @rq by
 * calling its elevator_put_req_fn, if it has one.
 */
void elv_put_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (!e->ops->elevator_put_req_fn)
		return;

	e->ops->elevator_put_req_fn(q, rq);
}

595
/*
 * Ask the scheduler whether a new request may be queued.
 *
 * Returns the verdict of elevator_may_queue_fn when the scheduler has
 * that hook, ELV_MQUEUE_MAY otherwise.
 */
int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
{
	elevator_t *e = q->elevator;

	if (!e->ops->elevator_may_queue_fn)
		return ELV_MQUEUE_MAY;

	return e->ops->elevator_may_queue_fn(q, rw, bio);
}

/*
 * Account for the completion of @rq.
 *
 * For accounted requests in_flight is decremented and, for sorted
 * requests, the scheduler's elevator_completed_req_fn is called if
 * present.  If an ordered sequence is active on the queue and it was
 * waiting in the drain stage for in-flight requests to finish, the
 * drain stage is completed and the queue is run.
 */
void elv_completed_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;
	struct request *first_rq;

	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
			e->ops->elevator_completed_req_fn(q, rq);
	}

	/*
	 * Check if the queue is waiting for fs requests to be
	 * drained for flush sequence.
	 */
	if (likely(!q->ordseq))
		return;

	first_rq = list_entry_rq(q->queue_head.next);

	if (q->in_flight != 0)
		return;
	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DRAIN)
		return;
	if (blk_ordered_req_seq(first_rq) <= QUEUE_ORDSEQ_DRAIN)
		return;

	blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
	q->request_fn(q);
}

int elv_register_queue(struct request_queue *q)
{
	elevator_t *e = q->elevator;

	e->kobj.parent = kobject_get(&q->kobj);
	if (!e->kobj.parent)
		return -EBUSY;

	snprintf(e->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
	e->kobj.ktype = e->elevator_type->elevator_ktype;

	return kobject_register(&e->kobj);
}

/*
 * Undo elv_register_queue(): unregister the elevator's kobject and
 * drop the reference on the queue kobject.  A NULL @q is ignored.
 */
void elv_unregister_queue(struct request_queue *q)
{
	elevator_t *e;

	if (!q)
		return;

	e = q->elevator;
	kobject_unregister(&e->kobj);
	kobject_put(&q->kobj);
}

int elv_register(struct elevator_type *e)
{
658
	spin_lock_irq(&elv_list_lock);
Linus Torvalds's avatar
Linus Torvalds committed
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
	if (elevator_find(e->elevator_name))
		BUG();
	list_add_tail(&e->list, &elv_list);
	spin_unlock_irq(&elv_list_lock);

	printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
	if (!strcmp(e->elevator_name, chosen_elevator))
		printk(" (default)");
	printk("\n");
	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

/*
 * Remove elevator type @e from the list of available io schedulers.
 *
 * Before the type is unlinked, every thread visited by
 * do_each_thread() has the cic and aic members of its io context (when
 * set) run through their exit and dtor callbacks and cleared.
 */
void elv_unregister(struct elevator_type *e)
{
	struct task_struct *g, *p;

	/*
	 * Walk all threads under tasklist_lock and tear down the
	 * scheduler-specific parts of their io contexts.
	 */
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		struct io_context *ioc = p->io_context;

		if (ioc) {
			if (ioc->cic) {
				ioc->cic->exit(ioc->cic);
				ioc->cic->dtor(ioc->cic);
				ioc->cic = NULL;
			}
			if (ioc->aic) {
				ioc->aic->exit(ioc->aic);
				ioc->aic->dtor(ioc->aic);
				ioc->aic = NULL;
			}
		}
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);

	/* finally take the type off the list */
	spin_lock_irq(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock_irq(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 * we don't free the old io scheduler, before we have allocated what we
 * need for the new one. this way we have a chance of going back to the old
 * one, if the new one fails init for some reason.
 *
 * The caller passes in a module reference on @new_e.  On success that
 * reference is owned by the new elevator instance (elevator_exit()
 * releases it later); on every failure path it is dropped here, exactly
 * once.
 */
static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
{
	elevator_t *old_elevator, *e;

	/*
	 * Allocate new elevator
	 */
	e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
	if (!e)
		goto error;

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data
	 */
	spin_lock_irq(q->queue_lock);

	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

	elv_drain_elevator(q);

	while (q->rq.elvpriv) {
		/* kick the queue, then sleep unlocked so requests can finish */
		blk_remove_plug(q);
		q->request_fn(q);
		spin_unlock_irq(q->queue_lock);
		msleep(10);
		spin_lock_irq(q->queue_lock);
		elv_drain_elevator(q);
	}

	spin_unlock_irq(q->queue_lock);

	/*
	 * unregister old elevator data
	 */
	elv_unregister_queue(q);
	old_elevator = q->elevator;

	/*
	 * attach and start new elevator
	 */
	if (elevator_attach(q, new_e, e))
		goto fail;

	if (elv_register_queue(q))
		goto fail_register;

	/*
	 * finally exit old elevator and turn off BYPASS.
	 */
	elevator_exit(old_elevator);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return;

fail_register:
	/*
	 * switch failed, exit the new io scheduler and reattach the old
	 * one again (along with re-adding the sysfs dir)
	 *
	 * Only the scheduler's exit hook is run here.  elevator_exit() is
	 * deliberately not used: it would also drop the module reference
	 * and free e, both of which the shared unwinding below does, and
	 * the reference must not be put twice.
	 */
	if (e->ops->elevator_exit_fn)
		e->ops->elevator_exit_fn(e);
fail:
	q->elevator = old_elevator;
	elv_register_queue(q);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	kfree(e);
error:
	/* log while the module reference still pins new_e's name */
	printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name);
	elevator_put(new_e);
}

/*
 * Switch queue @q to the io scheduler named in @name.
 *
 * The name is copied into a local buffer (truncated to ELV_NAME_MAX - 1
 * characters) and a single trailing newline is stripped.  Returns
 * -EINVAL when no such scheduler can be obtained; otherwise @count,
 * whether the scheduler was already active or a switch was attempted.
 */
ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
{
	char elevator_name[ELV_NAME_MAX];
	struct elevator_type *e;
	size_t len;

	/* bounded copy; the last byte is forced to NUL */
	strncpy(elevator_name, name, sizeof(elevator_name) - 1);
	elevator_name[sizeof(elevator_name) - 1] = '\0';

	len = strlen(elevator_name);
	if (len > 0 && elevator_name[len - 1] == '\n')
		elevator_name[len - 1] = '\0';

	e = elevator_get(elevator_name);
	if (e == NULL) {
		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
		return -EINVAL;
	}

	if (strcmp(elevator_name, q->elevator->elevator_type->elevator_name) != 0) {
		/* elevator_switch() takes over the reference obtained above */
		elevator_switch(q, e);
	} else {
		/* already the active scheduler: just drop the reference */
		elevator_put(e);
	}

	return count;
}

/*
 * Write the names of all registered io schedulers into @name, separated
 * by spaces and terminated by a newline.  The scheduler currently used
 * by @q is enclosed in square brackets.
 *
 * Returns the number of characters written.
 */
ssize_t elv_iosched_show(request_queue_t *q, char *name)
{
	elevator_t *e = q->elevator;
	struct elevator_type *elv = e->elevator_type;
	struct list_head *entry;
	int len = 0;

	/*
	 * elv_list is modified under elv_list_lock (see elv_register() and
	 * elv_unregister()), so that is the lock that must be held while
	 * walking it; the queue lock does not protect the list.
	 */
	spin_lock_irq(&elv_list_lock);
	list_for_each(entry, &elv_list) {
		struct elevator_type *__e;

		__e = list_entry(entry, struct elevator_type, list);
		if (!strcmp(elv->elevator_name, __e->elevator_name))
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
		else
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock_irq(&elv_list_lock);

	len += sprintf(len+name, "\n");
	return len;
}

Jens Axboe's avatar
Jens Axboe committed
828
EXPORT_SYMBOL(elv_dispatch_sort);
Linus Torvalds's avatar
Linus Torvalds committed
829
830
831
832
EXPORT_SYMBOL(elv_add_request);
EXPORT_SYMBOL(__elv_add_request);
EXPORT_SYMBOL(elv_requeue_request);
EXPORT_SYMBOL(elv_next_request);
833
EXPORT_SYMBOL(elv_dequeue_request);
Linus Torvalds's avatar
Linus Torvalds committed
834
835
836
837
EXPORT_SYMBOL(elv_queue_empty);
EXPORT_SYMBOL(elv_completed_request);
EXPORT_SYMBOL(elevator_exit);
EXPORT_SYMBOL(elevator_init);