blk-cgroup.h 17 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 * 	              Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
17
#include <linux/u64_stats_sync.h>
18

19
20
/*
 * IDs of the registered blkio policies.  Also used as the index into
 * blkio_group->pd[], so BLKIO_NR_POLICIES must stay last.
 */
enum blkio_policy_id {
	BLKIO_POLICY_PROP = 0,		/* Proportional Bandwidth division */
	BLKIO_POLICY_THROTL,		/* Throttling */

	BLKIO_NR_POLICIES,
};

26
27
28
/* Max limits for throttle policy (UINT_MAX — presumably "no IOPS limit";
 * NOTE(review): confirm semantics against blk-throttle.c) */
#define THROTL_IOPS_MAX		UINT_MAX

Tejun Heo's avatar
Tejun Heo committed
29
#ifdef CONFIG_BLK_CGROUP
30

31
/*
 * Stat indexes.  Types below BLKIO_STAT_ARR_NR live in per-direction
 * stat arrays (rwstats); the rest are single valued.
 */
enum stat_type {
	/* Number of IOs merged */
	BLKIO_STAT_MERGED,
	/* Total time spent (in ns) between request dispatch to the driver and
	 * request completion for IOs done by this cgroup. This may not be
	 * accurate when NCQ is turned on. */
	BLKIO_STAT_SERVICE_TIME,
	/* Total time spent waiting in scheduler queue in ns */
	BLKIO_STAT_WAIT_TIME,
	/* Number of IOs queued up */
	BLKIO_STAT_QUEUED,

	/* All the single valued stats go below this */
	BLKIO_STAT_TIME,
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* Time not charged to this cgroup */
	BLKIO_STAT_UNACCOUNTED_TIME,
	BLKIO_STAT_AVG_QUEUE_SIZE,
	BLKIO_STAT_IDLE_TIME,
	BLKIO_STAT_EMPTY_TIME,
	BLKIO_STAT_GROUP_WAIT_TIME,
	BLKIO_STAT_DEQUEUE
#endif
};

/* Types lower than this live in stat_arr and have subtypes */
#define BLKIO_STAT_ARR_NR	(BLKIO_STAT_QUEUED + 1)

59
60
61
62
63
64
/* Per cpu stats.  Types below BLKIO_STAT_CPU_ARR_NR have subtypes. */
enum stat_type_cpu {
	/* Total bytes transferred */
	BLKIO_STAT_CPU_SERVICE_BYTES,
	/* Total IOs serviced, post merge */
	BLKIO_STAT_CPU_SERVICED,

	/* All the single valued stats go below this */
	BLKIO_STAT_CPU_SECTORS,
};

#define BLKIO_STAT_CPU_ARR_NR	(BLKIO_STAT_CPU_SERVICED + 1)

72
73
74
75
76
77
78
79
/* Indexes into blkg_rwstat->cnt[]: direction and sync-ness of an IO */
enum blkg_rwstat_type {
	BLKG_RWSTAT_READ,
	BLKG_RWSTAT_WRITE,
	BLKG_RWSTAT_SYNC,
	BLKG_RWSTAT_ASYNC,

	BLKG_RWSTAT_NR,
	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
};

82
83
84
85
86
87
88
/* blkg state flags: bit positions in blkio_group_stats->flags,
 * set/cleared/tested via the BLKG_FLAG_FNS() generated accessors */
enum blkg_state_flags {
	BLKG_waiting = 0,
	BLKG_idling,
	BLKG_empty,
};

89
90
91
92
93
94
95
96
/* cgroup files owned by proportional weight policy */
enum blkcg_file_name_prop {
	BLKIO_PROP_weight = 1,
	BLKIO_PROP_weight_device,
	BLKIO_PROP_io_service_bytes,
	BLKIO_PROP_io_serviced,
	BLKIO_PROP_time,
	BLKIO_PROP_sectors,
	BLKIO_PROP_unaccounted_time,
	BLKIO_PROP_io_service_time,
	BLKIO_PROP_io_wait_time,
	BLKIO_PROP_io_merged,
	BLKIO_PROP_io_queued,
	BLKIO_PROP_avg_queue_size,
	BLKIO_PROP_group_wait_time,
	BLKIO_PROP_idle_time,
	BLKIO_PROP_empty_time,
	BLKIO_PROP_dequeue,
};

109
110
111
112
/* cgroup files owned by throttle policy */
enum blkcg_file_name_throtl {
	BLKIO_THROTL_read_bps_device,
	BLKIO_THROTL_write_bps_device,
	BLKIO_THROTL_read_iops_device,
	BLKIO_THROTL_write_iops_device,
	BLKIO_THROTL_io_service_bytes,
	BLKIO_THROTL_io_serviced,
};

119
120
121
122
123
struct blkio_cgroup {
	struct cgroup_subsys_state css;
	unsigned int weight;
	spinlock_t lock;
	struct hlist_head blkg_list;
Tejun Heo's avatar
Tejun Heo committed
124
125
126

	/* for policies to test whether associated blkcg has changed */
	uint64_t id;
127
128
};

129
130
131
132
133
134
135
136
137
138
/* a single u64 counter made read-atomic via a u64_stats sync section */
struct blkg_stat {
	struct u64_stats_sync		syncp;
	uint64_t			cnt;
};

/* per-direction/sync counters, indexed by enum blkg_rwstat_type */
struct blkg_rwstat {
	struct u64_stats_sync		syncp;
	uint64_t			cnt[BLKG_RWSTAT_NR];
};

139
struct blkio_group_stats {
140
141
142
143
144
145
146
147
	/* number of ios merged */
	struct blkg_rwstat		merged;
	/* total time spent on device in ns, may not be accurate w/ queueing */
	struct blkg_rwstat		service_time;
	/* total time spent waiting in scheduler queue in ns */
	struct blkg_rwstat		wait_time;
	/* number of IOs queued up */
	struct blkg_rwstat		queued;
148
	/* total disk time and nr sectors dispatched by this group */
149
	struct blkg_stat		time;
150
#ifdef CONFIG_DEBUG_BLK_CGROUP
151
152
153
154
155
156
157
158
159
160
161
162
163
164
	/* time not charged to this cgroup */
	struct blkg_stat		unaccounted_time;
	/* sum of number of ios queued across all samples */
	struct blkg_stat		avg_queue_size_sum;
	/* count of samples taken for average */
	struct blkg_stat		avg_queue_size_samples;
	/* how many times this group has been removed from service tree */
	struct blkg_stat		dequeue;
	/* total time spent waiting for it to be assigned a timeslice. */
	struct blkg_stat		group_wait_time;
	/* time spent idling for this blkio_group */
	struct blkg_stat		idle_time;
	/* total time with empty current active q with other requests queued */
	struct blkg_stat		empty_time;
Tejun Heo's avatar
Tejun Heo committed
165
	/* fields after this shouldn't be cleared on stat reset */
166
167
168
169
	uint64_t			start_group_wait_time;
	uint64_t			start_idle_time;
	uint64_t			start_empty_time;
	uint16_t			flags;
170
171
172
#endif
};

173
174
/* Per cpu blkio group stats */
struct blkio_group_stats_cpu {
175
176
177
178
179
180
	/* total bytes transferred */
	struct blkg_rwstat		service_bytes;
	/* total IOs serviced, post merge */
	struct blkg_rwstat		serviced;
	/* total sectors transferred */
	struct blkg_stat		sectors;
181
182
};

183
184
185
186
187
188
/* per-blkg per-policy configuration */
struct blkio_group_conf {
	unsigned int weight;	/* proportional weight */
	unsigned int iops[2];	/* per-direction IOPS limit — presumably [READ]/[WRITE]; verify */
	u64 bps[2];		/* per-direction bytes/sec limit — presumably [READ]/[WRITE]; verify */
};

189
190
191
192
193
/* per-blkg per-policy data */
struct blkg_policy_data {
	/* the blkg this per-policy data belongs to */
	struct blkio_group *blkg;

194
195
196
197
198
199
200
	/* Configuration */
	struct blkio_group_conf conf;

	struct blkio_group_stats stats;
	/* Per cpu stats pointer */
	struct blkio_group_stats_cpu __percpu *stats_cpu;

201
202
203
204
	/* pol->pdata_size bytes of private data used by policy impl */
	char pdata[] __aligned(__alignof__(unsigned long long));
};

205
struct blkio_group {
206
207
	/* Pointer to the associated request_queue */
	struct request_queue *q;
208
	struct list_head q_node;
209
	struct hlist_node blkcg_node;
210
	struct blkio_cgroup *blkcg;
211
212
	/* Store cgroup path */
	char path[128];
Tejun Heo's avatar
Tejun Heo committed
213
214
	/* reference count */
	int refcnt;
215

216
	struct blkg_policy_data *pd[BLKIO_NR_POLICIES];
Tejun Heo's avatar
Tejun Heo committed
217

218
219
	/* List of blkg waiting for per cpu stats memory to be allocated */
	struct list_head alloc_node;
Tejun Heo's avatar
Tejun Heo committed
220
	struct rcu_head rcu_head;
221
222
};

223
typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
224
typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
225
			struct blkio_group *blkg, unsigned int weight);
226
typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
227
			struct blkio_group *blkg, u64 read_bps);
228
typedef void (blkio_update_group_write_bps_fn)(struct request_queue *q,
229
			struct blkio_group *blkg, u64 write_bps);
230
typedef void (blkio_update_group_read_iops_fn)(struct request_queue *q,
231
			struct blkio_group *blkg, unsigned int read_iops);
232
typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
233
			struct blkio_group *blkg, unsigned int write_iops);
234
235

struct blkio_policy_ops {
236
	blkio_init_group_fn *blkio_init_group_fn;
237
	blkio_update_group_weight_fn *blkio_update_group_weight_fn;
238
239
	blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
	blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
240
241
	blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
	blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn;
242
243
244
245
246
};

struct blkio_policy_type {
	struct list_head list;
	struct blkio_policy_ops ops;
247
	enum blkio_policy_id plid;
248
	size_t pdata_size;		/* policy specific private data size */
249
250
};

251
252
253
254
extern int blkcg_init_queue(struct request_queue *q);
extern void blkcg_drain_queue(struct request_queue *q);
extern void blkcg_exit_queue(struct request_queue *q);

255
256
257
/* Blkio controller policy registration */
extern void blkio_policy_register(struct blkio_policy_type *);
extern void blkio_policy_unregister(struct blkio_policy_type *);
258
259
260
extern void blkg_destroy_all(struct request_queue *q, bool destroy_root);
extern void update_root_blkg_pd(struct request_queue *q,
				enum blkio_policy_id plid);
261

262
263
264
265
266
267
268
269
270
271
/**
 * blkg_to_pdata - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline void *blkg_to_pdata(struct blkio_group *blkg,
			      struct blkio_policy_type *pol)
{
272
	return blkg ? blkg->pd[pol->plid]->pdata : NULL;
273
274
275
276
277
278
}

/**
 * pdata_to_blkg - get blkg associated with policy private data
 * @pdata: policy private data of interest
 *
279
 * @pdata is policy private data.  Determine the blkg it's associated with.
280
 */
281
static inline struct blkio_group *pdata_to_blkg(void *pdata)
282
283
284
285
286
287
288
289
290
{
	if (pdata) {
		struct blkg_policy_data *pd =
			container_of(pdata, struct blkg_policy_data, pdata);
		return pd->blkg;
	}
	return NULL;
}

291
292
293
294
295
/* Return the cgroup path cached in @blkg->path */
static inline char *blkg_path(struct blkio_group *blkg)
{
	return blkg->path;
}

Tejun Heo's avatar
Tejun Heo committed
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding queue_lock and an existing reference.
 * Refcounting is not atomic; queue_lock is what makes it safe.
 */
static inline void blkg_get(struct blkio_group *blkg)
{
	lockdep_assert_held(blkg->q->queue_lock);
	WARN_ON_ONCE(!blkg->refcnt);	/* must already hold a reference */
	blkg->refcnt++;
}

/* releases @blkg once the last reference is dropped; see blkg_put() */
void __blkg_release(struct blkio_group *blkg);

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 *
 * The caller should be holding queue_lock.  When the last reference is
 * dropped, the blkg is handed to __blkg_release() for destruction.
 */
static inline void blkg_put(struct blkio_group *blkg)
{
	lockdep_assert_held(blkg->q->queue_lock);
	WARN_ON_ONCE(blkg->refcnt <= 0);	/* catch over-puts */
	if (!--blkg->refcnt)
		__blkg_release(blkg);
}

325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
/**
 * blkg_stat_add - add a value to a blkg_stat
 * @stat: target blkg_stat
 * @val: value to add
 *
 * Add @val to @stat.  The caller is responsible for synchronizing calls to
 * this function.  The u64_stats section makes the 64-bit update appear
 * atomic to lockless readers (blkg_stat_read()).
 */
static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
{
	u64_stats_update_begin(&stat->syncp);
	stat->cnt += val;
	u64_stats_update_end(&stat->syncp);
}

/**
 * blkg_stat_read - read the current value of a blkg_stat
 * @stat: blkg_stat to read
 *
 * Read the current value of @stat.  This function can be called without
 * synchronization and takes care of u64 atomicity.
 */
static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
{
	unsigned int start;
	uint64_t v;

	/* retry the read if a writer updated cnt while we were sampling */
	do {
		start = u64_stats_fetch_begin(&stat->syncp);
		v = stat->cnt;
	} while (u64_stats_fetch_retry(&stat->syncp, start));

	return v;
}

/**
 * blkg_stat_reset - reset a blkg_stat
 * @stat: blkg_stat to reset
 *
 * No syncp section is taken here; the caller is responsible for
 * synchronizing against writers.
 */
static inline void blkg_stat_reset(struct blkg_stat *stat)
{
	stat->cnt = 0;
}

/**
 * blkg_rwstat_add - add a value to a blkg_rwstat
 * @rwstat: target blkg_rwstat
 * @rw: mask of REQ_{WRITE|SYNC}
 * @val: value to add
 *
 * Add @val to @rwstat.  The counters are chosen according to @rw.  The
 * caller is responsible for synchronizing calls to this function.
 */
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
				   int rw, uint64_t val)
{
	u64_stats_update_begin(&rwstat->syncp);

	/* @val is counted once per axis: READ vs WRITE, and SYNC vs ASYNC */
	if (rw & REQ_WRITE)
		rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
	else
		rwstat->cnt[BLKG_RWSTAT_READ] += val;
	if (rw & REQ_SYNC)
		rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
	else
		rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;

	u64_stats_update_end(&rwstat->syncp);
}

/**
 * blkg_rwstat_read - read the current values of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Read the current snapshot of @rwstat and return it as the return value.
 * This function can be called without synchronization and takes care of
 * u64 atomicity.
 */
static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
{
	unsigned int start;
	struct blkg_rwstat tmp;

	/* retry the snapshot if a writer raced with us */
	do {
		start = u64_stats_fetch_begin(&rwstat->syncp);
		tmp = *rwstat;
	} while (u64_stats_fetch_retry(&rwstat->syncp, start));

	return tmp;
}

/**
 * blkg_rwstat_sum - read the total count of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Return the total count of @rwstat regardless of the IO direction.  This
 * function can be called without synchronization and takes care of u64
 * atomicity.
 */
static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat)
{
	struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);

	return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
}

/**
 * blkg_rwstat_reset - reset a blkg_rwstat
 * @rwstat: blkg_rwstat to reset
 *
 * Zero every direction/sync counter.  The caller is responsible for
 * synchronizing against writers.
 */
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
	unsigned int i;

	for (i = 0; i < sizeof(rwstat->cnt) / sizeof(rwstat->cnt[0]); i++)
		rwstat->cnt[i] = 0;
}

440
441
442
443
444
#else

struct blkio_group {
};

445
446
447
struct blkio_policy_type {
};

448
449
450
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
451
452
static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
453
454
static inline void blkg_destroy_all(struct request_queue *q,
				    bool destory_root) { }
455
456
static inline void update_root_blkg_pd(struct request_queue *q,
				       enum blkio_policy_id plid) { }
457

458
459
460
461
static inline void *blkg_to_pdata(struct blkio_group *blkg,
				struct blkio_policy_type *pol) { return NULL; }
static inline struct blkio_group *pdata_to_blkg(void *pdata,
				struct blkio_policy_type *pol) { return NULL; }
462
static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
Tejun Heo's avatar
Tejun Heo committed
463
464
static inline void blkg_get(struct blkio_group *blkg) { }
static inline void blkg_put(struct blkio_group *blkg) { }
465

466
467
#endif

468
/* valid range and default for the proportional weight knob */
#define BLKIO_WEIGHT_MIN	10
#define BLKIO_WEIGHT_MAX	1000
#define BLKIO_WEIGHT_DEFAULT	500

472
#ifdef CONFIG_DEBUG_BLK_CGROUP
/* debug-only stat updaters, implemented out of line */
void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
					 struct blkio_policy_type *pol);
void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				  struct blkio_policy_type *pol,
				  unsigned long dequeue);
void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
					struct blkio_policy_type *pol);
void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol);
void blkiocg_set_start_empty_time(struct blkio_group *blkg,
				  struct blkio_policy_type *pol);

/* generate mark/clear/test helpers for each BLKG_* flag bit */
#define BLKG_FLAG_FNS(name)						\
static inline void blkio_mark_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags |= (1 << BLKG_##name);				\
}									\
static inline void blkio_clear_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags &= ~(1 << BLKG_##name);				\
}									\
static inline int blkio_blkg_##name(struct blkio_group_stats *stats)	\
{									\
	return (stats->flags & (1 << BLKG_##name)) != 0;		\
}									\

BLKG_FLAG_FNS(waiting)
BLKG_FLAG_FNS(idling)
BLKG_FLAG_FNS(empty)
#undef BLKG_FLAG_FNS
#else	/* !CONFIG_DEBUG_BLK_CGROUP */
static inline void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, unsigned long dequeue) { }
static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
#endif

Tejun Heo's avatar
Tejun Heo committed
518
#ifdef CONFIG_BLK_CGROUP
519
520
extern struct blkio_cgroup blkio_root_cgroup;
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
521
extern struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio);
522
extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
523
				       struct request_queue *q);
524
525
526
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       bool for_root);
527
void blkiocg_update_timeslice_used(struct blkio_group *blkg,
528
529
530
531
532
533
				   struct blkio_policy_type *pol,
				   unsigned long time,
				   unsigned long unaccounted_time);
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   uint64_t bytes, bool direction, bool sync);
534
void blkiocg_update_completion_stats(struct blkio_group *blkg,
535
536
537
538
539
540
541
				     struct blkio_policy_type *pol,
				     uint64_t start_time,
				     uint64_t io_start_time, bool direction,
				     bool sync);
void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync);
542
void blkiocg_update_io_add_stats(struct blkio_group *blkg,
543
544
545
				 struct blkio_policy_type *pol,
				 struct blkio_group *curr_blkg, bool direction,
				 bool sync);
546
void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
547
548
				    struct blkio_policy_type *pol,
				    bool direction, bool sync);
549
#else
550
struct cgroup;
551
552
static inline struct blkio_cgroup *
cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
553
static inline struct blkio_cgroup *
554
bio_blkio_cgroup(struct bio *bio) { return NULL; }
555

556
557
static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
					      void *key) { return NULL; }
558
static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
559
560
			struct blkio_policy_type *pol, unsigned long time,
			unsigned long unaccounted_time) { }
561
static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
562
563
			struct blkio_policy_type *pol, uint64_t bytes,
			bool direction, bool sync) { }
564
static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
565
566
			struct blkio_policy_type *pol, uint64_t start_time,
			uint64_t io_start_time, bool direction, bool sync) { }
Divyesh Shah's avatar
Divyesh Shah committed
567
static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
568
569
			struct blkio_policy_type *pol, bool direction,
			bool sync) { }
570
static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg,
571
572
573
			struct blkio_policy_type *pol,
			struct blkio_group *curr_blkg, bool direction,
			bool sync) { }
574
static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
575
576
			struct blkio_policy_type *pol, bool direction,
			bool sync) { }
577
578
#endif
#endif /* _BLK_CGROUP_H */