#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 * 	              Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
#include <linux/u64_stats_sync.h>

enum blkio_policy_id {
	BLKIO_POLICY_PROP = 0,		/* Proportional Bandwidth division */
	BLKIO_POLICY_THROTL,		/* Throttling */

	BLKIO_NR_POLICIES,
};

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

#ifdef CONFIG_BLK_CGROUP

enum stat_type {
	/* Total time spent (in ns) between request dispatch to the driver and
	 * request completion for IOs done by this cgroup. This may not be
	 * accurate when NCQ is turned on. */
	BLKIO_STAT_SERVICE_TIME = 0,
	/* Total time spent waiting in scheduler queue in ns */
	BLKIO_STAT_WAIT_TIME,
	/* Number of IOs queued up */
	BLKIO_STAT_QUEUED,
	/* All the single valued stats go below this */
	BLKIO_STAT_TIME,
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* Time not charged to this cgroup */
	BLKIO_STAT_UNACCOUNTED_TIME,
	BLKIO_STAT_AVG_QUEUE_SIZE,
	BLKIO_STAT_IDLE_TIME,
	BLKIO_STAT_EMPTY_TIME,
	BLKIO_STAT_GROUP_WAIT_TIME,
	BLKIO_STAT_DEQUEUE
#endif
};

/* Per cpu stats */
enum stat_type_cpu {
	BLKIO_STAT_CPU_SECTORS,
	/* Total bytes transferred */
	BLKIO_STAT_CPU_SERVICE_BYTES,
	/* Total IOs serviced, post merge */
	BLKIO_STAT_CPU_SERVICED,
	/* Number of IOs merged */
	BLKIO_STAT_CPU_MERGED,
	BLKIO_STAT_CPU_NR
};

enum stat_sub_type {
	BLKIO_STAT_READ = 0,
	BLKIO_STAT_WRITE,
	BLKIO_STAT_SYNC,
	BLKIO_STAT_ASYNC,
	BLKIO_STAT_TOTAL
};
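
/*
 * Illustrative sketch, not part of the original header: a stat_type row in
 * the stat_arr matrices below is indexed by stat_sub_type, with
 * BLKIO_STAT_TOTAL doubling as the row width (totals are summed at read
 * time).  Assuming @stats points at the owning group's blkio_group_stats,
 * accounting one queued sync write could look like:
 *
 *	stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]++;
 *	stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_SYNC]++;
 */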

/* blkg state flags */
enum blkg_state_flags {
	BLKG_waiting = 0,
	BLKG_idling,
	BLKG_empty,
};

/* cgroup files owned by proportional weight policy */
enum blkcg_file_name_prop {
	BLKIO_PROP_weight = 1,
	BLKIO_PROP_weight_device,
	BLKIO_PROP_io_service_bytes,
	BLKIO_PROP_io_serviced,
	BLKIO_PROP_time,
	BLKIO_PROP_sectors,
	BLKIO_PROP_unaccounted_time,
	BLKIO_PROP_io_service_time,
	BLKIO_PROP_io_wait_time,
	BLKIO_PROP_io_merged,
	BLKIO_PROP_io_queued,
	BLKIO_PROP_avg_queue_size,
	BLKIO_PROP_group_wait_time,
	BLKIO_PROP_idle_time,
	BLKIO_PROP_empty_time,
	BLKIO_PROP_dequeue,
};

/* cgroup files owned by throttle policy */
enum blkcg_file_name_throtl {
	BLKIO_THROTL_read_bps_device,
	BLKIO_THROTL_write_bps_device,
	BLKIO_THROTL_read_iops_device,
	BLKIO_THROTL_write_iops_device,
	BLKIO_THROTL_io_service_bytes,
	BLKIO_THROTL_io_serviced,
};

struct blkio_cgroup {
	struct cgroup_subsys_state css;
	unsigned int weight;
	spinlock_t lock;
	struct hlist_head blkg_list;
};

struct blkio_group_stats {
	/* total disk time and nr sectors dispatched by this group */
	uint64_t time;
	uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* Time not charged to this cgroup */
	uint64_t unaccounted_time;

	/* Sum of number of IOs queued across all samples */
	uint64_t avg_queue_size_sum;
	/* Count of samples taken for average */
	uint64_t avg_queue_size_samples;
	/* How many times this group has been removed from service tree */
	unsigned long dequeue;

	/* Total time this group spent waiting to be assigned a timeslice. */
	uint64_t group_wait_time;
	uint64_t start_group_wait_time;

	/* Time spent idling for this blkio_group */
	uint64_t idle_time;
	uint64_t start_idle_time;
	/*
	 * Total time during which this group had requests queued but did
	 * not contain the currently active queue.
	 */
	uint64_t empty_time;
	uint64_t start_empty_time;
	uint16_t flags;
#endif
};

/* Per cpu blkio group stats */
struct blkio_group_stats_cpu {
	uint64_t sectors;
	uint64_t stat_arr_cpu[BLKIO_STAT_CPU_NR][BLKIO_STAT_TOTAL];
	struct u64_stats_sync syncp;
};
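
/*
 * Illustrative sketch, not part of the original header: per-cpu counters
 * are updated under the u64_stats_sync primitive so 32-bit readers see
 * consistent 64-bit values.  A dispatch-path update might look like:
 *
 *	struct blkio_group_stats_cpu *stats_cpu;
 *
 *	stats_cpu = this_cpu_ptr(pd->stats_cpu);
 *	u64_stats_update_begin(&stats_cpu->syncp);
 *	stats_cpu->sectors += nr_sectors;
 *	u64_stats_update_end(&stats_cpu->syncp);
 */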

struct blkio_group_conf {
	unsigned int weight;
	unsigned int iops[2];
	u64 bps[2];
};

/* per-blkg per-policy data */
struct blkg_policy_data {
	/* the blkg this per-policy data belongs to */
	struct blkio_group *blkg;

	/* Configuration */
	struct blkio_group_conf conf;

	struct blkio_group_stats stats;
	/* Per cpu stats pointer */
	struct blkio_group_stats_cpu __percpu *stats_cpu;

	/* pol->pdata_size bytes of private data used by policy impl */
	char pdata[] __aligned(__alignof__(unsigned long long));
};
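
/*
 * Illustrative sketch, not part of the original header: the core would
 * allocate each blkg_policy_data with the policy's private area appended,
 * pdata_size determining the size of the trailing pdata[] member:
 *
 *	pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
 *			  q->node);
 */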

struct blkio_group {
	/* Pointer to the associated request_queue, RCU protected */
	struct request_queue __rcu *q;
	struct hlist_node blkcg_node;
	struct blkio_cgroup *blkcg;
	/* Store cgroup path */
	char path[128];
	/* policy which owns this blk group */
	enum blkio_policy_id plid;
	/* reference count */
	int refcnt;

	/* Need to serialize the stats in the case of reset/update */
	spinlock_t stats_lock;
	struct blkg_policy_data *pd[BLKIO_NR_POLICIES];

	struct rcu_head rcu_head;
};

typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
typedef void (blkio_link_group_fn)(struct request_queue *q,
			struct blkio_group *blkg);
typedef void (blkio_unlink_group_fn)(struct request_queue *q,
			struct blkio_group *blkg);
typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int weight);
typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
			struct blkio_group *blkg, u64 read_bps);
typedef void (blkio_update_group_write_bps_fn)(struct request_queue *q,
			struct blkio_group *blkg, u64 write_bps);
typedef void (blkio_update_group_read_iops_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int read_iops);
typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int write_iops);

struct blkio_policy_ops {
	blkio_init_group_fn *blkio_init_group_fn;
	blkio_link_group_fn *blkio_link_group_fn;
	blkio_unlink_group_fn *blkio_unlink_group_fn;
	blkio_clear_queue_fn *blkio_clear_queue_fn;
	blkio_update_group_weight_fn *blkio_update_group_weight_fn;
	blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
	blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
	blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
	blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn;
};

struct blkio_policy_type {
	struct list_head list;
	struct blkio_policy_ops ops;
	enum blkio_policy_id plid;
	size_t pdata_size;		/* policy specific private data size */
};

extern int blkcg_init_queue(struct request_queue *q);
extern void blkcg_drain_queue(struct request_queue *q);
extern void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
extern void blkio_policy_register(struct blkio_policy_type *);
extern void blkio_policy_unregister(struct blkio_policy_type *);
extern void blkg_destroy_all(struct request_queue *q);
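
/*
 * Illustrative sketch, not part of the original header: a policy is
 * typically registered from module init with a statically defined
 * blkio_policy_type; the "foo_*" names here are hypothetical:
 *
 *	static struct blkio_policy_type blkio_policy_foo = {
 *		.ops = {
 *			.blkio_init_group_fn	= foo_init_group,
 *			.blkio_unlink_group_fn	= foo_unlink_group,
 *			.blkio_clear_queue_fn	= foo_clear_queue,
 *		},
 *		.plid		= BLKIO_POLICY_PROP,
 *		.pdata_size	= sizeof(struct foo_group),
 *	};
 *
 *	blkio_policy_register(&blkio_policy_foo);
 */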

/**
 * blkg_to_pdata - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline void *blkg_to_pdata(struct blkio_group *blkg,
			      struct blkio_policy_type *pol)
{
	return blkg ? blkg->pd[pol->plid]->pdata : NULL;
}

/**
 * pdata_to_blkg - get blkg associated with policy private data
 * @pdata: policy private data of interest
 * @pol: policy @pdata is for
 *
 * @pdata is policy private data for @pol.  Determine the blkg it's
 * associated with.
 */
static inline struct blkio_group *pdata_to_blkg(void *pdata,
						struct blkio_policy_type *pol)
{
	if (pdata) {
		struct blkg_policy_data *pd =
			container_of(pdata, struct blkg_policy_data, pdata);
		return pd->blkg;
	}
	return NULL;
}
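
/*
 * Illustrative sketch, not part of the original header: blkg_to_pdata()
 * and pdata_to_blkg() are inverses, so a policy (here a hypothetical
 * "foo") can hop between a blkg and its own private data:
 *
 *	struct foo_group *fg = blkg_to_pdata(blkg, &blkio_policy_foo);
 *	struct blkio_group *same = pdata_to_blkg(fg, &blkio_policy_foo);
 */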

static inline char *blkg_path(struct blkio_group *blkg)
{
	return blkg->path;
}

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding queue_lock and an existing reference.
 */
static inline void blkg_get(struct blkio_group *blkg)
{
	lockdep_assert_held(blkg->q->queue_lock);
	WARN_ON_ONCE(!blkg->refcnt);
	blkg->refcnt++;
}

void __blkg_release(struct blkio_group *blkg);

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 *
 * The caller should be holding queue_lock.
 */
static inline void blkg_put(struct blkio_group *blkg)
{
	lockdep_assert_held(blkg->q->queue_lock);
	WARN_ON_ONCE(blkg->refcnt <= 0);
	if (!--blkg->refcnt)
		__blkg_release(blkg);
}
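
/*
 * Illustrative sketch, not part of the original header: both helpers
 * expect queue_lock, so a caller holding a reference across an unlocked
 * region would bracket it like this:
 *
 *	spin_lock_irq(q->queue_lock);
 *	blkg_get(blkg);
 *	spin_unlock_irq(q->queue_lock);
 *
 *	(use blkg without queue_lock held)
 *
 *	spin_lock_irq(q->queue_lock);
 *	blkg_put(blkg);
 *	spin_unlock_irq(q->queue_lock);
 */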

#else

struct blkio_group {
};

struct blkio_policy_type {
};

static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
static inline void blkg_destroy_all(struct request_queue *q) { }

static inline void *blkg_to_pdata(struct blkio_group *blkg,
				struct blkio_policy_type *pol) { return NULL; }
static inline struct blkio_group *pdata_to_blkg(void *pdata,
				struct blkio_policy_type *pol) { return NULL; }
static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
static inline void blkg_get(struct blkio_group *blkg) { }
static inline void blkg_put(struct blkio_group *blkg) { }

#endif

#define BLKIO_WEIGHT_MIN	10
#define BLKIO_WEIGHT_MAX	1000
#define BLKIO_WEIGHT_DEFAULT	500
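
/*
 * Illustrative sketch, not part of the original header: a weight written
 * from userspace would be validated against these bounds before being
 * stored:
 *
 *	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
 *		return -EINVAL;
 *	blkcg->weight = (unsigned int)val;
 */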

#ifdef CONFIG_DEBUG_BLK_CGROUP
void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
					 struct blkio_policy_type *pol);
void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				  struct blkio_policy_type *pol,
				  unsigned long dequeue);
void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
					struct blkio_policy_type *pol);
void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol);
void blkiocg_set_start_empty_time(struct blkio_group *blkg,
				  struct blkio_policy_type *pol);

#define BLKG_FLAG_FNS(name)						\
static inline void blkio_mark_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags |= (1 << BLKG_##name);				\
}									\
static inline void blkio_clear_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags &= ~(1 << BLKG_##name);				\
}									\
static inline int blkio_blkg_##name(struct blkio_group_stats *stats)	\
{									\
	return (stats->flags & (1 << BLKG_##name)) != 0;		\
}									\

BLKG_FLAG_FNS(waiting)
BLKG_FLAG_FNS(idling)
BLKG_FLAG_FNS(empty)
#undef BLKG_FLAG_FNS
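
/*
 * Illustrative sketch, not part of the original header: for each flag the
 * macro above generates mark/clear/test helpers, e.g. for BLKG_waiting:
 *
 *	blkio_mark_blkg_waiting(&pd->stats);
 *	if (blkio_blkg_waiting(&pd->stats))
 *		blkio_clear_blkg_waiting(&pd->stats);
 */
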
#else
static inline void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, unsigned long dequeue) { }
static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
#endif

#ifdef CONFIG_BLK_CGROUP
extern struct blkio_cgroup blkio_root_cgroup;
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       enum blkio_policy_id plid);
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       enum blkio_policy_id plid,
				       bool for_root);
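
/*
 * Illustrative sketch, not part of the original header: lookup/create is
 * meant to run with both rcu_read_lock() and queue_lock held:
 *
 *	rcu_read_lock();
 *	spin_lock_irq(q->queue_lock);
 *	blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
 *	spin_unlock_irq(q->queue_lock);
 *	rcu_read_unlock();
 */
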
void blkiocg_update_timeslice_used(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   unsigned long time,
				   unsigned long unaccounted_time);
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   uint64_t bytes, bool direction, bool sync);
void blkiocg_update_completion_stats(struct blkio_group *blkg,
				     struct blkio_policy_type *pol,
				     uint64_t start_time,
				     uint64_t io_start_time, bool direction,
				     bool sync);
void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync);
void blkiocg_update_io_add_stats(struct blkio_group *blkg,
				 struct blkio_policy_type *pol,
				 struct blkio_group *curr_blkg, bool direction,
				 bool sync);
void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync);
#else
struct cgroup;
static inline struct blkio_cgroup *
cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
static inline struct blkio_cgroup *
task_blkio_cgroup(struct task_struct *tsk) { return NULL; }

static inline int
blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }

static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
					      struct request_queue *q,
					      enum blkio_policy_id plid) { return NULL; }
static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
			struct blkio_policy_type *pol, unsigned long time,
			unsigned long unaccounted_time) { }
static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, uint64_t bytes,
			bool direction, bool sync) { }
static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, uint64_t start_time,
			uint64_t io_start_time, bool direction, bool sync) { }
static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, bool direction,
			bool sync) { }
static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol,
			struct blkio_group *curr_blkg, bool direction,
			bool sync) { }
static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, bool direction,
			bool sync) { }
#endif
#endif /* _BLK_CGROUP_H */