blk-cgroup.h 22 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 * 	              Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
Tejun Heo's avatar
Tejun Heo committed
17
#include <linux/percpu_counter.h>
18
#include <linux/seq_file.h>
19
#include <linux/radix-tree.h>
20
#include <linux/blkdev.h>
21
#include <linux/atomic.h>
22

Tejun Heo's avatar
Tejun Heo committed
23
24
25
/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)

26
27
28
/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

Tejun Heo's avatar
Tejun Heo committed
29
30
#ifdef CONFIG_BLK_CGROUP

31
32
33
34
35
36
37
38
/*
 * Indices into the per-type counter arrays of struct blkg_rwstat.
 * READ/WRITE and SYNC/ASYNC are two separate classifications;
 * blkg_rwstat_add() bumps one counter out of each pair.
 */
enum blkg_rwstat_type {
	BLKG_RWSTAT_READ,
	BLKG_RWSTAT_WRITE,
	BLKG_RWSTAT_SYNC,
	BLKG_RWSTAT_ASYNC,

	/* number of real counters, sizes the arrays in struct blkg_rwstat */
	BLKG_RWSTAT_NR,
	/* alias of BLKG_RWSTAT_NR, not a separate counter slot */
	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
};

41
42
struct blkcg_gq;

Tejun Heo's avatar
Tejun Heo committed
43
struct blkcg {
44
45
	struct cgroup_subsys_state	css;
	spinlock_t			lock;
46
47
48

	struct radix_tree_root		blkg_tree;
	struct blkcg_gq			*blkg_hint;
49
	struct hlist_head		blkg_list;
Tejun Heo's avatar
Tejun Heo committed
50

51
	struct blkcg_policy_data	*cpd[BLKCG_MAX_POLS];
52

Tejun Heo's avatar
Tejun Heo committed
53
	struct list_head		all_blkcgs_node;
54
55
56
#ifdef CONFIG_CGROUP_WRITEBACK
	struct list_head		cgwb_list;
#endif
57
58
};

59
60
/*
 * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
Tejun Heo's avatar
Tejun Heo committed
61
62
 * recursive.  Used to carry stats of dead children, and, for blkg_rwstat,
 * to carry result values from read and sum operations.
63
 */
64
struct blkg_stat {
Tejun Heo's avatar
Tejun Heo committed
65
	struct percpu_counter		cpu_cnt;
66
	atomic64_t			aux_cnt;
67
68
69
};

struct blkg_rwstat {
Tejun Heo's avatar
Tejun Heo committed
70
	struct percpu_counter		cpu_cnt[BLKG_RWSTAT_NR];
71
	atomic64_t			aux_cnt[BLKG_RWSTAT_NR];
72
73
};

74
75
76
77
78
/*
 * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
 * request_queue (q).  This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods.  A policy can allocate private data
 * area by allocating larger data structure which embeds blkg_policy_data
 * at the beginning.
 */
struct blkg_policy_data {
	/* the blkg and policy id this per-policy data belongs to */
	struct blkcg_gq			*blkg;
	int				plid;
};

91
/*
 * Policies that need to keep per-blkcg data which is independent from any
 * request_queue associated to it should implement cpd_alloc/free_fn()
 * methods.  A policy can allocate private data area by allocating larger
 * data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
	/* the blkcg and policy id this per-policy data belongs to */
	struct blkcg			*blkcg;
	int				plid;
};

Tejun Heo's avatar
Tejun Heo committed
104
105
/* association between a blk cgroup and a request queue */
struct blkcg_gq {
106
	/* Pointer to the associated request_queue */
107
108
109
	struct request_queue		*q;
	struct list_head		q_node;
	struct hlist_node		blkcg_node;
Tejun Heo's avatar
Tejun Heo committed
110
	struct blkcg			*blkcg;
111

112
113
114
115
116
117
	/*
	 * Each blkg gets congested separately and the congestion state is
	 * propagated to the matching bdi_writeback_congested.
	 */
	struct bdi_writeback_congested	*wb_congested;

118
119
120
	/* all non-root blkcg_gq's are guaranteed to have access to parent */
	struct blkcg_gq			*parent;

121
122
	/* request allocation list for this blkcg-q pair */
	struct request_list		rl;
123

Tejun Heo's avatar
Tejun Heo committed
124
	/* reference count */
125
	atomic_t			refcnt;
126

127
128
129
	/* is this blkg online? protected by both blkcg and q locks */
	bool				online;

130
131
132
	struct blkg_rwstat		stat_bytes;
	struct blkg_rwstat		stat_ios;

133
	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];
Tejun Heo's avatar
Tejun Heo committed
134

135
	struct rcu_head			rcu_head;
136
137
};

138
/* per-blkcg policy data (cpd) callbacks */
typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
/* per-blkg policy data (pd) callbacks */
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
148

Tejun Heo's avatar
Tejun Heo committed
149
struct blkcg_policy {
150
151
	int				plid;
	/* cgroup files for the policy */
152
	struct cftype			*dfl_cftypes;
153
	struct cftype			*legacy_cftypes;
154
155

	/* operations */
156
	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
157
	blkcg_pol_init_cpd_fn		*cpd_init_fn;
158
	blkcg_pol_free_cpd_fn		*cpd_free_fn;
159
	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;
160

161
	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
162
	blkcg_pol_init_pd_fn		*pd_init_fn;
163
164
	blkcg_pol_online_pd_fn		*pd_online_fn;
	blkcg_pol_offline_pd_fn		*pd_offline_fn;
165
	blkcg_pol_free_pd_fn		*pd_free_fn;
166
	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
167
168
};

Tejun Heo's avatar
Tejun Heo committed
169
/* the root blkcg and a constant pointer to its css, defined elsewhere */
extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;
171

Tejun Heo's avatar
Tejun Heo committed
172
173
/* blkg lookup/creation and per-queue setup and teardown */
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
void blkcg_drain_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol);

/* helpers for printing per-blkg stats into a seq_file */
const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat *rwstat);
u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
		       int off);
int blkg_print_stat_bytes(struct seq_file *sf, void *v);
int blkg_print_stat_ios(struct seq_file *sf, void *v);
int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);

u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
			    struct blkcg_policy *pol, int off);
struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
					     struct blkcg_policy *pol, int off);
209

210
/* Context filled in by blkg_conf_prep() and consumed by blkg_conf_finish(). */
struct blkg_conf_ctx {
	struct gendisk			*disk;
	struct blkcg_gq			*blkg;
	char				*body;
};

int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);


221
222
223
224
225
/* Map @css to the blkcg embedding it; a %NULL @css maps to %NULL. */
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
	if (!css)
		return NULL;

	return container_of(css, struct blkcg, css);
}

226
227
static inline struct blkcg *task_blkcg(struct task_struct *tsk)
{
228
	return css_to_blkcg(task_css(tsk, io_cgrp_id));
229
230
231
232
233
}

static inline struct blkcg *bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_css)
234
		return css_to_blkcg(bio->bi_css);
235
236
237
	return task_blkcg(current);
}

238
239
240
static inline struct cgroup_subsys_state *
task_get_blkcg_css(struct task_struct *task)
{
241
	return task_get_css(task, io_cgrp_id);
242
243
}

244
245
246
247
248
249
250
251
/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg.  Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
Tejun Heo's avatar
Tejun Heo committed
252
	return css_to_blkcg(blkcg->css.parent);
253
254
}

Tejun Heo's avatar
Tejun Heo committed
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is internal version and shouldn't be used by policy
 * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					     struct request_queue *q,
					     bool update_hint)
{
	struct blkcg_gq *blkg;

	/* the root blkcg's blkg is reachable directly from the queue */
	if (blkcg == &blkcg_root)
		return q->root_blkg;

	/* try the cached hint before falling back to the slowpath */
	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Find the blkg of the @blkcg - @q pair.  The caller must hold the RCU
 * read lock (a one-time warning is emitted otherwise).  %NULL is always
 * returned while @q is bypassing - see blk_queue_bypass_start() for
 * details.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());

	if (likely(!blk_queue_bypass(q)))
		return __blkg_lookup(blkcg, q, false);

	return NULL;
}

301
302
303
304
305
306
307
/**
 * blkg_to_pdata - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
308
309
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol)
310
{
311
	return blkg ? blkg->pd[pol->plid] : NULL;
312
313
}

314
315
316
static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
317
	return blkcg ? blkcg->cpd[pol->plid] : NULL;
318
319
}

320
321
/**
 * pdata_to_blkg - get blkg associated with policy private data
322
 * @pd: policy private data of interest
323
 *
324
 * @pd is policy private data.  Determine the blkg it's associated with.
325
 */
326
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
327
{
328
	return pd ? pd->blkg : NULL;
329
330
}

331
332
333
334
335
/* Return the blkcg that owns @cpd; a %NULL @cpd maps to %NULL. */
static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
	if (!cpd)
		return NULL;

	return cpd->blkcg;
}

Tejun Heo's avatar
Tejun Heo committed
336
337
338
339
340
341
342
343
/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
Tejun Heo's avatar
Tejun Heo committed
344
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
345
{
Tejun Heo's avatar
Tejun Heo committed
346
	char *p;
Tejun Heo's avatar
Tejun Heo committed
347

Tejun Heo's avatar
Tejun Heo committed
348
349
	p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
	if (!p) {
Tejun Heo's avatar
Tejun Heo committed
350
		strncpy(buf, "<unavailable>", buflen);
Tejun Heo's avatar
Tejun Heo committed
351
352
353
354
355
		return -ENAMETOOLONG;
	}

	memmove(buf, p, buf + buflen - p);
	return 0;
356
357
}

Tejun Heo's avatar
Tejun Heo committed
358
359
360
361
/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
362
 * The caller should be holding an existing reference.
Tejun Heo's avatar
Tejun Heo committed
363
 */
Tejun Heo's avatar
Tejun Heo committed
364
static inline void blkg_get(struct blkcg_gq *blkg)
Tejun Heo's avatar
Tejun Heo committed
365
{
366
367
	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
	atomic_inc(&blkg->refcnt);
Tejun Heo's avatar
Tejun Heo committed
368
369
}

370
void __blkg_release_rcu(struct rcu_head *rcu);
Tejun Heo's avatar
Tejun Heo committed
371
372
373
374
375

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
Tejun Heo's avatar
Tejun Heo committed
376
static inline void blkg_put(struct blkcg_gq *blkg)
Tejun Heo's avatar
Tejun Heo committed
377
{
378
379
	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
	if (atomic_dec_and_test(&blkg->refcnt))
380
		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
Tejun Heo's avatar
Tejun Heo committed
381
382
}

383
384
385
/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip subtree.
 * @p_blkg is included in the iteration and the first node to be visited.
 *
 * Descendant csses for which no blkg exists on @p_blkg's queue are
 * skipped.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

400
401
402
/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead.  Synchronization rules are the same.  @p_blkg is
 * included in the iteration and the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
/**
 * blk_get_rl - get request_list to use
 * @q: request_queue of interest
 * @bio: bio which will be attached to the allocated request (may be %NULL)
 *
 * The caller wants to allocate a request from @q to use for @bio.  Find
 * the request_list to use and obtain a reference on it.  Should be called
 * under queue_lock.  This function is guaranteed to return non-%NULL
 * request_list.
 */
static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;

	rcu_read_lock();

	blkcg = bio_blkcg(bio);

	/* bypass blkg lookup and use @q->root_rl directly for root */
	if (blkcg == &blkcg_root)
		goto root_rl;

	/*
	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
	 * or if either the blkcg or queue is going away.  Fall back to
	 * root_rl in such cases.
	 */
	blkg = blkg_lookup(blkcg, q);
	if (unlikely(!blkg))
		goto root_rl;

	/* pin the blkg; the matching put is in blk_put_rl() */
	blkg_get(blkg);
	rcu_read_unlock();
	return &blkg->rl;
root_rl:
	/* no blkg reference is taken for the root request_list */
	rcu_read_unlock();
	return &q->root_rl;
}

/**
 * blk_put_rl - put request_list
 * @rl: request_list to put
 *
 * Put the reference acquired by blk_get_rl().  Should be called under
 * queue_lock.
 */
static inline void blk_put_rl(struct request_list *rl)
{
465
	if (rl->blkg->blkcg != &blkcg_root)
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
		blkg_put(rl->blkg);
}

/**
 * blk_rq_set_rl - associate a request with a request_list
 * @rq: request of interest
 * @rl: target request_list
 *
 * Record @rl in @rq so that the request_list @rq was allocated from can
 * later be found for accounting and freeing.
 */
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
{
	rq->rl = rl;
}

/**
 * blk_rq_rl - return the request_list a request came from
 * @rq: request of interest
 *
 * Return the request_list recorded in @rq by blk_rq_set_rl().
 */
static inline struct request_list *blk_rq_rl(struct request *rq)
{
	return rq->rl;
}

/* iteration helper for blk_queue_for_each_rl(), defined elsewhere */
struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q);
/**
 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
 * @rl: loop cursor (struct request_list *)
 * @q: request_queue whose request_lists are walked
 *
 * Starts at @q->root_rl and advances with __blk_queue_next_rl() until it
 * returns %NULL.  Should be used under queue_lock.
 */
#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))

Tejun Heo's avatar
Tejun Heo committed
503
/**
 * blkg_stat_init - initialize a blkg_stat
 * @stat: blkg_stat to initialize
 * @gfp: allocation mask passed to percpu_counter_init()
 *
 * Both the percpu counter and the aux count start at zero.  Returns 0 on
 * success or the error returned by percpu_counter_init().
 */
static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
{
	int ret;

	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
	if (ret)
		return ret;

	atomic64_set(&stat->aux_cnt, 0);
	return 0;
}

/* Release the percpu counter set up by blkg_stat_init(). */
static inline void blkg_stat_exit(struct blkg_stat *stat)
{
	percpu_counter_destroy(&stat->cpu_cnt);
}

520
521
522
523
524
/**
 * blkg_stat_add - add a value to a blkg_stat
 * @stat: target blkg_stat
 * @val: value to add
 *
 * Add @val to @stat.  The caller must ensure that IRQ on the same CPU
 * don't re-enter this function for the same counter.
 */
static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
{
	__percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
 * blkg_stat_read - read the current value of a blkg_stat
 * @stat: blkg_stat to read
 */
static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
{
Tejun Heo's avatar
Tejun Heo committed
539
	return percpu_counter_sum_positive(&stat->cpu_cnt);
540
541
542
543
544
545
546
547
}

/**
 * blkg_stat_reset - reset a blkg_stat
 * @stat: blkg_stat to reset
 */
static inline void blkg_stat_reset(struct blkg_stat *stat)
{
Tejun Heo's avatar
Tejun Heo committed
548
	percpu_counter_set(&stat->cpu_cnt, 0);
549
	atomic64_set(&stat->aux_cnt, 0);
550
551
}

552
/**
553
 * blkg_stat_add_aux - add a blkg_stat into another's aux count
554
555
556
 * @to: the destination blkg_stat
 * @from: the source
 *
557
 * Add @from's count including the aux one to @to's aux count.
558
 */
559
560
static inline void blkg_stat_add_aux(struct blkg_stat *to,
				     struct blkg_stat *from)
561
{
562
563
	atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
		     &to->aux_cnt);
564
565
}

Tejun Heo's avatar
Tejun Heo committed
566
/**
 * blkg_rwstat_init - initialize a blkg_rwstat
 * @rwstat: blkg_rwstat to initialize
 * @gfp: allocation mask passed to percpu_counter_init()
 *
 * Sets up all BLKG_RWSTAT_NR percpu counters and zeroes the aux counts.
 * Returns 0 on success.  On failure the counters initialized so far are
 * destroyed and the error from percpu_counter_init() is returned.
 */
static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
{
	int i, ret;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
		if (ret) {
			/* unwind the counters that were set up before @i */
			while (--i >= 0)
				percpu_counter_destroy(&rwstat->cpu_cnt[i]);
			return ret;
		}
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
	return 0;
}
581

Tejun Heo's avatar
Tejun Heo committed
582
583
584
static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
{
	int i;
585
586

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
Tejun Heo's avatar
Tejun Heo committed
587
		percpu_counter_destroy(&rwstat->cpu_cnt[i]);
588
589
}

590
591
592
/**
 * blkg_rwstat_add - add a value to a blkg_rwstat
 * @rwstat: target blkg_rwstat
 * @op: REQ_OP
 * @op_flags: rq_flag_bits
 * @val: value to add
 *
 * Add @val to @rwstat.  The READ or WRITE counter is chosen according to
 * @op and the SYNC or ASYNC counter according to REQ_SYNC in @op_flags;
 * @val is added to one counter of each pair.  The caller is responsible
 * for synchronizing calls to this function.
 */
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
				   int op, int op_flags, uint64_t val)
{
	struct percpu_counter *cnt;

	if (op_is_write(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];

	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);

	if (op_flags & REQ_SYNC)
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];

	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
 * blkg_rwstat_read - read the current values of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
Tejun Heo's avatar
Tejun Heo committed
624
 * Read the current snapshot of @rwstat and return it in the aux counts.
625
 */
626
static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
627
{
Tejun Heo's avatar
Tejun Heo committed
628
629
	struct blkg_rwstat result;
	int i;
630

Tejun Heo's avatar
Tejun Heo committed
631
632
633
634
	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		atomic64_set(&result.aux_cnt[i],
			     percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
	return result;
635
636
637
}

/**
638
 * blkg_rwstat_total - read the total count of a blkg_rwstat
639
640
641
642
643
644
 * @rwstat: blkg_rwstat to read
 *
 * Return the total count of @rwstat regardless of the IO direction.  This
 * function can be called without synchronization and takes care of u64
 * atomicity.
 */
645
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
646
647
648
{
	struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);

Tejun Heo's avatar
Tejun Heo committed
649
650
	return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
		atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
651
652
653
654
655
656
657
658
}

/**
 * blkg_rwstat_reset - reset a blkg_rwstat
 * @rwstat: blkg_rwstat to reset
 *
 * Zeroes every percpu counter and every aux count.
 */
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		percpu_counter_set(&rwstat->cpu_cnt[i], 0);
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
}

667
/**
668
 * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
669
670
671
 * @to: the destination blkg_rwstat
 * @from: the source
 *
672
 * Add @from's count including the aux one to @to's aux count.
673
 */
674
675
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
				       struct blkg_rwstat *from)
676
677
678
679
680
{
	struct blkg_rwstat v = blkg_rwstat_read(from);
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
Tejun Heo's avatar
Tejun Heo committed
681
682
		atomic64_add(atomic64_read(&v.aux_cnt[i]) +
			     atomic64_read(&from->aux_cnt[i]),
683
			     &to->aux_cnt[i]);
684
685
}

686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
/*
 * Throttling hook used by blkcg_bio_issue_check().  A %true return is
 * treated there as "@bio was throttled".  Without
 * CONFIG_BLK_DEV_THROTTLING the stub never throttles.
 */
#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
			   struct bio *bio);
#else
static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
				  struct bio *bio) { return false; }
#endif

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;
	bool throtl = false;

	rcu_read_lock();
	blkcg = bio_blkcg(bio);

	blkg = blkg_lookup(blkcg, q);
	if (unlikely(!blkg)) {
		spin_lock_irq(q->queue_lock);
		blkg = blkg_lookup_create(blkcg, q);
		if (IS_ERR(blkg))
			blkg = NULL;
		spin_unlock_irq(q->queue_lock);
	}

	throtl = blk_throtl_bio(q, blkg, bio);

715
716
	if (!throtl) {
		blkg = blkg ?: q->root_blkg;
717
		blkg_rwstat_add(&blkg->stat_bytes, bio_op(bio), bio->bi_rw,
718
				bio->bi_iter.bi_size);
719
		blkg_rwstat_add(&blkg->stat_ios, bio_op(bio), bio->bi_rw, 1);
720
721
	}

722
723
724
725
	rcu_read_unlock();
	return !throtl;
}

726
727
#else	/* CONFIG_BLK_CGROUP */

728
729
/*
 * Empty placeholder types so that code referring to these structs still
 * compiles when CONFIG_BLK_CGROUP is disabled.
 */
struct blkcg {
};

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_gq {
};

struct blkcg_policy {
};

Tejun Heo's avatar
Tejun Heo committed
743
744
/* without CONFIG_BLK_CGROUP there is no root css; use an error pointer */
#define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))

745
746
747
748
749
750
/* !CONFIG_BLK_CGROUP stub: there is no blkcg css, always returns %NULL */
static inline struct cgroup_subsys_state *
task_get_blkcg_css(struct task_struct *task)
{
	return NULL;
}

751
752
#ifdef CONFIG_BLOCK

Tejun Heo's avatar
Tejun Heo committed
753
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
754
755
756
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
757
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
Tejun Heo's avatar
Tejun Heo committed
758
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
759
static inline int blkcg_activate_policy(struct request_queue *q,
Tejun Heo's avatar
Tejun Heo committed
760
					const struct blkcg_policy *pol) { return 0; }
761
static inline void blkcg_deactivate_policy(struct request_queue *q,
Tejun Heo's avatar
Tejun Heo committed
762
763
					   const struct blkcg_policy *pol) { }

764
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
765

766
767
768
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
Tejun Heo's avatar
Tejun Heo committed
769
770
771
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
772

773
774
775
776
777
778
static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio) { return &q->root_rl; }
static inline void blk_put_rl(struct request_list *rl) { }
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }

779
780
781
static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio) { return true; }

782
783
784
/* !CONFIG_BLK_CGROUP: visit only @q->root_rl, then stop */
#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

785
#endif	/* CONFIG_BLOCK */
786
787
#endif	/* CONFIG_BLK_CGROUP */
#endif	/* _BLK_CGROUP_H */