/*
 * Infrastructure for profiling code inserted by 'gcc -pg'.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally ported from the -rt patch by:
 *   Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code in the latency_tracer, that is:
 *
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */

#include <linux/stop_machine.h>
#include <linux/clocksource.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/suspend.h>
#include <linux/debugfs.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/uaccess.h>
#include <linux/bsearch.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/sort.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rcupdate.h>

#include <trace/events/sched.h>

#include <asm/setup.h>

#include "trace_output.h"
#include "trace_stat.h"

#define FTRACE_WARN_ON(cond)			\
	({					\
		int ___r = cond;		\
		if (WARN_ON(___r))		\
			ftrace_kill();		\
		___r;				\
	})

#define FTRACE_WARN_ON_ONCE(cond)		\
	({					\
		int ___r = cond;		\
		if (WARN_ON_ONCE(___r))		\
			ftrace_kill();		\
		___r;				\
	})

/* hash bits for specific function selection */
#define FTRACE_HASH_BITS 7
#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS)
#define FTRACE_HASH_DEFAULT_BITS 10
#define FTRACE_HASH_MAX_BITS 12

#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)

#ifdef CONFIG_DYNAMIC_FTRACE
#define INIT_REGEX_LOCK(opsname)	\
	.regex_lock	= __MUTEX_INITIALIZER(opsname.regex_lock),
#else
#define INIT_REGEX_LOCK(opsname)
#endif

static struct ftrace_ops ftrace_list_end __read_mostly = {
	.func		= ftrace_stub,
	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
};

/* ftrace_enabled is a method to turn ftrace on or off */
int ftrace_enabled __read_mostly;
static int last_ftrace_enabled;

/* Quick disabling of function tracer. */
int function_trace_stop __read_mostly;

/* Current function tracing op */
struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
/* What to set function_trace_op to */
static struct ftrace_ops *set_function_trace_op;

/* List for set_ftrace_pid's pids. */
LIST_HEAD(ftrace_pids);
struct ftrace_pid {
	struct list_head list;
	struct pid *pid;
};

/*
 * ftrace_disabled is set when an anomaly is discovered.
 * ftrace_disabled is much stronger than ftrace_enabled.
 */
static int ftrace_disabled __read_mostly;

static DEFINE_MUTEX(ftrace_lock);

static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
static struct ftrace_ops global_ops;
static struct ftrace_ops control_ops;

#if ARCH_SUPPORTS_FTRACE_OPS
static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
				 struct ftrace_ops *op, struct pt_regs *regs);
#else
/* See comment below, where ftrace_ops_list_func is defined */
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
#endif

/*
 * Traverse the ftrace_global_list, invoking all entries.  The reason that we
 * can use rcu_dereference_raw_notrace() is that elements removed from this list
 * are simply leaked, so there is no need to interact with a grace-period
 * mechanism.  The rcu_dereference_raw_notrace() calls are needed to handle
 * concurrent insertions into the ftrace_global_list.
 *
 * Silly Alpha and silly pointer-speculation compiler optimizations!
 */
#define do_for_each_ftrace_op(op, list)			\
	op = rcu_dereference_raw_notrace(list);			\
	do

/*
 * Optimized for just a single item in the list (as that is the normal case).
 */
#define while_for_each_ftrace_op(op)				\
	while (likely(op = rcu_dereference_raw_notrace((op)->next)) &&	\
	       unlikely((op) != &ftrace_list_end))

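/*
 * Statically declared ftrace_ops get their regex_lock from INIT_REGEX_LOCK()
 * above; anything else has it set up lazily here the first time the ops is
 * seen, with FTRACE_OPS_FL_INITIALIZED guarding against a double init.
 */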
static inline void ftrace_ops_init(struct ftrace_ops *ops)
{
#ifdef CONFIG_DYNAMIC_FTRACE
	if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) {
		mutex_init(&ops->regex_lock);
		ops->flags |= FTRACE_OPS_FL_INITIALIZED;
	}
#endif
}

/**
 * ftrace_nr_registered_ops - return number of ops registered
 *
 * Returns the number of ftrace_ops registered and tracing functions
 */
int ftrace_nr_registered_ops(void)
{
	struct ftrace_ops *ops;
	int cnt = 0;

	mutex_lock(&ftrace_lock);

	for (ops = ftrace_ops_list;
	     ops != &ftrace_list_end; ops = ops->next)
		cnt++;

	mutex_unlock(&ftrace_lock);

	return cnt;
}

static void
ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
			struct ftrace_ops *op, struct pt_regs *regs)
{
	int bit;

	bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
	if (bit < 0)
		return;

	do_for_each_ftrace_op(op, ftrace_global_list) {
		op->func(ip, parent_ip, op, regs);
	} while_for_each_ftrace_op(op);

	trace_clear_recursion(bit);
}

static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
			    struct ftrace_ops *op, struct pt_regs *regs)
{
	if (!test_tsk_trace_trace(current))
		return;

	ftrace_pid_function(ip, parent_ip, op, regs);
}

static void set_ftrace_pid_function(ftrace_func_t func)
{
	/* do not set ftrace_pid_function to itself! */
	if (func != ftrace_pid_func)
		ftrace_pid_function = func;
}

/**
 * clear_ftrace_function - reset the ftrace function
 *
 * This NULLs the ftrace function and in essence stops
 * tracing.  There may be lag
 */
void clear_ftrace_function(void)
{
	ftrace_trace_function = ftrace_stub;
	ftrace_pid_function = ftrace_stub;
}

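/*
 * Control ops carry a per-cpu "disabled" counter: control_ops_alloc() hands
 * out the percpu storage and control_ops_disable_all() starts every CPU off
 * disabled, so the callback stays quiet on a CPU until it is enabled there.
 */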
static void control_ops_disable_all(struct ftrace_ops *ops)
{
	int cpu;

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(ops->disabled, cpu) = 1;
}

static int control_ops_alloc(struct ftrace_ops *ops)
{
	int __percpu *disabled;

	disabled = alloc_percpu(int);
	if (!disabled)
		return -ENOMEM;

	ops->disabled = disabled;
	control_ops_disable_all(ops);
	return 0;
}

static void control_ops_free(struct ftrace_ops *ops)
{
	free_percpu(ops->disabled);
}

static void update_global_ops(void)
{
	ftrace_func_t func = ftrace_global_list_func;
	void *private = NULL;

	/* The list has its own recursion protection. */
	global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;

	/*
	 * If there's only one function registered, then call that
	 * function directly. Otherwise, we need to iterate over the
	 * registered callers.
	 */
	if (ftrace_global_list == &ftrace_list_end ||
	    ftrace_global_list->next == &ftrace_list_end) {
		func = ftrace_global_list->func;
		private = ftrace_global_list->private;
		/*
		 * As we are calling the function directly,
		 * if it does not have recursion protection,
		 * the function_trace_op needs to be updated
		 * accordingly.
		 */
		if (!(ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE))
			global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
	}

	/* If we filter on pids, update to use the pid function */
	if (!list_empty(&ftrace_pids)) {
		set_ftrace_pid_function(func);
		func = ftrace_pid_func;
	}

	global_ops.func = func;
	global_ops.private = private;
}

static void ftrace_sync(struct work_struct *work)
{
	/*
	 * This function is just a stub to implement a hard force
	 * of synchronize_sched(). This requires synchronizing
	 * tasks even in userspace and idle.
	 *
	 * Yes, function tracing is rude.
	 */
}

static void ftrace_sync_ipi(void *data)
{
	/* Probably not needed, but do it anyway */
	smp_rmb();
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static void update_function_graph_func(void);
#else
static inline void update_function_graph_func(void) { }
#endif

static void update_ftrace_function(void)
{
	ftrace_func_t func;

	update_global_ops();

	/*
	 * If we are at the end of the list and this ops is
	 * recursion safe and not dynamic and the arch supports passing ops,
	 * then have the mcount trampoline call the function directly.
	 */
	if (ftrace_ops_list == &ftrace_list_end ||
	    (ftrace_ops_list->next == &ftrace_list_end &&
	     !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) &&
	     (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) &&
	     !FTRACE_FORCE_LIST_FUNC)) {
		/* Set the ftrace_ops that the arch callback uses */
		if (ftrace_ops_list == &global_ops)
			set_function_trace_op = ftrace_global_list;
		else
			set_function_trace_op = ftrace_ops_list;
		func = ftrace_ops_list->func;
	} else {
		/* Just use the default ftrace_ops */
		set_function_trace_op = &ftrace_list_end;
		func = ftrace_ops_list_func;
	}

	/* If there's no change, then do nothing more here */
	if (ftrace_trace_function == func)
		return;

	update_function_graph_func();

	/*
	 * If we are using the list function, it doesn't care
	 * about the function_trace_ops.
	 */
	if (func == ftrace_ops_list_func) {
		ftrace_trace_function = func;
		/*
		 * Don't even bother setting function_trace_ops,
		 * it would be racy to do so anyway.
		 */
		return;
	}

#ifndef CONFIG_DYNAMIC_FTRACE
	/*
	 * For static tracing, we need to be a bit more careful.
	 * The function change takes effect immediately. Thus,
	 * we need to coordinate the setting of the function_trace_ops
	 * with the setting of the ftrace_trace_function.
	 *
	 * Set the function to the list ops, which will call the
	 * function we want, albeit indirectly, but it handles the
	 * ftrace_ops and doesn't depend on function_trace_op.
	 */
	ftrace_trace_function = ftrace_ops_list_func;
	/*
	 * Make sure all CPUs see this. Yes this is slow, but static
	 * tracing is slow and nasty to have enabled.
	 */
	schedule_on_each_cpu(ftrace_sync);
	/* Now all cpus are using the list ops. */
	function_trace_op = set_function_trace_op;
	/* Make sure the function_trace_op is visible on all CPUs */
	smp_wmb();
	/* Nasty way to force a rmb on all cpus */
	smp_call_function(ftrace_sync_ipi, NULL, 1);
	/* OK, we are all set to update the ftrace_trace_function now! */
#endif /* !CONFIG_DYNAMIC_FTRACE */

	ftrace_trace_function = func;
}

static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
{
	ops->next = *list;
	/*
	 * We are entering ops into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the ops->next pointer is valid before another CPU sees
	 * the ops pointer included into the list.
	 */
	rcu_assign_pointer(*list, ops);
}

static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
{
	struct ftrace_ops **p;

	/*
	 * If we are removing the last function, then simply point
	 * to the ftrace_stub.
	 */
	if (*list == ops && ops->next == &ftrace_list_end) {
		*list = &ftrace_list_end;
		return 0;
	}

	for (p = list; *p != &ftrace_list_end; p = &(*p)->next)
		if (*p == ops)
			break;

	if (*p != ops)
		return -1;

	*p = (*p)->next;
	return 0;
}

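/*
 * Global and control ops live on their own sub-lists; the shared main_ops
 * (global_ops or control_ops) is what actually sits on ftrace_ops_list.
 * It is hooked in when the sub-list gets its first entry and dropped again
 * once the sub-list is empty.
 */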
static void add_ftrace_list_ops(struct ftrace_ops **list,
				struct ftrace_ops *main_ops,
				struct ftrace_ops *ops)
{
	int first = *list == &ftrace_list_end;
	add_ftrace_ops(list, ops);
	if (first)
		add_ftrace_ops(&ftrace_ops_list, main_ops);
}

static int remove_ftrace_list_ops(struct ftrace_ops **list,
				  struct ftrace_ops *main_ops,
				  struct ftrace_ops *ops)
{
	int ret = remove_ftrace_ops(list, ops);
	if (!ret && *list == &ftrace_list_end)
		ret = remove_ftrace_ops(&ftrace_ops_list, main_ops);
	return ret;
}

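/*
 * Hook the ops into the proper list (global, control, or the plain
 * ftrace_ops_list) and, if ftrace is enabled, repick the function that
 * the mcount trampoline should call.
 */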
static int __register_ftrace_function(struct ftrace_ops *ops)
{
	if (FTRACE_WARN_ON(ops == &global_ops))
		return -EINVAL;

	if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
		return -EBUSY;

	/* We don't support both control and global flags set. */
	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
		return -EINVAL;

#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
	/*
	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
	 * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant.
	 */
	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS &&
	    !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED))
		return -EINVAL;

	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)
		ops->flags |= FTRACE_OPS_FL_SAVE_REGS;
#endif

	if (!core_kernel_data((unsigned long)ops))
		ops->flags |= FTRACE_OPS_FL_DYNAMIC;

	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
		add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
		ops->flags |= FTRACE_OPS_FL_ENABLED;
	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
		if (control_ops_alloc(ops))
			return -ENOMEM;
		add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
	} else
		add_ftrace_ops(&ftrace_ops_list, ops);

	if (ftrace_enabled)
		update_ftrace_function();

	return 0;
}

static int __unregister_ftrace_function(struct ftrace_ops *ops)
{
	int ret;

	if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
		return -EBUSY;

	if (FTRACE_WARN_ON(ops == &global_ops))
		return -EINVAL;

	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
		ret = remove_ftrace_list_ops(&ftrace_global_list,
					     &global_ops, ops);
		if (!ret)
			ops->flags &= ~FTRACE_OPS_FL_ENABLED;
	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
		ret = remove_ftrace_list_ops(&ftrace_control_list,
					     &control_ops, ops);
	} else
		ret = remove_ftrace_ops(&ftrace_ops_list, ops);

	if (ret < 0)
		return ret;

	if (ftrace_enabled)
		update_ftrace_function();

	return 0;
}

static void ftrace_update_pid_func(void)
{
	/* Only do something if we are tracing something */
	if (ftrace_trace_function == ftrace_stub)
		return;

	update_ftrace_function();
}

#ifdef CONFIG_FUNCTION_PROFILER
struct ftrace_profile {
	struct hlist_node		node;
	unsigned long			ip;
	unsigned long			counter;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	unsigned long long		time;
	unsigned long long		time_squared;
#endif
};

struct ftrace_profile_page {
	struct ftrace_profile_page	*next;
	unsigned long			index;
	struct ftrace_profile		records[];
};

struct ftrace_profile_stat {
	atomic_t			disabled;
	struct hlist_head		*hash;
	struct ftrace_profile_page	*pages;
	struct ftrace_profile_page	*start;
	struct tracer_stat		stat;
};

#define PROFILE_RECORDS_SIZE						\
	(PAGE_SIZE - offsetof(struct ftrace_profile_page, records))

#define PROFILES_PER_PAGE					\
	(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))

static int ftrace_profile_enabled __read_mostly;

/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
static DEFINE_MUTEX(ftrace_profile_lock);

static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);

#define FTRACE_PROFILE_HASH_BITS 10
#define FTRACE_PROFILE_HASH_SIZE (1 << FTRACE_PROFILE_HASH_BITS)
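
/*
 * Rough sizing sketch (assuming a 64-bit build with 4K pages and the graph
 * tracer enabled): struct ftrace_profile is 48 bytes, so PROFILES_PER_PAGE
 * comes out to roughly 85 records per page; without the graph fields it is
 * 32 bytes and about 127 records per page.
 */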

static void *
function_stat_next(void *v, int idx)
{
	struct ftrace_profile *rec = v;
	struct ftrace_profile_page *pg;

	pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);

 again:
	if (idx != 0)
		rec++;

	if ((void *)rec >= (void *)&pg->records[pg->index]) {
		pg = pg->next;
		if (!pg)
			return NULL;
		rec = &pg->records[0];
		if (!rec->counter)
			goto again;
	}

	return rec;
}

static void *function_stat_start(struct tracer_stat *trace)
{
	struct ftrace_profile_stat *stat =
		container_of(trace, struct ftrace_profile_stat, stat);

	if (!stat || !stat->start)
		return NULL;

	return function_stat_next(&stat->start->records[0], 0);
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* function graph compares on total time */
static int function_stat_cmp(void *p1, void *p2)
{
	struct ftrace_profile *a = p1;
	struct ftrace_profile *b = p2;

	if (a->time < b->time)
		return -1;
	if (a->time > b->time)
		return 1;
	else
		return 0;
}
#else
/* when not using function graph, compare against hits */
static int function_stat_cmp(void *p1, void *p2)
{
	struct ftrace_profile *a = p1;
	struct ftrace_profile *b = p2;

	if (a->counter < b->counter)
		return -1;
	if (a->counter > b->counter)
		return 1;
	else
		return 0;
}
#endif

static int function_stat_headers(struct seq_file *m)
{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	seq_printf(m, "  Function                               "
		   "Hit    Time            Avg             s^2\n"
		      "  --------                               "
		   "---    ----            ---             ---\n");
#else
	seq_printf(m, "  Function                               Hit\n"
		      "  --------                               ---\n");
#endif
	return 0;
}

static int function_stat_show(struct seq_file *m, void *v)
{
	struct ftrace_profile *rec = v;
	char str[KSYM_SYMBOL_LEN];
	int ret = 0;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	static struct trace_seq s;
	unsigned long long avg;
	unsigned long long stddev;
#endif
	mutex_lock(&ftrace_profile_lock);

	/* we raced with function_profile_reset() */
	if (unlikely(rec->counter == 0)) {
		ret = -EBUSY;
		goto out;
	}

	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
	seq_printf(m, "  %-30.30s  %10lu", str, rec->counter);

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	seq_printf(m, "    ");
	avg = rec->time;
	do_div(avg, rec->counter);

	/* Sample standard deviation (s^2) */
	if (rec->counter <= 1)
		stddev = 0;
	else {
		/*
		 * Apply Welford's method:
		 * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
		 */
		stddev = rec->counter * rec->time_squared -
			 rec->time * rec->time;

		/*
		 * Divide only 1000 for ns^2 -> us^2 conversion.
		 * trace_print_graph_duration will divide 1000 again.
		 */
		do_div(stddev, rec->counter * (rec->counter - 1) * 1000);
	}

	trace_seq_init(&s);
	trace_print_graph_duration(rec->time, &s);
	trace_seq_puts(&s, "    ");
	trace_print_graph_duration(avg, &s);
	trace_seq_puts(&s, "    ");
	trace_print_graph_duration(stddev, &s);
	trace_print_seq(m, &s);
#endif
	seq_putc(m, '\n');
out:
	mutex_unlock(&ftrace_profile_lock);

	return ret;
}

static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
{
	struct ftrace_profile_page *pg;

	pg = stat->pages = stat->start;

	while (pg) {
		memset(pg->records, 0, PROFILE_RECORDS_SIZE);
		pg->index = 0;
		pg = pg->next;
	}

	memset(stat->hash, 0,
	       FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
}

int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
{
	struct ftrace_profile_page *pg;
	int functions;
	int pages;
	int i;

	/* If we already allocated, do nothing */
	if (stat->pages)
		return 0;

	stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
	if (!stat->pages)
		return -ENOMEM;

#ifdef CONFIG_DYNAMIC_FTRACE
	functions = ftrace_update_tot_cnt;
#else
	/*
	 * We do not know the number of functions that exist because
	 * dynamic tracing is what counts them. With past experience
	 * we have around 20K functions. That should be more than enough.
	 * It is highly unlikely we will execute every function in
	 * the kernel.
	 */
	functions = 20000;
#endif

	pg = stat->start = stat->pages;

	pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);

	for (i = 1; i < pages; i++) {
		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
		if (!pg->next)
			goto out_free;
		pg = pg->next;
	}

	return 0;

 out_free:
	pg = stat->start;
	while (pg) {
		unsigned long tmp = (unsigned long)pg;

		pg = pg->next;
		free_page(tmp);
	}

	stat->pages = NULL;
	stat->start = NULL;

	return -ENOMEM;
}

static int ftrace_profile_init_cpu(int cpu)
{
	struct ftrace_profile_stat *stat;
	int size;

	stat = &per_cpu(ftrace_profile_stats, cpu);

	if (stat->hash) {
		/* If the profile is already created, simply reset it */
		ftrace_profile_reset(stat);
		return 0;
	}

	/*
	 * We are profiling all functions, but usually only a few thousand
	 * functions are hit. We'll make a hash of 1024 items.
	 */
	size = FTRACE_PROFILE_HASH_SIZE;

	stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);

	if (!stat->hash)
		return -ENOMEM;

	/* Preallocate the function profiling pages */
	if (ftrace_profile_pages_init(stat) < 0) {
		kfree(stat->hash);
		stat->hash = NULL;
		return -ENOMEM;
	}

	return 0;
}

static int ftrace_profile_init(void)
{
	int cpu;
	int ret = 0;

	for_each_possible_cpu(cpu) {
		ret = ftrace_profile_init_cpu(cpu);
		if (ret)
			break;
	}

	return ret;
}

/* interrupts must be disabled */
static struct ftrace_profile *
ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
{
	struct ftrace_profile *rec;
	struct hlist_head *hhd;
	unsigned long key;

	key = hash_long(ip, FTRACE_PROFILE_HASH_BITS);
	hhd = &stat->hash[key];

	if (hlist_empty(hhd))
		return NULL;

	hlist_for_each_entry_rcu_notrace(rec, hhd, node) {
		if (rec->ip == ip)
			return rec;
	}

	return NULL;
}

static void ftrace_add_profile(struct ftrace_profile_stat *stat,
			       struct ftrace_profile *rec)
{
	unsigned long key;

	key = hash_long(rec->ip, FTRACE_PROFILE_HASH_BITS);
	hlist_add_head_rcu(&rec->node, &stat->hash[key]);
}

/*
 * The memory is already allocated; this simply finds a new record to use.
 */
static struct ftrace_profile *
ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
{
	struct ftrace_profile *rec = NULL;

	/* prevent recursion (from NMIs) */
	if (atomic_inc_return(&stat->disabled) != 1)
		goto out;

	/*
	 * Try to find the function again since an NMI
	 * could have added it
	 */
	rec = ftrace_find_profiled_func(stat, ip);
	if (rec)
		goto out;

	if (stat->pages->index == PROFILES_PER_PAGE) {
		if (!stat->pages->next)
			goto out;
		stat->pages = stat->pages->next;
	}

	rec = &stat->pages->records[stat->pages->index++];
	rec->ip = ip;
	ftrace_add_profile(stat, rec);

 out:
	atomic_dec(&stat->disabled);

	return rec;
}
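
/*
 * Function tracer callback for the profiler: look up (or allocate) the
 * record for this ip and bump its hit counter. IRQs are disabled so the
 * per-cpu stat cannot change under us.
 */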

static void
function_profile_call(unsigned long ip, unsigned long parent_ip,
		      struct ftrace_ops *ops, struct pt_regs *regs)
{
	struct ftrace_profile_stat *stat;
	struct ftrace_profile *rec;
	unsigned long flags;

	if (!ftrace_profile_enabled)
		return;

	local_irq_save(flags);

	stat = &__get_cpu_var(ftrace_profile_stats);
	if (!stat->hash || !ftrace_profile_enabled)
		goto out;

	rec = ftrace_find_profiled_func(stat, ip);
	if (!rec) {
		rec = ftrace_profile_alloc(stat, ip);
		if (!rec)
			goto out;
	}

	rec->counter++;
 out:
	local_irq_restore(flags);
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int profile_graph_entry(struct ftrace_graph_ent *trace)
{
	function_profile_call(trace->func, 0, NULL, NULL);
	return 1;
}

static void profile_graph_return(struct ftrace_graph_ret *trace)
{
	struct ftrace_profile_stat *stat;
	unsigned long long calltime;
	struct ftrace_profile *rec;
	unsigned long flags;

	local_irq_save(flags);
	stat = &__get_cpu_var(ftrace_profile_stats);
	if (!stat->hash || !ftrace_profile_enabled)
		goto out;

	/* If the calltime was zero'd ignore it */
	if (!trace->calltime)
		goto out;

	calltime = trace->rettime - trace->calltime;
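
	/*
	 * Without TRACE_ITER_GRAPH_TIME only the function's own time is
	 * charged: the children accumulated their time into this frame's
	 * subtime, which gets subtracted back out of calltime below.
	 */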

	if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
		int index;

		index = trace->depth;

		/* Append this call time to the parent time to subtract */
		if (index)
			current->ret_stack[index - 1].subtime += calltime;

		if (current->ret_stack[index].subtime < calltime)
			calltime -= current->ret_stack[index].subtime;
		else
			calltime = 0;
	}

	rec = ftrace_find_profiled_func(stat, trace->func);
	if (rec) {
		rec->time += calltime;
		rec->time_squared += calltime * calltime;
	}

 out:
	local_irq_restore(flags);
}

static int register_ftrace_profiler(void)
{
	return register_ftrace_graph(&profile_graph_return,
				     &profile_graph_entry);
}

static void unregister_ftrace_profiler(void)
{
	unregister_ftrace_graph();
}
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly = {
	.func		= function_profile_call,
	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
	INIT_REGEX_LOCK(ftrace_profile_ops)
};

static int register_ftrace_profiler(void)
{
	return register_ftrace_function(&ftrace_profile_ops);
}

static void unregister_ftrace_profiler(void)
{
	unregister_ftrace_function(&ftrace_profile_ops);
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

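/*
 * Writing a non-zero value allocates the per-cpu profile data (if needed)
 * and registers the profiler; writing zero tears it back down. A typical
 * session, assuming debugfs is mounted at /sys/kernel/debug:
 *
 *   echo 1 > /sys/kernel/debug/tracing/function_profile_enabled
 *   cat /sys/kernel/debug/tracing/trace_stat/function0
 */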
static ssize_t
ftrace_profile_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	val = !!val;

	mutex_lock(&ftrace_profile_lock);
	if (ftrace_profile_enabled ^ val) {
		if (val) {
			ret = ftrace_profile_init();
			if (ret < 0) {
				cnt = ret;
				goto out;
			}

			ret = register_ftrace_profiler();
			if (ret < 0) {
				cnt = ret;
				goto out;
			}
			ftrace_profile_enabled = 1;
		} else {
			ftrace_profile_enabled = 0;
			/*
			 * unregister_ftrace_profiler calls stop_machine
			 * so this acts like a synchronize_sched.
			 */
			unregister_ftrace_profiler();
		}
	}
 out:
	mutex_unlock(&ftrace_profile_lock);

	*ppos += cnt;

	return cnt;
}

static ssize_t
ftrace_profile_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	char buf[64];		/* big enough to hold a number */
	int r;

	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static const struct file_operations ftrace_profile_fops = {
	.open		= tracing_open_generic,
	.read		= ftrace_profile_read,
	.write		= ftrace_profile_write,
	.llseek		= default_llseek,
};

/* used to initialize the real stat files */
static struct tracer_stat function_stats __initdata = {
	.name		= "functions",
	.stat_start	= function_stat_start,
	.stat_next	= function_stat_next,
	.stat_cmp	= function_stat_cmp,
	.stat_headers	= function_stat_headers,
	.stat_show	= function_stat_show
};

static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
	struct ftrace_profile_stat *stat;
	struct dentry *entry;
	char *name;
	int ret;
	int cpu;

	for_each_possible_cpu(cpu) {
		stat = &per_cpu(ftrace_profile_stats, cpu);

		/* allocate enough for function name + cpu number */
		name = kmalloc(32, GFP_KERNEL);
		if (!name) {
			/*
			 * The files created are permanent, if something happens
			 * we still do not free memory.
			 */
			WARN(1,
			     "Could not allocate stat file for cpu %d\n",
			     cpu);
			return;
		}
		stat->stat = function_stats;
		snprintf(name, 32, "function%d", cpu);
		stat->stat.name = name;
		ret = register_stat_tracer(&stat->stat);
		if (ret) {
			WARN(1,
			     "Could not register function stat for cpu %d\n",
			     cpu);
			kfree(name);
			return;
		}
	}

	entry = debugfs_create_file("function_profile_enabled", 0644,
				    d_tracer, NULL, &ftrace_profile_fops);
	if (!entry)
		pr_warning("Could not create debugfs "
			   "'function_profile_enabled' entry\n");
}

#else /* CONFIG_FUNCTION_PROFILER */
static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
}
#endif /* CONFIG_FUNCTION_PROFILER */

static struct pid * const ftrace_swapper_pid = &init_struct_pid;

#ifdef CONFIG_DYNAMIC_FTRACE

#ifndef CONFIG_FTRACE_MCOUNT_RECORD
# error Dynamic ftrace depends on MCOUNT_RECORD
#endif

static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;

struct ftrace_func_probe {
	struct hlist_node	node;
	struct ftrace_probe_ops	*ops;
	unsigned long		flags;
	unsigned long		ip;
	void			*data;
	struct list_head	free_list;
};

struct ftrace_func_entry {
	struct hlist_node hlist;