/*
 * Infrastructure for profiling code inserted by 'gcc -pg'.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally ported from the -rt patch by:
 *   Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code in the latency_tracer, that is:
 *
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */

#include <linux/stop_machine.h>
#include <linux/clocksource.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/suspend.h>
#include <linux/debugfs.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/uaccess.h>
#include <linux/bsearch.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/sort.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rcupdate.h>

#include <trace/events/sched.h>

#include <asm/setup.h>

#include "trace_output.h"
#include "trace_stat.h"

#define FTRACE_WARN_ON(cond)			\
	({					\
		int ___r = cond;		\
		if (WARN_ON(___r))		\
			ftrace_kill();		\
		___r;				\
	})

#define FTRACE_WARN_ON_ONCE(cond)		\
	({					\
		int ___r = cond;		\
		if (WARN_ON_ONCE(___r))		\
			ftrace_kill();		\
		___r;				\
	})
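
/*
 * Both macros above are GNU statement expressions: they evaluate to the
 * tested condition itself, so a caller can shut ftrace down and react to
 * the failure in one step, e.g. as __register_ftrace_function() does below:
 *
 *	if (FTRACE_WARN_ON(ops == &global_ops))
 *		return -EINVAL;
 */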
/* hash bits for specific function selection */
#define FTRACE_HASH_BITS 7
#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS)

#define FTRACE_HASH_DEFAULT_BITS 10
#define FTRACE_HASH_MAX_BITS 12

#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)

#ifdef CONFIG_DYNAMIC_FTRACE
#define INIT_REGEX_LOCK(opsname)	\
	.regex_lock	= __MUTEX_INITIALIZER(opsname.regex_lock),
#else
#define INIT_REGEX_LOCK(opsname)
#endif

static struct ftrace_ops ftrace_list_end __read_mostly = {
	.func		= ftrace_stub,
	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
};

/* ftrace_enabled is a method to turn ftrace on or off */
int ftrace_enabled __read_mostly;
static int last_ftrace_enabled;

/* Quick disabling of function tracer. */
int function_trace_stop __read_mostly;

/* Current function tracing op */
struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;

/* List for set_ftrace_pid's pids. */
LIST_HEAD(ftrace_pids);
struct ftrace_pid {
	struct list_head list;
	struct pid *pid;
};

/*
 * ftrace_disabled is set when an anomaly is discovered.
 * ftrace_disabled is much stronger than ftrace_enabled.
 */
static int ftrace_disabled __read_mostly;

static DEFINE_MUTEX(ftrace_lock);

static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
static struct ftrace_ops global_ops;
static struct ftrace_ops control_ops;

#if ARCH_SUPPORTS_FTRACE_OPS
static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
				 struct ftrace_ops *op, struct pt_regs *regs);
#else
/* See comment below, where ftrace_ops_list_func is defined */
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
#endif

/*
 * Traverse the ftrace_global_list, invoking all entries.  The reason that we
 * can use rcu_dereference_raw_notrace() is that elements removed from this list
 * are simply leaked, so there is no need to interact with a grace-period
 * mechanism.  The rcu_dereference_raw_notrace() calls are needed to handle
 * concurrent insertions into the ftrace_global_list.
 *
 * Silly Alpha and silly pointer-speculation compiler optimizations!
 */
#define do_for_each_ftrace_op(op, list)			\
	op = rcu_dereference_raw_notrace(list);			\
	do

/*
 * Optimized for just a single item in the list (as that is the normal case).
 */
#define while_for_each_ftrace_op(op)				\
	while (likely(op = rcu_dereference_raw_notrace((op)->next)) &&	\
	       unlikely((op) != &ftrace_list_end))

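/*
 * The two macros above are meant to be used as a pair; the traversal in
 * ftrace_global_list_func() below is the canonical example:
 *
 *	do_for_each_ftrace_op(op, ftrace_global_list) {
 *		op->func(ip, parent_ip, op, regs);
 *	} while_for_each_ftrace_op(op);
 */
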
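/*
 * Ops declared in this file get regex_lock set up statically via
 * INIT_REGEX_LOCK(); for everything else the lock is initialized lazily
 * here, with FTRACE_OPS_FL_INITIALIZED making the init a one-time event.
 */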
static inline void ftrace_ops_init(struct ftrace_ops *ops)
{
#ifdef CONFIG_DYNAMIC_FTRACE
	if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) {
		mutex_init(&ops->regex_lock);
		ops->flags |= FTRACE_OPS_FL_INITIALIZED;
	}
#endif
}

/**
 * ftrace_nr_registered_ops - return number of ops registered
 *
 * Returns the number of ftrace_ops registered and tracing functions
 */
int ftrace_nr_registered_ops(void)
{
	struct ftrace_ops *ops;
	int cnt = 0;

	mutex_lock(&ftrace_lock);

	for (ops = ftrace_ops_list;
	     ops != &ftrace_list_end; ops = ops->next)
		cnt++;

	mutex_unlock(&ftrace_lock);

	return cnt;
}

static void
ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
			struct ftrace_ops *op, struct pt_regs *regs)
{
	int bit;

	bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
	if (bit < 0)
		return;

	do_for_each_ftrace_op(op, ftrace_global_list) {
		op->func(ip, parent_ip, op, regs);
	} while_for_each_ftrace_op(op);

	trace_clear_recursion(bit);
}

static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
			    struct ftrace_ops *op, struct pt_regs *regs)
{
	if (!test_tsk_trace_trace(current))
		return;

	ftrace_pid_function(ip, parent_ip, op, regs);
}

static void set_ftrace_pid_function(ftrace_func_t func)
{
	/* do not set ftrace_pid_function to itself! */
	if (func != ftrace_pid_func)
		ftrace_pid_function = func;
}

/**
 * clear_ftrace_function - reset the ftrace function
 *
 * This NULLs the ftrace function and in essence stops
 * tracing.  There may be a lag before tracing fully stops.
 */
void clear_ftrace_function(void)
{
	ftrace_trace_function = ftrace_stub;
	ftrace_pid_function = ftrace_stub;
}

static void control_ops_disable_all(struct ftrace_ops *ops)
{
	int cpu;

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(ops->disabled, cpu) = 1;
}

static int control_ops_alloc(struct ftrace_ops *ops)
{
	int __percpu *disabled;

	disabled = alloc_percpu(int);
	if (!disabled)
		return -ENOMEM;

	ops->disabled = disabled;
	control_ops_disable_all(ops);
	return 0;
}

static void control_ops_free(struct ftrace_ops *ops)
{
	free_percpu(ops->disabled);
}

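/*
 * Control ops carry a per-cpu "disabled" counter rather than a single flag:
 * control_ops_alloc() starts every CPU at 1, so a freshly registered
 * FTRACE_OPS_FL_CONTROL ops starts out disabled on every CPU and has to be
 * enabled per CPU by its user before its callback fires there.
 */
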
static void update_global_ops(void)
{
	ftrace_func_t func;

	/*
	 * If there's only one function registered, then call that
	 * function directly. Otherwise, we need to iterate over the
	 * registered callers.
	 */
	if (ftrace_global_list == &ftrace_list_end ||
	    ftrace_global_list->next == &ftrace_list_end) {
		func = ftrace_global_list->func;
		/*
		 * As we are calling the function directly, if it does
		 * not have recursion protection, the function_trace_op
		 * needs to be updated accordingly.
		 */
		if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
			global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
		else
			global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
	} else {
		func = ftrace_global_list_func;
		/* The list has its own recursion protection. */
		global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
	}

	/* If we filter on pids, update to use the pid function */
	if (!list_empty(&ftrace_pids)) {
		set_ftrace_pid_function(func);
		func = ftrace_pid_func;
	}

	global_ops.func = func;
}

static void update_ftrace_function(void)
{
	ftrace_func_t func;

	update_global_ops();

	/*
	 * If we are at the end of the list and this ops is
	 * recursion safe and not dynamic and the arch supports passing ops,
	 * then have the mcount trampoline call the function directly.
	 */
	if (ftrace_ops_list == &ftrace_list_end ||
	    (ftrace_ops_list->next == &ftrace_list_end &&
	     !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) &&
	     (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) &&
	     !FTRACE_FORCE_LIST_FUNC)) {
		/* Set the ftrace_ops that the arch callback uses */
		if (ftrace_ops_list == &global_ops)
			function_trace_op = ftrace_global_list;
		else
			function_trace_op = ftrace_ops_list;
		func = ftrace_ops_list->func;
	} else {
		/* Just use the default ftrace_ops */
		function_trace_op = &ftrace_list_end;
		func = ftrace_ops_list_func;
	}

	ftrace_trace_function = func;
}

static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
{
	ops->next = *list;
	/*
	 * We are entering ops into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the ops->next pointer is valid before another CPU sees
	 * the ops pointer included into the list.
	 */
	rcu_assign_pointer(*list, ops);
}

static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
{
	struct ftrace_ops **p;

	/*
	 * If we are removing the last function, then simply point
	 * to the ftrace_stub.
	 */
	if (*list == ops && ops->next == &ftrace_list_end) {
		*list = &ftrace_list_end;
		return 0;
	}

	for (p = list; *p != &ftrace_list_end; p = &(*p)->next)
		if (*p == ops)
			break;

	if (*p != ops)
		return -1;

	*p = (*p)->next;
	return 0;
}

static void add_ftrace_list_ops(struct ftrace_ops **list,
				struct ftrace_ops *main_ops,
				struct ftrace_ops *ops)
{
	int first = *list == &ftrace_list_end;
	add_ftrace_ops(list, ops);
	if (first)
		add_ftrace_ops(&ftrace_ops_list, main_ops);
}

static int remove_ftrace_list_ops(struct ftrace_ops **list,
				  struct ftrace_ops *main_ops,
				  struct ftrace_ops *ops)
{
	int ret = remove_ftrace_ops(list, ops);
	if (!ret && *list == &ftrace_list_end)
		ret = remove_ftrace_ops(&ftrace_ops_list, main_ops);
	return ret;
}

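/*
 * The two helpers above manage the global and control sub-lists: the
 * placeholder ops (global_ops or control_ops) is itself added to
 * ftrace_ops_list only when its sub-list gains its first entry, and is
 * removed again once that sub-list drains back to ftrace_list_end.
 */
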
static int __register_ftrace_function(struct ftrace_ops *ops)
{
	if (unlikely(ftrace_disabled))
		return -ENODEV;

	if (FTRACE_WARN_ON(ops == &global_ops))
		return -EINVAL;

	if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
		return -EBUSY;

	/* We don't support both control and global flags set. */
	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
		return -EINVAL;

#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
	/*
	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
	 * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant.
	 */
	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS &&
	    !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED))
		return -EINVAL;

	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)
		ops->flags |= FTRACE_OPS_FL_SAVE_REGS;
#endif

	if (!core_kernel_data((unsigned long)ops))
		ops->flags |= FTRACE_OPS_FL_DYNAMIC;

	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
		add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
		ops->flags |= FTRACE_OPS_FL_ENABLED;
	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
		if (control_ops_alloc(ops))
			return -ENOMEM;
		add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
	} else
		add_ftrace_ops(&ftrace_ops_list, ops);

	if (ftrace_enabled)
		update_ftrace_function();

	return 0;
}

static void ftrace_sync(struct work_struct *work)
{
	/*
	 * This function is just a stub to implement a hard force
	 * of synchronize_sched(). This requires synchronizing
	 * tasks even in userspace and idle.
	 *
	 * Yes, function tracing is rude.
	 */
}

static int __unregister_ftrace_function(struct ftrace_ops *ops)
{
	int ret;

	if (ftrace_disabled)
		return -ENODEV;

	if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
		return -EBUSY;

	if (FTRACE_WARN_ON(ops == &global_ops))
		return -EINVAL;

	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
		ret = remove_ftrace_list_ops(&ftrace_global_list,
					     &global_ops, ops);
		if (!ret)
			ops->flags &= ~FTRACE_OPS_FL_ENABLED;
	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
		ret = remove_ftrace_list_ops(&ftrace_control_list,
					     &control_ops, ops);
		if (!ret) {
			/*
			 * The ftrace_ops is now removed from the list,
			 * so there'll be no new users. We must ensure
			 * all current users are done before we free
			 * the control data.
			 * Note synchronize_sched() is not enough, as we
			 * use preempt_disable() to do RCU, but the function
			 * tracer can be called where RCU is not active
			 * (before user_exit()).
			 */
			schedule_on_each_cpu(ftrace_sync);
			control_ops_free(ops);
		}
	} else
		ret = remove_ftrace_ops(&ftrace_ops_list, ops);

	if (ret < 0)
		return ret;

	if (ftrace_enabled)
		update_ftrace_function();

	/*
	 * Dynamic ops may be freed, we must make sure that all
	 * callers are done before leaving this function.
	 *
	 * Again, normal synchronize_sched() is not good enough.
	 * We need to do a hard force of sched synchronization.
	 */
	if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
		schedule_on_each_cpu(ftrace_sync);

	return 0;
}

static void ftrace_update_pid_func(void)
{
	/* Only do something if we are tracing something */
	if (ftrace_trace_function == ftrace_stub)
		return;

	update_ftrace_function();
}

#ifdef CONFIG_FUNCTION_PROFILER
struct ftrace_profile {
	struct hlist_node		node;
	unsigned long			ip;
	unsigned long			counter;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	unsigned long long		time;
	unsigned long long		time_squared;
#endif
};

struct ftrace_profile_page {
	struct ftrace_profile_page	*next;
	unsigned long			index;
	struct ftrace_profile		records[];
};

struct ftrace_profile_stat {
	atomic_t			disabled;
	struct hlist_head		*hash;
	struct ftrace_profile_page	*pages;
	struct ftrace_profile_page	*start;
	struct tracer_stat		stat;
};

#define PROFILE_RECORDS_SIZE						\
	(PAGE_SIZE - offsetof(struct ftrace_profile_page, records))

#define PROFILES_PER_PAGE					\
	(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
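
/*
 * Rough sizing, assuming a 64-bit kernel, 4 KB pages and
 * CONFIG_FUNCTION_GRAPH_TRACER: struct ftrace_profile is about 48 bytes,
 * so PROFILES_PER_PAGE works out to roughly 85 records per page.
 */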

static int ftrace_profile_enabled __read_mostly;

/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
static DEFINE_MUTEX(ftrace_profile_lock);

static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);

#define FTRACE_PROFILE_HASH_BITS 10
#define FTRACE_PROFILE_HASH_SIZE (1 << FTRACE_PROFILE_HASH_BITS)

static void *
function_stat_next(void *v, int idx)
{
	struct ftrace_profile *rec = v;
	struct ftrace_profile_page *pg;

	pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);

 again:
	if (idx != 0)
		rec++;

	if ((void *)rec >= (void *)&pg->records[pg->index]) {
		pg = pg->next;
		if (!pg)
			return NULL;
		rec = &pg->records[0];
		if (!rec->counter)
			goto again;
	}

	return rec;
}

static void *function_stat_start(struct tracer_stat *trace)
{
	struct ftrace_profile_stat *stat =
		container_of(trace, struct ftrace_profile_stat, stat);

	if (!stat || !stat->start)
		return NULL;

	return function_stat_next(&stat->start->records[0], 0);
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* function graph compares on total time */
static int function_stat_cmp(void *p1, void *p2)
{
	struct ftrace_profile *a = p1;
	struct ftrace_profile *b = p2;

	if (a->time < b->time)
		return -1;
	if (a->time > b->time)
		return 1;
	else
		return 0;
}
#else
/* not function graph, compare against hits */
static int function_stat_cmp(void *p1, void *p2)
{
	struct ftrace_profile *a = p1;
	struct ftrace_profile *b = p2;

	if (a->counter < b->counter)
		return -1;
	if (a->counter > b->counter)
		return 1;
	else
		return 0;
}
#endif

static int function_stat_headers(struct seq_file *m)
{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	seq_printf(m, "  Function                               "
		   "Hit    Time            Avg             s^2\n"
		      "  --------                               "
		   "---    ----            ---             ---\n");
#else
	seq_printf(m, "  Function                               Hit\n"
		      "  --------                               ---\n");
#endif
	return 0;
}

static int function_stat_show(struct seq_file *m, void *v)
{
	struct ftrace_profile *rec = v;
	char str[KSYM_SYMBOL_LEN];
	int ret = 0;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	static struct trace_seq s;
	unsigned long long avg;
	unsigned long long stddev;
#endif
	mutex_lock(&ftrace_profile_lock);

	/* we raced with function_profile_reset() */
	if (unlikely(rec->counter == 0)) {
		ret = -EBUSY;
		goto out;
	}

	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
	seq_printf(m, "  %-30.30s  %10lu", str, rec->counter);

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	seq_printf(m, "    ");
	avg = rec->time;
	do_div(avg, rec->counter);

	/* Sample standard deviation (s^2) */
	if (rec->counter <= 1)
		stddev = 0;
	else {
		/*
		 * Apply Welford's method:
		 * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
		 */
		stddev = rec->counter * rec->time_squared -
			 rec->time * rec->time;

		/*
		 * Divide by only 1000 for the ns^2 -> us^2 conversion;
		 * trace_print_graph_duration will divide by 1000 again.
		 */
		do_div(stddev, rec->counter * (rec->counter - 1) * 1000);
	}
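
	/*
	 * Quick sanity check of the formula above with made-up numbers:
	 * for three samples of 1, 2 and 3 time units, n = 3, \Sum x_i = 6
	 * and \Sum (x_i)^2 = 14, so s^2 = (3 * 14 - 6 * 6) / (3 * 2) = 1.
	 * The divisor above also folds in one factor of 1000 of the
	 * ns^2 -> us^2 conversion; trace_print_graph_duration() supplies
	 * the other.
	 */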

	trace_seq_init(&s);
	trace_print_graph_duration(rec->time, &s);
	trace_seq_puts(&s, "    ");
	trace_print_graph_duration(avg, &s);
	trace_seq_puts(&s, "    ");
	trace_print_graph_duration(stddev, &s);
	trace_print_seq(m, &s);
#endif
	seq_putc(m, '\n');
out:
	mutex_unlock(&ftrace_profile_lock);

	return ret;
}

static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
{
	struct ftrace_profile_page *pg;

	pg = stat->pages = stat->start;

	while (pg) {
		memset(pg->records, 0, PROFILE_RECORDS_SIZE);
		pg->index = 0;
		pg = pg->next;
	}

	memset(stat->hash, 0,
	       FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
}

int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
{
	struct ftrace_profile_page *pg;
	int functions;
	int pages;
	int i;

	/* If we already allocated, do nothing */
	if (stat->pages)
		return 0;

	stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
	if (!stat->pages)
		return -ENOMEM;

#ifdef CONFIG_DYNAMIC_FTRACE
	functions = ftrace_update_tot_cnt;
#else
	/*
	 * We do not know the number of functions that exist because
	 * dynamic tracing is what counts them. From past experience
	 * we have around 20K functions. That should be more than enough.
	 * It is highly unlikely we will execute every function in
	 * the kernel.
	 */
	functions = 20000;
#endif

	pg = stat->start = stat->pages;

	pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);

	for (i = 1; i < pages; i++) {
		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
		if (!pg->next)
			goto out_free;
		pg = pg->next;
	}

	return 0;

 out_free:
	pg = stat->start;
	while (pg) {
		unsigned long tmp = (unsigned long)pg;

		pg = pg->next;
		free_page(tmp);
	}

	stat->pages = NULL;
	stat->start = NULL;

	return -ENOMEM;
}
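
/*
 * Back-of-the-envelope sizing (approximate, same assumptions as above):
 * with PROFILES_PER_PAGE around 85, the 20000-function fallback needs
 * DIV_ROUND_UP(20000, 85) = 236 pages, i.e. a bit under 1 MB of profile
 * records per CPU.
 */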

static int ftrace_profile_init_cpu(int cpu)
{
	struct ftrace_profile_stat *stat;
	int size;

	stat = &per_cpu(ftrace_profile_stats, cpu);

	if (stat->hash) {
		/* If the profile is already created, simply reset it */
		ftrace_profile_reset(stat);
		return 0;
	}

	/*
	 * We are profiling all functions, but usually only a few thousand
	 * functions are hit. We'll make a hash of 1024 items.
	 */
	size = FTRACE_PROFILE_HASH_SIZE;

	stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);

	if (!stat->hash)
		return -ENOMEM;

	/* Preallocate the function profiling pages */
	if (ftrace_profile_pages_init(stat) < 0) {
		kfree(stat->hash);
		stat->hash = NULL;
		return -ENOMEM;
	}

	return 0;
}

static int ftrace_profile_init(void)
{
	int cpu;
	int ret = 0;

	for_each_online_cpu(cpu) {
		ret = ftrace_profile_init_cpu(cpu);
		if (ret)
			break;
	}

	return ret;
}

/* interrupts must be disabled */
static struct ftrace_profile *
ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
{
	struct ftrace_profile *rec;
	struct hlist_head *hhd;
	unsigned long key;

	key = hash_long(ip, FTRACE_PROFILE_HASH_BITS);
	hhd = &stat->hash[key];

	if (hlist_empty(hhd))
		return NULL;

	hlist_for_each_entry_rcu_notrace(rec, hhd, node) {
		if (rec->ip == ip)
			return rec;
	}

	return NULL;
}

static void ftrace_add_profile(struct ftrace_profile_stat *stat,
			       struct ftrace_profile *rec)
{
	unsigned long key;

	key = hash_long(rec->ip, FTRACE_PROFILE_HASH_BITS);
	hlist_add_head_rcu(&rec->node, &stat->hash[key]);
}

/*
 * The memory is already allocated, this simply finds a new record to use.
 */
static struct ftrace_profile *
ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
{
	struct ftrace_profile *rec = NULL;

	/* prevent recursion (from NMIs) */
	if (atomic_inc_return(&stat->disabled) != 1)
		goto out;

	/*
	 * Try to find the function again since an NMI
	 * could have added it
	 */
	rec = ftrace_find_profiled_func(stat, ip);
	if (rec)
		goto out;

	if (stat->pages->index == PROFILES_PER_PAGE) {
		if (!stat->pages->next)
			goto out;
		stat->pages = stat->pages->next;
	}

	rec = &stat->pages->records[stat->pages->index++];
	rec->ip = ip;
	ftrace_add_profile(stat, rec);

 out:
	atomic_dec(&stat->disabled);

	return rec;
}

static void
function_profile_call(unsigned long ip, unsigned long parent_ip,
		      struct ftrace_ops *ops, struct pt_regs *regs)
{
	struct ftrace_profile_stat *stat;
	struct ftrace_profile *rec;
	unsigned long flags;

	if (!ftrace_profile_enabled)
		return;

	local_irq_save(flags);

	stat = &__get_cpu_var(ftrace_profile_stats);
	if (!stat->hash || !ftrace_profile_enabled)
		goto out;

	rec = ftrace_find_profiled_func(stat, ip);
	if (!rec) {
		rec = ftrace_profile_alloc(stat, ip);
		if (!rec)
			goto out;
	}

	rec->counter++;
 out:
	local_irq_restore(flags);
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int profile_graph_entry(struct ftrace_graph_ent *trace)
{
	function_profile_call(trace->func, 0, NULL, NULL);
	return 1;
}

static void profile_graph_return(struct ftrace_graph_ret *trace)
{
	struct ftrace_profile_stat *stat;
	unsigned long long calltime;
	struct ftrace_profile *rec;
	unsigned long flags;

	local_irq_save(flags);
	stat = &__get_cpu_var(ftrace_profile_stats);
	if (!stat->hash || !ftrace_profile_enabled)
		goto out;

	/* If the calltime was zero'd ignore it */
	if (!trace->calltime)
		goto out;

	calltime = trace->rettime - trace->calltime;

	if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
		int index;

		index = trace->depth;

		/* Append this call time to the parent time to subtract */
		if (index)
			current->ret_stack[index - 1].subtime += calltime;

		if (current->ret_stack[index].subtime < calltime)
			calltime -= current->ret_stack[index].subtime;
		else
			calltime = 0;
	}

	rec = ftrace_find_profiled_func(stat, trace->func);
	if (rec) {
		rec->time += calltime;
		rec->time_squared += calltime * calltime;
	}

 out:
	local_irq_restore(flags);
}

static int register_ftrace_profiler(void)
{
	return register_ftrace_graph(&profile_graph_return,
				     &profile_graph_entry);
}

static void unregister_ftrace_profiler(void)
{
	unregister_ftrace_graph();
}
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly = {
	.func		= function_profile_call,
	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
	INIT_REGEX_LOCK(ftrace_profile_ops)
};

static int register_ftrace_profiler(void)
{
	return register_ftrace_function(&ftrace_profile_ops);
}

static void unregister_ftrace_profiler(void)
{
	unregister_ftrace_function(&ftrace_profile_ops);
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

static ssize_t
ftrace_profile_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	val = !!val;

	mutex_lock(&ftrace_profile_lock);
	if (ftrace_profile_enabled ^ val) {
		if (val) {
			ret = ftrace_profile_init();
			if (ret < 0) {
				cnt = ret;
				goto out;
			}

			ret = register_ftrace_profiler();
			if (ret < 0) {
				cnt = ret;
				goto out;
			}
			ftrace_profile_enabled = 1;
		} else {
			ftrace_profile_enabled = 0;
			/*
			 * unregister_ftrace_profiler calls stop_machine
			 * so this acts like a synchronize_sched.
			 */
			unregister_ftrace_profiler();
		}
	}
 out:
	mutex_unlock(&ftrace_profile_lock);

	*ppos += cnt;

	return cnt;
}

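/*
 * Typical usage of the knob above (assuming debugfs is mounted at
 * /sys/kernel/debug):
 *
 *	echo 1 > /sys/kernel/debug/tracing/function_profile_enabled
 *	cat /sys/kernel/debug/tracing/trace_stat/function0
 *
 * The exact paths depend on where debugfs is mounted; the per-cpu
 * "function%d" stat files are registered in ftrace_profile_debugfs() below.
 */
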
static ssize_t
ftrace_profile_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	char buf[64];		/* big enough to hold a number */
	int r;

	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static const struct file_operations ftrace_profile_fops = {
	.open		= tracing_open_generic,
	.read		= ftrace_profile_read,
	.write		= ftrace_profile_write,
	.llseek		= default_llseek,
};

/* used to initialize the real stat files */
static struct tracer_stat function_stats __initdata = {
	.name		= "functions",
	.stat_start	= function_stat_start,
	.stat_next	= function_stat_next,
	.stat_cmp	= function_stat_cmp,
	.stat_headers	= function_stat_headers,
	.stat_show	= function_stat_show
};

static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
	struct ftrace_profile_stat *stat;
	struct dentry *entry;
	char *name;
	int ret;
	int cpu;

	for_each_possible_cpu(cpu) {
		stat = &per_cpu(ftrace_profile_stats, cpu);

		/* allocate enough for function name + cpu number */
		name = kmalloc(32, GFP_KERNEL);
		if (!name) {
			/*
			 * The files created are permanent; if something
			 * happens we still do not free memory.
			 */
			WARN(1,
			     "Could not allocate stat file for cpu %d\n",
			     cpu);
			return;
		}
		stat->stat = function_stats;
		snprintf(name, 32, "function%d", cpu);
		stat->stat.name = name;
		ret = register_stat_tracer(&stat->stat);
		if (ret) {
			WARN(1,
			     "Could not register function stat for cpu %d\n",
			     cpu);
			kfree(name);
			return;
		}
	}

	entry = debugfs_create_file("function_profile_enabled", 0644,
				    d_tracer, NULL, &ftrace_profile_fops);
	if (!entry)
		pr_warning("Could not create debugfs "
			   "'function_profile_enabled' entry\n");
}

#else /* CONFIG_FUNCTION_PROFILER */
static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
}
#endif /* CONFIG_FUNCTION_PROFILER */

static struct pid * const ftrace_swapper_pid = &init_struct_pid;

loff_t
ftrace_filter_lseek(struct file *file, loff_t offset, int whence)
{
	loff_t ret;

	if (file->f_mode & FMODE_READ)
		ret = seq_lseek(file, offset, whence);
	else
		file->f_pos = ret = 1;

	return ret;
}

#ifdef CONFIG_DYNAMIC_FTRACE

#ifndef CONFIG_FTRACE_MCOUNT_RECORD
# error Dynamic ftrace depends on MCOUNT_RECORD
#endif

static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;

struct ftrace_func_probe {
	struct hlist_node	node;
	struct ftrace_probe_ops	*ops;
	unsigned long		flags;
	unsigned long		ip;
	void			*data;
	struct list_head	free_list;
};

struct ftrace_func_entry {
	struct hlist_node hlist;
	unsigned long ip;
};

struct ftrace_hash {
	unsigned long		size_bits;
	struct hlist_head	*buckets;
	unsigned long		count;
	struct rcu_head		rcu;
};
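
/*
 * size_bits above is expressed in the same units as FTRACE_HASH_DEFAULT_BITS
 * and FTRACE_HASH_MAX_BITS defined near the top of this file; with the
 * 12-bit cap a hash tops out at 4096 buckets.
 */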

/*
 * We make these constant because no one should touch them,
 * but they are used as the default "empty hash", to avoid allocating
 * it all the time. These are in a read only section such that if
 * anyone does try to modify it, it will cause an exception.
 */
static const struc