/*
 * Infrastructure for profiling code inserted by 'gcc -pg'.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally ported from the -rt patch by:
 *   Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code in the latency_tracer, that is:
 *
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */

#include <linux/stop_machine.h>
#include <linux/clocksource.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/suspend.h>
#include <linux/debugfs.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/uaccess.h>
#include <linux/bsearch.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/sort.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rcupdate.h>

#include <trace/events/sched.h>

#include <asm/setup.h>

#include "trace_output.h"
#include "trace_stat.h"

#define FTRACE_WARN_ON(cond)			\
	({					\
		int ___r = cond;		\
		if (WARN_ON(___r))		\
			ftrace_kill();		\
		___r;				\
	})

#define FTRACE_WARN_ON_ONCE(cond)		\
	({					\
		int ___r = cond;		\
		if (WARN_ON_ONCE(___r))		\
			ftrace_kill();		\
		___r;				\
	})

/* hash bits for specific function selection */
#define FTRACE_HASH_BITS 7
#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS)

#define FTRACE_HASH_DEFAULT_BITS 10
#define FTRACE_HASH_MAX_BITS 12

#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL)

#ifdef CONFIG_DYNAMIC_FTRACE
#define INIT_REGEX_LOCK(opsname)	\
	.regex_lock	= __MUTEX_INITIALIZER(opsname.regex_lock),
#else
#define INIT_REGEX_LOCK(opsname)
#endif

static struct ftrace_ops ftrace_list_end __read_mostly = {
	.func		= ftrace_stub,
	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
};

/* ftrace_enabled is a method to turn ftrace on or off */
int ftrace_enabled __read_mostly;
static int last_ftrace_enabled;

/* Quick disabling of function tracer. */
int function_trace_stop __read_mostly;

/* Current function tracing op */
struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;

/* What to set function_trace_op to */
static struct ftrace_ops *set_function_trace_op;

/* List for set_ftrace_pid's pids. */
LIST_HEAD(ftrace_pids);
struct ftrace_pid {
	struct list_head list;
	struct pid *pid;
};

/*
 * ftrace_disabled is set when an anomaly is discovered.
 * ftrace_disabled is much stronger than ftrace_enabled.
 */
static int ftrace_disabled __read_mostly;

static DEFINE_MUTEX(ftrace_lock);

static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
static struct ftrace_ops global_ops;
static struct ftrace_ops control_ops;

#if ARCH_SUPPORTS_FTRACE_OPS
static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
				 struct ftrace_ops *op, struct pt_regs *regs);
#else
/* See comment below, where ftrace_ops_list_func is defined */
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
#endif

/*
 * Traverse the ftrace_global_list, invoking all entries.  The reason that we
 * can use rcu_dereference_raw_notrace() is that elements removed from this list
 * are simply leaked, so there is no need to interact with a grace-period
 * mechanism.  The rcu_dereference_raw_notrace() calls are needed to handle
 * concurrent insertions into the ftrace_global_list.
 *
 * Silly Alpha and silly pointer-speculation compiler optimizations!
 */
#define do_for_each_ftrace_op(op, list)			\
	op = rcu_dereference_raw_notrace(list);			\
	do

/*
 * Optimized for just a single item in the list (as that is the normal case).
 */
#define while_for_each_ftrace_op(op)				\
	while (likely(op = rcu_dereference_raw_notrace((op)->next)) &&	\
	       unlikely((op) != &ftrace_list_end))

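/*
 * Lazy init for an ftrace_ops: with dynamic ftrace each ops carries a
 * regex_lock used when its filter hashes are updated, so set it up (once)
 * before the ops is first used.
 */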
static inline void ftrace_ops_init(struct ftrace_ops *ops)
{
#ifdef CONFIG_DYNAMIC_FTRACE
	if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) {
		mutex_init(&ops->regex_lock);
		ops->flags |= FTRACE_OPS_FL_INITIALIZED;
	}
#endif
}

/**
 * ftrace_nr_registered_ops - return number of ops registered
 *
 * Returns the number of ftrace_ops registered and tracing functions
 */
int ftrace_nr_registered_ops(void)
{
	struct ftrace_ops *ops;
	int cnt = 0;

	mutex_lock(&ftrace_lock);

	for (ops = ftrace_ops_list;
	     ops != &ftrace_list_end; ops = ops->next)
		cnt++;

	mutex_unlock(&ftrace_lock);

	return cnt;
}

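/*
 * Entry point used when pid filtering is active: only forward the call to
 * ftrace_pid_function() for tasks that have tracing enabled.
 */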
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
			    struct ftrace_ops *op, struct pt_regs *regs)
{
	if (!test_tsk_trace_trace(current))
		return;

	ftrace_pid_function(ip, parent_ip, op, regs);
}

static void set_ftrace_pid_function(ftrace_func_t func)
{
	/* do not set ftrace_pid_function to itself! */
	if (func != ftrace_pid_func)
		ftrace_pid_function = func;
}

/**
 * clear_ftrace_function - reset the ftrace function
 *
 * This NULLs the ftrace function and in essence stops
 * tracing.  There may be lag
 */
void clear_ftrace_function(void)
{
	ftrace_trace_function = ftrace_stub;
	ftrace_pid_function = ftrace_stub;
}

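/*
 * Control ops keep a per-cpu "disabled" counter so the callback can be
 * switched off on individual CPUs; a freshly allocated ops starts out
 * disabled on every possible CPU.
 */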
static void control_ops_disable_all(struct ftrace_ops *ops)
{
	int cpu;

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(ops->disabled, cpu) = 1;
}

static int control_ops_alloc(struct ftrace_ops *ops)
{
	int __percpu *disabled;

	disabled = alloc_percpu(int);
	if (!disabled)
		return -ENOMEM;

	ops->disabled = disabled;
	control_ops_disable_all(ops);
	return 0;
}

static void ftrace_sync(struct work_struct *work)
{
	/*
	 * This function is just a stub to implement a hard force
	 * of synchronize_sched(). This requires synchronizing
	 * tasks even in userspace and idle.
	 *
	 * Yes, function tracing is rude.
	 */
}

static void ftrace_sync_ipi(void *data)
{
	/* Probably not needed, but do it anyway */
	smp_rmb();
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static void update_function_graph_func(void);
#else
static inline void update_function_graph_func(void) { }
#endif

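/*
 * Pick the callback that the arch trampoline should invoke: a single
 * recursion-safe, non-dynamic ops can be called directly (when the arch
 * can pass the ops), otherwise fall back to the list function that walks
 * every registered ops.
 */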
static void update_ftrace_function(void)
{
	ftrace_func_t func;

	/*
	 * If we are at the end of the list and this ops is
	 * recursion safe and not dynamic and the arch supports passing ops,
	 * then have the mcount trampoline call the function directly.
	 */
	if (ftrace_ops_list == &ftrace_list_end ||
	    (ftrace_ops_list->next == &ftrace_list_end &&
	     !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) &&
	     (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) &&
	     !FTRACE_FORCE_LIST_FUNC)) {
		/* Set the ftrace_ops that the arch callback uses */
		set_function_trace_op = ftrace_ops_list;
		func = ftrace_ops_list->func;
	} else {
		/* Just use the default ftrace_ops */
		set_function_trace_op = &ftrace_list_end;
		func = ftrace_ops_list_func;
	}

	/* If there's no change, then do nothing more here */
	if (ftrace_trace_function == func)
		return;

	update_function_graph_func();

	/*
	 * If we are using the list function, it doesn't care
	 * about the function_trace_ops.
	 */
	if (func == ftrace_ops_list_func) {
		ftrace_trace_function = func;
		/*
		 * Don't even bother setting function_trace_ops,
		 * it would be racy to do so anyway.
		 */
		return;
	}

#ifndef CONFIG_DYNAMIC_FTRACE
	/*
	 * For static tracing, we need to be a bit more careful.
	 * The function change takes effect immediately. Thus,
	 * we need to coordinate the setting of the function_trace_ops
	 * with the setting of the ftrace_trace_function.
	 *
	 * Set the function to the list ops, which will call the
	 * function we want, albeit indirectly, but it handles the
	 * ftrace_ops and doesn't depend on function_trace_op.
	 */
	ftrace_trace_function = ftrace_ops_list_func;
	/*
	 * Make sure all CPUs see this. Yes this is slow, but static
	 * tracing is slow and nasty to have enabled.
	 */
	schedule_on_each_cpu(ftrace_sync);
	/* Now all cpus are using the list ops. */
	function_trace_op = set_function_trace_op;
	/* Make sure the function_trace_op is visible on all CPUs */
	smp_wmb();
	/* Nasty way to force a rmb on all cpus */
	smp_call_function(ftrace_sync_ipi, NULL, 1);
	/* OK, we are all set to update the ftrace_trace_function now! */
#endif /* !CONFIG_DYNAMIC_FTRACE */

	ftrace_trace_function = func;
}

static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
{
	ops->next = *list;
	/*
	 * We are entering ops into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the ops->next pointer is valid before another CPU sees
	 * the ops pointer included into the list.
	 */
	rcu_assign_pointer(*list, ops);
}

static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
{
	struct ftrace_ops **p;

	/*
	 * If we are removing the last function, then simply point
	 * to the ftrace_stub.
	 */
	if (*list == ops && ops->next == &ftrace_list_end) {
		*list = &ftrace_list_end;
		return 0;
	}

	for (p = list; *p != &ftrace_list_end; p = &(*p)->next)
		if (*p == ops)
			break;

	if (*p != ops)
		return -1;

	*p = (*p)->next;
	return 0;
}

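/*
 * Control ops live on their own sub-list; the shared main_ops is added to
 * ftrace_ops_list when the first entry goes onto the sub-list and removed
 * again once the last entry leaves it.
 */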
static void add_ftrace_list_ops(struct ftrace_ops **list,
				struct ftrace_ops *main_ops,
				struct ftrace_ops *ops)
{
	int first = *list == &ftrace_list_end;
	add_ftrace_ops(list, ops);
	if (first)
		add_ftrace_ops(&ftrace_ops_list, main_ops);
}

static int remove_ftrace_list_ops(struct ftrace_ops **list,
				  struct ftrace_ops *main_ops,
				  struct ftrace_ops *ops)
{
	int ret = remove_ftrace_ops(list, ops);
	if (!ret && *list == &ftrace_list_end)
		ret = remove_ftrace_ops(&ftrace_ops_list, main_ops);
	return ret;
}

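/*
 * Link a new ops into the appropriate list and, if tracing is enabled,
 * refresh the active trace callback.  Rejects deleted or already-enabled
 * ops and SAVE_REGS requests the architecture cannot honour.
 */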
static int __register_ftrace_function(struct ftrace_ops *ops)
{
	if (ops->flags & FTRACE_OPS_FL_DELETED)
		return -EINVAL;

	if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
		return -EBUSY;

#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
	/*
	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
	 * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant.
	 */
	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS &&
	    !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED))
		return -EINVAL;

	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)
		ops->flags |= FTRACE_OPS_FL_SAVE_REGS;
#endif

	if (!core_kernel_data((unsigned long)ops))
		ops->flags |= FTRACE_OPS_FL_DYNAMIC;

	if (ops->flags & FTRACE_OPS_FL_CONTROL) {
		if (control_ops_alloc(ops))
			return -ENOMEM;
		add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
	} else
		add_ftrace_ops(&ftrace_ops_list, ops);

	if (ftrace_enabled)
		update_ftrace_function();

	return 0;
}

static int __unregister_ftrace_function(struct ftrace_ops *ops)
{
	int ret;

	if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
		return -EBUSY;

	if (ops->flags & FTRACE_OPS_FL_CONTROL) {
		ret = remove_ftrace_list_ops(&ftrace_control_list,
					     &control_ops, ops);
	} else
		ret = remove_ftrace_ops(&ftrace_ops_list, ops);

	if (ret < 0)
		return ret;

	if (ftrace_enabled)
		update_ftrace_function();

	return 0;
}

static void ftrace_update_pid_func(void)
{
	/* Only do something if we are tracing something */
	if (ftrace_trace_function == ftrace_stub)
		return;

	update_ftrace_function();
}

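/*
 * Function profiler bookkeeping: one ftrace_profile record per profiled
 * function (hit count, plus total and squared time when the graph tracer
 * is available), packed into page-sized blocks and tracked per cpu by
 * ftrace_profile_stat.
 */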
#ifdef CONFIG_FUNCTION_PROFILER
struct ftrace_profile {
	struct hlist_node		node;
	unsigned long			ip;
	unsigned long			counter;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	unsigned long long		time;
	unsigned long long		time_squared;
#endif
};

struct ftrace_profile_page {
	struct ftrace_profile_page	*next;
	unsigned long			index;
	struct ftrace_profile		records[];
};

struct ftrace_profile_stat {
	atomic_t			disabled;
	struct hlist_head		*hash;
	struct ftrace_profile_page	*pages;
	struct ftrace_profile_page	*start;
	struct tracer_stat		stat;
};

#define PROFILE_RECORDS_SIZE						\
	(PAGE_SIZE - offsetof(struct ftrace_profile_page, records))

#define PROFILES_PER_PAGE					\
	(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))

static int ftrace_profile_enabled __read_mostly;

/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
static DEFINE_MUTEX(ftrace_profile_lock);

static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);

#define FTRACE_PROFILE_HASH_BITS 10
#define FTRACE_PROFILE_HASH_SIZE (1 << FTRACE_PROFILE_HASH_BITS)

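/* Stat iterator: step to the next profile record, walking across pages. */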
static void *
function_stat_next(void *v, int idx)
{
	struct ftrace_profile *rec = v;
	struct ftrace_profile_page *pg;

	pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);

 again:
	if (idx != 0)
		rec++;

	if ((void *)rec >= (void *)&pg->records[pg->index]) {
		pg = pg->next;
		if (!pg)
			return NULL;
		rec = &pg->records[0];
		if (!rec->counter)
			goto again;
	}

	return rec;
}

static void *function_stat_start(struct tracer_stat *trace)
{
	struct ftrace_profile_stat *stat =
		container_of(trace, struct ftrace_profile_stat, stat);

	if (!stat || !stat->start)
		return NULL;

	return function_stat_next(&stat->start->records[0], 0);
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* function graph compares on total time */
static int function_stat_cmp(void *p1, void *p2)
{
	struct ftrace_profile *a = p1;
	struct ftrace_profile *b = p2;

	if (a->time < b->time)
		return -1;
	if (a->time > b->time)
		return 1;
	else
		return 0;
}
#else
/* not function graph: compare against hit counts */
static int function_stat_cmp(void *p1, void *p2)
{
	struct ftrace_profile *a = p1;
	struct ftrace_profile *b = p2;

	if (a->counter < b->counter)
		return -1;
	if (a->counter > b->counter)
		return 1;
	else
		return 0;
}
#endif

static int function_stat_headers(struct seq_file *m)
{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	seq_printf(m, "  Function                               "
		   "Hit    Time            Avg             s^2\n"
		      "  --------                               "
		   "---    ----            ---             ---\n");
#else
	seq_printf(m, "  Function                               Hit\n"
		      "  --------                               ---\n");
#endif
	return 0;
}

static int function_stat_show(struct seq_file *m, void *v)
{
	struct ftrace_profile *rec = v;
	char str[KSYM_SYMBOL_LEN];
	int ret = 0;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	static struct trace_seq s;
	unsigned long long avg;
	unsigned long long stddev;
#endif
	mutex_lock(&ftrace_profile_lock);

	/* we raced with function_profile_reset() */
	if (unlikely(rec->counter == 0)) {
		ret = -EBUSY;
		goto out;
	}

	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
	seq_printf(m, "  %-30.30s  %10lu", str, rec->counter);

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	seq_printf(m, "    ");
	avg = rec->time;
	do_div(avg, rec->counter);

	/* Sample standard deviation (s^2) */
	if (rec->counter <= 1)
		stddev = 0;
	else {
		/*
		 * Apply Welford's method:
		 * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
		 */
		stddev = rec->counter * rec->time_squared -
			 rec->time * rec->time;

		/*
		 * Divide only 1000 for ns^2 -> us^2 conversion.
		 * trace_print_graph_duration will divide 1000 again.
		 */
		do_div(stddev, rec->counter * (rec->counter - 1) * 1000);
	}

	trace_seq_init(&s);
	trace_print_graph_duration(rec->time, &s);
	trace_seq_puts(&s, "    ");
	trace_print_graph_duration(avg, &s);
	trace_seq_puts(&s, "    ");
	trace_print_graph_duration(stddev, &s);
	trace_print_seq(m, &s);
#endif
	seq_putc(m, '\n');
out:
	mutex_unlock(&ftrace_profile_lock);

	return ret;
}

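/* Zero every recorded hit/time and empty the hash so profiling restarts cleanly. */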
static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
{
	struct ftrace_profile_page *pg;

	pg = stat->pages = stat->start;

	while (pg) {
		memset(pg->records, 0, PROFILE_RECORDS_SIZE);
		pg->index = 0;
		pg = pg->next;
	}

	memset(stat->hash, 0,
	       FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
}

int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
{
	struct ftrace_profile_page *pg;
	int functions;
	int pages;
	int i;

	/* If we already allocated, do nothing */
	if (stat->pages)
		return 0;

	stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
	if (!stat->pages)
		return -ENOMEM;

#ifdef CONFIG_DYNAMIC_FTRACE
	functions = ftrace_update_tot_cnt;
#else
	/*
	 * We do not know the number of functions that exist because
	 * dynamic tracing is what counts them. With past experience
	 * we have around 20K functions. That should be more than enough.
	 * It is highly unlikely we will execute every function in
	 * the kernel.
	 */
	functions = 20000;
#endif

	pg = stat->start = stat->pages;

	pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);

	for (i = 1; i < pages; i++) {
		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
		if (!pg->next)
			goto out_free;
		pg = pg->next;
	}

	return 0;

 out_free:
	pg = stat->start;
	while (pg) {
		unsigned long tmp = (unsigned long)pg;

		pg = pg->next;
		free_page(tmp);
	}

	stat->pages = NULL;
	stat->start = NULL;

	return -ENOMEM;
}

static int ftrace_profile_init_cpu(int cpu)
{
	struct ftrace_profile_stat *stat;
	int size;

	stat = &per_cpu(ftrace_profile_stats, cpu);

	if (stat->hash) {
		/* If the profile is already created, simply reset it */
		ftrace_profile_reset(stat);
		return 0;
	}

	/*
	 * We are profiling all functions, but usually only a few thousand
	 * functions are hit. We'll make a hash of 1024 items.
	 */
	size = FTRACE_PROFILE_HASH_SIZE;

	stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);

	if (!stat->hash)
		return -ENOMEM;

	/* Preallocate the function profiling pages */
	if (ftrace_profile_pages_init(stat) < 0) {
		kfree(stat->hash);
		stat->hash = NULL;
		return -ENOMEM;
	}

	return 0;
}

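/* Set up (or reset) the profile hash and record pages on every possible CPU. */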
static int ftrace_profile_init(void)
{
	int cpu;
	int ret = 0;

	for_each_possible_cpu(cpu) {
		ret = ftrace_profile_init_cpu(cpu);
		if (ret)
			break;
	}

	return ret;
}

/* interrupts must be disabled */
static struct ftrace_profile *
ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
{
	struct ftrace_profile *rec;
	struct hlist_head *hhd;
	unsigned long key;

	key = hash_long(ip, FTRACE_PROFILE_HASH_BITS);
	hhd = &stat->hash[key];

	if (hlist_empty(hhd))
		return NULL;

	hlist_for_each_entry_rcu_notrace(rec, hhd, node) {
		if (rec->ip == ip)
			return rec;
	}

	return NULL;
}

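/* Insert a new record into the per-cpu profile hash; readers walk it under RCU. */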
static void ftrace_add_profile(struct ftrace_profile_stat *stat,
			       struct ftrace_profile *rec)
{
	unsigned long key;

	key = hash_long(rec->ip, FTRACE_PROFILE_HASH_BITS);
	hlist_add_head_rcu(&rec->node, &stat->hash[key]);
}

/*
 * The memory is already allocated, this simply finds a new record to use.
 */
static struct ftrace_profile *
ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
{
	struct ftrace_profile *rec = NULL;

	/* prevent recursion (from NMIs) */
	if (atomic_inc_return(&stat->disabled) != 1)
		goto out;

	/*
	 * Try to find the function again since an NMI
	 * could have added it
	 */
	rec = ftrace_find_profiled_func(stat, ip);
	if (rec)
		goto out;

	if (stat->pages->index == PROFILES_PER_PAGE) {
		if (!stat->pages->next)
			goto out;
		stat->pages = stat->pages->next;
	}

	rec = &stat->pages->records[stat->pages->index++];
	rec->ip = ip;
	ftrace_add_profile(stat, rec);

 out:
	atomic_dec(&stat->disabled);

	return rec;
}

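/*
 * Per-function entry hook: with interrupts disabled, look up (or allocate)
 * the record for @ip on this cpu and bump its hit counter.
 */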
static void
function_profile_call(unsigned long ip, unsigned long parent_ip,
		      struct ftrace_ops *ops, struct pt_regs *regs)
{
	struct ftrace_profile_stat *stat;
	struct ftrace_profile *rec;
	unsigned long flags;

	if (!ftrace_profile_enabled)
		return;

	local_irq_save(flags);

	stat = &__get_cpu_var(ftrace_profile_stats);
	if (!stat->hash || !ftrace_profile_enabled)
		goto out;

	rec = ftrace_find_profiled_func(stat, ip);
	if (!rec) {
		rec = ftrace_profile_alloc(stat, ip);
		if (!rec)
			goto out;
	}

	rec->counter++;
 out:
	local_irq_restore(flags);
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int profile_graph_entry(struct ftrace_graph_ent *trace)
{
	function_profile_call(trace->func, 0, NULL, NULL);
	return 1;
}

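/*
 * Graph-return hook: compute the call's duration (minus time spent in
 * children unless graph time is being traced) and fold it into the
 * record's time and time_squared.
 */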
static void profile_graph_return(struct ftrace_graph_ret *trace)
{
	struct ftrace_profile_stat *stat;
	unsigned long long calltime;
	struct ftrace_profile *rec;
	unsigned long flags;

	local_irq_save(flags);
	stat = &__get_cpu_var(ftrace_profile_stats);
	if (!stat->hash || !ftrace_profile_enabled)
		goto out;

	/* If the calltime was zero'd ignore it */
	if (!trace->calltime)
		goto out;

	calltime = trace->rettime - trace->calltime;

	if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
		int index;

		index = trace->depth;

		/* Append this call time to the parent time to subtract */
		if (index)
			current->ret_stack[index - 1].subtime += calltime;

		if (current->ret_stack[index].subtime < calltime)
			calltime -= current->ret_stack[index].subtime;
		else
			calltime = 0;
	}

	rec = ftrace_find_profiled_func(stat, trace->func);
	if (rec) {
		rec->time += calltime;
		rec->time_squared += calltime * calltime;
	}

 out:
	local_irq_restore(flags);
}

static int register_ftrace_profiler(void)
{
	return register_ftrace_graph(&profile_graph_return,
				     &profile_graph_entry);
}

static void unregister_ftrace_profiler(void)
{
	unregister_ftrace_graph();
}
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly = {
	.func		= function_profile_call,
	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
	INIT_REGEX_LOCK(ftrace_profile_ops)
};

static int register_ftrace_profiler(void)
{
	return register_ftrace_function(&ftrace_profile_ops);
}

static void unregister_ftrace_profiler(void)
{
	unregister_ftrace_function(&ftrace_profile_ops);
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

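/*
 * debugfs write handler for "function_profile_enabled": writing 1 allocates
 * the per-cpu stats and registers the profiler, writing 0 tears it down.
 */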
static ssize_t
ftrace_profile_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	val = !!val;

	mutex_lock(&ftrace_profile_lock);
	if (ftrace_profile_enabled ^ val) {
		if (val) {
			ret = ftrace_profile_init();
			if (ret < 0) {
				cnt = ret;
				goto out;
			}

			ret = register_ftrace_profiler();
			if (ret < 0) {
				cnt = ret;
				goto out;
			}
			ftrace_profile_enabled = 1;
		} else {
			ftrace_profile_enabled = 0;
			/*
			 * unregister_ftrace_profiler calls stop_machine
			 * so this acts like a synchronize_sched.
			 */
			unregister_ftrace_profiler();
		}
	}
 out:
	mutex_unlock(&ftrace_profile_lock);

	*ppos += cnt;

	return cnt;
}

static ssize_t
ftrace_profile_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	char buf[64];		/* big enough to hold a number */
	int r;

	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static const struct file_operations ftrace_profile_fops = {
	.open		= tracing_open_generic,
	.read		= ftrace_profile_read,
	.write		= ftrace_profile_write,
	.llseek		= default_llseek,
};

/* used to initialize the real stat files */
static struct tracer_stat function_stats __initdata = {
	.name		= "functions",
	.stat_start	= function_stat_start,
	.stat_next	= function_stat_next,
	.stat_cmp	= function_stat_cmp,
	.stat_headers	= function_stat_headers,
	.stat_show	= function_stat_show
};

static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
	struct ftrace_profile_stat *stat;
	struct dentry *entry;
	char *name;
	int ret;
	int cpu;

	for_each_possible_cpu(cpu) {
		stat = &per_cpu(ftrace_profile_stats, cpu);

		/* allocate enough for function name + cpu number */
		name = kmalloc(32, GFP_KERNEL);
		if (!name) {
			/*
			 * The files created are permanent, if something happens
			 * we still do not free memory.
			 */
			WARN(1,
			     "Could not allocate stat file for cpu %d\n",
			     cpu);
			return;
		}
		stat->stat = function_stats;
		snprintf(name, 32, "function%d", cpu);
		stat->stat.name = name;
		ret = register_stat_tracer(&stat->stat);
		if (ret) {
			WARN(1,
			     "Could not register function stat for cpu %d\n",
			     cpu);
			kfree(name);
			return;
		}
	}

	entry = debugfs_create_file("function_profile_enabled", 0644,
				    d_tracer, NULL, &ftrace_profile_fops);
	if (!entry)
		pr_warning("Could not create debugfs "
			   "'function_profile_enabled' entry\n");
}

#else /* CONFIG_FUNCTION_PROFILER */
static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
}
#endif /* CONFIG_FUNCTION_PROFILER */

static struct pid * const ftrace_swapper_pid = &init_struct_pid;

#ifdef CONFIG_DYNAMIC_FTRACE

#ifndef CONFIG_FTRACE_MCOUNT_RECORD
# error Dynamic ftrace depends on MCOUNT_RECORD
#endif

static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;

struct ftrace_func_probe {
	struct hlist_node	node;
	struct ftrace_probe_ops	*ops;
	unsigned long		flags;
	unsigned long		ip;
	void			*data;
	struct list_head	free_list;
};

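/*
 * ftrace hashes map instruction pointers to ftrace_func_entry items; they
 * back the per-ops filter_hash and notrace_hash sets.
 */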
struct ftrace_func_entry {
	struct hlist_node hlist;
	unsigned long ip;
};

struct ftrace_hash {
	unsigned long		size_bits;
	struct hlist_head	*buckets;
	unsigned long		count;
	struct rcu_head		rcu;
};

/*
 * We make these constant because no one should touch them,
 * but they are used as the default "empty hash", to avoid allocating
 * it all the time. These are in a read only section such that if
 * anyone does try to modify it, it will cause an exception.
 */
static const struct hlist_head empty_buckets[1];
static const struct ftrace_hash empty_hash = {
	.buckets = (struct hlist_head *)empty_buckets,
};
#define EMPTY_HASH	((struct ftrace_hash *)&empty_hash)

static struct ftrace_ops global_ops = {
	.func			= ftrace_stub,
	.notrace_hash		= EMPTY_HASH,
	.filter_hash		= EMPTY_HASH,
	.flags			= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
	INIT_REGEX_LOCK(global_ops)
};

struct ftrace_page {
	struct ftrace_page	*next;
	struct dyn_ftrace	*records;
	int			index;
	int			size;
};

#define ENTRY_SIZE sizeof(struct dyn_ftrace)
#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)

/* estimate from running different kernels */
#define NR_TO_INIT		10000

static struct ftrace_page	*ftrace_pages_start;
static struct ftrace_page	*ftrace_pages;

static bool ftrace_hash_empty(struct ftrace_hash *hash)
{
	return !hash || !hash->count;
}

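/* Look up @ip in @hash; returns NULL when the hash is empty or @ip is not present. */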
static struct ftrace_func_entry *
ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
{
	unsigned long key;
	struct ftrace_func_entry *entry;
	struct hlist_head *hhd;

	if (ftrace_hash_empty(hash))
		return NULL;

	if (hash->size_bits > 0)
		key = hash_long(ip, hash->size_bits);
	else
		key = 0;

	hhd = &hash->buckets[key];