Commit 86236611 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'tracing-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'tracing-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (244 commits)
  Revert "x86, bts: reenable ptrace branch trace support"
  tracing: do not translate event helper macros in print format
  ftrace/documentation: fix typo in function grapher name
  tracing/events: convert block trace points to TRACE_EVENT(), fix !CONFIG_BLOCK
  tracing: add protection around module events unload
  tracing: add trace_seq_vprint interface
  tracing: fix the block trace points print size
  tracing/events: convert block trace points to TRACE_EVENT()
  ring-buffer: fix ret in rb_add_time_stamp
  ring-buffer: pass in lockdep class key for reader_lock
  tracing: add annotation to what type of stack trace is recorded
  tracing: fix multiple use of __print_flags and __print_symbolic
  tracing/events: fix output format of user stack
  tracing/events: fix output format of kernel stack
  tracing/trace_stack: fix the number of entries in the header
  ring-buffer: discard timestamps that are at the start of the buffer
  ring-buffer: try to discard unneeded timestamps
  ring-buffer: fix bug in ring_buffer_discard_commit
  ftrace: do not profile functions when disabled
  tracing: make trace pipe recognize latency format flag
  ...
parents 57eee9ae 511b01bd
......@@ -13,7 +13,8 @@ DOCBOOKS := z8530book.xml mcabook.xml device-drivers.xml \
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
mac80211.xml debugobjects.xml sh.xml regulator.xml \
alsa-driver-api.xml writing-an-alsa-driver.xml
alsa-driver-api.xml writing-an-alsa-driver.xml \
tracepoint.xml
###
# The build process is as follows (targets):
......
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
<book id="Tracepoints">
<bookinfo>
<title>The Linux Kernel Tracepoint API</title>
<authorgroup>
<author>
<firstname>Jason</firstname>
<surname>Baron</surname>
<affiliation>
<address>
<email>jbaron@redhat.com</email>
</address>
</affiliation>
</author>
</authorgroup>
<legalnotice>
<para>
This documentation is free software; you can redistribute
it and/or modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later
version.
</para>
<para>
This program is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
</para>
<para>
You should have received a copy of the GNU General Public
License along with this program; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA 02111-1307 USA
</para>
<para>
For more details see the file COPYING in the source
distribution of Linux.
</para>
</legalnotice>
</bookinfo>
<toc></toc>
<chapter id="intro">
<title>Introduction</title>
<para>
Tracepoints are static probe points that are located in strategic points
throughout the kernel. 'Probes' register/unregister with tracepoints
via a callback mechanism. The 'probes' are strictly typed functions that
are passed a unique set of parameters defined by each tracepoint.
</para>
<para>
From this simple callback mechanism, 'probes' can be used to profile, debug,
and understand kernel behavior. There are a number of tools that provide a
framework for using 'probes'. These tools include Systemtap, ftrace, and
LTTng.
</para>
<para>
Tracepoints are defined in a number of header files via various macros. Thus,
the purpose of this document is to provide a clear accounting of the available
tracepoints. The intention is to understand not only what tracepoints are
available but also to understand where future tracepoints might be added.
</para>
<para>
The API presented has functions of the form:
<function>trace_tracepointname(function parameters)</function>. These are the
tracepoints callbacks that are found throughout the code. Registering and
unregistering probes with these callback sites is covered in the
<filename>Documentation/trace/*</filename> directory.
</para>
</chapter>
<chapter id="irq">
<title>IRQ</title>
!Iinclude/trace/events/irq.h
</chapter>
</book>
......@@ -56,7 +56,6 @@ parameter is applicable:
ISAPNP ISA PnP code is enabled.
ISDN Appropriate ISDN support is enabled.
JOY Appropriate joystick support is enabled.
KMEMTRACE kmemtrace is enabled.
LIBATA Libata driver is enabled
LP Printer support is enabled.
LOOP Loopback device support is enabled.
......@@ -754,12 +753,25 @@ and is between 256 and 4096 characters. It is defined in the file
ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
ftrace=[tracer]
[ftrace] will set and start the specified tracer
[FTRACE] will set and start the specified tracer
as early as possible in order to facilitate early
boot debugging.
ftrace_dump_on_oops
[ftrace] will dump the trace buffers on oops.
[FTRACE] will dump the trace buffers on oops.
ftrace_filter=[function-list]
[FTRACE] Limit the functions traced by the function
tracer at boot up. function-list is a comma separated
list of functions. This list can be changed at run
time by the set_ftrace_filter file in the debugfs
tracing directory.
ftrace_notrace=[function-list]
[FTRACE] Do not trace the functions specified in
function-list. This list can be changed at run time
by the set_ftrace_notrace file in the debugfs
tracing directory.
gamecon.map[2|3]=
[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
......@@ -1056,15 +1068,6 @@ and is between 256 and 4096 characters. It is defined in the file
use the HighMem zone if it exists, and the Normal
zone if it does not.
kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no }
Controls whether kmemtrace is enabled
at boot-time.
kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of
subbufs kmemtrace's relay channel has. Set this
higher than default (KMEMTRACE_N_SUBBUFS in code) if
you experience buffer overruns.
kgdboc= [HW] kgdb over consoles.
Requires a tty driver that supports console polling.
(only serial suported for now)
......
Event Tracing
Documentation written by Theodore Ts'o
Updated by Li Zefan
1. Introduction
===============
Tracepoints (see Documentation/trace/tracepoints.txt) can be used
without creating custom kernel modules to register probe functions
using the event tracing infrastructure.
Not all tracepoints can be traced using the event tracing system;
the kernel developer must provide code snippets which define how the
tracing information is saved into the tracing buffer, and how the
tracing information should be printed.
2. Using Event Tracing
======================
2.1 Via the 'set_event' interface
---------------------------------
The events which are available for tracing can be found in the file
/debug/tracing/available_events.
To enable a particular event, such as 'sched_wakeup', simply echo it
to /debug/tracing/set_event. For example:
# echo sched_wakeup >> /debug/tracing/set_event
[ Note: '>>' is necessary, otherwise it will firstly disable
all the events. ]
To disable an event, echo the event name to the set_event file prefixed
with an exclamation point:
# echo '!sched_wakeup' >> /debug/tracing/set_event
To disable all events, echo an empty line to the set_event file:
# echo > /debug/tracing/set_event
To enable all events, echo '*:*' or '*:' to the set_event file:
# echo *:* > /debug/tracing/set_event
The events are organized into subsystems, such as ext4, irq, sched,
etc., and a full event name looks like this: <subsystem>:<event>. The
subsystem name is optional, but it is displayed in the available_events
file. All of the events in a subsystem can be specified via the syntax
"<subsystem>:*"; for example, to enable all irq events, you can use the
command:
# echo 'irq:*' > /debug/tracing/set_event
2.2 Via the 'enable' toggle
---------------------------
The events available are also listed in /debug/tracing/events/ hierarchy
of directories.
To enable event 'sched_wakeup':
# echo 1 > /debug/tracing/events/sched/sched_wakeup/enable
To disable it:
# echo 0 > /debug/tracing/events/sched/sched_wakeup/enable
To enable all events in sched subsystem:
# echo 1 > /debug/tracing/events/sched/enable
To eanble all events:
# echo 1 > /debug/tracing/events/enable
When reading one of these enable files, there are four results:
0 - all events this file affects are disabled
1 - all events this file affects are enabled
X - there is a mixture of events enabled and disabled
? - this file does not affect any event
3. Defining an event-enabled tracepoint
=======================================
See The example provided in samples/trace_events
......@@ -179,7 +179,7 @@ Here is the list of current tracers that may be configured.
Function call tracer to trace all kernel functions.
"function_graph_tracer"
"function_graph"
Similar to the function tracer except that the
function tracer probes the functions on their entry
......
The power tracer collects detailed information about C-state and P-state
transitions, instead of just looking at the high-level "average"
information.
There is a helper script found in scrips/tracing/power.pl in the kernel
sources which can be used to parse this information and create a
Scalable Vector Graphics (SVG) picture from the trace data.
To use this tracer:
echo 0 > /sys/kernel/debug/tracing/tracing_enabled
echo power > /sys/kernel/debug/tracing/current_tracer
echo 1 > /sys/kernel/debug/tracing/tracing_enabled
sleep 1
echo 0 > /sys/kernel/debug/tracing/tracing_enabled
cat /sys/kernel/debug/tracing/trace | \
perl scripts/tracing/power.pl > out.sv
......@@ -174,6 +174,15 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
config X86_DS_SELFTEST
bool "DS selftest"
default y
depends on DEBUG_KERNEL
depends on X86_DS
---help---
Perform Debug Store selftests at boot time.
If in doubt, say "N".
config HAVE_MMIOTRACE_SUPPORT
def_bool y
......
......@@ -15,8 +15,8 @@
* - buffer allocation (memory accounting)
*
*
* Copyright (C) 2007-2008 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
* Copyright (C) 2007-2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
*/
#ifndef _ASM_X86_DS_H
......@@ -83,8 +83,10 @@ enum ds_feature {
* The interrupt threshold is independent from the overflow callback
* to allow users to use their own overflow interrupt handling mechanism.
*
* task: the task to request recording for;
* NULL for per-cpu recording on the current cpu
* The function might sleep.
*
* task: the task to request recording for
* cpu: the cpu to request recording for
* base: the base pointer for the (non-pageable) buffer;
* size: the size of the provided buffer in bytes
* ovfl: pointer to a function to be called on buffer overflow;
......@@ -93,19 +95,28 @@ enum ds_feature {
* -1 if no interrupt threshold is requested.
* flags: a bit-mask of the above flags
*/
extern struct bts_tracer *ds_request_bts(struct task_struct *task,
void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
/*
* Release BTS or PEBS resources
* Suspend and resume BTS or PEBS tracing
*
* Must be called with irq's enabled.
*
* tracer: the tracer handle returned from ds_request_~()
*/
extern void ds_release_bts(struct bts_tracer *tracer);
......@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer);
extern void ds_suspend_pebs(struct pebs_tracer *tracer);
extern void ds_resume_pebs(struct pebs_tracer *tracer);
/*
* Release BTS or PEBS resources
* Suspend and resume BTS or PEBS tracing
*
* Cpu tracers must call this on the traced cpu.
* Task tracers must call ds_release_~_noirq() for themselves.
*
* May be called with irq's disabled.
*
* Returns 0 if successful;
* -EPERM if the cpu tracer does not trace the current cpu.
* -EPERM if the task tracer does not trace itself.
*
* tracer: the tracer handle returned from ds_request_~()
*/
extern int ds_release_bts_noirq(struct bts_tracer *tracer);
extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
/*
* The raw DS buffer state as it is used for BTS and PEBS recording.
......@@ -170,9 +203,9 @@ struct bts_struct {
} lbr;
/* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
struct {
__u64 jiffies;
__u64 clock;
pid_t pid;
} timestamp;
} event;
} variant;
};
......@@ -201,8 +234,12 @@ struct bts_trace {
struct pebs_trace {
struct ds_trace ds;
/* the PEBS reset value */
unsigned long long reset_value;
/* the number of valid counters in the below array */
unsigned int counters;
#define MAX_PEBS_COUNTERS 4
/* the counter reset value */
unsigned long long counter_reset[MAX_PEBS_COUNTERS];
};
......@@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer);
* Returns 0 on success; -Eerrno on error
*
* tracer: the tracer handle returned from ds_request_pebs()
* counter: the index of the counter
* value: the new counter reset value
*/
extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
unsigned int counter, u64 value);
/*
* Initialization
......@@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
*/
extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
/*
* Task clone/init and cleanup work
*/
extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
extern void ds_exit_thread(struct task_struct *tsk);
#else /* CONFIG_X86_DS */
struct cpuinfo_x86;
static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
static inline void ds_switch_to(struct task_struct *prev,
struct task_struct *next) {}
static inline void ds_copy_thread(struct task_struct *tsk,
struct task_struct *father) {}
static inline void ds_exit_thread(struct task_struct *tsk) {}
#endif /* CONFIG_X86_DS */
#endif /* _ASM_X86_DS_H */
......@@ -462,14 +462,8 @@ struct thread_struct {
unsigned io_bitmap_max;
/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
unsigned long debugctlmsr;
#ifdef CONFIG_X86_DS
/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
/* Debug Store context; see asm/ds.h */
struct ds_context *ds_ctx;
#endif /* CONFIG_X86_DS */
#ifdef CONFIG_X86_PTRACE_BTS
/* the signal to send on a bts buffer overflow */
unsigned int bts_ovfl_signal;
#endif /* CONFIG_X86_PTRACE_BTS */
};
static inline unsigned long native_get_debugreg(int regno)
......@@ -797,6 +791,21 @@ static inline unsigned long get_debugctlmsr(void)
return debugctlmsr;
}
static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
{
u64 debugctlmsr = 0;
u32 val1, val2;
#ifndef CONFIG_X86_DEBUGCTLMSR
if (boot_cpu_data.x86 < 6)
return 0;
#endif
rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
debugctlmsr = val1 | ((u64)val2 << 32);
return debugctlmsr;
}
static inline void update_debugctlmsr(unsigned long debugctlmsr)
{
#ifndef CONFIG_X86_DEBUGCTLMSR
......@@ -806,6 +815,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
}
static inline void update_debugctlmsr_on_cpu(int cpu,
unsigned long debugctlmsr)
{
#ifndef CONFIG_X86_DEBUGCTLMSR
if (boot_cpu_data.x86 < 6)
return;
#endif
wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
(u32)((u64)debugctlmsr),
(u32)((u64)debugctlmsr >> 32));
}
/*
* from system description table in BIOS. Mostly for MCA use, but
* others may find it useful:
......
......@@ -236,12 +236,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
extern void x86_ptrace_untrace(struct task_struct *);
extern void x86_ptrace_fork(struct task_struct *child,
unsigned long clone_flags);
#ifdef CONFIG_X86_PTRACE_BTS
extern void ptrace_bts_untrace(struct task_struct *tsk);
#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk)
#endif /* CONFIG_X86_PTRACE_BTS */
#endif /* __KERNEL__ */
......
......@@ -17,7 +17,7 @@
static inline void __native_flush_tlb(void)
{
write_cr3(read_cr3());
native_write_cr3(native_read_cr3());
}
static inline void __native_flush_tlb_global(void)
......@@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void)
*/
raw_local_irq_save(flags);
cr4 = read_cr4();
cr4 = native_read_cr4();
/* clear PGE */
write_cr4(cr4 & ~X86_CR4_PGE);
native_write_cr4(cr4 & ~X86_CR4_PGE);
/* write old PGE again and flush TLBs */
write_cr4(cr4);
native_write_cr4(cr4);
raw_local_irq_restore(flags);
}
......
......@@ -44,6 +44,7 @@ obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
obj-$(CONFIG_X86_DS) += ds.o
obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o
obj-y += step.o
......
This diff is collapsed.
/*
* Debug Store support - selftest
*
*
* Copyright (C) 2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2009
*/
#include "ds_selftest.h"
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <asm/ds.h>
#define BUFFER_SIZE 521 /* Intentionally chose an odd size. */
#define SMALL_BUFFER_SIZE 24 /* A single bts entry. */
struct ds_selftest_bts_conf {
struct bts_tracer *tracer;
int error;
int (*suspend)(struct bts_tracer *);
int (*resume)(struct bts_tracer *);
};
static int ds_selftest_bts_consistency(const struct bts_trace *trace)
{
int error = 0;
if (!trace) {
printk(KERN_CONT "failed to access trace...");
/* Bail out. Other tests are pointless. */
return -1;
}
if (!trace->read) {
printk(KERN_CONT "bts read not available...");
error = -1;
}
/* Do some sanity checks on the trace configuration. */
if (!trace->ds.n) {
printk(KERN_CONT "empty bts buffer...");
error = -1;
}
if (!trace->ds.size) {
printk(KERN_CONT "bad bts trace setup...");
error = -1;
}
if (trace->ds.end !=
(char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
printk(KERN_CONT "bad bts buffer setup...");
error = -1;
}
/*
* We allow top in [begin; end], since its not clear when the
* overflow adjustment happens: after the increment or before the
* write.
*/
if ((trace->ds.top < trace->ds.begin) ||
(trace->ds.end < trace->ds.top)) {
printk(KERN_CONT "bts top out of bounds...");
error = -1;
}
return error;
}
static int ds_selftest_bts_read(struct bts_tracer *tracer,
const struct bts_trace *trace,
const void *from, const void *to)
{
const unsigned char *at;
/*
* Check a few things which do not belong to this test.
* They should be covered by other tests.
*/
if (!trace)
return -1;
if (!trace->read)
return -1;
if (to < from)
return -1;