Commit f3f096cf authored by Srikar Dronamraju's avatar Srikar Dronamraju Committed by Ingo Molnar
Browse files

tracing: Provide trace events interface for uprobes



Implements trace_event support for uprobes. In its current form
it can be used to put probes at a specified offset in a file and
dump the required registers when the code flow reaches the
probed address.

The following example shows how to dump the instruction pointer
and %ax a register at the probed text address.  Here we are
trying to probe zfree in /bin/zsh:

 # cd /sys/kernel/debug/tracing/
 # cat /proc/`pgrep  zsh`/maps | grep /bin/zsh | grep r-xp
 00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh
 # objdump -T /bin/zsh | grep -w zfree
 0000000000446420 g    DF .text  0000000000000012  Base
 zfree # echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events
 # cat uprobe_events
 p:uprobes/p_zsh_0x46420 /bin/zsh:0x0000000000046420
 # echo 1 > events/uprobes/enable
 # sleep 20
 # echo 0 > events/uprobes/enable
 # cat trace
 # tracer: nop
 #
 #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
 #              | |       |          |         |
              zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
              zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
              zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
              zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
Signed-off-by: default avatarSrikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: default avatarSteven Rostedt <rostedt@goodmis.org>
Acked-by: default avatarMasami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@linux.vnet.ibm.com>
Cc: Linux-mm <linux-mm@kvack.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120411103043.GB29437@linux.vnet.ibm.com

Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 8ab83f56
Uprobe-tracer: Uprobe-based Event Tracing
=========================================
Documentation written by Srikar Dronamraju
Overview
--------
Uprobe based trace events are similar to kprobe based trace events.
To enable this feature, build your kernel with CONFIG_UPROBE_EVENTS=y.
Similar to the kprobe-event tracer, this doesn't need to be activated via
current_tracer. Instead of that, add probe points via
/sys/kernel/debug/tracing/uprobe_events, and enable it via
/sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled.
However unlike kprobe-event tracer, the uprobe event interface expects the
user to calculate the offset of the probepoint in the object
Synopsis of uprobe_tracer
-------------------------
p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a probe
GRP : Group name. If omitted, use "uprobes" for it.
EVENT : Event name. If omitted, the event name is generated
based on SYMBOL+offs.
PATH : path to an executable or a library.
SYMBOL[+offs] : Symbol+offset where the probe is inserted.
FETCHARGS : Arguments. Each probe can have up to 128 args.
%REG : Fetch register REG
Event Profiling
---------------
You can check the total number of probe hits and probe miss-hits via
/sys/kernel/debug/tracing/uprobe_profile.
The first column is event name, the second is the number of probe hits,
the third is the number of probe miss-hits.
Usage examples
--------------
To add a probe as a new event, write a new definition to uprobe_events
as below.
echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
This sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash
echo > /sys/kernel/debug/tracing/uprobe_events
This clears all probe points.
The following example shows how to dump the instruction pointer and %ax
a register at the probed text address. Here we are trying to probe
function zfree in /bin/zsh
# cd /sys/kernel/debug/tracing/
# cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp
00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh
# objdump -T /bin/zsh | grep -w zfree
0000000000446420 g DF .text 0000000000000012 Base zfree
0x46420 is the offset of zfree in object /bin/zsh that is loaded at
0x00400000. Hence the command to probe would be :
# echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events
Please note: User has to explicitly calculate the offset of the probepoint
in the object. We can see the events that are registered by looking at the
uprobe_events file.
# cat uprobe_events
p:uprobes/p_zsh_0x46420 /bin/zsh:0x0000000000046420
Right after definition, each event is disabled by default. For tracing these
events, you need to enable it by:
# echo 1 > events/uprobes/enable
Lets disable the event after sleeping for some time.
# sleep 20
# echo 0 > events/uprobes/enable
And you can see the traced information via /sys/kernel/debug/tracing/trace.
# cat trace
# tracer: nop
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
Each line shows us probes were triggered for a pid 24842 with ip being
0x446421 and contents of ax register being 79.
......@@ -78,7 +78,7 @@ config OPTPROBES
config UPROBES
bool "Transparent user-space probes (EXPERIMENTAL)"
depends on ARCH_SUPPORTS_UPROBES && PERF_EVENTS
depends on UPROBE_EVENTS && PERF_EVENTS
default n
help
Uprobes is the user-space counterpart to kprobes: they
......
......@@ -386,6 +386,22 @@ config KPROBE_EVENT
This option is also required by perf-probe subcommand of perf tools.
If you want to use perf tools, this option is strongly recommended.
config UPROBE_EVENT
bool "Enable uprobes-based dynamic events"
depends on ARCH_SUPPORTS_UPROBES
depends on MMU
select UPROBES
select PROBE_EVENTS
select TRACING
default n
help
This allows the user to add tracing events on top of userspace
dynamic events (similar to tracepoints) on the fly via the trace
events interface. Those events can be inserted wherever uprobes
can probe, and record various registers.
This option is required if you plan to use perf-probe subcommand
of perf tools on user space applications.
config PROBE_EVENTS
def_bool n
......
......@@ -62,5 +62,6 @@ ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
libftrace-y := ftrace.o
......@@ -103,6 +103,11 @@ struct kretprobe_trace_entry_head {
unsigned long ret_ip;
};
struct uprobe_trace_entry_head {
struct trace_entry ent;
unsigned long ip;
};
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
......
......@@ -525,7 +525,7 @@ static int create_trace_probe(int argc, char **argv)
/* Parse fetch argument */
ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i],
is_return);
is_return, true);
if (ret) {
pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
goto error;
......
......@@ -550,7 +550,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
/* Recursive argument parser */
static int parse_probe_arg(char *arg, const struct fetch_type *t,
struct fetch_param *f, bool is_return)
struct fetch_param *f, bool is_return, bool is_kprobe)
{
unsigned long param;
long offset;
......@@ -558,6 +558,11 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,
int ret;
ret = 0;
/* Until uprobe_events supports only reg arguments */
if (!is_kprobe && arg[0] != '%')
return -EINVAL;
switch (arg[0]) {
case '$':
ret = parse_probe_vars(arg + 1, t, f, is_return);
......@@ -619,7 +624,8 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,
return -ENOMEM;
dprm->offset = offset;
ret = parse_probe_arg(arg, t2, &dprm->orig, is_return);
ret = parse_probe_arg(arg, t2, &dprm->orig, is_return,
is_kprobe);
if (ret)
kfree(dprm);
else {
......@@ -677,7 +683,7 @@ static int __parse_bitfield_probe_arg(const char *bf,
/* String length checking wrapper */
int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
struct probe_arg *parg, bool is_return)
struct probe_arg *parg, bool is_return, bool is_kprobe)
{
const char *t;
int ret;
......@@ -703,7 +709,7 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
}
parg->offset = *size;
*size += parg->type->size;
ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe);
if (ret >= 0 && t != NULL)
ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
......
......@@ -66,6 +66,7 @@
#define TP_FLAG_TRACE 1
#define TP_FLAG_PROFILE 2
#define TP_FLAG_REGISTERED 4
#define TP_FLAG_UPROBE 8
/* data_rloc: data relative location, compatible with u32 */
......@@ -143,7 +144,7 @@ static inline int is_good_name(const char *name)
}
extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
struct probe_arg *parg, bool is_return);
struct probe_arg *parg, bool is_return, bool is_kprobe);
extern int traceprobe_conflict_field_name(const char *name,
struct probe_arg *args, int narg);
......
/*
* uprobes-based tracing events
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright (C) IBM Corporation, 2010-2012
* Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
*/
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/uprobes.h>
#include <linux/namei.h>
#include "trace_probe.h"
#define UPROBE_EVENT_SYSTEM "uprobes"
/*
* uprobe event core functions
*/
struct trace_uprobe;
struct uprobe_trace_consumer {
struct uprobe_consumer cons;
struct trace_uprobe *tu;
};
struct trace_uprobe {
struct list_head list;
struct ftrace_event_class class;
struct ftrace_event_call call;
struct uprobe_trace_consumer *consumer;
struct inode *inode;
char *filename;
unsigned long offset;
unsigned long nhit;
unsigned int flags; /* For TP_FLAG_* */
ssize_t size; /* trace entry size */
unsigned int nr_args;
struct probe_arg args[];
};
#define SIZEOF_TRACE_UPROBE(n) \
(offsetof(struct trace_uprobe, args) + \
(sizeof(struct probe_arg) * (n)))
static int register_uprobe_event(struct trace_uprobe *tu);
static void unregister_uprobe_event(struct trace_uprobe *tu);
static DEFINE_MUTEX(uprobe_lock);
static LIST_HEAD(uprobe_list);
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
/*
* Allocate new trace_uprobe and initialize it (including uprobes).
*/
static struct trace_uprobe *
alloc_trace_uprobe(const char *group, const char *event, int nargs)
{
struct trace_uprobe *tu;
if (!event || !is_good_name(event))
return ERR_PTR(-EINVAL);
if (!group || !is_good_name(group))
return ERR_PTR(-EINVAL);
tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
if (!tu)
return ERR_PTR(-ENOMEM);
tu->call.class = &tu->class;
tu->call.name = kstrdup(event, GFP_KERNEL);
if (!tu->call.name)
goto error;
tu->class.system = kstrdup(group, GFP_KERNEL);
if (!tu->class.system)
goto error;
INIT_LIST_HEAD(&tu->list);
return tu;
error:
kfree(tu->call.name);
kfree(tu);
return ERR_PTR(-ENOMEM);
}
static void free_trace_uprobe(struct trace_uprobe *tu)
{
int i;
for (i = 0; i < tu->nr_args; i++)
traceprobe_free_probe_arg(&tu->args[i]);
iput(tu->inode);
kfree(tu->call.class->system);
kfree(tu->call.name);
kfree(tu->filename);
kfree(tu);
}
static struct trace_uprobe *find_probe_event(const char *event, const char *group)
{
struct trace_uprobe *tu;
list_for_each_entry(tu, &uprobe_list, list)
if (strcmp(tu->call.name, event) == 0 &&
strcmp(tu->call.class->system, group) == 0)
return tu;
return NULL;
}
/* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
static void unregister_trace_uprobe(struct trace_uprobe *tu)
{
list_del(&tu->list);
unregister_uprobe_event(tu);
free_trace_uprobe(tu);
}
/* Register a trace_uprobe and probe_event */
static int register_trace_uprobe(struct trace_uprobe *tu)
{
struct trace_uprobe *old_tp;
int ret;
mutex_lock(&uprobe_lock);
/* register as an event */
old_tp = find_probe_event(tu->call.name, tu->call.class->system);
if (old_tp)
/* delete old event */
unregister_trace_uprobe(old_tp);
ret = register_uprobe_event(tu);
if (ret) {
pr_warning("Failed to register probe event(%d)\n", ret);
goto end;
}
list_add_tail(&tu->list, &uprobe_list);
end:
mutex_unlock(&uprobe_lock);
return ret;
}
/*
* Argument syntax:
* - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS]
*
* - Remove uprobe: -:[GRP/]EVENT
*/
static int create_trace_uprobe(int argc, char **argv)
{
struct trace_uprobe *tu;
struct inode *inode;
char *arg, *event, *group, *filename;
char buf[MAX_EVENT_NAME_LEN];
struct path path;
unsigned long offset;
bool is_delete;
int i, ret;
inode = NULL;
ret = 0;
is_delete = false;
event = NULL;
group = NULL;
/* argc must be >= 1 */
if (argv[0][0] == '-')
is_delete = true;
else if (argv[0][0] != 'p') {
pr_info("Probe definition must be started with 'p', 'r' or" " '-'.\n");
return -EINVAL;
}
if (argv[0][1] == ':') {
event = &argv[0][2];
arg = strchr(event, '/');
if (arg) {
group = event;
event = arg + 1;
event[-1] = '\0';
if (strlen(group) == 0) {
pr_info("Group name is not specified\n");
return -EINVAL;
}
}
if (strlen(event) == 0) {
pr_info("Event name is not specified\n");
return -EINVAL;
}
}
if (!group)
group = UPROBE_EVENT_SYSTEM;
if (is_delete) {
if (!event) {
pr_info("Delete command needs an event name.\n");
return -EINVAL;
}
mutex_lock(&uprobe_lock);
tu = find_probe_event(event, group);
if (!tu) {
mutex_unlock(&uprobe_lock);
pr_info("Event %s/%s doesn't exist.\n", group, event);
return -ENOENT;
}
/* delete an event */
unregister_trace_uprobe(tu);
mutex_unlock(&uprobe_lock);
return 0;
}
if (argc < 2) {
pr_info("Probe point is not specified.\n");
return -EINVAL;
}
if (isdigit(argv[1][0])) {
pr_info("probe point must be have a filename.\n");
return -EINVAL;
}
arg = strchr(argv[1], ':');
if (!arg)
goto fail_address_parse;
*arg++ = '\0';
filename = argv[1];
ret = kern_path(filename, LOOKUP_FOLLOW, &path);
if (ret)
goto fail_address_parse;
ret = strict_strtoul(arg, 0, &offset);
if (ret)
goto fail_address_parse;
inode = igrab(path.dentry->d_inode);
argc -= 2;
argv += 2;
/* setup a probe */
if (!event) {
char *tail = strrchr(filename, '/');
char *ptr;
ptr = kstrdup((tail ? tail + 1 : filename), GFP_KERNEL);
if (!ptr) {
ret = -ENOMEM;
goto fail_address_parse;
}
tail = ptr;
ptr = strpbrk(tail, ".-_");
if (ptr)
*ptr = '\0';
snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset);
event = buf;
kfree(tail);
}
tu = alloc_trace_uprobe(group, event, argc);
if (IS_ERR(tu)) {
pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
ret = PTR_ERR(tu);
goto fail_address_parse;
}
tu->offset = offset;
tu->inode = inode;
tu->filename = kstrdup(filename, GFP_KERNEL);
if (!tu->filename) {
pr_info("Failed to allocate filename.\n");
ret = -ENOMEM;
goto error;
}
/* parse arguments */
ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
/* Increment count for freeing args in error case */
tu->nr_args++;
/* Parse argument name */
arg = strchr(argv[i], '=');
if (arg) {
*arg++ = '\0';
tu->args[i].name = kstrdup(argv[i], GFP_KERNEL);
} else {
arg = argv[i];
/* If argument name is omitted, set "argN" */
snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
tu->args[i].name = kstrdup(buf, GFP_KERNEL);
}
if (!tu->args[i].name) {
pr_info("Failed to allocate argument[%d] name.\n", i);
ret = -ENOMEM;
goto error;
}
if (!is_good_name(tu->args[i].name)) {
pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name);
ret = -EINVAL;
goto error;
}
if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) {
pr_info("Argument[%d] name '%s' conflicts with "
"another field.\n", i, argv[i]);
ret = -EINVAL;
goto error;
}
/* Parse fetch argument */
ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false);
if (ret) {
pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
goto error;
}
}
ret = register_trace_uprobe(tu);
if (ret)
goto error;
return 0;
error:
free_trace_uprobe(tu);
return ret;
fail_address_parse:
if (inode)
iput(inode);
pr_info("Failed to parse address.\n");
return ret;
}
static void cleanup_all_probes(void)
{
struct trace_uprobe *tu;
mutex_lock(&uprobe_lock);
while (!list_empty(&uprobe_list)) {
tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
unregister_trace_uprobe(tu);
}
mutex_unlock(&uprobe_lock);
}
/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&uprobe_lock);
return seq_list_start(&uprobe_list, *pos);
}
static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
return seq_list_next(v, &uprobe_list, pos);
}
static void probes_seq_stop(struct seq_file *m, void *v)
{
mutex_unlock(&uprobe_lock);
}
static int probes_seq_show(struct seq_file *m, void *v)
{
struct trace_uprobe *tu = v;
int i;
seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name);
seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
for (i = 0; i < tu->nr_args; i++)
seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm);