/*
 * Performance events x86 architecture code
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 *
 *  For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <linux/cpu.h>

#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>

static u64 perf_event_mask __read_mostly;

/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS	4

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

/* The size of a per-cpu BTS buffer in bytes: */
#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)

/* The BTS overflow threshold in bytes from the end of the buffer: */
#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)

/*
 * Bits in the debugctlmsr controlling branch tracing.
 */
#define X86_DEBUGCTL_TR			(1 << 6)
#define X86_DEBUGCTL_BTS		(1 << 7)
#define X86_DEBUGCTL_BTINT		(1 << 8)
#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)

/*
 * A debug store configuration.
 *
 * We only support architectures that use 64bit fields.
 */
struct debug_store {
	u64	bts_buffer_base;
	u64	bts_index;
	u64	bts_absolute_maximum;
	u64	bts_interrupt_threshold;
	u64	pebs_buffer_base;
	u64	pebs_index;
	u64	pebs_absolute_maximum;
	u64	pebs_interrupt_threshold;
	u64	pebs_event_reset[MAX_PEBS_EVENTS];
};

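/*
 * Per-CPU PMU state: the events currently scheduled on this CPU's
 * counters, bitmasks of used and active counter slots, and the
 * per-CPU debug store area used for BTS.
 */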
struct cpu_hw_events {
	struct perf_event	*events[X86_PMC_IDX_MAX];
	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		interrupts;
	int			enabled;
	struct debug_store	*ds;
};

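/*
 * An event constraint restricts which counter indices an event code
 * may be scheduled on; idxmsk is a bitmask of the allowed slots.
 */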
struct event_constraint {
	unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int		code;
};

#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }

#define for_each_event_constraint(e, c) \
	for ((e) = (c); (e)->idxmsk[0]; (e)++)


/*
 * struct x86_pmu - generic x86 pmu
 */
struct x86_pmu {
	const char	*name;
	int		version;
	int		(*handle_irq)(struct pt_regs *);
	void		(*disable_all)(void);
	void		(*enable_all)(void);
	void		(*enable)(struct hw_perf_event *, int);
	void		(*disable)(struct hw_perf_event *, int);
	unsigned	eventsel;
	unsigned	perfctr;
	u64		(*event_map)(int);
	u64		(*raw_event)(u64);
	int		max_events;
	int		num_events;
	int		num_events_fixed;
	int		event_bits;
	u64		event_mask;
	int		apic;
	u64		max_period;
	u64		intel_ctrl;
	void		(*enable_bts)(u64 config);
	void		(*disable_bts)(void);
	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
					 struct hw_perf_event *hwc);
};

static struct x86_pmu x86_pmu __read_mostly;

static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
	.enabled = 1,
};

static const struct event_constraint *event_constraints;

/*
 * Not sure about some of these
 */
static const u64 p6_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0f2e,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x012e,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
};

static u64 p6_pmu_event_map(int hw_event)
{
	return p6_perfmon_event_map[hw_event];
}

/*
 * Event setting that is specified not to count anything.
 * We use this to effectively disable a counter.
 *
 * L2_RQSTS with 0 MESI unit mask.
 */
#define P6_NOP_EVENT			0x0000002EULL

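/*
 * Filter a raw config value down to the EVNTSEL bits that user space
 * may set directly; everything outside the mask is dropped.
 */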
static u64 p6_pmu_raw_event(u64 hw_event)
{
#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
#define P6_EVNTSEL_INV_MASK		0x00800000ULL
#define P6_EVNTSEL_REG_MASK		0xFF000000ULL

#define P6_EVNTSEL_MASK			\
	(P6_EVNTSEL_EVENT_MASK |	\
	 P6_EVNTSEL_UNIT_MASK  |	\
	 P6_EVNTSEL_EDGE_MASK  |	\
	 P6_EVNTSEL_INV_MASK   |	\
	 P6_EVNTSEL_REG_MASK)

	return hw_event & P6_EVNTSEL_MASK;
}

static const struct event_constraint intel_p6_event_constraints[] =
{
	EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
	EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
	EVENT_CONSTRAINT_END
};

/*
 * Intel PerfMon v3. Used on Core2 and later.
 */
static const u64 intel_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
};

static const struct event_constraint intel_core_event_constraints[] =
{
	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
	EVENT_CONSTRAINT(0x11, 0x2),	/* FP_ASSIST */
	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
	EVENT_CONSTRAINT(0x18, 0x1),	/* IDLE_DURING_DIV */
	EVENT_CONSTRAINT(0x19, 0x2),	/* DELAYED_BYPASS */
	EVENT_CONSTRAINT(0xa1, 0x1),	/* RS_UOPS_DISPATCH_CYCLES */
	EVENT_CONSTRAINT(0xcb, 0x1),	/* MEM_LOAD_RETIRED */
	EVENT_CONSTRAINT_END
};

static const struct event_constraint intel_nehalem_event_constraints[] =
{
	EVENT_CONSTRAINT(0x40, 0x3),	/* L1D_CACHE_LD */
	EVENT_CONSTRAINT(0x41, 0x3),	/* L1D_CACHE_ST */
	EVENT_CONSTRAINT(0x42, 0x3),	/* L1D_CACHE_LOCK */
	EVENT_CONSTRAINT(0x43, 0x3),	/* L1D_ALL_REF */
	EVENT_CONSTRAINT(0x4e, 0x3),	/* L1D_PREFETCH */
	EVENT_CONSTRAINT(0x4c, 0x3),	/* LOAD_HIT_PRE */
	EVENT_CONSTRAINT(0x51, 0x3),	/* L1D */
	EVENT_CONSTRAINT(0x52, 0x3),	/* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
	EVENT_CONSTRAINT(0x53, 0x3),	/* L1D_CACHE_LOCK_FB_HIT */
	EVENT_CONSTRAINT(0xc5, 0x3),	/* CACHE_LOCK_CYCLES */
	EVENT_CONSTRAINT_END
};

static u64 intel_pmu_event_map(int hw_event)
{
	return intel_perfmon_event_map[hw_event];
}

/*
 * Generalized hw caching related hw_event table, filled
 * in on a per model basis. A value of 0 means
 * 'not supported', -1 means 'hw_event makes no sense on
 * this CPU', any other value means the raw hw_event
 * ID.
 */

#define C(x) PERF_COUNT_HW_CACHE_##x

static u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

static __initconst u64 nehalem_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

static __initconst u64 core2_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

static __initconst u64 atom_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

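/*
 * Same filtering as p6_pmu_raw_event(), for the Core/architectural
 * EVNTSEL layout.
 */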
static u64 intel_pmu_raw_event(u64 hw_event)
{
#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL

#define CORE_EVNTSEL_MASK		\
	(CORE_EVNTSEL_EVENT_MASK |	\
	 CORE_EVNTSEL_UNIT_MASK  |	\
	 CORE_EVNTSEL_EDGE_MASK  |	\
	 CORE_EVNTSEL_INV_MASK   |	\
	 CORE_EVNTSEL_REG_MASK)

	return hw_event & CORE_EVNTSEL_MASK;
}

static __initconst u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DTLB Miss   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
};

static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}

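/*
 * Same filtering, for the AMD K7/K8 EVNTSEL layout.
 */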
static u64 amd_pmu_raw_event(u64 hw_event)
{
#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
#define K7_EVNTSEL_INV_MASK	0x000800000ULL
#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL

#define K7_EVNTSEL_MASK			\
	(K7_EVNTSEL_EVENT_MASK |	\
	 K7_EVNTSEL_UNIT_MASK  |	\
	 K7_EVNTSEL_EDGE_MASK  |	\
	 K7_EVNTSEL_INV_MASK   |	\
	 K7_EVNTSEL_REG_MASK)

	return hw_event & K7_EVNTSEL_MASK;
}

/*
 * Propagate event elapsed time into the generic event.
 * Can only be executed on the CPU where the event is active.
 * Returns the delta events processed.
 */
static u64
x86_perf_event_update(struct perf_event *event,
			struct hw_perf_event *hwc, int idx)
{
	int shift = 64 - x86_pmu.event_bits;
	u64 prev_raw_count, new_raw_count;
	s64 delta;

	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	rdmsrl(hwc->event_base + idx, new_raw_count);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &event->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);

static bool reserve_pmc_hardware(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	int i;

	if (nmi_watchdog == NMI_LOCAL_APIC)
		disable_lapic_nmi_watchdog();

	for (i = 0; i < x86_pmu.num_events; i++) {
		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
			goto perfctr_fail;
	}

	for (i = 0; i < x86_pmu.num_events; i++) {
		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
			goto eventsel_fail;
	}
#endif

	return true;

#ifdef CONFIG_X86_LOCAL_APIC
eventsel_fail:
	for (i--; i >= 0; i--)
		release_evntsel_nmi(x86_pmu.eventsel + i);

	i = x86_pmu.num_events;

perfctr_fail:
	for (i--; i >= 0; i--)
		release_perfctr_nmi(x86_pmu.perfctr + i);

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();

	return false;
#endif
}

static void release_pmc_hardware(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	int i;

	for (i = 0; i < x86_pmu.num_events; i++) {
		release_perfctr_nmi(x86_pmu.perfctr + i);
		release_evntsel_nmi(x86_pmu.eventsel + i);
	}

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();
#endif
}

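/*
 * BTS (Branch Trace Store) support: available only when the PMU
 * provides an enable_bts() hook; each CPU gets a debug store area
 * advertised to the hardware via MSR_IA32_DS_AREA.
 */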
static inline bool bts_available(void)
{
	return x86_pmu.enable_bts != NULL;
}

static inline void init_debug_store_on_cpu(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
}

static inline void fini_debug_store_on_cpu(int cpu)
{
	if (!per_cpu(cpu_hw_events, cpu).ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

static void release_bts_hardware(void)
{
	int cpu;

	if (!bts_available())
		return;

	get_online_cpus();

	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

		if (!ds)
			continue;

		per_cpu(cpu_hw_events, cpu).ds = NULL;

		kfree((void *)(unsigned long)ds->bts_buffer_base);
		kfree(ds);
	}

	put_online_cpus();
}

static int reserve_bts_hardware(void)
{
	int cpu, err = 0;

	if (!bts_available())
		return 0;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		struct debug_store *ds;
		void *buffer;

		err = -ENOMEM;
		buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
		if (unlikely(!buffer))
			break;

		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
		if (unlikely(!ds)) {
			kfree(buffer);
			break;
		}

		ds->bts_buffer_base = (u64)(unsigned long)buffer;
		ds->bts_index = ds->bts_buffer_base;
		ds->bts_absolute_maximum =
			ds->bts_buffer_base + BTS_BUFFER_SIZE;
		ds->bts_interrupt_threshold =
			ds->bts_absolute_maximum - BTS_OVFL_TH;

		per_cpu(cpu_hw_events, cpu).ds = ds;
		err = 0;
	}

	if (err)
		release_bts_hardware();
	else {
		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();

	return err;
}

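/*
 * Event tear-down: drop our reference on the PMC/BTS hardware; the
 * last active event releases it.
 */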
static void hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
		release_pmc_hardware();
		release_bts_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

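/* True once a hardware PMU implementation has been selected. */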
static inline int x86_pmu_initialized(void)
{
	return x86_pmu.handle_irq != NULL;
}

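/*
 * Decode a PERF_TYPE_HW_CACHE config: byte 0 selects the cache, byte 1
 * the operation, byte 2 the result, then look up the model-specific
 * event id in hw_cache_event_ids.
 */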
static inline int
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
{
	unsigned int cache_type, cache_op, cache_result;
	u64 config, val;

	config = attr->config;

	cache_type = (config >>  0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >>  8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	val = hw_cache_event_ids[cache_type][cache_op][cache_result];

	if (val == 0)
		return -ENOENT;

	if (val == -1)
		return -EINVAL;

	hwc->config |= val;

	return 0;
}

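/*
 * Turn branch tracing on in DEBUGCTLMSR, honouring the OS/USR exclude
 * bits of the event configuration.
 */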
static void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr |= X86_DEBUGCTL_TR;
	debugctlmsr |= X86_DEBUGCTL_BTS;
	debugctlmsr |= X86_DEBUGCTL_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}

static void intel_pmu_disable_bts(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}

/*
 * Setup the hardware configuration for a given attr_type
 */
static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	u64 config;
	int err;

	if (!x86_pmu_initialized())
		return -ENODEV;

	err = 0;
	if (!atomic_inc_not_zero(&active_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			if (!reserve_pmc_hardware())
				err = -EBUSY;
			else
				err = reserve_bts_hardware();
		}
		if (!err)
			atomic_inc(&active_events);
		mutex_unlock(&pmc_reserve_mutex);
	}
	if (err)
		return err;

	event->destroy = hw_perf_event_destroy;

	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	hwc->config = ARCH_PERFMON_EVENTSEL_INT;

	hwc->idx = -1;

	/*
	 * Count user and OS events unless requested not to.
	 */
	if (!attr->exclude_user)
		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
	if (!attr->exclude_kernel)
		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;

	if (!hwc->sample_period) {
		hwc->sample_period = x86_pmu.max_period;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	} else {
		/*
		 * If we have a PMU initialized but no APIC
		 * interrupts, we cannot sample hardware
		 * events (user-space has to fall back and
		 * sample via a hrtimer based software event):
		 */
		if (!x86_pmu.apic)
			return -EOPNOTSUPP;
	}

	/*
	 * Raw hw_event type provides the config in the hw_event structure
	 */
	if (attr->type == PERF_TYPE_RAW) {
		hwc->config |= x86_pmu.raw_event(attr->config);
		return 0;
	}

	if (attr->type == PERF_TYPE_HW_CACHE)
		return set_ext_hw_attr(hwc, attr);

	if (attr->config >= x86_pmu.max_events)
		return -EINVAL;

	/*
	 * The generic map:
	 */
	config = x86_pmu.event_map(attr->config);

	if (config == 0)
		return -ENOENT;

	if (config == -1LL)
		return -EINVAL;

	/*
	 * Branch tracing:
	 */
	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
	    (hwc->sample_period == 1)) {
		/* BTS is not supported by this architecture. */
		if (!bts_available())
			return -EOPNOTSUPP;

		/* BTS is currently only allowed for user-mode. */
		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
			return -EOPNOTSUPP;
	}

	hwc->config |= config;

	return 0;
}

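/* P6 has a single global enable bit, in MSR_P6_EVNTSEL0. */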
static void p6_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	barrier();

	/* p6 only has one enable register */
	rdmsrl(MSR_P6_EVNTSEL0, val);
	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsrl(MSR_P6_EVNTSEL0, val);
}

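/* Clear the global control MSR and stop BTS if it was active. */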
static void intel_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	barrier();

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		intel_pmu_disable_bts();
}

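/* AMD has no global control; disable each active counter individually. */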
static void amd_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	/*
	 * ensure we write the disable before we start disabling the
	 * events proper, so that amd_pmu_enable_event() does the
	 * right thing.
	 */
	barrier();

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
	}
}

void hw_perf_disable(void)
{
	if (!x86_pmu_initialized())
		return;
	return x86_pmu.disable_all();
}

static void p6_pmu_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long val;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	/* p6 only has one enable register */
	rdmsrl(MSR_P6_EVNTSEL0, val);
	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsrl(MSR_P6_EVNTSEL0, val);
}

static void intel_pmu_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);

	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
		struct perf_event *event =
			cpuc->events[X86_PMC_IDX_FIXED_BTS];

		if (WARN_ON_ONCE(!event))
			return;

		intel_pmu_enable_bts(event->hw.config);
	}
}

static void amd_pmu_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		struct perf_event *event = cpuc->events[idx];
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		val = event->hw.config;
		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
	}
}

void hw_perf_enable(void)
{
	if (!x86_pmu_initialized())
		return;
	x86_pmu.enable_all();
}

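/* Read the counter overflow bits from MSR_CORE_PERF_GLOBAL_STATUS. */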
static inline u64 intel_pmu_get_status(void)
{
	u64 status;

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

	return status;
}