/*
 * Performance events x86 architecture code
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 *  Copyright (C) 2009 Google, Inc., Stephane Eranian
 *
 *  For licensing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/cpu.h>
#include <linux/bitops.h>

#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
#include <asm/compat.h>

#if 0
#undef wrmsrl
#define wrmsrl(msr, val) 					\
do {								\
	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
			(unsigned long)(val));			\
	native_write_msr((msr), (u32)((u64)(val)), 		\
			(u32)((u64)(val) >> 32));		\
} while (0)
#endif

/*
 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
 */
static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
	unsigned long offset, addr = (unsigned long)from;
	int type = in_nmi() ? KM_NMI : KM_IRQ0;
	unsigned long size, len = 0;
	struct page *page;
	void *map;
	int ret;

	do {
		ret = __get_user_pages_fast(addr, 1, 0, &page);
		if (!ret)
			break;

		offset = addr & (PAGE_SIZE - 1);
		size = min(PAGE_SIZE - offset, n - len);

		map = kmap_atomic(page, type);
		memcpy(to, map+offset, size);
		kunmap_atomic(map, type);
		put_page(page);

		len  += size;
		to   += size;
		addr += size;

	} while (len < n);

	return len;
}
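
/*
 * Note on the helper above: the copy is best effort by design.  If
 * __get_user_pages_fast() cannot pin a page the loop stops early and the
 * function returns the number of bytes copied so far, which may be less
 * than n, so callers must tolerate short reads.
 */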

struct event_constraint {
	union {
		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
		u64		idxmsk64;
	};
	u64	code;
	u64	cmask;
	int	weight;
};

struct amd_nb {
	int nb_id;  /* NorthBridge id */
	int refcnt; /* reference count */
	struct perf_event *owners[X86_PMC_IDX_MAX];
	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};

#define MAX_LBR_ENTRIES		16

struct cpu_hw_events {
	/*
	 * Generic x86 PMC bits
	 */
	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int			enabled;

	int			n_events;
	int			n_added;
	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
	u64			tags[X86_PMC_IDX_MAX];
	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */

	unsigned int		group_flag;

	/*
	 * Intel DebugStore bits
	 */
	struct debug_store	*ds;
	u64			pebs_enabled;

	/*
	 * Intel LBR bits
	 */
	int				lbr_users;
	void				*lbr_context;
	struct perf_branch_stack	lbr_stack;
	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];

	/*
	 * AMD specific bits
	 */
	struct amd_nb		*amd_nb;
};

#define __EVENT_CONSTRAINT(c, n, m, w) {\
	{ .idxmsk64 = (n) },		\
	.code = (c),			\
	.cmask = (m),			\
	.weight = (w),			\
}

#define EVENT_CONSTRAINT(c, n, m)	\
	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))

/*
 * Constraint on the Event code.
 */
#define INTEL_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)

/*
 * Constraint on the Event code + UMask + fixed-mask
 *
 * filter mask to validate fixed counter events.
 * the following filters disqualify for fixed counters:
 *  - inv
 *  - edge
 *  - cnt-mask
 *  The other filters are supported by fixed counters.
 *  The any-thread option is supported starting with v3.
 */
#define FIXED_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
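
/*
 * Illustrative expansion (the event code is assumed, as used by the Intel
 * specific code, not defined in this file): FIXED_EVENT_CONSTRAINT(0x003c, 1)
 * builds a constraint whose idxmsk64 has only bit 32+1 set, i.e. the event
 * may be scheduled solely on fixed counter 1, and whose cmask is
 * X86_RAW_EVENT_MASK so that event, umask and the filters above must all
 * match.
 */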

/*
 * Constraint on the Event code + UMask
 */
#define PEBS_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

#define EVENT_CONSTRAINT_END		\
	EVENT_CONSTRAINT(0, 0, 0)

#define for_each_event_constraint(e, c)	\
	for ((e) = (c); (e)->weight; (e)++)

union perf_capabilities {
	struct {
		u64	lbr_format    : 6;
		u64	pebs_trap     : 1;
		u64	pebs_arch_reg : 1;
		u64	pebs_format   : 4;
		u64	smm_freeze    : 1;
	};
	u64	capabilities;
};

/*
 * struct x86_pmu - generic x86 pmu
 */
struct x86_pmu {
	/*
	 * Generic x86 PMC bits
	 */
	const char	*name;
	int		version;
	int		(*handle_irq)(struct pt_regs *);
	void		(*disable_all)(void);
	void		(*enable_all)(int added);
	void		(*enable)(struct perf_event *);
	void		(*disable)(struct perf_event *);
	int		(*hw_config)(struct perf_event *event);
	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
	unsigned	eventsel;
	unsigned	perfctr;
	u64		(*event_map)(int);
	int		max_events;
	int		num_counters;
	int		num_counters_fixed;
	int		cntval_bits;
	u64		cntval_mask;
	int		apic;
	u64		max_period;
	struct event_constraint *
			(*get_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);

	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);
	struct event_constraint *event_constraints;
	void		(*quirks)(void);

	int		(*cpu_prepare)(int cpu);
	void		(*cpu_starting)(int cpu);
	void		(*cpu_dying)(int cpu);
	void		(*cpu_dead)(int cpu);

	/*
	 * Intel Arch Perfmon v2+
	 */
	u64			intel_ctrl;
	union perf_capabilities intel_cap;

	/*
	 * Intel DebugStore bits
	 */
	int		bts, pebs;
	int		pebs_record_size;
	void		(*drain_pebs)(struct pt_regs *regs);
	struct event_constraint *pebs_constraints;

	/*
	 * Intel LBR
	 */
	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
	int		lbr_nr;			   /* hardware stack size */
};

static struct x86_pmu x86_pmu __read_mostly;

static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
	.enabled = 1,
};

static int x86_perf_event_set_period(struct perf_event *event);

/*
 * Generalized hw caching related hw_event table, filled
 * in on a per model basis. A value of 0 means
 * 'not supported', -1 means 'hw_event makes no sense on
 * this CPU', any other value means the raw hw_event
 * ID.
 */

#define C(x) PERF_COUNT_HW_CACHE_##x

static u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
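
/*
 * Example lookup: hw_cache_event_ids[C(L1D)][C(OP_READ)][C(RESULT_MISS)]
 * is the model specific raw event id for L1D read misses; per the table
 * semantics above, 0 means the model does not support it and -1 means
 * the combination makes no sense on this CPU.
 */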

/*
 * Propagate event elapsed time into the generic event.
 * Can only be executed on the CPU where the event is active.
 * Returns the delta events processed.
 */
static u64
x86_perf_event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - x86_pmu.cntval_bits;
	u64 prev_raw_count, new_raw_count;
	int idx = hwc->idx;
	s64 delta;

	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	rdmsrl(hwc->event_base + idx, new_raw_count);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &event->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}
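
/*
 * Worked example of the shift trick above: with, say, 48-bit counters the
 * shift is 16, so both raw counts are moved into the top bits before
 * subtracting and the arithmetic shift right restores the scale.  That
 * keeps the delta correctly signed even on hardware that does not
 * sign-extend above the physical counter width.
 */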

static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);

#ifdef CONFIG_X86_LOCAL_APIC

static bool reserve_pmc_hardware(void)
{
	int i;

	if (nmi_watchdog == NMI_LOCAL_APIC)
		disable_lapic_nmi_watchdog();

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
			goto perfctr_fail;
	}

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
			goto eventsel_fail;
	}

	return true;

eventsel_fail:
	for (i--; i >= 0; i--)
		release_evntsel_nmi(x86_pmu.eventsel + i);

	i = x86_pmu.num_counters;

perfctr_fail:
	for (i--; i >= 0; i--)
		release_perfctr_nmi(x86_pmu.perfctr + i);

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();

	return false;
}

static void release_pmc_hardware(void)
{
	int i;

	for (i = 0; i < x86_pmu.num_counters; i++) {
		release_perfctr_nmi(x86_pmu.perfctr + i);
		release_evntsel_nmi(x86_pmu.eventsel + i);
	}

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();
}

#else

static bool reserve_pmc_hardware(void) { return true; }
static void release_pmc_hardware(void) {}

#endif

static int reserve_ds_buffers(void);
static void release_ds_buffers(void);

static void hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
		release_pmc_hardware();
		release_ds_buffers();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

static inline int x86_pmu_initialized(void)
{
	return x86_pmu.handle_irq != NULL;
}

static inline int
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
{
	unsigned int cache_type, cache_op, cache_result;
	u64 config, val;

	config = attr->config;

	cache_type = (config >>  0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >>  8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	val = hw_cache_event_ids[cache_type][cache_op][cache_result];

	if (val == 0)
		return -ENOENT;

	if (val == -1)
		return -EINVAL;

	hwc->config |= val;

	return 0;
}

static int x86_setup_perfctr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	u64 config;

	if (!hwc->sample_period) {
		hwc->sample_period = x86_pmu.max_period;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	} else {
		/*
		 * If we have a PMU initialized but no APIC
		 * interrupts, we cannot sample hardware
		 * events (user-space has to fall back and
		 * sample via a hrtimer based software event):
		 */
		if (!x86_pmu.apic)
			return -EOPNOTSUPP;
	}

	if (attr->type == PERF_TYPE_RAW)
		return 0;

	if (attr->type == PERF_TYPE_HW_CACHE)
		return set_ext_hw_attr(hwc, attr);

	if (attr->config >= x86_pmu.max_events)
		return -EINVAL;

	/*
	 * The generic map:
	 */
	config = x86_pmu.event_map(attr->config);

	if (config == 0)
		return -ENOENT;

	if (config == -1LL)
		return -EINVAL;

	/*
	 * Branch tracing:
	 */
	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
	    (hwc->sample_period == 1)) {
		/* BTS is not supported by this architecture. */
		if (!x86_pmu.bts)
			return -EOPNOTSUPP;

		/* BTS is currently only allowed for user-mode. */
		if (!attr->exclude_kernel)
			return -EOPNOTSUPP;
	}

	hwc->config |= config;

	return 0;
}

static int x86_pmu_hw_config(struct perf_event *event)
{
	if (event->attr.precise_ip) {
		int precise = 0;

		/* Support for constant skid */
		if (x86_pmu.pebs)
			precise++;

		/* Support for IP fixup */
		if (x86_pmu.lbr_nr)
			precise++;

		if (event->attr.precise_ip > precise)
			return -EOPNOTSUPP;
	}

	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	event->hw.config = ARCH_PERFMON_EVENTSEL_INT;

	/*
	 * Count user and OS events unless requested not to
	 */
	if (!event->attr.exclude_user)
		event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
	if (!event->attr.exclude_kernel)
		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;

	if (event->attr.type == PERF_TYPE_RAW)
		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;

	return x86_setup_perfctr(event);
}
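
/*
 * precise_ip handling above: PEBS support and LBR support each raise the
 * maximum allowed precision level by one, so a precise_ip request above
 * what the hardware provides is rejected with -EOPNOTSUPP.
 */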

/*
 * Setup the hardware configuration for a given attr_type
 */
static int __hw_perf_event_init(struct perf_event *event)
{
	int err;

	if (!x86_pmu_initialized())
		return -ENODEV;

	err = 0;
	if (!atomic_inc_not_zero(&active_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			if (!reserve_pmc_hardware())
				err = -EBUSY;
			else {
				err = reserve_ds_buffers();
				if (err)
					release_pmc_hardware();
			}
		}
		if (!err)
			atomic_inc(&active_events);
		mutex_unlock(&pmc_reserve_mutex);
	}
	if (err)
		return err;

	event->destroy = hw_perf_event_destroy;

	event->hw.idx = -1;
	event->hw.last_cpu = -1;
	event->hw.last_tag = ~0ULL;

	return x86_pmu.hw_config(event);
}

static void x86_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		rdmsrl(x86_pmu.eventsel + idx, val);
		if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(x86_pmu.eventsel + idx, val);
	}
}

void hw_perf_disable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!x86_pmu_initialized())
		return;

	if (!cpuc->enabled)
		return;

	cpuc->n_added = 0;
	cpuc->enabled = 0;
	barrier();

	x86_pmu.disable_all();
}

static void x86_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct perf_event *event = cpuc->events[idx];
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		val = event->hw.config;
		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(x86_pmu.eventsel + idx, val);
	}
}

static const struct pmu pmu;

static inline int is_x86_event(struct perf_event *event)
{
	return event->pmu == &pmu;
}

static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int i, j, w, wmax, num = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);

	for (i = 0; i < n; i++) {
		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
		constraints[i] = c;
	}

	/*
	 * fastpath, try to reuse previous register
	 */
	for (i = 0; i < n; i++) {
		hwc = &cpuc->event_list[i]->hw;
		c = constraints[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	if (i == n)
		goto done;

	/*
	 * begin slow path
	 */

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);

	/*
	 * weight = number of possible counters
	 *
	 * 1    = most constrained, only works on one counter
	 * wmax = least constrained, works on any counter
	 *
	 * assign events to counters starting with most
	 * constrained events.
	 */
	wmax = x86_pmu.num_counters;

	/*
	 * when fixed event counters are present,
	 * wmax is incremented by 1 to account
	 * for one more choice
	 */
	if (x86_pmu.num_counters_fixed)
		wmax++;

	for (w = 1, num = n; num && w <= wmax; w++) {
		/* for each event */
		for (i = 0; num && i < n; i++) {
			c = constraints[i];
			hwc = &cpuc->event_list[i]->hw;

			if (c->weight != w)
				continue;

			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
				if (!test_bit(j, used_mask))
					break;
			}

			if (j == X86_PMC_IDX_MAX)
				break;

			__set_bit(j, used_mask);

			if (assign)
				assign[i] = j;
			num--;
		}
	}
done:
	/*
	 * scheduling failed or is just a simulation,
	 * free resources if necessary
	 */
	if (!assign || num) {
		for (i = 0; i < n; i++) {
			if (x86_pmu.put_event_constraints)
				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
		}
	}
	return num ? -ENOSPC : 0;
}
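
/*
 * Example of the weight ordered pass above: with two events, one
 * constrained to a single counter (weight 1) and one usable on any
 * counter (weight == wmax), the weight 1 event is placed first, so the
 * flexible event cannot steal the only counter the constrained event
 * could use.
 */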

/*
 * dogrp: true if must collect sibling events (group)
 * returns total number of events and error code
 */
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;

	/* current number of events already accepted */
	n = cpuc->n_events;

	if (is_x86_event(leader)) {
		if (n >= max_count)
			return -ENOSPC;
		cpuc->event_list[n] = leader;
		n++;
	}
	if (!dogrp)
		return n;

	list_for_each_entry(event, &leader->sibling_list, group_entry) {
		if (!is_x86_event(event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -ENOSPC;

		cpuc->event_list[n] = event;
		n++;
	}
	return n;
}

static inline void x86_assign_hw_event(struct perf_event *event,
				struct cpu_hw_events *cpuc, int i)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = cpuc->assign[i];
	hwc->last_cpu = smp_processor_id();
	hwc->last_tag = ++cpuc->tags[i];

	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
		hwc->config_base = 0;
		hwc->event_base	= 0;
	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		/*
		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
		 */
		hwc->event_base =
			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
	} else {
		hwc->config_base = x86_pmu.eventsel;
		hwc->event_base  = x86_pmu.perfctr;
	}
}
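
/*
 * With the bases chosen above, event_base + hwc->idx addresses the right
 * MSR in rdmsrl()/wrmsrl(): generic counters land on x86_pmu.perfctr + idx
 * and fixed counters on MSR_ARCH_PERFMON_FIXED_CTR0 + (idx - X86_PMC_IDX_FIXED).
 */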

static inline int match_prev_assignment(struct hw_perf_event *hwc,
					struct cpu_hw_events *cpuc,
					int i)
{
	return hwc->idx == cpuc->assign[i] &&
		hwc->last_cpu == smp_processor_id() &&
		hwc->last_tag == cpuc->tags[i];
}

static int x86_pmu_start(struct perf_event *event);
static void x86_pmu_stop(struct perf_event *event);

void hw_perf_enable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int i, added = cpuc->n_added;

	if (!x86_pmu_initialized())
		return;

	if (cpuc->enabled)
		return;

	if (cpuc->n_added) {
		int n_running = cpuc->n_events - cpuc->n_added;
		/*
		 * apply assignment obtained either from
		 * hw_perf_group_sched_in() or x86_pmu_enable()
		 *
		 * step1: save events moving to new counters
		 * step2: reprogram moved events into new counters
		 */
		for (i = 0; i < n_running; i++) {
			event = cpuc->event_list[i];
			hwc = &event->hw;

			/*
			 * we can avoid reprogramming counter if:
			 * - assigned same counter as last time
			 * - running on same CPU as last time
			 * - no other event has used the counter since
			 */
			if (hwc->idx == -1 ||
			    match_prev_assignment(hwc, cpuc, i))
				continue;

			x86_pmu_stop(event);
		}

		for (i = 0; i < cpuc->n_events; i++) {
			event = cpuc->event_list[i];
			hwc = &event->hw;

			if (!match_prev_assignment(hwc, cpuc, i))
				x86_assign_hw_event(event, cpuc, i);
			else if (i < n_running)
				continue;

			x86_pmu_start(event);
		}
		cpuc->n_added = 0;
		perf_events_lapic_init();
	}

	cpuc->enabled = 1;
	barrier();

	x86_pmu.enable_all(added);
}
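
/*
 * The two passes above are deliberately ordered: every event moving to a
 * new counter is stopped (step1) before any event is reprogrammed into
 * its new counter (step2), so a counter is never written while a previous
 * occupant is still live on it.
 */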

static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
					  u64 enable_mask)
{
	wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
}

static inline void x86_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	wrmsrl(hwc->config_base + hwc->idx, hwc->config);
}

static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the event disabled in hw:
 */
static int
x86_perf_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0, idx = hwc->idx;

	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	/*
	 * Quirk: certain CPUs don't like it if just 1 hw_event is left:
	 */
	if (unlikely(left < 2))
		left = 2;

	if (left > x86_pmu.max_period)
		left = x86_pmu.max_period;

	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future deltas:
	 */
	atomic64_set(&hwc->prev_count, (u64)-left);

	wrmsrl(hwc->event_base + idx,
			(u64)(-left) & x86_pmu.cntval_mask);

	perf_event_update_userpage(event);

	return ret;
}
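
/*
 * Example of the (u64)-left programming above: for left == 100000 the
 * counter is written with -100000 truncated to cntval_mask, so it
 * overflows and raises the PMI after exactly 100000 increments.
 */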

static void x86_pmu_enable_event(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	if (cpuc->enabled)
		__x86_pmu_enable_event(&event->hw,
				       ARCH_PERFMON_EVENTSEL_ENABLE);
}

/*
 * activate a single event
 *
 * The event is added to the group of enabled events
 * but only if it can be scheduled with existing events.
 *
 * Called with PMU disabled. If successful and return value 1,
 * then guaranteed to call perf_enable() and hw_perf_enable()
 */
static int x86_pmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc;
	int assign[X86_PMC_IDX_MAX];
	int n, n0, ret;

	hwc = &event->hw;

	n0 = cpuc->n_events;
	n = collect_events(cpuc, event, false);
	if (n < 0)
		return n;

	/*
	 * If group events scheduling transaction was started,
	 * skip the schedulability test here, it will be performed
	 * at commit time (->commit_txn) as a whole
	 */
	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
		goto out;

	ret = x86_pmu.schedule_events(cpuc, n, assign);
	if (ret)
		return ret;
	/*
	 * copy new assignment, now we know it is possible
	 * will be used by hw_perf_enable()
	 */
	memcpy(cpuc->assign, assign, n*sizeof(int));

out:
	cpuc->n_events = n;
	cpuc->n_added += n - n0;

	return 0;
}
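
/*
 * Note on the transaction path above: while PERF_EVENT_TXN_STARTED is set
 * the event is only queued into cpuc->event_list; the schedulability test
 * and the assignment copy are deferred to ->commit_txn for the whole group.
 */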

static int x86_pmu_start(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx = event->hw.idx;

	if (idx == -1)
		return -EAGAIN;

	x86_perf_event_set_period(event);
	cpuc->events[idx] = event;
	__set_bit(idx, cpuc->active_mask);
	x86_pmu.enable(event);
	perf_event_update_userpage(event);

	return 0;
}

static void x86_pmu_unthrottle(struct perf_event *event)
{
	int ret = x86_pmu_start(event);
	WARN_ON_ONCE(ret);
}

void perf_event_print_debug(void)
{
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
	u64 pebs;
	struct cpu_hw_events *cpuc;
	unsigned long flags;
	int cpu, idx;

	if (!x86_pmu.num_counters)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_events, cpu);

	if (x86_pmu.version >= 2) {
		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);

		pr_info("\n");
		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
		pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
	}
	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);

		prev_left = per_cpu(pmc_prev_left[idx], cpu);

		pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
			cpu, idx, pmc_ctrl);
		pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);