/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Copyright IBM Corp. 2007
 * Copyright 2010-2011 Freescale Semiconductor, Inc.
 *
 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
 *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *          Scott Wood <scottwood@freescale.com>
 *          Varun Sethi <varun.sethi@freescale.com>
 */

#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>

#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/cacheflush.h>
#include <asm/dbell.h>
#include <asm/hw_irq.h>
#include <asm/irq.h>
#include <asm/time.h>

#include "timing.h"
#include "booke.h"
#include "trace.h"

unsigned long kvmppc_booke_handlers;

#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "mmio",       VCPU_STAT(mmio_exits) },
	{ "dcr",        VCPU_STAT(dcr_exits) },
	{ "sig",        VCPU_STAT(signal_exits) },
	{ "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
	{ "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
	{ "dtlb_r",     VCPU_STAT(dtlb_real_miss_exits) },
	{ "dtlb_v",     VCPU_STAT(dtlb_virt_miss_exits) },
	{ "sysc",       VCPU_STAT(syscall_exits) },
	{ "isi",        VCPU_STAT(isi_exits) },
	{ "dsi",        VCPU_STAT(dsi_exits) },
	{ "inst_emu",   VCPU_STAT(emulated_inst_exits) },
	{ "dec",        VCPU_STAT(dec_exits) },
	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "doorbell", VCPU_STAT(dbell_exits) },
	{ "guest doorbell", VCPU_STAT(gdbell_exits) },
	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
	{ NULL }
};

/* TODO: use vcpu_printf() */
void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
{
	int i;

	printk("pc:   %08lx msr:  %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr);
	printk("lr:   %08lx ctr:  %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
	printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0,
					    vcpu->arch.shared->srr1);

	printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);

	for (i = 0; i < 32; i += 4) {
		printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
		       kvmppc_get_gpr(vcpu, i),
		       kvmppc_get_gpr(vcpu, i+1),
		       kvmppc_get_gpr(vcpu, i+2),
		       kvmppc_get_gpr(vcpu, i+3));
	}
}

#ifdef CONFIG_SPE
void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	enable_kernel_spe();
	kvmppc_save_guest_spe(vcpu);
	vcpu->arch.shadow_msr &= ~MSR_SPE;
	preempt_enable();
}

static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	enable_kernel_spe();
	kvmppc_load_guest_spe(vcpu);
	vcpu->arch.shadow_msr |= MSR_SPE;
	preempt_enable();
}

static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.shared->msr & MSR_SPE) {
		if (!(vcpu->arch.shadow_msr & MSR_SPE))
			kvmppc_vcpu_enable_spe(vcpu);
	} else if (vcpu->arch.shadow_msr & MSR_SPE) {
		kvmppc_vcpu_disable_spe(vcpu);
	}
}
#else
static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
{
}
#endif

static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
{
#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
	/* We always treat the FP bit as enabled from the host
	   perspective, so only need to adjust the shadow MSR */
	vcpu->arch.shadow_msr &= ~MSR_FP;
	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP;
#endif
}

/*
 * Helper function for "full" MSR writes.  No need to call this if only
 * EE/CE/ME/DE/RI are changing.
 */
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
{
	u32 old_msr = vcpu->arch.shared->msr;

#ifdef CONFIG_KVM_BOOKE_HV
	new_msr |= MSR_GS;
#endif

	vcpu->arch.shared->msr = new_msr;

	kvmppc_mmu_msr_notify(vcpu, old_msr);
	kvmppc_vcpu_sync_spe(vcpu);
	kvmppc_vcpu_sync_fpu(vcpu);
}
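
/*
 * Example (hypothetical caller, for illustration): rfi emulation would be
 * expected to funnel its full MSR update through the helper above, e.g.
 *
 *	vcpu->arch.pc = vcpu->arch.shared->srr0;
 *	kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
 *
 * so that MMU, SPE and FP shadow state are resynchronized in one place.
 */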

static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
                                       unsigned int priority)
{
	trace_kvm_booke_queue_irqprio(vcpu, priority);
	set_bit(priority, &vcpu->arch.pending_exceptions);
}

static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
                                        ulong dear_flags, ulong esr_flags)
{
	vcpu->arch.queued_dear = dear_flags;
	vcpu->arch.queued_esr = esr_flags;
	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
}
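
/*
 * These kvmppc_core_queue_* helpers only latch the DEAR/ESR values and set
 * a priority bit; the values reach the guest-visible registers when
 * kvmppc_booke_irqprio_deliver() actually takes the interrupt.
 */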

static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
                                           ulong dear_flags, ulong esr_flags)
{
	vcpu->arch.queued_dear = dear_flags;
	vcpu->arch.queued_esr = esr_flags;
	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
}

static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
                                           ulong esr_flags)
{
	vcpu->arch.queued_esr = esr_flags;
	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
}

static void kvmppc_core_queue_alignment(struct kvm_vcpu *vcpu, ulong dear_flags,
					ulong esr_flags)
{
	vcpu->arch.queued_dear = dear_flags;
	vcpu->arch.queued_esr = esr_flags;
	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALIGNMENT);
}

void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
{
	vcpu->arch.queued_esr = esr_flags;
	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
}

void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
{
	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
}

int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
{
	return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
}

void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
{
	clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
}

void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                struct kvm_interrupt *irq)
{
	unsigned int prio = BOOKE_IRQPRIO_EXTERNAL;

	if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
		prio = BOOKE_IRQPRIO_EXTERNAL_LEVEL;

	kvmppc_booke_queue_irqprio(vcpu, prio);
}

void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
	clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
}

static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
{
	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
}

static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
{
	clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
}
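
/*
 * The watchdog queue/dequeue pair above is driven from update_timer_ints(),
 * which recomputes the pending state from TCR[WIE] and TSR[WIS] whenever
 * the guest's timer registers change.
 */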

static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
#ifdef CONFIG_KVM_BOOKE_HV
	mtspr(SPRN_GSRR0, srr0);
	mtspr(SPRN_GSRR1, srr1);
#else
	vcpu->arch.shared->srr0 = srr0;
	vcpu->arch.shared->srr1 = srr1;
#endif
}

static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
	vcpu->arch.csrr0 = srr0;
	vcpu->arch.csrr1 = srr1;
}

static void set_guest_dsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) {
		vcpu->arch.dsrr0 = srr0;
		vcpu->arch.dsrr1 = srr1;
	} else {
		set_guest_csrr(vcpu, srr0, srr1);
	}
}

static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
	vcpu->arch.mcsrr0 = srr0;
	vcpu->arch.mcsrr1 = srr1;
}

static unsigned long get_guest_dear(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_BOOKE_HV
	return mfspr(SPRN_GDEAR);
#else
	return vcpu->arch.shared->dar;
#endif
}

static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear)
{
#ifdef CONFIG_KVM_BOOKE_HV
	mtspr(SPRN_GDEAR, dear);
#else
	vcpu->arch.shared->dar = dear;
#endif
}

static unsigned long get_guest_esr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_BOOKE_HV
	return mfspr(SPRN_GESR);
#else
	return vcpu->arch.shared->esr;
#endif
}

static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr)
{
#ifdef CONFIG_KVM_BOOKE_HV
	mtspr(SPRN_GESR, esr);
#else
	vcpu->arch.shared->esr = esr;
#endif
}
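
/*
 * With CONFIG_KVM_BOOKE_HV the guest's SRR/DEAR/ESR live in dedicated
 * guest SPRs (GSRR0/1, GDEAR, GESR), so these accessors touch real
 * registers; without it they fall back to the shared (magic) page.
 */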

static unsigned long get_guest_epr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_BOOKE_HV
	return mfspr(SPRN_GEPR);
#else
	return vcpu->arch.epr;
#endif
}

/* Deliver the interrupt of the corresponding priority, if possible. */
static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
                                        unsigned int priority)
{
	int allowed = 0;
	ulong msr_mask = 0;
	bool update_esr = false, update_dear = false, update_epr = false;
	ulong crit_raw = vcpu->arch.shared->critical;
	ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
	bool crit;
	bool keep_irq = false;
	enum int_class int_class;
	ulong new_msr = vcpu->arch.shared->msr;

	/* Truncate crit indicators in 32 bit mode */
	if (!(vcpu->arch.shared->msr & MSR_SF)) {
		crit_raw &= 0xffffffff;
		crit_r1 &= 0xffffffff;
	}

	/* Critical section when crit == r1 */
	crit = (crit_raw == crit_r1);
	/* ... and we're in supervisor mode */
	crit = crit && !(vcpu->arch.shared->msr & MSR_PR);

	if (priority == BOOKE_IRQPRIO_EXTERNAL_LEVEL) {
		priority = BOOKE_IRQPRIO_EXTERNAL;
		keep_irq = true;
	}

	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled)
		update_epr = true;

	switch (priority) {
	case BOOKE_IRQPRIO_DTLB_MISS:
	case BOOKE_IRQPRIO_DATA_STORAGE:
	case BOOKE_IRQPRIO_ALIGNMENT:
		update_dear = true;
		/* fall through */
	case BOOKE_IRQPRIO_INST_STORAGE:
	case BOOKE_IRQPRIO_PROGRAM:
		update_esr = true;
		/* fall through */
	case BOOKE_IRQPRIO_ITLB_MISS:
	case BOOKE_IRQPRIO_SYSCALL:
	case BOOKE_IRQPRIO_FP_UNAVAIL:
	case BOOKE_IRQPRIO_SPE_UNAVAIL:
	case BOOKE_IRQPRIO_SPE_FP_DATA:
	case BOOKE_IRQPRIO_SPE_FP_ROUND:
	case BOOKE_IRQPRIO_AP_UNAVAIL:
		allowed = 1;
		msr_mask = MSR_CE | MSR_ME | MSR_DE;
		int_class = INT_CLASS_NONCRIT;
		break;
	case BOOKE_IRQPRIO_WATCHDOG:
	case BOOKE_IRQPRIO_CRITICAL:
	case BOOKE_IRQPRIO_DBELL_CRIT:
		allowed = vcpu->arch.shared->msr & MSR_CE;
		allowed = allowed && !crit;
		msr_mask = MSR_ME;
		int_class = INT_CLASS_CRIT;
		break;
	case BOOKE_IRQPRIO_MACHINE_CHECK:
		allowed = vcpu->arch.shared->msr & MSR_ME;
		allowed = allowed && !crit;
		int_class = INT_CLASS_MC;
		break;
	case BOOKE_IRQPRIO_DECREMENTER:
	case BOOKE_IRQPRIO_FIT:
		keep_irq = true;
		/* fall through */
	case BOOKE_IRQPRIO_EXTERNAL:
	case BOOKE_IRQPRIO_DBELL:
		allowed = vcpu->arch.shared->msr & MSR_EE;
		allowed = allowed && !crit;
		msr_mask = MSR_CE | MSR_ME | MSR_DE;
		int_class = INT_CLASS_NONCRIT;
		break;
	case BOOKE_IRQPRIO_DEBUG:
		allowed = vcpu->arch.shared->msr & MSR_DE;
		allowed = allowed && !crit;
		msr_mask = MSR_ME;
		int_class = INT_CLASS_CRIT;
		break;
	}

	if (allowed) {
		switch (int_class) {
		case INT_CLASS_NONCRIT:
			set_guest_srr(vcpu, vcpu->arch.pc,
				      vcpu->arch.shared->msr);
			break;
		case INT_CLASS_CRIT:
			set_guest_csrr(vcpu, vcpu->arch.pc,
				       vcpu->arch.shared->msr);
			break;
		case INT_CLASS_DBG:
			set_guest_dsrr(vcpu, vcpu->arch.pc,
				       vcpu->arch.shared->msr);
			break;
		case INT_CLASS_MC:
			set_guest_mcsrr(vcpu, vcpu->arch.pc,
					vcpu->arch.shared->msr);
			break;
		}

		vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
		if (update_esr)
			set_guest_esr(vcpu, vcpu->arch.queued_esr);
		if (update_dear)
			set_guest_dear(vcpu, vcpu->arch.queued_dear);
		if (update_epr)
			kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);

		new_msr &= msr_mask;
#if defined(CONFIG_64BIT)
		if (vcpu->arch.epcr & SPRN_EPCR_ICM)
			new_msr |= MSR_CM;
#endif
		kvmppc_set_msr(vcpu, new_msr);

		if (!keep_irq)
			clear_bit(priority, &vcpu->arch.pending_exceptions);
	}

#ifdef CONFIG_KVM_BOOKE_HV
	/*
	 * If an interrupt is pending but masked, raise a guest doorbell
	 * so that we are notified when the guest enables the relevant
	 * MSR bit.
	 */
	if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_EE)
		kvmppc_set_pending_interrupt(vcpu, INT_CLASS_NONCRIT);
	if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_CE)
		kvmppc_set_pending_interrupt(vcpu, INT_CLASS_CRIT);
	if (vcpu->arch.pending_exceptions & BOOKE_IRQPRIO_MACHINE_CHECK)
		kvmppc_set_pending_interrupt(vcpu, INT_CLASS_MC);
#endif

	return allowed;
}
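
/*
 * Worked example, assuming a queued DTLB miss is deliverable: the switch
 * above saves PC/MSR into SRR0/SRR1, the queued DEAR/ESR values are copied
 * to the guest-visible registers, PC is redirected to IVPR|IVOR[priority],
 * and MSR is masked down to CE|ME|DE, mirroring what the hardware does for
 * a non-critical interrupt.
 */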

/*
 * Return the number of jiffies until the next timeout.  If the timeout is
 * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
 * because the larger value can break the timer APIs.
 */
static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
{
	u64 tb, wdt_tb, wdt_ticks = 0;
	u64 nr_jiffies = 0;
	u32 period = TCR_GET_WP(vcpu->arch.tcr);

	wdt_tb = 1ULL << (63 - period);
	tb = get_tb();
	/*
	 * The watchdog timeout will happen when the TB bit corresponding
	 * to the watchdog period toggles from 0 to 1.
	 */
	if (tb & wdt_tb)
		wdt_ticks = wdt_tb;

	wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));

	/* Convert timebase ticks to jiffies */
	nr_jiffies = wdt_ticks;

	if (do_div(nr_jiffies, tb_ticks_per_jiffy))
		nr_jiffies++;

	return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
}
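
/*
 * Worked example, assuming a hypothetical period selecting TB bit 2
 * (wdt_tb = 4): the watchdog fires each time that bit flips from 0 to 1,
 * i.e. every 8 timebase ticks, and the arithmetic above rounds the
 * remaining ticks up to whole jiffies.
 */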

static void arm_next_watchdog(struct kvm_vcpu *vcpu)
{
	unsigned long nr_jiffies;
	unsigned long flags;

	/*
	 * If TSR_ENW and TSR_WIS are not set then no need to exit to
	 * userspace, so clear the KVM_REQ_WATCHDOG request.
	 */
	if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
		clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);

	spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
	nr_jiffies = watchdog_next_timeout(vcpu);
	/*
	 * If the watchdog timeout is NEXT_TIMER_MAX_DELTA jiffies or more,
	 * do not arm the timer, as such a large value can break the timer
	 * APIs.
	 */
	if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
		mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
	else
		del_timer(&vcpu->arch.wdt_timer);
	spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
}

void kvmppc_watchdog_func(unsigned long data)
{
	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
	u32 tsr, new_tsr;
	int final;

	do {
		new_tsr = tsr = vcpu->arch.tsr;
		final = 0;

		/* Time out event */
		if (tsr & TSR_ENW) {
			if (tsr & TSR_WIS)
				final = 1;
			else
				new_tsr = tsr | TSR_WIS;
		} else {
			new_tsr = tsr | TSR_ENW;
		}
	} while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);

	if (new_tsr & TSR_WIS) {
		smp_wmb();
		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
		kvm_vcpu_kick(vcpu);
	}

	/*
	 * If this is final watchdog expiry and some action is required
	 * then exit to userspace.
	 */
	if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
	    vcpu->arch.watchdog_enabled) {
		smp_wmb();
		kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
		kvm_vcpu_kick(vcpu);
	}

	/*
	 * Stop running the watchdog timer after final expiration to
	 * prevent the host from being flooded with timers if the
	 * guest sets a short period.
	 * Timers will resume when TSR/TCR is updated next time.
	 */
	if (!final)
		arm_next_watchdog(vcpu);
}
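
/*
 * To summarize the state machine above: the first expiry sets TSR[ENW],
 * the second sets TSR[WIS] and (if TCR[WIE] is set) raises the guest's
 * watchdog interrupt, and a third, "final" expiry with a nonzero TCR[WRC]
 * and watchdog exiting enabled escalates to userspace via
 * KVM_EXIT_WATCHDOG.
 */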

static void update_timer_ints(struct kvm_vcpu *vcpu)
{
	if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
		kvmppc_core_queue_dec(vcpu);
	else
		kvmppc_core_dequeue_dec(vcpu);

	if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
		kvmppc_core_queue_watchdog(vcpu);
	else
		kvmppc_core_dequeue_watchdog(vcpu);
}

static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
{
	unsigned long *pending = &vcpu->arch.pending_exceptions;
	unsigned int priority;

	priority = __ffs(*pending);
	while (priority < BOOKE_IRQPRIO_MAX) {
		if (kvmppc_booke_irqprio_deliver(vcpu, priority))
			break;

		priority = find_next_bit(pending,
		                         BITS_PER_BYTE * sizeof(*pending),
		                         priority + 1);
	}

	/* Tell the guest about our interrupt status */
	vcpu->arch.shared->int_pending = !!*pending;
}
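
/*
 * Pending exceptions are scanned from the lowest-numbered priority bit
 * upwards; delivery stops at the first interrupt that is accepted, since
 * taking it rewrites MSR and may mask the remaining ones.
 */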

/* Check pending exceptions and deliver one, if possible. */
int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
{
	int r = 0;
	WARN_ON_ONCE(!irqs_disabled());

	kvmppc_core_check_exceptions(vcpu);

	if (vcpu->requests) {
		/* Exception delivery raised request; start over */
		return 1;
	}

	if (vcpu->arch.shared->msr & MSR_WE) {
		local_irq_enable();
		kvm_vcpu_block(vcpu);
		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
		local_irq_disable();

		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
		r = 1;
	}

	return r;
}

int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
{
	int r = 1; /* Indicate we want to get back into the guest */

	if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
		update_timer_ints(vcpu);
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
		kvmppc_core_flush_tlb(vcpu);
#endif

	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
		vcpu->run->exit_reason = KVM_EXIT_WATCHDOG;
		r = 0;
	}


	if (kvm_check_request(KVM_REQ_EPR_EXIT, vcpu)) {
		vcpu->run->epr.epr = 0;
		vcpu->arch.epr_needed = true;
		vcpu->run->exit_reason = KVM_EXIT_EPR;
		r = 0;
	}

	return r;
}
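
/*
 * A return of 0 from kvmppc_core_check_requests() forces an exit to
 * userspace with run->exit_reason already filled in; a return of 1
 * re-enters the guest.
 */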

int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
	int ret, s;
#ifdef CONFIG_PPC_FPU
	unsigned int fpscr;
	int fpexc_mode;
	u64 fpr[32];
#endif

	if (!vcpu->arch.sane) {
		kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		return -EINVAL;
	}

	local_irq_disable();
	s = kvmppc_prepare_to_enter(vcpu);
	if (s <= 0) {
		local_irq_enable();
		ret = s;
		goto out;
	}
	kvmppc_lazy_ee_enable();

	kvm_guest_enter();

#ifdef CONFIG_PPC_FPU
	/* Save userspace FPU state in stack */
	enable_kernel_fp();
	memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
	fpscr = current->thread.fpscr.val;
	fpexc_mode = current->thread.fpexc_mode;

	/* Restore guest FPU state to thread */
	memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr));
	current->thread.fpscr.val = vcpu->arch.fpscr;

	/*
	 * Since we can't trap on MSR_FP in GS-mode, we consider the guest
	 * as always using the FPU.  Kernel usage of FP (via
	 * enable_kernel_fp()) in this thread must not occur while
	 * vcpu->fpu_active is set.
	 */
	vcpu->fpu_active = 1;

	kvmppc_load_guest_fp(vcpu);
#endif

	ret = __kvmppc_vcpu_run(kvm_run, vcpu);

	/* No need for kvm_guest_exit. It's done in handle_exit.
	   We also get here with interrupts enabled. */

#ifdef CONFIG_PPC_FPU
	kvmppc_save_guest_fp(vcpu);

	vcpu->fpu_active = 0;

	/* Save guest FPU state from thread */
	memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr));
	vcpu->arch.fpscr = current->thread.fpscr.val;

	/* Restore userspace FPU state from stack */
	memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
	current->thread.fpscr.val = fpscr;
	current->thread.fpexc_mode = fpexc_mode;
#endif

out:
	vcpu->mode = OUTSIDE_GUEST_MODE;
	return ret;
}

static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	enum emulation_result er;

	er = kvmppc_emulate_instruction(run, vcpu);
	switch (er) {
	case EMULATE_DONE:
		/* don't overwrite subtypes, just account kvm_stats */
		kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
		/* Future optimization: only reload non-volatiles if
		 * they were actually modified by emulation. */
		return RESUME_GUEST_NV;

	case EMULATE_DO_DCR:
		run->exit_reason = KVM_EXIT_DCR;
		return RESUME_HOST;

	case EMULATE_FAIL:
		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
		       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
		/* For debugging, encode the failing instruction and
		 * report it to userspace. */
		run->hw.hardware_exit_reason = ~0ULL << 32;
		run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
		kvmppc_core_queue_program(vcpu, ESR_PIL);
		return RESUME_HOST;

	default:
		BUG();
	}
}

static void kvmppc_fill_pt_regs(struct pt_regs *regs)
{
	ulong r1, ip, msr, lr;

	asm("mr %0, 1" : "=r"(r1));
	asm("mflr %0" : "=r"(lr));
	asm("mfmsr %0" : "=r"(msr));
	asm("bl 1f; 1: mflr %0" : "=r"(ip));

	memset(regs, 0, sizeof(*regs));
	regs->gpr[1] = r1;
	regs->nip = ip;
	regs->msr = msr;
	regs->link = lr;
}
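
/*
 * The "bl 1f; 1: mflr" sequence above is simply a position-independent
 * way of loading the current instruction address into regs->nip.
 */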

/*
 * For interrupts that need to be handled by host interrupt handlers,
 * the corresponding host handler is called from here in a similar way
 * (but not exactly) as it would be called from the low level handler
 * (such as from arch/powerpc/kernel/head_fsl_booke.S).
 */
static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
				     unsigned int exit_nr)
{
	struct pt_regs regs;

	switch (exit_nr) {
	case BOOKE_INTERRUPT_EXTERNAL:
		kvmppc_fill_pt_regs(&regs);
		do_IRQ(&regs);
		break;
	case BOOKE_INTERRUPT_DECREMENTER:
		kvmppc_fill_pt_regs(&regs);
		timer_interrupt(&regs);
		break;
#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64)
	case BOOKE_INTERRUPT_DOORBELL:
		kvmppc_fill_pt_regs(&regs);
		doorbell_exception(&regs);
		break;
#endif
	case BOOKE_INTERRUPT_MACHINE_CHECK:
		/* FIXME */
		break;
	case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
		kvmppc_fill_pt_regs(&regs);
		performance_monitor_exception(&regs);
		break;
	case BOOKE_INTERRUPT_WATCHDOG:
		kvmppc_fill_pt_regs(&regs);
#ifdef CONFIG_BOOKE_WDT
		WatchdogException(&regs);
#else
		unknown_exception(&regs);
#endif
		break;
	case BOOKE_INTERRUPT_CRITICAL:
		unknown_exception(&regs);
		break;
	}
}
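
/*
 * Sketch of the flow, assuming the host decrementer fired while the guest
 * was running: __kvmppc_vcpu_run() returns with
 * BOOKE_INTERRUPT_DECREMENTER, kvmppc_restart_interrupt() replays it into
 * timer_interrupt() with the synthesized pt_regs, and only then does
 * kvmppc_handle_exit() re-enable interrupts.
 */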

/**
 * kvmppc_handle_exit
 *
 * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
 */
int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                       unsigned int exit_nr)
{
	int r = RESUME_HOST;
	int s;

	/* update before a new last_exit_type is rewritten */
	kvmppc_update_timing_stats(vcpu);

	/* restart interrupts if they were meant for the host */
	kvmppc_restart_interrupt(vcpu, exit_nr);

	local_irq_enable();

	trace_kvm_exit(exit_nr, vcpu);
	kvm_guest_exit();

	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;

	switch (exit_nr) {
	case BOOKE_INTERRUPT_MACHINE_CHECK:
		printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
		kvmppc_dump_vcpu(vcpu);
		/* For debugging, send invalid exit reason to user space */
		run->hw.hardware_exit_reason = ~1ULL << 32;
		run->hw.hardware_exit_reason |= mfspr(SPRN_MCSR);
		r = RESUME_HOST;
		break;

	case BOOKE_INTERRUPT_EXTERNAL:
		kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
		r = RESUME_GUEST;
		break;

	case BOOKE_INTERRUPT_DECREMENTER:
		kvmppc_account_exit(vcpu, DEC_EXITS);
		r = RESUME_GUEST;
		break;

	case BOOKE_INTERRUPT_WATCHDOG:
		r = RESUME_GUEST;
		break;

	case BOOKE_INTERRUPT_DOORBELL:
		kvmppc_account_exit(vcpu, DBELL_EXITS);
		r = RESUME_GUEST;
		break;

	case BOOKE_INTERRUPT_GUEST_DBELL_CRIT:
		kvmppc_account_exit(vcpu, GDBELL_EXITS);

		/*
		 * We are here because there is a pending guest interrupt
		 * which could not be delivered as MSR_CE or MSR_ME was not
		 * set.  Once we break from here we will retry delivery.
		 */
		r = RESUME_GUEST;
		break;

	case BOOKE_INTERRUPT_GUEST_DBELL:
		kvmppc_account_exit(vcpu, GDBELL_EXITS);

		/*
		 * We are here because there is a pending guest interrupt
		 * which could not be delivered as MSR_EE was not set.  Once
		 * we break from here we will retry delivery.
		 */
		r = RESUME_GUEST;
		break;

	case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
		r = RESUME_GUEST;
		break;

	case BOOKE_INTERRUPT_HV_PRIV:
		r = emulation_exit(run, vcpu);
		break;

	case BOOKE_INTERRUPT_PROGRAM:
		if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) {
			/*
			 * Program traps generated by user-level software must
			 * be handled by the guest kernel.
			 *
			 * In GS mode, hypervisor privileged instructions trap
			 * on BOOKE_INTERRUPT_HV_PRIV, not here, so these are
			 * actual program interrupts, handled by the guest.
			 */
			kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
			r = RESUME_GUEST;
			kvmppc_account_exit(vcpu, USR_PR_INST);
			break;
		}

		r = emulation_exit(run, vcpu);
		break;

	case BOOKE_INTERRUPT_FP_UNAVAIL:
		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
		kvmppc_account_exit(vcpu, FP_UNAVAIL);
		r = RESUME_GUEST;
		break;

#ifdef CONFIG_SPE
	case BOOKE_INTERRUPT_SPE_UNAVAIL: {
		if (vcpu->arch.shared->msr & MSR_SPE)
			kvmppc_vcpu_enable_spe(vcpu);
		else
			kvmppc_booke_queue_irqprio(vcpu,
						   BOOKE_IRQPRIO_SPE_UNAVAIL);
		r = RESUME_GUEST;
		break;
	}

	case BOOKE_INTERRUPT_SPE_FP_DATA:
		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
		r = RESUME_GUEST;
		break;

	case BOOKE_INTERRUPT_SPE_FP_ROUND:
		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
		r = RESUME_GUEST;
		break;
#else
	case BOOKE_INTERRUPT_SPE_UNAVAIL:
		/*
		 * Guest wants SPE, but host kernel doesn't support it.  Send
		 * an "unimplemented operation" program check to the guest.
		 */
		kvmppc_core_queue_program(vcpu, ESR_PUO | ESR_SPV);
		r = RESUME_GUEST;
		break;

	/*
	 * These really should never happen without CONFIG_SPE,
	 * as we should never enable the real MSR[SPE] in the guest.
	 */
	case BOOKE_INTERRUPT_SPE_FP_DATA:
	case BOOKE_INTERRUPT_SPE_FP_ROUND:
		printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n",
		       __func__, exit_nr, vcpu->arch.pc);
		run->hw.hardware_exit_reason = exit_nr;
		r = RESUME_HOST;
		break;
#endif

	case BOOKE_INTERRUPT_DATA_STORAGE:
		kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
		                               vcpu->arch.fault_esr);
		kvmppc_account_exit(vcpu, DSI_EXITS);
		r = RESUME_GUEST;
		break;

	case BOOKE_INTERRUPT_INST_STORAGE:
		kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr);
		kvmppc_account_exit(vcpu, ISI_EXITS);
		r = RESUME_GUEST;
		break;

	case BOOKE_INTERRUPT_ALIGNMENT:
		kvmppc_core_queue_alignment(vcpu, vcpu->arch.fault_dear,
		                            vcpu->arch.fault_esr);
		r = RESUME_GUEST;
		break;

#ifdef CONFIG_KVM_BOOKE_HV
	case BOOKE_INTERRUPT_HV_SYSCALL:
		if (!(vcpu->arch.shared->msr & MSR_PR)) {
			kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
		} else {
			/*
			 * hcall from guest userspace -- send privileged
			 * instruction program check.
			 */
			kvmppc_core_queue_program(vcpu, ESR_PPR);
		}

		r = RESUME_GUEST;
		break;