Commit 7753ea09 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Radim Krčmář:
 "Trimmed second batch of KVM changes for Linux 4.15:

   - GICv4 Support for KVM/ARM

   - re-introduce support for CPUs without virtual NMI (cc stable) and
     allow testing of KVM without virtual NMI on available CPUs

   - fix long-standing performance issues with assigned devices on AMD
     (cc stable)"

* tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
  kvm: vmx: Allow disabling virtual NMI support
  kvm: vmx: Reinstate support for CPUs without virtual NMI
  KVM: SVM: obey guest PAT
  KVM: arm/arm64: Don't queue VLPIs on INV/INVALL
  KVM: arm/arm64: Fix GICv4 ITS initialization issues
  KVM: arm/arm64: GICv4: Theory of operations
  KVM: arm/arm64: GICv4: Enable VLPI support
  KVM: arm/arm64: GICv4: Prevent userspace from changing doorbell affinity
  KVM: arm/arm64: GICv4: Prevent a VM using GICv4 from being saved
  KVM: arm/arm64: GICv4: Enable virtual cpuif if VLPIs can be delivered
  KVM: arm/arm64: GICv4: Hook vPE scheduling into vgic flush/sync
  KVM: arm/arm64: GICv4: Use the doorbell interrupt as an unblocking source
  KVM: arm/arm64: GICv4: Add doorbell interrupt handling
  KVM: arm/arm64: GICv4: Use pending_last as a scheduling hint
  KVM: arm/arm64: GICv4: Handle INVALL applied to a vPE
  KVM: arm/arm64: GICv4: Propagate property updates to VLPIs
  KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE
  KVM: arm/arm64: GICv4: Handle CLEAR applied to a VLPI
  KVM: arm/arm64: GICv4: Propagate affinity changes to the physical ITS
  KVM: arm/arm64: GICv4: Unmap VLPI when freeing an LPI
  ...
parents 83ada031 d02fcf50
...@@ -1890,6 +1890,10 @@ ...@@ -1890,6 +1890,10 @@
[KVM,ARM] Trap guest accesses to GICv3 common [KVM,ARM] Trap guest accesses to GICv3 common
system registers system registers
kvm-arm.vgic_v4_enable=
[KVM,ARM] Allow use of GICv4 for direct injection of
LPIs.
kvm-intel.ept= [KVM,Intel] Disable extended page tables kvm-intel.ept= [KVM,Intel] Disable extended page tables
(virtualized MMU) support on capable Intel chips. (virtualized MMU) support on capable Intel chips.
Default is 1 (enabled) Default is 1 (enabled)
......
...@@ -64,6 +64,8 @@ Groups: ...@@ -64,6 +64,8 @@ Groups:
-EINVAL: Inconsistent restored data -EINVAL: Inconsistent restored data
-EFAULT: Invalid guest ram access -EFAULT: Invalid guest ram access
-EBUSY: One or more VCPUS are running -EBUSY: One or more VCPUS are running
-EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
state is not available
KVM_DEV_ARM_VGIC_GRP_ITS_REGS KVM_DEV_ARM_VGIC_GRP_ITS_REGS
Attributes: Attributes:
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
# #
source "virt/kvm/Kconfig" source "virt/kvm/Kconfig"
source "virt/lib/Kconfig"
menuconfig VIRTUALIZATION menuconfig VIRTUALIZATION
bool "Virtualization" bool "Virtualization"
...@@ -23,6 +24,8 @@ config KVM ...@@ -23,6 +24,8 @@ config KVM
select PREEMPT_NOTIFIERS select PREEMPT_NOTIFIERS
select ANON_INODES select ANON_INODES
select ARM_GIC select ARM_GIC
select ARM_GIC_V3
select ARM_GIC_V3_ITS
select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO select KVM_MMIO
...@@ -36,6 +39,8 @@ config KVM ...@@ -36,6 +39,8 @@ config KVM
select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI select HAVE_KVM_MSI
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help--- ---help---
Support hosting virtualized guest machines. Support hosting virtualized guest machines.
......
...@@ -32,6 +32,7 @@ obj-y += $(KVM)/arm/vgic/vgic-init.o ...@@ -32,6 +32,7 @@ obj-y += $(KVM)/arm/vgic/vgic-init.o
obj-y += $(KVM)/arm/vgic/vgic-irqfd.o obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
obj-y += $(KVM)/arm/vgic/vgic-v2.o obj-y += $(KVM)/arm/vgic/vgic-v2.o
obj-y += $(KVM)/arm/vgic/vgic-v3.o obj-y += $(KVM)/arm/vgic/vgic-v3.o
obj-y += $(KVM)/arm/vgic/vgic-v4.o
obj-y += $(KVM)/arm/vgic/vgic-mmio.o obj-y += $(KVM)/arm/vgic/vgic-mmio.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
# #
source "virt/kvm/Kconfig" source "virt/kvm/Kconfig"
source "virt/lib/Kconfig"
menuconfig VIRTUALIZATION menuconfig VIRTUALIZATION
bool "Virtualization" bool "Virtualization"
...@@ -36,6 +37,8 @@ config KVM ...@@ -36,6 +37,8 @@ config KVM
select HAVE_KVM_MSI select HAVE_KVM_MSI
select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_IRQ_ROUTING
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
---help--- ---help---
Support hosting virtualized guest machines. Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple We don't support KVM with 16K page tables yet, due to the multiple
......
...@@ -27,6 +27,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o ...@@ -27,6 +27,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
......
...@@ -3671,6 +3671,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) ...@@ -3671,6 +3671,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index; u32 ecx = msr->index;
u64 data = msr->data; u64 data = msr->data;
switch (ecx) { switch (ecx) {
case MSR_IA32_CR_PAT:
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
return 1;
vcpu->arch.pat = data;
svm->vmcb->save.g_pat = data;
mark_dirty(svm->vmcb, VMCB_NPT);
break;
case MSR_IA32_TSC: case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr); kvm_write_tsc(vcpu, msr);
break; break;
......
...@@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); ...@@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
static bool __read_mostly enable_vpid = 1; static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444); module_param_named(vpid, enable_vpid, bool, 0444);
static bool __read_mostly enable_vnmi = 1;
module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
static bool __read_mostly flexpriority_enabled = 1; static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
...@@ -202,6 +205,10 @@ struct loaded_vmcs { ...@@ -202,6 +205,10 @@ struct loaded_vmcs {
bool nmi_known_unmasked; bool nmi_known_unmasked;
unsigned long vmcs_host_cr3; /* May not match real cr3 */ unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */ unsigned long vmcs_host_cr4; /* May not match real cr4 */
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
s64 vnmi_blocked_time;
struct list_head loaded_vmcss_on_cpu_link; struct list_head loaded_vmcss_on_cpu_link;
}; };
...@@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void) ...@@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID; SECONDARY_EXEC_ENABLE_INVPCID;
} }
static inline bool cpu_has_virtual_nmis(void)
{
return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
}
static inline bool cpu_has_vmx_wbinvd_exit(void) static inline bool cpu_has_vmx_wbinvd_exit(void)
{ {
return vmcs_config.cpu_based_2nd_exec_ctrl & return vmcs_config.cpu_based_2nd_exec_ctrl &
...@@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit) ...@@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
(vmcs12->secondary_vm_exec_control & bit); (vmcs12->secondary_vm_exec_control & bit);
} }
static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
{
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}
static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12) static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
{ {
return vmcs12->pin_based_vm_exec_control & return vmcs12->pin_based_vm_exec_control &
...@@ -3712,9 +3719,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) ...@@ -3712,9 +3719,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0) &_vmexit_control) < 0)
return -EIO; return -EIO;
min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
PIN_BASED_VIRTUAL_NMIS; opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER; PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0) &_pin_based_exec_control) < 0)
return -EIO; return -EIO;
...@@ -5232,6 +5239,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) ...@@ -5232,6 +5239,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
if (!kvm_vcpu_apicv_active(&vmx->vcpu)) if (!kvm_vcpu_apicv_active(&vmx->vcpu))
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
if (!enable_vnmi)
pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
/* Enable the preemption timer dynamically */ /* Enable the preemption timer dynamically */
pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
return pin_based_exec_ctrl; return pin_based_exec_ctrl;
...@@ -5666,7 +5677,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) ...@@ -5666,7 +5677,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
static void enable_nmi_window(struct kvm_vcpu *vcpu) static void enable_nmi_window(struct kvm_vcpu *vcpu)
{ {
if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { if (!enable_vnmi ||
vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu); enable_irq_window(vcpu);
return; return;
} }
...@@ -5706,6 +5718,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) ...@@ -5706,6 +5718,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{ {
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
if (!enable_vnmi) {
/*
* Tracking the NMI-blocked state in software is built upon
* finding the next open IRQ window. This, in turn, depends on
* well-behaving guests: They have to keep IRQs disabled at
* least as long as the NMI handler runs. Otherwise we may
* cause NMI nesting, maybe breaking the guest. But as this is
* highly unlikely, we can live with the residual risk.
*/
vmx->loaded_vmcs->soft_vnmi_blocked = 1;
vmx->loaded_vmcs->vnmi_blocked_time = 0;
}
++vcpu->stat.nmi_injections; ++vcpu->stat.nmi_injections;
vmx->loaded_vmcs->nmi_known_unmasked = false; vmx->loaded_vmcs->nmi_known_unmasked = false;
...@@ -5724,6 +5749,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) ...@@ -5724,6 +5749,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
bool masked; bool masked;
if (!enable_vnmi)
return vmx->loaded_vmcs->soft_vnmi_blocked;
if (vmx->loaded_vmcs->nmi_known_unmasked) if (vmx->loaded_vmcs->nmi_known_unmasked)
return false; return false;
masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
...@@ -5735,13 +5762,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) ...@@ -5735,13 +5762,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{ {
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
vmx->loaded_vmcs->nmi_known_unmasked = !masked; if (!enable_vnmi) {
if (masked) if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, vmx->loaded_vmcs->soft_vnmi_blocked = masked;
GUEST_INTR_STATE_NMI); vmx->loaded_vmcs->vnmi_blocked_time = 0;
else }
vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, } else {
GUEST_INTR_STATE_NMI); vmx->loaded_vmcs->nmi_known_unmasked = !masked;
if (masked)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
}
} }
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
...@@ -5749,6 +5783,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) ...@@ -5749,6 +5783,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->nested.nested_run_pending) if (to_vmx(vcpu)->nested.nested_run_pending)
return 0; return 0;
if (!enable_vnmi &&
to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
return 0;
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI)); | GUEST_INTR_STATE_NMI));
...@@ -6476,6 +6514,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) ...@@ -6476,6 +6514,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25. * AAK134, BY25.
*/ */
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI)) (exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
...@@ -6535,6 +6574,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) ...@@ -6535,6 +6574,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
static int handle_nmi_window(struct kvm_vcpu *vcpu) static int handle_nmi_window(struct kvm_vcpu *vcpu)
{ {
WARN_ON_ONCE(!enable_vnmi);
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_VIRTUAL_NMI_PENDING); CPU_BASED_VIRTUAL_NMI_PENDING);
++vcpu->stat.nmi_window_exits; ++vcpu->stat.nmi_window_exits;
...@@ -6758,6 +6798,9 @@ static __init int hardware_setup(void) ...@@ -6758,6 +6798,9 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_flexpriority()) if (!cpu_has_vmx_flexpriority())
flexpriority_enabled = 0; flexpriority_enabled = 0;
if (!cpu_has_virtual_nmis())
enable_vnmi = 0;
/* /*
* set_apic_access_page_addr() is used to reload apic access * set_apic_access_page_addr() is used to reload apic access
* page upon invalidation. No need to do anything if not * page upon invalidation. No need to do anything if not
...@@ -6962,7 +7005,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) ...@@ -6962,7 +7005,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
} }
/* Create a new VMCS */ /* Create a new VMCS */
item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
if (!item) if (!item)
return NULL; return NULL;
item->vmcs02.vmcs = alloc_vmcs(); item->vmcs02.vmcs = alloc_vmcs();
...@@ -7979,6 +8022,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) ...@@ -7979,6 +8022,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry. * "blocked by NMI" bit has to be set before next VM entry.
*/ */
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI)) (exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI); GUEST_INTR_STATE_NMI);
...@@ -8823,6 +8867,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) ...@@ -8823,6 +8867,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0; return 0;
} }
if (unlikely(!enable_vnmi &&
vmx->loaded_vmcs->soft_vnmi_blocked)) {
if (vmx_interrupt_allowed(vcpu)) {
vmx->loaded_vmcs->soft_vnmi_blocked = 0;
} else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
vcpu->arch.nmi_pending) {
/*
* This CPU don't support us in finding the end of an
* NMI-blocked window if the guest runs with IRQs
* disabled. So we pull the trigger after 1 s of
* futile waiting, but inform the user about this.
*/
printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
"state on VCPU %d after 1 s timeout\n",
__func__, vcpu->vcpu_id);
vmx->loaded_vmcs->soft_vnmi_blocked = 0;
}
}
if (exit_reason < kvm_vmx_max_exit_handlers if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason]) && kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu); return kvm_vmx_exit_handlers[exit_reason](vcpu);
...@@ -9105,33 +9168,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) ...@@ -9105,33 +9168,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
if (vmx->loaded_vmcs->nmi_known_unmasked) if (enable_vnmi) {
return; if (vmx->loaded_vmcs->nmi_known_unmasked)
/* return;
* Can't use vmx->exit_intr_info since we're not sure what /*
* the exit reason is. * Can't use vmx->exit_intr_info since we're not sure what
*/ * the exit reason is.
exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); */
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
vector = exit_intr_info & INTR_INFO_VECTOR_MASK; unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
/* vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
* SDM 3: 27.7.1.2 (September 2008) /*
* Re-set bit "block by NMI" before VM entry if vmexit caused by * SDM 3: 27.7.1.2 (September 2008)
* a guest IRET fault. * Re-set bit "block by NMI" before VM entry if vmexit caused by
* SDM 3: 23.2.2 (September 2008) * a guest IRET fault.
* Bit 12 is undefined in any of the following cases: * SDM 3: 23.2.2 (September 2008)
* If the VM exit sets the valid bit in the IDT-vectoring * Bit 12 is undefined in any of the following cases:
* information field. * If the VM exit sets the valid bit in the IDT-vectoring
* If the VM exit is due to a double fault. * information field.
*/ * If the VM exit is due to a double fault.
if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && */
vector != DF_VECTOR && !idtv_info_valid) if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, vector != DF_VECTOR && !idtv_info_valid)
GUEST_INTR_STATE_NMI); vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
else GUEST_INTR_STATE_NMI);
vmx->loaded_vmcs->nmi_known_unmasked = else
!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) vmx->loaded_vmcs->nmi_known_unmasked =
& GUEST_INTR_STATE_NMI); !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
& GUEST_INTR_STATE_NMI);
} else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
vmx->loaded_vmcs->vnmi_blocked_time +=
ktime_to_ns(ktime_sub(ktime_get(),
vmx->loaded_vmcs->entry_time));
} }
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
...@@ -9248,6 +9316,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ...@@ -9248,6 +9316,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr3, cr4; unsigned long debugctlmsr, cr3, cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
vmx->loaded_vmcs->soft_vnmi_blocked))
vmx->loaded_vmcs->entry_time = ktime_get();
/* Don't enter VMX if guest state is invalid, let the exit handler /* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */ start emulation until we arrive back to a valid state */
if (vmx->emulation_required) if (vmx->emulation_required)
......
...@@ -26,6 +26,8 @@ ...@@ -26,6 +26,8 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/jump_label.h> #include <linux/jump_label.h>
#include <linux/irqchip/arm-gic-v4.h>
#define VGIC_V3_MAX_CPUS 255 #define VGIC_V3_MAX_CPUS 255
#define VGIC_V2_MAX_CPUS 8 #define VGIC_V2_MAX_CPUS 8
#define VGIC_NR_IRQS_LEGACY 256 #define VGIC_NR_IRQS_LEGACY 256
...@@ -73,6 +75,9 @@ struct vgic_global { ...@@ -73,6 +75,9 @@ struct vgic_global {
/* Only needed for the legacy KVM_CREATE_IRQCHIP */ /* Only needed for the legacy KVM_CREATE_IRQCHIP */
bool can_emulate_gicv2; bool can_emulate_gicv2;
/* Hardware has GICv4? */
bool has_gicv4;
/* GIC system register CPU interface */ /* GIC system register CPU interface */
struct static_key_false gicv3_cpuif; struct static_key_false gicv3_cpuif;
...@@ -116,6 +121,7 @@ struct vgic_irq { ...@@ -116,6 +121,7 @@ struct vgic_irq {
bool hw; /* Tied to HW IRQ */ bool hw; /* Tied to HW IRQ */
struct kref refcount; /* Used for LPIs */ struct kref refcount; /* Used for LPIs */
u32 hwintid; /* HW INTID number */ u32 hwintid; /* HW INTID number */
unsigned int host_irq; /* linux irq corresponding to hwintid */
union { union {
u8 targets; /* GICv2 target VCPUs mask */ u8 targets; /* GICv2 target VCPUs mask */
u32 mpidr; /* GICv3 target VCPU */ u32 mpidr; /* GICv3 target VCPU */
...@@ -232,6 +238,15 @@ struct vgic_dist { ...@@ -232,6 +238,15 @@ struct vgic_dist {
/* used by vgic-debug */ /* used by vgic-debug */
struct vgic_state_iter *iter; struct vgic_state_iter *iter;
/*
* GICv4 ITS per-VM data, containing the IRQ domain, the VPE
* array, the property table pointer as well as allocation
* data. This essentially ties the Linux IRQ core and ITS
* together, and avoids leaking KVM's data structures anywhere
* else.
*/
struct its_vm its_vm;
}; };
struct vgic_v2_cpu_if { struct vgic_v2_cpu_if {
...@@ -250,6 +265,14 @@ struct vgic_v3_cpu_if { ...@@ -250,6 +265,14 @@ struct vgic_v3_cpu_if {
u32 vgic_ap0r[4]; u32 vgic_ap0r[4];
u32 vgic_ap1r[4]; u32 vgic_ap1r[4];
u64 vgic_lr[VGIC_V3_MAX_LRS]; u64 vgic_lr[VGIC_V3_MAX_LRS];
/*
* GICv4 ITS per-VPE data, containing the doorbell IRQ, the
* pending table pointer, the its_vm pointer and a few other
* HW specific things. As for the its_vm structure, this is
* linking the Linux IRQ subsystem and the ITS together.
*/
struct its_vpe its_vpe;
}; };
struct vgic_cpu { struct vgic_cpu {
...@@ -307,9 +330,10 @@ void kvm_vgic_init_cpu_hardware(void); ...@@ -307,9 +330,10 @@ void kvm_vgic_init_cpu_hardware(void);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
bool level, void *owner); bool level, void *owner);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); u32 vintid);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
...@@ -349,4 +373,15 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm); ...@@ -349,4 +373,15 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm);
int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner); int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner);