KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block
authorMarc Zyngier <maz@kernel.org>
Fri, 2 Aug 2019 09:28:32 +0000 (10:28 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 25 Aug 2019 08:47:59 +0000 (10:47 +0200)
commit 5eeaf10eec394b28fad2c58f1f5c3a5da0e87d1c upstream.

Since commit commit 328e56647944 ("KVM: arm/arm64: vgic: Defer
touching GICH_VMCR to vcpu_load/put"), we leave ICH_VMCR_EL2 (or
its GICv2 equivalent) loaded as long as we can, only syncing it
back when we're scheduled out.

There is a small snag with that though: kvm_vgic_vcpu_pending_irq(),
which is indirectly called from kvm_vcpu_check_block(), needs to
evaluate the guest's view of ICC_PMR_EL1. At the point were we
call kvm_vcpu_check_block(), the vcpu is still loaded, and whatever
changes to PMR is not visible in memory until we do a vcpu_put().

Things go really south if the guest does the following:

mov x0, #0 // or any small value masking interrupts
msr ICC_PMR_EL1, x0

[vcpu preempted, then rescheduled, VMCR sampled]

mov x0, #ff // allow all interrupts
msr ICC_PMR_EL1, x0
wfi // traps to EL2, so samping of VMCR

[interrupt arrives just after WFI]

Here, the hypervisor's view of PMR is zero, while the guest has enabled
its interrupts. kvm_vgic_vcpu_pending_irq() will then say that no
interrupts are pending (despite an interrupt being received) and we'll
block for no reason. If the guest doesn't have a periodic interrupt
firing once it has blocked, it will stay there forever.

To avoid this unfortuante situation, let's resync VMCR from
kvm_arch_vcpu_blocking(), ensuring that a following kvm_vcpu_check_block()
will observe the latest value of PMR.

This has been found by booting an arm64 Linux guest with the pseudo NMI
feature, and thus using interrupt priorities to mask interrupts instead
of the usual PSTATE masking.

Cc: stable@vger.kernel.org # 4.12
Fixes: 328e56647944 ("KVM: arm/arm64: vgic: Defer touching GICH_VMCR to vcpu_load/put")
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
include/kvm/arm_vgic.h
virt/kvm/arm/arm.c
virt/kvm/arm/vgic/vgic-v2.c
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/arm/vgic/vgic.c
virt/kvm/arm/vgic/vgic.h

index 90ac450745f1847f9bb9e5b36ba6a0753d0eca05..561fefc2a9801856b71c23725c369ad0f37961e7 100644 (file)
@@ -361,6 +361,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 
 void kvm_vgic_load(struct kvm_vcpu *vcpu);
 void kvm_vgic_put(struct kvm_vcpu *vcpu);
+void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu);
 
 #define irqchip_in_kernel(k)   (!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)    ((k)->arch.vgic.initialized)
index 02bac8abd206fd5f6ad357306d82fb721efc17d9..d982650deb33fe7a25fcdab08835503cb5154cb4 100644 (file)
@@ -338,6 +338,17 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
        kvm_timer_schedule(vcpu);
+       /*
+        * If we're about to block (most likely because we've just hit a
+        * WFI), we need to sync back the state of the GIC CPU interface
+        * so that we have the lastest PMR and group enables. This ensures
+        * that kvm_arch_vcpu_runnable has up-to-date data to decide
+        * whether we have pending interrupts.
+        */
+       preempt_disable();
+       kvm_vgic_vmcr_sync(vcpu);
+       preempt_enable();
+
        kvm_vgic_v4_enable_doorbell(vcpu);
 }
 
index 69b892abd7dc6faec17e820fe6f4c0f95800ecb3..57281c1594d0fb6bd31c694c2a2287ad67c02854 100644 (file)
@@ -495,10 +495,17 @@ void vgic_v2_load(struct kvm_vcpu *vcpu)
                       kvm_vgic_global_state.vctrl_base + GICH_APR);
 }
 
-void vgic_v2_put(struct kvm_vcpu *vcpu)
+void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu)
 {
        struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
 
        cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
+}
+
+void vgic_v2_put(struct kvm_vcpu *vcpu)
+{
+       struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+
+       vgic_v2_vmcr_sync(vcpu);
        cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
 }
index 3f2350a4d4ab8375ad3d768a557c3aaa067785d3..5c55995a1a1643889aae7bd26fa9e72851cf3090 100644 (file)
@@ -674,12 +674,17 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
                __vgic_v3_activate_traps(vcpu);
 }
 
-void vgic_v3_put(struct kvm_vcpu *vcpu)
+void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
 {
        struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 
        if (likely(cpu_if->vgic_sre))
                cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr);
+}
+
+void vgic_v3_put(struct kvm_vcpu *vcpu)
+{
+       vgic_v3_vmcr_sync(vcpu);
 
        kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
 
index c5165e3b80cbea92fe813f2416d3a8b1d84ba39f..250cd72c95a52d97602a2b34cf4d594791a62e23 100644 (file)
@@ -902,6 +902,17 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu)
                vgic_v3_put(vcpu);
 }
 
+void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
+{
+       if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+               return;
+
+       if (kvm_vgic_global_state.type == VGIC_V2)
+               vgic_v2_vmcr_sync(vcpu);
+       else
+               vgic_v3_vmcr_sync(vcpu);
+}
+
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
index a90024718ca44b941e8b4d7aa0f6bf20233c4596..d5e4542799252d35496c7373a209903ff448c59d 100644 (file)
@@ -204,6 +204,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
 void vgic_v2_init_lrs(void);
 void vgic_v2_load(struct kvm_vcpu *vcpu);
 void vgic_v2_put(struct kvm_vcpu *vcpu);
+void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
 
 void vgic_v2_save_state(struct kvm_vcpu *vcpu);
 void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
@@ -234,6 +235,7 @@ bool vgic_v3_check_base(struct kvm *kvm);
 
 void vgic_v3_load(struct kvm_vcpu *vcpu);
 void vgic_v3_put(struct kvm_vcpu *vcpu);
+void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu);
 
 bool vgic_has_its(struct kvm *kvm);
 int kvm_vgic_register_its_device(void);