Skip to content

Commit b96fb43

Browse files
bonzini authored and rkrcmar committed
KVM: nVMX: fixes to nested virt interrupt injection
There are three issues in nested_vmx_check_exception: 1) it is not taking PFEC_MATCH/PFEC_MASK into account, as reported by Wanpeng Li; 2) it should rebuild the interruption info and exit qualification fields from scratch, as reported by Jim Mattson, because the values from the L2->L0 vmexit may be invalid (e.g. if an emulated instruction causes a page fault, the EPT misconfig's exit qualification is incorrect). 3) CR2 and DR6 should not be written for exception intercept vmexits (CR2 only for AMD). This patch fixes the first two and adds a comment about the last, outlining the fix. Cc: Jim Mattson <[email protected]> Cc: Wanpeng Li <[email protected]> Signed-off-by: Paolo Bonzini <[email protected]>
1 parent 7313c69 commit b96fb43

File tree

2 files changed

+72
-25
lines changed

2 files changed

+72
-25
lines changed

arch/x86/kvm/svm.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2430,6 +2430,16 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
24302430
svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
24312431
svm->vmcb->control.exit_code_hi = 0;
24322432
svm->vmcb->control.exit_info_1 = error_code;
2433+
2434+
/*
2435+
* FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
2436+
* The fix is to add the ancillary datum (CR2 or DR6) to structs
2437+
* kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
2438+
* written only when inject_pending_event runs (DR6 would be written here
2439+
* too). This should be conditional on a new capability---if the
2440+
* capability is disabled, kvm_multiple_exception would write the
2441+
* ancillary information to CR2 or DR6, for backwards ABI-compatibility.
2442+
*/
24332443
if (svm->vcpu.arch.exception.nested_apf)
24342444
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
24352445
else

arch/x86/kvm/vmx.c

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,10 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
927927
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
928928
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
929929
static int alloc_identity_pagetable(struct kvm *kvm);
930+
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
931+
static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
932+
static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
933+
u16 error_code);
930934

931935
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
932936
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2428,6 +2432,30 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
24282432
vmx_set_interrupt_shadow(vcpu, 0);
24292433
}
24302434

2435+
static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
2436+
unsigned long exit_qual)
2437+
{
2438+
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2439+
unsigned int nr = vcpu->arch.exception.nr;
2440+
u32 intr_info = nr | INTR_INFO_VALID_MASK;
2441+
2442+
if (vcpu->arch.exception.has_error_code) {
2443+
vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
2444+
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
2445+
}
2446+
2447+
if (kvm_exception_is_soft(nr))
2448+
intr_info |= INTR_TYPE_SOFT_EXCEPTION;
2449+
else
2450+
intr_info |= INTR_TYPE_HARD_EXCEPTION;
2451+
2452+
if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
2453+
vmx_get_nmi_mask(vcpu))
2454+
intr_info |= INTR_INFO_UNBLOCK_NMI;
2455+
2456+
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
2457+
}
2458+
24312459
/*
24322460
* KVM wants to inject page-faults which it got to the guest. This function
24332461
* checks whether in a nested guest, we need to inject them to L1 or L2.
@@ -2437,24 +2465,38 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
24372465
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
24382466
unsigned int nr = vcpu->arch.exception.nr;
24392467

2440-
if (!((vmcs12->exception_bitmap & (1u << nr)) ||
2441-
(nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
2442-
return 0;
2468+
if (nr == PF_VECTOR) {
2469+
if (vcpu->arch.exception.nested_apf) {
2470+
nested_vmx_inject_exception_vmexit(vcpu,
2471+
vcpu->arch.apf.nested_apf_token);
2472+
return 1;
2473+
}
2474+
/*
2475+
* FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
2476+
* The fix is to add the ancillary datum (CR2 or DR6) to structs
2477+
* kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
2478+
* can be written only when inject_pending_event runs. This should be
2479+
* conditional on a new capability---if the capability is disabled,
2480+
* kvm_multiple_exception would write the ancillary information to
2481+
* CR2 or DR6, for backwards ABI-compatibility.
2482+
*/
2483+
if (nested_vmx_is_page_fault_vmexit(vmcs12,
2484+
vcpu->arch.exception.error_code)) {
2485+
nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2);
2486+
return 1;
2487+
}
2488+
} else {
2489+
unsigned long exit_qual = 0;
2490+
if (nr == DB_VECTOR)
2491+
exit_qual = vcpu->arch.dr6;
24432492

2444-
if (vcpu->arch.exception.nested_apf) {
2445-
vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
2446-
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
2447-
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
2448-
INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
2449-
vcpu->arch.apf.nested_apf_token);
2450-
return 1;
2493+
if (vmcs12->exception_bitmap & (1u << nr)) {
2494+
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
2495+
return 1;
2496+
}
24512497
}
24522498

2453-
vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
2454-
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
2455-
vmcs_read32(VM_EXIT_INTR_INFO),
2456-
vmcs_readl(EXIT_QUALIFICATION));
2457-
return 1;
2499+
return 0;
24582500
}
24592501

24602502
static void vmx_queue_exception(struct kvm_vcpu *vcpu)
@@ -9529,10 +9571,11 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
95299571
WARN_ON(!is_guest_mode(vcpu));
95309572

95319573
if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
9532-
vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
9533-
nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
9534-
vmcs_read32(VM_EXIT_INTR_INFO),
9535-
vmcs_readl(EXIT_QUALIFICATION));
9574+
vmcs12->vm_exit_intr_error_code = fault->error_code;
9575+
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
9576+
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
9577+
INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
9578+
fault->address);
95369579
} else {
95379580
kvm_inject_page_fault(vcpu, fault);
95389581
}
@@ -10115,12 +10158,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
1011510158
* "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
1011610159
* vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
1011710160
* !enable_ept, EB.PF is 1, so the "or" will always be 1.
10118-
*
10119-
* A problem with this approach (when !enable_ept) is that L1 may be
10120-
* injected with more page faults than it asked for. This could have
10121-
* caused problems, but in practice existing hypervisors don't care.
10122-
* To fix this, we will need to emulate the PFEC checking (on the L1
10123-
* page tables), using walk_addr(), when injecting PFs to L1.
1012410161
*/
1012510162
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
1012610163
enable_ept ? vmcs12->page_fault_error_code_mask : 0);

0 commit comments

Comments (0)