@@ -927,6 +927,10 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
 static int alloc_identity_pagetable(struct kvm *kvm);
+static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
+static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
+static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
+                                            u16 error_code);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
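
The forward declaration of nested_vmx_is_page_fault_vmexit() points at the predicate that decides whether a #PF with a given error code is one L1 asked to intercept. The SDM rule it encodes: if the masked PFEC equals the PFEC_MATCH value, a #PF causes a VM exit iff exception-bitmap bit 14 is set; if it does not match, the sense of the bit is inverted. A minimal standalone sketch of that rule (struct and field names chosen here to mirror vmcs12, not quoted from this diff):

#include <stdbool.h>
#include <stdint.h>

#define PF_VECTOR 14

/* Just the three vmcs12 fields the check consults. */
struct pf_intercept_fields {
        uint32_t exception_bitmap;
        uint32_t page_fault_error_code_mask;
        uint32_t page_fault_error_code_match;
};

/*
 * SDM: equality between the masked PFEC and the match value flips the
 * meaning of exception-bitmap bit 14, which collapses to a single XOR.
 */
static bool is_page_fault_vmexit(const struct pf_intercept_fields *f,
                                 uint16_t pfec)
{
        bool bit = (f->exception_bitmap & (1u << PF_VECTOR)) != 0;
        bool inequality = (pfec & f->page_fault_error_code_mask) !=
                          f->page_fault_error_code_match;

        return inequality ^ bit;
}
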
@@ -2428,6 +2432,30 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	vmx_set_interrupt_shadow(vcpu, 0);
 }
 
+static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
+					       unsigned long exit_qual)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	unsigned int nr = vcpu->arch.exception.nr;
+	u32 intr_info = nr | INTR_INFO_VALID_MASK;
+
+	if (vcpu->arch.exception.has_error_code) {
+		vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
+		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
+	}
+
+	if (kvm_exception_is_soft(nr))
+		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
+	else
+		intr_info |= INTR_TYPE_HARD_EXCEPTION;
+
+	if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
+	    vmx_get_nmi_mask(vcpu))
+		intr_info |= INTR_INFO_UNBLOCK_NMI;
+
+	nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
+}
+
 /*
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
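
The new helper assembles the VM-exit interruption-information field by hand instead of copying it out of vmcs02. The bit layout is fixed by the SDM: bits 7:0 hold the vector, bits 10:8 the event type (3 = hardware exception, 6 = software exception), bit 11 flags a delivered error code, bit 12 reports NMI unblocking due to IRET, bit 31 marks the field valid. A quick self-check of the value this produces for a hardware #PF with an error code; the mask constants are the values I believe the kernel's VMX headers define, so treat them as assumptions:

#include <assert.h>
#include <stdint.h>

#define PF_VECTOR                   14
#define INTR_INFO_VALID_MASK        0x80000000u /* bit 31 */
#define INTR_INFO_DELIVER_CODE_MASK 0x00000800u /* bit 11 */
#define INTR_TYPE_HARD_EXCEPTION    (3u << 8)   /* bits 10:8 */

int main(void)
{
        uint32_t intr_info = PF_VECTOR | INTR_INFO_VALID_MASK;

        intr_info |= INTR_INFO_DELIVER_CODE_MASK; /* #PF pushes an error code */
        intr_info |= INTR_TYPE_HARD_EXCEPTION;    /* #PF is never a soft exception */

        /* 0x0e | 0x300 | 0x800 | 0x80000000 */
        assert(intr_info == 0x80000b0eu);
        return 0;
}

This is exactly the constant that the vmx_inject_page_fault_nested() hunk further down writes out longhand.
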
@@ -2437,24 +2465,38 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	unsigned int nr = vcpu->arch.exception.nr;
 
-	if (!((vmcs12->exception_bitmap & (1u << nr)) ||
-	      (nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
-		return 0;
+	if (nr == PF_VECTOR) {
+		if (vcpu->arch.exception.nested_apf) {
+			nested_vmx_inject_exception_vmexit(vcpu,
+					vcpu->arch.apf.nested_apf_token);
+			return 1;
+		}
+		/*
+		 * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
+		 * The fix is to add the ancillary datum (CR2 or DR6) to structs
+		 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
+		 * can be written only when inject_pending_event runs. This should be
+		 * conditional on a new capability---if the capability is disabled,
+		 * kvm_multiple_exception would write the ancillary information to
+		 * CR2 or DR6, for backwards ABI-compatibility.
+		 */
+		if (nested_vmx_is_page_fault_vmexit(vmcs12,
+				vcpu->arch.exception.error_code)) {
+			nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2);
+			return 1;
+		}
+	} else {
+		unsigned long exit_qual = 0;
+		if (nr == DB_VECTOR)
+			exit_qual = vcpu->arch.dr6;
 
-	if (vcpu->arch.exception.nested_apf) {
-		vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
-		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
-				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
-				  INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
-				  vcpu->arch.apf.nested_apf_token);
-		return 1;
+		if (vmcs12->exception_bitmap & (1u << nr)) {
+			nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+			return 1;
+		}
 	}
 
-	vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
-	nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
-			  vmcs_read32(VM_EXIT_INTR_INFO),
-			  vmcs_readl(EXIT_QUALIFICATION));
-	return 1;
+	return 0;
 }
 
 static void vmx_queue_exception(struct kvm_vcpu *vcpu)
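
Note what the rewritten nested_vmx_check_exception() passes as the exit qualification: on a real exception VM exit that field carries the exception's ancillary datum, so the code mirrors it, CR2 (the faulting address) for #PF, DR6 (the pending debug status) for #DB, zero for everything else. A sketch of that per-vector choice in isolation; the cr2/dr6 parameters stand in for vcpu->arch.cr2 and vcpu->arch.dr6:

#include <stdint.h>

#define DB_VECTOR 1
#define PF_VECTOR 14

static uint64_t exception_exit_qual(unsigned int nr, uint64_t cr2, uint64_t dr6)
{
        switch (nr) {
        case PF_VECTOR:
                return cr2;     /* faulting linear address */
        case DB_VECTOR:
                return dr6;     /* debug status bits */
        default:
                return 0;       /* no ancillary datum */
        }
}

The in-tree code cannot be written this symmetrically yet: per the FIXME above, CR2 has already been overwritten by the time L1 interception is detected.
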
@@ -9529,10 +9571,11 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 	WARN_ON(!is_guest_mode(vcpu));
 
 	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
-		vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
-		nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
-				  vmcs_read32(VM_EXIT_INTR_INFO),
-				  vmcs_readl(EXIT_QUALIFICATION));
+		vmcs12->vm_exit_intr_error_code = fault->error_code;
+		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
+				  INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
+				  fault->address);
 	} else {
 		kvm_inject_page_fault(vcpu, fault);
 	}
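
The old code here read VM_EXIT_INTR_INFO and EXIT_QUALIFICATION back out of the current VMCS, which is only valid when the #PF arrived via a hardware VM exit; for a fault synthesized by the instruction emulator those fields are stale. The replacement builds everything from the fault descriptor instead. An abridged stand-in for the struct being consumed (field names are my recollection of the kernel's struct x86_exception, an assumption, not quoted from this diff):

#include <stdbool.h>
#include <stdint.h>

struct x86_exception {
        uint8_t  vector;            /* PF_VECTOR on this path */
        bool     error_code_valid;
        uint16_t error_code;        /* copied to vmcs12->vm_exit_intr_error_code */
        bool     nested_page_fault;
        uint64_t address;           /* faulting GVA; becomes the exit qualification */
};

Because nothing is read back from hardware, the same path now works whether the #PF came from a real exit or from emulation.
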
@@ -10115,12 +10158,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
 	 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
 	 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
-	 *
-	 * A problem with this approach (when !enable_ept) is that L1 may be
-	 * injected with more page faults than it asked for. This could have
-	 * caused problems, but in practice existing hypervisors don't care.
-	 * To fix this, we will need to emulate the PFEC checking (on the L1
-	 * page tables), using walk_addr(), when injecting PFs to L1.
 	 */
 	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
 		     enable_ept ? vmcs12->page_fault_error_code_mask : 0);
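
With the stale paragraph dropped, the comment's remaining claim is easy to verify against the predicate sketched at the top: when !enable_ept, mask and match are both written as 0, so (pfec & 0) == 0 holds for every error code and EB.PF alone decides, and EB.PF is forced to 1 in that configuration. A standalone check of that corner, reusing the XOR rule from the first sketch:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PF_VECTOR 14

int main(void)
{
        /* vmcs02 settings when !enable_ept: EB.PF = 1, mask = match = 0. */
        uint32_t eb = 1u << PF_VECTOR;
        uint32_t mask = 0, match = 0;
        uint16_t pfec = 0x0003; /* arbitrary PFEC: present + write */

        bool bit = (eb & (1u << PF_VECTOR)) != 0;
        bool inequality = (pfec & mask) != match;

        /* inequality is always false, so the XOR reduces to EB.PF:
         * every L2 #PF causes an exit, matching the comment above. */
        assert(inequality ^ bit);
        return 0;
}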