@@ -101,6 +101,9 @@ module_param(nested, bool, S_IRUGO);
 
 static u64 __read_mostly host_xss;
 
+static bool __read_mostly enable_pml = 1;
+module_param_named(pml, enable_pml, bool, S_IRUGO);
+
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON						\
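
The new knob defaults to on. With S_IRUGO it is exposed read-only while the module is loaded (typically as /sys/module/kvm_intel/parameters/pml), and it can be turned off at load time with "modprobe kvm-intel pml=0" or with "kvm-intel.pml=0" on the kernel command line.
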
@@ -516,6 +519,10 @@ struct vcpu_vmx {
 	/* Dynamic PLE window. */
 	int ple_window;
 	bool ple_window_dirty;
+
+	/* Support for PML */
+#define PML_ENTITY_NUM 512
+	struct page *pml_pg;
 };
 
 enum segment_cache_field {
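
A note on the new constant: the PML log is a single 4 KiB page of 64-bit guest-physical addresses, so PML_ENTITY_NUM is fixed at 4096 / 8 = 512 by the hardware format. A standalone sanity check (plain userspace C, nothing below is kernel API):

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE      4096
#define PML_ENTITY_NUM 512

int main(void)
{
	/* One page of u64 GPA entries: 4096 / 8 = 512 slots. */
	assert(PML_ENTITY_NUM * sizeof(uint64_t) == PAGE_SIZE);
	return 0;
}
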
@@ -1068,6 +1075,11 @@ static inline bool cpu_has_vmx_shadow_vmcs(void)
 		SECONDARY_EXEC_SHADOW_VMCS;
 }
 
+static inline bool cpu_has_vmx_pml(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
+}
+
 static inline bool report_flexpriority(void)
 {
 	return flexpriority_enabled;
@@ -2924,7 +2936,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_APIC_REGISTER_VIRT |
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 			SECONDARY_EXEC_SHADOW_VMCS |
-			SECONDARY_EXEC_XSAVES;
+			SECONDARY_EXEC_XSAVES |
+			SECONDARY_EXEC_ENABLE_PML;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
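
Editorial note: placing SECONDARY_EXEC_ENABLE_PML in opt2 rather than min2 makes it best-effort. adjust_vmx_controls() silently drops optional bits that the CPU's capability MSR does not allow and only fails when a required bit is refused. A rough userspace model of that decision, assuming the usual allowed-0/allowed-1 encoding of MSR_IA32_VMX_PROCBASED_CTLS2 (this is a sketch, not the kernel function itself):

#include <stdint.h>

/*
 * Model of adjust_vmx_controls(): the low 32 bits of a VMX capability MSR
 * are the "allowed-0" (must-be-one) mask, the high 32 bits the "allowed-1"
 * (may-be-one) mask.
 */
static int adjust_controls_model(uint32_t min, uint32_t opt,
				 uint64_t cap_msr, uint32_t *result)
{
	uint32_t must_be_one = (uint32_t)cap_msr;
	uint32_t may_be_one  = (uint32_t)(cap_msr >> 32);
	uint32_t ctl = min | opt;

	ctl &= may_be_one;	/* drop unsupported optional bits (e.g. PML) */
	ctl |= must_be_one;	/* force bits the CPU requires */

	if (min & ~ctl)		/* a required bit was refused: hard error */
		return -1;
	*result = ctl;
	return 0;
}
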
@@ -4355,6 +4368,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 		   a current VMCS12
 		*/
 		exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
+	/* PML is enabled/disabled when creating/destroying the vcpu */
+	exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+
 	return exec_control;
 }
 
@@ -5942,6 +5958,20 @@ static __init int hardware_setup(void)
 
 	update_ple_window_actual_max();
 
+	/*
+	 * Only enable PML when the hardware supports it, and when both EPT
+	 * and EPT A/D bits are enabled -- PML depends on them to work.
+	 */
+	if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
+		enable_pml = 0;
+
+	if (!enable_pml) {
+		kvm_x86_ops->slot_enable_log_dirty = NULL;
+		kvm_x86_ops->slot_disable_log_dirty = NULL;
+		kvm_x86_ops->flush_log_dirty = NULL;
+		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
+	}
+
 	return alloc_kvm_area();
 
 out7:
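
Nulling the four hooks is what actually gates the feature for the rest of KVM: the arch-independent dirty-log code (changed in a companion patch, not shown here) calls them only when they are non-NULL. Presumably the consumer side looks roughly like this sketch, with stand-in types:

#include <stddef.h>

struct kvm;			/* opaque stand-in, not KVM's definition */

struct x86_ops_model {
	void (*flush_log_dirty)(struct kvm *kvm);	/* NULL when PML is off */
};

static void get_dirty_log_model(struct x86_ops_model *ops, struct kvm *kvm)
{
	if (ops->flush_log_dirty)	/* feature-gated: skipped without PML */
		ops->flush_log_dirty(kvm);
	/* ...then read and clear the dirty bitmap as before... */
}
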
@@ -6971,6 +7001,31 @@ static bool vmx_test_pir(struct kvm_vcpu *vcpu, int vector)
 	return pi_test_pir(vector, &vmx->pi_desc);
 }
 
+static int handle_pml_full(struct kvm_vcpu *vcpu)
+{
+	unsigned long exit_qualification;
+
+	trace_kvm_pml_full(vcpu->vcpu_id);
+
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+	/*
+	 * If the PML buffer filled up while the guest was executing an iret
+	 * from NMI, the "blocked by NMI" bit has to be set again before the
+	 * next VM entry.
+	 */
+	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+			cpu_has_virtual_nmis() &&
+			(exit_qualification & INTR_INFO_UNBLOCK_NMI))
+		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				GUEST_INTR_STATE_NMI);
+
+	/*
+	 * The PML buffer was already flushed at the beginning of VMEXIT, so
+	 * there is nothing to do here, and no userspace involvement is
+	 * needed for PML.
+	 */
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7019,6 +7074,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
+	[EXIT_REASON_PML_FULL]                = handle_pml_full,
 };
 
 static const int kvm_vmx_max_exit_handlers =
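
For context, this table is indexed directly by the hardware exit reason, so registering EXIT_REASON_PML_FULL is all that is needed; the dispatch in vmx_handle_exit() is essentially the following (paraphrased from the surrounding file, not part of this diff):

	if (exit_reason < kvm_vmx_max_exit_handlers
	    && kvm_vmx_exit_handlers[exit_reason])
		return kvm_vmx_exit_handlers[exit_reason](vcpu);
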
@@ -7325,6 +7381,89 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
+static int vmx_enable_pml(struct vcpu_vmx *vmx)
+{
+	struct page *pml_pg;
+	u32 exec_control;
+
+	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!pml_pg)
+		return -ENOMEM;
+
+	vmx->pml_pg = pml_pg;
+
+	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+
+	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+	exec_control |= SECONDARY_EXEC_ENABLE_PML;
+	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+
+	return 0;
+}
+
+static void vmx_disable_pml(struct vcpu_vmx *vmx)
+{
+	u32 exec_control;
+
+	ASSERT(vmx->pml_pg);
+	__free_page(vmx->pml_pg);
+	vmx->pml_pg = NULL;
+
+	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+	exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+}
+
+static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
+{
+	struct kvm *kvm = vmx->vcpu.kvm;
+	u64 *pml_buf;
+	u16 pml_idx;
+
+	pml_idx = vmcs_read16(GUEST_PML_INDEX);
+
+	/* Do nothing if the PML buffer is empty */
+	if (pml_idx == (PML_ENTITY_NUM - 1))
+		return;
+
+	/* The PML index always points to the next available buffer entity */
+	if (pml_idx >= PML_ENTITY_NUM)
+		pml_idx = 0;
+	else
+		pml_idx++;
+
+	pml_buf = page_address(vmx->pml_pg);
+	for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
+		u64 gpa;
+
+		gpa = pml_buf[pml_idx];
+		WARN_ON(gpa & (PAGE_SIZE - 1));
+		mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+	}
+
+	/* Reset the PML index */
+	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+}
+
+/*
+ * Flush all vcpus' PML buffers and update the logged GPAs in dirty_bitmap.
+ * Called before reporting dirty_bitmap to userspace.
+ */
+static void kvm_flush_pml_buffers(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+	/*
+	 * We only need to kick each vcpu out of guest mode here: the PML
+	 * buffer is flushed at the beginning of every VMEXIT, so only vcpus
+	 * currently running in guest mode can have unflushed GPAs in their
+	 * PML buffers.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_vcpu_kick(vcpu);
+}
+
 /*
  * The guest has exited. See if we can fix it or if we need userspace
  * assistance.
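
The index arithmetic in vmx_flush_pml_buffer() is worth a gloss: hardware fills the page from entry 511 downward and decrements GUEST_PML_INDEX after each logged write, so 511 means "empty", a value at or above PML_ENTITY_NUM (the u16 wrapped past zero) means "completely full", and anything else points at the next free slot, leaving the valid entries at index+1 through 511. A self-contained userspace model of the same walk:

#include <stdint.h>
#include <stdio.h>

#define PML_ENTITY_NUM 512	/* entries in one 4 KiB PML page */

/* Mirrors vmx_flush_pml_buffer()'s walk over the valid entries. */
static void flush_model(uint16_t pml_idx, const uint64_t *pml_buf)
{
	if (pml_idx == PML_ENTITY_NUM - 1)
		return;				/* nothing was logged */

	if (pml_idx >= PML_ENTITY_NUM)		/* index wrapped: page is full */
		pml_idx = 0;
	else
		pml_idx++;			/* first valid entry */

	for (; pml_idx < PML_ENTITY_NUM; pml_idx++)
		printf("dirty GPA 0x%llx\n",
		       (unsigned long long)pml_buf[pml_idx]);
}

int main(void)
{
	uint64_t buf[PML_ENTITY_NUM] = { 0 };

	buf[511] = 0x1000;	/* hardware logged one write, then... */
	flush_model(510, buf);	/* ...left the index at the next free slot */
	return 0;
}
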
@@ -7335,6 +7474,16 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	u32 exit_reason = vmx->exit_reason;
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
+	/*
+	 * Flush the logged GPAs out of the PML buffer on every exit. This
+	 * keeps dirty_bitmap up to date, and it also means that, before
+	 * querying dirty_bitmap, kvm_vm_ioctl_get_dirty_log only needs to
+	 * kick all vcpus out of guest mode: once a vcpu is back in root
+	 * mode, its PML buffer has already been flushed.
+	 */
+	if (enable_pml)
+		vmx_flush_pml_buffer(vmx);
+
 	/* If guest state is invalid, start emulating */
 	if (vmx->emulation_required)
 		return handle_invalid_guest_state(vcpu);
@@ -7981,6 +8130,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	if (enable_pml)
+		vmx_disable_pml(vmx);
 	free_vpid(vmx);
 	leave_guest_mode(vcpu);
 	vmx_load_vmcs01(vcpu);
@@ -8051,6 +8202,18 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx->nested.current_vmptr = -1ull;
 	vmx->nested.current_vmcs12 = NULL;
 
+	/*
+	 * If PML is turned on, failure to enable PML simply fails the
+	 * creation of the vcpu, which lets us keep the PML logic simple (by
+	 * avoiding cases such as PML being enabled on only some of the
+	 * guest's vcpus, etc).
+	 */
+	if (enable_pml) {
+		err = vmx_enable_pml(vmx);
+		if (err)
+			goto free_vmcs;
+	}
+
 	return &vmx->vcpu;
 
 free_vmcs:
@@ -9492,6 +9655,31 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 	shrink_ple_window(vcpu);
 }
 
+static void vmx_slot_enable_log_dirty(struct kvm *kvm,
+				      struct kvm_memory_slot *slot)
+{
+	kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
+	kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
+}
+
+static void vmx_slot_disable_log_dirty(struct kvm *kvm,
+				       struct kvm_memory_slot *slot)
+{
+	kvm_mmu_slot_set_dirty(kvm, slot);
+}
+
+static void vmx_flush_log_dirty(struct kvm *kvm)
+{
+	kvm_flush_pml_buffers(kvm);
+}
+
+static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
+					   struct kvm_memory_slot *memslot,
+					   gfn_t offset, unsigned long mask)
+{
+	kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
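
Two notes on the hooks above. Enabling dirty logging clears the D-bit on 4 KiB leaf mappings but only write-protects large pages, the intent being that a write to a large page faults and gets split into 4 KiB pages before logging resumes; disabling marks everything dirty again. And vmx_enable_log_dirty_pt_masked() receives a word-sized mask in which bit i set means gfn (offset + i) was just harvested from dirty_bitmap and needs its EPT D-bit cleared so the next write is logged again. A self-contained model of consuming such a mask (clear_ept_dbit() is a hypothetical stand-in for the real MMU work):

#include <stdint.h>

typedef uint64_t gfn_t;

/* Hypothetical stand-in for the per-page work done by
 * kvm_mmu_clear_dirty_pt_masked(): clear the EPT D-bit of gfn's SPTE. */
static void clear_ept_dbit(gfn_t gfn)
{
	(void)gfn;
}

static void clear_dirty_pt_masked_model(gfn_t offset, unsigned long mask)
{
	while (mask) {
		int bit = __builtin_ctzl(mask);	/* index of lowest set bit */
		clear_ept_dbit(offset + bit);
		mask &= mask - 1;		/* drop that bit */
	}
}
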
@@ -9601,6 +9789,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.check_nested_events = vmx_check_nested_events,
 
 	.sched_in = vmx_sched_in,
+
+	.slot_enable_log_dirty = vmx_slot_enable_log_dirty,
+	.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
+	.flush_log_dirty = vmx_flush_log_dirty,
+	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
 };
 
 static int __init vmx_init(void)