Skip to content

Commit 843e433

Browse files
Kai Huang authored and bonzini committed
KVM: VMX: Add PML support in VMX
This patch adds PML (Page Modification Logging) support in VMX. A new module parameter 'enable_pml' is added to allow the user to enable/disable it manually.

Signed-off-by: Kai Huang <[email protected]>
Reviewed-by: Xiao Guangrong <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
1 parent 88178fd commit 843e433

File tree

5 files changed

+218
-1
lines changed

5 files changed

+218
-1
lines changed

arch/x86/include/asm/vmx.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
7070
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
7171
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
72+
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
7273
#define SECONDARY_EXEC_XSAVES 0x00100000
7374

7475

@@ -121,6 +122,7 @@ enum vmcs_field {
121122
GUEST_LDTR_SELECTOR = 0x0000080c,
122123
GUEST_TR_SELECTOR = 0x0000080e,
123124
GUEST_INTR_STATUS = 0x00000810,
125+
GUEST_PML_INDEX = 0x00000812,
124126
HOST_ES_SELECTOR = 0x00000c00,
125127
HOST_CS_SELECTOR = 0x00000c02,
126128
HOST_SS_SELECTOR = 0x00000c04,
@@ -140,6 +142,8 @@ enum vmcs_field {
140142
VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
141143
VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
142144
VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
145+
PML_ADDRESS = 0x0000200e,
146+
PML_ADDRESS_HIGH = 0x0000200f,
143147
TSC_OFFSET = 0x00002010,
144148
TSC_OFFSET_HIGH = 0x00002011,
145149
VIRTUAL_APIC_PAGE_ADDR = 0x00002012,

arch/x86/include/uapi/asm/vmx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
#define EXIT_REASON_XSETBV 55
7474
#define EXIT_REASON_APIC_WRITE 56
7575
#define EXIT_REASON_INVPCID 58
76+
#define EXIT_REASON_PML_FULL 62
7677
#define EXIT_REASON_XSAVES 63
7778
#define EXIT_REASON_XRSTORS 64
7879

arch/x86/kvm/trace.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,24 @@ TRACE_EVENT(kvm_track_tsc,
848848

849849
#endif /* CONFIG_X86_64 */
850850

851+
/*
852+
 * Tracepoint for a PML-full VMEXIT: fired when the vcpu's Page
 * Modification Log buffer fills up.  Records only the vcpu id.
853+
 */
854+
TRACE_EVENT(kvm_pml_full,
855+
TP_PROTO(unsigned int vcpu_id),
856+
TP_ARGS(vcpu_id),
857+
858+
TP_STRUCT__entry(
859+
__field( unsigned int, vcpu_id ) /* vcpu whose PML buffer filled */
860+
),
861+
862+
TP_fast_assign(
863+
__entry->vcpu_id = vcpu_id;
864+
),
865+
866+
TP_printk("vcpu %d: PML full", __entry->vcpu_id)
867+
);
868+
851869
TRACE_EVENT(kvm_ple_window,
852870
TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
853871
TP_ARGS(grow, vcpu_id, new, old),

arch/x86/kvm/vmx.c

Lines changed: 194 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ module_param(nested, bool, S_IRUGO);
101101

102102
static u64 __read_mostly host_xss;
103103

104+
static bool __read_mostly enable_pml = 1;
105+
module_param_named(pml, enable_pml, bool, S_IRUGO);
106+
104107
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
105108
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
106109
#define KVM_VM_CR0_ALWAYS_ON \
@@ -516,6 +519,10 @@ struct vcpu_vmx {
516519
/* Dynamic PLE window. */
517520
int ple_window;
518521
bool ple_window_dirty;
522+
523+
/* Support for PML */
524+
#define PML_ENTITY_NUM 512
525+
struct page *pml_pg;
519526
};
520527

521528
enum segment_cache_field {
@@ -1068,6 +1075,11 @@ static inline bool cpu_has_vmx_shadow_vmcs(void)
10681075
SECONDARY_EXEC_SHADOW_VMCS;
10691076
}
10701077

1078+
static inline bool cpu_has_vmx_pml(void)
1079+
{
1080+
return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
1081+
}
1082+
10711083
static inline bool report_flexpriority(void)
10721084
{
10731085
return flexpriority_enabled;
@@ -2924,7 +2936,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
29242936
SECONDARY_EXEC_APIC_REGISTER_VIRT |
29252937
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
29262938
SECONDARY_EXEC_SHADOW_VMCS |
2927-
SECONDARY_EXEC_XSAVES;
2939+
SECONDARY_EXEC_XSAVES |
2940+
SECONDARY_EXEC_ENABLE_PML;
29282941
if (adjust_vmx_controls(min2, opt2,
29292942
MSR_IA32_VMX_PROCBASED_CTLS2,
29302943
&_cpu_based_2nd_exec_control) < 0)
@@ -4355,6 +4368,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
43554368
a current VMCS12
43564369
*/
43574370
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
4371+
/* PML is enabled/disabled when creating/destroying the vcpu */
4372+
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
4373+
43584374
return exec_control;
43594375
}
43604376

@@ -5942,6 +5958,20 @@ static __init int hardware_setup(void)
59425958

59435959
update_ple_window_actual_max();
59445960

5961+
/*
5962+
* Only enable PML when hardware supports PML feature, and both EPT
5963+
* and EPT A/D bit features are enabled -- PML depends on them to work.
5964+
*/
5965+
if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
5966+
enable_pml = 0;
5967+
5968+
if (!enable_pml) {
5969+
kvm_x86_ops->slot_enable_log_dirty = NULL;
5970+
kvm_x86_ops->slot_disable_log_dirty = NULL;
5971+
kvm_x86_ops->flush_log_dirty = NULL;
5972+
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
5973+
}
5974+
59455975
return alloc_kvm_area();
59465976

59475977
out7:
@@ -6971,6 +7001,31 @@ static bool vmx_test_pir(struct kvm_vcpu *vcpu, int vector)
69717001
return pi_test_pir(vector, &vmx->pi_desc);
69727002
}
69737003

7004+
static int handle_pml_full(struct kvm_vcpu *vcpu)
7005+
{
7006+
unsigned long exit_qualification;
7007+
7008+
trace_kvm_pml_full(vcpu->vcpu_id);
7009+
7010+
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7011+
7012+
/*
7013+
* PML buffer FULL happened while executing iret from NMI,
7014+
* "blocked by NMI" bit has to be set before next VM entry.
7015+
*/
7016+
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
7017+
cpu_has_virtual_nmis() &&
7018+
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
7019+
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
7020+
GUEST_INTR_STATE_NMI);
7021+
7022+
/*
7023+
* PML buffer already flushed at beginning of VMEXIT. Nothing to do
7024+
* here.., and there's no userspace involvement needed for PML.
7025+
*/
7026+
return 1;
7027+
}
7028+
69747029
/*
69757030
* The exit handlers return 1 if the exit was handled fully and guest execution
69767031
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7019,6 +7074,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
70197074
[EXIT_REASON_INVVPID] = handle_invvpid,
70207075
[EXIT_REASON_XSAVES] = handle_xsaves,
70217076
[EXIT_REASON_XRSTORS] = handle_xrstors,
7077+
[EXIT_REASON_PML_FULL] = handle_pml_full,
70227078
};
70237079

70247080
static const int kvm_vmx_max_exit_handlers =
@@ -7325,6 +7381,89 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
73257381
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
73267382
}
73277383

7384+
static int vmx_enable_pml(struct vcpu_vmx *vmx)
7385+
{
7386+
struct page *pml_pg;
7387+
u32 exec_control;
7388+
7389+
pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
7390+
if (!pml_pg)
7391+
return -ENOMEM;
7392+
7393+
vmx->pml_pg = pml_pg;
7394+
7395+
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
7396+
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
7397+
7398+
exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
7399+
exec_control |= SECONDARY_EXEC_ENABLE_PML;
7400+
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
7401+
7402+
return 0;
7403+
}
7404+
7405+
static void vmx_disable_pml(struct vcpu_vmx *vmx)
7406+
{
7407+
u32 exec_control;
7408+
7409+
ASSERT(vmx->pml_pg);
7410+
__free_page(vmx->pml_pg);
7411+
vmx->pml_pg = NULL;
7412+
7413+
exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
7414+
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
7415+
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
7416+
}
7417+
7418+
static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
7419+
{
7420+
struct kvm *kvm = vmx->vcpu.kvm;
7421+
u64 *pml_buf;
7422+
u16 pml_idx;
7423+
7424+
pml_idx = vmcs_read16(GUEST_PML_INDEX);
7425+
7426+
/* Do nothing if PML buffer is empty */
7427+
if (pml_idx == (PML_ENTITY_NUM - 1))
7428+
return;
7429+
7430+
/* PML index always points to next available PML buffer entity */
7431+
if (pml_idx >= PML_ENTITY_NUM)
7432+
pml_idx = 0;
7433+
else
7434+
pml_idx++;
7435+
7436+
pml_buf = page_address(vmx->pml_pg);
7437+
for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
7438+
u64 gpa;
7439+
7440+
gpa = pml_buf[pml_idx];
7441+
WARN_ON(gpa & (PAGE_SIZE - 1));
7442+
mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
7443+
}
7444+
7445+
/* reset PML index */
7446+
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
7447+
}
7448+
7449+
/*
7450+
* Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
7451+
* Called before reporting dirty_bitmap to userspace.
7452+
*/
7453+
static void kvm_flush_pml_buffers(struct kvm *kvm)
7454+
{
7455+
int i;
7456+
struct kvm_vcpu *vcpu;
7457+
/*
7458+
* We only need to kick vcpu out of guest mode here, as PML buffer
7459+
* is flushed at beginning of all VMEXITs, and it's obvious that only
7460+
* vcpus running in guest are possible to have unflushed GPAs in PML
7461+
* buffer.
7462+
*/
7463+
kvm_for_each_vcpu(i, vcpu, kvm)
7464+
kvm_vcpu_kick(vcpu);
7465+
}
7466+
73287467
/*
73297468
* The guest has exited. See if we can fix it or if we need userspace
73307469
* assistance.
@@ -7335,6 +7474,16 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
73357474
u32 exit_reason = vmx->exit_reason;
73367475
u32 vectoring_info = vmx->idt_vectoring_info;
73377476

7477+
/*
7478+
* Flush the PML buffer of logged GPAs so that dirty_bitmap is up to
7479+
* date.  A further benefit: in kvm_vm_ioctl_get_dirty_log, before
7480+
* querying dirty_bitmap, we only need to kick all vcpus out of guest
7481+
* mode, since for any vcpu already in root mode the PML buffer has
7482+
* already been flushed.
7483+
*/
7484+
if (enable_pml)
7485+
vmx_flush_pml_buffer(vmx);
7486+
73387487
/* If guest state is invalid, start emulating */
73397488
if (vmx->emulation_required)
73407489
return handle_invalid_guest_state(vcpu);
@@ -7981,6 +8130,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
79818130
{
79828131
struct vcpu_vmx *vmx = to_vmx(vcpu);
79838132

8133+
if (enable_pml)
8134+
vmx_disable_pml(vmx);
79848135
free_vpid(vmx);
79858136
leave_guest_mode(vcpu);
79868137
vmx_load_vmcs01(vcpu);
@@ -8051,6 +8202,18 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
80518202
vmx->nested.current_vmptr = -1ull;
80528203
vmx->nested.current_vmcs12 = NULL;
80538204

8205+
/*
8206+
* If PML is turned on, failure on enabling PML just results in failure
8207+
* of creating the vcpu, therefore we can simplify PML logic (by
8208+
* avoiding dealing with cases, such as enabling PML partially on vcpus
8209+
* for the guest, etc.
8210+
*/
8211+
if (enable_pml) {
8212+
err = vmx_enable_pml(vmx);
8213+
if (err)
8214+
goto free_vmcs;
8215+
}
8216+
80548217
return &vmx->vcpu;
80558218

80568219
free_vmcs:
@@ -9492,6 +9655,31 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
94929655
shrink_ple_window(vcpu);
94939656
}
94949657

9658+
/*
 * Begin dirty logging on @slot (PML path): clear dirty bits on the
 * leaf SPTEs and remove write access from large-page mappings, so
 * subsequent writes are either logged by PML or forced to fault.
 */
static void vmx_slot_enable_log_dirty(struct kvm *kvm,
				      struct kvm_memory_slot *slot)
{
	kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
	kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
}
9664+
9665+
/*
 * Stop dirty logging on @slot: mark the slot's mappings dirty again so
 * normal (non-logged) operation resumes.
 */
static void vmx_slot_disable_log_dirty(struct kvm *kvm,
				       struct kvm_memory_slot *slot)
{
	kvm_mmu_slot_set_dirty(kvm, slot);
}
9670+
9671+
/* kvm_x86_ops hook: push all vcpus' PML buffers into dirty_bitmap. */
static void vmx_flush_log_dirty(struct kvm *kvm)
{
	kvm_flush_pml_buffers(kvm);
}
9675+
9676+
static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
9677+
struct kvm_memory_slot *memslot,
9678+
gfn_t offset, unsigned long mask)
9679+
{
9680+
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
9681+
}
9682+
94959683
static struct kvm_x86_ops vmx_x86_ops = {
94969684
.cpu_has_kvm_support = cpu_has_kvm_support,
94979685
.disabled_by_bios = vmx_disabled_by_bios,
@@ -9601,6 +9789,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
96019789
.check_nested_events = vmx_check_nested_events,
96029790

96039791
.sched_in = vmx_sched_in,
9792+
9793+
.slot_enable_log_dirty = vmx_slot_enable_log_dirty,
9794+
.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
9795+
.flush_log_dirty = vmx_flush_log_dirty,
9796+
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
96049797
};
96059798

96069799
static int __init vmx_init(void)

arch/x86/kvm/x86.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7880,3 +7880,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
78807880
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
78817881
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
78827882
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
7883+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);

0 commit comments

Comments
 (0)