Skip to content

Commit 4180bf1

Browse files
Wanpeng Li authored and bonzini committed
KVM: X86: Implement "send IPI" hypercall
Using hypercall to send IPIs by one vmexit instead of one by one for xAPIC/x2APIC physical mode and one vmexit per-cluster for x2APIC cluster mode. Intel guest can enter x2apic cluster mode when interrupt remapping is enabled in qemu, however, latest AMD EPYC still just supports xapic mode which can get great improvement by Exit-less IPIs. This patchset lets a guest send multicast IPIs, with at most 128 destinations per hypercall in 64-bit mode and 64 vCPUs per hypercall in 32-bit mode. Hardware: Xeon Skylake 2.5GHz, 2 sockets, 40 cores, 80 threads, the VM is 80 vCPUs, IPI microbenchmark(https://lkml.org/lkml/2017/12/19/141): x2apic cluster mode, vanilla Dry-run: 0, 2392199 ns Self-IPI: 6907514, 15027589 ns Normal IPI: 223910476, 251301666 ns Broadcast IPI: 0, 9282161150 ns Broadcast lock: 0, 8812934104 ns x2apic cluster mode, pv-ipi Dry-run: 0, 2449341 ns Self-IPI: 6720360, 15028732 ns Normal IPI: 228643307, 255708477 ns Broadcast IPI: 0, 7572293590 ns => 22% performance boost Broadcast lock: 0, 8316124651 ns x2apic physical mode, vanilla Dry-run: 0, 3135933 ns Self-IPI: 8572670, 17901757 ns Normal IPI: 226444334, 255421709 ns Broadcast IPI: 0, 19845070887 ns Broadcast lock: 0, 19827383656 ns x2apic physical mode, pv-ipi Dry-run: 0, 2446381 ns Self-IPI: 6788217, 15021056 ns Normal IPI: 219454441, 249583458 ns Broadcast IPI: 0, 7806540019 ns => 154% performance boost Broadcast lock: 0, 9143618799 ns Cc: Paolo Bonzini <[email protected]> Cc: Radim Krčmář <[email protected]> Cc: Vitaly Kuznetsov <[email protected]> Signed-off-by: Wanpeng Li <[email protected]> Signed-off-by: Paolo Bonzini <[email protected]>
1 parent 74fec5b commit 4180bf1

File tree

7 files changed

+74
-1
lines changed

7 files changed

+74
-1
lines changed

Documentation/virtual/kvm/cpuid.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
6262
|| || can be enabled by setting bit 2
6363
|| || when writing to msr 0x4b564d02
6464
------------------------------------------------------------------------------
65+
KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit
66+
|| || before using paravirtualized
67+
|| || send IPIs.
68+
------------------------------------------------------------------------------
6569
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
6670
|| || per-cpu warps are expected in
6771
|| || kvmclock.

Documentation/virtual/kvm/hypercalls.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,23 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
121121

122122
Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
123123
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
124+
125+
6. KVM_HC_SEND_IPI
126+
------------------------
127+
Architecture: x86
128+
Status: active
129+
Purpose: Send IPIs to multiple vCPUs.
130+
131+
a0: lower part of the bitmap of destination APIC IDs
132+
a1: higher part of the bitmap of destination APIC IDs
133+
a2: the lowest APIC ID in bitmap
134+
a3: APIC ICR
135+
136+
The hypercall lets a guest send multicast IPIs, with at most 128
137+
destinations per hypercall in 64-bit mode and 64 vCPUs per
138+
hypercall in 32-bit mode. The destinations are represented by a
139+
bitmap contained in the first two arguments (a0 and a1). Bit 0 of
140+
a0 corresponds to the APIC ID in the third argument (a2), bit 1
141+
corresponds to the APIC ID a2+1, and so on.
142+
143+
Returns the number of CPUs to which the IPIs were delivered successfully.

arch/x86/include/asm/kvm_host.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,6 +1457,10 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
14571457
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
14581458
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
14591459

1460+
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
1461+
unsigned long ipi_bitmap_high, int min,
1462+
unsigned long icr, int op_64_bit);
1463+
14601464
void kvm_define_shared_msr(unsigned index, u32 msr);
14611465
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
14621466

arch/x86/kvm/cpuid.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
621621
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
622622
(1 << KVM_FEATURE_PV_UNHALT) |
623623
(1 << KVM_FEATURE_PV_TLB_FLUSH) |
624-
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
624+
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
625+
(1 << KVM_FEATURE_PV_SEND_IPI);
625626

626627
if (sched_info_on())
627628
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

arch/x86/kvm/lapic.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,46 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
547547
irq->level, irq->trig_mode, dest_map);
548548
}
549549

550+
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
551+
unsigned long ipi_bitmap_high, int min,
552+
unsigned long icr, int op_64_bit)
553+
{
554+
int i;
555+
struct kvm_apic_map *map;
556+
struct kvm_vcpu *vcpu;
557+
struct kvm_lapic_irq irq = {0};
558+
int cluster_size = op_64_bit ? 64 : 32;
559+
int count = 0;
560+
561+
irq.vector = icr & APIC_VECTOR_MASK;
562+
irq.delivery_mode = icr & APIC_MODE_MASK;
563+
irq.level = (icr & APIC_INT_ASSERT) != 0;
564+
irq.trig_mode = icr & APIC_INT_LEVELTRIG;
565+
566+
if (icr & APIC_DEST_MASK)
567+
return -KVM_EINVAL;
568+
if (icr & APIC_SHORT_MASK)
569+
return -KVM_EINVAL;
570+
571+
rcu_read_lock();
572+
map = rcu_dereference(kvm->arch.apic_map);
573+
574+
/* Bits above cluster_size are masked in the caller. */
575+
for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
576+
vcpu = map->phys_map[min + i]->vcpu;
577+
count += kvm_apic_set_irq(vcpu, &irq, NULL);
578+
}
579+
580+
min += cluster_size;
581+
for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
582+
vcpu = map->phys_map[min + i]->vcpu;
583+
count += kvm_apic_set_irq(vcpu, &irq, NULL);
584+
}
585+
586+
rcu_read_unlock();
587+
return count;
588+
}
589+
550590
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
551591
{
552592

arch/x86/kvm/x86.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6802,6 +6802,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
68026802
case KVM_HC_CLOCK_PAIRING:
68036803
ret = kvm_pv_clock_pairing(vcpu, a0, a1);
68046804
break;
6805+
case KVM_HC_SEND_IPI:
6806+
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
6807+
break;
68056808
#endif
68066809
default:
68076810
ret = -KVM_ENOSYS;

include/uapi/linux/kvm_para.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
/* Return values for hypercalls */
1414
#define KVM_ENOSYS 1000
1515
#define KVM_EFAULT EFAULT
16+
#define KVM_EINVAL EINVAL
1617
#define KVM_E2BIG E2BIG
1718
#define KVM_EPERM EPERM
1819
#define KVM_EOPNOTSUPP 95

0 commit comments

Comments
 (0)