
Commit e0b7ec0

paulusmack authored and agraf committed
KVM: PPC: Book3S HV: Align physical and virtual CPU thread numbers
On a threaded processor such as POWER7, we group VCPUs into virtual cores and arrange that the VCPUs in a virtual core run on the same physical core. Currently we don't enforce any correspondence between virtual thread numbers within a virtual core and physical thread numbers. Physical threads are allocated starting at 0 on a first-come first-served basis to runnable virtual threads (VCPUs).

POWER8 implements a new "msgsndp" instruction which guest kernels can use to interrupt other threads in the same core or sub-core. Since the instruction takes the destination physical thread ID as a parameter, it becomes necessary to align the physical thread IDs with the virtual thread IDs, that is, to make sure virtual thread N within a virtual core always runs on physical thread N.

This means that it's possible that thread 0, which is where we call __kvmppc_vcore_entry, may end up running some other vcpu than the one whose task called kvmppc_run_core(), or it may end up running no vcpu at all, if for example thread 0 of the virtual core is currently executing in userspace. However, we do need thread 0 to be responsible for switching the MMU -- a previous version of this patch that had other threads switching the MMU was found to be responsible for occasional memory corruption and machine check interrupts in the guest on POWER7 machines.

To accommodate this, we no longer pass the vcpu pointer to __kvmppc_vcore_entry, but instead let the assembly code load it from the PACA. Since the assembly code will need to know the kvm pointer and the thread ID for threads which don't have a vcpu, we move the thread ID into the PACA and we add a kvm pointer to the virtual core structure.

In the case where thread 0 has no vcpu to run, it still calls into kvmppc_hv_entry in order to do the MMU switch, and then naps until either its vcpu is ready to run in the guest, or some other thread needs to exit the guest. In the latter case, thread 0 jumps to the code that switches the MMU back to the host. This control flow means that now we switch the MMU before loading any guest vcpu state. Similarly, on guest exit we now save all the guest vcpu state before switching the MMU back to the host. This has required substantial code movement, making the diff rather large.

Signed-off-by: Paul Mackerras <[email protected]>
Signed-off-by: Alexander Graf <[email protected]>
1 parent eee7ff9 commit e0b7ec0
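The fixed virtual-to-physical thread mapping described in the message comes down to two assignments added at vcpu-creation time in the book3s_hv.c hunks below. The following is a minimal, self-contained sketch, not kernel code: the struct layouts are simplified stand-ins, and only the arithmetic mirrors the diff.

/*
 * Sketch of the thread-ID alignment introduced by this commit.
 * The two lines that matter correspond to the diff:
 *   vcore->first_vcpuid = core * threads_per_core;
 *   vcpu->arch.ptid     = vcpu->vcpu_id - vcore->first_vcpuid;
 */
#include <stdio.h>

#define THREADS_PER_CORE 8		/* illustrative; the real value comes from the CPU */

struct vcore_sketch { int first_vcpuid; };
struct vcpu_sketch  { int vcpu_id; int ptid; };

int main(void)
{
	struct vcore_sketch vcore;
	struct vcpu_sketch vcpu;
	int core = 2;					/* arbitrary virtual core number */

	vcore.first_vcpuid = core * THREADS_PER_CORE;	/* vcpu_id of virtual thread 0 */

	for (vcpu.vcpu_id = vcore.first_vcpuid;
	     vcpu.vcpu_id < vcore.first_vcpuid + THREADS_PER_CORE;
	     vcpu.vcpu_id++) {
		/* virtual thread N always lands on physical thread N,
		 * which is what the guest's msgsndp targeting relies on */
		vcpu.ptid = vcpu.vcpu_id - vcore.first_vcpuid;
		printf("vcpu %d -> physical thread %d\n", vcpu.vcpu_id, vcpu.ptid);
	}
	return 0;
}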

6 files changed, +397 −337 lines

arch/powerpc/include/asm/kvm_book3s_asm.h

Lines changed: 1 addition & 0 deletions
@@ -87,6 +87,7 @@ struct kvmppc_host_state {
 	u8 hwthread_req;
 	u8 hwthread_state;
 	u8 host_ipi;
+	u8 ptid;
 	struct kvm_vcpu *kvm_vcpu;
 	struct kvmppc_vcore *kvm_vcore;
 	unsigned long xics_phys;

arch/powerpc/include/asm/kvm_host.h

Lines changed: 2 additions & 0 deletions
@@ -288,6 +288,7 @@ struct kvmppc_vcore {
 	int n_woken;
 	int nap_count;
 	int napping_threads;
+	int first_vcpuid;
 	u16 pcpu;
 	u16 last_cpu;
 	u8 vcore_state;
@@ -298,6 +299,7 @@ struct kvmppc_vcore {
 	u64 stolen_tb;
 	u64 preempt_tb;
 	struct kvm_vcpu *runner;
+	struct kvm *kvm;
 	u64 tb_offset;		/* guest timebase - host timebase */
 	ulong lpcr;
 	u32 arch_compat;

arch/powerpc/kernel/asm-offsets.c

Lines changed: 2 additions & 1 deletion
@@ -506,14 +506,14 @@ int main(void)
 	DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
 	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
 	DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
-	DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
 	DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
 	DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
 	DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
 	DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
 	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
 	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
+	DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
 	DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
 	DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
 	DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
@@ -583,6 +583,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
 	HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
 	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
+	HSTATE_FIELD(HSTATE_PTID, ptid);
 	HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
 	HSTATE_FIELD(HSTATE_PMC, host_pmc);
 	HSTATE_FIELD(HSTATE_PURR, host_purr);

arch/powerpc/kvm/book3s_hv.c

Lines changed: 17 additions & 29 deletions
@@ -990,6 +990,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 		init_waitqueue_head(&vcore->wq);
 		vcore->preempt_tb = TB_NIL;
 		vcore->lpcr = kvm->arch.lpcr;
+		vcore->first_vcpuid = core * threads_per_core;
+		vcore->kvm = kvm;
 	}
 	kvm->arch.vcores[core] = vcore;
 	kvm->arch.online_vcores++;
@@ -1003,6 +1005,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	++vcore->num_threads;
 	spin_unlock(&vcore->lock);
 	vcpu->arch.vcore = vcore;
+	vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
 
 	vcpu->arch.cpu_type = KVM_CPU_3S_64;
 	kvmppc_sanity_check(vcpu);
@@ -1066,7 +1069,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 	}
 }
 
-extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+extern void __kvmppc_vcore_entry(void);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 				   struct kvm_vcpu *vcpu)
@@ -1140,15 +1143,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
 	tpaca = &paca[cpu];
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
 	tpaca->kvm_hstate.kvm_vcore = vc;
-	tpaca->kvm_hstate.napping = 0;
+	tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
 	vcpu->cpu = vc->pcpu;
 	smp_wmb();
 #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
-	if (vcpu->arch.ptid) {
+	if (cpu != smp_processor_id()) {
 #ifdef CONFIG_KVM_XICS
 		xics_wake_cpu(cpu);
 #endif
-		++vc->n_woken;
+		if (vcpu->arch.ptid)
+			++vc->n_woken;
 	}
 #endif
 }
@@ -1205,10 +1209,10 @@ static int on_primary_thread(void)
  */
 static void kvmppc_run_core(struct kvmppc_vcore *vc)
 {
-	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
+	struct kvm_vcpu *vcpu, *vnext;
 	long ret;
 	u64 now;
-	int ptid, i, need_vpa_update;
+	int i, need_vpa_update;
 	int srcu_idx;
 	struct kvm_vcpu *vcpus_to_update[threads_per_core];
 
@@ -1245,25 +1249,6 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 		spin_lock(&vc->lock);
 	}
 
-	/*
-	 * Assign physical thread IDs, first to non-ceded vcpus
-	 * and then to ceded ones.
-	 */
-	ptid = 0;
-	vcpu0 = NULL;
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-		if (!vcpu->arch.ceded) {
-			if (!ptid)
-				vcpu0 = vcpu;
-			vcpu->arch.ptid = ptid++;
-		}
-	}
-	if (!vcpu0)
-		goto out;	/* nothing to run; should never happen */
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
-		if (vcpu->arch.ceded)
-			vcpu->arch.ptid = ptid++;
-
 	/*
 	 * Make sure we are running on thread 0, and that
 	 * secondary threads are offline.
@@ -1280,15 +1265,19 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 		kvmppc_create_dtl_entry(vcpu, vc);
 	}
 
+	/* Set this explicitly in case thread 0 doesn't have a vcpu */
+	get_paca()->kvm_hstate.kvm_vcore = vc;
+	get_paca()->kvm_hstate.ptid = 0;
+
 	vc->vcore_state = VCORE_RUNNING;
 	preempt_disable();
 	spin_unlock(&vc->lock);
 
 	kvm_guest_enter();
 
-	srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
+	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
-	__kvmppc_vcore_entry(NULL, vcpu0);
+	__kvmppc_vcore_entry();
 
 	spin_lock(&vc->lock);
 	/* disable sending of IPIs on virtual external irqs */
@@ -1303,7 +1292,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	vc->vcore_state = VCORE_EXITING;
 	spin_unlock(&vc->lock);
 
-	srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);
+	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
@@ -1411,7 +1400,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	if (!signal_pending(current)) {
 		if (vc->vcore_state == VCORE_RUNNING &&
 		    VCORE_EXIT_COUNT(vc) == 0) {
-			vcpu->arch.ptid = vc->n_runnable - 1;
 			kvmppc_create_dtl_entry(vcpu, vc);
 			kvmppc_start_thread(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
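One consequence of the fixed mapping, visible in the kvmppc_start_thread() hunk above, is that the wake decision can no longer be based on the ptid alone: the task starting a vcpu is not necessarily running on that vcpu's target thread. The following is a small illustrative sketch of that decision in plain C, with made-up CPU numbers standing in for the paca/xics details; it is not kernel code.

/*
 * Sketch of the revised wake logic in kvmppc_start_thread():
 * "if (vcpu->arch.ptid)" becomes "if (cpu != smp_processor_id())",
 * and n_woken is still only incremented for secondary (ptid != 0) threads.
 */
#include <stdio.h>

struct vcpu_sketch { int target_cpu; int ptid; };

int main(void)
{
	int this_cpu = 40;	/* pretend we are physical thread 0 of the core */
	int n_woken = 0;
	struct vcpu_sketch vcpus[] = { {40, 0}, {41, 1}, {42, 2} };

	for (int i = 0; i < 3; i++) {
		if (vcpus[i].target_cpu != this_cpu) {	/* was: if (ptid) */
			/* the real code calls xics_wake_cpu(cpu) here */
			printf("IPI to cpu %d\n", vcpus[i].target_cpu);
			if (vcpus[i].ptid)
				n_woken++;
		}
	}
	printf("secondary threads woken: %d\n", n_woken);
	return 0;
}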

arch/powerpc/kvm/book3s_hv_interrupts.S

Lines changed: 2 additions & 4 deletions
@@ -35,7 +35,7 @@
 ****************************************************************************/
 
 /* Registers:
- *  r4: vcpu pointer
+ *  none
 */
 _GLOBAL(__kvmppc_vcore_entry)
 
@@ -71,7 +71,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mtmsrd	r10,1
 
 	/* Save host PMU registers */
-	/* R4 is live here (vcpu pointer) but not r3 or r5 */
 	li	r3, 1
 	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
 	mfspr	r7, SPRN_MMCR0		/* save MMCR0 */
@@ -136,16 +135,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	 * enters the guest with interrupts enabled.
 	 */
 BEGIN_FTR_SECTION
+	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r0, VCPU_PENDING_EXC(r4)
 	li	r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
 	oris	r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
 	and.	r0, r0, r7
 	beq	32f
-	mr	r31, r4
 	lhz	r3, PACAPACAINDEX(r13)
 	bl	smp_send_reschedule
 	nop
-	mr	r4, r31
 32:
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 #endif /* CONFIG_SMP */
