@@ -124,6 +124,51 @@ static u64 get_cc_mask(void)
124124 return BIT_ULL (gpa_width - 1 );
125125}
126126
127+ /*
128+ * The TDX module spec states that #VE may be injected for a limited set of
129+ * reasons:
130+ *
131+ * - Emulation of the architectural #VE injection on EPT violation;
132+ *
133+ * - As a result of guest TD execution of a disallowed instruction,
134+ * a disallowed MSR access, or CPUID virtualization;
135+ *
136+ * - A notification to the guest TD about anomalous behavior;
137+ *
138+ * The last one is opt-in and is not used by the kernel.
139+ *
140+ * The Intel Software Developer's Manual describes cases when instruction
141+ * length field can be used in section "Information for VM Exits Due to
142+ * Instruction Execution".
143+ *
144+ * For TDX, it ultimately means GET_VEINFO provides reliable instruction length
145+ * information if #VE occurred due to instruction execution, but not for EPT
146+ * violations.
147+ */
148+ static int ve_instr_len (struct ve_info * ve )
149+ {
150+ switch (ve -> exit_reason ) {
151+ case EXIT_REASON_HLT :
152+ case EXIT_REASON_MSR_READ :
153+ case EXIT_REASON_MSR_WRITE :
154+ case EXIT_REASON_CPUID :
155+ case EXIT_REASON_IO_INSTRUCTION :
156+ /* It is safe to use ve->instr_len for #VE due instructions */
157+ return ve -> instr_len ;
158+ case EXIT_REASON_EPT_VIOLATION :
159+ /*
160+ * For EPT violations, ve->insn_len is not defined. For those,
161+ * the kernel must decode instructions manually and should not
162+ * be using this function.
163+ */
164+ WARN_ONCE (1 , "ve->instr_len is not defined for EPT violations" );
165+ return 0 ;
166+ default :
167+ WARN_ONCE (1 , "Unexpected #VE-type: %lld\n" , ve -> exit_reason );
168+ return ve -> instr_len ;
169+ }
170+ }
171+
127172static u64 __cpuidle __halt (const bool irq_disabled , const bool do_sti )
128173{
129174 struct tdx_hypercall_args args = {
@@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
147192 return __tdx_hypercall (& args , do_sti ? TDX_HCALL_ISSUE_STI : 0 );
148193}
149194
150- static bool handle_halt (void )
195+ static int handle_halt (struct ve_info * ve )
151196{
152197 /*
153198 * Since non safe halt is mainly used in CPU offlining
@@ -158,9 +203,9 @@ static bool handle_halt(void)
158203 const bool do_sti = false;
159204
160205 if (__halt (irq_disabled , do_sti ))
161- return false ;
206+ return - EIO ;
162207
163- return true ;
208+ return ve_instr_len ( ve ) ;
164209}
165210
166211void __cpuidle tdx_safe_halt (void )
@@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void)
180225 WARN_ONCE (1 , "HLT instruction emulation failed\n" );
181226}
182227
183- static bool read_msr (struct pt_regs * regs )
228+ static int read_msr (struct pt_regs * regs , struct ve_info * ve )
184229{
185230 struct tdx_hypercall_args args = {
186231 .r10 = TDX_HYPERCALL_STANDARD ,
@@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs)
194239 * (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
195240 */
196241 if (__tdx_hypercall (& args , TDX_HCALL_HAS_OUTPUT ))
197- return false ;
242+ return - EIO ;
198243
199244 regs -> ax = lower_32_bits (args .r11 );
200245 regs -> dx = upper_32_bits (args .r11 );
201- return true ;
246+ return ve_instr_len ( ve ) ;
202247}
203248
204- static bool write_msr (struct pt_regs * regs )
249+ static int write_msr (struct pt_regs * regs , struct ve_info * ve )
205250{
206251 struct tdx_hypercall_args args = {
207252 .r10 = TDX_HYPERCALL_STANDARD ,
@@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs)
215260 * can be found in TDX Guest-Host-Communication Interface
216261 * (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
217262 */
218- return !__tdx_hypercall (& args , 0 );
263+ if (__tdx_hypercall (& args , 0 ))
264+ return - EIO ;
265+
266+ return ve_instr_len (ve );
219267}
220268
221- static bool handle_cpuid (struct pt_regs * regs )
269+ static int handle_cpuid (struct pt_regs * regs , struct ve_info * ve )
222270{
223271 struct tdx_hypercall_args args = {
224272 .r10 = TDX_HYPERCALL_STANDARD ,
@@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs)
236284 */
237285 if (regs -> ax < 0x40000000 || regs -> ax > 0x4FFFFFFF ) {
238286 regs -> ax = regs -> bx = regs -> cx = regs -> dx = 0 ;
239- return true ;
287+ return ve_instr_len ( ve ) ;
240288 }
241289
242290 /*
@@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs)
245293 * (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
246294 */
247295 if (__tdx_hypercall (& args , TDX_HCALL_HAS_OUTPUT ))
248- return false ;
296+ return - EIO ;
249297
250298 /*
251299 * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
@@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs)
257305 regs -> cx = args .r14 ;
258306 regs -> dx = args .r15 ;
259307
260- return true ;
308+ return ve_instr_len ( ve ) ;
261309}
262310
263311static bool mmio_read (int size , unsigned long addr , unsigned long * val )
@@ -283,7 +331,7 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val)
283331 EPT_WRITE , addr , val );
284332}
285333
286- static bool handle_mmio (struct pt_regs * regs , struct ve_info * ve )
334+ static int handle_mmio (struct pt_regs * regs , struct ve_info * ve )
287335{
288336 char buffer [MAX_INSN_SIZE ];
289337 unsigned long * reg , val ;
@@ -294,34 +342,36 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
294342
295343 /* Only in-kernel MMIO is supported */
296344 if (WARN_ON_ONCE (user_mode (regs )))
297- return false ;
345+ return - EFAULT ;
298346
299347 if (copy_from_kernel_nofault (buffer , (void * )regs -> ip , MAX_INSN_SIZE ))
300- return false ;
348+ return - EFAULT ;
301349
302350 if (insn_decode (& insn , buffer , MAX_INSN_SIZE , INSN_MODE_64 ))
303- return false ;
351+ return - EINVAL ;
304352
305353 mmio = insn_decode_mmio (& insn , & size );
306354 if (WARN_ON_ONCE (mmio == MMIO_DECODE_FAILED ))
307- return false ;
355+ return - EINVAL ;
308356
309357 if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS ) {
310358 reg = insn_get_modrm_reg_ptr (& insn , regs );
311359 if (!reg )
312- return false ;
360+ return - EINVAL ;
313361 }
314362
315- ve -> instr_len = insn .length ;
316-
317363 /* Handle writes first */
318364 switch (mmio ) {
319365 case MMIO_WRITE :
320366 memcpy (& val , reg , size );
321- return mmio_write (size , ve -> gpa , val );
367+ if (!mmio_write (size , ve -> gpa , val ))
368+ return - EIO ;
369+ return insn .length ;
322370 case MMIO_WRITE_IMM :
323371 val = insn .immediate .value ;
324- return mmio_write (size , ve -> gpa , val );
372+ if (!mmio_write (size , ve -> gpa , val ))
373+ return - EIO ;
374+ return insn .length ;
325375 case MMIO_READ :
326376 case MMIO_READ_ZERO_EXTEND :
327377 case MMIO_READ_SIGN_EXTEND :
@@ -334,15 +384,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
334384 * decoded or handled properly. It was likely not using io.h
335385 * helpers or accessed MMIO accidentally.
336386 */
337- return false ;
387+ return - EINVAL ;
338388 default :
339389 WARN_ONCE (1 , "Unknown insn_decode_mmio() decode value?" );
340- return false ;
390+ return - EINVAL ;
341391 }
342392
343393 /* Handle reads */
344394 if (!mmio_read (size , ve -> gpa , & val ))
345- return false ;
395+ return - EIO ;
346396
347397 switch (mmio ) {
348398 case MMIO_READ :
@@ -364,13 +414,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
364414 default :
365415 /* All other cases have to be covered by the first switch() */
366416 WARN_ON_ONCE (1 );
367- return false ;
417+ return - EINVAL ;
368418 }
369419
370420 if (extend_size )
371421 memset (reg , extend_val , extend_size );
372422 memcpy (reg , & val , size );
373- return true ;
423+ return insn . length ;
374424}
375425
376426static bool handle_in (struct pt_regs * regs , int size , int port )
@@ -421,23 +471,28 @@ static bool handle_out(struct pt_regs *regs, int size, int port)
421471 *
422472 * Return the number of bytes RIP should be advanced (>= 0) on success, or -errno on failure.
423473 */
424- static bool handle_io (struct pt_regs * regs , u32 exit_qual )
474+ static int handle_io (struct pt_regs * regs , struct ve_info * ve )
425475{
476+ u32 exit_qual = ve -> exit_qual ;
426477 int size , port ;
427- bool in ;
478+ bool in , ret ;
428479
429480 if (VE_IS_IO_STRING (exit_qual ))
430- return false ;
481+ return - EIO ;
431482
432483 in = VE_IS_IO_IN (exit_qual );
433484 size = VE_GET_IO_SIZE (exit_qual );
434485 port = VE_GET_PORT_NUM (exit_qual );
435486
436487
437488 if (in )
438- return handle_in (regs , size , port );
489+ ret = handle_in (regs , size , port );
439490 else
440- return handle_out (regs , size , port );
491+ ret = handle_out (regs , size , port );
492+ if (!ret )
493+ return - EIO ;
494+
495+ return ve_instr_len (ve );
441496}
442497
443498/*
@@ -447,17 +502,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
447502__init bool tdx_early_handle_ve (struct pt_regs * regs )
448503{
449504 struct ve_info ve ;
450- bool ret ;
505+ int insn_len ;
451506
452507 tdx_get_ve_info (& ve );
453508
454509 if (ve .exit_reason != EXIT_REASON_IO_INSTRUCTION )
455510 return false;
456511
457- ret = handle_io (regs , ve .exit_qual );
458- if (ret )
459- regs -> ip += ve .instr_len ;
460- return ret ;
512+ insn_len = handle_io (regs , & ve );
513+ if (insn_len < 0 )
514+ return false;
515+
516+ regs -> ip += insn_len ;
517+ return true;
461518}
462519
463520void tdx_get_ve_info (struct ve_info * ve )
@@ -490,54 +547,65 @@ void tdx_get_ve_info(struct ve_info *ve)
490547 ve -> instr_info = upper_32_bits (out .r10 );
491548}
492549
493- /* Handle the user initiated #VE */
494- static bool virt_exception_user (struct pt_regs * regs , struct ve_info * ve )
550+ /*
551+ * Handle the user initiated #VE.
552+ *
553+ * On success, returns the number of bytes RIP should be incremented (>=0)
554+ * or -errno on error.
555+ */
556+ static int virt_exception_user (struct pt_regs * regs , struct ve_info * ve )
495557{
496558 switch (ve -> exit_reason ) {
497559 case EXIT_REASON_CPUID :
498- return handle_cpuid (regs );
560+ return handle_cpuid (regs , ve );
499561 default :
500562 pr_warn ("Unexpected #VE: %lld\n" , ve -> exit_reason );
501- return false ;
563+ return - EIO ;
502564 }
503565}
504566
505- /* Handle the kernel #VE */
506- static bool virt_exception_kernel (struct pt_regs * regs , struct ve_info * ve )
567+ /*
568+ * Handle the kernel #VE.
569+ *
570+ * On success, returns the number of bytes RIP should be incremented (>=0)
571+ * or -errno on error.
572+ */
573+ static int virt_exception_kernel (struct pt_regs * regs , struct ve_info * ve )
507574{
508575 switch (ve -> exit_reason ) {
509576 case EXIT_REASON_HLT :
510- return handle_halt ();
577+ return handle_halt (ve );
511578 case EXIT_REASON_MSR_READ :
512- return read_msr (regs );
579+ return read_msr (regs , ve );
513580 case EXIT_REASON_MSR_WRITE :
514- return write_msr (regs );
581+ return write_msr (regs , ve );
515582 case EXIT_REASON_CPUID :
516- return handle_cpuid (regs );
583+ return handle_cpuid (regs , ve );
517584 case EXIT_REASON_EPT_VIOLATION :
518585 return handle_mmio (regs , ve );
519586 case EXIT_REASON_IO_INSTRUCTION :
520- return handle_io (regs , ve -> exit_qual );
587+ return handle_io (regs , ve );
521588 default :
522589 pr_warn ("Unexpected #VE: %lld\n" , ve -> exit_reason );
523- return false ;
590+ return - EIO ;
524591 }
525592}
526593
527594bool tdx_handle_virt_exception (struct pt_regs * regs , struct ve_info * ve )
528595{
529- bool ret ;
596+ int insn_len ;
530597
531598 if (user_mode (regs ))
532- ret = virt_exception_user (regs , ve );
599+ insn_len = virt_exception_user (regs , ve );
533600 else
534- ret = virt_exception_kernel (regs , ve );
601+ insn_len = virt_exception_kernel (regs , ve );
602+ if (insn_len < 0 )
603+ return false;
535604
536605 /* After successful #VE handling, move the IP */
537- if (ret )
538- regs -> ip += ve -> instr_len ;
606+ regs -> ip += insn_len ;
539607
540- return ret ;
608+ return true ;
541609}
542610
543611static bool tdx_tlb_flush_required (bool private )
0 commit comments