@@ -124,6 +124,51 @@ static u64 get_cc_mask(void)
124124 return BIT_ULL (gpa_width - 1 );
125125}
126126
127+ /*
128+ * The TDX module spec states that #VE may be injected for a limited set of
129+ * reasons:
130+ *
131+ * - Emulation of the architectural #VE injection on EPT violation;
132+ *
133+ * - As a result of guest TD execution of a disallowed instruction,
134+ * a disallowed MSR access, or CPUID virtualization;
135+ *
136+ * - A notification to the guest TD about anomalous behavior;
137+ *
138+ * The last one is opt-in and is not used by the kernel.
139+ *
140+ * The Intel Software Developer's Manual describes cases when instruction
141+ * length field can be used in section "Information for VM Exits Due to
142+ * Instruction Execution".
143+ *
144+ * For TDX, it ultimately means GET_VEINFO provides reliable instruction length
145+ * information if #VE occurred due to instruction execution, but not for EPT
146+ * violations.
147+ */
148+ static int ve_instr_len (struct ve_info * ve )
149+ {
150+ switch (ve -> exit_reason ) {
151+ case EXIT_REASON_HLT :
152+ case EXIT_REASON_MSR_READ :
153+ case EXIT_REASON_MSR_WRITE :
154+ case EXIT_REASON_CPUID :
155+ case EXIT_REASON_IO_INSTRUCTION :
156+ /* It is safe to use ve->instr_len for #VE due instructions */
157+ return ve -> instr_len ;
158+ case EXIT_REASON_EPT_VIOLATION :
159+ /*
160+ * For EPT violations, ve->insn_len is not defined. For those,
161+ * the kernel must decode instructions manually and should not
162+ * be using this function.
163+ */
164+ WARN_ONCE (1 , "ve->instr_len is not defined for EPT violations" );
165+ return 0 ;
166+ default :
167+ WARN_ONCE (1 , "Unexpected #VE-type: %lld\n" , ve -> exit_reason );
168+ return ve -> instr_len ;
169+ }
170+ }
171+
127172static u64 __cpuidle __halt (const bool irq_disabled , const bool do_sti )
128173{
129174 struct tdx_hypercall_args args = {
@@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
147192 return __tdx_hypercall (& args , do_sti ? TDX_HCALL_ISSUE_STI : 0 );
148193}
149194
150- static bool handle_halt (void )
195+ static int handle_halt (struct ve_info * ve )
151196{
152197 /*
153198 * Since non safe halt is mainly used in CPU offlining
@@ -158,9 +203,9 @@ static bool handle_halt(void)
158203 const bool do_sti = false;
159204
160205 if (__halt (irq_disabled , do_sti ))
161- return false ;
206+ return - EIO ;
162207
163- return true ;
208+ return ve_instr_len ( ve ) ;
164209}
165210
166211void __cpuidle tdx_safe_halt (void )
@@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void)
180225 WARN_ONCE (1 , "HLT instruction emulation failed\n" );
181226}
182227
183- static bool read_msr (struct pt_regs * regs )
228+ static int read_msr (struct pt_regs * regs , struct ve_info * ve )
184229{
185230 struct tdx_hypercall_args args = {
186231 .r10 = TDX_HYPERCALL_STANDARD ,
@@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs)
194239 * (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
195240 */
196241 if (__tdx_hypercall (& args , TDX_HCALL_HAS_OUTPUT ))
197- return false ;
242+ return - EIO ;
198243
199244 regs -> ax = lower_32_bits (args .r11 );
200245 regs -> dx = upper_32_bits (args .r11 );
201- return true ;
246+ return ve_instr_len ( ve ) ;
202247}
203248
204- static bool write_msr (struct pt_regs * regs )
249+ static int write_msr (struct pt_regs * regs , struct ve_info * ve )
205250{
206251 struct tdx_hypercall_args args = {
207252 .r10 = TDX_HYPERCALL_STANDARD ,
@@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs)
215260 * can be found in TDX Guest-Host-Communication Interface
216261 * (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
217262 */
218- return !__tdx_hypercall (& args , 0 );
263+ if (__tdx_hypercall (& args , 0 ))
264+ return - EIO ;
265+
266+ return ve_instr_len (ve );
219267}
220268
221- static bool handle_cpuid (struct pt_regs * regs )
269+ static int handle_cpuid (struct pt_regs * regs , struct ve_info * ve )
222270{
223271 struct tdx_hypercall_args args = {
224272 .r10 = TDX_HYPERCALL_STANDARD ,
@@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs)
236284 */
237285 if (regs -> ax < 0x40000000 || regs -> ax > 0x4FFFFFFF ) {
238286 regs -> ax = regs -> bx = regs -> cx = regs -> dx = 0 ;
239- return true ;
287+ return ve_instr_len ( ve ) ;
240288 }
241289
242290 /*
@@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs)
245293 * (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
246294 */
247295 if (__tdx_hypercall (& args , TDX_HCALL_HAS_OUTPUT ))
248- return false ;
296+ return - EIO ;
249297
250298 /*
251299 * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
@@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs)
257305 regs -> cx = args .r14 ;
258306 regs -> dx = args .r15 ;
259307
260- return true ;
308+ return ve_instr_len ( ve ) ;
261309}
262310
263311static bool mmio_read (int size , unsigned long addr , unsigned long * val )
@@ -283,45 +331,60 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val)
283331 EPT_WRITE , addr , val );
284332}
285333
286- static bool handle_mmio (struct pt_regs * regs , struct ve_info * ve )
334+ static int handle_mmio (struct pt_regs * regs , struct ve_info * ve )
287335{
336+ unsigned long * reg , val , vaddr ;
288337 char buffer [MAX_INSN_SIZE ];
289- unsigned long * reg , val ;
290338 struct insn insn = {};
291339 enum mmio_type mmio ;
292340 int size , extend_size ;
293341 u8 extend_val = 0 ;
294342
295343 /* Only in-kernel MMIO is supported */
296344 if (WARN_ON_ONCE (user_mode (regs )))
297- return false ;
345+ return - EFAULT ;
298346
299347 if (copy_from_kernel_nofault (buffer , (void * )regs -> ip , MAX_INSN_SIZE ))
300- return false ;
348+ return - EFAULT ;
301349
302350 if (insn_decode (& insn , buffer , MAX_INSN_SIZE , INSN_MODE_64 ))
303- return false ;
351+ return - EINVAL ;
304352
305353 mmio = insn_decode_mmio (& insn , & size );
306354 if (WARN_ON_ONCE (mmio == MMIO_DECODE_FAILED ))
307- return false ;
355+ return - EINVAL ;
308356
309357 if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS ) {
310358 reg = insn_get_modrm_reg_ptr (& insn , regs );
311359 if (!reg )
312- return false ;
360+ return - EINVAL ;
313361 }
314362
315- ve -> instr_len = insn .length ;
363+ /*
364+ * Reject EPT violation #VEs that split pages.
365+ *
366+ * MMIO accesses are supposed to be naturally aligned and therefore
367+ * never cross page boundaries. Seeing split page accesses indicates
368+ * a bug or a load_unaligned_zeropad() that stepped into an MMIO page.
369+ *
370+ * load_unaligned_zeropad() will recover using exception fixups.
371+ */
372+ vaddr = (unsigned long )insn_get_addr_ref (& insn , regs );
373+ if (vaddr / PAGE_SIZE != (vaddr + size - 1 ) / PAGE_SIZE )
374+ return - EFAULT ;
316375
317376 /* Handle writes first */
318377 switch (mmio ) {
319378 case MMIO_WRITE :
320379 memcpy (& val , reg , size );
321- return mmio_write (size , ve -> gpa , val );
380+ if (!mmio_write (size , ve -> gpa , val ))
381+ return - EIO ;
382+ return insn .length ;
322383 case MMIO_WRITE_IMM :
323384 val = insn .immediate .value ;
324- return mmio_write (size , ve -> gpa , val );
385+ if (!mmio_write (size , ve -> gpa , val ))
386+ return - EIO ;
387+ return insn .length ;
325388 case MMIO_READ :
326389 case MMIO_READ_ZERO_EXTEND :
327390 case MMIO_READ_SIGN_EXTEND :
@@ -334,15 +397,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
334397 * decoded or handled properly. It was likely not using io.h
335398 * helpers or accessed MMIO accidentally.
336399 */
337- return false ;
400+ return - EINVAL ;
338401 default :
339402 WARN_ONCE (1 , "Unknown insn_decode_mmio() decode value?" );
340- return false ;
403+ return - EINVAL ;
341404 }
342405
343406 /* Handle reads */
344407 if (!mmio_read (size , ve -> gpa , & val ))
345- return false ;
408+ return - EIO ;
346409
347410 switch (mmio ) {
348411 case MMIO_READ :
@@ -364,13 +427,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
364427 default :
365428 /* All other cases has to be covered with the first switch() */
366429 WARN_ON_ONCE (1 );
367- return false ;
430+ return - EINVAL ;
368431 }
369432
370433 if (extend_size )
371434 memset (reg , extend_val , extend_size );
372435 memcpy (reg , & val , size );
373- return true ;
436+ return insn . length ;
374437}
375438
376439static bool handle_in (struct pt_regs * regs , int size , int port )
@@ -421,23 +484,28 @@ static bool handle_out(struct pt_regs *regs, int size, int port)
421484 *
422485 * Return True on success or False on failure.
423486 */
424- static bool handle_io (struct pt_regs * regs , u32 exit_qual )
487+ static int handle_io (struct pt_regs * regs , struct ve_info * ve )
425488{
489+ u32 exit_qual = ve -> exit_qual ;
426490 int size , port ;
427- bool in ;
491+ bool in , ret ;
428492
429493 if (VE_IS_IO_STRING (exit_qual ))
430- return false ;
494+ return - EIO ;
431495
432496 in = VE_IS_IO_IN (exit_qual );
433497 size = VE_GET_IO_SIZE (exit_qual );
434498 port = VE_GET_PORT_NUM (exit_qual );
435499
436500
437501 if (in )
438- return handle_in (regs , size , port );
502+ ret = handle_in (regs , size , port );
439503 else
440- return handle_out (regs , size , port );
504+ ret = handle_out (regs , size , port );
505+ if (!ret )
506+ return - EIO ;
507+
508+ return ve_instr_len (ve );
441509}
442510
443511/*
@@ -447,13 +515,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
447515__init bool tdx_early_handle_ve (struct pt_regs * regs )
448516{
449517 struct ve_info ve ;
518+ int insn_len ;
450519
451520 tdx_get_ve_info (& ve );
452521
453522 if (ve .exit_reason != EXIT_REASON_IO_INSTRUCTION )
454523 return false;
455524
456- return handle_io (regs , ve .exit_qual );
525+ insn_len = handle_io (regs , & ve );
526+ if (insn_len < 0 )
527+ return false;
528+
529+ regs -> ip += insn_len ;
530+ return true;
457531}
458532
459533void tdx_get_ve_info (struct ve_info * ve )
@@ -486,54 +560,65 @@ void tdx_get_ve_info(struct ve_info *ve)
486560 ve -> instr_info = upper_32_bits (out .r10 );
487561}
488562
489- /* Handle the user initiated #VE */
490- static bool virt_exception_user (struct pt_regs * regs , struct ve_info * ve )
563+ /*
564+ * Handle the user initiated #VE.
565+ *
566+ * On success, returns the number of bytes RIP should be incremented (>=0)
567+ * or -errno on error.
568+ */
569+ static int virt_exception_user (struct pt_regs * regs , struct ve_info * ve )
491570{
492571 switch (ve -> exit_reason ) {
493572 case EXIT_REASON_CPUID :
494- return handle_cpuid (regs );
573+ return handle_cpuid (regs , ve );
495574 default :
496575 pr_warn ("Unexpected #VE: %lld\n" , ve -> exit_reason );
497- return false ;
576+ return - EIO ;
498577 }
499578}
500579
501- /* Handle the kernel #VE */
502- static bool virt_exception_kernel (struct pt_regs * regs , struct ve_info * ve )
580+ /*
581+ * Handle the kernel #VE.
582+ *
583+ * On success, returns the number of bytes RIP should be incremented (>=0)
584+ * or -errno on error.
585+ */
586+ static int virt_exception_kernel (struct pt_regs * regs , struct ve_info * ve )
503587{
504588 switch (ve -> exit_reason ) {
505589 case EXIT_REASON_HLT :
506- return handle_halt ();
590+ return handle_halt (ve );
507591 case EXIT_REASON_MSR_READ :
508- return read_msr (regs );
592+ return read_msr (regs , ve );
509593 case EXIT_REASON_MSR_WRITE :
510- return write_msr (regs );
594+ return write_msr (regs , ve );
511595 case EXIT_REASON_CPUID :
512- return handle_cpuid (regs );
596+ return handle_cpuid (regs , ve );
513597 case EXIT_REASON_EPT_VIOLATION :
514598 return handle_mmio (regs , ve );
515599 case EXIT_REASON_IO_INSTRUCTION :
516- return handle_io (regs , ve -> exit_qual );
600+ return handle_io (regs , ve );
517601 default :
518602 pr_warn ("Unexpected #VE: %lld\n" , ve -> exit_reason );
519- return false ;
603+ return - EIO ;
520604 }
521605}
522606
523607bool tdx_handle_virt_exception (struct pt_regs * regs , struct ve_info * ve )
524608{
525- bool ret ;
609+ int insn_len ;
526610
527611 if (user_mode (regs ))
528- ret = virt_exception_user (regs , ve );
612+ insn_len = virt_exception_user (regs , ve );
529613 else
530- ret = virt_exception_kernel (regs , ve );
614+ insn_len = virt_exception_kernel (regs , ve );
615+ if (insn_len < 0 )
616+ return false;
531617
532618 /* After successful #VE handling, move the IP */
533- if (ret )
534- regs -> ip += ve -> instr_len ;
619+ regs -> ip += insn_len ;
535620
536- return ret ;
621+ return true ;
537622}
538623
539624static bool tdx_tlb_flush_required (bool private )
0 commit comments