@@ -200,6 +200,7 @@ struct io_ring_ctx {
 		struct io_uring_sqe	*sq_sqes;
 
 		struct list_head	defer_list;
+		struct list_head	timeout_list;
 	} ____cacheline_aligned_in_smp;
 
 	/* IO offload */
@@ -216,6 +217,7 @@ struct io_ring_ctx {
 		struct wait_queue_head	cq_wait;
 		struct fasync_struct	*cq_fasync;
 		struct eventfd_ctx	*cq_ev_fd;
+		atomic_t		cq_timeouts;
 	} ____cacheline_aligned_in_smp;
 
 	struct io_rings	*rings;
@@ -283,6 +285,11 @@ struct io_poll_iocb {
 	struct wait_queue_entry		wait;
 };
 
+struct io_timeout {
+	struct file			*file;
+	struct hrtimer			timer;
+};
+
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -294,6 +301,7 @@ struct io_kiocb {
 		struct file		*file;
 		struct kiocb		rw;
 		struct io_poll_iocb	poll;
+		struct io_timeout	timeout;
 	};
 
 	struct sqe_submit	submit;
@@ -313,6 +321,7 @@ struct io_kiocb {
 #define REQ_F_LINK_DONE		128	/* linked sqes done */
 #define REQ_F_FAIL_LINK		256	/* fail rest of links */
 #define REQ_F_SHADOW_DRAIN	512	/* link-drain shadow req */
+#define REQ_F_TIMEOUT		1024	/* timeout request */
 	u64			user_data;
 	u32			result;
 	u32			sequence;
@@ -344,6 +353,8 @@ struct io_submit_state {
 };
 
 static void io_sq_wq_submit_work(struct work_struct *work);
+static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+				 long res);
 static void __io_free_req(struct io_kiocb *req);
 
 static struct kmem_cache *req_cachep;
@@ -400,26 +411,30 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->poll_list);
 	INIT_LIST_HEAD(&ctx->cancel_list);
 	INIT_LIST_HEAD(&ctx->defer_list);
+	INIT_LIST_HEAD(&ctx->timeout_list);
 	return ctx;
 }
 
 static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
 				     struct io_kiocb *req)
 {
-	if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
+	/* timeout requests always honor sequence */
+	if (!(req->flags & REQ_F_TIMEOUT) &&
+	    (req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
 		return false;
 
 	return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
 }
 
-static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx,
+					      struct list_head *list)
 {
 	struct io_kiocb *req;
 
-	if (list_empty(&ctx->defer_list))
+	if (list_empty(list))
 		return NULL;
 
-	req = list_first_entry(&ctx->defer_list, struct io_kiocb, list);
+	req = list_first_entry(list, struct io_kiocb, list);
 	if (!io_sequence_defer(ctx, req)) {
 		list_del_init(&req->list);
 		return req;
@@ -428,6 +443,16 @@ static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
 	return NULL;
 }
 
+static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+{
+	return __io_get_deferred_req(ctx, &ctx->defer_list);
+}
+
+static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
+{
+	return __io_get_deferred_req(ctx, &ctx->timeout_list);
+}
+
 static void __io_commit_cqring(struct io_ring_ctx *ctx)
 {
 	struct io_rings *rings = ctx->rings;
@@ -460,10 +485,36 @@ static inline void io_queue_async_work(struct io_ring_ctx *ctx,
 	queue_work(ctx->sqo_wq[rw], &req->work);
 }
 
+static void io_kill_timeout(struct io_kiocb *req)
+{
+	int ret;
+
+	ret = hrtimer_try_to_cancel(&req->timeout.timer);
+	if (ret != -1) {
+		atomic_inc(&req->ctx->cq_timeouts);
+		list_del(&req->list);
+		io_cqring_fill_event(req->ctx, req->user_data, 0);
+		__io_free_req(req);
+	}
+}
+
+static void io_kill_timeouts(struct io_ring_ctx *ctx)
+{
+	struct io_kiocb *req, *tmp;
+
+	spin_lock_irq(&ctx->completion_lock);
+	list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+		io_kill_timeout(req);
+	spin_unlock_irq(&ctx->completion_lock);
+}
+
 static void io_commit_cqring(struct io_ring_ctx *ctx)
 {
 	struct io_kiocb *req;
 
+	while ((req = io_get_timeout_req(ctx)) != NULL)
+		io_kill_timeout(req);
+
 	__io_commit_cqring(ctx);
 
 	while ((req = io_get_deferred_req(ctx)) != NULL) {
@@ -1765,6 +1816,81 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return ipt.error;
 }
 
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
+{
+	struct io_ring_ctx *ctx;
+	struct io_kiocb *req;
+	unsigned long flags;
+
+	req = container_of(timer, struct io_kiocb, timeout.timer);
+	ctx = req->ctx;
+	atomic_inc(&ctx->cq_timeouts);
+
+	spin_lock_irqsave(&ctx->completion_lock, flags);
+	list_del(&req->list);
+
+	io_cqring_fill_event(ctx, req->user_data, -ETIME);
+	io_commit_cqring(ctx);
+	spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+	io_cqring_ev_posted(ctx);
+
+	io_put_req(req);
+	return HRTIMER_NORESTART;
+}
+
+static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	unsigned count, req_dist, tail_index;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct list_head *entry;
+	struct timespec ts;
+
+	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags ||
+	    sqe->len != 1)
+		return -EINVAL;
+	if (copy_from_user(&ts, (void __user *) (unsigned long) sqe->addr,
+	    sizeof(ts)))
+		return -EFAULT;
+
+	/*
+	 * sqe->off holds how many events need to occur for this
+	 * timeout event to be satisfied.
+	 */
+	count = READ_ONCE(sqe->off);
+	if (!count)
+		count = 1;
+
+	req->sequence = ctx->cached_sq_head + count - 1;
+	req->flags |= REQ_F_TIMEOUT;
+
+	/*
+	 * Insertion sort, ensuring the first entry in the list is always
+	 * the one we need first.
+	 */
+	tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped;
+	req_dist = req->sequence - tail_index;
+	spin_lock_irq(&ctx->completion_lock);
+	list_for_each_prev(entry, &ctx->timeout_list) {
+		struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
+		unsigned dist;
+
+		dist = nxt->sequence - tail_index;
+		if (req_dist >= dist)
+			break;
+	}
+	list_add(&req->list, entry);
+	spin_unlock_irq(&ctx->completion_lock);
+
+	hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	req->timeout.timer.function = io_timeout_fn;
+	hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts),
+			HRTIMER_MODE_REL);
+	return 0;
+}
+
 static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			const struct io_uring_sqe *sqe)
 {
@@ -1842,6 +1968,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	case IORING_OP_RECVMSG:
 		ret = io_recvmsg(req, s->sqe, force_nonblock);
 		break;
+	case IORING_OP_TIMEOUT:
+		ret = io_timeout(req, s->sqe);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
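
[Usage note, not part of the patch: a minimal sketch of how userspace might arm the new IORING_OP_TIMEOUT with a raw SQE, assuming a ring already created with io_uring_setup() and an mmap'ed SQ ring. prep_timeout_sqe() and the concrete off/user_data values are illustrative only; fd = -1 follows the convention for requests that are not backed by a file.]

#include <string.h>
#include <time.h>
#include <linux/io_uring.h>

/* Relative 2.5s timeout; io_timeout() copies a struct timespec from sqe->addr. */
static struct timespec ts = { .tv_sec = 2, .tv_nsec = 500000000 };

static void prep_timeout_sqe(struct io_uring_sqe *sqe)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_TIMEOUT;
	sqe->fd = -1;				/* no file backs a timeout */
	sqe->addr = (unsigned long) &ts;	/* timespec to wait for */
	sqe->len = 1;				/* io_timeout() rejects anything else */
	sqe->off = 4;				/* satisfied early after 4 CQEs */
	sqe->user_data = 0xcafe;		/* CQE res: -ETIME on expiry,
						 * 0 if satisfied by completions */
}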
@@ -2599,6 +2728,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			  const sigset_t __user *sig, size_t sigsz)
 {
 	struct io_rings *rings = ctx->rings;
+	unsigned nr_timeouts;
 	int ret;
 
 	if (io_cqring_events(rings) >= min_events)
@@ -2617,7 +2747,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 		return ret;
 	}
 
-	ret = wait_event_interruptible(ctx->wait, io_cqring_events(rings) >= min_events);
+	nr_timeouts = atomic_read(&ctx->cq_timeouts);
+	/*
+	 * Return if we have enough events, or if a timeout occurred since
+	 * we started waiting. For timeouts, we always want to return to
+	 * userspace.
+	 */
+	ret = wait_event_interruptible(ctx->wait,
+				io_cqring_events(rings) >= min_events ||
+				atomic_read(&ctx->cq_timeouts) != nr_timeouts);
 	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
 	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
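
[Wait-side note, also not part of the patch: with the cq_timeouts counter in the wait condition above, a blocked io_uring_enter(2) returns once a timeout fires even if fewer than min_complete CQEs have arrived. A hedged sketch using the raw syscall; ring_fd is assumed to come from io_uring_setup(), the counts are illustrative, and the headers are assumed to expose __NR_io_uring_enter.]

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

static long submit_and_wait(int ring_fd)
{
	/* Submit 1 SQE (e.g. the timeout) and wait for up to 8 completions.
	 * The atomic_read(&ctx->cq_timeouts) != nr_timeouts check wakes this
	 * wait when the timer expires, so the call cannot block past 'ts'. */
	return syscall(__NR_io_uring_enter, ring_fd, 1, 8,
		       IORING_ENTER_GETEVENTS, NULL, 0);
}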
@@ -3288,6 +3426,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	percpu_ref_kill(&ctx->refs);
 	mutex_unlock(&ctx->uring_lock);
 
+	io_kill_timeouts(ctx);
 	io_poll_remove_all(ctx);
 	io_iopoll_reap_events(ctx);
 	wait_for_completion(&ctx->ctx_done);