
Commit 5262f56

io_uring: IORING_OP_TIMEOUT support
There have been a few requests for functionality similar to io_getevents() and epoll_wait(), where the user can specify a timeout for waiting on events. I deliberately did not add support for this through the system call initially to avoid overloading its arguments, but I can see that the use cases for this are valid.

This adds support for IORING_OP_TIMEOUT. If a user wants to get woken when waiting for events, simply submit one of these timeout commands with your wait call (or before). This ensures that the application sleeping on the CQ ring waiting for events will get woken. The timeout command is passed in as a pointer to a struct timespec. Timeouts are relative. The timeout command also includes a way to auto-cancel after N events have passed.

Signed-off-by: Jens Axboe <[email protected]>
1 parent 9831a90 commit 5262f56
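
As a rough illustration of the interface described above (not part of the commit), the sketch below shows how an application might fill in an SQE for the new opcode before waiting on the CQ ring. The prepare_timeout() helper is hypothetical, and the surrounding submission plumbing (picking an SQE slot, advancing the SQ tail, calling io_uring_enter()) is omitted; only the fields that io_timeout() in this patch actually reads are set.

    /*
     * Hypothetical userspace sketch: arm a relative timeout that also
     * auto-cancels once nr_events completions have been posted. The fields
     * mirror what io_timeout() validates: addr points to a relative struct
     * timespec, len must be 1, off is the completion count, and
     * flags/ioprio/buf_index/timeout_flags must all be zero.
     */
    #include <string.h>
    #include <time.h>
    #include <linux/io_uring.h>

    static void prepare_timeout(struct io_uring_sqe *sqe, struct timespec *ts,
                                unsigned nr_events, __u64 tag)
    {
            memset(sqe, 0, sizeof(*sqe));   /* zeroes the must-be-zero fields */
            sqe->opcode = IORING_OP_TIMEOUT;
            sqe->addr = (unsigned long) ts; /* relative timeout */
            sqe->len = 1;                   /* exactly one timespec */
            sqe->off = nr_events;           /* auto-cancel after this many events (0 means 1) */
            sqe->user_data = tag;           /* identifies the resulting CQE */
    }

Submitted alongside (or before) an io_uring_enter() wait with IORING_ENTER_GETEVENTS, this wakes the sleeper either when the timespec expires (the CQE carries -ETIME) or once the requested number of completions has been posted (the CQE carries 0).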

Showing 2 changed files with 146 additions and 5 deletions.

fs/io_uring.c

Lines changed: 144 additions & 5 deletions
@@ -200,6 +200,7 @@ struct io_ring_ctx {
                 struct io_uring_sqe *sq_sqes;
 
                 struct list_head defer_list;
+                struct list_head timeout_list;
         } ____cacheline_aligned_in_smp;
 
         /* IO offload */
@@ -216,6 +217,7 @@ struct io_ring_ctx {
                 struct wait_queue_head cq_wait;
                 struct fasync_struct *cq_fasync;
                 struct eventfd_ctx *cq_ev_fd;
+                atomic_t cq_timeouts;
         } ____cacheline_aligned_in_smp;
 
         struct io_rings *rings;
@@ -283,6 +285,11 @@ struct io_poll_iocb {
         struct wait_queue_entry wait;
 };
 
+struct io_timeout {
+        struct file *file;
+        struct hrtimer timer;
+};
+
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -294,6 +301,7 @@ struct io_kiocb {
                 struct file *file;
                 struct kiocb rw;
                 struct io_poll_iocb poll;
+                struct io_timeout timeout;
         };
 
         struct sqe_submit submit;
@@ -313,6 +321,7 @@ struct io_kiocb {
 #define REQ_F_LINK_DONE         128     /* linked sqes done */
 #define REQ_F_FAIL_LINK         256     /* fail rest of links */
 #define REQ_F_SHADOW_DRAIN      512     /* link-drain shadow req */
+#define REQ_F_TIMEOUT           1024    /* timeout request */
         u64 user_data;
         u32 result;
         u32 sequence;
@@ -344,6 +353,8 @@ struct io_submit_state {
 };
 
 static void io_sq_wq_submit_work(struct work_struct *work);
+static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+                                 long res);
 static void __io_free_req(struct io_kiocb *req);
 
 static struct kmem_cache *req_cachep;
@@ -400,26 +411,30 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
         INIT_LIST_HEAD(&ctx->poll_list);
         INIT_LIST_HEAD(&ctx->cancel_list);
         INIT_LIST_HEAD(&ctx->defer_list);
+        INIT_LIST_HEAD(&ctx->timeout_list);
         return ctx;
 }
 
 static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
                                      struct io_kiocb *req)
 {
-        if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
+        /* timeout requests always honor sequence */
+        if (!(req->flags & REQ_F_TIMEOUT) &&
+            (req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
                 return false;
 
         return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
 }
 
-static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx,
+                                              struct list_head *list)
 {
         struct io_kiocb *req;
 
-        if (list_empty(&ctx->defer_list))
+        if (list_empty(list))
                 return NULL;
 
-        req = list_first_entry(&ctx->defer_list, struct io_kiocb, list);
+        req = list_first_entry(list, struct io_kiocb, list);
         if (!io_sequence_defer(ctx, req)) {
                 list_del_init(&req->list);
                 return req;
@@ -428,6 +443,16 @@ static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
         return NULL;
 }
 
+static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+{
+        return __io_get_deferred_req(ctx, &ctx->defer_list);
+}
+
+static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
+{
+        return __io_get_deferred_req(ctx, &ctx->timeout_list);
+}
+
 static void __io_commit_cqring(struct io_ring_ctx *ctx)
 {
         struct io_rings *rings = ctx->rings;
@@ -460,10 +485,36 @@ static inline void io_queue_async_work(struct io_ring_ctx *ctx,
         queue_work(ctx->sqo_wq[rw], &req->work);
 }
 
+static void io_kill_timeout(struct io_kiocb *req)
+{
+        int ret;
+
+        ret = hrtimer_try_to_cancel(&req->timeout.timer);
+        if (ret != -1) {
+                atomic_inc(&req->ctx->cq_timeouts);
+                list_del(&req->list);
+                io_cqring_fill_event(req->ctx, req->user_data, 0);
+                __io_free_req(req);
+        }
+}
+
+static void io_kill_timeouts(struct io_ring_ctx *ctx)
+{
+        struct io_kiocb *req, *tmp;
+
+        spin_lock_irq(&ctx->completion_lock);
+        list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+                io_kill_timeout(req);
+        spin_unlock_irq(&ctx->completion_lock);
+}
+
 static void io_commit_cqring(struct io_ring_ctx *ctx)
 {
         struct io_kiocb *req;
 
+        while ((req = io_get_timeout_req(ctx)) != NULL)
+                io_kill_timeout(req);
+
         __io_commit_cqring(ctx);
 
         while ((req = io_get_deferred_req(ctx)) != NULL) {
@@ -1765,6 +1816,81 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
         return ipt.error;
 }
 
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
+{
+        struct io_ring_ctx *ctx;
+        struct io_kiocb *req;
+        unsigned long flags;
+
+        req = container_of(timer, struct io_kiocb, timeout.timer);
+        ctx = req->ctx;
+        atomic_inc(&ctx->cq_timeouts);
+
+        spin_lock_irqsave(&ctx->completion_lock, flags);
+        list_del(&req->list);
+
+        io_cqring_fill_event(ctx, req->user_data, -ETIME);
+        io_commit_cqring(ctx);
+        spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+        io_cqring_ev_posted(ctx);
+
+        io_put_req(req);
+        return HRTIMER_NORESTART;
+}
+
+static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+        unsigned count, req_dist, tail_index;
+        struct io_ring_ctx *ctx = req->ctx;
+        struct list_head *entry;
+        struct timespec ts;
+
+        if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+                return -EINVAL;
+        if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags ||
+            sqe->len != 1)
+                return -EINVAL;
+        if (copy_from_user(&ts, (void __user *) (unsigned long) sqe->addr,
+            sizeof(ts)))
+                return -EFAULT;
+
+        /*
+         * sqe->off holds how many events that need to occur for this
+         * timeout event to be satisfied.
+         */
+        count = READ_ONCE(sqe->off);
+        if (!count)
+                count = 1;
+
+        req->sequence = ctx->cached_sq_head + count - 1;
+        req->flags |= REQ_F_TIMEOUT;
+
+        /*
+         * Insertion sort, ensuring the first entry in the list is always
+         * the one we need first.
+         */
+        tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped;
+        req_dist = req->sequence - tail_index;
+        spin_lock_irq(&ctx->completion_lock);
+        list_for_each_prev(entry, &ctx->timeout_list) {
+                struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
+                unsigned dist;
+
+                dist = nxt->sequence - tail_index;
+                if (req_dist >= dist)
+                        break;
+        }
+        list_add(&req->list, entry);
+        spin_unlock_irq(&ctx->completion_lock);
+
+        hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+        req->timeout.timer.function = io_timeout_fn;
+        hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts),
+                        HRTIMER_MODE_REL);
+        return 0;
+}
+
 static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
                         const struct io_uring_sqe *sqe)
 {
@@ -1842,6 +1968,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
         case IORING_OP_RECVMSG:
                 ret = io_recvmsg(req, s->sqe, force_nonblock);
                 break;
+        case IORING_OP_TIMEOUT:
+                ret = io_timeout(req, s->sqe);
+                break;
         default:
                 ret = -EINVAL;
                 break;
@@ -2599,6 +2728,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                           const sigset_t __user *sig, size_t sigsz)
 {
         struct io_rings *rings = ctx->rings;
+        unsigned nr_timeouts;
         int ret;
 
         if (io_cqring_events(rings) >= min_events)
@@ -2617,7 +2747,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                         return ret;
         }
 
-        ret = wait_event_interruptible(ctx->wait, io_cqring_events(rings) >= min_events);
+        nr_timeouts = atomic_read(&ctx->cq_timeouts);
+        /*
+         * Return if we have enough events, or if a timeout occured since
+         * we started waiting. For timeouts, we always want to return to
+         * userspace.
+         */
+        ret = wait_event_interruptible(ctx->wait,
+                                io_cqring_events(rings) >= min_events ||
+                                atomic_read(&ctx->cq_timeouts) != nr_timeouts);
         restore_saved_sigmask_unless(ret == -ERESTARTSYS);
         if (ret == -ERESTARTSYS)
                 ret = -EINTR;
@@ -3288,6 +3426,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
         percpu_ref_kill(&ctx->refs);
         mutex_unlock(&ctx->uring_lock);
 
+        io_kill_timeouts(ctx);
         io_poll_remove_all(ctx);
         io_iopoll_reap_events(ctx);
         wait_for_completion(&ctx->ctx_done);

include/uapi/linux/io_uring.h

Lines changed: 2 additions & 0 deletions
@@ -28,6 +28,7 @@ struct io_uring_sqe {
                 __u16 poll_events;
                 __u32 sync_range_flags;
                 __u32 msg_flags;
+                __u32 timeout_flags;
         };
         __u64 user_data;        /* data to be passed back at completion time */
         union {
@@ -61,6 +62,7 @@ struct io_uring_sqe {
 #define IORING_OP_SYNC_FILE_RANGE       8
 #define IORING_OP_SENDMSG       9
 #define IORING_OP_RECVMSG       10
+#define IORING_OP_TIMEOUT       11
 
 /*
  * sqe->fsync_flags
