
Commit 18bceab

io_uring: allow POLL_ADD with double poll_wait() users
Some file descriptors use separate waitqueues for their f_op->poll() handler, most commonly one for read and one for write. The io_uring poll implementation doesn't work with that, as the second poll_wait() call causes the io_uring poll request to fail with -EINVAL. This affects (at least) tty devices and /dev/random, and it's a real problem for event loops where some file descriptors work and others don't.

With this fix, io_uring handles multiple waitqueues.

Signed-off-by: Jens Axboe <[email protected]>
1 parent: 4a38aed
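For context, the pattern that trips the old code looks like the following sketch (illustrative only, not from this commit): a ->poll() handler registered on two waitqueues, so the poll table's queue proc (io_uring's __io_queue_proc()) runs twice per vfs_poll() call. The demo_dev struct and demo_poll() are hypothetical; tty and /dev/random follow the same shape.

/*
 * Illustrative sketch only: a hypothetical character device whose
 * ->poll() handler registers on separate read and write waitqueues,
 * as tty and /dev/random do. Each poll_wait() call invokes the poll
 * table's queue proc, so io_uring's __io_queue_proc() is entered
 * twice for one vfs_poll(); before this commit the second entry
 * failed the request with -EINVAL.
 */
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/wait.h>

struct demo_dev {			/* hypothetical device state */
	wait_queue_head_t read_wq;
	wait_queue_head_t write_wq;
};

static __poll_t demo_poll(struct file *file, struct poll_table_struct *wait)
{
	struct demo_dev *dev = file->private_data;
	__poll_t mask = 0;

	/* Two waitqueues: the queue proc runs once per poll_wait(). */
	poll_wait(file, &dev->read_wq, wait);
	poll_wait(file, &dev->write_wq, wait);

	/* A real driver would test its own readiness state here. */
	mask |= EPOLLIN | EPOLLRDNORM | EPOLLOUT | EPOLLWRNORM;
	return mask;
}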

fs/io_uring.c

Lines changed: 146 additions & 72 deletions
@@ -4099,27 +4099,6 @@ struct io_poll_table {
 	int error;
 };
 
-static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
-			    struct wait_queue_head *head)
-{
-	if (unlikely(poll->head)) {
-		pt->error = -EINVAL;
-		return;
-	}
-
-	pt->error = 0;
-	poll->head = head;
-	add_wait_queue(head, &poll->wait);
-}
-
-static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
-				struct poll_table_struct *p)
-{
-	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
-
-	__io_queue_proc(&pt->req->apoll->poll, pt, head);
-}
-
 static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 			   __poll_t mask, task_work_func_t func)
 {
@@ -4171,6 +4150,144 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
 	return false;
 }
 
+static void io_poll_remove_double(struct io_kiocb *req)
+{
+	struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+
+	lockdep_assert_held(&req->ctx->completion_lock);
+
+	if (poll && poll->head) {
+		struct wait_queue_head *head = poll->head;
+
+		spin_lock(&head->lock);
+		list_del_init(&poll->wait.entry);
+		if (poll->wait.private)
+			refcount_dec(&req->refs);
+		poll->head = NULL;
+		spin_unlock(&head->lock);
+	}
+}
+
+static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+
+	io_poll_remove_double(req);
+	req->poll.done = true;
+	io_cqring_fill_event(req, error ? error : mangle_poll(mask));
+	io_commit_cqring(ctx);
+}
+
+static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+
+	if (io_poll_rewait(req, &req->poll)) {
+		spin_unlock_irq(&ctx->completion_lock);
+		return;
+	}
+
+	hash_del(&req->hash_node);
+	io_poll_complete(req, req->result, 0);
+	req->flags |= REQ_F_COMP_LOCKED;
+	io_put_req_find_next(req, nxt);
+	spin_unlock_irq(&ctx->completion_lock);
+
+	io_cqring_ev_posted(ctx);
+}
+
+static void io_poll_task_func(struct callback_head *cb)
+{
+	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+	struct io_kiocb *nxt = NULL;
+
+	io_poll_task_handler(req, &nxt);
+	if (nxt) {
+		struct io_ring_ctx *ctx = nxt->ctx;
+
+		mutex_lock(&ctx->uring_lock);
+		__io_queue_sqe(nxt, NULL);
+		mutex_unlock(&ctx->uring_lock);
+	}
+}
+
+static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
+			       int sync, void *key)
+{
+	struct io_kiocb *req = wait->private;
+	struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+	__poll_t mask = key_to_poll(key);
+
+	/* for instances that support it check for an event match first: */
+	if (mask && !(mask & poll->events))
+		return 0;
+
+	if (req->poll.head) {
+		bool done;
+
+		spin_lock(&req->poll.head->lock);
+		done = list_empty(&req->poll.wait.entry);
+		if (!done)
+			list_del_init(&req->poll.wait.entry);
+		spin_unlock(&req->poll.head->lock);
+		if (!done)
+			__io_async_wake(req, poll, mask, io_poll_task_func);
+	}
+	refcount_dec(&req->refs);
+	return 1;
+}
+
+static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
+			      wait_queue_func_t wake_func)
+{
+	poll->head = NULL;
+	poll->done = false;
+	poll->canceled = false;
+	poll->events = events;
+	INIT_LIST_HEAD(&poll->wait.entry);
+	init_waitqueue_func_entry(&poll->wait, wake_func);
+}
+
+static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
+			    struct wait_queue_head *head)
+{
+	struct io_kiocb *req = pt->req;
+
+	/*
+	 * If poll->head is already set, it's because the file being polled
+	 * uses multiple waitqueues for poll handling (eg one for read, one
+	 * for write). Setup a separate io_poll_iocb if this happens.
+	 */
+	if (unlikely(poll->head)) {
+		/* already have a 2nd entry, fail a third attempt */
+		if (req->io) {
+			pt->error = -EINVAL;
+			return;
+		}
+		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
+		if (!poll) {
+			pt->error = -ENOMEM;
+			return;
+		}
+		io_init_poll_iocb(poll, req->poll.events, io_poll_double_wake);
+		refcount_inc(&req->refs);
+		poll->wait.private = req;
+		req->io = (void *) poll;
+	}
+
+	pt->error = 0;
+	poll->head = head;
+	add_wait_queue(head, &poll->wait);
+}
+
+static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
+				struct poll_table_struct *p)
+{
+	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+
+	__io_queue_proc(&pt->req->apoll->poll, pt, head);
+}
+
 static void io_async_task_func(struct callback_head *cb)
 {
 	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
@@ -4246,18 +4363,13 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
 	bool cancel = false;
 
 	poll->file = req->file;
-	poll->head = NULL;
-	poll->done = poll->canceled = false;
-	poll->events = mask;
+	io_init_poll_iocb(poll, mask, wake_func);
+	poll->wait.private = req;
 
 	ipt->pt._key = mask;
 	ipt->req = req;
 	ipt->error = -EINVAL;
 
-	INIT_LIST_HEAD(&poll->wait.entry);
-	init_waitqueue_func_entry(&poll->wait, wake_func);
-	poll->wait.private = req;
-
 	mask = vfs_poll(req->file, &ipt->pt) & poll->events;
 
 	spin_lock_irq(&ctx->completion_lock);
@@ -4288,6 +4400,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 	struct async_poll *apoll;
 	struct io_poll_table ipt;
 	__poll_t mask, ret;
+	bool had_io;
 
 	if (!req->file || !file_can_poll(req->file))
 		return false;
@@ -4302,6 +4415,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 
 	req->flags |= REQ_F_POLLED;
 	memcpy(&apoll->work, &req->work, sizeof(req->work));
+	had_io = req->io != NULL;
 
 	get_task_struct(current);
 	req->task = current;
@@ -4321,7 +4435,9 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 					io_async_wake);
 	if (ret) {
 		ipt.error = 0;
-		apoll->poll.done = true;
+		/* only remove double add if we did it here */
+		if (!had_io)
+			io_poll_remove_double(req);
 		spin_unlock_irq(&ctx->completion_lock);
 		memcpy(&req->work, &apoll->work, sizeof(req->work));
 		kfree(apoll);
@@ -4354,6 +4470,7 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 	bool do_complete;
 
 	if (req->opcode == IORING_OP_POLL_ADD) {
+		io_poll_remove_double(req);
 		do_complete = __io_poll_remove_one(req, &req->poll);
 	} else {
 		apoll = req->apoll;
@@ -4455,49 +4572,6 @@ static int io_poll_remove(struct io_kiocb *req)
 	return 0;
 }
 
-static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
-{
-	struct io_ring_ctx *ctx = req->ctx;
-
-	req->poll.done = true;
-	io_cqring_fill_event(req, error ? error : mangle_poll(mask));
-	io_commit_cqring(ctx);
-}
-
-static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
-{
-	struct io_ring_ctx *ctx = req->ctx;
-	struct io_poll_iocb *poll = &req->poll;
-
-	if (io_poll_rewait(req, poll)) {
-		spin_unlock_irq(&ctx->completion_lock);
-		return;
-	}
-
-	hash_del(&req->hash_node);
-	io_poll_complete(req, req->result, 0);
-	req->flags |= REQ_F_COMP_LOCKED;
-	io_put_req_find_next(req, nxt);
-	spin_unlock_irq(&ctx->completion_lock);
-
-	io_cqring_ev_posted(ctx);
-}
-
-static void io_poll_task_func(struct callback_head *cb)
-{
-	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
-	struct io_kiocb *nxt = NULL;
-
-	io_poll_task_handler(req, &nxt);
-	if (nxt) {
-		struct io_ring_ctx *ctx = nxt->ctx;
-
-		mutex_lock(&ctx->uring_lock);
-		__io_queue_sqe(nxt, NULL);
-		mutex_unlock(&ctx->uring_lock);
-	}
-}
-
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 			void *key)
 {
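For reference, a minimal userspace sketch (not part of the commit) of the case this fixes, using the standard liburing helpers; /dev/random is chosen because the commit message lists it as affected. On pre-fix kernels the completion carries -EINVAL; with this patch it reports the poll mask.

/*
 * Illustrative only: arm IORING_OP_POLL_ADD on a file whose ->poll()
 * registers on two waitqueues. Build with: gcc demo.c -luring
 * (assumes liburing is installed and the kernel supports io_uring).
 */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int fd;

	fd = open("/dev/random", O_RDONLY);	/* affected per commit message */
	if (fd < 0 || io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_add(sqe, fd, POLLIN);
	io_uring_submit(&ring);

	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		/* pre-fix kernels: cqe->res == -EINVAL; fixed: poll mask */
		printf("poll result: %d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}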
