|
| 1 | +s390/virtio_ccw: Don't allocate/assign airqs for non-existing queues |
| 2 | + |
| 3 | +jira LE-3587 |
| 4 | +Rebuild_History Non-Buildable kernel-4.18.0-553.62.1.el8_10 |
| 5 | +commit-author David Hildenbrand < [email protected]> |
| 6 | +commit 2ccd42b959aaf490333dbd3b9b102eaf295c036a |
| 7 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 8 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 9 | +ciq/ciq_backports/kernel-4.18.0-553.62.1.el8_10/2ccd42b9.failed |
| 10 | + |
| 11 | +If we finds a vq without a name in our input array in |
| 12 | +virtio_ccw_find_vqs(), we treat it as "non-existing" and set the vq pointer |
| 13 | +to NULL; we will not call virtio_ccw_setup_vq() to allocate/setup a vq. |
| 14 | + |
| 15 | +Consequently, we create only a queue if it actually exists (name != NULL) |
| 16 | +and assign an incremental queue index to each such existing queue. |
| 17 | + |
| 18 | +However, in virtio_ccw_register_adapter_ind()->get_airq_indicator() we |
| 19 | +will not ignore these "non-existing queues", but instead assign an airq |
| 20 | +indicator to them. |
| 21 | + |
| 22 | +Besides never releasing them in virtio_ccw_drop_indicators() (because |
| 23 | +there is no virtqueue), the bigger issue seems to be that there will be a |
| 24 | +disagreement between the device and the Linux guest about the airq |
| 25 | +indicator to be used for notifying a queue, because the indicator bit |
| 26 | +for adapter I/O interrupt is derived from the queue index. |
| 27 | + |
| 28 | +The virtio spec states under "Setting Up Two-Stage Queue Indicators": |
| 29 | + |
| 30 | + ... indicator contains the guest address of an area wherein the |
| 31 | + indicators for the devices are contained, starting at bit_nr, one |
| 32 | + bit per virtqueue of the device. |
| 33 | + |
| 34 | +And further in "Notification via Adapter I/O Interrupts": |
| 35 | + |
| 36 | + For notifying the driver of virtqueue buffers, the device sets the |
| 37 | + bit in the guest-provided indicator area at the corresponding |
| 38 | + offset. |
| 39 | + |
| 40 | +For example, QEMU uses in virtio_ccw_notify() the queue index (passed as |
| 41 | +"vector") to select the relevant indicator bit. If a queue does not exist, |
| 42 | +it does not have a corresponding indicator bit assigned, because it |
| 43 | +effectively doesn't have a queue index. |
| 44 | + |
| 45 | +Using a virtio-balloon-ccw device under QEMU with free-page-hinting |
| 46 | +disabled ("free-page-hint=off") but free-page-reporting enabled |
| 47 | +("free-page-reporting=on") will result in free page reporting |
| 48 | +not working as expected: in the virtio_balloon driver, we'll be stuck |
| 49 | +forever in virtballoon_free_page_report()->wait_event(), because the |
| 50 | +waitqueue will not be woken up as the notification from the device is |
| 51 | +lost: it would use the wrong indicator bit. |
| 52 | + |
| 53 | +Free page reporting stops working and we get splats (when configured to |
| 54 | +detect hung wqs) like: |
| 55 | + |
| 56 | + INFO: task kworker/1:3:463 blocked for more than 61 seconds. |
| 57 | + Not tainted 6.14.0 #4 |
| 58 | + "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. |
| 59 | + task:kworker/1:3 [...] |
| 60 | + Workqueue: events page_reporting_process |
| 61 | + Call Trace: |
| 62 | + [<000002f404e6dfb2>] __schedule+0x402/0x1640 |
| 63 | + [<000002f404e6f22e>] schedule+0x3e/0xe0 |
| 64 | + [<000002f3846a88fa>] virtballoon_free_page_report+0xaa/0x110 [virtio_balloon] |
| 65 | + [<000002f40435c8a4>] page_reporting_process+0x2e4/0x740 |
| 66 | + [<000002f403fd3ee2>] process_one_work+0x1c2/0x400 |
| 67 | + [<000002f403fd4b96>] worker_thread+0x296/0x420 |
| 68 | + [<000002f403fe10b4>] kthread+0x124/0x290 |
| 69 | + [<000002f403f4e0dc>] __ret_from_fork+0x3c/0x60 |
| 70 | + [<000002f404e77272>] ret_from_fork+0xa/0x38 |
| 71 | + |
| 72 | +There was recently a discussion [1] whether the "holes" should be |
| 73 | +treated differently again, effectively assigning also non-existing |
| 74 | +queues a queue index: that should also fix the issue, but requires other |
| 75 | +workarounds to not break existing setups. |
| 76 | + |
| 77 | +Let's fix it without affecting existing setups for now by properly ignoring |
| 78 | +the non-existing queues, so the indicator bits will match the queue |
| 79 | +indexes. |
| 80 | + |
| 81 | +[1] https://lore.kernel.org/all/ [email protected]/ |
| 82 | + |
| 83 | +Fixes: a229989d975e ("virtio: don't allocate vqs when names[i] = NULL") |
| 84 | + Reported-by: Chandra Merla < [email protected]> |
| 85 | + |
| 86 | + Signed-off-by: David Hildenbrand < [email protected]> |
| 87 | + Tested-by: Thomas Huth < [email protected]> |
| 88 | + Reviewed-by: Thomas Huth < [email protected]> |
| 89 | + Reviewed-by: Cornelia Huck < [email protected]> |
| 90 | + Acked-by: Michael S. Tsirkin < [email protected]> |
| 91 | + Acked-by: Christian Borntraeger < [email protected]> |
| 92 | +Link: https://lore.kernel.org/r/ [email protected] |
| 93 | + Signed-off-by: Heiko Carstens < [email protected]> |
| 94 | +(cherry picked from commit 2ccd42b959aaf490333dbd3b9b102eaf295c036a) |
| 95 | + Signed-off-by: Jonathan Maple < [email protected]> |
| 96 | + |
| 97 | +# Conflicts: |
| 98 | +# drivers/s390/virtio/virtio_ccw.c |
| 99 | +diff --cc drivers/s390/virtio/virtio_ccw.c |
| 100 | +index eb609e021105,4904b831c0a7..000000000000 |
| 101 | +--- a/drivers/s390/virtio/virtio_ccw.c |
| 102 | ++++ b/drivers/s390/virtio/virtio_ccw.c |
| 103 | +@@@ -260,14 -299,20 +260,20 @@@ static struct airq_info *new_airq_info( |
| 104 | + return info; |
| 105 | + } |
| 106 | + |
| 107 | + -static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs, |
| 108 | + - u64 *first, void **airq_info) |
| 109 | + +static unsigned long get_airq_indicator(struct virtqueue *vqs[], int nvqs, |
| 110 | + + u64 *first, void **airq_info) |
| 111 | + { |
| 112 | +- int i, j; |
| 113 | ++ int i, j, queue_idx, highest_queue_idx = -1; |
| 114 | + struct airq_info *info; |
| 115 | + - unsigned long *indicator_addr = NULL; |
| 116 | + + unsigned long indicator_addr = 0; |
| 117 | + unsigned long bit, flags; |
| 118 | + |
| 119 | ++ /* Array entries without an actual queue pointer must be ignored. */ |
| 120 | ++ for (i = 0; i < nvqs; i++) { |
| 121 | ++ if (vqs[i]) |
| 122 | ++ highest_queue_idx++; |
| 123 | ++ } |
| 124 | ++ |
| 125 | + for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) { |
| 126 | + mutex_lock(&airq_areas_lock); |
| 127 | + if (!airq_areas[i]) |
| 128 | +@@@ -275,9 -320,9 +281,9 @@@ |
| 129 | + info = airq_areas[i]; |
| 130 | + mutex_unlock(&airq_areas_lock); |
| 131 | + if (!info) |
| 132 | + - return NULL; |
| 133 | + + return 0; |
| 134 | + write_lock_irqsave(&info->lock, flags); |
| 135 | +- bit = airq_iv_alloc(info->aiv, nvqs); |
| 136 | ++ bit = airq_iv_alloc(info->aiv, highest_queue_idx + 1); |
| 137 | + if (bit == -1UL) { |
| 138 | + /* Not enough vacancies. */ |
| 139 | + write_unlock_irqrestore(&info->lock, flags); |
| 140 | +@@@ -285,9 -330,11 +291,17 @@@ |
| 141 | + } |
| 142 | + *first = bit; |
| 143 | + *airq_info = info; |
| 144 | +++<<<<<<< HEAD |
| 145 | + + indicator_addr = (unsigned long)info->aiv->vector; |
| 146 | + + for (j = 0; j < nvqs; j++) { |
| 147 | + + airq_iv_set_ptr(info->aiv, bit + j, |
| 148 | +++======= |
| 149 | ++ indicator_addr = info->aiv->vector; |
| 150 | ++ for (j = 0, queue_idx = 0; j < nvqs; j++) { |
| 151 | ++ if (!vqs[j]) |
| 152 | ++ continue; |
| 153 | ++ airq_iv_set_ptr(info->aiv, bit + queue_idx++, |
| 154 | +++>>>>>>> 2ccd42b959aa (s390/virtio_ccw: Don't allocate/assign airqs for non-existing queues) |
| 155 | + (unsigned long)vqs[j]); |
| 156 | + } |
| 157 | + write_unlock_irqrestore(&info->lock, flags); |
| 158 | +* Unmerged path drivers/s390/virtio/virtio_ccw.c |
0 commit comments