|
| 1 | +vfio: Follow a strict lifetime for struct iommu_group |
| 2 | + |
| 3 | +jira LE-1907 |
| 4 | +Rebuild_History Non-Buildable kernel-rt-5.14.0-284.30.1.rt14.315.el9_2 |
| 5 | +commit-author Jason Gunthorpe < [email protected]> |
| 6 | +commit ca5f21b2574903a7430fcb3590e534d92b2fa816 |
| 7 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 8 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 9 | +ciq/ciq_backports/kernel-rt-5.14.0-284.30.1.rt14.315.el9_2/ca5f21b2.failed |
| 10 | + |
| 11 | +The iommu_group comes from the struct device that a driver has been bound |
| 12 | +to and then created a struct vfio_device against. To keep the iommu layer |
| 13 | +sane we want to have a simple rule that only an attached driver should be |
| 14 | +using the iommu API. Particularly only an attached driver should hold |
| 15 | +ownership. |
| 16 | + |
| 17 | +In VFIO's case since it uses the group APIs and it shares between |
| 18 | +different drivers it is a bit more complicated, but the principle still |
| 19 | +holds. |
| 20 | + |
| 21 | +Solve this by waiting for all users of the vfio_group to stop before |
| 22 | +allowing vfio_unregister_group_dev() to complete. This is done with a new |
| 23 | +completion to know when the users go away and an additional refcount to |
| 24 | +keep track of how many device drivers are sharing the vfio group. The last |
| 25 | +driver to be unregistered will clean up the group. |
| 26 | + |
| 27 | +This solves crashes in the S390 iommu driver that come because VFIO ends |
| 28 | +up racing releasing ownership (which attaches the default iommu_domain to |
| 29 | +the device) with the removal of that same device from the iommu |
| 30 | +driver. This is a side case that iommu drivers should not have to cope |
| 31 | +with. |
| 32 | + |
| 33 | + iommu driver failed to attach the default/blocking domain |
| 34 | + WARNING: CPU: 0 PID: 5082 at drivers/iommu/iommu.c:1961 iommu_detach_group+0x6c/0x80 |
| 35 | + Modules linked in: macvtap macvlan tap vfio_pci vfio_pci_core irqbypass vfio_virqfd kvm nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables nfnetlink mlx5_ib sunrpc ib_uverbs ism smc uvdevice ib_core s390_trng eadm_sch tape_3590 tape tape_class vfio_ccw mdev vfio_iommu_type1 vfio zcrypt_cex4 sch_fq_codel configfs ghash_s390 prng chacha_s390 libchacha aes_s390 mlx5_core des_s390 libdes sha3_512_s390 nvme sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common nvme_core zfcp scsi_transport_fc pkey zcrypt rng_core autofs4 |
| 36 | + CPU: 0 PID: 5082 Comm: qemu-system-s39 Tainted: G W 6.0.0-rc3 #5 |
| 37 | + Hardware name: IBM 3931 A01 782 (LPAR) |
| 38 | + Krnl PSW : 0704c00180000000 000000095bb10d28 (iommu_detach_group+0x70/0x80) |
| 39 | + R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 |
| 40 | + Krnl GPRS: 0000000000000001 0000000900000027 0000000000000039 000000095c97ffe0 |
| 41 | + 00000000fffeffff 00000009fc290000 00000000af1fda50 00000000af590b58 |
| 42 | + 00000000af1fdaf0 0000000135c7a320 0000000135e52258 0000000135e52200 |
| 43 | + 00000000a29e8000 00000000af590b40 000000095bb10d24 0000038004b13c98 |
| 44 | + Krnl Code: 000000095bb10d18: c020003d56fc larl %r2,000000095c2bbb10 |
| 45 | + 000000095bb10d1e: c0e50019d901 brasl %r14,000000095be4bf20 |
| 46 | + #000000095bb10d24: af000000 mc 0,0 |
| 47 | + >000000095bb10d28: b904002a lgr %r2,%r10 |
| 48 | + 000000095bb10d2c: ebaff0a00004 lmg %r10,%r15,160(%r15) |
| 49 | + 000000095bb10d32: c0f4001aa867 brcl 15,000000095be65e00 |
| 50 | + 000000095bb10d38: c004002168e0 brcl 0,000000095bf3def8 |
| 51 | + 000000095bb10d3e: eb6ff0480024 stmg %r6,%r15,72(%r15) |
| 52 | + Call Trace: |
| 53 | + [<000000095bb10d28>] iommu_detach_group+0x70/0x80 |
| 54 | + ([<000000095bb10d24>] iommu_detach_group+0x6c/0x80) |
| 55 | + [<000003ff80243b0e>] vfio_iommu_type1_detach_group+0x136/0x6c8 [vfio_iommu_type1] |
| 56 | + [<000003ff80137780>] __vfio_group_unset_container+0x58/0x158 [vfio] |
| 57 | + [<000003ff80138a16>] vfio_group_fops_unl_ioctl+0x1b6/0x210 [vfio] |
| 58 | + pci 0004:00:00.0: Removing from iommu group 4 |
| 59 | + [<000000095b5b62e8>] __s390x_sys_ioctl+0xc0/0x100 |
| 60 | + [<000000095be5d3b4>] __do_syscall+0x1d4/0x200 |
| 61 | + [<000000095be6c072>] system_call+0x82/0xb0 |
| 62 | + Last Breaking-Event-Address: |
| 63 | + [<000000095be4bf80>] __warn_printk+0x60/0x68 |
| 64 | + |
| 65 | +It indicates that domain->ops->attach_dev() failed because the driver has |
| 66 | +already passed the point of destructing the device. |
| 67 | + |
| 68 | +Fixes: 9ac8545199a1 ("iommu: Fix use-after-free in iommu_release_device") |
| 69 | + Reported-by: Matthew Rosato < [email protected]> |
| 70 | + Tested-by: Matthew Rosato < [email protected]> |
| 71 | + Reviewed-by: Yi Liu < [email protected]> |
| 72 | + Signed-off-by: Jason Gunthorpe < [email protected]> |
| 73 | +Link: https://lore.kernel.org/r/ [email protected] |
| 74 | + Signed-off-by: Alex Williamson < [email protected]> |
| 75 | +(cherry picked from commit ca5f21b2574903a7430fcb3590e534d92b2fa816) |
| 76 | + Signed-off-by: Jonathan Maple < [email protected]> |
| 77 | + |
| 78 | +# Conflicts: |
| 79 | +# drivers/vfio/vfio.h |
| 80 | +# drivers/vfio/vfio_main.c |
| 81 | +diff --cc drivers/vfio/vfio.h |
| 82 | +index 503bea6c843d,039e3208d286..000000000000 |
| 83 | +--- a/drivers/vfio/vfio.h |
| 84 | ++++ b/drivers/vfio/vfio.h |
| 85 | +@@@ -28,6 -38,32 +28,35 @@@ enum vfio_group_type |
| 86 | + VFIO_NO_IOMMU, |
| 87 | + }; |
| 88 | + |
| 89 | +++<<<<<<< HEAD |
| 90 | +++======= |
| 91 | ++ struct vfio_group { |
| 92 | ++ struct device dev; |
| 93 | ++ struct cdev cdev; |
| 94 | ++ /* |
| 95 | ++ * When drivers is non-zero a driver is attached to the struct device |
| 96 | ++ * that provided the iommu_group and thus the iommu_group is a valid |
| 97 | ++ * pointer. When drivers is 0 the driver is being detached. Once users |
| 98 | ++ * reaches 0 then the iommu_group is invalid. |
| 99 | ++ */ |
| 100 | ++ refcount_t drivers; |
| 101 | ++ refcount_t users; |
| 102 | ++ struct completion users_comp; |
| 103 | ++ unsigned int container_users; |
| 104 | ++ struct iommu_group *iommu_group; |
| 105 | ++ struct vfio_container *container; |
| 106 | ++ struct list_head device_list; |
| 107 | ++ struct mutex device_lock; |
| 108 | ++ struct list_head vfio_next; |
| 109 | ++ struct list_head container_next; |
| 110 | ++ enum vfio_group_type type; |
| 111 | ++ struct rw_semaphore group_rwsem; |
| 112 | ++ struct kvm *kvm; |
| 113 | ++ struct file *opened_file; |
| 114 | ++ struct blocking_notifier_head notifier; |
| 115 | ++ }; |
| 116 | ++ |
| 117 | +++>>>>>>> ca5f21b25749 (vfio: Follow a strict lifetime for struct iommu_group) |
| 118 | + /* events for the backend driver notify callback */ |
| 119 | + enum vfio_iommu_notify_type { |
| 120 | + VFIO_IOMMU_CONTAINER_CLOSE = 0, |
| 121 | +diff --cc drivers/vfio/vfio_main.c |
| 122 | +index eb849d5b81b0,f19171cad9a2..000000000000 |
| 123 | +--- a/drivers/vfio/vfio_main.c |
| 124 | ++++ b/drivers/vfio/vfio_main.c |
| 125 | +@@@ -164,159 -125,6 +164,162 @@@ static void vfio_release_device_set(str |
| 126 | + xa_unlock(&vfio_device_set_xa); |
| 127 | + } |
| 128 | + |
| 129 | +++<<<<<<< HEAD |
| 130 | + +#ifdef CONFIG_VFIO_NOIOMMU |
| 131 | + +static void *vfio_noiommu_open(unsigned long arg) |
| 132 | + +{ |
| 133 | + + if (arg != VFIO_NOIOMMU_IOMMU) |
| 134 | + + return ERR_PTR(-EINVAL); |
| 135 | + + if (!capable(CAP_SYS_RAWIO)) |
| 136 | + + return ERR_PTR(-EPERM); |
| 137 | + + |
| 138 | + + return NULL; |
| 139 | + +} |
| 140 | + + |
| 141 | + +static void vfio_noiommu_release(void *iommu_data) |
| 142 | + +{ |
| 143 | + +} |
| 144 | + + |
| 145 | + +static long vfio_noiommu_ioctl(void *iommu_data, |
| 146 | + + unsigned int cmd, unsigned long arg) |
| 147 | + +{ |
| 148 | + + if (cmd == VFIO_CHECK_EXTENSION) |
| 149 | + + return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0; |
| 150 | + + |
| 151 | + + return -ENOTTY; |
| 152 | + +} |
| 153 | + + |
| 154 | + +static int vfio_noiommu_attach_group(void *iommu_data, |
| 155 | + + struct iommu_group *iommu_group, enum vfio_group_type type) |
| 156 | + +{ |
| 157 | + + return 0; |
| 158 | + +} |
| 159 | + + |
| 160 | + +static void vfio_noiommu_detach_group(void *iommu_data, |
| 161 | + + struct iommu_group *iommu_group) |
| 162 | + +{ |
| 163 | + +} |
| 164 | + + |
| 165 | + +static const struct vfio_iommu_driver_ops vfio_noiommu_ops = { |
| 166 | + + .name = "vfio-noiommu", |
| 167 | + + .owner = THIS_MODULE, |
| 168 | + + .open = vfio_noiommu_open, |
| 169 | + + .release = vfio_noiommu_release, |
| 170 | + + .ioctl = vfio_noiommu_ioctl, |
| 171 | + + .attach_group = vfio_noiommu_attach_group, |
| 172 | + + .detach_group = vfio_noiommu_detach_group, |
| 173 | + +}; |
| 174 | + + |
| 175 | + +/* |
| 176 | + + * Only noiommu containers can use vfio-noiommu and noiommu containers can only |
| 177 | + + * use vfio-noiommu. |
| 178 | + + */ |
| 179 | + +static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, |
| 180 | + + const struct vfio_iommu_driver *driver) |
| 181 | + +{ |
| 182 | + + return container->noiommu == (driver->ops == &vfio_noiommu_ops); |
| 183 | + +} |
| 184 | + +#else |
| 185 | + +static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, |
| 186 | + + const struct vfio_iommu_driver *driver) |
| 187 | + +{ |
| 188 | + + return true; |
| 189 | + +} |
| 190 | + +#endif /* CONFIG_VFIO_NOIOMMU */ |
| 191 | + + |
| 192 | + +/* |
| 193 | + + * IOMMU driver registration |
| 194 | + + */ |
| 195 | + +int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) |
| 196 | + +{ |
| 197 | + + struct vfio_iommu_driver *driver, *tmp; |
| 198 | + + |
| 199 | + + if (WARN_ON(!ops->register_device != !ops->unregister_device)) |
| 200 | + + return -EINVAL; |
| 201 | + + |
| 202 | + + driver = kzalloc(sizeof(*driver), GFP_KERNEL); |
| 203 | + + if (!driver) |
| 204 | + + return -ENOMEM; |
| 205 | + + |
| 206 | + + driver->ops = ops; |
| 207 | + + |
| 208 | + + mutex_lock(&vfio.iommu_drivers_lock); |
| 209 | + + |
| 210 | + + /* Check for duplicates */ |
| 211 | + + list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) { |
| 212 | + + if (tmp->ops == ops) { |
| 213 | + + mutex_unlock(&vfio.iommu_drivers_lock); |
| 214 | + + kfree(driver); |
| 215 | + + return -EINVAL; |
| 216 | + + } |
| 217 | + + } |
| 218 | + + |
| 219 | + + list_add(&driver->vfio_next, &vfio.iommu_drivers_list); |
| 220 | + + |
| 221 | + + mutex_unlock(&vfio.iommu_drivers_lock); |
| 222 | + + |
| 223 | + + return 0; |
| 224 | + +} |
| 225 | + +EXPORT_SYMBOL_GPL(vfio_register_iommu_driver); |
| 226 | + + |
| 227 | + +void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) |
| 228 | + +{ |
| 229 | + + struct vfio_iommu_driver *driver; |
| 230 | + + |
| 231 | + + mutex_lock(&vfio.iommu_drivers_lock); |
| 232 | + + list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { |
| 233 | + + if (driver->ops == ops) { |
| 234 | + + list_del(&driver->vfio_next); |
| 235 | + + mutex_unlock(&vfio.iommu_drivers_lock); |
| 236 | + + kfree(driver); |
| 237 | + + return; |
| 238 | + + } |
| 239 | + + } |
| 240 | + + mutex_unlock(&vfio.iommu_drivers_lock); |
| 241 | + +} |
| 242 | + +EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); |
| 243 | + + |
| 244 | + +static void vfio_group_get(struct vfio_group *group); |
| 245 | + + |
| 246 | +++======= |
| 247 | +++>>>>>>> ca5f21b25749 (vfio: Follow a strict lifetime for struct iommu_group) |
| 248 | + +/* |
| 249 | + + * Container objects - containers are created when /dev/vfio/vfio is |
| 250 | + + * opened, but their lifecycle extends until the last user is done, so |
| 251 | + + * it's freed via kref. Must support container/group/device being |
| 252 | + + * closed in any order. |
| 253 | + + */ |
| 254 | + +static void vfio_container_get(struct vfio_container *container) |
| 255 | + +{ |
| 256 | + + kref_get(&container->kref); |
| 257 | + +} |
| 258 | + + |
| 259 | + +static void vfio_container_release(struct kref *kref) |
| 260 | + +{ |
| 261 | + + struct vfio_container *container; |
| 262 | + + container = container_of(kref, struct vfio_container, kref); |
| 263 | + + |
| 264 | + + kfree(container); |
| 265 | + +} |
| 266 | + + |
| 267 | + +static void vfio_container_put(struct vfio_container *container) |
| 268 | + +{ |
| 269 | + + kref_put(&container->kref, vfio_container_release); |
| 270 | + +} |
| 271 | + + |
| 272 | + +unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set) |
| 273 | + +{ |
| 274 | + + struct vfio_device *cur; |
| 275 | + + unsigned int open_count = 0; |
| 276 | + + |
| 277 | + + lockdep_assert_held(&dev_set->lock); |
| 278 | + + |
| 279 | + + list_for_each_entry(cur, &dev_set->device_list, dev_set_list) |
| 280 | + + open_count += cur->open_count; |
| 281 | + + return open_count; |
| 282 | + +} |
| 283 | + +EXPORT_SYMBOL_GPL(vfio_device_set_open_count); |
| 284 | + + |
| 285 | + /* |
| 286 | + * Group objects - create, release, get, put, search |
| 287 | + */ |
| 288 | +@@@ -592,7 -517,12 +622,11 @@@ static int __vfio_register_dev(struct v |
| 289 | + struct vfio_group *group) |
| 290 | + { |
| 291 | + struct vfio_device *existing_device; |
| 292 | + - int ret; |
| 293 | + |
| 294 | ++ /* |
| 295 | ++ * In all cases group is the output of one of the group allocation |
| 296 | ++ * functions and we have group->drivers incremented for us. |
| 297 | ++ */ |
| 298 | + if (IS_ERR(group)) |
| 299 | + return PTR_ERR(group); |
| 300 | + |
| 301 | +@@@ -627,6 -561,9 +661,12 @@@ |
| 302 | + mutex_unlock(&group->device_lock); |
| 303 | + |
| 304 | + return 0; |
| 305 | +++<<<<<<< HEAD |
| 306 | +++======= |
| 307 | ++ err_out: |
| 308 | ++ vfio_device_remove_group(device); |
| 309 | ++ return ret; |
| 310 | +++>>>>>>> ca5f21b25749 (vfio: Follow a strict lifetime for struct iommu_group) |
| 311 | + } |
| 312 | + |
| 313 | + int vfio_register_group_dev(struct vfio_device *device) |
| 314 | +@@@ -711,14 -648,12 +751,21 @@@ void vfio_unregister_group_dev(struct v |
| 315 | + |
| 316 | + mutex_lock(&group->device_lock); |
| 317 | + list_del(&device->group_next); |
| 318 | + + group->dev_counter--; |
| 319 | + mutex_unlock(&group->device_lock); |
| 320 | + |
| 321 | +++<<<<<<< HEAD |
| 322 | + + if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU) |
| 323 | + + iommu_group_remove_device(device->dev); |
| 324 | + + |
| 325 | + + /* Matches the get in vfio_register_group_dev() */ |
| 326 | + + vfio_group_put(group); |
| 327 | +++======= |
| 328 | ++ /* Balances device_add in register path */ |
| 329 | ++ device_del(&device->device); |
| 330 | ++ |
| 331 | ++ vfio_device_remove_group(device); |
| 332 | +++>>>>>>> ca5f21b25749 (vfio: Follow a strict lifetime for struct iommu_group) |
| 333 | + } |
| 334 | + EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); |
| 335 | + |
| 336 | +* Unmerged path drivers/vfio/vfio.h |
| 337 | +* Unmerged path drivers/vfio/vfio_main.c |
0 commit comments