Skip to content

Commit 3d5985a

Browse files
jacob-keller
authored and anguy11 committed
ice: convert VF storage to hash table with krefs and RCU
The ice driver stores VF structures in a simple array which is allocated once at the time of VF creation. The VF structures are then accessed from the array by their VF ID. The ID must be between 0 and the number of allocated VFs. Multiple threads can access this table: * .ndo operations such as .ndo_get_vf_cfg or .ndo_set_vf_trust * interrupts, such as due to messages from the VF using the virtchnl communication * processing such as device reset * commands to add or remove VFs The current implementation does not keep track of when all threads are done operating on a VF and can potentially result in use-after-free issues caused by one thread accessing a VF structure after it has been released when removing VFs. Some of these are prevented with various state flags and checks. In addition, this structure is quite static and does not support a planned future where virtualization can be more dynamic. As we begin to look at supporting Scalable IOV with the ice driver (as opposed to just supporting Single Root IOV), this structure is not sufficient. In the future, VFs will be able to be added and removed individually and dynamically. To allow for this, and to better protect against a whole class of use-after-free bugs, replace the VF storage with a combination of a hash table and krefs to reference track all of the accesses to VFs through the hash table. A hash table still allows efficient look up of the VF given its ID, but also allows adding and removing VFs. It does not require contiguous VF IDs. The use of krefs allows the cleanup of the VF memory to be delayed until after all threads have released their reference (by calling ice_put_vf). To prevent corruption of the hash table, a combination of RCU and the mutex table_lock are used. Addition and removal from the hash table use the RCU-aware hash macros. This allows simple read-only look-ups that iterate to locate a single VF to be fast using RCU. 
Accesses which modify the hash table, or which can't take RCU because they sleep, will hold the mutex lock. By using this design, we have a stronger guarantee that the VF structure can't be released until after all threads are finished operating on it. We also pave the way for the more dynamic Scalable IOV implementation in the future. Signed-off-by: Jacob Keller <[email protected]> Tested-by: Konrad Jankowski <[email protected]> Signed-off-by: Tony Nguyen <[email protected]>
1 parent fb916db commit 3d5985a

File tree

8 files changed

+363
-125
lines changed

8 files changed

+363
-125
lines changed

drivers/net/ethernet/intel/ice/ice_eswitch.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,8 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf)
209209
rx_ring->q_vector = q_vector;
210210
rx_ring->next = NULL;
211211
rx_ring->netdev = repr->netdev;
212+
213+
ice_put_vf(vf);
212214
}
213215
}
214216

@@ -223,6 +225,8 @@ ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi)
223225
struct ice_vf *vf;
224226
unsigned int bkt;
225227

228+
lockdep_assert_held(&pf->vfs.table_lock);
229+
226230
ice_for_each_vf(pf, bkt, vf) {
227231
struct ice_vsi *vsi = vf->repr->src_vsi;
228232

@@ -251,6 +255,8 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf)
251255
struct ice_vf *vf;
252256
unsigned int bkt;
253257

258+
lockdep_assert_held(&pf->vfs.table_lock);
259+
254260
ice_for_each_vf(pf, bkt, vf) {
255261
struct ice_vsi *vsi = vf->repr->src_vsi;
256262

@@ -430,6 +436,8 @@ static void ice_eswitch_napi_del(struct ice_pf *pf)
430436
struct ice_vf *vf;
431437
unsigned int bkt;
432438

439+
lockdep_assert_held(&pf->vfs.table_lock);
440+
433441
ice_for_each_vf(pf, bkt, vf)
434442
netif_napi_del(&vf->repr->q_vector->napi);
435443
}
@@ -443,6 +451,8 @@ static void ice_eswitch_napi_enable(struct ice_pf *pf)
443451
struct ice_vf *vf;
444452
unsigned int bkt;
445453

454+
lockdep_assert_held(&pf->vfs.table_lock);
455+
446456
ice_for_each_vf(pf, bkt, vf)
447457
napi_enable(&vf->repr->q_vector->napi);
448458
}
@@ -456,6 +466,8 @@ static void ice_eswitch_napi_disable(struct ice_pf *pf)
456466
struct ice_vf *vf;
457467
unsigned int bkt;
458468

469+
lockdep_assert_held(&pf->vfs.table_lock);
470+
459471
ice_for_each_vf(pf, bkt, vf)
460472
napi_disable(&vf->repr->q_vector->napi);
461473
}
@@ -629,6 +641,8 @@ static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf)
629641
struct ice_vf *vf;
630642
unsigned int bkt;
631643

644+
lockdep_assert_held(&pf->vfs.table_lock);
645+
632646
if (test_bit(ICE_DOWN, pf->state))
633647
return;
634648

@@ -647,6 +661,8 @@ void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf)
647661
struct ice_vf *vf;
648662
unsigned int bkt;
649663

664+
lockdep_assert_held(&pf->vfs.table_lock);
665+
650666
if (test_bit(ICE_DOWN, pf->state))
651667
return;
652668

drivers/net/ethernet/intel/ice/ice_ethtool.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -316,15 +316,20 @@ ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
316316
*/
317317
static bool ice_active_vfs(struct ice_pf *pf)
318318
{
319+
bool active = false;
319320
struct ice_vf *vf;
320321
unsigned int bkt;
321322

322-
ice_for_each_vf(pf, bkt, vf) {
323-
if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
324-
return true;
323+
rcu_read_lock();
324+
ice_for_each_vf_rcu(pf, bkt, vf) {
325+
if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
326+
active = true;
327+
break;
328+
}
325329
}
330+
rcu_read_unlock();
326331

327-
return false;
332+
return active;
328333
}
329334

330335
/**

drivers/net/ethernet/intel/ice/ice_lib.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -439,8 +439,10 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d
439439
if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring)
440440
return IRQ_HANDLED;
441441

442-
ice_for_each_vf(pf, bkt, vf)
442+
rcu_read_lock();
443+
ice_for_each_vf_rcu(pf, bkt, vf)
443444
napi_schedule(&vf->repr->q_vector->napi);
445+
rcu_read_unlock();
444446

445447
return IRQ_HANDLED;
446448
}
@@ -1345,11 +1347,17 @@ static int ice_get_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi)
13451347
{
13461348
struct ice_vf *vf;
13471349
unsigned int bkt;
1350+
int base;
13481351

1349-
ice_for_each_vf(pf, bkt, vf) {
1350-
if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI)
1351-
return pf->vsi[vf->ctrl_vsi_idx]->base_vector;
1352+
rcu_read_lock();
1353+
ice_for_each_vf_rcu(pf, bkt, vf) {
1354+
if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
1355+
base = pf->vsi[vf->ctrl_vsi_idx]->base_vector;
1356+
rcu_read_unlock();
1357+
return base;
1358+
}
13521359
}
1360+
rcu_read_unlock();
13531361

13541362
return ice_get_res(pf, pf->irq_tracker, vsi->num_q_vectors,
13551363
ICE_RES_VF_CTRL_VEC_ID);
@@ -2894,10 +2902,14 @@ static void ice_free_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi)
28942902
struct ice_vf *vf;
28952903
unsigned int bkt;
28962904

2897-
ice_for_each_vf(pf, bkt, vf) {
2898-
if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI)
2905+
rcu_read_lock();
2906+
ice_for_each_vf_rcu(pf, bkt, vf) {
2907+
if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
2908+
rcu_read_unlock();
28992909
return;
2910+
}
29002911
}
2912+
rcu_read_unlock();
29012913

29022914
/* No other VFs left that have control VSI. It is now safe to reclaim
29032915
* SW interrupts back to the common pool.

drivers/net/ethernet/intel/ice/ice_main.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,8 +521,10 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
521521
ice_vc_notify_reset(pf);
522522

523523
/* Disable VFs until reset is completed */
524+
mutex_lock(&pf->vfs.table_lock);
524525
ice_for_each_vf(pf, bkt, vf)
525526
ice_set_vf_state_qs_dis(vf);
527+
mutex_unlock(&pf->vfs.table_lock);
526528

527529
if (ice_is_eswitch_mode_switchdev(pf)) {
528530
if (reset_type != ICE_RESET_PFR)
@@ -1756,6 +1758,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
17561758
/* Check to see if one of the VFs caused an MDD event, and then
17571759
* increment counters and set print pending
17581760
*/
1761+
mutex_lock(&pf->vfs.table_lock);
17591762
ice_for_each_vf(pf, bkt, vf) {
17601763
reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id));
17611764
if (reg & VP_MDET_TX_PQM_VALID_M) {
@@ -1811,6 +1814,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
18111814
}
18121815
}
18131816
}
1817+
mutex_unlock(&pf->vfs.table_lock);
18141818

18151819
ice_print_vfs_mdd_events(pf);
18161820
}
@@ -3680,6 +3684,7 @@ static void ice_deinit_pf(struct ice_pf *pf)
36803684
mutex_destroy(&pf->sw_mutex);
36813685
mutex_destroy(&pf->tc_mutex);
36823686
mutex_destroy(&pf->avail_q_mutex);
3687+
mutex_destroy(&pf->vfs.table_lock);
36833688

36843689
if (pf->avail_txqs) {
36853690
bitmap_free(pf->avail_txqs);
@@ -3779,6 +3784,9 @@ static int ice_init_pf(struct ice_pf *pf)
37793784
return -ENOMEM;
37803785
}
37813786

3787+
mutex_init(&pf->vfs.table_lock);
3788+
hash_init(pf->vfs.table);
3789+
37823790
return 0;
37833791
}
37843792

drivers/net/ethernet/intel/ice/ice_repr.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,8 @@ void ice_repr_rem_from_all_vfs(struct ice_pf *pf)
341341
struct ice_vf *vf;
342342
unsigned int bkt;
343343

344+
lockdep_assert_held(&pf->vfs.table_lock);
345+
344346
ice_for_each_vf(pf, bkt, vf)
345347
ice_repr_rem(vf);
346348
}
@@ -355,6 +357,8 @@ int ice_repr_add_for_all_vfs(struct ice_pf *pf)
355357
unsigned int bkt;
356358
int err;
357359

360+
lockdep_assert_held(&pf->vfs.table_lock);
361+
358362
ice_for_each_vf(pf, bkt, vf) {
359363
err = ice_repr_add(vf);
360364
if (err)

drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1578,6 +1578,7 @@ void ice_flush_fdir_ctx(struct ice_pf *pf)
15781578
if (!test_and_clear_bit(ICE_FD_VF_FLUSH_CTX, pf->state))
15791579
return;
15801580

1581+
mutex_lock(&pf->vfs.table_lock);
15811582
ice_for_each_vf(pf, bkt, vf) {
15821583
struct device *dev = ice_pf_to_dev(pf);
15831584
enum virtchnl_fdir_prgm_status status;
@@ -1634,6 +1635,7 @@ void ice_flush_fdir_ctx(struct ice_pf *pf)
16341635
ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
16351636
spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
16361637
}
1638+
mutex_unlock(&pf->vfs.table_lock);
16371639
}
16381640

16391641
/**

0 commit comments

Comments
 (0)