Skip to content

Commit 07af482

Browse files
aloktion authored and kuba-moo committed
i40e: add ability to reset VF for Tx and Rx MDD events
Implement "mdd-auto-reset-vf" priv-flag to handle Tx and Rx MDD events for VFs. This flag is also used in other network adapters like ICE.

Usage:
- "on" - The problematic VF will be automatically reset if a malformed descriptor is detected.
- "off" - The problematic VF will be disabled.

In cases where a VF sends malformed packets classified as malicious, it can cause the Tx queue to freeze, rendering it unusable for several minutes. When an MDD event occurs, this new implementation allows for a graceful VF reset to quickly restore operational state.

Currently, VF queues are disabled if an MDD event occurs. This patch adds the ability to reset the VF if a Tx or Rx MDD event occurs. It also includes MDD event logging throttling to avoid dmesg pollution and unifies the format of Tx and Rx MDD messages.

Note: Standard message rate limiting functions like dev_info_ratelimited() do not meet our requirements. Custom rate limiting is implemented, please see the code for details.

Co-developed-by: Jan Sokolowski <[email protected]>
Signed-off-by: Jan Sokolowski <[email protected]>
Co-developed-by: Padraig J Connolly <[email protected]>
Signed-off-by: Padraig J Connolly <[email protected]>
Signed-off-by: Aleksandr Loktionov <[email protected]>
Reviewed-by: Michal Schmidt <[email protected]>
Tested-by: Rafal Romanowski <[email protected]>
Signed-off-by: Tony Nguyen <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 35f715c commit 07af482

File tree

7 files changed

+123
-17
lines changed

7 files changed

+123
-17
lines changed

Documentation/networking/device_drivers/ethernet/intel/i40e.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,18 @@ Use ethtool to view and set link-down-on-close, as follows::
299299
ethtool --show-priv-flags ethX
300300
ethtool --set-priv-flags ethX link-down-on-close [on|off]
301301

302+
Setting the mdd-auto-reset-vf Private Flag
303+
------------------------------------------
304+
305+
When the mdd-auto-reset-vf private flag is set to "on", the problematic VF will
306+
be automatically reset if a malformed descriptor is detected. If the flag is
307+
set to "off", the problematic VF will be disabled.
308+
309+
Use ethtool to view and set mdd-auto-reset-vf, as follows::
310+
311+
ethtool --show-priv-flags ethX
312+
ethtool --set-priv-flags ethX mdd-auto-reset-vf [on|off]
313+
302314
Viewing Link Messages
303315
---------------------
304316
Link messages will not be displayed to the console if the distribution is

drivers/net/ethernet/intel/i40e/i40e.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ enum i40e_state {
8888
__I40E_SERVICE_SCHED,
8989
__I40E_ADMINQ_EVENT_PENDING,
9090
__I40E_MDD_EVENT_PENDING,
91+
__I40E_MDD_VF_PRINT_PENDING,
9192
__I40E_VFLR_EVENT_PENDING,
9293
__I40E_RESET_RECOVERY_PENDING,
9394
__I40E_TIMEOUT_RECOVERY_PENDING,
@@ -191,6 +192,7 @@ enum i40e_pf_flags {
191192
*/
192193
I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
193194
I40E_FLAG_VF_VLAN_PRUNING_ENA,
195+
I40E_FLAG_MDD_AUTO_RESET_VF,
194196
I40E_PF_FLAGS_NBITS, /* must be last */
195197
};
196198

@@ -572,7 +574,7 @@ struct i40e_pf {
572574
int num_alloc_vfs; /* actual number of VFs allocated */
573575
u32 vf_aq_requests;
574576
u32 arq_overflows; /* Not fatal, possibly indicative of problems */
575-
577+
struct ratelimit_state mdd_message_rate_limit;
576578
/* DCBx/DCBNL capability for PF that indicates
577579
* whether DCBx is managed by firmware or host
578580
* based agent (LLDPAD). Also, indicates what

drivers/net/ethernet/intel/i40e/i40e_debugfs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,7 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
722722
dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
723723
vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
724724
dev_info(&pf->pdev->dev, " num MDD=%lld\n",
725-
vf->num_mdd_events);
725+
vf->mdd_tx_events.count + vf->mdd_rx_events.count);
726726
} else {
727727
dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
728728
}

drivers/net/ethernet/intel/i40e/i40e_ethtool.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
459459
I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
460460
I40E_PRIV_FLAG("vf-vlan-pruning",
461461
I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
462+
I40E_PRIV_FLAG("mdd-auto-reset-vf",
463+
I40E_FLAG_MDD_AUTO_RESET_VF, 0),
462464
};
463465

464466
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)

drivers/net/ethernet/intel/i40e/i40e_main.c

Lines changed: 94 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11179,6 +11179,67 @@ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
1117911179
i40e_reset_and_rebuild(pf, false, lock_acquired);
1118011180
}
1118111181

11182+
/**
11183+
* i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
11184+
* @pf: board private structure
11185+
* @vf: pointer to the VF structure
11186+
* @is_tx: true - for Tx event, false - for Rx
11187+
*/
11188+
static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
11189+
bool is_tx)
11190+
{
11191+
dev_err(&pf->pdev->dev, is_tx ?
11192+
"%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
11193+
"%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
11194+
is_tx ? vf->mdd_tx_events.count : vf->mdd_rx_events.count,
11195+
pf->hw.pf_id,
11196+
vf->vf_id,
11197+
vf->default_lan_addr.addr,
11198+
str_on_off(test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)));
11199+
}
11200+
11201+
/**
11202+
* i40e_print_vfs_mdd_events - print VFs malicious driver detect event
11203+
* @pf: pointer to the PF structure
11204+
*
11205+
* Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
11206+
*/
11207+
static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
11208+
{
11209+
unsigned int i;
11210+
11211+
/* check that there are pending MDD events to print */
11212+
if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
11213+
return;
11214+
11215+
if (!__ratelimit(&pf->mdd_message_rate_limit))
11216+
return;
11217+
11218+
for (i = 0; i < pf->num_alloc_vfs; i++) {
11219+
struct i40e_vf *vf = &pf->vf[i];
11220+
bool is_printed = false;
11221+
11222+
/* only print Rx MDD event message if there are new events */
11223+
if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
11224+
vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
11225+
i40e_print_vf_mdd_event(pf, vf, false);
11226+
is_printed = true;
11227+
}
11228+
11229+
/* only print Tx MDD event message if there are new events */
11230+
if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
11231+
vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
11232+
i40e_print_vf_mdd_event(pf, vf, true);
11233+
is_printed = true;
11234+
}
11235+
11236+
if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
11237+
dev_info(&pf->pdev->dev,
11238+
"Use PF Control I/F to re-enable the VF #%d\n",
11239+
i);
11240+
}
11241+
}
11242+
1118211243
/**
1118311244
* i40e_handle_mdd_event
1118411245
* @pf: pointer to the PF structure
@@ -11193,8 +11254,13 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
1119311254
u32 reg;
1119411255
int i;
1119511256

11196-
if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
11257+
if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) {
11258+
/* Since the VF MDD event logging is rate limited, check if
11259+
* there are pending MDD events.
11260+
*/
11261+
i40e_print_vfs_mdd_events(pf);
1119711262
return;
11263+
}
1119811264

1119911265
/* find what triggered the MDD event */
1120011266
reg = rd32(hw, I40E_GL_MDET_TX);
@@ -11238,36 +11304,48 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
1123811304

1123911305
/* see if one of the VFs needs its hand slapped */
1124011306
for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
11307+
bool is_mdd_on_tx = false;
11308+
bool is_mdd_on_rx = false;
11309+
1124111310
vf = &(pf->vf[i]);
1124211311
reg = rd32(hw, I40E_VP_MDET_TX(i));
1124311312
if (reg & I40E_VP_MDET_TX_VALID_MASK) {
11313+
set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
1124411314
wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
11245-
vf->num_mdd_events++;
11246-
dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
11247-
i);
11248-
dev_info(&pf->pdev->dev,
11249-
"Use PF Control I/F to re-enable the VF\n");
11315+
vf->mdd_tx_events.count++;
1125011316
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
11317+
is_mdd_on_tx = true;
1125111318
}
1125211319

1125311320
reg = rd32(hw, I40E_VP_MDET_RX(i));
1125411321
if (reg & I40E_VP_MDET_RX_VALID_MASK) {
11322+
set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
1125511323
wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
11256-
vf->num_mdd_events++;
11257-
dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
11258-
i);
11259-
dev_info(&pf->pdev->dev,
11260-
"Use PF Control I/F to re-enable the VF\n");
11324+
vf->mdd_rx_events.count++;
1126111325
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
11326+
is_mdd_on_rx = true;
11327+
}
11328+
11329+
if ((is_mdd_on_tx || is_mdd_on_rx) &&
11330+
test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
11331+
/* VF MDD event counters will be cleared by
11332+
* reset, so print the event prior to reset.
11333+
*/
11334+
if (is_mdd_on_rx)
11335+
i40e_print_vf_mdd_event(pf, vf, false);
11336+
if (is_mdd_on_tx)
11337+
i40e_print_vf_mdd_event(pf, vf, true);
11338+
11339+
i40e_vc_reset_vf(vf, true);
1126211340
}
1126311341
}
1126411342

11265-
/* re-enable mdd interrupt cause */
11266-
clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
1126711343
reg = rd32(hw, I40E_PFINT_ICR0_ENA);
1126811344
reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
1126911345
wr32(hw, I40E_PFINT_ICR0_ENA, reg);
1127011346
i40e_flush(hw);
11347+
11348+
i40e_print_vfs_mdd_events(pf);
1127111349
}
1127211350

1127311351
/**
@@ -15878,6 +15956,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1587815956
ERR_PTR(err),
1587915957
i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
1588015958

15959+
/* VF MDD event logs are rate limited to one second intervals */
15960+
ratelimit_state_init(&pf->mdd_message_rate_limit, 1 * HZ, 1);
15961+
1588115962
/* Reconfigure hardware for allowing smaller MSS in the case
1588215963
* of TSO, so that we avoid the MDD being fired and causing
1588315964
* a reset in the case of small MSS+TSO.

drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
216216
* @notify_vf: notify vf about reset or not
217217
* Reset VF handler.
218218
**/
219-
static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
219+
void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
220220
{
221221
struct i40e_pf *pf = vf->pf;
222222
int i;

drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ struct i40evf_channel {
6464
u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
6565
};
6666

67+
/* MDD event bookkeeping for one direction (Rx or Tx) of a single VF */
struct i40e_mdd_vf_events {
68+
u64 count; /* running total of detected events for this direction */
69+
/* value of count when the events were last logged; a mismatch with
 * count means there are new events that have not been printed yet
 */
70+
u64 last_printed;
71+
};
72+
6773
/* VF information structure */
6874
struct i40e_vf {
6975
struct i40e_pf *pf;
@@ -92,7 +98,9 @@ struct i40e_vf {
9298

9399
u8 num_queue_pairs; /* num of qps assigned to VF vsis */
94100
u8 num_req_queues; /* num of requested qps */
95-
u64 num_mdd_events; /* num of mdd events detected */
101+
/* num of mdd tx and rx events detected */
102+
struct i40e_mdd_vf_events mdd_rx_events;
103+
struct i40e_mdd_vf_events mdd_tx_events;
96104

97105
unsigned long vf_caps; /* vf's adv. capabilities */
98106
unsigned long vf_states; /* vf's runtime states */
@@ -120,6 +128,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
120128
int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
121129
u32 v_retval, u8 *msg, u16 msglen);
122130
int i40e_vc_process_vflr_event(struct i40e_pf *pf);
131+
void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf);
123132
bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
124133
bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
125134
void i40e_vc_notify_vf_reset(struct i40e_vf *vf);

0 commit comments

Comments
 (0)