Skip to content

Commit 91427e6

Browse files
vraj-amranguy11
authored andcommitted
ice: Support 5 layer topology
There is a performance issue when the number of VSIs is not a multiple of 8. This is caused by the max children limitation per node (8) in the 9 layer topology. The BW credits are shared evenly among the children by default. Assume one node has 8 children and the other has 1. The parent of these nodes shares the BW credit equally among them. Apparently this causes a problem for the first node, which has 8 children. The 9th VM gets more BW credits than the first 8 VMs. Example: 1) With 8 VMs: for x in 0 1 2 3 4 5 6 7; do taskset -c ${x} netperf -P0 -H 172.68.169.125 & sleep .1 ; done tx_queue_0_packets: 23283027 tx_queue_1_packets: 23292289 tx_queue_2_packets: 23276136 tx_queue_3_packets: 23279828 tx_queue_4_packets: 23279828 tx_queue_5_packets: 23279333 tx_queue_6_packets: 23277745 tx_queue_7_packets: 23279950 tx_queue_8_packets: 0 2) With 9 VMs: for x in 0 1 2 3 4 5 6 7 8; do taskset -c ${x} netperf -P0 -H 172.68.169.125 & sleep .1 ; done tx_queue_0_packets: 24163396 tx_queue_1_packets: 24164623 tx_queue_2_packets: 24163188 tx_queue_3_packets: 24163701 tx_queue_4_packets: 24163683 tx_queue_5_packets: 24164668 tx_queue_6_packets: 23327200 tx_queue_7_packets: 24163853 tx_queue_8_packets: 91101417 So on average, the queue 8 statistics show that 3.7 times more packets were sent there than to the other queues. The FW, starting with version 3.20, has increased the max number of children per node by reducing the number of layers from 9 to 5. Reflect this on the driver side. Signed-off-by: Raj Victor <[email protected]> Co-developed-by: Michal Wilczynski <[email protected]> Signed-off-by: Michal Wilczynski <[email protected]> Co-developed-by: Mateusz Polchlopek <[email protected]> Signed-off-by: Mateusz Polchlopek <[email protected]> Tested-by: Pucha Himasekhar Reddy <[email protected]> Signed-off-by: Tony Nguyen <[email protected]>
1 parent 5625ca5 commit 91427e6

File tree

6 files changed

+251
-0
lines changed

6 files changed

+251
-0
lines changed

drivers/net/ethernet/intel/ice/ice_adminq_cmd.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ struct ice_aqc_list_caps_elem {
121121
#define ICE_AQC_CAPS_PCIE_RESET_AVOIDANCE 0x0076
122122
#define ICE_AQC_CAPS_POST_UPDATE_RESET_RESTRICT 0x0077
123123
#define ICE_AQC_CAPS_NVM_MGMT 0x0080
124+
#define ICE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE 0x0085
124125
#define ICE_AQC_CAPS_FW_LAG_SUPPORT 0x0092
125126
#define ICE_AQC_BIT_ROCEV2_LAG 0x01
126127
#define ICE_AQC_BIT_SRIOV_LAG 0x02
@@ -810,6 +811,23 @@ struct ice_aqc_get_topo {
810811
__le32 addr_low;
811812
};
812813

814+
/* Get/Set Tx Topology (indirect 0x0418/0x0417) */
815+
struct ice_aqc_get_set_tx_topo {
816+
u8 set_flags;
817+
#define ICE_AQC_TX_TOPO_FLAGS_CORRER BIT(0)
818+
#define ICE_AQC_TX_TOPO_FLAGS_SRC_RAM BIT(1)
819+
#define ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW BIT(4)
820+
#define ICE_AQC_TX_TOPO_FLAGS_ISSUED BIT(5)
821+
822+
u8 get_flags;
823+
#define ICE_AQC_TX_TOPO_GET_RAM 2
824+
825+
__le16 reserved1;
826+
__le32 reserved2;
827+
__le32 addr_high;
828+
__le32 addr_low;
829+
};
830+
813831
/* Update TSE (indirect 0x0403)
814832
* Get TSE (indirect 0x0404)
815833
* Add TSE (indirect 0x0401)
@@ -2538,6 +2556,7 @@ struct ice_aq_desc {
25382556
struct ice_aqc_get_link_topo get_link_topo;
25392557
struct ice_aqc_i2c read_write_i2c;
25402558
struct ice_aqc_read_i2c_resp read_i2c_resp;
2559+
struct ice_aqc_get_set_tx_topo get_set_tx_topo;
25412560
} params;
25422561
};
25432562

@@ -2644,6 +2663,10 @@ enum ice_adminq_opc {
26442663
ice_aqc_opc_query_sched_res = 0x0412,
26452664
ice_aqc_opc_remove_rl_profiles = 0x0415,
26462665

2666+
/* tx topology commands */
2667+
ice_aqc_opc_set_tx_topo = 0x0417,
2668+
ice_aqc_opc_get_tx_topo = 0x0418,
2669+
26472670
/* PHY commands */
26482671
ice_aqc_opc_get_phy_caps = 0x0600,
26492672
ice_aqc_opc_set_phy_cfg = 0x0601,

drivers/net/ethernet/intel/ice/ice_common.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1617,6 +1617,8 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
16171617
case ice_aqc_opc_set_port_params:
16181618
case ice_aqc_opc_get_vlan_mode_parameters:
16191619
case ice_aqc_opc_set_vlan_mode_parameters:
1620+
case ice_aqc_opc_set_tx_topo:
1621+
case ice_aqc_opc_get_tx_topo:
16201622
case ice_aqc_opc_add_recipe:
16211623
case ice_aqc_opc_recipe_to_profile:
16221624
case ice_aqc_opc_get_recipe:
@@ -2173,6 +2175,9 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps,
21732175
ice_debug(hw, ICE_DBG_INIT, "%s: sriov_lag = %u\n",
21742176
prefix, caps->sriov_lag);
21752177
break;
2178+
case ICE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE:
2179+
caps->tx_sched_topo_comp_mode_en = (number == 1);
2180+
break;
21762181
default:
21772182
/* Not one of the recognized common capabilities */
21782183
found = false;

drivers/net/ethernet/intel/ice/ice_ddp.c

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "ice_common.h"
55
#include "ice.h"
66
#include "ice_ddp.h"
7+
#include "ice_sched.h"
78

89
/* For supporting double VLAN mode, it is necessary to enable or disable certain
910
* boost tcam entries. The metadata labels names that match the following
@@ -2272,3 +2273,211 @@ enum ice_ddp_state ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf,
22722273

22732274
return state;
22742275
}
2276+
2277+
/**
2278+
* ice_get_set_tx_topo - get or set Tx topology
2279+
* @hw: pointer to the HW struct
2280+
* @buf: pointer to Tx topology buffer
2281+
* @buf_size: buffer size
2282+
* @cd: pointer to command details structure or NULL
2283+
* @flags: pointer to descriptor flags
2284+
* @set: 0-get, 1-set topology
2285+
*
2286+
* The function will get or set Tx topology
2287+
*
2288+
* Return: zero when set was successful, negative values otherwise.
2289+
*/
2290+
static int
2291+
ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size,
2292+
struct ice_sq_cd *cd, u8 *flags, bool set)
2293+
{
2294+
struct ice_aqc_get_set_tx_topo *cmd;
2295+
struct ice_aq_desc desc;
2296+
int status;
2297+
2298+
cmd = &desc.params.get_set_tx_topo;
2299+
if (set) {
2300+
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_tx_topo);
2301+
cmd->set_flags = ICE_AQC_TX_TOPO_FLAGS_ISSUED;
2302+
/* requested to update a new topology, not a default topology */
2303+
if (buf)
2304+
cmd->set_flags |= ICE_AQC_TX_TOPO_FLAGS_SRC_RAM |
2305+
ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW;
2306+
2307+
if (ice_is_e825c(hw))
2308+
desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
2309+
} else {
2310+
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_tx_topo);
2311+
cmd->get_flags = ICE_AQC_TX_TOPO_GET_RAM;
2312+
}
2313+
2314+
if (!ice_is_e825c(hw))
2315+
desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
2316+
2317+
status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
2318+
if (status)
2319+
return status;
2320+
/* read the return flag values (first byte) for get operation */
2321+
if (!set && flags)
2322+
*flags = desc.params.get_set_tx_topo.set_flags;
2323+
2324+
return 0;
2325+
}
2326+
2327+
/**
2328+
* ice_cfg_tx_topo - Initialize new Tx topology if available
2329+
* @hw: pointer to the HW struct
2330+
* @buf: pointer to Tx topology buffer
2331+
* @len: buffer size
2332+
*
2333+
* The function will apply the new Tx topology from the package buffer
2334+
* if available.
2335+
*
2336+
* Return: zero when update was successful, negative values otherwise.
2337+
*/
2338+
int ice_cfg_tx_topo(struct ice_hw *hw, u8 *buf, u32 len)
2339+
{
2340+
u8 *current_topo, *new_topo = NULL;
2341+
struct ice_run_time_cfg_seg *seg;
2342+
struct ice_buf_hdr *section;
2343+
struct ice_pkg_hdr *pkg_hdr;
2344+
enum ice_ddp_state state;
2345+
u16 offset, size = 0;
2346+
u32 reg = 0;
2347+
int status;
2348+
u8 flags;
2349+
2350+
if (!buf || !len)
2351+
return -EINVAL;
2352+
2353+
/* Does FW support new Tx topology mode ? */
2354+
if (!hw->func_caps.common_cap.tx_sched_topo_comp_mode_en) {
2355+
ice_debug(hw, ICE_DBG_INIT, "FW doesn't support compatibility mode\n");
2356+
return -EOPNOTSUPP;
2357+
}
2358+
2359+
current_topo = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
2360+
if (!current_topo)
2361+
return -ENOMEM;
2362+
2363+
/* Get the current Tx topology */
2364+
status = ice_get_set_tx_topo(hw, current_topo, ICE_AQ_MAX_BUF_LEN, NULL,
2365+
&flags, false);
2366+
2367+
kfree(current_topo);
2368+
2369+
if (status) {
2370+
ice_debug(hw, ICE_DBG_INIT, "Get current topology is failed\n");
2371+
return status;
2372+
}
2373+
2374+
/* Is default topology already applied ? */
2375+
if (!(flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
2376+
hw->num_tx_sched_layers == ICE_SCHED_9_LAYERS) {
2377+
ice_debug(hw, ICE_DBG_INIT, "Default topology already applied\n");
2378+
return -EEXIST;
2379+
}
2380+
2381+
/* Is new topology already applied ? */
2382+
if ((flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
2383+
hw->num_tx_sched_layers == ICE_SCHED_5_LAYERS) {
2384+
ice_debug(hw, ICE_DBG_INIT, "New topology already applied\n");
2385+
return -EEXIST;
2386+
}
2387+
2388+
/* Setting topology already issued? */
2389+
if (flags & ICE_AQC_TX_TOPO_FLAGS_ISSUED) {
2390+
ice_debug(hw, ICE_DBG_INIT, "Update Tx topology was done by another PF\n");
2391+
/* Add a small delay before exiting */
2392+
msleep(2000);
2393+
return -EEXIST;
2394+
}
2395+
2396+
/* Change the topology from new to default (5 to 9) */
2397+
if (!(flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
2398+
hw->num_tx_sched_layers == ICE_SCHED_5_LAYERS) {
2399+
ice_debug(hw, ICE_DBG_INIT, "Change topology from 5 to 9 layers\n");
2400+
goto update_topo;
2401+
}
2402+
2403+
pkg_hdr = (struct ice_pkg_hdr *)buf;
2404+
state = ice_verify_pkg(pkg_hdr, len);
2405+
if (state) {
2406+
ice_debug(hw, ICE_DBG_INIT, "Failed to verify pkg (err: %d)\n",
2407+
state);
2408+
return -EIO;
2409+
}
2410+
2411+
/* Find runtime configuration segment */
2412+
seg = (struct ice_run_time_cfg_seg *)
2413+
ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE_RUN_TIME_CFG, pkg_hdr);
2414+
if (!seg) {
2415+
ice_debug(hw, ICE_DBG_INIT, "5 layer topology segment is missing\n");
2416+
return -EIO;
2417+
}
2418+
2419+
if (le32_to_cpu(seg->buf_table.buf_count) < ICE_MIN_S_COUNT) {
2420+
ice_debug(hw, ICE_DBG_INIT, "5 layer topology segment count(%d) is wrong\n",
2421+
seg->buf_table.buf_count);
2422+
return -EIO;
2423+
}
2424+
2425+
section = ice_pkg_val_buf(seg->buf_table.buf_array);
2426+
if (!section || le32_to_cpu(section->section_entry[0].type) !=
2427+
ICE_SID_TX_5_LAYER_TOPO) {
2428+
ice_debug(hw, ICE_DBG_INIT, "5 layer topology section type is wrong\n");
2429+
return -EIO;
2430+
}
2431+
2432+
size = le16_to_cpu(section->section_entry[0].size);
2433+
offset = le16_to_cpu(section->section_entry[0].offset);
2434+
if (size < ICE_MIN_S_SZ || size > ICE_MAX_S_SZ) {
2435+
ice_debug(hw, ICE_DBG_INIT, "5 layer topology section size is wrong\n");
2436+
return -EIO;
2437+
}
2438+
2439+
/* Make sure the section fits in the buffer */
2440+
if (offset + size > ICE_PKG_BUF_SIZE) {
2441+
ice_debug(hw, ICE_DBG_INIT, "5 layer topology buffer > 4K\n");
2442+
return -EIO;
2443+
}
2444+
2445+
/* Get the new topology buffer */
2446+
new_topo = ((u8 *)section) + offset;
2447+
2448+
update_topo:
2449+
/* Acquire global lock to make sure that set topology issued
2450+
* by one PF.
2451+
*/
2452+
status = ice_acquire_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID, ICE_RES_WRITE,
2453+
ICE_GLOBAL_CFG_LOCK_TIMEOUT);
2454+
if (status) {
2455+
ice_debug(hw, ICE_DBG_INIT, "Failed to acquire global lock\n");
2456+
return status;
2457+
}
2458+
2459+
/* Check if reset was triggered already. */
2460+
reg = rd32(hw, GLGEN_RSTAT);
2461+
if (reg & GLGEN_RSTAT_DEVSTATE_M) {
2462+
/* Reset is in progress, re-init the HW again */
2463+
ice_debug(hw, ICE_DBG_INIT, "Reset is in progress. Layer topology might be applied already\n");
2464+
ice_check_reset(hw);
2465+
return 0;
2466+
}
2467+
2468+
/* Set new topology */
2469+
status = ice_get_set_tx_topo(hw, new_topo, size, NULL, NULL, true);
2470+
if (status) {
2471+
ice_debug(hw, ICE_DBG_INIT, "Failed setting Tx topology\n");
2472+
return status;
2473+
}
2474+
2475+
/* New topology is updated, delay 1 second before issuing the CORER */
2476+
msleep(1000);
2477+
ice_reset(hw, ICE_RESET_CORER);
2478+
/* CORER will clear the global lock, so no explicit call
2479+
* required for release.
2480+
*/
2481+
2482+
return 0;
2483+
}

drivers/net/ethernet/intel/ice/ice_ddp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,4 +454,6 @@ u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld);
454454
void *ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
455455
u32 sect_type);
456456

457+
int ice_cfg_tx_topo(struct ice_hw *hw, u8 *buf, u32 len);
458+
457459
#endif

drivers/net/ethernet/intel/ice/ice_sched.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,17 @@
66

77
#include "ice_common.h"
88

9+
/**
10+
* DOC: ice_sched.h
11+
*
12+
* This header file stores everything that is needed for broadly understood
13+
* scheduler. It consists of defines related to layers, structures related to
14+
* aggregator, functions declarations and others.
15+
*/
16+
17+
#define ICE_SCHED_5_LAYERS 5
18+
#define ICE_SCHED_9_LAYERS 9
19+
920
#define SCHED_NODE_NAME_MAX_LEN 32
1021

1122
#define ICE_QGRP_LAYER_OFFSET 2

drivers/net/ethernet/intel/ice/ice_type.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ struct ice_hw_common_caps {
296296
bool pcie_reset_avoidance;
297297
/* Post update reset restriction */
298298
bool reset_restrict_support;
299+
bool tx_sched_topo_comp_mode_en;
299300
};
300301

301302
/* IEEE 1588 TIME_SYNC specific info */

0 commit comments

Comments
 (0)