Skip to content

Commit 43da44c

Browse files
committed
Merge branch 'net-stmmac-CPU-Performance-Improvements'
Jose Abreu says: ==================== net: stmmac: CPU Performance Improvements CPU Performance improvements for stmmac. Please check below for results before and after the series. Patch 1/7 allows RX Interrupt on Completion to be disabled so that only the RX HW Watchdog is used. Patch 2/7 sets up the default RX coalesce settings instead of using the minimum value. Patches 3/7 and 4/7 remove the unneeded computations for RX Flow Control activation/de-activation in some cases. Patch 5/7 tunes the default coalesce settings. Patch 6/7 reworks the TX coalesce timer activation logic. Patch 7/7 removes the now unneeded TBU interrupt. NetPerf UDP Results: -------------------- Socket Message Elapsed Messages CPU Service Size Size Time Okay Errors Throughput Util Demand bytes bytes secs # # 10^6bits/sec % SS us/KB --- XGMAC@2.5G: Before 212992 1400 10.00 2100620 0 2351.7 36.69 5.112 212992 10.00 2100539 2351.6 26.18 3.648 --- XGMAC@2.5G: After 212992 1400 10.00 2108972 0 2361.5 21.73 3.015 212992 10.00 2097038 2348.1 19.21 2.666 --- GMAC5@1G: Before 212992 1400 10.00 786000 0 880.2 34.71 12.923 212992 10.00 786000 880.2 23.42 8.719 --- GMAC5@1G: After 212992 1400 10.00 842648 0 943.7 14.12 4.903 212992 10.00 842648 943.7 12.73 4.418 Perf TCP Results on RX Path: ---------------------------- --- XGMAC@2.5G: Before 22.51% swapper [stmmac] [k] dwxgmac2_dma_interrupt 10.82% swapper [stmmac] [k] dwxgmac2_host_mtl_irq_status 5.21% swapper [stmmac] [k] dwxgmac2_host_irq_status 4.67% swapper [stmmac] [k] dwxgmac3_safety_feat_irq_status 3.63% swapper [kernel.kallsyms] [k] stack_trace_consume_entry 2.74% iperf3 [kernel.kallsyms] [k] copy_user_enhanced_fast_string 2.52% swapper [kernel.kallsyms] [k] update_stack_state 1.94% ksoftirqd/0 [stmmac] [k] dwxgmac2_dma_interrupt 1.45% iperf3 [kernel.kallsyms] [k] queued_spin_lock_slowpath 1.26% swapper [kernel.kallsyms] [k] create_object --- XGMAC@2.5G: After 7.43% swapper [kernel.kallsyms] [k] 
stack_trace_consume_entry 5.86% swapper [stmmac] [k] dwxgmac2_dma_interrupt 5.68% swapper [kernel.kallsyms] [k] update_stack_state 4.71% iperf3 [kernel.kallsyms] [k] copy_user_enhanced_fast_string 2.88% swapper [kernel.kallsyms] [k] create_object 2.69% swapper [stmmac] [k] dwxgmac2_host_mtl_irq_status 2.61% swapper [stmmac] [k] stmmac_napi_poll_rx 2.52% swapper [kernel.kallsyms] [k] unwind_next_frame.part.4 1.48% swapper [kernel.kallsyms] [k] unwind_get_return_address 1.38% swapper [kernel.kallsyms] [k] arch_stack_walk --- GMAC5@1G: Before 31.29% swapper [stmmac] [k] dwmac4_dma_interrupt 14.57% swapper [stmmac] [k] dwmac4_irq_mtl_status 10.66% swapper [stmmac] [k] dwmac4_irq_status 1.97% swapper [kernel.kallsyms] [k] stack_trace_consume_entry 1.73% iperf3 [kernel.kallsyms] [k] copy_user_enhanced_fast_string 1.59% swapper [kernel.kallsyms] [k] update_stack_state 1.15% iperf3 [kernel.kallsyms] [k] do_syscall_64 1.01% ksoftirqd/0 [stmmac] [k] dwmac4_dma_interrupt 0.89% swapper [kernel.kallsyms] [k] __default_send_IPI_dest_field 0.75% swapper [stmmac] [k] stmmac_napi_poll_rx --- GMAC5@1G: After 6.70% swapper [kernel.kallsyms] [k] stack_trace_consume_entry 5.79% swapper [stmmac] [k] dwmac4_dma_interrupt 5.29% swapper [kernel.kallsyms] [k] update_stack_state 3.52% iperf3 [kernel.kallsyms] [k] copy_user_enhanced_fast_string 2.83% swapper [stmmac] [k] dwmac4_irq_mtl_status 2.62% swapper [kernel.kallsyms] [k] create_object 2.46% swapper [stmmac] [k] stmmac_napi_poll_rx 2.32% swapper [kernel.kallsyms] [k] unwind_next_frame.part.4 2.19% swapper [stmmac] [k] dwmac4_irq_status 1.39% swapper [kernel.kallsyms] [k] unwind_get_return_address ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 983db61 + 8d07a79 commit 43da44c

File tree

5 files changed

+59
-50
lines changed

5 files changed

+59
-50
lines changed

drivers/net/ethernet/stmicro/stmmac/common.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,12 +248,13 @@ struct stmmac_safety_stats {
248248
/* Max/Min RI Watchdog Timer count value */
249249
#define MAX_DMA_RIWT 0xff
250250
#define MIN_DMA_RIWT 0x10
251+
#define DEF_DMA_RIWT 0xa0
251252
/* Tx coalesce parameters */
252253
#define STMMAC_COAL_TX_TIMER 1000
253254
#define STMMAC_MAX_COAL_TX_TICK 100000
254255
#define STMMAC_TX_MAX_FRAMES 256
255-
#define STMMAC_TX_FRAMES 1
256-
#define STMMAC_RX_FRAMES 25
256+
#define STMMAC_TX_FRAMES 25
257+
#define STMMAC_RX_FRAMES 0
257258

258259
/* Packets types */
259260
enum packets_types {

drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -252,19 +252,9 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
252252
rfa = 0x01; /* Full-1.5K */
253253
break;
254254

255-
case 8192:
256-
rfd = 0x06; /* Full-4K */
257-
rfa = 0x0a; /* Full-6K */
258-
break;
259-
260-
case 16384:
261-
rfd = 0x06; /* Full-4K */
262-
rfa = 0x12; /* Full-10K */
263-
break;
264-
265255
default:
266-
rfd = 0x06; /* Full-4K */
267-
rfa = 0x1e; /* Full-16K */
256+
rfd = 0x07; /* Full-4.5K */
257+
rfa = 0x04; /* Full-3K */
268258
break;
269259
}
270260

drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@
360360
#define XGMAC_TBUE BIT(2)
361361
#define XGMAC_TIE BIT(0)
362362
#define XGMAC_DMA_INT_DEFAULT_EN (XGMAC_NIE | XGMAC_AIE | XGMAC_RBUE | \
363-
XGMAC_RIE | XGMAC_TBUE | XGMAC_TIE)
363+
XGMAC_RIE | XGMAC_TIE)
364364
#define XGMAC_DMA_CH_Rx_WATCHDOG(x) (0x0000313c + (0x80 * (x)))
365365
#define XGMAC_RWT GENMASK(7, 0)
366366
#define XGMAC_DMA_CH_STATUS(x) (0x00003160 + (0x80 * (x)))

drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -183,19 +183,9 @@ static void dwxgmac2_dma_rx_mode(void __iomem *ioaddr, int mode,
183183
rfa = 0x01; /* Full-1.5K */
184184
break;
185185

186-
case 8192:
187-
rfd = 0x06; /* Full-4K */
188-
rfa = 0x0a; /* Full-6K */
189-
break;
190-
191-
case 16384:
192-
rfd = 0x06; /* Full-4K */
193-
rfa = 0x12; /* Full-10K */
194-
break;
195-
196186
default:
197-
rfd = 0x06; /* Full-4K */
198-
rfa = 0x1e; /* Full-16K */
187+
rfd = 0x07; /* Full-4.5K */
188+
rfa = 0x04; /* Full-3K */
199189
break;
200190
}
201191

drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

Lines changed: 51 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2605,9 +2605,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
26052605
priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
26062606

26072607
if (priv->use_riwt) {
2608-
ret = stmmac_rx_watchdog(priv, priv->ioaddr, MIN_DMA_RIWT, rx_cnt);
2609-
if (!ret)
2610-
priv->rx_riwt = MIN_DMA_RIWT;
2608+
if (!priv->rx_riwt)
2609+
priv->rx_riwt = DEF_DMA_RIWT;
2610+
2611+
ret = stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
26112612
}
26122613

26132614
if (priv->hw->pcs)
@@ -2915,16 +2916,17 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
29152916
struct stmmac_priv *priv = netdev_priv(dev);
29162917
int nfrags = skb_shinfo(skb)->nr_frags;
29172918
u32 queue = skb_get_queue_mapping(skb);
2919+
unsigned int first_entry, tx_packets;
2920+
int tmp_pay_len = 0, first_tx;
29182921
struct stmmac_tx_queue *tx_q;
2919-
unsigned int first_entry;
29202922
u8 proto_hdr_len, hdr;
2921-
int tmp_pay_len = 0;
2923+
bool has_vlan, set_ic;
29222924
u32 pay_len, mss;
29232925
dma_addr_t des;
2924-
bool has_vlan;
29252926
int i;
29262927

29272928
tx_q = &priv->tx_queue[queue];
2929+
first_tx = tx_q->cur_tx;
29282930

29292931
/* Compute header lengths */
29302932
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
@@ -3032,16 +3034,27 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
30323034
tx_q->tx_skbuff[tx_q->cur_tx] = skb;
30333035

30343036
/* Manage tx mitigation */
3035-
tx_q->tx_count_frames += nfrags + 1;
3036-
if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
3037-
!((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
3038-
priv->hwts_tx_en)) {
3039-
stmmac_tx_timer_arm(priv, queue);
3040-
} else {
3037+
tx_packets = (tx_q->cur_tx + 1) - first_tx;
3038+
tx_q->tx_count_frames += tx_packets;
3039+
3040+
if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en)
3041+
set_ic = true;
3042+
else if (!priv->tx_coal_frames)
3043+
set_ic = false;
3044+
else if (tx_packets > priv->tx_coal_frames)
3045+
set_ic = true;
3046+
else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets)
3047+
set_ic = true;
3048+
else
3049+
set_ic = false;
3050+
3051+
if (set_ic) {
30413052
desc = &tx_q->dma_tx[tx_q->cur_tx];
30423053
tx_q->tx_count_frames = 0;
30433054
stmmac_set_tx_ic(priv, desc);
30443055
priv->xstats.tx_set_ic_bit++;
3056+
} else {
3057+
stmmac_tx_timer_arm(priv, queue);
30453058
}
30463059

30473060
/* We've used all descriptors we need for this skb, however,
@@ -3132,6 +3145,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
31323145
*/
31333146
static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
31343147
{
3148+
unsigned int first_entry, tx_packets, enh_desc;
31353149
struct stmmac_priv *priv = netdev_priv(dev);
31363150
unsigned int nopaged_len = skb_headlen(skb);
31373151
int i, csum_insertion = 0, is_jumbo = 0;
@@ -3140,13 +3154,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
31403154
int gso = skb_shinfo(skb)->gso_type;
31413155
struct dma_desc *desc, *first;
31423156
struct stmmac_tx_queue *tx_q;
3143-
unsigned int first_entry;
3144-
unsigned int enh_desc;
3157+
bool has_vlan, set_ic;
3158+
int entry, first_tx;
31453159
dma_addr_t des;
3146-
bool has_vlan;
3147-
int entry;
31483160

31493161
tx_q = &priv->tx_queue[queue];
3162+
first_tx = tx_q->cur_tx;
31503163

31513164
if (priv->tx_path_in_lpi_mode)
31523165
stmmac_disable_eee_mode(priv);
@@ -3240,12 +3253,21 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
32403253
* This approach takes care about the fragments: desc is the first
32413254
* element in case of no SG.
32423255
*/
3243-
tx_q->tx_count_frames += nfrags + 1;
3244-
if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
3245-
!((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
3246-
priv->hwts_tx_en)) {
3247-
stmmac_tx_timer_arm(priv, queue);
3248-
} else {
3256+
tx_packets = (entry + 1) - first_tx;
3257+
tx_q->tx_count_frames += tx_packets;
3258+
3259+
if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en)
3260+
set_ic = true;
3261+
else if (!priv->tx_coal_frames)
3262+
set_ic = false;
3263+
else if (tx_packets > priv->tx_coal_frames)
3264+
set_ic = true;
3265+
else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets)
3266+
set_ic = true;
3267+
else
3268+
set_ic = false;
3269+
3270+
if (set_ic) {
32493271
if (likely(priv->extend_desc))
32503272
desc = &tx_q->dma_etx[entry].basic;
32513273
else
@@ -3254,6 +3276,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
32543276
tx_q->tx_count_frames = 0;
32553277
stmmac_set_tx_ic(priv, desc);
32563278
priv->xstats.tx_set_ic_bit++;
3279+
} else {
3280+
stmmac_tx_timer_arm(priv, queue);
32573281
}
32583282

32593283
/* We've used all descriptors we need for this skb, however,
@@ -3440,7 +3464,11 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
34403464
rx_q->rx_count_frames += priv->rx_coal_frames;
34413465
if (rx_q->rx_count_frames > priv->rx_coal_frames)
34423466
rx_q->rx_count_frames = 0;
3443-
use_rx_wd = priv->use_riwt && rx_q->rx_count_frames;
3467+
3468+
use_rx_wd = !priv->rx_coal_frames;
3469+
use_rx_wd |= rx_q->rx_count_frames > 0;
3470+
if (!priv->use_riwt)
3471+
use_rx_wd = false;
34443472

34453473
dma_wmb();
34463474
stmmac_set_rx_owner(priv, p, use_rx_wd);

0 commit comments

Comments
 (0)