Skip to content

Commit 8bec9e0

Browse files
cpaasch-oaiNipaLocal
authored and committed
net/mlx5: Avoid copying payload to the skb's linear part
mlx5e_skb_from_cqe_mpwrq_nonlinear() copies MLX5E_RX_MAX_HEAD (256) bytes from the page-pool to the skb's linear part. Those 256 bytes include part of the payload. When attempting to do GRO in skb_gro_receive, if headlen > data_offset (and skb->head_frag is not set), we end up aggregating packets in the frag_list. This is of course not good when we are CPU-limited. It also causes a worse skb->len/truesize ratio. So, let's avoid copying parts of the payload to the linear part. The goal here is to err on the side of caution and prefer to copy too little instead of copying too much (because once it has been copied over, we trigger the above-described behavior in skb_gro_receive). So, we can do a rough estimate of the header space by looking at cqe_l3/l4_hdr_type and computing a lower-bound estimate. This is now done in mlx5e_cqe_get_min_hdr_len(). We always assume that TCP timestamps are present, as that's the most common use case. That header length is then used in mlx5e_skb_from_cqe_mpwrq_nonlinear for the headlen (which defines what is being copied over). We still allocate MLX5E_RX_MAX_HEAD for the skb so that if the networking stack needs to call pskb_may_pull() later on, we don't need to reallocate memory. 
This gives a nice throughput increase (ARM Neoverse-V2 with CX-7 NIC and LRO enabled): BEFORE: ======= (netserver pinned to core receiving interrupts) $ netperf -H 10.221.81.118 -T 80,9 -P 0 -l 60 -- -m 256K -M 256K 87380 16384 262144 60.01 32547.82 (netserver pinned to adjacent core receiving interrupts) $ netperf -H 10.221.81.118 -T 80,10 -P 0 -l 60 -- -m 256K -M 256K 87380 16384 262144 60.00 52531.67 AFTER: ====== (netserver pinned to core receiving interrupts) $ netperf -H 10.221.81.118 -T 80,9 -P 0 -l 60 -- -m 256K -M 256K 87380 16384 262144 60.00 52896.06 (netserver pinned to adjacent core receiving interrupts) $ netperf -H 10.221.81.118 -T 80,10 -P 0 -l 60 -- -m 256K -M 256K 87380 16384 262144 60.00 85094.90 Signed-off-by: Christoph Paasch <[email protected]> Signed-off-by: NipaLocal <nipa@local>
1 parent ee58a0d commit 8bec9e0

File tree

1 file changed

+32
-1
lines changed
  • drivers/net/ethernet/mellanox/mlx5/core

1 file changed

+32
-1
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1986,13 +1986,40 @@ mlx5e_shampo_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
19861986
} while (data_bcnt);
19871987
}
19881988

1989+
static u16
1990+
mlx5e_cqe_get_min_hdr_len(const struct mlx5_cqe64 *cqe)
1991+
{
1992+
u16 min_hdr_len = sizeof(struct ethhdr);
1993+
u8 l3_type = get_cqe_l3_hdr_type(cqe);
1994+
u8 l4_type = get_cqe_l4_hdr_type(cqe);
1995+
1996+
if (cqe_has_vlan(cqe))
1997+
min_hdr_len += VLAN_HLEN;
1998+
1999+
if (l3_type == CQE_L3_HDR_TYPE_IPV4)
2000+
min_hdr_len += sizeof(struct iphdr);
2001+
else if (l3_type == CQE_L3_HDR_TYPE_IPV6)
2002+
min_hdr_len += sizeof(struct ipv6hdr);
2003+
2004+
if (l4_type == CQE_L4_HDR_TYPE_UDP)
2005+
min_hdr_len += sizeof(struct udphdr);
2006+
else if (l4_type & (CQE_L4_HDR_TYPE_TCP_NO_ACK |
2007+
CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA |
2008+
CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA))
2009+
/* Previous condition works because we know that
2010+
* l4_type != 0x2 (CQE_L4_HDR_TYPE_UDP)
2011+
*/
2012+
min_hdr_len += sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
2013+
2014+
return min_hdr_len;
2015+
}
2016+
19892017
static struct sk_buff *
19902018
mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
19912019
struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset,
19922020
u32 page_idx)
19932021
{
19942022
struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx];
1995-
u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
19962023
struct mlx5e_frag_page *head_page = frag_page;
19972024
struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf;
19982025
u32 frag_offset = head_offset;
@@ -2004,10 +2031,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
20042031
u32 linear_frame_sz;
20052032
u16 linear_data_len;
20062033
u16 linear_hr;
2034+
u16 headlen;
20072035
void *va;
20082036

20092037
prog = rcu_dereference(rq->xdp_prog);
20102038

2039+
headlen = min3(mlx5e_cqe_get_min_hdr_len(cqe), cqe_bcnt,
2040+
(u16)MLX5E_RX_MAX_HEAD);
2041+
20112042
if (prog) {
20122043
/* area for bpf_xdp_[store|load]_bytes */
20132044
net_prefetchw(netmem_address(frag_page->netmem) + frag_offset);

0 commit comments

Comments
 (0)