Commit f8e284a

sriramy authored and kuba-moo committed
igb: Add AF_XDP zero-copy Tx support
Add support for AF_XDP zero-copy transmit path.

A new TX buffer type IGB_TYPE_XSK is introduced to indicate that the Tx
frame was allocated from the xsk buff pool, so igb_clean_tx_ring() and
igb_clean_tx_irq() can clean the buffers correctly based on type.

igb_xmit_zc() performs the actual packet transmit when AF_XDP zero-copy
is enabled. We share the TX ring between slow path, XDP and AF_XDP
zero-copy, so we use the netdev queue lock to ensure mutual exclusion.

Signed-off-by: Sriram Yagnaraman <[email protected]>
[Kurt: Set olinfo_status in igb_xmit_zc() so that frames are
 transmitted, use READ_ONCE() for xsk_pool, check Tx disabled and
 carrier in igb_xmit_zc(), add FIXME for RS bit]
Signed-off-by: Kurt Kanzenbach <[email protected]>
Reviewed-by: Maciej Fijalkowski <[email protected]>
Tested-by: George Kuruvinakunnel <[email protected]>
Signed-off-by: Tony Nguyen <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 2c61960 commit f8e284a

3 files changed: +116, -10 lines
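For context before the per-file diffs: AF_XDP zero-copy Tx is driven from
userspace through an XDP socket bound to the queue. Below is a minimal
sketch of the Tx side using libxdp's xsk helpers; it assumes a UMEM and
socket have already been set up, and the helper name xsk_tx_one is
illustrative, not part of this patch:

	#include <sys/socket.h>
	#include <xdp/xsk.h>	/* libxdp xsk helpers */

	/* Post one frame (already written at 'addr' in the UMEM) on the
	 * Tx ring and, if the kernel asked for it, kick the driver so
	 * NAPI runs and igb_xmit_zc() picks the descriptor up.
	 */
	static void xsk_tx_one(struct xsk_socket *xsk, struct xsk_ring_prod *tx,
			       __u64 addr, __u32 len)
	{
		struct xdp_desc *desc;
		__u32 idx;

		if (xsk_ring_prod__reserve(tx, 1, &idx) != 1)
			return;	/* Tx ring full; retry after completions */

		desc = xsk_ring_prod__tx_desc(tx, idx);
		desc->addr = addr;
		desc->len = len;
		xsk_ring_prod__submit(tx, 1);

		/* With XDP_USE_NEED_WAKEUP the driver only transmits after
		 * an explicit wakeup, which lands in igb_xsk_wakeup().
		 */
		if (xsk_ring_prod__needs_wakeup(tx))
			sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT,
			       NULL, 0);
	}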

drivers/net/ethernet/intel/igb/igb.h

Lines changed: 2 additions & 0 deletions
@@ -258,6 +258,7 @@ enum igb_tx_flags {
 enum igb_tx_buf_type {
 	IGB_TYPE_SKB = 0,
 	IGB_TYPE_XDP,
+	IGB_TYPE_XSK
 };
 
 /* wrapper around a pointer to a socket buffer,
@@ -859,6 +860,7 @@ bool igb_alloc_rx_buffers_zc(struct igb_ring *rx_ring,
 void igb_clean_rx_ring_zc(struct igb_ring *rx_ring);
 int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector,
 			struct xsk_buff_pool *xsk_pool, const int budget);
+bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool);
 int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
 
 #endif /* _IGB_H_ */

drivers/net/ethernet/intel/igb/igb_main.c

Lines changed: 51 additions & 10 deletions
@@ -2983,6 +2983,9 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
 	if (unlikely(!tx_ring))
 		return -ENXIO;
 
+	if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags)))
+		return -ENXIO;
+
 	nq = txring_txq(tx_ring);
 	__netif_tx_lock(nq, cpu);
 
@@ -3330,7 +3333,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		netdev->priv_flags |= IFF_SUPP_NOFCS;
 
 	netdev->priv_flags |= IFF_UNICAST_FLT;
-	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
+	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+			       NETDEV_XDP_ACT_XSK_ZEROCOPY;
 
 	/* MTU range: 68 - 9216 */
 	netdev->min_mtu = ETH_MIN_MTU;
@@ -4904,15 +4908,20 @@ void igb_clean_tx_ring(struct igb_ring *tx_ring)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct igb_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
+	u32 xsk_frames = 0;
 
 	while (i != tx_ring->next_to_use) {
 		union e1000_adv_tx_desc *eop_desc, *tx_desc;
 
 		/* Free all the Tx ring sk_buffs or xdp frames */
-		if (tx_buffer->type == IGB_TYPE_SKB)
+		if (tx_buffer->type == IGB_TYPE_SKB) {
 			dev_kfree_skb_any(tx_buffer->skb);
-		else
+		} else if (tx_buffer->type == IGB_TYPE_XDP) {
 			xdp_return_frame(tx_buffer->xdpf);
+		} else if (tx_buffer->type == IGB_TYPE_XSK) {
+			xsk_frames++;
+			goto skip_for_xsk;
+		}
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
@@ -4943,6 +4952,7 @@ void igb_clean_tx_ring(struct igb_ring *tx_ring)
 				       DMA_TO_DEVICE);
 		}
 
+skip_for_xsk:
 		tx_buffer->next_to_watch = NULL;
 
 		/* move us one more past the eop_desc for start of next pkt */
@@ -4957,6 +4967,9 @@ void igb_clean_tx_ring(struct igb_ring *tx_ring)
 	/* reset BQL for queue */
 	netdev_tx_reset_queue(txring_txq(tx_ring));
 
+	if (tx_ring->xsk_pool && xsk_frames)
+		xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
+
 	/* reset next_to_use and next_to_clean */
 	tx_ring->next_to_use = 0;
 	tx_ring->next_to_clean = 0;
@@ -6490,6 +6503,9 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 		return NETDEV_TX_BUSY;
 	}
 
+	if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags)))
+		return NETDEV_TX_BUSY;
+
 	/* record the location of the first descriptor for this packet */
 	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
 	first->type = IGB_TYPE_SKB;
@@ -8264,13 +8280,18 @@ static int igb_poll(struct napi_struct *napi, int budget)
  **/
 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
 {
-	struct igb_adapter *adapter = q_vector->adapter;
-	struct igb_ring *tx_ring = q_vector->tx.ring;
-	struct igb_tx_buffer *tx_buffer;
-	union e1000_adv_tx_desc *tx_desc;
 	unsigned int total_bytes = 0, total_packets = 0;
+	struct igb_adapter *adapter = q_vector->adapter;
 	unsigned int budget = q_vector->tx.work_limit;
+	struct igb_ring *tx_ring = q_vector->tx.ring;
 	unsigned int i = tx_ring->next_to_clean;
+	union e1000_adv_tx_desc *tx_desc;
+	struct igb_tx_buffer *tx_buffer;
+	struct xsk_buff_pool *xsk_pool;
+	int cpu = smp_processor_id();
+	bool xsk_xmit_done = true;
+	struct netdev_queue *nq;
+	u32 xsk_frames = 0;
 
 	if (test_bit(__IGB_DOWN, &adapter->state))
 		return true;
@@ -8301,10 +8322,14 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
 		total_packets += tx_buffer->gso_segs;
 
 		/* free the skb */
-		if (tx_buffer->type == IGB_TYPE_SKB)
+		if (tx_buffer->type == IGB_TYPE_SKB) {
 			napi_consume_skb(tx_buffer->skb, napi_budget);
-		else
+		} else if (tx_buffer->type == IGB_TYPE_XDP) {
 			xdp_return_frame(tx_buffer->xdpf);
+		} else if (tx_buffer->type == IGB_TYPE_XSK) {
+			xsk_frames++;
+			goto skip_for_xsk;
+		}
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
@@ -8336,6 +8361,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
 			}
 		}
 
+skip_for_xsk:
 		/* move us one more past the eop_desc for start of next pkt */
 		tx_buffer++;
 		tx_desc++;
@@ -8364,6 +8390,21 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
 	q_vector->tx.total_bytes += total_bytes;
 	q_vector->tx.total_packets += total_packets;
 
+	xsk_pool = READ_ONCE(tx_ring->xsk_pool);
+	if (xsk_pool) {
+		if (xsk_frames)
+			xsk_tx_completed(xsk_pool, xsk_frames);
+		if (xsk_uses_need_wakeup(xsk_pool))
+			xsk_set_tx_need_wakeup(xsk_pool);
+
+		nq = txring_txq(tx_ring);
+		__netif_tx_lock(nq, cpu);
+		/* Avoid transmit queue timeout since we share it with the slow path */
+		txq_trans_cond_update(nq);
+		xsk_xmit_done = igb_xmit_zc(tx_ring, xsk_pool);
+		__netif_tx_unlock(nq);
+	}
+
 	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
 		struct e1000_hw *hw = &adapter->hw;
 
@@ -8426,7 +8467,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
 		}
 	}
 
-	return !!budget;
+	return !!budget && xsk_xmit_done;
 }
 
 /**
drivers/net/ethernet/intel/igb/igb_xsk.c

Lines changed: 63 additions & 0 deletions
@@ -461,6 +461,69 @@ int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector,
 	return failure ? budget : (int)total_packets;
 }
 
+bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool)
+{
+	unsigned int budget = igb_desc_unused(tx_ring);
+	u32 cmd_type, olinfo_status, nb_pkts, i = 0;
+	struct xdp_desc *descs = xsk_pool->tx_descs;
+	union e1000_adv_tx_desc *tx_desc = NULL;
+	struct igb_tx_buffer *tx_buffer_info;
+	unsigned int total_bytes = 0;
+	dma_addr_t dma;
+
+	if (!netif_carrier_ok(tx_ring->netdev))
+		return true;
+
+	if (test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags))
+		return true;
+
+	nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget);
+	if (!nb_pkts)
+		return true;
+
+	while (nb_pkts-- > 0) {
+		dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr);
+		xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len);
+
+		tx_buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+		tx_buffer_info->bytecount = descs[i].len;
+		tx_buffer_info->type = IGB_TYPE_XSK;
+		tx_buffer_info->xdpf = NULL;
+		tx_buffer_info->gso_segs = 1;
+		tx_buffer_info->time_stamp = jiffies;
+
+		tx_desc = IGB_TX_DESC(tx_ring, tx_ring->next_to_use);
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		/* put descriptor type bits */
+		cmd_type = E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_DEXT |
+			   E1000_ADVTXD_DCMD_IFCS;
+		olinfo_status = descs[i].len << E1000_ADVTXD_PAYLEN_SHIFT;
+
+		/* FIXME: This sets the Report Status (RS) bit for every
+		 * descriptor. One nice to have optimization would be to set it
+		 * only for the last descriptor in the whole batch. See Intel
+		 * ice driver for an example on how to do it.
+		 */
+		cmd_type |= descs[i].len | IGB_TXD_DCMD;
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+
+		total_bytes += descs[i].len;
+
+		i++;
+		tx_ring->next_to_use++;
+		tx_buffer_info->next_to_watch = tx_desc;
+		if (tx_ring->next_to_use == tx_ring->count)
+			tx_ring->next_to_use = 0;
+	}
+
+	netdev_tx_sent_queue(txring_txq(tx_ring), total_bytes);
+	igb_xdp_ring_update_tail(tx_ring);
+
+	return nb_pkts < budget;
+}
+
 int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
 {
 	struct igb_adapter *adapter = netdev_priv(dev);
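The FIXME in igb_xmit_zc() points at a follow-up optimization: request a
hardware completion report once per batch rather than once per
descriptor. A hedged sketch of what that could look like, loosely
modeled on the ice driver the comment references; igb_set_rs_bit() is a
hypothetical helper, not part of this patch:

	/* Hypothetical: build descriptors without E1000_ADVTXD_DCMD_RS in
	 * the loop above, then tag only the batch's final descriptor so
	 * the NIC writes back DD status once per batch instead of once
	 * per frame.
	 */
	static void igb_set_rs_bit(struct igb_ring *tx_ring)
	{
		u16 last = tx_ring->next_to_use ? tx_ring->next_to_use - 1
						: tx_ring->count - 1;
		union e1000_adv_tx_desc *tx_desc = IGB_TX_DESC(tx_ring, last);

		tx_desc->read.cmd_type_len |= cpu_to_le32(E1000_ADVTXD_DCMD_RS);
	}

The cleanup path would then also have to track which descriptor carries
the RS bit before checking DD, which is why the patch leaves this as a
FIXME instead of doing it here.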
