
Commit 8221c5e

Björn Töpel authored and Jeff Kirsher committed
ixgbe: add AF_XDP zero-copy Tx support
This patch adds zero-copy Tx support for AF_XDP sockets. It implements the ndo_xsk_async_xmit netdev ndo and performs all the Tx logic from a NAPI context. This means pulling egress packets from the Tx ring, placing the frames on the NIC HW descriptor ring, and completing sent frames back to the application via the completion ring.

The regular XDP Tx ring is used for AF_XDP as well. The rationale for this is as follows: XDP_REDIRECT guarantees mutual exclusion between different NAPI contexts based on CPU id. In other words, a netdev can XDP_REDIRECT to another netdev with a different NAPI context, since the operation is bound to a specific core and each core has its own hardware ring. As the AF_XDP Tx action runs in the same NAPI context and uses the same ring, it is protected from XDP_REDIRECT actions by the exact same mechanism.

As with AF_XDP Rx, all AF_XDP Tx specific functions are added to ixgbe_xsk.c.

Signed-off-by: Björn Töpel <[email protected]>
Tested-by: William Tu <[email protected]>
Tested-by: Andrew Bowers <[email protected]>
Signed-off-by: Jeff Kirsher <[email protected]>
1 parent 05ae861 · commit 8221c5e
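
Editor's note: for context, the userspace half of this path looks roughly like the sketch below. It assumes a zero-copy AF_XDP socket already set up with libbpf's xsk.h helpers (which postdate this commit; the in-tree samples/bpf/xdpsock_user.c of this era open-codes the same rings), and kick_tx(), frame_addr, and frame_len are illustrative names, not part of this patch.

/* Illustrative only: enqueue one frame on the AF_XDP Tx ring and kick
 * the kernel. The sendto() lands in xsk_sendmsg(), which for a
 * zero-copy socket invokes the driver's ndo_xsk_async_xmit, scheduling
 * NAPI on the queue so the driver's zero-copy xmit routine runs.
 */
#include <errno.h>
#include <stddef.h>
#include <sys/socket.h>
#include <bpf/xsk.h>

static int kick_tx(struct xsk_socket *xsk, struct xsk_ring_prod *tx,
		   __u64 frame_addr, __u32 frame_len)
{
	__u32 idx;

	/* Reserve one slot in the userspace Tx ring. */
	if (xsk_ring_prod__reserve(tx, 1, &idx) != 1)
		return -EAGAIN;	/* ring full; reap completions first */

	xsk_ring_prod__tx_desc(tx, idx)->addr = frame_addr;
	xsk_ring_prod__tx_desc(tx, idx)->len = frame_len;
	xsk_ring_prod__submit(tx, 1);

	/* Kick the kernel so the NAPI Tx path picks the frame up. */
	if (sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0) < 0 &&
	    errno != EAGAIN && errno != EBUSY && errno != ENOBUFS)
		return -errno;

	return 0;
}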

File tree

3 files changed: +195 -1 lines changed

drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

Lines changed: 16 additions & 1 deletion
@@ -3161,7 +3161,11 @@ int ixgbe_poll(struct napi_struct *napi, int budget)
 #endif
 
 	ixgbe_for_each_ring(ring, q_vector->tx) {
-		if (!ixgbe_clean_tx_irq(q_vector, ring, budget))
+		bool wd = ring->xsk_umem ?
+			  ixgbe_clean_xdp_tx_irq(q_vector, ring, budget) :
+			  ixgbe_clean_tx_irq(q_vector, ring, budget);
+
+		if (!wd)
 			clean_complete = false;
 	}
 
@@ -3470,6 +3474,10 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
 	u32 txdctl = IXGBE_TXDCTL_ENABLE;
 	u8 reg_idx = ring->reg_idx;
 
+	ring->xsk_umem = NULL;
+	if (ring_is_xdp(ring))
+		ring->xsk_umem = ixgbe_xsk_umem(adapter, ring);
+
 	/* disable queue to avoid issues while updating state */
 	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), 0);
 	IXGBE_WRITE_FLUSH(hw);
@@ -5942,6 +5950,11 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
 	u16 i = tx_ring->next_to_clean;
 	struct ixgbe_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
 
+	if (tx_ring->xsk_umem) {
+		ixgbe_xsk_clean_tx_ring(tx_ring);
+		goto out;
+	}
+
 	while (i != tx_ring->next_to_use) {
 		union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
 
@@ -5993,6 +6006,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
 	if (!ring_is_xdp(tx_ring))
 		netdev_tx_reset_queue(txring_txq(tx_ring));
 
+out:
 	/* reset next_to_use and next_to_clean */
 	tx_ring->next_to_use = 0;
 	tx_ring->next_to_clean = 0;
@@ -10348,6 +10362,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_features_check	= ixgbe_features_check,
 	.ndo_bpf		= ixgbe_xdp,
 	.ndo_xdp_xmit		= ixgbe_xdp_xmit,
+	.ndo_xsk_async_xmit	= ixgbe_xsk_async_xmit,
 };
 
 static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter,

drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h

Lines changed: 4 additions & 0 deletions
@@ -42,5 +42,9 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
 			  struct ixgbe_ring *rx_ring,
 			  const int budget);
 void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring);
+bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
+			    struct ixgbe_ring *tx_ring, int napi_budget);
+int ixgbe_xsk_async_xmit(struct net_device *dev, u32 queue_id);
+void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring);
 
 #endif /* #define _IXGBE_TXRX_COMMON_H_ */

drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c

Lines changed: 175 additions & 0 deletions
@@ -624,3 +624,178 @@ void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
 		}
 	}
 }
+
+static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
+{
+	union ixgbe_adv_tx_desc *tx_desc = NULL;
+	struct ixgbe_tx_buffer *tx_bi;
+	bool work_done = true;
+	u32 len, cmd_type;
+	dma_addr_t dma;
+
+	while (budget-- > 0) {
+		if (unlikely(!ixgbe_desc_unused(xdp_ring))) {
+			work_done = false;
+			break;
+		}
+
+		if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len))
+			break;
+
+		dma_sync_single_for_device(xdp_ring->dev, dma, len,
+					   DMA_BIDIRECTIONAL);
+
+		tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
+		tx_bi->bytecount = len;
+		tx_bi->xdpf = NULL;
+
+		tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		/* put descriptor type bits */
+		cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+			   IXGBE_ADVTXD_DCMD_DEXT |
+			   IXGBE_ADVTXD_DCMD_IFCS;
+		cmd_type |= len | IXGBE_TXD_CMD;
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+		tx_desc->read.olinfo_status =
+			cpu_to_le32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
+		xdp_ring->next_to_use++;
+		if (xdp_ring->next_to_use == xdp_ring->count)
+			xdp_ring->next_to_use = 0;
+	}
+
+	if (tx_desc) {
+		ixgbe_xdp_ring_update_tail(xdp_ring);
+		xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+	}
+
+	return !!budget && work_done;
+}
+
+static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring,
+				      struct ixgbe_tx_buffer *tx_bi)
+{
+	xdp_return_frame(tx_bi->xdpf);
+	dma_unmap_single(tx_ring->dev,
+			 dma_unmap_addr(tx_bi, dma),
+			 dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_bi, len, 0);
+}
+
+bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
+			    struct ixgbe_ring *tx_ring, int napi_budget)
+{
+	unsigned int total_packets = 0, total_bytes = 0;
+	u32 i = tx_ring->next_to_clean, xsk_frames = 0;
+	unsigned int budget = q_vector->tx.work_limit;
+	struct xdp_umem *umem = tx_ring->xsk_umem;
+	union ixgbe_adv_tx_desc *tx_desc;
+	struct ixgbe_tx_buffer *tx_bi;
+	bool xmit_done;
+
+	tx_bi = &tx_ring->tx_buffer_info[i];
+	tx_desc = IXGBE_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+			break;
+
+		total_bytes += tx_bi->bytecount;
+		total_packets += tx_bi->gso_segs;
+
+		if (tx_bi->xdpf)
+			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
+		else
+			xsk_frames++;
+
+		tx_bi->xdpf = NULL;
+		total_bytes += tx_bi->bytecount;
+
+		tx_bi++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_bi = tx_ring->tx_buffer_info;
+			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+
+	if (xsk_frames)
+		xsk_umem_complete_tx(umem, xsk_frames);
+
+	xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
+	return budget > 0 && xmit_done;
+}
+
+int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_ring *ring;
+
+	if (test_bit(__IXGBE_DOWN, &adapter->state))
+		return -ENETDOWN;
+
+	if (!READ_ONCE(adapter->xdp_prog))
+		return -ENXIO;
+
+	if (qid >= adapter->num_xdp_queues)
+		return -ENXIO;
+
+	if (!adapter->xsk_umems || !adapter->xsk_umems[qid])
+		return -ENXIO;
+
+	ring = adapter->xdp_ring[qid];
+	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
+		u64 eics = BIT_ULL(ring->q_vector->v_idx);
+
+		ixgbe_irq_rearm_queues(adapter, eics);
+	}
+
+	return 0;
+}
+
+void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring)
+{
+	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
+	struct xdp_umem *umem = tx_ring->xsk_umem;
+	struct ixgbe_tx_buffer *tx_bi;
+	u32 xsk_frames = 0;
+
+	while (ntc != ntu) {
+		tx_bi = &tx_ring->tx_buffer_info[ntc];
+
+		if (tx_bi->xdpf)
+			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
+		else
+			xsk_frames++;
+
+		tx_bi->xdpf = NULL;
+
+		ntc++;
+		if (ntc == tx_ring->count)
+			ntc = 0;
+	}
+
+	if (xsk_frames)
+		xsk_umem_complete_tx(umem, xsk_frames);
+}
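
Editor's note: xsk_umem_complete_tx() above hands finished frames back to the application via the completion ring. A minimal userspace counterpart might look like the sketch below, again assuming libbpf's xsk.h helpers; reap_completions(), free_frames, and max are illustrative names, not part of this patch.

/* Illustrative only: drain the completion ring that the kernel-side
 * xsk_umem_complete_tx() fills. Each entry handed back is a umem
 * address whose frame userspace may now reuse for a future Tx.
 */
#include <bpf/xsk.h>

static unsigned int reap_completions(struct xsk_ring_cons *cq,
				     __u64 *free_frames, unsigned int max)
{
	unsigned int i, n;
	__u32 idx;

	/* Peek up to max completed entries, copy out their addresses,
	 * then release the slots back to the ring. */
	n = xsk_ring_cons__peek(cq, max, &idx);
	for (i = 0; i < n; i++)
		free_frames[i] = *xsk_ring_cons__comp_addr(cq, idx + i);
	xsk_ring_cons__release(cq, n);

	return n;
}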
