Commit 2932279

mfijalko authored and anguy11 committed

ice: xsk: change batched Tx descriptor cleaning
AF_XDP Tx descriptor cleaning in the ice driver currently works in a "lazy" way: descriptors are not cleaned immediately after send. Instead, we hold off on cleaning until free space in the ring drops below a particular threshold. This was supposed to reduce the amount of unnecessary cleaning work; rather than keeping the ring empty, the ring stayed saturated.

In the AF_XDP realm, cleaning Tx descriptors implies producing them to the CQ. This is the way of letting user space know that a particular descriptor has been sent, as John points out in [0].

We tried to implement serial descriptor cleaning to be used in conjunction with batched cleaning, but it made the code base more convoluted and probably harder to maintain in the future. Therefore we step away from batched cleaning in its current form in favor of an approach where we set the RS bit on the last descriptor of every batch and always clean at the beginning of ice_xmit_zc().

This means we give up a bit of Tx performance, but it doesn't hurt the l2fwd scenario, which is far more meaningful than txonly since the latter can be treated as an AF_XDP based packet generator. l2fwd is not hurt because the Tx side is much faster than Rx, and Rx is the one that has to catch up to Tx.

FWIW, Tx descriptors are still produced in a batched way.

[0]: https://lore.kernel.org/bpf/[email protected]/

Fixes: 126cdfe ("ice: xsk: Improve AF_XDP ZC Tx and use batching API")
Signed-off-by: Maciej Fijalkowski <[email protected]>
Tested-by: George Kuruvinakunnel <[email protected]>
Signed-off-by: Tony Nguyen <[email protected]>
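For context on what "producing them to CQ" buys user space: once the driver calls xsk_tx_completed(), the completed umem addresses appear on the completion queue and the application can recycle those frames. Below is a minimal sketch of that consumer side, assuming libxdp's xsk.h ring helpers; recycle_umem_frame() is a hypothetical application helper, not part of any library.

```c
#include <xdp/xsk.h>

/* Hypothetical application helper: return the frame to a free list. */
static void recycle_umem_frame(__u64 addr)
{
	(void)addr; /* application-specific bookkeeping */
}

/* Reap up to @budget Tx completions from the completion queue @cq. */
static void reap_tx_completions(struct xsk_ring_cons *cq, __u32 budget)
{
	__u32 idx = 0;
	__u32 done = xsk_ring_cons__peek(cq, budget, &idx);

	for (__u32 i = 0; i < done; i++) {
		/* Each CQ entry is the umem address of a sent frame. */
		recycle_umem_frame(*xsk_ring_cons__comp_addr(cq, idx + i));
	}

	if (done)
		xsk_ring_cons__release(cq, done);
}
```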
1 parent 797666c commit 2932279

File tree

3 files changed: +64 −88 lines changed

drivers/net/ethernet/intel/ice/ice_txrx.c

Lines changed: 1 addition & 1 deletion

```diff
@@ -1467,7 +1467,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
 	bool wd;
 
 	if (tx_ring->xsk_pool)
-		wd = ice_xmit_zc(tx_ring, ICE_DESC_UNUSED(tx_ring), budget);
+		wd = ice_xmit_zc(tx_ring);
 	else if (ice_ring_is_xdp(tx_ring))
 		wd = true;
 	else
```

drivers/net/ethernet/intel/ice/ice_xsk.c

Lines changed: 61 additions & 82 deletions
```diff
@@ -788,69 +788,57 @@ ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
 }
 
 /**
- * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring
- * @xdp_ring: XDP ring to clean
- * @napi_budget: amount of descriptors that NAPI allows us to clean
- *
- * Returns count of cleaned descriptors
+ * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
+ * @xdp_ring: XDP Tx ring
  */
-static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget)
+static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
 {
-	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
-	int budget = napi_budget / tx_thresh;
-	u16 next_dd = xdp_ring->next_dd;
-	u16 ntc, cleared_dds = 0;
-
-	do {
-		struct ice_tx_desc *next_dd_desc;
-		u16 desc_cnt = xdp_ring->count;
-		struct ice_tx_buf *tx_buf;
-		u32 xsk_frames;
-		u16 i;
-
-		next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
-		if (!(next_dd_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
-			break;
+	u16 ntc = xdp_ring->next_to_clean;
+	struct ice_tx_desc *tx_desc;
+	u16 cnt = xdp_ring->count;
+	struct ice_tx_buf *tx_buf;
+	u16 xsk_frames = 0;
+	u16 last_rs;
+	int i;
 
-		cleared_dds++;
-		xsk_frames = 0;
-		if (likely(!xdp_ring->xdp_tx_active)) {
-			xsk_frames = tx_thresh;
-			goto skip;
-		}
+	last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1;
+	tx_desc = ICE_TX_DESC(xdp_ring, last_rs);
+	if ((tx_desc->cmd_type_offset_bsz &
+	    cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) {
+		if (last_rs >= ntc)
+			xsk_frames = last_rs - ntc + 1;
+		else
+			xsk_frames = last_rs + cnt - ntc + 1;
+	}
 
-		ntc = xdp_ring->next_to_clean;
+	if (!xsk_frames)
+		return;
 
-		for (i = 0; i < tx_thresh; i++) {
-			tx_buf = &xdp_ring->tx_buf[ntc];
+	if (likely(!xdp_ring->xdp_tx_active))
+		goto skip;
 
-			if (tx_buf->raw_buf) {
-				ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
-				tx_buf->raw_buf = NULL;
-			} else {
-				xsk_frames++;
-			}
+	ntc = xdp_ring->next_to_clean;
+	for (i = 0; i < xsk_frames; i++) {
+		tx_buf = &xdp_ring->tx_buf[ntc];
 
-			ntc++;
-			if (ntc >= xdp_ring->count)
-				ntc = 0;
+		if (tx_buf->raw_buf) {
+			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+			tx_buf->raw_buf = NULL;
+		} else {
+			xsk_frames++;
 		}
+
+		ntc++;
+		if (ntc >= xdp_ring->count)
+			ntc = 0;
+	}
 skip:
-		xdp_ring->next_to_clean += tx_thresh;
-		if (xdp_ring->next_to_clean >= desc_cnt)
-			xdp_ring->next_to_clean -= desc_cnt;
-		if (xsk_frames)
-			xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
-		next_dd_desc->cmd_type_offset_bsz = 0;
-		next_dd = next_dd + tx_thresh;
-		if (next_dd >= desc_cnt)
-			next_dd = tx_thresh - 1;
-	} while (--budget);
-
-	xdp_ring->next_dd = next_dd;
-
-	return cleared_dds * tx_thresh;
+	tx_desc->cmd_type_offset_bsz = 0;
+	xdp_ring->next_to_clean += xsk_frames;
+	if (xdp_ring->next_to_clean >= cnt)
+		xdp_ring->next_to_clean -= cnt;
+	if (xsk_frames)
+		xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
 }
 
 /**
```
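The wrap-aware count above is the heart of the reworked cleaning: once the last RS-marked descriptor reports DD, everything from next_to_clean up to and including it is complete. A standalone sketch of just that arithmetic (plain C with hypothetical values, not driver code):

```c
#include <assert.h>

/* Completed descriptors between ntc and last_rs (inclusive) on a ring of
 * cnt entries, mirroring the last_rs/ntc computation in the patch.
 */
static unsigned int completed_frames(unsigned int last_rs, unsigned int ntc,
				     unsigned int cnt)
{
	if (last_rs >= ntc)
		return last_rs - ntc + 1;	/* no wrap */
	return last_rs + cnt - ntc + 1;		/* last_rs wrapped past 0 */
}

int main(void)
{
	assert(completed_frames(149, 100, 512) == 50);	/* [100..149] */
	assert(completed_frames(9, 510, 512) == 12);	/* [510..511] + [0..9] */
	return 0;
}
```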
```diff
@@ -885,7 +873,6 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
 static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
 			       unsigned int *total_bytes)
 {
-	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
 	u16 ntu = xdp_ring->next_to_use;
 	struct ice_tx_desc *tx_desc;
 	u32 i;
@@ -905,13 +892,6 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de
 	}
 
 	xdp_ring->next_to_use = ntu;
-
-	if (xdp_ring->next_to_use > xdp_ring->next_rs) {
-		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
-		tx_desc->cmd_type_offset_bsz |=
-			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
-		xdp_ring->next_rs += tx_thresh;
-	}
 }
 
 /**
@@ -924,7 +904,6 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de
 static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
 				u32 nb_pkts, unsigned int *total_bytes)
 {
-	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
 	u32 batched, leftover, i;
 
 	batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
@@ -933,54 +912,54 @@ static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *d
 		ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
 	for (; i < batched + leftover; i++)
 		ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
+}
 
-	if (xdp_ring->next_to_use > xdp_ring->next_rs) {
-		struct ice_tx_desc *tx_desc;
+/**
+ * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU)
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ */
+static void ice_set_rs_bit(struct ice_tx_ring *xdp_ring)
+{
+	u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
+	struct ice_tx_desc *tx_desc;
 
-		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
-		tx_desc->cmd_type_offset_bsz |=
-			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
-		xdp_ring->next_rs += tx_thresh;
-	}
+	tx_desc = ICE_TX_DESC(xdp_ring, ntu);
+	tx_desc->cmd_type_offset_bsz |=
+		cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
 }
 
 /**
  * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
  * @xdp_ring: XDP ring to produce the HW Tx descriptors on
- * @budget: number of free descriptors on HW Tx ring that can be used
- * @napi_budget: amount of descriptors that NAPI allows us to clean
  *
  * Returns true if there is no more work that needs to be done, false otherwise
  */
-bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget)
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring)
 {
 	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
-	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
 	u32 nb_pkts, nb_processed = 0;
 	unsigned int total_bytes = 0;
+	int budget;
+
+	ice_clean_xdp_irq_zc(xdp_ring);
 
-	if (budget < tx_thresh)
-		budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget);
+	budget = ICE_DESC_UNUSED(xdp_ring);
+	budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring));
 
 	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
 	if (!nb_pkts)
 		return true;
 
 	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
-		struct ice_tx_desc *tx_desc;
-
 		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
 		ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
-		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
-		tx_desc->cmd_type_offset_bsz |=
-			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
-		xdp_ring->next_rs = tx_thresh - 1;
 		xdp_ring->next_to_use = 0;
 	}
 
 	ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
 			    &total_bytes);
 
+	ice_set_rs_bit(xdp_ring);
 	ice_xdp_ring_update_tail(xdp_ring);
 	ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);
```
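Note how ice_xmit_zc() now derives its own budget: the free ring slots, capped at a quarter of the ring so one call cannot flood it. A small sketch of that clamp, under the assumption that ICE_DESC_UNUSED follows the usual "one slot kept empty" ring formula:

```c
#include <stdio.h>

/* Free slots on a ring that keeps one slot empty to tell full from empty;
 * assumed to match ICE_DESC_UNUSED in spirit.
 */
static unsigned int ring_unused(unsigned int ntc, unsigned int ntu,
				unsigned int cnt)
{
	return (ntc > ntu ? 0 : cnt) + ntc - ntu - 1;
}

int main(void)
{
	unsigned int cnt = 512;
	unsigned int budget = ring_unused(100, 400, cnt);	/* 211 free */
	unsigned int quarter = cnt / 4;		/* ICE_RING_QUARTER analogue */

	if (budget > quarter)
		budget = quarter;
	printf("budget = %u\n", budget);	/* prints 128 */
	return 0;
}
```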

drivers/net/ethernet/intel/ice/ice_xsk.h

Lines changed: 2 additions & 5 deletions

```diff
@@ -26,13 +26,10 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count);
 bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
 void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring);
 void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring);
-bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget);
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring);
 int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc);
 #else
-static inline bool
-ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring,
-	    u32 __always_unused budget,
-	    int __always_unused napi_budget)
+static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring)
 {
 	return false;
 }
```
