@@ -679,134 +679,212 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
 }
 
 /**
- * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
  * @xdp_ring: XDP Tx ring
- * @budget: max number of frames to xmit
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+        xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+        dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+                         dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+        dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring
+ * @xdp_ring: XDP ring to clean
+ * @napi_budget: amount of descriptors that NAPI allows us to clean
  *
- * Returns true if cleanup/transmission is done.
+ * Returns count of cleaned descriptors
  */
-static bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, int budget)
+static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget)
 {
-        struct ice_tx_desc *tx_desc = NULL;
-        bool work_done = true;
-        struct xdp_desc desc;
-        dma_addr_t dma;
+        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+        int budget = napi_budget / tx_thresh;
+        u16 ntc = xdp_ring->next_to_clean;
+        u16 next_dd = xdp_ring->next_dd;
+        u16 cleared_dds = 0;
 
-        while (likely(budget-- > 0)) {
+        do {
+                struct ice_tx_desc *next_dd_desc;
+                u16 desc_cnt = xdp_ring->count;
                 struct ice_tx_buf *tx_buf;
+                u32 xsk_frames;
+                u16 i;
 
-                if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
-                        xdp_ring->tx_stats.tx_busy++;
-                        work_done = false;
-                        break;
-                }
-
-                tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
-
-                if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc))
+                next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
+                if (!(next_dd_desc->cmd_type_offset_bsz &
+                      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
                         break;
 
-                dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr);
-                xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma,
-                                                 desc.len);
+                cleared_dds++;
+                xsk_frames = 0;
 
-                tx_buf->bytecount = desc.len;
+                for (i = 0; i < tx_thresh; i++) {
+                        tx_buf = &xdp_ring->tx_buf[ntc];
 
-                tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
-                tx_desc->buf_addr = cpu_to_le64(dma);
-                tx_desc->cmd_type_offset_bsz =
-                        ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, desc.len, 0);
+                        if (tx_buf->raw_buf) {
+                                ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+                                tx_buf->raw_buf = NULL;
+                        } else {
+                                xsk_frames++;
+                        }
 
-                xdp_ring->next_to_use++;
-                if (xdp_ring->next_to_use == xdp_ring->count)
-                        xdp_ring->next_to_use = 0;
-        }
+                        ntc++;
+                        if (ntc >= xdp_ring->count)
+                                ntc = 0;
+                }
+                if (xsk_frames)
+                        xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+                next_dd_desc->cmd_type_offset_bsz = 0;
+                next_dd = next_dd + tx_thresh;
+                if (next_dd >= desc_cnt)
+                        next_dd = tx_thresh - 1;
+        } while (budget--);
 
-        if (tx_desc) {
-                ice_xdp_ring_update_tail(xdp_ring);
-                xsk_tx_release(xdp_ring->xsk_pool);
-        }
+        xdp_ring->next_to_clean = ntc;
+        xdp_ring->next_dd = next_dd;
 
-        return budget > 0 && work_done;
+        return cleared_dds * tx_thresh;
 }
 
 /**
- * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
- * @xdp_ring: XDP Tx ring
- * @tx_buf: Tx buffer to clean
+ * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
+ * @xdp_ring: XDP ring to produce the HW Tx descriptor on
+ * @desc: AF_XDP descriptor to pull the DMA address and length from
+ * @total_bytes: bytes accumulator that will be used for stats update
  */
-static void
-ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
+                         unsigned int *total_bytes)
 {
-        xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
-        dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
-                         dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
-        dma_unmap_len_set(tx_buf, len, 0);
+        struct ice_tx_desc *tx_desc;
+        dma_addr_t dma;
+
+        dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
+        xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);
+
+        tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
+        tx_desc->buf_addr = cpu_to_le64(dma);
+        tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+                                                      0, desc->len, 0);
+
+        *total_bytes += desc->len;
 }
 
 /**
- * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
- * @xdp_ring: XDP Tx ring
- * @budget: NAPI budget
- *
- * Returns true if cleanup/tranmission is done.
+ * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @total_bytes: bytes accumulator that will be used for stats update
  */
-bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget)
+static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+                               unsigned int *total_bytes)
 {
-        int total_packets = 0, total_bytes = 0;
-        s16 ntc = xdp_ring->next_to_clean;
+        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+        u16 ntu = xdp_ring->next_to_use;
         struct ice_tx_desc *tx_desc;
-        struct ice_tx_buf *tx_buf;
-        u32 xsk_frames = 0;
-        bool xmit_done;
+        u32 i;
 
-        tx_desc = ICE_TX_DESC(xdp_ring, ntc);
-        tx_buf = &xdp_ring->tx_buf[ntc];
-        ntc -= xdp_ring->count;
+        loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
+                dma_addr_t dma;
 
-        do {
-                if (!(tx_desc->cmd_type_offset_bsz &
-                      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
-                        break;
+                dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr);
+                xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len);
 
-                total_bytes += tx_buf->bytecount;
-                total_packets++;
+                tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
+                tx_desc->buf_addr = cpu_to_le64(dma);
+                tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+                                                              0, descs[i].len, 0);
 
-                if (tx_buf->raw_buf) {
-                        ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
-                        tx_buf->raw_buf = NULL;
-                } else {
-                        xsk_frames++;
-                }
+                *total_bytes += descs[i].len;
+        }
 
-                tx_desc->cmd_type_offset_bsz = 0;
-                tx_buf++;
-                tx_desc++;
-                ntc++;
+        xdp_ring->next_to_use = ntu;
 
-                if (unlikely(!ntc)) {
-                        ntc -= xdp_ring->count;
-                        tx_buf = xdp_ring->tx_buf;
-                        tx_desc = ICE_TX_DESC(xdp_ring, 0);
-                }
+        if (xdp_ring->next_to_use > xdp_ring->next_rs) {
+                tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+                tx_desc->cmd_type_offset_bsz |=
+                        cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+                xdp_ring->next_rs += tx_thresh;
+        }
+}
 
-                prefetch(tx_desc);
+/**
+ * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @nb_pkts: count of packets to be send
+ * @total_bytes: bytes accumulator that will be used for stats update
+ */
+static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+                                u32 nb_pkts, unsigned int *total_bytes)
+{
+        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+        u32 batched, leftover, i;
+
+        batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
+        leftover = nb_pkts & (PKTS_PER_BATCH - 1);
+        for (i = 0; i < batched; i += PKTS_PER_BATCH)
+                ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
+        for (; i < batched + leftover; i++)
+                ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
+
+        if (xdp_ring->next_to_use > xdp_ring->next_rs) {
+                struct ice_tx_desc *tx_desc;
+
+                tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+                tx_desc->cmd_type_offset_bsz |=
+                        cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+                xdp_ring->next_rs += tx_thresh;
+        }
+}
 
-        } while (likely(--budget));
+/**
+ * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @budget: number of free descriptors on HW Tx ring that can be used
+ * @napi_budget: amount of descriptors that NAPI allows us to clean
+ *
+ * Returns true if there is no more work that needs to be done, false otherwise
+ */
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget)
+{
+        struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
+        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+        u32 nb_pkts, nb_processed = 0;
+        unsigned int total_bytes = 0;
+
+        if (budget < tx_thresh)
+                budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget);
+
+        nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
+        if (!nb_pkts)
+                return true;
+
+        if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
+                struct ice_tx_desc *tx_desc;
+
+                nb_processed = xdp_ring->count - xdp_ring->next_to_use;
+                ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
+                tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+                tx_desc->cmd_type_offset_bsz |=
+                        cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+                xdp_ring->next_rs = tx_thresh - 1;
+                xdp_ring->next_to_use = 0;
+        }
 
-        ntc += xdp_ring->count;
-        xdp_ring->next_to_clean = ntc;
+        ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
+                            &total_bytes);
 
-        if (xsk_frames)
-                xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+        ice_xdp_ring_update_tail(xdp_ring);
+        ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);
 
         if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
                 xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);
 
-        ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
-        xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
-
-        return budget > 0 && xmit_done;
+        return nb_pkts < budget;
 }
 
 /**
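The batching arithmetic in the new ice_fill_tx_hw_ring() is the core of this change: full PKTS_PER_BATCH-sized chunks go through the unrolled ice_xmit_pkt_batch(), and whatever does not fill a complete batch falls back to the single-descriptor ice_xmit_pkt(). Below is a minimal, self-contained sketch of that split, assuming a power-of-two batch size of 8 as a stand-in for PKTS_PER_BATCH; the helper names and the printf placeholders are hypothetical and this is not driver code.

```c
/*
 * Standalone illustration of the batch/leftover split used above.
 * BATCH stands in for PKTS_PER_BATCH and must be a power of two for
 * the mask trick to hold.
 */
#include <stdio.h>

#define BATCH 8                                 /* assumed batch size */
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))     /* power-of-two round down */

static void xmit_batch(unsigned int first)      /* would emit BATCH descriptors */
{
        printf("batch  : packets %u-%u\n", first, first + BATCH - 1);
}

static void xmit_one(unsigned int idx)          /* would emit a single descriptor */
{
        printf("single : packet %u\n", idx);
}

int main(void)
{
        unsigned int nb_pkts = 29;              /* e.g. what the XSK pool handed back */
        unsigned int batched, leftover, i;

        /* full batches first, then the tail one by one */
        batched = ALIGN_DOWN(nb_pkts, BATCH);
        leftover = nb_pkts & (BATCH - 1);

        for (i = 0; i < batched; i += BATCH)
                xmit_batch(i);
        for (; i < batched + leftover; i++)
                xmit_one(i);

        return 0;
}
```

With nb_pkts = 29 the sketch emits three full batches (packets 0-23) and five singles (24-28); because the batch size is a power of two, ALIGN_DOWN and the `& (BATCH - 1)` mask together cover every packet exactly once, which is the same guarantee the driver relies on.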