Skip to content

Commit 5fb6237

Browse files
committed
Merge branch 'dpaa2-eth-send-a-scatter-gather-FD-instead-of-realloc-ing'
Ioana Ciornei says: ==================== dpaa2-eth: send a scatter-gather FD instead of realloc-ing This patch set changes the behaviour in case the Tx path is confronted with an SKB with insufficient headroom for our hardware necessities (SW annotation area). In the first patch, instead of realloc-ing the SKB we now send an S/G frame descriptor, while the second one adds a new software-held counter to account for these types of frames. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 17af2c4 + 4c96c0a commit 5fb6237

File tree

4 files changed

+166
-32
lines changed

4 files changed

+166
-32
lines changed

drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ static int dpaa2_dbg_cpu_show(struct seq_file *file, void *offset)
2121
seq_printf(file, "Per-CPU stats for %s\n", priv->net_dev->name);
2222
seq_printf(file, "%s%16s%16s%16s%16s%16s%16s%16s%16s%16s\n",
2323
"CPU", "Rx", "Rx Err", "Rx SG", "Tx", "Tx Err", "Tx conf",
24-
"Tx SG", "Tx realloc", "Enq busy");
24+
"Tx SG", "Tx converted to SG", "Enq busy");
2525

2626
for_each_online_cpu(i) {
2727
stats = per_cpu_ptr(priv->percpu_stats, i);
@@ -35,7 +35,7 @@ static int dpaa2_dbg_cpu_show(struct seq_file *file, void *offset)
3535
stats->tx_errors,
3636
extras->tx_conf_frames,
3737
extras->tx_sg_frames,
38-
extras->tx_reallocs,
38+
extras->tx_converted_sg_frames,
3939
extras->tx_portal_busy);
4040
}
4141

drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c

Lines changed: 151 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,86 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv,
685685
return err;
686686
}
687687

688+
/* Create a SG frame descriptor based on a linear skb.
689+
*
690+
* This function is used on the Tx path when the skb headroom is not large
691+
* enough for the HW requirements, thus instead of realloc-ing the skb we
692+
* create a SG frame descriptor with only one entry.
693+
*/
694+
static int build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
695+
struct sk_buff *skb,
696+
struct dpaa2_fd *fd)
697+
{
698+
struct device *dev = priv->net_dev->dev.parent;
699+
struct dpaa2_eth_sgt_cache *sgt_cache;
700+
struct dpaa2_sg_entry *sgt;
701+
struct dpaa2_eth_swa *swa;
702+
dma_addr_t addr, sgt_addr;
703+
void *sgt_buf = NULL;
704+
int sgt_buf_size;
705+
int err;
706+
707+
/* Prepare the HW SGT structure */
708+
sgt_cache = this_cpu_ptr(priv->sgt_cache);
709+
sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry);
710+
711+
if (sgt_cache->count == 0)
712+
sgt_buf = kzalloc(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN,
713+
GFP_ATOMIC);
714+
else
715+
sgt_buf = sgt_cache->buf[--sgt_cache->count];
716+
if (unlikely(!sgt_buf))
717+
return -ENOMEM;
718+
719+
sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN);
720+
sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
721+
722+
addr = dma_map_single(dev, skb->data, skb->len, DMA_BIDIRECTIONAL);
723+
if (unlikely(dma_mapping_error(dev, addr))) {
724+
err = -ENOMEM;
725+
goto data_map_failed;
726+
}
727+
728+
/* Fill in the HW SGT structure */
729+
dpaa2_sg_set_addr(sgt, addr);
730+
dpaa2_sg_set_len(sgt, skb->len);
731+
dpaa2_sg_set_final(sgt, true);
732+
733+
/* Store the skb backpointer in the SGT buffer */
734+
swa = (struct dpaa2_eth_swa *)sgt_buf;
735+
swa->type = DPAA2_ETH_SWA_SINGLE;
736+
swa->single.skb = skb;
737+
swa->sg.sgt_size = sgt_buf_size;
738+
739+
/* Separately map the SGT buffer */
740+
sgt_addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL);
741+
if (unlikely(dma_mapping_error(dev, sgt_addr))) {
742+
err = -ENOMEM;
743+
goto sgt_map_failed;
744+
}
745+
746+
dpaa2_fd_set_offset(fd, priv->tx_data_offset);
747+
dpaa2_fd_set_format(fd, dpaa2_fd_sg);
748+
dpaa2_fd_set_addr(fd, sgt_addr);
749+
dpaa2_fd_set_len(fd, skb->len);
750+
dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
751+
752+
if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
753+
enable_tx_tstamp(fd, sgt_buf);
754+
755+
return 0;
756+
757+
sgt_map_failed:
758+
dma_unmap_single(dev, addr, skb->len, DMA_BIDIRECTIONAL);
759+
data_map_failed:
760+
if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE)
761+
kfree(sgt_buf);
762+
else
763+
sgt_cache->buf[sgt_cache->count++] = sgt_buf;
764+
765+
return err;
766+
}
767+
688768
/* Create a frame descriptor based on a linear skb */
689769
static int build_single_fd(struct dpaa2_eth_priv *priv,
690770
struct sk_buff *skb,
@@ -743,13 +823,16 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv,
743823
const struct dpaa2_fd *fd, bool in_napi)
744824
{
745825
struct device *dev = priv->net_dev->dev.parent;
746-
dma_addr_t fd_addr;
826+
dma_addr_t fd_addr, sg_addr;
747827
struct sk_buff *skb = NULL;
748828
unsigned char *buffer_start;
749829
struct dpaa2_eth_swa *swa;
750830
u8 fd_format = dpaa2_fd_get_format(fd);
751831
u32 fd_len = dpaa2_fd_get_len(fd);
752832

833+
struct dpaa2_eth_sgt_cache *sgt_cache;
834+
struct dpaa2_sg_entry *sgt;
835+
753836
fd_addr = dpaa2_fd_get_addr(fd);
754837
buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr);
755838
swa = (struct dpaa2_eth_swa *)buffer_start;
@@ -769,16 +852,29 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv,
769852
DMA_BIDIRECTIONAL);
770853
}
771854
} else if (fd_format == dpaa2_fd_sg) {
772-
skb = swa->sg.skb;
855+
if (swa->type == DPAA2_ETH_SWA_SG) {
856+
skb = swa->sg.skb;
857+
858+
/* Unmap the scatterlist */
859+
dma_unmap_sg(dev, swa->sg.scl, swa->sg.num_sg,
860+
DMA_BIDIRECTIONAL);
861+
kfree(swa->sg.scl);
773862

774-
/* Unmap the scatterlist */
775-
dma_unmap_sg(dev, swa->sg.scl, swa->sg.num_sg,
776-
DMA_BIDIRECTIONAL);
777-
kfree(swa->sg.scl);
863+
/* Unmap the SGT buffer */
864+
dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
865+
DMA_BIDIRECTIONAL);
866+
} else {
867+
skb = swa->single.skb;
778868

779-
/* Unmap the SGT buffer */
780-
dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
781-
DMA_BIDIRECTIONAL);
869+
/* Unmap the SGT Buffer */
870+
dma_unmap_single(dev, fd_addr, swa->single.sgt_size,
871+
DMA_BIDIRECTIONAL);
872+
873+
sgt = (struct dpaa2_sg_entry *)(buffer_start +
874+
priv->tx_data_offset);
875+
sg_addr = dpaa2_sg_get_addr(sgt);
876+
dma_unmap_single(dev, sg_addr, skb->len, DMA_BIDIRECTIONAL);
877+
}
782878
} else {
783879
netdev_dbg(priv->net_dev, "Invalid FD format\n");
784880
return;
@@ -808,8 +904,17 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv,
808904
}
809905

810906
/* Free SGT buffer allocated on tx */
811-
if (fd_format != dpaa2_fd_single)
812-
skb_free_frag(buffer_start);
907+
if (fd_format != dpaa2_fd_single) {
908+
sgt_cache = this_cpu_ptr(priv->sgt_cache);
909+
if (swa->type == DPAA2_ETH_SWA_SG) {
910+
skb_free_frag(buffer_start);
911+
} else {
912+
if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE)
913+
kfree(buffer_start);
914+
else
915+
sgt_cache->buf[sgt_cache->count++] = buffer_start;
916+
}
917+
}
813918

814919
/* Move on with skb release */
815920
napi_consume_skb(skb, in_napi);
@@ -833,22 +938,6 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
833938
percpu_extras = this_cpu_ptr(priv->percpu_extras);
834939

835940
needed_headroom = dpaa2_eth_needed_headroom(priv, skb);
836-
if (skb_headroom(skb) < needed_headroom) {
837-
struct sk_buff *ns;
838-
839-
ns = skb_realloc_headroom(skb, needed_headroom);
840-
if (unlikely(!ns)) {
841-
percpu_stats->tx_dropped++;
842-
goto err_alloc_headroom;
843-
}
844-
percpu_extras->tx_reallocs++;
845-
846-
if (skb->sk)
847-
skb_set_owner_w(ns, skb->sk);
848-
849-
dev_kfree_skb(skb);
850-
skb = ns;
851-
}
852941

853942
/* We'll be holding a back-reference to the skb until Tx Confirmation;
854943
* we don't want that overwritten by a concurrent Tx with a cloned skb.
@@ -867,6 +956,12 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
867956
err = build_sg_fd(priv, skb, &fd);
868957
percpu_extras->tx_sg_frames++;
869958
percpu_extras->tx_sg_bytes += skb->len;
959+
} else if (skb_headroom(skb) < needed_headroom) {
960+
err = build_sg_fd_single_buf(priv, skb, &fd);
961+
percpu_extras->tx_sg_frames++;
962+
percpu_extras->tx_sg_bytes += skb->len;
963+
percpu_extras->tx_converted_sg_frames++;
964+
percpu_extras->tx_converted_sg_bytes += skb->len;
870965
} else {
871966
err = build_single_fd(priv, skb, &fd);
872967
}
@@ -924,7 +1019,6 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
9241019
return NETDEV_TX_OK;
9251020

9261021
err_build_fd:
927-
err_alloc_headroom:
9281022
dev_kfree_skb(skb);
9291023

9301024
return NETDEV_TX_OK;
@@ -1161,6 +1255,22 @@ static int refill_pool(struct dpaa2_eth_priv *priv,
11611255
return 0;
11621256
}
11631257

1258+
static void dpaa2_eth_sgt_cache_drain(struct dpaa2_eth_priv *priv)
1259+
{
1260+
struct dpaa2_eth_sgt_cache *sgt_cache;
1261+
u16 count;
1262+
int k, i;
1263+
1264+
for_each_online_cpu(k) {
1265+
sgt_cache = per_cpu_ptr(priv->sgt_cache, k);
1266+
count = sgt_cache->count;
1267+
1268+
for (i = 0; i < count; i++)
1269+
kfree(sgt_cache->buf[i]);
1270+
sgt_cache->count = 0;
1271+
}
1272+
}
1273+
11641274
static int pull_channel(struct dpaa2_eth_channel *ch)
11651275
{
11661276
int err;
@@ -1562,6 +1672,9 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
15621672
/* Empty the buffer pool */
15631673
drain_pool(priv);
15641674

1675+
/* Empty the Scatter-Gather Buffer cache */
1676+
dpaa2_eth_sgt_cache_drain(priv);
1677+
15651678
return 0;
15661679
}
15671680

@@ -3846,6 +3959,13 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
38463959
goto err_alloc_percpu_extras;
38473960
}
38483961

3962+
priv->sgt_cache = alloc_percpu(*priv->sgt_cache);
3963+
if (!priv->sgt_cache) {
3964+
dev_err(dev, "alloc_percpu(sgt_cache) failed\n");
3965+
err = -ENOMEM;
3966+
goto err_alloc_sgt_cache;
3967+
}
3968+
38493969
err = netdev_init(net_dev);
38503970
if (err)
38513971
goto err_netdev_init;
@@ -3914,6 +4034,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
39144034
err_alloc_rings:
39154035
err_csum:
39164036
err_netdev_init:
4037+
free_percpu(priv->sgt_cache);
4038+
err_alloc_sgt_cache:
39174039
free_percpu(priv->percpu_extras);
39184040
err_alloc_percpu_extras:
39194041
free_percpu(priv->percpu_stats);
@@ -3959,6 +4081,7 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
39594081
fsl_mc_free_irqs(ls_dev);
39604082

39614083
free_rings(priv);
4084+
free_percpu(priv->sgt_cache);
39624085
free_percpu(priv->percpu_stats);
39634086
free_percpu(priv->percpu_extras);
39644087

drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ struct dpaa2_eth_swa {
125125
union {
126126
struct {
127127
struct sk_buff *skb;
128+
int sgt_size;
128129
} single;
129130
struct {
130131
struct sk_buff *skb;
@@ -282,9 +283,11 @@ struct dpaa2_eth_drv_stats {
282283
__u64 tx_conf_bytes;
283284
__u64 tx_sg_frames;
284285
__u64 tx_sg_bytes;
285-
__u64 tx_reallocs;
286286
__u64 rx_sg_frames;
287287
__u64 rx_sg_bytes;
288+
/* Linear skbs sent as a S/G FD due to insufficient headroom */
289+
__u64 tx_converted_sg_frames;
290+
__u64 tx_converted_sg_bytes;
288291
/* Enqueues retried due to portal busy */
289292
__u64 tx_portal_busy;
290293
};
@@ -395,6 +398,12 @@ struct dpaa2_eth_cls_rule {
395398
u8 in_use;
396399
};
397400

401+
/* Capacity of struct dpaa2_eth_sgt_cache, which the driver keeps per CPU */
#define DPAA2_ETH_SGT_CACHE_SIZE 256
402+
struct dpaa2_eth_sgt_cache {
403+
void *buf[DPAA2_ETH_SGT_CACHE_SIZE]; /* cached SGT buffers, used as a stack; entries [0, count) are valid */
404+
u16 count; /* number of buffers currently stored in buf[] */
405+
};
406+
398407
/* Driver private data */
399408
struct dpaa2_eth_priv {
400409
struct net_device *net_dev;
@@ -409,6 +418,7 @@ struct dpaa2_eth_priv {
409418

410419
u8 num_channels;
411420
struct dpaa2_eth_channel *channel[DPAA2_ETH_MAX_DPCONS];
421+
struct dpaa2_eth_sgt_cache __percpu *sgt_cache;
412422

413423
struct dpni_attr dpni_attrs;
414424
u16 dpni_ver_major;

drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,10 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = {
4343
"[drv] tx conf bytes",
4444
"[drv] tx sg frames",
4545
"[drv] tx sg bytes",
46-
"[drv] tx realloc frames",
4746
"[drv] rx sg frames",
4847
"[drv] rx sg bytes",
48+
"[drv] tx converted sg frames",
49+
"[drv] tx converted sg bytes",
4950
"[drv] enqueue portal busy",
5051
/* Channel stats */
5152
"[drv] dequeue portal busy",

0 commit comments

Comments
 (0)