Skip to content

Commit 323ebb6

Browse files
ecree-solarflare authored and davem330 committed
net: use listified RX for handling GRO_NORMAL skbs
When GRO decides not to coalesce a packet, in napi_frags_finish(), instead of passing it to the stack immediately, place it on a list in the napi struct. Then, at flush time (napi_complete_done(), napi_poll(), or napi_busy_loop()), call netif_receive_skb_list_internal() on the list. We'd like to do that in napi_gro_flush(), but it's not called if !napi->gro_bitmask, so we have to do it in the callers instead. (There are a handful of drivers that call napi_gro_flush() themselves, but it's not clear why, or whether this will affect them.) Because a full 64 packets is an inefficiently large batch, also consume the list whenever its length reaches gro_normal_batch, a new net/core sysctl that defaults to 8. Signed-off-by: Edward Cree <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 6727013 commit 323ebb6

File tree

3 files changed

+52
-3
lines changed

3 files changed

+52
-3
lines changed

include/linux/netdevice.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -332,6 +332,8 @@ struct napi_struct {
332332
struct net_device *dev;
333333
struct gro_list gro_hash[GRO_HASH_BUCKETS];
334334
struct sk_buff *skb;
335+
struct list_head rx_list; /* Pending GRO_NORMAL skbs */
336+
int rx_count; /* length of rx_list */
335337
struct hrtimer timer;
336338
struct list_head dev_list;
337339
struct hlist_node napi_hash_node;
@@ -4239,6 +4241,7 @@ extern int dev_weight_rx_bias;
42394241
extern int dev_weight_tx_bias;
42404242
extern int dev_rx_weight;
42414243
extern int dev_tx_weight;
4244+
extern int gro_normal_batch;
42424245

42434246
bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
42444247
struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,

net/core/dev.c

Lines changed: 41 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3963,6 +3963,8 @@ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
39633963
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
39643964
int dev_rx_weight __read_mostly = 64;
39653965
int dev_tx_weight __read_mostly = 64;
3966+
/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
3967+
int gro_normal_batch __read_mostly = 8;
39663968

39673969
/* Called with irq disabled */
39683970
static inline void ____napi_schedule(struct softnet_data *sd,
@@ -5747,6 +5749,26 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
57475749
}
57485750
EXPORT_SYMBOL(napi_get_frags);
57495751

5752+
/* Pass the currently batched GRO_NORMAL SKBs (napi->rx_list) up to the stack via netif_receive_skb_list_internal(), then reset the batch. Does nothing when rx_count is zero. */
5753+
static void gro_normal_list(struct napi_struct *napi)
5754+
{
5755+
if (!napi->rx_count)
5756+
return; /* nothing batched */
5757+
netif_receive_skb_list_internal(&napi->rx_list);
5758+
INIT_LIST_HEAD(&napi->rx_list); /* start over with an empty list */
5759+
napi->rx_count = 0; /* keep the count in step with the list */
5760+
}
5761+
5762+
/* Queue one GRO_NORMAL SKB at the tail of napi->rx_list. Once rx_count reaches
5763+
* gro_normal_batch, pass the whole batch up to the stack.
5764+
*/
5765+
static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
5766+
{
5767+
list_add_tail(&skb->list, &napi->rx_list);
5768+
if (++napi->rx_count >= gro_normal_batch) /* flush on reaching the limit, not only past it */
5769+
gro_normal_list(napi);
5770+
}
5771+
57505772
static gro_result_t napi_frags_finish(struct napi_struct *napi,
57515773
struct sk_buff *skb,
57525774
gro_result_t ret)
@@ -5756,8 +5778,8 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
57565778
case GRO_HELD:
57575779
__skb_push(skb, ETH_HLEN);
57585780
skb->protocol = eth_type_trans(skb, skb->dev);
5759-
if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
5760-
ret = GRO_DROP;
5781+
if (ret == GRO_NORMAL)
5782+
gro_normal_one(napi, skb);
57615783
break;
57625784

57635785
case GRO_DROP:
@@ -6034,6 +6056,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
60346056
NAPIF_STATE_IN_BUSY_POLL)))
60356057
return false;
60366058

6059+
gro_normal_list(n);
6060+
60376061
if (n->gro_bitmask) {
60386062
unsigned long timeout = 0;
60396063

@@ -6119,10 +6143,19 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
61196143
* Ideally, a new ndo_busy_poll_stop() could avoid another round.
61206144
*/
61216145
rc = napi->poll(napi, BUSY_POLL_BUDGET);
6146+
/* We can't gro_normal_list() here, because napi->poll() might have
6147+
* rearmed the napi (napi_complete_done()) in which case it could
6148+
* already be running on another CPU.
6149+
*/
61226150
trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
61236151
netpoll_poll_unlock(have_poll_lock);
6124-
if (rc == BUSY_POLL_BUDGET)
6152+
if (rc == BUSY_POLL_BUDGET) {
6153+
/* As the whole budget was spent, we still own the napi so can
6154+
* safely handle the rx_list.
6155+
*/
6156+
gro_normal_list(napi);
61256157
__napi_schedule(napi);
6158+
}
61266159
local_bh_enable();
61276160
}
61286161

@@ -6167,6 +6200,7 @@ void napi_busy_loop(unsigned int napi_id,
61676200
}
61686201
work = napi_poll(napi, BUSY_POLL_BUDGET);
61696202
trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
6203+
gro_normal_list(napi);
61706204
count:
61716205
if (work > 0)
61726206
__NET_ADD_STATS(dev_net(napi->dev),
@@ -6272,6 +6306,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
62726306
napi->timer.function = napi_watchdog;
62736307
init_gro_hash(napi);
62746308
napi->skb = NULL;
6309+
INIT_LIST_HEAD(&napi->rx_list);
6310+
napi->rx_count = 0;
62756311
napi->poll = poll;
62766312
if (weight > NAPI_POLL_WEIGHT)
62776313
netdev_err_once(dev, "%s() called with weight %d\n", __func__,
@@ -6368,6 +6404,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
63686404
goto out_unlock;
63696405
}
63706406

6407+
gro_normal_list(n);
6408+
63716409
if (n->gro_bitmask) {
63726410
/* flush too old packets
63736411
* If HZ < 1000, flush all packets.

net/core/sysctl_net_core.c

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -567,6 +567,14 @@ static struct ctl_table net_core_table[] = {
567567
.mode = 0644,
568568
.proc_handler = proc_do_static_key,
569569
},
570+
{
571+
.procname = "gro_normal_batch",
572+
.data = &gro_normal_batch,
573+
.maxlen = sizeof(unsigned int),
574+
.mode = 0644,
575+
.proc_handler = proc_dointvec_minmax,
576+
.extra1 = SYSCTL_ONE,
577+
},
570578
{ }
571579
};
572580

0 commit comments

Comments
 (0)