Skip to content

Commit 225d9b8

Browse files
Eric Dumazetdavem330
authored andcommitted
sch_sfq: rehash queues in perturb timer
A known Out Of Order (OOO) problem hurts SFQ when timer changes perturbation value, since all new packets delivered to SFQ enqueue might end on different slots than previous in-flight packets. With round robin delivery, we can thus deliver packets in a different order. Since SFQ is limited to small amount of in-flight packets, we can rehash packets so that this OOO problem is fixed. This rehashing is performed only if internal flow classifier is in use. We now store in skb->cb[] the "struct flow_keys" so that we dont call skb_flow_dissect() again while rehashing. Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 4e68ea2 commit 225d9b8

File tree

1 file changed

+81
-6
lines changed

1 file changed

+81
-6
lines changed

net/sched/sch_sfq.c

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,16 +136,30 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
136136
return &q->dep[val - SFQ_SLOTS];
137137
}
138138

139+
/*
140+
* In order to be able to quickly rehash our queue when timer changes
141+
* q->perturbation, we store flow_keys in skb->cb[]
142+
*/
143+
struct sfq_skb_cb {
144+
struct flow_keys keys;
145+
};
146+
147+
static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
148+
{
149+
BUILD_BUG_ON(sizeof(skb->cb) <
150+
sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
151+
return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
152+
}
153+
139154
static unsigned int sfq_hash(const struct sfq_sched_data *q,
140155
const struct sk_buff *skb)
141156
{
142-
struct flow_keys keys;
157+
const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
143158
unsigned int hash;
144159

145-
skb_flow_dissect(skb, &keys);
146-
hash = jhash_3words((__force u32)keys.dst,
147-
(__force u32)keys.src ^ keys.ip_proto,
148-
(__force u32)keys.ports, q->perturbation);
160+
hash = jhash_3words((__force u32)keys->dst,
161+
(__force u32)keys->src ^ keys->ip_proto,
162+
(__force u32)keys->ports, q->perturbation);
149163
return hash & (q->divisor - 1);
150164
}
151165

@@ -161,8 +175,10 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
161175
TC_H_MIN(skb->priority) <= q->divisor)
162176
return TC_H_MIN(skb->priority);
163177

164-
if (!q->filter_list)
178+
if (!q->filter_list) {
179+
skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
165180
return sfq_hash(q, skb) + 1;
181+
}
166182

167183
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
168184
result = tc_classify(skb, q->filter_list, &res);
@@ -423,12 +439,71 @@ sfq_reset(struct Qdisc *sch)
423439
kfree_skb(skb);
424440
}
425441

442+
/*
443+
* When q->perturbation is changed, we rehash all queued skbs
444+
* to avoid OOO (Out Of Order) effects.
445+
* We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
446+
* counters.
447+
*/
448+
static void sfq_rehash(struct sfq_sched_data *q)
449+
{
450+
struct sk_buff *skb;
451+
int i;
452+
struct sfq_slot *slot;
453+
struct sk_buff_head list;
454+
455+
__skb_queue_head_init(&list);
456+
457+
for (i = 0; i < SFQ_SLOTS; i++) {
458+
slot = &q->slots[i];
459+
if (!slot->qlen)
460+
continue;
461+
while (slot->qlen) {
462+
skb = slot_dequeue_head(slot);
463+
sfq_dec(q, i);
464+
__skb_queue_tail(&list, skb);
465+
}
466+
q->ht[slot->hash] = SFQ_EMPTY_SLOT;
467+
}
468+
q->tail = NULL;
469+
470+
while ((skb = __skb_dequeue(&list)) != NULL) {
471+
unsigned int hash = sfq_hash(q, skb);
472+
sfq_index x = q->ht[hash];
473+
474+
slot = &q->slots[x];
475+
if (x == SFQ_EMPTY_SLOT) {
476+
x = q->dep[0].next; /* get a free slot */
477+
q->ht[hash] = x;
478+
slot = &q->slots[x];
479+
slot->hash = hash;
480+
}
481+
slot_queue_add(slot, skb);
482+
sfq_inc(q, x);
483+
if (slot->qlen == 1) { /* The flow is new */
484+
if (q->tail == NULL) { /* It is the first flow */
485+
slot->next = x;
486+
} else {
487+
slot->next = q->tail->next;
488+
q->tail->next = x;
489+
}
490+
q->tail = slot;
491+
slot->allot = q->scaled_quantum;
492+
}
493+
}
494+
}
495+
426496
static void sfq_perturbation(unsigned long arg)
427497
{
428498
struct Qdisc *sch = (struct Qdisc *)arg;
429499
struct sfq_sched_data *q = qdisc_priv(sch);
500+
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
430501

502+
spin_lock(root_lock);
431503
q->perturbation = net_random();
504+
if (!q->filter_list && q->tail)
505+
sfq_rehash(q);
506+
spin_unlock(root_lock);
432507

433508
if (q->perturb_period)
434509
mod_timer(&q->perturb_timer, jiffies + q->perturb_period);

0 commit comments

Comments
 (0)