|
 /*
  * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
  *
- * Copyright (C) 2013-2015 Eric Dumazet <[email protected]>
+ * Copyright (C) 2013-2023 Eric Dumazet <[email protected]>
  *
  * Meant to be mostly used for locally generated traffic :
  * Fast classification depends on skb->sk being set before reaching us.
@@ -73,7 +73,13 @@ struct fq_flow {
 		struct sk_buff *tail;	/* last skb in the list */
 		unsigned long  age;	/* (jiffies | 1UL) when flow was emptied, for gc */
 	};
-	struct rb_node	fq_node;	/* anchor in fq_root[] trees */
+	union {
+		struct rb_node	fq_node;	/* anchor in fq_root[] trees */
+		/* Following field is only used for q->internal,
+		 * because q->internal is not hashed in fq_root[]
+		 */
+		u64		stat_fastpath_packets;
+	};
 	struct sock	*sk;
 	u32		socket_hash;	/* sk_hash */
 	int		qlen;		/* number of packets in flow queue */
@@ -134,7 +140,7 @@ struct fq_sched_data {
 
 	/* Seldom used fields. */
 
-	u64		stat_internal_packets;
+	u64		stat_internal_packets; /* aka highprio */
 	u64		stat_ce_mark;
 	u64		stat_horizon_drops;
 	u64		stat_horizon_caps;
@@ -266,17 +272,64 @@ static void fq_gc(struct fq_sched_data *q,
 	kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree);
 }
 
-static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
+/* Fast path can be used if :
+ * 1) Packet tstamp is in the past.
+ * 2) FQ qlen == 0 OR
+ *   (no flow is currently eligible for transmit,
+ *    AND fast path queue has less than 8 packets)
+ * 3) No SO_MAX_PACING_RATE on the socket (if any).
+ * 4) No @maxrate attribute on this qdisc,
+ *
+ * FQ can not use generic TCQ_F_CAN_BYPASS infrastructure.
+ */
+static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb)
+{
+	const struct fq_sched_data *q = qdisc_priv(sch);
+	const struct sock *sk;
+
+	if (fq_skb_cb(skb)->time_to_send > q->ktime_cache)
+		return false;
+
+	if (sch->q.qlen != 0) {
+		/* Even if some packets are stored in this qdisc,
+		 * we can still enable fast path if all of them are
+		 * scheduled in the future (ie no flows are eligible)
+		 * or in the fast path queue.
+		 */
+		if (q->flows != q->inactive_flows + q->throttled_flows)
+			return false;
+
+		/* Do not allow fast path queue to explode, we want Fair Queue mode
+		 * under pressure.
+		 */
+		if (q->internal.qlen >= 8)
+			return false;
+	}
+
+	sk = skb->sk;
+	if (sk && sk_fullsock(sk) && !sk_is_tcp(sk) &&
+	    sk->sk_max_pacing_rate != ~0UL)
+		return false;
+
+	if (q->flow_max_rate != ~0UL)
+		return false;
+
+	return true;
+}
+
+static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb)
 {
+	struct fq_sched_data *q = qdisc_priv(sch);
 	struct rb_node **p, *parent;
 	struct sock *sk = skb->sk;
 	struct rb_root *root;
 	struct fq_flow *f;
 
 	/* warning: no starvation prevention... */
-	if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
+	if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) {
+		q->stat_internal_packets++; /* highprio packet */
 		return &q->internal;
-
+	}
 	/* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket
 	 * or a listener (SYNCOOKIE mode)
 	 * 1) request sockets are not full blown,
@@ -307,6 +360,11 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 		sk = (struct sock *)((hash << 1) | 1UL);
 	}
 
+	if (fq_fastpath_check(sch, skb)) {
+		q->internal.stat_fastpath_packets++;
+		return &q->internal;
+	}
+
 	root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)];
 
 	if (q->flows >= (2U << q->fq_trees_log) &&
@@ -402,12 +460,8 @@ static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow,
 static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow,
 			   struct sk_buff *skb)
 {
-	struct fq_sched_data *q = qdisc_priv(sch);
-
 	fq_erase_head(sch, flow, skb);
 	skb_mark_not_on_list(skb);
-	if (--flow->qlen == 0)
-		q->inactive_flows++;
 	qdisc_qstats_backlog_dec(sch, skb);
 	sch->q.qlen--;
 }
@@ -459,49 +513,45 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	if (unlikely(sch->q.qlen >= sch->limit))
 		return qdisc_drop(skb, sch, to_free);
 
+	q->ktime_cache = ktime_get_ns();
 	if (!skb->tstamp) {
-		fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns();
+		fq_skb_cb(skb)->time_to_send = q->ktime_cache;
 	} else {
-		/* Check if packet timestamp is too far in the future.
-		 * Try first if our cached value, to avoid ktime_get_ns()
-		 * cost in most cases.
-		 */
+		/* Check if packet timestamp is too far in the future. */
 		if (fq_packet_beyond_horizon(skb, q)) {
-			/* Refresh our cache and check another time */
-			q->ktime_cache = ktime_get_ns();
-			if (fq_packet_beyond_horizon(skb, q)) {
-				if (q->horizon_drop) {
+			if (q->horizon_drop) {
 					q->stat_horizon_drops++;
 					return qdisc_drop(skb, sch, to_free);
-				}
-				q->stat_horizon_caps++;
-				skb->tstamp = q->ktime_cache + q->horizon;
 			}
+			q->stat_horizon_caps++;
+			skb->tstamp = q->ktime_cache + q->horizon;
 		}
 		fq_skb_cb(skb)->time_to_send = skb->tstamp;
 	}
 
-	f = fq_classify(skb, q);
-	if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
-		q->stat_flows_plimit++;
-		return qdisc_drop(skb, sch, to_free);
-	}
+	f = fq_classify(sch, skb);
 
-	if (f->qlen++ == 0)
-		q->inactive_flows--;
-	qdisc_qstats_backlog_inc(sch, skb);
-	if (fq_flow_is_detached(f)) {
-		fq_flow_add_tail(&q->new_flows, f);
-		if (time_after(jiffies, f->age + q->flow_refill_delay))
-			f->credit = max_t(u32, f->credit, q->quantum);
+	if (f != &q->internal) {
+		if (unlikely(f->qlen >= q->flow_plimit)) {
+			q->stat_flows_plimit++;
+			return qdisc_drop(skb, sch, to_free);
+		}
+
+		if (fq_flow_is_detached(f)) {
+			fq_flow_add_tail(&q->new_flows, f);
+			if (time_after(jiffies, f->age + q->flow_refill_delay))
+				f->credit = max_t(u32, f->credit, q->quantum);
+		}
+
+		if (f->qlen == 0)
+			q->inactive_flows--;
 	}
 
+	f->qlen++;
 	/* Note: this overwrites f->age */
 	flow_queue_add(f, skb);
 
-	if (unlikely(f == &q->internal)) {
-		q->stat_internal_packets++;
-	}
+	qdisc_qstats_backlog_inc(sch, skb);
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
@@ -549,6 +599,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 
 	skb = fq_peek(&q->internal);
 	if (unlikely(skb)) {
+		q->internal.qlen--;
 		fq_dequeue_skb(sch, &q->internal, skb);
 		goto out;
 	}
@@ -592,6 +643,8 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 			INET_ECN_set_ce(skb);
 			q->stat_ce_mark++;
 		}
+		if (--f->qlen == 0)
+			q->inactive_flows++;
 		fq_dequeue_skb(sch, f, skb);
 	} else {
 		head->first = f->next;
@@ -1024,6 +1077,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 
 	st.gc_flows = q->stat_gc_flows;
 	st.highprio_packets = q->stat_internal_packets;
+	st.fastpath_packets = q->internal.stat_fastpath_packets;
 	st.tcp_retrans = 0;
 	st.throttled = q->stat_throttled;
 	st.flows_plimit = q->stat_flows_plimit;
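
The hunks above gate the new fast path on the four conditions spelled out in the comment before fq_fastpath_check(). As a quick reference, the standalone C sketch below restates that predicate over a simplified snapshot of the qdisc state; struct fq_snapshot, fastpath_eligible() and the sample values in main() are hypothetical stand-ins for the kernel types and counters, not part of this patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical snapshot of the state fq_fastpath_check() consults.
 * Field names mirror struct fq_sched_data, but this is not kernel code.
 */
struct fq_snapshot {
	uint64_t now_ns;		/* q->ktime_cache */
	unsigned int qdisc_qlen;	/* sch->q.qlen */
	unsigned int flows;		/* q->flows */
	unsigned int inactive_flows;	/* q->inactive_flows */
	unsigned int throttled_flows;	/* q->throttled_flows */
	unsigned int internal_qlen;	/* q->internal.qlen (fast path queue) */
	uint64_t flow_max_rate;		/* ~0 means no qdisc-level maxrate */
};

/* Restates the eligibility rules: packet sendable now, no flow currently
 * eligible for transmit (or empty qdisc) with a short fast path queue,
 * and no pacing cap on either the socket or the qdisc.
 */
static bool fastpath_eligible(const struct fq_snapshot *q,
			      uint64_t time_to_send,
			      uint64_t sk_max_pacing_rate)
{
	if (time_to_send > q->now_ns)
		return false;			/* 1) tstamp must be in the past */

	if (q->qdisc_qlen != 0) {
		/* 2) every queued packet is either throttled or already
		 * sitting in the fast path queue, and that queue is short
		 * so Fair Queue mode takes over under pressure.
		 */
		if (q->flows != q->inactive_flows + q->throttled_flows)
			return false;
		if (q->internal_qlen >= 8)
			return false;
	}

	if (sk_max_pacing_rate != ~0ULL)	/* 3) no SO_MAX_PACING_RATE */
		return false;

	if (q->flow_max_rate != ~0ULL)		/* 4) no @maxrate on the qdisc */
		return false;

	return true;
}

int main(void)
{
	struct fq_snapshot q = {
		.now_ns = 1000,
		.qdisc_qlen = 0,
		.flow_max_rate = ~0ULL,
	};

	/* Packet sendable now, no socket pacing cap: fast path applies. */
	printf("eligible: %d\n", fastpath_eligible(&q, 900, ~0ULL));
	return 0;
}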
|