
Commit c7cba83

Merge branch 'net_sched-allow-use-of-hrtimer-slack'
Eric Dumazet says:

====================
net_sched: allow use of hrtimer slack

Packet schedulers have used hrtimers with exact expiry times.

Some of them can afford having a slack, in order to reduce the number of timer interrupts and feed bigger batches to increase efficiency.

FQ for example does not care if throttled packets are sent with an additional (small) delay.

Original observation of having maybe too many interrupts was made by Willem de Bruijn.

v2: added strict netlink checking (Jakub Kicinski)
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents 24ee865 + 583396f commit c7cba83
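The cover letter's idea in miniature: an absolute hrtimer armed with a slack window lets the timer core coalesce it with nearby expirations, firing anywhere in [expires, expires + delta_ns] instead of exactly at expires. A minimal sketch using the existing hrtimer API (the helper name is illustrative and not part of this series):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

/* Arm an absolute, CPU-pinned hrtimer that may fire up to slack_ns late,
 * letting the core batch it with neighbouring timers.
 */
static void arm_with_slack(struct hrtimer *timer, u64 expires_ns, u64 slack_ns)
{
	hrtimer_start_range_ns(timer, ns_to_ktime(expires_ns), slack_ns,
			       HRTIMER_MODE_ABS_PINNED);
}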

File tree

  include/net/pkt_sched.h
  include/uapi/linux/pkt_sched.h
  net/sched/sch_api.c
  net/sched/sch_fq.c

4 files changed: +42 −12 lines changed

include/net/pkt_sched.h

Lines changed: 9 additions & 1 deletion
@@ -75,7 +75,15 @@ struct qdisc_watchdog {
 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
 				 clockid_t clockid);
 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
-void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires);
+
+void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
+				      u64 delta_ns);
+
+static inline void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd,
+					      u64 expires)
+{
+	return qdisc_watchdog_schedule_range_ns(wd, expires, 0ULL);
+}
 
 static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd,
 					   psched_time_t expires)
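Caller-side sketch (the function name below is illustrative): a scheduler that tolerates late wakeups passes a non-zero delta_ns to the new helper, while existing users of qdisc_watchdog_schedule_ns() keep their exact-expiry behaviour because the inline wrapper above forwards a 0 ns slack.

/* Hypothetical caller: allow the watchdog to fire anywhere in
 * [next_ns, next_ns + 20 usec].
 */
static void example_schedule_throttled(struct qdisc_watchdog *wd, u64 next_ns)
{
	qdisc_watchdog_schedule_range_ns(wd, next_ns, 20 * NSEC_PER_USEC);
}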

include/uapi/linux/pkt_sched.h

Lines changed: 2 additions & 0 deletions
@@ -911,6 +911,8 @@ enum {
 
 	TCA_FQ_CE_THRESHOLD,	/* DCTCP-like CE-marking threshold */
 
+	TCA_FQ_TIMER_SLACK,	/* timer slack */
+
 	__TCA_FQ_MAX
 };
 

net/sched/sch_api.c

Lines changed: 14 additions & 7 deletions
@@ -618,21 +618,28 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_watchdog_init);
 
-void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
+void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
+				      u64 delta_ns)
 {
 	if (test_bit(__QDISC_STATE_DEACTIVATED,
 		     &qdisc_root_sleeping(wd->qdisc)->state))
 		return;
 
-	if (wd->last_expires == expires)
-		return;
+	if (hrtimer_is_queued(&wd->timer)) {
+		/* If timer is already set in [expires, expires + delta_ns],
+		 * do not reprogram it.
+		 */
+		if (wd->last_expires - expires <= delta_ns)
+			return;
+	}
 
 	wd->last_expires = expires;
-	hrtimer_start(&wd->timer,
-		      ns_to_ktime(expires),
-		      HRTIMER_MODE_ABS_PINNED);
+	hrtimer_start_range_ns(&wd->timer,
+			       ns_to_ktime(expires),
+			       delta_ns,
+			       HRTIMER_MODE_ABS_PINNED);
 }
-EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
+EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
 
 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 {
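The skip-reprogram test above leans on unsigned arithmetic: if the queued timer's last_expires is earlier than the new expires, the u64 subtraction wraps to a huge value and the timer gets reprogrammed; the timer is left alone only when it already fires inside the requested window. A spelled-out equivalent, as a sketch only (helper name is illustrative; it assumes expires + delta_ns does not overflow, which holds for realistic nanosecond values):

/* True when the already-queued expiry lies in [expires, expires + delta_ns]. */
static bool watchdog_already_in_window(const struct qdisc_watchdog *wd,
				       u64 expires, u64 delta_ns)
{
	return wd->last_expires >= expires &&
	       wd->last_expires <= expires + delta_ns;
}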

net/sched/sch_fq.c

Lines changed: 17 additions & 4 deletions
@@ -121,6 +121,8 @@ struct fq_sched_data {
 	u64		stat_flows_plimit;
 	u64		stat_pkts_too_long;
 	u64		stat_allocation_errors;
+
+	u32		timer_slack; /* hrtimer slack in ns */
 	struct qdisc_watchdog watchdog;
 };
 
@@ -504,8 +506,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 		head = &q->old_flows;
 		if (!head->first) {
 			if (q->time_next_delayed_flow != ~0ULL)
-				qdisc_watchdog_schedule_ns(&q->watchdog,
-							   q->time_next_delayed_flow);
+				qdisc_watchdog_schedule_range_ns(&q->watchdog,
+							q->time_next_delayed_flow,
+							q->timer_slack);
 			return NULL;
 		}
 	}
@@ -735,6 +738,8 @@ static int fq_resize(struct Qdisc *sch, u32 log)
 }
 
 static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
+	[TCA_FQ_UNSPEC]		= { .strict_start_type = TCA_FQ_TIMER_SLACK },
+
 	[TCA_FQ_PLIMIT]			= { .type = NLA_U32 },
 	[TCA_FQ_FLOW_PLIMIT]		= { .type = NLA_U32 },
 	[TCA_FQ_QUANTUM]		= { .type = NLA_U32 },
@@ -747,6 +752,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
 	[TCA_FQ_ORPHAN_MASK]		= { .type = NLA_U32 },
 	[TCA_FQ_LOW_RATE_THRESHOLD]	= { .type = NLA_U32 },
 	[TCA_FQ_CE_THRESHOLD]		= { .type = NLA_U32 },
+	[TCA_FQ_TIMER_SLACK]		= { .type = NLA_U32 },
 };
 
 static int fq_change(struct Qdisc *sch, struct nlattr *opt,
@@ -833,6 +839,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 		q->ce_threshold = (u64)NSEC_PER_USEC *
 				  nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
 
+	if (tb[TCA_FQ_TIMER_SLACK])
+		q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
+
 	if (!err) {
 		sch_tree_unlock(sch);
 		err = fq_resize(sch, fq_log);
@@ -884,6 +893,8 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
 	q->orphan_mask		= 1024 - 1;
 	q->low_rate_threshold	= 550000 / 8;
 
+	q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
+
 	/* Default ce_threshold of 4294 seconds */
 	q->ce_threshold		= (u64)NSEC_PER_USEC * ~0U;
 
@@ -924,7 +935,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	    nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
 			q->low_rate_threshold) ||
 	    nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
-	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
+	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
+	    nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack))
 		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
@@ -947,7 +959,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	st.flows_plimit		  = q->stat_flows_plimit;
 	st.pkts_too_long	  = q->stat_pkts_too_long;
 	st.allocation_errors	  = q->stat_allocation_errors;
-	st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns();
+	st.time_next_delayed_flow = q->time_next_delayed_flow + q->timer_slack -
+				    ktime_get_ns();
 	st.flows		  = q->flows;
 	st.inactive_flows	  = q->inactive_flows;
 	st.throttled_flows	  = q->throttled_flows;
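The fq_policy change is the "strict netlink checking" mentioned in the cover letter: setting .strict_start_type on TCA_FQ_UNSPEC makes TCA_FQ_TIMER_SLACK (and any attribute added after it) subject to strict validation, so for example a u32 attribute with the wrong length is rejected instead of merely tolerated, while the pre-existing attributes keep their historical lenient parsing for compatibility. A condensed sketch of the pattern (policy name is illustrative; only the two entries relevant here are shown):

static const struct nla_policy example_policy[TCA_FQ_MAX + 1] = {
	/* everything numbered >= TCA_FQ_TIMER_SLACK is validated strictly */
	[TCA_FQ_UNSPEC]		= { .strict_start_type = TCA_FQ_TIMER_SLACK },
	/* slack value, carried as nanoseconds */
	[TCA_FQ_TIMER_SLACK]	= { .type = NLA_U32 },
};

On the kernel side the attribute is plain nanoseconds; the default installed in fq_init() above is 10 usec of slack.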
