From ff2998f29f390d963299103f0b247cc79106ced5 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 26 Feb 2026 14:44:12 +0100 Subject: net: sched: introduce qdisc-specific drop reason tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create new enum qdisc_drop_reason and trace_qdisc_drop tracepoint for qdisc layer drop diagnostics with direct qdisc context visibility. The new tracepoint includes qdisc handle, parent, kind (name), and device information. Existing SKB_DROP_REASON_QDISC_DROP is retained for backwards compatibility via kfree_skb_reason(). Convert qdiscs with drop reasons to use the new infrastructure. Change CAKE's cobalt_should_drop() return type from enum skb_drop_reason to enum qdisc_drop_reason to fix implicit enum conversion warnings. Use QDISC_DROP_UNSPEC as the 'not dropped' sentinel instead of SKB_NOT_DROPPED_YET. Both have the same compiled value (0), so the comparison logic remains semantically equivalent. Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/177211345275.3011628.1974310302645218067.stgit@firesoul Signed-off-by: Jakub Kicinski --- include/trace/events/qdisc.h | 51 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index ff33f41a9db7..d8a5c2677470 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -74,6 +74,57 @@ TRACE_EVENT(qdisc_enqueue, __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) ); +#undef FN +#undef FNe +#define FN(reason) TRACE_DEFINE_ENUM(QDISC_DROP_##reason); +#define FNe(reason) TRACE_DEFINE_ENUM(QDISC_DROP_##reason); +DEFINE_QDISC_DROP_REASON(FN, FNe) + +#undef FN +#undef FNe +#define FN(reason) { QDISC_DROP_##reason, #reason }, +#define FNe(reason) { QDISC_DROP_##reason, #reason } + +TRACE_EVENT(qdisc_drop, + + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, + struct net_device *dev, struct sk_buff *skb, + enum qdisc_drop_reason reason), + + TP_ARGS(qdisc, txq, dev, skb, reason), + + TP_STRUCT__entry( + __field(struct Qdisc *, qdisc) + __field(const struct netdev_queue *, txq) + __field(void *, skbaddr) + __field(int, ifindex) + __field(u32, handle) + __field(u32, parent) + __field(enum qdisc_drop_reason, reason) + __string(kind, qdisc->ops->id) + ), + + TP_fast_assign( + __entry->qdisc = qdisc; + __entry->txq = txq; + __entry->skbaddr = skb; + __entry->ifindex = dev ? dev->ifindex : 0; + __entry->handle = qdisc->handle; + __entry->parent = qdisc->parent; + __entry->reason = reason; + __assign_str(kind); + ), + + TP_printk("drop ifindex=%d kind=%s handle=0x%X parent=0x%X skbaddr=%p reason=%s", + __entry->ifindex, __get_str(kind), __entry->handle, + __entry->parent, __entry->skbaddr, + __print_symbolic(__entry->reason, + DEFINE_QDISC_DROP_REASON(FN, FNe))) +); + +#undef FN +#undef FNe + TRACE_EVENT(qdisc_reset, TP_PROTO(struct Qdisc *q), -- cgit v1.2.3 From 0f5531879afbf904f19a15b39f687a9ec47a82cc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:03:56 +0100 Subject: devlink: add helpers to get bus_name/dev_name Introduce devlink_bus_name() and devlink_dev_name() helpers and convert all direct accesses to devlink->dev->bus->name and dev_name(devlink->dev) to use them. This prepares for dev-less devlink instances where these helpers will be extended to handle the missing device. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-3-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/trace/events/devlink.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index f241e204fe6b..32304ce9ad15 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -21,8 +21,8 @@ TRACE_EVENT(devlink_hwmsg, TP_ARGS(devlink, incoming, type, buf, len), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __field(bool, incoming) __field(unsigned long, type) @@ -55,8 +55,8 @@ TRACE_EVENT(devlink_hwerr, TP_ARGS(devlink, err, msg), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __field(int, err) __string(msg, msg) @@ -85,8 +85,8 @@ TRACE_EVENT(devlink_health_report, TP_ARGS(devlink, reporter_name, msg), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(reporter_name, reporter_name) __string(msg, msg) @@ -116,8 +116,8 @@ TRACE_EVENT(devlink_health_recover_aborted, TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(reporter_name, reporter_name) __field(bool, health_state) @@ -150,8 +150,8 @@ TRACE_EVENT(devlink_health_reporter_state_update, TP_ARGS(devlink, reporter_name, new_state), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(reporter_name, reporter_name) __field(u8, new_state) @@ -181,8 +181,8 @@ TRACE_EVENT(devlink_trap_report, TP_ARGS(devlink, skb, metadata), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(trap_name, metadata->trap_name) __string(trap_group_name, metadata->trap_group_name) -- cgit v1.2.3 From 20b0f383aae7d26990a769d52b4d5c0e570e659c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:04:02 +0100 Subject: devlink: add devlink_dev_driver_name() helper and use it in trace events In preparation to dev-less devlinks, add devlink_dev_driver_name() that returns the driver name stored in devlink struct, and use it in all trace events. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-9-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/trace/events/devlink.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 32304ce9ad15..4f8edf77dfbe 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -23,7 +23,7 @@ TRACE_EVENT(devlink_hwmsg, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __field(bool, incoming) __field(unsigned long, type) __dynamic_array(u8, buf, len) @@ -57,7 +57,7 @@ TRACE_EVENT(devlink_hwerr, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __field(int, err) __string(msg, msg) ), @@ -87,7 +87,7 @@ TRACE_EVENT(devlink_health_report, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __string(reporter_name, reporter_name) __string(msg, msg) ), @@ -118,7 +118,7 @@ TRACE_EVENT(devlink_health_recover_aborted, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __string(reporter_name, reporter_name) __field(bool, health_state) __field(u64, time_since_last_recover) @@ -152,7 +152,7 @@ TRACE_EVENT(devlink_health_reporter_state_update, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __string(reporter_name, reporter_name) __field(u8, new_state) ), @@ -183,7 +183,7 @@ TRACE_EVENT(devlink_trap_report, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __string(trap_name, metadata->trap_name) __string(trap_group_name, metadata->trap_group_name) __array(char, input_dev_name, IFNAMSIZ) -- cgit v1.2.3 From d2000361e4ddf5047d660a902a3b0ed7520be1e5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 7 Apr 2026 10:45:17 +0200 Subject: mptcp: better mptcp-level RTT estimator The current MPTCP-level RTT estimator has several issues. On high speed links, the MPTCP-level receive buffer auto-tuning happens with a frequency well above the TCP-level's one. That in turn can cause excessive/unneeded receive buffer increase. On such links, the initial rtt_us value is considerably higher than the actual delay, and the current mptcp_rcv_space_adjust() updates msk->rcvq_space.rtt_us with a period equal to the such field previous value. If the initial rtt_us is 40ms, its first update will happen after 40ms, even if the subflows see actual RTT orders of magnitude lower. Additionally: - setting the msk RTT to the maximum among all the subflows RTTs makes DRS constantly overshooting the rcvbuf size when a subflow has considerable higher latency than the other(s). - during unidirectional bulk transfers with multiple active subflows, the TCP-level RTT estimator occasionally sees considerably higher value than the real link delay, i.e. when the packet scheduler reacts to an incoming ACK on given subflow pushing data on a different subflow. - currently inactive but still open subflows (i.e. switched to backup mode) are always considered when computing the msk-level RTT. Address the all the issues above with a more accurate RTT estimation strategy: the MPTCP-level RTT is set to the minimum of all the subflows actually feeding data into the MPTCP receive buffer, using a small sliding window. While at it, also use EWMA to compute the msk-level scaling_ratio, to that MPTCP can avoid traversing the subflow list is mptcp_rcv_space_adjust(). Use some care to avoid updating msk and ssk level fields too often. Fixes: a6b118febbab ("mptcp: add receive buffer auto-tuning") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260407-net-next-mptcp-reduce-rbuf-v2-1-0d1d135bf6f6@kernel.org Signed-off-by: Jakub Kicinski --- include/trace/events/mptcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 269d949b2025..04521acba483 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -219,7 +219,7 @@ TRACE_EVENT(mptcp_rcvbuf_grow, __be32 *p32; __entry->time = time; - __entry->rtt_us = msk->rcvq_space.rtt_us >> 3; + __entry->rtt_us = mptcp_rtt_us_est(msk) >> 3; __entry->copied = msk->rcvq_space.copied; __entry->inq = mptcp_inq_hint(sk); __entry->space = msk->rcvq_space.space; -- cgit v1.2.3