summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2026-03-23 23:49:20 +0000
committer: Jakub Kicinski <kuba@kernel.org> 2026-03-24 21:00:38 -0700
commit: d1e59a46973719e458bec78d00dd767d7a7ba71f (patch)
tree: 43fa8dcafd5334b6373a93d77d56ee0f2a1e464f /net
parent: 112f4c6320070b19e7d49cba758400adc279e377 (diff)
tcp: add cwnd_event_tx_start to tcp_congestion_ops
(tcp_congestion_ops)->cwnd_event() is called very often, with
@event oscillating between CA_EVENT_TX_START and other values.

This is not branch prediction friendly.

Provide a new cwnd_event_tx_start pointer dedicated for CA_EVENT_TX_START.

Both BBR and CUBIC benefit from this change, since they only care
about CA_EVENT_TX_START.

No change in kernel size:

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 4/4 grow/shrink: 3/1 up/down: 564/-568 (-4)
Function                                     old     new   delta
bbr_cwnd_event_tx_start                        -     450    +450
cubictcp_cwnd_event_tx_start                   -      70     +70
__pfx_cubictcp_cwnd_event_tx_start             -      16     +16
__pfx_bbr_cwnd_event_tx_start                  -      16     +16
tcp_unregister_congestion_control             93      99      +6
tcp_update_congestion_control                518     521      +3
tcp_register_congestion_control              422     425      +3
__tcp_transmit_skb                          3308    3306      -2
__pfx_cubictcp_cwnd_event                     16       -     -16
__pfx_bbr_cwnd_event                          16       -     -16
cubictcp_cwnd_event                           80       -     -80
bbr_cwnd_event                               454       -    -454
Total: Before=25240512, After=25240508, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260323234920.1097858-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--  net/ipv4/bpf_tcp_ca.c   5
-rw-r--r--  net/ipv4/tcp_bbr.c      8
-rw-r--r--  net/ipv4/tcp_cubic.c   35
-rw-r--r--  net/ipv4/tcp_dctcp.c   12
-rw-r--r--  net/ipv4/tcp_vegas.c    9
-rw-r--r--  net/ipv4/tcp_vegas.h    1
-rw-r--r--  net/ipv4/tcp_veno.c     8
-rw-r--r--  net/ipv4/tcp_yeah.c     1
8 files changed, 50 insertions, 29 deletions
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index e01492234b0b..008edc7f6688 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -272,6 +272,10 @@ static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
{
}
+static void bpf_tcp_ca_cwnd_event_tx_start(struct sock *sk)
+{
+}
+
static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags)
{
}
@@ -313,6 +317,7 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
.cong_avoid = bpf_tcp_ca_cong_avoid,
.set_state = bpf_tcp_ca_set_state,
.cwnd_event = bpf_tcp_ca_cwnd_event,
+ .cwnd_event_tx_start = bpf_tcp_ca_cwnd_event_tx_start,
.in_ack_event = bpf_tcp_ca_in_ack_event,
.pkts_acked = bpf_tcp_ca_pkts_acked,
.min_tso_segs = bpf_tcp_ca_min_tso_segs,
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 05d52372ca8f..1ddc20a399b0 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -330,12 +330,12 @@ static void bbr_save_cwnd(struct sock *sk)
bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
}
-__bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+__bpf_kfunc static void bbr_cwnd_event_tx_start(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- if (event == CA_EVENT_TX_START && tp->app_limited) {
+ if (tp->app_limited) {
bbr->idle_restart = 1;
bbr->ack_epoch_mstamp = tp->tcp_mstamp;
bbr->ack_epoch_acked = 0;
@@ -1149,7 +1149,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.cong_control = bbr_main,
.sndbuf_expand = bbr_sndbuf_expand,
.undo_cwnd = bbr_undo_cwnd,
- .cwnd_event = bbr_cwnd_event,
+ .cwnd_event_tx_start = bbr_cwnd_event_tx_start,
.ssthresh = bbr_ssthresh,
.min_tso_segs = bbr_min_tso_segs,
.get_info = bbr_get_info,
@@ -1161,7 +1161,7 @@ BTF_ID_FLAGS(func, bbr_init)
BTF_ID_FLAGS(func, bbr_main)
BTF_ID_FLAGS(func, bbr_sndbuf_expand)
BTF_ID_FLAGS(func, bbr_undo_cwnd)
-BTF_ID_FLAGS(func, bbr_cwnd_event)
+BTF_ID_FLAGS(func, bbr_cwnd_event_tx_start)
BTF_ID_FLAGS(func, bbr_ssthresh)
BTF_ID_FLAGS(func, bbr_min_tso_segs)
BTF_ID_FLAGS(func, bbr_set_state)
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 76c23675ae50..ab78b5ae8d0e 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -139,24 +139,21 @@ __bpf_kfunc static void cubictcp_init(struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-__bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+__bpf_kfunc static void cubictcp_cwnd_event_tx_start(struct sock *sk)
{
- if (event == CA_EVENT_TX_START) {
- struct bictcp *ca = inet_csk_ca(sk);
- u32 now = tcp_jiffies32;
- s32 delta;
-
- delta = now - tcp_sk(sk)->lsndtime;
-
- /* We were application limited (idle) for a while.
- * Shift epoch_start to keep cwnd growth to cubic curve.
- */
- if (ca->epoch_start && delta > 0) {
- ca->epoch_start += delta;
- if (after(ca->epoch_start, now))
- ca->epoch_start = now;
- }
- return;
+ struct bictcp *ca = inet_csk_ca(sk);
+ u32 now = tcp_jiffies32;
+ s32 delta;
+
+ delta = now - tcp_sk(sk)->lsndtime;
+
+ /* We were application limited (idle) for a while.
+ * Shift epoch_start to keep cwnd growth to cubic curve.
+ */
+ if (ca->epoch_start && delta > 0) {
+ ca->epoch_start += delta;
+ if (after(ca->epoch_start, now))
+ ca->epoch_start = now;
}
}
@@ -481,7 +478,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.cong_avoid = cubictcp_cong_avoid,
.set_state = cubictcp_state,
.undo_cwnd = tcp_reno_undo_cwnd,
- .cwnd_event = cubictcp_cwnd_event,
+ .cwnd_event_tx_start = cubictcp_cwnd_event_tx_start,
.pkts_acked = cubictcp_acked,
.owner = THIS_MODULE,
.name = "cubic",
@@ -492,7 +489,7 @@ BTF_ID_FLAGS(func, cubictcp_init)
BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh)
BTF_ID_FLAGS(func, cubictcp_cong_avoid)
BTF_ID_FLAGS(func, cubictcp_state)
-BTF_ID_FLAGS(func, cubictcp_cwnd_event)
+BTF_ID_FLAGS(func, cubictcp_cwnd_event_tx_start)
BTF_ID_FLAGS(func, cubictcp_acked)
BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids)
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 03abe0848420..96c99999e09d 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -203,15 +203,19 @@ __bpf_kfunc static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
tcp_plb_update_state_upon_rto(sk, &ca->plb);
dctcp_react_to_loss(sk);
break;
- case CA_EVENT_TX_START:
- tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */
- break;
default:
/* Don't care for the rest. */
break;
}
}
+__bpf_kfunc static void dctcp_cwnd_event_tx_start(struct sock *sk)
+{
+ struct dctcp *ca = inet_csk_ca(sk);
+
+ tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */
+}
+
static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info)
{
@@ -252,6 +256,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = {
.init = dctcp_init,
.in_ack_event = dctcp_update_alpha,
.cwnd_event = dctcp_cwnd_event,
+ .cwnd_event_tx_start = dctcp_cwnd_event_tx_start,
.ssthresh = dctcp_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
.undo_cwnd = dctcp_cwnd_undo,
@@ -275,6 +280,7 @@ BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids)
BTF_ID_FLAGS(func, dctcp_init)
BTF_ID_FLAGS(func, dctcp_update_alpha)
BTF_ID_FLAGS(func, dctcp_cwnd_event)
+BTF_ID_FLAGS(func, dctcp_cwnd_event_tx_start)
BTF_ID_FLAGS(func, dctcp_ssthresh)
BTF_ID_FLAGS(func, dctcp_cwnd_undo)
BTF_ID_FLAGS(func, dctcp_state)
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 786848ad37ea..cf12fb6be079 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -151,12 +151,17 @@ EXPORT_SYMBOL_GPL(tcp_vegas_state);
*/
void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
- if (event == CA_EVENT_CWND_RESTART ||
- event == CA_EVENT_TX_START)
+ if (event == CA_EVENT_CWND_RESTART)
tcp_vegas_init(sk);
}
EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
+void tcp_vegas_cwnd_event_tx_start(struct sock *sk)
+{
+ tcp_vegas_init(sk);
+}
+EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event_tx_start);
+
static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
{
return min(tp->snd_ssthresh, tcp_snd_cwnd(tp));
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index 4f24d0e37d9c..602af8e600c7 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -20,6 +20,7 @@ void tcp_vegas_init(struct sock *sk);
void tcp_vegas_state(struct sock *sk, u8 ca_state);
void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample);
void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
+void tcp_vegas_cwnd_event_tx_start(struct sock *sk);
size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info);
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 366ff6f214b2..1b2e1b947901 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -112,10 +112,15 @@ static void tcp_veno_state(struct sock *sk, u8 ca_state)
*/
static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
- if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START)
+ if (event == CA_EVENT_CWND_RESTART)
tcp_veno_init(sk);
}
+static void tcp_veno_cwnd_event_tx_start(struct sock *sk)
+{
+ tcp_veno_init(sk);
+}
+
static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -213,6 +218,7 @@ static struct tcp_congestion_ops tcp_veno __read_mostly = {
.pkts_acked = tcp_veno_pkts_acked,
.set_state = tcp_veno_state,
.cwnd_event = tcp_veno_cwnd_event,
+ .cwnd_event_tx_start = tcp_veno_cwnd_event_tx_start,
.owner = THIS_MODULE,
.name = "veno",
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 18b07ff5d20e..b22b3dccd05e 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -212,6 +212,7 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = {
.cong_avoid = tcp_yeah_cong_avoid,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
+ .cwnd_event_tx_start = tcp_vegas_cwnd_event_tx_start,
.get_info = tcp_vegas_get_info,
.pkts_acked = tcp_vegas_pkts_acked,