author	Jakub Kicinski <kuba@kernel.org>	2025-11-10 16:43:51 -0800
committer	Jakub Kicinski <kuba@kernel.org>	2025-11-10 16:43:51 -0800
commit	7fc2bf8d30beebceac5841ad2227e0bd41abdf27 (patch)
tree	2c15b4330839599ba6867a144102f9c0164f6cf4 /net
parent	38f073a71e85c726b09f935e7886de72bc57b15b (diff)
parent	67f4cfb530150387dedc13bac7e2ab7f1a525d7f (diff)
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says:

====================
pull-request: bpf-next 2025-11-10

We've added 19 non-merge commits during the last 3 day(s) which contain
a total of 22 files changed, 1345 insertions(+), 197 deletions(-).

The main changes are:

1) Preserve skb metadata after a TC BPF program has changed the skb,
   from Jakub Sitnicki.

   This allows a TC program at the end of a TC filter chain to still see
   the skb metadata, even if another TC program at the front of the chain
   has changed the skb using BPF helpers.

2) Initial af_smc bpf_struct_ops support to control the SMC-specific
   SYN/SYNACK options, from D. Wythe.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  bpf/selftests: Add selftest for bpf_smc_hs_ctrl
  net/smc: bpf: Introduce generic hook for handshake flow
  bpf: Export necessary symbols for modules with struct_ops
  selftests/bpf: Cover skb metadata access after bpf_skb_change_proto
  selftests/bpf: Cover skb metadata access after change_head/tail helper
  selftests/bpf: Cover skb metadata access after bpf_skb_adjust_room
  selftests/bpf: Cover skb metadata access after vlan push/pop helper
  selftests/bpf: Expect unclone to preserve skb metadata
  selftests/bpf: Dump skb metadata on verification failure
  selftests/bpf: Verify skb metadata in BPF instead of userspace
  bpf: Make bpf_skb_change_head helper metadata-safe
  bpf: Make bpf_skb_change_proto helper metadata-safe
  bpf: Make bpf_skb_adjust_room metadata-safe
  bpf: Make bpf_skb_vlan_push helper metadata-safe
  bpf: Make bpf_skb_vlan_pop helper metadata-safe
  vlan: Make vlan_remove_tag return nothing
  bpf: Unclone skb head on bpf_dynptr_write to skb metadata
  net: Preserve metadata on pskb_expand_head
  net: Helper to move packet data and metadata after skb_push/pull
====================

Link: https://patch.msgid.link/20251110232427.3929291-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
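To illustrate the first change: a TC program late in the filter chain can now keep reading skb metadata (set earlier, e.g. by an XDP program via bpf_xdp_adjust_meta()) even after an earlier TC program ran skb-modifying helpers. A minimal sketch follows; struct my_meta and its layout are illustrative assumptions, not part of this series.

// Hedged TC-BPF reader sketch; struct my_meta is an assumption for
// illustration only.
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

struct my_meta {
	__u32 mark;	/* assumed to be written earlier, e.g. by XDP */
};

extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
				    struct bpf_dynptr *ptr__uninit) __ksym;

SEC("tc")
int read_meta(struct __sk_buff *skb)
{
	struct bpf_dynptr meta;
	struct my_meta m;

	if (bpf_dynptr_from_skb_meta(skb, 0, &meta))
		return 0; /* TC_ACT_OK */

	/* With this series, the read below still sees the metadata even
	 * if an earlier TC program called bpf_skb_adjust_room(), the vlan
	 * push/pop helpers, bpf_skb_change_proto() or bpf_skb_change_head().
	 */
	if (!bpf_dynptr_read(&m, sizeof(m), &meta, 0, 0))
		bpf_printk("meta mark: %u", m.mark);

	return 0; /* TC_ACT_OK */
}

char _license[] SEC("license") = "GPL";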
Diffstat (limited to 'net')
-rw-r--r--	net/core/filter.c	34
-rw-r--r--	net/core/skbuff.c	6
-rw-r--r--	net/ipv4/tcp_output.c	31
-rw-r--r--	net/smc/Kconfig	10
-rw-r--r--	net/smc/Makefile	1
-rw-r--r--	net/smc/af_smc.c	9
-rw-r--r--	net/smc/smc_hs_bpf.c	140
-rw-r--r--	net/smc/smc_hs_bpf.h	31
-rw-r--r--	net/smc/smc_sysctl.c	91
9 files changed, 325 insertions(+), 28 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 52721efba332..4124becf8604 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3253,11 +3253,11 @@ static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto)
static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
{
- /* Caller already did skb_cow() with len as headroom,
+ /* Caller already did skb_cow() with meta_len+len as headroom,
* so no need to do it here.
*/
skb_push(skb, len);
- memmove(skb->data, skb->data + len, off);
+ skb_postpush_data_move(skb, len, off);
memset(skb->data + off, 0, len);
/* No skb_postpush_rcsum(skb, skb->data + off, len)
@@ -3281,7 +3281,7 @@ static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
old_data = skb->data;
__skb_pull(skb, len);
skb_postpull_rcsum(skb, old_data + off, len);
- memmove(skb->data, old_data, off);
+ skb_postpull_data_move(skb, len, off);
return 0;
}
@@ -3326,10 +3326,11 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
{
const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
+ const u8 meta_len = skb_metadata_len(skb);
u32 off = skb_mac_header_len(skb);
int ret;
- ret = skb_cow(skb, len_diff);
+ ret = skb_cow(skb, meta_len + len_diff);
if (unlikely(ret < 0))
return ret;
@@ -3489,6 +3490,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
u16 mac_len = 0, inner_net = 0, inner_trans = 0;
+ const u8 meta_len = skb_metadata_len(skb);
unsigned int gso_type = SKB_GSO_DODGY;
int ret;
@@ -3499,7 +3501,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
return -ENOTSUPP;
}
- ret = skb_cow_head(skb, len_diff);
+ ret = skb_cow_head(skb, meta_len + len_diff);
if (unlikely(ret < 0))
return ret;
@@ -3873,6 +3875,7 @@ static const struct bpf_func_proto sk_skb_change_tail_proto = {
static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
u64 flags)
{
+ const u8 meta_len = skb_metadata_len(skb);
u32 max_len = BPF_SKB_MAX_LEN;
u32 new_len = skb->len + head_room;
int ret;
@@ -3882,7 +3885,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
new_len < skb->len))
return -EINVAL;
- ret = skb_cow(skb, head_room);
+ ret = skb_cow(skb, meta_len + head_room);
if (likely(!ret)) {
/* Idea for this helper is that we currently only
* allow to expand on mac header. This means that
@@ -3894,6 +3897,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
* for redirection into L2 device.
*/
__skb_push(skb, head_room);
+ skb_postpush_data_move(skb, head_room, 0);
memset(skb->data, 0, head_room);
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
@@ -12102,6 +12106,18 @@ void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
return skb_metadata_end(skb) - skb_metadata_len(skb) + offset;
}
+int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset,
+ const void *from, u32 len, u64 flags)
+{
+ if (unlikely(flags))
+ return -EINVAL;
+ if (unlikely(bpf_try_make_writable(skb, 0)))
+ return -EFAULT;
+
+ memmove(bpf_skb_meta_pointer(skb, offset), from, len);
+ return 0;
+}
+
__bpf_kfunc_start_defs();
__bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
struct bpf_dynptr *ptr__uninit)
@@ -12129,9 +12145,6 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
* XDP context with bpf_xdp_adjust_meta(). Serves as an alternative to
* &__sk_buff->data_meta.
*
- * If passed @skb_ is a clone which shares the data with the original, the
- * dynptr will be read-only. This limitation may be lifted in the future.
- *
* Return:
* * %0 - dynptr ready to use
* * %-EINVAL - invalid flags, dynptr set to null
@@ -12149,9 +12162,6 @@ __bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags,
bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb));
- if (skb_cloned(skb))
- bpf_dynptr_set_rdonly(ptr);
-
return 0;
}
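The filter.c changes all follow one pattern: reserve meta_len extra headroom before skb_cow()/skb_cow_head(), then replace the open-coded memmove() with the new skb_postpush_data_move()/skb_postpull_data_move() helpers so the metadata, which sits immediately in front of skb->data, travels with the headers. A rough sketch of the push-side semantics; this is illustrative only, the real helper comes from the "net: Helper to move packet data and metadata after skb_push/pull" commit, whose files are outside this diffstat.

/* Illustrative sketch, not the actual implementation: after
 * skb_push(skb, push_len), slide the metadata plus the first data_len
 * bytes of packet data into the newly exposed headroom so the metadata
 * ends up in front of the new skb->data again.
 */
static inline void sketch_postpush_data_move(struct sk_buff *skb,
					     u32 push_len, u32 data_len)
{
	const u8 meta_len = skb_metadata_len(skb);

	memmove(skb->data - meta_len,			/* new start */
		skb->data - meta_len + push_len,	/* old start */
		meta_len + data_len);
}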
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7ac5f8aa1235..d95658b738d1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2234,6 +2234,10 @@ EXPORT_SYMBOL(__pskb_copy_fclone);
*
* All the pointers pointing into skb header may change and must be
* reloaded after call to this function.
+ *
+ * Note: If you skb_push() the start of the buffer after reallocating the
+ * header, call skb_postpush_data_move() first to move the metadata out of
+ * the way before writing to &sk_buff->data.
*/
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
@@ -2305,8 +2309,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
- skb_metadata_clear(skb);
-
/* It is not generally safe to change skb->truesize.
* For the moment, we really care of rx path, or
* when skb is orphaned (not attached to a socket).
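pskb_expand_head() now preserves the metadata instead of clearing it, and the new kernel-doc spells out the caller contract: move the metadata out of the way before writing into pushed headroom. A condensed sketch of that contract, mirroring __bpf_skb_change_head() in the filter.c hunk above:

/* Sketch of the documented caller pattern: reserve room for metadata
 * plus the new header, push, move the metadata, then write the header.
 */
static int sketch_grow_mac_header(struct sk_buff *skb, u32 head_room)
{
	int ret = skb_cow(skb, skb_metadata_len(skb) + head_room);

	if (ret)
		return ret;
	__skb_push(skb, head_room);
	skb_postpush_data_move(skb, head_room, 0);	/* metadata first */
	memset(skb->data, 0, head_room);		/* then the header */
	return 0;
}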
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7f5df7a71f62..479afb714bdf 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -40,6 +40,7 @@
#include <net/tcp.h>
#include <net/tcp_ecn.h>
#include <net/mptcp.h>
+#include <net/smc.h>
#include <net/proto_memory.h>
#include <net/psp.h>
@@ -802,34 +803,36 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
mptcp_options_write(th, ptr, tp, opts);
}
-static void smc_set_option(const struct tcp_sock *tp,
+static void smc_set_option(struct tcp_sock *tp,
struct tcp_out_options *opts,
unsigned int *remaining)
{
#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (tp->syn_smc) {
- if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
- opts->options |= OPTION_SMC;
- *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
- }
+ if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc) {
+ tp->syn_smc = !!smc_call_hsbpf(1, tp, syn_option);
+ /* re-check syn_smc */
+ if (tp->syn_smc &&
+ *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+ opts->options |= OPTION_SMC;
+ *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
}
}
#endif
}
static void smc_set_option_cond(const struct tcp_sock *tp,
- const struct inet_request_sock *ireq,
+ struct inet_request_sock *ireq,
struct tcp_out_options *opts,
unsigned int *remaining)
{
#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (tp->syn_smc && ireq->smc_ok) {
- if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
- opts->options |= OPTION_SMC;
- *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
- }
+ if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc && ireq->smc_ok) {
+ ireq->smc_ok = !!smc_call_hsbpf(1, tp, synack_option, ireq);
+ /* re-check smc_ok */
+ if (ireq->smc_ok &&
+ *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+ opts->options |= OPTION_SMC;
+ *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
}
}
#endif
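smc_call_hsbpf() is defined alongside the struct_ops plumbing outside net/, so it does not appear in this diffstat. A hypothetical sketch of the dispatch it performs, assuming the installed ctrl hangs off the socket's netns as net->smc.hs_ctrl (as the smc_sysctl.c hunk below suggests) and that a nonzero default preserves today's behavior:

/* Hypothetical expansion; the name sketch_smc_call_hsbpf and the exact
 * macro shape are assumptions, not the merged definition.
 */
#define sketch_smc_call_hsbpf(default_ret, tp, op, ...)			\
({									\
	struct smc_hs_ctrl *__ctrl;					\
	int __ret = (default_ret);					\
									\
	rcu_read_lock();						\
	__ctrl = rcu_dereference(sock_net((struct sock *)(tp))->smc.hs_ctrl); \
	if (__ctrl && __ctrl->op)					\
		__ret = __ctrl->op(tp, ##__VA_ARGS__);			\
	rcu_read_unlock();						\
	__ret;								\
})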
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 99ecd59d1f4b..325addf83cc6 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -19,3 +19,13 @@ config SMC_DIAG
smcss.
if unsure, say Y.
+
+config SMC_HS_CTRL_BPF
+ bool "Generic eBPF hook for SMC handshake flow"
+ depends on SMC && BPF_SYSCALL
+ default y
+ help
+	  SMC_HS_CTRL_BPF enables support for registering a generic eBPF hook for
+	  the SMC handshake flow, which offers much greater flexibility in
+	  modifying the behavior of the SMC protocol stack compared to a purely
+	  kernel-based approach. Select this option if you want to filter the
+	  handshake process via eBPF programs.
\ No newline at end of file
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 0e754cbc38f9..5368634c5dd6 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -6,3 +6,4 @@ smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
smc-y += smc_tracepoint.o smc_inet.o
smc-$(CONFIG_SYSCTL) += smc_sysctl.o
+smc-$(CONFIG_SMC_HS_CTRL_BPF) += smc_hs_bpf.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 0ef3e16a8517..e388de8dca09 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -58,6 +58,7 @@
#include "smc_tracepoint.h"
#include "smc_sysctl.h"
#include "smc_inet.h"
+#include "smc_hs_bpf.h"
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
@@ -3600,8 +3601,16 @@ static int __init smc_init(void)
pr_err("%s: smc_inet_init fails with %d\n", __func__, rc);
goto out_ulp;
}
+ rc = bpf_smc_hs_ctrl_init();
+ if (rc) {
+ pr_err("%s: bpf_smc_hs_ctrl_init fails with %d\n", __func__,
+ rc);
+ goto out_inet;
+ }
static_branch_enable(&tcp_have_smc);
return 0;
+out_inet:
+ smc_inet_exit();
out_ulp:
tcp_unregister_ulp(&smc_ulp_ops);
out_ib:
diff --git a/net/smc/smc_hs_bpf.c b/net/smc/smc_hs_bpf.c
new file mode 100644
index 000000000000..063d23d85850
--- /dev/null
+++ b/net/smc/smc_hs_bpf.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Generic hook for SMC handshake flow.
+ *
+ * Copyright IBM Corp. 2016
+ * Copyright (c) 2025, Alibaba Inc.
+ *
+ * Author: D. Wythe <alibuda@linux.alibaba.com>
+ */
+
+#include <linux/bpf_verifier.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/rculist.h>
+
+#include "smc_hs_bpf.h"
+
+static DEFINE_SPINLOCK(smc_hs_ctrl_list_lock);
+static LIST_HEAD(smc_hs_ctrl_list);
+
+static int smc_hs_ctrl_reg(struct smc_hs_ctrl *ctrl)
+{
+ int ret = 0;
+
+ spin_lock(&smc_hs_ctrl_list_lock);
+ /* already exist or duplicate name */
+ if (smc_hs_ctrl_find_by_name(ctrl->name))
+ ret = -EEXIST;
+ else
+ list_add_tail_rcu(&ctrl->list, &smc_hs_ctrl_list);
+ spin_unlock(&smc_hs_ctrl_list_lock);
+ return ret;
+}
+
+static void smc_hs_ctrl_unreg(struct smc_hs_ctrl *ctrl)
+{
+ spin_lock(&smc_hs_ctrl_list_lock);
+ list_del_rcu(&ctrl->list);
+ spin_unlock(&smc_hs_ctrl_list_lock);
+
+ /* Ensure that all readers have completed */
+ synchronize_rcu();
+}
+
+struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name)
+{
+ struct smc_hs_ctrl *ctrl;
+
+ list_for_each_entry_rcu(ctrl, &smc_hs_ctrl_list, list) {
+ if (strcmp(ctrl->name, name) == 0)
+ return ctrl;
+ }
+ return NULL;
+}
+
+static int __smc_bpf_stub_set_tcp_option(struct tcp_sock *tp) { return 1; }
+static int __smc_bpf_stub_set_tcp_option_cond(const struct tcp_sock *tp,
+ struct inet_request_sock *ireq)
+{
+ return 1;
+}
+
+static struct smc_hs_ctrl __smc_bpf_hs_ctrl = {
+ .syn_option = __smc_bpf_stub_set_tcp_option,
+ .synack_option = __smc_bpf_stub_set_tcp_option_cond,
+};
+
+static int smc_bpf_hs_ctrl_init(struct btf *btf) { return 0; }
+
+static int smc_bpf_hs_ctrl_reg(void *kdata, struct bpf_link *link)
+{
+ if (link)
+ return -EOPNOTSUPP;
+
+ return smc_hs_ctrl_reg(kdata);
+}
+
+static void smc_bpf_hs_ctrl_unreg(void *kdata, struct bpf_link *link)
+{
+ smc_hs_ctrl_unreg(kdata);
+}
+
+static int smc_bpf_hs_ctrl_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ const struct smc_hs_ctrl *u_ctrl;
+ struct smc_hs_ctrl *k_ctrl;
+ u32 moff;
+
+ u_ctrl = (const struct smc_hs_ctrl *)udata;
+ k_ctrl = (struct smc_hs_ctrl *)kdata;
+
+ moff = __btf_member_bit_offset(t, member) / 8;
+ switch (moff) {
+ case offsetof(struct smc_hs_ctrl, name):
+ if (bpf_obj_name_cpy(k_ctrl->name, u_ctrl->name,
+ sizeof(u_ctrl->name)) <= 0)
+ return -EINVAL;
+ return 1;
+ case offsetof(struct smc_hs_ctrl, flags):
+ if (u_ctrl->flags & ~SMC_HS_CTRL_ALL_FLAGS)
+ return -EINVAL;
+ k_ctrl->flags = u_ctrl->flags;
+ return 1;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static const struct bpf_func_proto *
+bpf_smc_hs_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ return bpf_base_func_proto(func_id, prog);
+}
+
+static const struct bpf_verifier_ops smc_bpf_verifier_ops = {
+ .get_func_proto = bpf_smc_hs_func_proto,
+ .is_valid_access = bpf_tracing_btf_ctx_access,
+};
+
+static struct bpf_struct_ops bpf_smc_hs_ctrl_ops = {
+ .name = "smc_hs_ctrl",
+ .init = smc_bpf_hs_ctrl_init,
+ .reg = smc_bpf_hs_ctrl_reg,
+ .unreg = smc_bpf_hs_ctrl_unreg,
+ .cfi_stubs = &__smc_bpf_hs_ctrl,
+ .verifier_ops = &smc_bpf_verifier_ops,
+ .init_member = smc_bpf_hs_ctrl_init_member,
+ .owner = THIS_MODULE,
+};
+
+int bpf_smc_hs_ctrl_init(void)
+{
+ return register_bpf_struct_ops(&bpf_smc_hs_ctrl_ops, smc_hs_ctrl);
+}
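From the BPF side, an smc_hs_ctrl implementation is an ordinary struct_ops skeleton. A hedged sketch follows; program and map names are illustrative, and the return values follow the stubs above (nonzero keeps the SMC option). Note that smc_bpf_hs_ctrl_reg() rejects link-based attachment, so the map must use plain SEC(".struct_ops").

// Hedged BPF-side sketch of an smc_hs_ctrl implementation; names are
// illustrative assumptions based on this series.
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

SEC("struct_ops/syn_option")
int BPF_PROG(syn_option, struct tcp_sock *tp)
{
	return 1;	/* nonzero: keep offering SMC in the SYN */
}

SEC("struct_ops/synack_option")
int BPF_PROG(synack_option, const struct tcp_sock *tp,
	     struct inet_request_sock *ireq)
{
	return 1;	/* nonzero: keep the SMC option in the SYN-ACK */
}

SEC(".struct_ops")	/* not .struct_ops.link: reg() returns -EOPNOTSUPP for links */
struct smc_hs_ctrl sample_hs_ctrl = {
	.name		= "sample",
	.syn_option	= (void *)syn_option,
	.synack_option	= (void *)synack_option,
};

Once registered, writing the ctrl name to the per-netns sysctl added below (echo sample > /proc/sys/net/smc/hs_ctrl) selects it; writing an empty string clears it.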
diff --git a/net/smc/smc_hs_bpf.h b/net/smc/smc_hs_bpf.h
new file mode 100644
index 000000000000..f5f1807c079e
--- /dev/null
+++ b/net/smc/smc_hs_bpf.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Generic hook for SMC handshake flow.
+ *
+ * Copyright IBM Corp. 2016
+ * Copyright (c) 2025, Alibaba Inc.
+ *
+ * Author: D. Wythe <alibuda@linux.alibaba.com>
+ */
+
+#ifndef __SMC_HS_CTRL
+#define __SMC_HS_CTRL
+
+#include <net/smc.h>
+
+/* Find hs_ctrl by the target name, which is required to be a C string.
+ * Return NULL if no such ctrl was found; otherwise, return a valid ctrl.
+ *
+ * Note: Caller MUST ensure this is invoked under rcu_read_lock.
+ */
+struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name);
+
+#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
+int bpf_smc_hs_ctrl_init(void);
+#else
+static inline int bpf_smc_hs_ctrl_init(void) { return 0; }
+#endif /* CONFIG_SMC_HS_CTRL_BPF */
+
+#endif /* __SMC_HS_CTRL */
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 7b2471904d04..b1efed546243 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -12,12 +12,14 @@
#include <linux/init.h>
#include <linux/sysctl.h>
+#include <linux/bpf.h>
#include <net/net_namespace.h>
#include "smc.h"
#include "smc_core.h"
#include "smc_llc.h"
#include "smc_sysctl.h"
+#include "smc_hs_bpf.h"
static int min_sndbuf = SMC_BUF_MIN_SIZE;
static int min_rcvbuf = SMC_BUF_MIN_SIZE;
@@ -32,6 +34,69 @@ static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX;
static unsigned int smcr_max_wr_min = 2;
static unsigned int smcr_max_wr_max = 2048;
+#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
+static int smc_net_replace_smc_hs_ctrl(struct net *net, const char *name)
+{
+ struct smc_hs_ctrl *ctrl = NULL;
+
+ rcu_read_lock();
+ /* a NULL or empty name asks to clear the current ctrl */
+ if (name && name[0]) {
+ ctrl = smc_hs_ctrl_find_by_name(name);
+ if (!ctrl) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ /* no change, just return */
+ if (ctrl == rcu_dereference(net->smc.hs_ctrl)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ if (!bpf_try_module_get(ctrl, ctrl->owner)) {
+ rcu_read_unlock();
+ return -EBUSY;
+ }
+ }
+ /* xchg the old ctrl with the new one atomically */
+ ctrl = unrcu_pointer(xchg(&net->smc.hs_ctrl, RCU_INITIALIZER(ctrl)));
+ /* release old ctrl */
+ if (ctrl)
+ bpf_module_put(ctrl, ctrl->owner);
+
+ rcu_read_unlock();
+ return 0;
+}
+
+static int proc_smc_hs_ctrl(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net = container_of(ctl->data, struct net, smc.hs_ctrl);
+ char val[SMC_HS_CTRL_NAME_MAX];
+ const struct ctl_table tbl = {
+ .data = val,
+ .maxlen = SMC_HS_CTRL_NAME_MAX,
+ };
+ struct smc_hs_ctrl *ctrl;
+ int ret;
+
+ rcu_read_lock();
+ ctrl = rcu_dereference(net->smc.hs_ctrl);
+ if (ctrl)
+ memcpy(val, ctrl->name, sizeof(ctrl->name));
+ else
+ val[0] = '\0';
+ rcu_read_unlock();
+
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+
+ if (write)
+ ret = smc_net_replace_smc_hs_ctrl(net, val);
+ return ret;
+}
+#endif /* CONFIG_SMC_HS_CTRL_BPF */
+
static struct ctl_table smc_table[] = {
{
.procname = "autocorking_size",
@@ -119,6 +184,15 @@ static struct ctl_table smc_table[] = {
.extra1 = &smcr_max_wr_min,
.extra2 = &smcr_max_wr_max,
},
+#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
+ {
+ .procname = "hs_ctrl",
+ .data = &init_net.smc.hs_ctrl,
+ .mode = 0644,
+ .maxlen = SMC_HS_CTRL_NAME_MAX,
+ .proc_handler = proc_smc_hs_ctrl,
+ },
+#endif /* CONFIG_SMC_HS_CTRL_BPF */
};
int __net_init smc_sysctl_net_init(struct net *net)
@@ -129,6 +203,16 @@ int __net_init smc_sysctl_net_init(struct net *net)
table = smc_table;
if (!net_eq(net, &init_net)) {
int i;
+#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
+ struct smc_hs_ctrl *ctrl;
+
+ rcu_read_lock();
+ ctrl = rcu_dereference(init_net.smc.hs_ctrl);
+ if (ctrl && ctrl->flags & SMC_HS_CTRL_FLAG_INHERITABLE &&
+ bpf_try_module_get(ctrl, ctrl->owner))
+ rcu_assign_pointer(net->smc.hs_ctrl, ctrl);
+ rcu_read_unlock();
+#endif /* CONFIG_SMC_HS_CTRL_BPF */
table = kmemdup(table, sizeof(smc_table), GFP_KERNEL);
if (!table)
@@ -161,6 +245,9 @@ err_reg:
if (!net_eq(net, &init_net))
kfree(table);
err_alloc:
+#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
+ smc_net_replace_smc_hs_ctrl(net, NULL);
+#endif /* CONFIG_SMC_HS_CTRL_BPF */
return -ENOMEM;
}
@@ -170,6 +257,10 @@ void __net_exit smc_sysctl_net_exit(struct net *net)
table = net->smc.smc_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->smc.smc_hdr);
+#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
+ smc_net_replace_smc_hs_ctrl(net, NULL);
+#endif /* CONFIG_SMC_HS_CTRL_BPF */
+
if (!net_eq(net, &init_net))
kfree(table);
}