diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2025-11-10 16:43:51 -0800 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-11-10 16:43:51 -0800 |
| commit | 7fc2bf8d30beebceac5841ad2227e0bd41abdf27 (patch) | |
| tree | 2c15b4330839599ba6867a144102f9c0164f6cf4 /net | |
| parent | 38f073a71e85c726b09f935e7886de72bc57b15b (diff) | |
| parent | 67f4cfb530150387dedc13bac7e2ab7f1a525d7f (diff) | |
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says:
====================
pull-request: bpf-next 2025-11-10
We've added 19 non-merge commits during the last 3 day(s) which contain
a total of 22 files changed, 1345 insertions(+), 197 deletions(-).
The main changes are:
1) Preserve skb metadata after a TC BPF program has changed the skb,
from Jakub Sitnicki.
This allows a TC program at the end of a TC filter chain to still see
the skb metadata, even if another TC program at the front of the chain
has changed the skb using BPF helpers.
2) Initial af_smc bpf_struct_ops support to control the smc specific
syn/synack options, from D. Wythe.
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
bpf/selftests: Add selftest for bpf_smc_hs_ctrl
net/smc: bpf: Introduce generic hook for handshake flow
bpf: Export necessary symbols for modules with struct_ops
selftests/bpf: Cover skb metadata access after bpf_skb_change_proto
selftests/bpf: Cover skb metadata access after change_head/tail helper
selftests/bpf: Cover skb metadata access after bpf_skb_adjust_room
selftests/bpf: Cover skb metadata access after vlan push/pop helper
selftests/bpf: Expect unclone to preserve skb metadata
selftests/bpf: Dump skb metadata on verification failure
selftests/bpf: Verify skb metadata in BPF instead of userspace
bpf: Make bpf_skb_change_head helper metadata-safe
bpf: Make bpf_skb_change_proto helper metadata-safe
bpf: Make bpf_skb_adjust_room metadata-safe
bpf: Make bpf_skb_vlan_push helper metadata-safe
bpf: Make bpf_skb_vlan_pop helper metadata-safe
vlan: Make vlan_remove_tag return nothing
bpf: Unclone skb head on bpf_dynptr_write to skb metadata
net: Preserve metadata on pskb_expand_head
net: Helper to move packet data and metadata after skb_push/pull
====================
Link: https://patch.msgid.link/20251110232427.3929291-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
| -rw-r--r-- | net/core/filter.c | 34 | ||||
| -rw-r--r-- | net/core/skbuff.c | 6 | ||||
| -rw-r--r-- | net/ipv4/tcp_output.c | 31 | ||||
| -rw-r--r-- | net/smc/Kconfig | 10 | ||||
| -rw-r--r-- | net/smc/Makefile | 1 | ||||
| -rw-r--r-- | net/smc/af_smc.c | 9 | ||||
| -rw-r--r-- | net/smc/smc_hs_bpf.c | 140 | ||||
| -rw-r--r-- | net/smc/smc_hs_bpf.h | 31 | ||||
| -rw-r--r-- | net/smc/smc_sysctl.c | 91 |
9 files changed, 325 insertions, 28 deletions
diff --git a/net/core/filter.c b/net/core/filter.c index 52721efba332..4124becf8604 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3253,11 +3253,11 @@ static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto) static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) { - /* Caller already did skb_cow() with len as headroom, + /* Caller already did skb_cow() with meta_len+len as headroom, * so no need to do it here. */ skb_push(skb, len); - memmove(skb->data, skb->data + len, off); + skb_postpush_data_move(skb, len, off); memset(skb->data + off, 0, len); /* No skb_postpush_rcsum(skb, skb->data + off, len) @@ -3281,7 +3281,7 @@ static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) old_data = skb->data; __skb_pull(skb, len); skb_postpull_rcsum(skb, old_data + off, len); - memmove(skb->data, old_data, off); + skb_postpull_data_move(skb, len, off); return 0; } @@ -3326,10 +3326,11 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) static int bpf_skb_proto_4_to_6(struct sk_buff *skb) { const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); + const u8 meta_len = skb_metadata_len(skb); u32 off = skb_mac_header_len(skb); int ret; - ret = skb_cow(skb, len_diff); + ret = skb_cow(skb, meta_len + len_diff); if (unlikely(ret < 0)) return ret; @@ -3489,6 +3490,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT; bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK; u16 mac_len = 0, inner_net = 0, inner_trans = 0; + const u8 meta_len = skb_metadata_len(skb); unsigned int gso_type = SKB_GSO_DODGY; int ret; @@ -3499,7 +3501,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, return -ENOTSUPP; } - ret = skb_cow_head(skb, len_diff); + ret = skb_cow_head(skb, meta_len + len_diff); if (unlikely(ret < 0)) return ret; @@ -3873,6 +3875,7 @@ static const struct bpf_func_proto sk_skb_change_tail_proto = { static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, u64 flags) { + const u8 meta_len = skb_metadata_len(skb); u32 max_len = BPF_SKB_MAX_LEN; u32 new_len = skb->len + head_room; int ret; @@ -3882,7 +3885,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, new_len < skb->len)) return -EINVAL; - ret = skb_cow(skb, head_room); + ret = skb_cow(skb, meta_len + head_room); if (likely(!ret)) { /* Idea for this helper is that we currently only * allow to expand on mac header. This means that @@ -3894,6 +3897,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, * for redirection into L2 device. */ __skb_push(skb, head_room); + skb_postpush_data_move(skb, head_room, 0); memset(skb->data, 0, head_room); skb_reset_mac_header(skb); skb_reset_mac_len(skb); @@ -12102,6 +12106,18 @@ void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) return skb_metadata_end(skb) - skb_metadata_len(skb) + offset; } +int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset, + const void *from, u32 len, u64 flags) +{ + if (unlikely(flags)) + return -EINVAL; + if (unlikely(bpf_try_make_writable(skb, 0))) + return -EFAULT; + + memmove(bpf_skb_meta_pointer(skb, offset), from, len); + return 0; +} + __bpf_kfunc_start_defs(); __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, struct bpf_dynptr *ptr__uninit) @@ -12129,9 +12145,6 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, * XDP context with bpf_xdp_adjust_meta(). Serves as an alternative to * &__sk_buff->data_meta. * - * If passed @skb_ is a clone which shares the data with the original, the - * dynptr will be read-only. This limitation may be lifted in the future. - * * Return: * * %0 - dynptr ready to use * * %-EINVAL - invalid flags, dynptr set to null @@ -12149,9 +12162,6 @@ __bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags, bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb)); - if (skb_cloned(skb)) - bpf_dynptr_set_rdonly(ptr); - return 0; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7ac5f8aa1235..d95658b738d1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2234,6 +2234,10 @@ EXPORT_SYMBOL(__pskb_copy_fclone); * * All the pointers pointing into skb header may change and must be * reloaded after call to this function. + * + * Note: If you skb_push() the start of the buffer after reallocating the + * header, call skb_postpush_data_move() first to move the metadata out of + * the way before writing to &sk_buff->data. */ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, @@ -2305,8 +2309,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); - skb_metadata_clear(skb); - /* It is not generally safe to change skb->truesize. * For the moment, we really care of rx path, or * when skb is orphaned (not attached to a socket). diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7f5df7a71f62..479afb714bdf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -40,6 +40,7 @@ #include <net/tcp.h> #include <net/tcp_ecn.h> #include <net/mptcp.h> +#include <net/smc.h> #include <net/proto_memory.h> #include <net/psp.h> @@ -802,34 +803,36 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, mptcp_options_write(th, ptr, tp, opts); } -static void smc_set_option(const struct tcp_sock *tp, +static void smc_set_option(struct tcp_sock *tp, struct tcp_out_options *opts, unsigned int *remaining) { #if IS_ENABLED(CONFIG_SMC) - if (static_branch_unlikely(&tcp_have_smc)) { - if (tp->syn_smc) { - if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { - opts->options |= OPTION_SMC; - *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; - } + if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc) { + tp->syn_smc = !!smc_call_hsbpf(1, tp, syn_option); + /* re-check syn_smc */ + if (tp->syn_smc && + *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { + opts->options |= OPTION_SMC; + *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; } } #endif } static void smc_set_option_cond(const struct tcp_sock *tp, - const struct inet_request_sock *ireq, + struct inet_request_sock *ireq, struct tcp_out_options *opts, unsigned int *remaining) { #if IS_ENABLED(CONFIG_SMC) - if (static_branch_unlikely(&tcp_have_smc)) { - if (tp->syn_smc && ireq->smc_ok) { - if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { - opts->options |= OPTION_SMC; - *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; - } + if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc && ireq->smc_ok) { + ireq->smc_ok = !!smc_call_hsbpf(1, tp, synack_option, ireq); + /* re-check smc_ok */ + if (ireq->smc_ok && + *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { + opts->options |= OPTION_SMC; + *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; } } #endif diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 99ecd59d1f4b..325addf83cc6 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -19,3 +19,13 @@ config SMC_DIAG smcss. if unsure, say Y. + +config SMC_HS_CTRL_BPF + bool "Generic eBPF hook for SMC handshake flow" + depends on SMC && BPF_SYSCALL + default y + help + SMC_HS_CTRL_BPF enables support to register generic eBPF hook for SMC + handshake flow, which offer much greater flexibility in modifying the behavior + of the SMC protocol stack compared to a complete kernel-based approach. Select + this option if you want filtring the handshake process via eBPF programs.
\ No newline at end of file diff --git a/net/smc/Makefile b/net/smc/Makefile index 0e754cbc38f9..5368634c5dd6 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -6,3 +6,4 @@ smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o smc-y += smc_tracepoint.o smc_inet.o smc-$(CONFIG_SYSCTL) += smc_sysctl.o +smc-$(CONFIG_SMC_HS_CTRL_BPF) += smc_hs_bpf.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0ef3e16a8517..e388de8dca09 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -58,6 +58,7 @@ #include "smc_tracepoint.h" #include "smc_sysctl.h" #include "smc_inet.h" +#include "smc_hs_bpf.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -3600,8 +3601,16 @@ static int __init smc_init(void) pr_err("%s: smc_inet_init fails with %d\n", __func__, rc); goto out_ulp; } + rc = bpf_smc_hs_ctrl_init(); + if (rc) { + pr_err("%s: bpf_smc_hs_ctrl_init fails with %d\n", __func__, + rc); + goto out_inet; + } static_branch_enable(&tcp_have_smc); return 0; +out_inet: + smc_inet_exit(); out_ulp: tcp_unregister_ulp(&smc_ulp_ops); out_ib: diff --git a/net/smc/smc_hs_bpf.c b/net/smc/smc_hs_bpf.c new file mode 100644 index 000000000000..063d23d85850 --- /dev/null +++ b/net/smc/smc_hs_bpf.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Generic hook for SMC handshake flow. + * + * Copyright IBM Corp. 2016 + * Copyright (c) 2025, Alibaba Inc. + * + * Author: D. Wythe <alibuda@linux.alibaba.com> + */ + +#include <linux/bpf_verifier.h> +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/rculist.h> + +#include "smc_hs_bpf.h" + +static DEFINE_SPINLOCK(smc_hs_ctrl_list_lock); +static LIST_HEAD(smc_hs_ctrl_list); + +static int smc_hs_ctrl_reg(struct smc_hs_ctrl *ctrl) +{ + int ret = 0; + + spin_lock(&smc_hs_ctrl_list_lock); + /* already exist or duplicate name */ + if (smc_hs_ctrl_find_by_name(ctrl->name)) + ret = -EEXIST; + else + list_add_tail_rcu(&ctrl->list, &smc_hs_ctrl_list); + spin_unlock(&smc_hs_ctrl_list_lock); + return ret; +} + +static void smc_hs_ctrl_unreg(struct smc_hs_ctrl *ctrl) +{ + spin_lock(&smc_hs_ctrl_list_lock); + list_del_rcu(&ctrl->list); + spin_unlock(&smc_hs_ctrl_list_lock); + + /* Ensure that all readers to complete */ + synchronize_rcu(); +} + +struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name) +{ + struct smc_hs_ctrl *ctrl; + + list_for_each_entry_rcu(ctrl, &smc_hs_ctrl_list, list) { + if (strcmp(ctrl->name, name) == 0) + return ctrl; + } + return NULL; +} + +static int __smc_bpf_stub_set_tcp_option(struct tcp_sock *tp) { return 1; } +static int __smc_bpf_stub_set_tcp_option_cond(const struct tcp_sock *tp, + struct inet_request_sock *ireq) +{ + return 1; +} + +static struct smc_hs_ctrl __smc_bpf_hs_ctrl = { + .syn_option = __smc_bpf_stub_set_tcp_option, + .synack_option = __smc_bpf_stub_set_tcp_option_cond, +}; + +static int smc_bpf_hs_ctrl_init(struct btf *btf) { return 0; } + +static int smc_bpf_hs_ctrl_reg(void *kdata, struct bpf_link *link) +{ + if (link) + return -EOPNOTSUPP; + + return smc_hs_ctrl_reg(kdata); +} + +static void smc_bpf_hs_ctrl_unreg(void *kdata, struct bpf_link *link) +{ + smc_hs_ctrl_unreg(kdata); +} + +static int smc_bpf_hs_ctrl_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + const struct smc_hs_ctrl *u_ctrl; + struct smc_hs_ctrl *k_ctrl; + u32 moff; + + u_ctrl = (const struct smc_hs_ctrl *)udata; + k_ctrl = (struct smc_hs_ctrl *)kdata; + + moff = __btf_member_bit_offset(t, member) / 8; + switch (moff) { + case offsetof(struct smc_hs_ctrl, name): + if (bpf_obj_name_cpy(k_ctrl->name, u_ctrl->name, + sizeof(u_ctrl->name)) <= 0) + return -EINVAL; + return 1; + case offsetof(struct smc_hs_ctrl, flags): + if (u_ctrl->flags & ~SMC_HS_CTRL_ALL_FLAGS) + return -EINVAL; + k_ctrl->flags = u_ctrl->flags; + return 1; + default: + break; + } + + return 0; +} + +static const struct bpf_func_proto * +bpf_smc_hs_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return bpf_base_func_proto(func_id, prog); +} + +static const struct bpf_verifier_ops smc_bpf_verifier_ops = { + .get_func_proto = bpf_smc_hs_func_proto, + .is_valid_access = bpf_tracing_btf_ctx_access, +}; + +static struct bpf_struct_ops bpf_smc_hs_ctrl_ops = { + .name = "smc_hs_ctrl", + .init = smc_bpf_hs_ctrl_init, + .reg = smc_bpf_hs_ctrl_reg, + .unreg = smc_bpf_hs_ctrl_unreg, + .cfi_stubs = &__smc_bpf_hs_ctrl, + .verifier_ops = &smc_bpf_verifier_ops, + .init_member = smc_bpf_hs_ctrl_init_member, + .owner = THIS_MODULE, +}; + +int bpf_smc_hs_ctrl_init(void) +{ + return register_bpf_struct_ops(&bpf_smc_hs_ctrl_ops, smc_hs_ctrl); +} diff --git a/net/smc/smc_hs_bpf.h b/net/smc/smc_hs_bpf.h new file mode 100644 index 000000000000..f5f1807c079e --- /dev/null +++ b/net/smc/smc_hs_bpf.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Generic hook for SMC handshake flow. + * + * Copyright IBM Corp. 2016 + * Copyright (c) 2025, Alibaba Inc. + * + * Author: D. Wythe <alibuda@linux.alibaba.com> + */ + +#ifndef __SMC_HS_CTRL +#define __SMC_HS_CTRL + +#include <net/smc.h> + +/* Find hs_ctrl by the target name, which required to be a c-string. + * Return NULL if no such ctrl was found,otherwise, return a valid ctrl. + * + * Note: Caller MUST ensure it's was invoked under rcu_read_lock. + */ +struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name); + +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) +int bpf_smc_hs_ctrl_init(void); +#else +static inline int bpf_smc_hs_ctrl_init(void) { return 0; } +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + +#endif /* __SMC_HS_CTRL */ diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 7b2471904d04..b1efed546243 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -12,12 +12,14 @@ #include <linux/init.h> #include <linux/sysctl.h> +#include <linux/bpf.h> #include <net/net_namespace.h> #include "smc.h" #include "smc_core.h" #include "smc_llc.h" #include "smc_sysctl.h" +#include "smc_hs_bpf.h" static int min_sndbuf = SMC_BUF_MIN_SIZE; static int min_rcvbuf = SMC_BUF_MIN_SIZE; @@ -32,6 +34,69 @@ static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX; static unsigned int smcr_max_wr_min = 2; static unsigned int smcr_max_wr_max = 2048; +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) +static int smc_net_replace_smc_hs_ctrl(struct net *net, const char *name) +{ + struct smc_hs_ctrl *ctrl = NULL; + + rcu_read_lock(); + /* null or empty name ask to clear current ctrl */ + if (name && name[0]) { + ctrl = smc_hs_ctrl_find_by_name(name); + if (!ctrl) { + rcu_read_unlock(); + return -EINVAL; + } + /* no change, just return */ + if (ctrl == rcu_dereference(net->smc.hs_ctrl)) { + rcu_read_unlock(); + return 0; + } + if (!bpf_try_module_get(ctrl, ctrl->owner)) { + rcu_read_unlock(); + return -EBUSY; + } + } + /* xhcg old ctrl with the new one atomically */ + ctrl = unrcu_pointer(xchg(&net->smc.hs_ctrl, RCU_INITIALIZER(ctrl))); + /* release old ctrl */ + if (ctrl) + bpf_module_put(ctrl, ctrl->owner); + + rcu_read_unlock(); + return 0; +} + +static int proc_smc_hs_ctrl(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = container_of(ctl->data, struct net, smc.hs_ctrl); + char val[SMC_HS_CTRL_NAME_MAX]; + const struct ctl_table tbl = { + .data = val, + .maxlen = SMC_HS_CTRL_NAME_MAX, + }; + struct smc_hs_ctrl *ctrl; + int ret; + + rcu_read_lock(); + ctrl = rcu_dereference(net->smc.hs_ctrl); + if (ctrl) + memcpy(val, ctrl->name, sizeof(ctrl->name)); + else + val[0] = '\0'; + rcu_read_unlock(); + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (ret) + return ret; + + if (write) + ret = smc_net_replace_smc_hs_ctrl(net, val); + return ret; +} +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + static struct ctl_table smc_table[] = { { .procname = "autocorking_size", @@ -119,6 +184,15 @@ static struct ctl_table smc_table[] = { .extra1 = &smcr_max_wr_min, .extra2 = &smcr_max_wr_max, }, +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + { + .procname = "hs_ctrl", + .data = &init_net.smc.hs_ctrl, + .mode = 0644, + .maxlen = SMC_HS_CTRL_NAME_MAX, + .proc_handler = proc_smc_hs_ctrl, + }, +#endif /* CONFIG_SMC_HS_CTRL_BPF */ }; int __net_init smc_sysctl_net_init(struct net *net) @@ -129,6 +203,16 @@ int __net_init smc_sysctl_net_init(struct net *net) table = smc_table; if (!net_eq(net, &init_net)) { int i; +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + struct smc_hs_ctrl *ctrl; + + rcu_read_lock(); + ctrl = rcu_dereference(init_net.smc.hs_ctrl); + if (ctrl && ctrl->flags & SMC_HS_CTRL_FLAG_INHERITABLE && + bpf_try_module_get(ctrl, ctrl->owner)) + rcu_assign_pointer(net->smc.hs_ctrl, ctrl); + rcu_read_unlock(); +#endif /* CONFIG_SMC_HS_CTRL_BPF */ table = kmemdup(table, sizeof(smc_table), GFP_KERNEL); if (!table) @@ -161,6 +245,9 @@ err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + smc_net_replace_smc_hs_ctrl(net, NULL); +#endif /* CONFIG_SMC_HS_CTRL_BPF */ return -ENOMEM; } @@ -170,6 +257,10 @@ void __net_exit smc_sysctl_net_exit(struct net *net) table = net->smc.smc_hdr->ctl_table_arg; unregister_net_sysctl_table(net->smc.smc_hdr); +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + smc_net_replace_smc_hs_ctrl(net, NULL); +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + if (!net_eq(net, &init_net)) kfree(table); } |
