diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2022-02-28 23:12:55 -0800 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2022-02-28 23:12:55 -0800 |
| commit | 1136fa0c07de570dc17858745af8be169d1440ba (patch) | |
| tree | 3221b003517dd3cb13df5ba4b85637cd9ed82692 /include/net | |
| parent | ba115adf61b36b8c167126425a62b0efc23f72c0 (diff) | |
| parent | 754e0b0e35608ed5206d6a67a791563c631cec07 (diff) | |
Merge tag 'v5.17-rc4' into for-linus
Merge with mainline to get the Intel ASoC generic helpers header and
other changes.
Diffstat (limited to 'include/net')
103 files changed, 2917 insertions, 906 deletions
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 03614de86942..24a509f559ee 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * include/net/9p/9p.h - * * 9P protocol definitions. * * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> @@ -32,13 +30,13 @@ */ enum p9_debug_flags { - P9_DEBUG_ERROR = (1<<0), - P9_DEBUG_9P = (1<<2), + P9_DEBUG_ERROR = (1<<0), + P9_DEBUG_9P = (1<<2), P9_DEBUG_VFS = (1<<3), P9_DEBUG_CONV = (1<<4), P9_DEBUG_MUX = (1<<5), P9_DEBUG_TRANS = (1<<6), - P9_DEBUG_SLABS = (1<<7), + P9_DEBUG_SLABS = (1<<7), P9_DEBUG_FCALL = (1<<8), P9_DEBUG_FID = (1<<9), P9_DEBUG_PKT = (1<<10), @@ -317,8 +315,8 @@ enum p9_qid_t { }; /* 9P Magic Numbers */ -#define P9_NOTAG (u16)(~0) -#define P9_NOFID (u32)(~0) +#define P9_NOTAG ((u16)(~0)) +#define P9_NOFID ((u32)(~0)) #define P9_MAXWELEM 16 /* Minimal header size: size[4] type[1] tag[2] */ @@ -553,6 +551,4 @@ struct p9_fcall { int p9_errstr2errno(char *errstr, int len); int p9_error_init(void); -int p9_trans_fd_init(void); -void p9_trans_fd_exit(void); #endif /* NET_9P_H */ diff --git a/include/net/9p/client.h b/include/net/9p/client.h index e1c308d8d288..ec1d1706f43c 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * include/net/9p/client.h - * * 9P Client Definitions * * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> @@ -23,7 +21,7 @@ * @p9_proto_2000L: 9P2000.L extension */ -enum p9_proto_versions{ +enum p9_proto_versions { p9_proto_legacy, p9_proto_2000u, p9_proto_2000L, @@ -219,13 +217,13 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, u64 request_mask); int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode, - dev_t rdev, kgid_t gid, struct p9_qid *); + dev_t rdev, kgid_t gid, struct p9_qid *qid); int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, - kgid_t gid, struct p9_qid *); + kgid_t gid, struct p9_qid *qid); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); void p9_fcall_fini(struct p9_fcall *fc); -struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); +struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag); static inline void p9_req_get(struct p9_req_t *r) { @@ -241,14 +239,18 @@ int p9_req_put(struct p9_req_t *r); void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status); -int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int); -int p9stat_read(struct p9_client *, char *, int, struct p9_wstat *); -void p9stat_free(struct p9_wstat *); +int p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, + int16_t *tag, int rewind); +int p9stat_read(struct p9_client *clnt, char *buf, int len, + struct p9_wstat *st); +void p9stat_free(struct p9_wstat *stbuf); int p9_is_proto_dotu(struct p9_client *clnt); int p9_is_proto_dotl(struct p9_client *clnt); -struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *); -int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int); +struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, + const char *attr_name, u64 *attr_size); +int p9_client_xattrcreate(struct p9_fid *fid, const char *name, + u64 attr_size, int flags); int p9_client_readlink(struct p9_fid *fid, char **target); int p9_client_init(void); diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index 3eb4261b2958..ff842f963071 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * include/net/9p/transport.h - * * Transport Definition * * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> @@ -11,6 +9,8 @@ #ifndef NET_9P_TRANSPORT_H #define NET_9P_TRANSPORT_H +#include <linux/module.h> + #define P9_DEF_MIN_RESVPORT (665U) #define P9_DEF_MAX_RESVPORT (1023U) @@ -40,19 +40,25 @@ struct p9_trans_module { int maxsize; /* max message size of transport */ int def; /* this transport should be default */ struct module *owner; - int (*create)(struct p9_client *, const char *, char *); - void (*close) (struct p9_client *); - int (*request) (struct p9_client *, struct p9_req_t *req); - int (*cancel) (struct p9_client *, struct p9_req_t *req); - int (*cancelled)(struct p9_client *, struct p9_req_t *req); - int (*zc_request)(struct p9_client *, struct p9_req_t *, - struct iov_iter *, struct iov_iter *, int , int, int); - int (*show_options)(struct seq_file *, struct p9_client *); + int (*create)(struct p9_client *client, + const char *devname, char *args); + void (*close)(struct p9_client *client); + int (*request)(struct p9_client *client, struct p9_req_t *req); + int (*cancel)(struct p9_client *client, struct p9_req_t *req); + int (*cancelled)(struct p9_client *client, struct p9_req_t *req); + int (*zc_request)(struct p9_client *client, struct p9_req_t *req, + struct iov_iter *uidata, struct iov_iter *uodata, + int inlen, int outlen, int in_hdr_len); + int (*show_options)(struct seq_file *m, struct p9_client *client); }; void v9fs_register_trans(struct p9_trans_module *m); void v9fs_unregister_trans(struct p9_trans_module *m); -struct p9_trans_module *v9fs_get_trans_by_name(char *s); +struct p9_trans_module *v9fs_get_trans_by_name(const char *s); struct p9_trans_module *v9fs_get_default_trans(void); void v9fs_put_trans(struct p9_trans_module *m); + +#define MODULE_ALIAS_9P(transport) \ + MODULE_ALIAS("9p-" transport) + #endif /* NET_9P_TRANSPORT_H */ diff --git a/include/net/act_api.h b/include/net/act_api.h index f19f7f4a463c..3049cb69c025 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -7,6 +7,7 @@ */ #include <linux/refcount.h> +#include <net/flow_offload.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/net_namespace.h> @@ -30,13 +31,13 @@ struct tc_action { atomic_t tcfa_bindcnt; int tcfa_action; struct tcf_t tcfa_tm; - struct gnet_stats_basic_packed tcfa_bstats; - struct gnet_stats_basic_packed tcfa_bstats_hw; + struct gnet_stats_basic_sync tcfa_bstats; + struct gnet_stats_basic_sync tcfa_bstats_hw; struct gnet_stats_queue tcfa_qstats; struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; - struct gnet_stats_basic_cpu __percpu *cpu_bstats; - struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw; + struct gnet_stats_basic_sync __percpu *cpu_bstats; + struct gnet_stats_basic_sync __percpu *cpu_bstats_hw; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie __rcu *act_cookie; struct tcf_chain __rcu *goto_chain; @@ -44,6 +45,7 @@ struct tc_action { u8 hw_stats; u8 used_hw_stats; bool used_hw_stats_valid; + u32 in_hw_count; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt @@ -88,6 +90,16 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) dtm->expires = jiffies_to_clock_t(stm->expires); } +static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) +{ + if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY)) + return FLOW_ACTION_HW_STATS_DONT_CARE; + else if (!hw_stats) + return FLOW_ACTION_HW_STATS_DISABLED; + + return hw_stats; +} + #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 @@ -121,6 +133,8 @@ struct tc_action_ops { struct psample_group * (*get_psample_group)(const struct tc_action *a, tc_action_priv_destructor *destructor); + int (*offload_act_setup)(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind); }; struct tc_action_net { @@ -189,7 +203,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, - u32 flags, struct netlink_ext_ack *extack); + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack); struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, bool rtnl_held, struct netlink_ext_ack *extack); @@ -206,7 +220,7 @@ static inline void tcf_action_update_bstats(struct tc_action *a, struct sk_buff *skb) { if (likely(a->cpu_bstats)) { - bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb); + bstats_update(this_cpu_ptr(a->cpu_bstats), skb); return; } spin_lock(&a->tcfa_lock); @@ -240,6 +254,9 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, u64 drops, bool hw); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); +int tcf_action_update_hw_stats(struct tc_action *action); +int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, + void *cb_priv, bool add); int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, struct tcf_chain **handle, struct netlink_ext_ack *newchain); @@ -251,6 +268,14 @@ DECLARE_STATIC_KEY_FALSE(tcf_frag_xmit_count); #endif int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); + +#else /* !CONFIG_NET_CLS_ACT */ + +static inline int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, + void *cb_priv, bool add) { + return 0; +} + #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 78ea3e332688..e7ce719838b5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -6,6 +6,8 @@ #define RTR_SOLICITATION_INTERVAL (4*HZ) #define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ +#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ + #define TEMP_VALID_LIFETIME (7*86400) #define TEMP_PREFERRED_LIFETIME (86400) #define REGEN_MAX_RETRY (3) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 7d142e8a0550..a7ef624ed726 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -20,13 +20,12 @@ struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_BITS 8 extern unsigned int unix_tot_inflight; -extern spinlock_t unix_table_lock; +extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { refcount_t refcnt; int len; - unsigned int hash; struct sockaddr_un name[]; }; diff --git a/include/net/amt.h b/include/net/amt.h new file mode 100644 index 000000000000..7a4db8b903ee --- /dev/null +++ b/include/net/amt.h @@ -0,0 +1,385 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021 Taehee Yoo <ap420073@gmail.com> + */ +#ifndef _NET_AMT_H_ +#define _NET_AMT_H_ + +#include <linux/siphash.h> +#include <linux/jhash.h> + +enum amt_msg_type { + AMT_MSG_DISCOVERY = 1, + AMT_MSG_ADVERTISEMENT, + AMT_MSG_REQUEST, + AMT_MSG_MEMBERSHIP_QUERY, + AMT_MSG_MEMBERSHIP_UPDATE, + AMT_MSG_MULTICAST_DATA, + AMT_MSG_TEARDOWM, + __AMT_MSG_MAX, +}; + +#define AMT_MSG_MAX (__AMT_MSG_MAX - 1) + +enum amt_ops { + /* A*B */ + AMT_OPS_INT, + /* A+B */ + AMT_OPS_UNI, + /* A-B */ + AMT_OPS_SUB, + /* B-A */ + AMT_OPS_SUB_REV, + __AMT_OPS_MAX, +}; + +#define AMT_OPS_MAX (__AMT_OPS_MAX - 1) + +enum amt_filter { + AMT_FILTER_FWD, + AMT_FILTER_D_FWD, + AMT_FILTER_FWD_NEW, + AMT_FILTER_D_FWD_NEW, + AMT_FILTER_ALL, + AMT_FILTER_NONE_NEW, + AMT_FILTER_BOTH, + AMT_FILTER_BOTH_NEW, + __AMT_FILTER_MAX, +}; + +#define AMT_FILTER_MAX (__AMT_FILTER_MAX - 1) + +enum amt_act { + AMT_ACT_GMI, + AMT_ACT_GMI_ZERO, + AMT_ACT_GT, + AMT_ACT_STATUS_FWD_NEW, + AMT_ACT_STATUS_D_FWD_NEW, + AMT_ACT_STATUS_NONE_NEW, + __AMT_ACT_MAX, +}; + +#define AMT_ACT_MAX (__AMT_ACT_MAX - 1) + +enum amt_status { + AMT_STATUS_INIT, + AMT_STATUS_SENT_DISCOVERY, + AMT_STATUS_RECEIVED_DISCOVERY, + AMT_STATUS_SENT_ADVERTISEMENT, + AMT_STATUS_RECEIVED_ADVERTISEMENT, + AMT_STATUS_SENT_REQUEST, + AMT_STATUS_RECEIVED_REQUEST, + AMT_STATUS_SENT_QUERY, + AMT_STATUS_RECEIVED_QUERY, + AMT_STATUS_SENT_UPDATE, + AMT_STATUS_RECEIVED_UPDATE, + __AMT_STATUS_MAX, +}; + +#define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1) + +struct amt_header { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 type:4, + version:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 version:4, + type:4; +#else +#error "Please fix <asm/byteorder.h>" +#endif +} __packed; + +struct amt_header_discovery { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u32 type:4, + version:4, + reserved:24; +#elif defined(__BIG_ENDIAN_BITFIELD) + u32 version:4, + type:4, + reserved:24; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __be32 nonce; +} __packed; + +struct amt_header_advertisement { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u32 type:4, + version:4, + reserved:24; +#elif defined(__BIG_ENDIAN_BITFIELD) + u32 version:4, + type:4, + reserved:24; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __be32 nonce; + __be32 ip4; +} __packed; + +struct amt_header_request { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u32 type:4, + version:4, + reserved1:7, + p:1, + reserved2:16; +#elif defined(__BIG_ENDIAN_BITFIELD) + u32 version:4, + type:4, + p:1, + reserved1:7, + reserved2:16; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __be32 nonce; +} __packed; + +struct amt_header_membership_query { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u64 type:4, + version:4, + reserved:6, + l:1, + g:1, + response_mac:48; +#elif defined(__BIG_ENDIAN_BITFIELD) + u64 version:4, + type:4, + g:1, + l:1, + reserved:6, + response_mac:48; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __be32 nonce; +} __packed; + +struct amt_header_membership_update { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u64 type:4, + version:4, + reserved:8, + response_mac:48; +#elif defined(__BIG_ENDIAN_BITFIELD) + u64 version:4, + type:4, + reserved:8, + response_mac:48; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __be32 nonce; +} __packed; + +struct amt_header_mcast_data { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u16 type:4, + version:4, + reserved:8; +#elif defined(__BIG_ENDIAN_BITFIELD) + u16 version:4, + type:4, + reserved:8; +#else +#error "Please fix <asm/byteorder.h>" +#endif +} __packed; + +struct amt_headers { + union { + struct amt_header_discovery discovery; + struct amt_header_advertisement advertisement; + struct amt_header_request request; + struct amt_header_membership_query query; + struct amt_header_membership_update update; + struct amt_header_mcast_data data; + }; +} __packed; + +struct amt_gw_headers { + union { + struct amt_header_discovery discovery; + struct amt_header_request request; + struct amt_header_membership_update update; + }; +} __packed; + +struct amt_relay_headers { + union { + struct amt_header_advertisement advertisement; + struct amt_header_membership_query query; + struct amt_header_mcast_data data; + }; +} __packed; + +struct amt_skb_cb { + struct amt_tunnel_list *tunnel; +}; + +struct amt_tunnel_list { + struct list_head list; + /* Protect All resources under an amt_tunne_list */ + spinlock_t lock; + struct amt_dev *amt; + u32 nr_groups; + u32 nr_sources; + enum amt_status status; + struct delayed_work gc_wq; + __be16 source_port; + __be32 ip4; + __be32 nonce; + siphash_key_t key; + u64 mac:48, + reserved:16; + struct rcu_head rcu; + struct hlist_head groups[]; +}; + +union amt_addr { + __be32 ip4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr ip6; +#endif +}; + +/* RFC 3810 + * + * When the router is in EXCLUDE mode, the router state is represented + * by the notation EXCLUDE (X,Y), where X is called the "Requested List" + * and Y is called the "Exclude List". All sources, except those from + * the Exclude List, will be forwarded by the router + */ +enum amt_source_status { + AMT_SOURCE_STATUS_NONE, + /* Node of Requested List */ + AMT_SOURCE_STATUS_FWD, + /* Node of Exclude List */ + AMT_SOURCE_STATUS_D_FWD, +}; + +/* protected by gnode->lock */ +struct amt_source_node { + struct hlist_node node; + struct amt_group_node *gnode; + struct delayed_work source_timer; + union amt_addr source_addr; + enum amt_source_status status; +#define AMT_SOURCE_OLD 0 +#define AMT_SOURCE_NEW 1 + u8 flags; + struct rcu_head rcu; +}; + +/* Protected by amt_tunnel_list->lock */ +struct amt_group_node { + struct amt_dev *amt; + union amt_addr group_addr; + union amt_addr host_addr; + bool v6; + u8 filter_mode; + u32 nr_sources; + struct amt_tunnel_list *tunnel_list; + struct hlist_node node; + struct delayed_work group_timer; + struct rcu_head rcu; + struct hlist_head sources[]; +}; + +struct amt_dev { + struct net_device *dev; + struct net_device *stream_dev; + struct net *net; + /* Global lock for amt device */ + spinlock_t lock; + /* Used only in relay mode */ + struct list_head tunnel_list; + struct gro_cells gro_cells; + + /* Protected by RTNL */ + struct delayed_work discovery_wq; + /* Protected by RTNL */ + struct delayed_work req_wq; + /* Protected by RTNL */ + struct delayed_work secret_wq; + /* AMT status */ + enum amt_status status; + /* Generated key */ + siphash_key_t key; + struct socket __rcu *sock; + u32 max_groups; + u32 max_sources; + u32 hash_buckets; + u32 hash_seed; + /* Default 128 */ + u32 max_tunnels; + /* Default 128 */ + u32 nr_tunnels; + /* Gateway or Relay mode */ + u32 mode; + /* Default 2268 */ + __be16 relay_port; + /* Default 2268 */ + __be16 gw_port; + /* Outer local ip */ + __be32 local_ip; + /* Outer remote ip */ + __be32 remote_ip; + /* Outer discovery ip */ + __be32 discovery_ip; + /* Only used in gateway mode */ + __be32 nonce; + /* Gateway sent request and received query */ + bool ready4; + bool ready6; + u8 req_cnt; + u8 qi; + u64 qrv; + u64 qri; + /* Used only in gateway mode */ + u64 mac:48, + reserved:16; +}; + +#define AMT_TOS 0xc0 +#define AMT_IPHDR_OPTS 4 +#define AMT_IP6HDR_OPTS 8 +#define AMT_GC_INTERVAL (30 * 1000) +#define AMT_MAX_GROUP 32 +#define AMT_MAX_SOURCE 128 +#define AMT_HSIZE_SHIFT 8 +#define AMT_HSIZE (1 << AMT_HSIZE_SHIFT) + +#define AMT_DISCOVERY_TIMEOUT 5000 +#define AMT_INIT_REQ_TIMEOUT 1 +#define AMT_INIT_QUERY_INTERVAL 125 +#define AMT_MAX_REQ_TIMEOUT 120 +#define AMT_MAX_REQ_COUNT 3 +#define AMT_SECRET_TIMEOUT 60000 +#define IANA_AMT_UDP_PORT 2268 +#define AMT_MAX_TUNNELS 128 +#define AMT_MAX_REQS 128 +#define AMT_GW_HLEN (sizeof(struct iphdr) + \ + sizeof(struct udphdr) + \ + sizeof(struct amt_gw_headers)) +#define AMT_RELAY_HLEN (sizeof(struct iphdr) + \ + sizeof(struct udphdr) + \ + sizeof(struct amt_relay_headers)) + +static inline bool netif_is_amt(const struct net_device *dev) +{ + return dev->rtnl_link_ops && !strcmp(dev->rtnl_link_ops->kind, "amt"); +} + +static inline u64 amt_gmi(const struct amt_dev *amt) +{ + return ((amt->qrv * amt->qi) + amt->qri) * 1000; +} + +#endif /* _NET_AMT_H_ */ diff --git a/include/net/arp.h b/include/net/arp.h index 4950191f6b2b..031374ac2f22 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -53,13 +53,7 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) rcu_read_lock_bh(); n = __ipv4_neigh_lookup_noref(dev, key); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } diff --git a/include/net/ax25.h b/include/net/ax25.h index 8b7eb46ad72d..8221af1811df 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -229,13 +229,17 @@ struct ctl_table; typedef struct ax25_dev { struct ax25_dev *next; + struct net_device *dev; + netdevice_tracker dev_tracker; + struct net_device *forward; struct ctl_table_header *sysheader; int values[AX25_MAX_VALUES]; #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) ax25_dama_info dama; #endif + refcount_t refcount; } ax25_dev; typedef struct ax25_cb { @@ -290,6 +294,17 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } +static inline void ax25_dev_hold(ax25_dev *ax25_dev) +{ + refcount_inc(&ax25_dev->refcount); +} + +static inline void ax25_dev_put(ax25_dev *ax25_dev) +{ + if (refcount_dec_and_test(&ax25_dev->refcount)) { + kfree(ax25_dev); + } +} static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; @@ -304,7 +319,7 @@ extern spinlock_t ax25_list_lock; void ax25_cb_add(ax25_cb *); struct sock *ax25_find_listener(ax25_address *, int, struct net_device *, int); struct sock *ax25_get_socket(ax25_address *, ax25_address *, int); -ax25_cb *ax25_find_cb(ax25_address *, ax25_address *, ax25_digi *, +ax25_cb *ax25_find_cb(const ax25_address *, ax25_address *, ax25_digi *, struct net_device *); void ax25_send_to_raw(ax25_address *, struct sk_buff *, int); void ax25_destroy_socket(ax25_cb *); @@ -384,10 +399,11 @@ struct ax25_linkfail { void ax25_linkfail_register(struct ax25_linkfail *lf); void ax25_linkfail_release(struct ax25_linkfail *lf); -int __must_check ax25_listen_register(ax25_address *, struct net_device *); -void ax25_listen_release(ax25_address *, struct net_device *); +int __must_check ax25_listen_register(const ax25_address *, + struct net_device *); +void ax25_listen_release(const ax25_address *, struct net_device *); int(*ax25_protocol_function(unsigned int))(struct sk_buff *, ax25_cb *); -int ax25_listen_mine(ax25_address *, struct net_device *); +int ax25_listen_mine(const ax25_address *, struct net_device *); void ax25_link_failed(ax25_cb *, int); int ax25_protocol_is_registered(unsigned int); @@ -401,8 +417,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb); extern const struct header_ops ax25_header_ops; /* ax25_out.c */ -ax25_cb *ax25_send_frame(struct sk_buff *, int, ax25_address *, ax25_address *, - ax25_digi *, struct net_device *); +ax25_cb *ax25_send_frame(struct sk_buff *, int, const ax25_address *, + ax25_address *, ax25_digi *, struct net_device *); void ax25_output(ax25_cb *, int, struct sk_buff *); void ax25_kick(ax25_cb *); void ax25_transmit_buffer(ax25_cb *, struct sk_buff *, int); diff --git a/include/net/bareudp.h b/include/net/bareudp.h index dc65a0d71d9b..17610c8d6361 100644 --- a/include/net/bareudp.h +++ b/include/net/bareudp.h @@ -3,21 +3,10 @@ #ifndef __NET_BAREUDP_H #define __NET_BAREUDP_H +#include <linux/netdevice.h> #include <linux/types.h> -#include <linux/skbuff.h> #include <net/rtnetlink.h> -struct bareudp_conf { - __be16 ethertype; - __be16 port; - u16 sport_min; - bool multi_proto_mode; -}; - -struct net_device *bareudp_dev_create(struct net *net, const char *name, - u8 name_assign_type, - struct bareudp_conf *info); - static inline bool netif_is_bareudp(const struct net_device *dev) { return dev->rtnl_link_ops && diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 9125effbf448..4b3d0b16c185 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -153,6 +153,30 @@ struct bt_voice { #define BT_SCM_PKT_STATUS 0x03 +#define BT_CODEC 19 + +struct bt_codec_caps { + __u8 len; + __u8 data[]; +} __packed; + +struct bt_codec { + __u8 id; + __u16 cid; + __u16 vid; + __u8 data_path; + __u8 num_caps; +} __packed; + +struct bt_codecs { + __u8 num_codecs; + struct bt_codec codecs[]; +} __packed; + +#define BT_CODEC_CVSD 0x02 +#define BT_CODEC_TRANSPARENT 0x03 +#define BT_CODEC_MSBC 0x05 + __printf(1, 2) void bt_info(const char *fmt, ...); __printf(1, 2) @@ -356,6 +380,7 @@ typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, #define HCI_REQ_SKB BIT(1) struct hci_ctrl { + struct sock *sk; u16 opcode; u8 req_flags; u8 req_event; @@ -365,6 +390,11 @@ struct hci_ctrl { }; }; +struct mgmt_ctrl { + struct hci_dev *hdev; + u16 opcode; +}; + struct bt_skb_cb { u8 pkt_type; u8 force_active; @@ -374,6 +404,7 @@ struct bt_skb_cb { struct l2cap_ctrl l2cap; struct sco_ctrl sco; struct hci_ctrl hci; + struct mgmt_ctrl mgmt; }; }; #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) @@ -381,6 +412,8 @@ struct bt_skb_cb { #define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type #define hci_skb_expect(skb) bt_cb((skb))->expect #define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode +#define hci_skb_event(skb) bt_cb((skb))->hci.req_event +#define hci_skb_sk(skb) bt_cb((skb))->hci.sk static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) { @@ -420,6 +453,72 @@ out: return NULL; } +/* Shall not be called with lock_sock held */ +static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk, + struct msghdr *msg, + size_t len, size_t mtu, + size_t headroom, size_t tailroom) +{ + struct sk_buff *skb; + size_t size = min_t(size_t, len, mtu); + int err; + + skb = bt_skb_send_alloc(sk, size + headroom + tailroom, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return ERR_PTR(err); + + skb_reserve(skb, headroom); + skb_tailroom_reserve(skb, mtu, tailroom); + + if (!copy_from_iter_full(skb_put(skb, size), size, &msg->msg_iter)) { + kfree_skb(skb); + return ERR_PTR(-EFAULT); + } + + skb->priority = sk->sk_priority; + + return skb; +} + +/* Similar to bt_skb_sendmsg but can split the msg into multiple fragments + * accourding to the MTU. + */ +static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, + struct msghdr *msg, + size_t len, size_t mtu, + size_t headroom, size_t tailroom) +{ + struct sk_buff *skb, **frag; + + skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); + if (IS_ERR_OR_NULL(skb)) + return skb; + + len -= skb->len; + if (!len) + return skb; + + /* Add remaining data over MTU as continuation fragments */ + frag = &skb_shinfo(skb)->frag_list; + while (len) { + struct sk_buff *tmp; + + tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); + if (IS_ERR(tmp)) { + kfree_skb(skb); + return tmp; + } + + len -= tmp->len; + + *frag = tmp; + frag = &(*frag)->next; + } + + return skb; +} + int bt_to_errno(u16 code); void hci_sock_set_flag(struct sock *sk, int nr); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index b80415011dcd..35c073d44ec5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -246,6 +246,15 @@ enum { * HCI after resume. */ HCI_QUIRK_NO_SUSPEND_NOTIFIER, + + /* + * When this quirk is set, LE tx power is not queried on startup + * and the min/max tx power values default to HCI_TX_POWER_INVALID. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, }; /* HCI device flags */ @@ -330,6 +339,9 @@ enum { HCI_ENABLE_LL_PRIVACY, HCI_CMD_PENDING, HCI_FORCE_NO_MITM, + HCI_QUALITY_REPORT, + HCI_OFFLOAD_CODECS_ENABLED, + HCI_LE_SIMULTANEOUS_ROLES, __HCI_NUM_FLAGS, }; @@ -564,6 +576,7 @@ enum { #define HCI_ERROR_INVALID_LL_PARAMS 0x1e #define HCI_ERROR_UNSPECIFIED 0x1f #define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c +#define HCI_ERROR_CANCELLED_BY_HOST 0x44 /* Flow control modes */ #define HCI_FLOW_CTL_MODE_PACKET_BASED 0x00 @@ -871,6 +884,40 @@ struct hci_cp_logical_link_cancel { __u8 flow_spec_id; } __packed; +#define HCI_OP_ENHANCED_SETUP_SYNC_CONN 0x043d +struct hci_coding_format { + __u8 id; + __le16 cid; + __le16 vid; +} __packed; + +struct hci_cp_enhanced_setup_sync_conn { + __le16 handle; + __le32 tx_bandwidth; + __le32 rx_bandwidth; + struct hci_coding_format tx_coding_format; + struct hci_coding_format rx_coding_format; + __le16 tx_codec_frame_size; + __le16 rx_codec_frame_size; + __le32 in_bandwidth; + __le32 out_bandwidth; + struct hci_coding_format in_coding_format; + struct hci_coding_format out_coding_format; + __le16 in_coded_data_size; + __le16 out_coded_data_size; + __u8 in_pcm_data_format; + __u8 out_pcm_data_format; + __u8 in_pcm_sample_payload_msb_pos; + __u8 out_pcm_sample_payload_msb_pos; + __u8 in_data_path; + __u8 out_data_path; + __u8 in_transport_unit_size; + __u8 out_transport_unit_size; + __le16 max_latency; + __le16 pkt_type; + __u8 retrans_effort; +} __packed; + struct hci_rp_logical_link_cancel { __u8 status; __u8 phy_handle; @@ -1010,8 +1057,8 @@ struct hci_cp_read_stored_link_key { } __packed; struct hci_rp_read_stored_link_key { __u8 status; - __u8 max_keys; - __u8 num_keys; + __le16 max_keys; + __le16 num_keys; } __packed; #define HCI_OP_DELETE_STORED_LINK_KEY 0x0c12 @@ -1021,7 +1068,7 @@ struct hci_cp_delete_stored_link_key { } __packed; struct hci_rp_delete_stored_link_key { __u8 status; - __u8 num_keys; + __le16 num_keys; } __packed; #define HCI_MAX_NAME_LENGTH 248 @@ -1250,6 +1297,14 @@ struct hci_rp_read_local_oob_ext_data { __u8 rand256[16]; } __packed; +#define HCI_CONFIGURE_DATA_PATH 0x0c83 +struct hci_op_configure_data_path { + __u8 direction; + __u8 data_path_id; + __u8 vnd_len; + __u8 vnd_data[]; +} __packed; + #define HCI_OP_READ_LOCAL_VERSION 0x1001 struct hci_rp_read_local_version { __u8 status; @@ -1307,6 +1362,28 @@ struct hci_rp_read_data_block_size { } __packed; #define HCI_OP_READ_LOCAL_CODECS 0x100b +struct hci_std_codecs { + __u8 num; + __u8 codec[]; +} __packed; + +struct hci_vnd_codec { + /* company id */ + __le16 cid; + /* vendor codec id */ + __le16 vid; +} __packed; + +struct hci_vnd_codecs { + __u8 num; + struct hci_vnd_codec codec[]; +} __packed; + +struct hci_rp_read_local_supported_codecs { + __u8 status; + struct hci_std_codecs std_codecs; + struct hci_vnd_codecs vnd_codecs; +} __packed; #define HCI_OP_READ_LOCAL_PAIRING_OPTS 0x100c struct hci_rp_read_local_pairing_opts { @@ -1315,6 +1392,54 @@ struct hci_rp_read_local_pairing_opts { __u8 max_key_size; } __packed; +#define HCI_OP_READ_LOCAL_CODECS_V2 0x100d +struct hci_std_codec_v2 { + __u8 id; + __u8 transport; +} __packed; + +struct hci_std_codecs_v2 { + __u8 num; + struct hci_std_codec_v2 codec[]; +} __packed; + +struct hci_vnd_codec_v2 { + __u8 id; + __le16 cid; + __le16 vid; + __u8 transport; +} __packed; + +struct hci_vnd_codecs_v2 { + __u8 num; + struct hci_vnd_codec_v2 codec[]; +} __packed; + +struct hci_rp_read_local_supported_codecs_v2 { + __u8 status; + struct hci_std_codecs_v2 std_codecs; + struct hci_vnd_codecs_v2 vendor_codecs; +} __packed; + +#define HCI_OP_READ_LOCAL_CODEC_CAPS 0x100e +struct hci_op_read_local_codec_caps { + __u8 id; + __le16 cid; + __le16 vid; + __u8 transport; + __u8 direction; +} __packed; + +struct hci_codec_caps { + __u8 len; + __u8 data[]; +} __packed; + +struct hci_rp_read_local_codec_caps { + __u8 status; + __u8 num_caps; +} __packed; + #define HCI_OP_READ_PAGE_SCAN_ACTIVITY 0x0c1b struct hci_rp_read_page_scan_activity { __u8 status; @@ -1816,6 +1941,16 @@ struct hci_rp_le_read_transmit_power { __s8 max_le_tx_power; } __packed; +#define HCI_NETWORK_PRIVACY 0x00 +#define HCI_DEVICE_PRIVACY 0x01 + +#define HCI_OP_LE_SET_PRIVACY_MODE 0x204e +struct hci_cp_le_set_privacy_mode { + __u8 bdaddr_type; + bdaddr_t bdaddr; + __u8 mode; +} __packed; + #define HCI_OP_LE_READ_BUFFER_SIZE_V2 0x2060 struct hci_rp_le_read_buffer_size_v2 { __u8 status; @@ -1897,6 +2032,10 @@ struct hci_cp_le_reject_cis { } __packed; /* ---- HCI Events ---- */ +struct hci_ev_status { + __u8 status; +} __packed; + #define HCI_EV_INQUIRY_COMPLETE 0x01 #define HCI_EV_INQUIRY_RESULT 0x02 @@ -1909,6 +2048,11 @@ struct inquiry_info { __le16 clock_offset; } __packed; +struct hci_ev_inquiry_result { + __u8 num; + struct inquiry_info info[]; +}; + #define HCI_EV_CONN_COMPLETE 0x03 struct hci_ev_conn_complete { __u8 status; @@ -2020,7 +2164,7 @@ struct hci_comp_pkts_info { } __packed; struct hci_ev_num_comp_pkts { - __u8 num_hndl; + __u8 num; struct hci_comp_pkts_info handles[]; } __packed; @@ -2070,7 +2214,7 @@ struct hci_ev_pscan_rep_mode { } __packed; #define HCI_EV_INQUIRY_RESULT_WITH_RSSI 0x22 -struct inquiry_info_with_rssi { +struct inquiry_info_rssi { bdaddr_t bdaddr; __u8 pscan_rep_mode; __u8 pscan_period_mode; @@ -2078,7 +2222,7 @@ struct inquiry_info_with_rssi { __le16 clock_offset; __s8 rssi; } __packed; -struct inquiry_info_with_rssi_and_pscan_mode { +struct inquiry_info_rssi_pscan { bdaddr_t bdaddr; __u8 pscan_rep_mode; __u8 pscan_period_mode; @@ -2087,6 +2231,10 @@ struct inquiry_info_with_rssi_and_pscan_mode { __le16 clock_offset; __s8 rssi; } __packed; +struct hci_ev_inquiry_result_rssi { + __u8 num; + __u8 data[]; +} __packed; #define HCI_EV_REMOTE_EXT_FEATURES 0x23 struct hci_ev_remote_ext_features { @@ -2141,6 +2289,11 @@ struct extended_inquiry_info { __u8 data[240]; } __packed; +struct hci_ev_ext_inquiry_result { + __u8 num; + struct extended_inquiry_info info[]; +} __packed; + #define HCI_EV_KEY_REFRESH_COMPLETE 0x30 struct hci_ev_key_refresh_complete { __u8 status; @@ -2308,13 +2461,18 @@ struct hci_ev_le_conn_complete { #define HCI_EV_LE_ADVERTISING_REPORT 0x02 struct hci_ev_le_advertising_info { - __u8 evt_type; + __u8 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 length; __u8 data[]; } __packed; +struct hci_ev_le_advertising_report { + __u8 num; + struct hci_ev_le_advertising_info info[]; +} __packed; + #define HCI_EV_LE_CONN_UPDATE_COMPLETE 0x03 struct hci_ev_le_conn_update_complete { __u8 status; @@ -2358,7 +2516,7 @@ struct hci_ev_le_data_len_change { #define HCI_EV_LE_DIRECT_ADV_REPORT 0x0B struct hci_ev_le_direct_adv_info { - __u8 evt_type; + __u8 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 direct_addr_type; @@ -2366,6 +2524,11 @@ struct hci_ev_le_direct_adv_info { __s8 rssi; } __packed; +struct hci_ev_le_direct_adv_report { + __u8 num; + struct hci_ev_le_direct_adv_info info[]; +} __packed; + #define HCI_EV_LE_PHY_UPDATE_COMPLETE 0x0c struct hci_ev_le_phy_update_complete { __u8 status; @@ -2375,8 +2538,8 @@ struct hci_ev_le_phy_update_complete { } __packed; #define HCI_EV_LE_EXT_ADV_REPORT 0x0d -struct hci_ev_le_ext_adv_report { - __le16 evt_type; +struct hci_ev_le_ext_adv_info { + __le16 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 primary_phy; @@ -2384,11 +2547,16 @@ struct hci_ev_le_ext_adv_report { __u8 sid; __u8 tx_power; __s8 rssi; - __le16 interval; - __u8 direct_addr_type; + __le16 interval; + __u8 direct_addr_type; bdaddr_t direct_addr; - __u8 length; - __u8 data[]; + __u8 length; + __u8 data[]; +} __packed; + +struct hci_ev_le_ext_adv_report { + __u8 num; + struct hci_ev_le_ext_adv_info info[]; } __packed; #define HCI_EV_LE_ENHANCED_CONN_COMPLETE 0x0a @@ -2551,6 +2719,9 @@ static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb) #define hci_iso_data_len(h) ((h) & 0x3fff) #define hci_iso_data_flags(h) ((h) >> 14) +/* codec transport types */ +#define HCI_TRANSPORT_SCO_ESCO 0x01 + /* le24 support */ static inline void hci_cpu_to_le24(__u32 val, __u8 dst[3]) { diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a7360c8c72f8..586f69d084a2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -30,6 +30,7 @@ #include <linux/rculist.h> #include <net/bluetooth/hci.h> +#include <net/bluetooth/hci_sync.h> #include <net/bluetooth/hci_sock.h> /* HCI priority */ @@ -87,6 +88,7 @@ struct discovery_state { u8 (*uuids)[16]; unsigned long scan_start; unsigned long scan_duration; + unsigned long name_resolve_timeout; }; #define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ @@ -131,30 +133,41 @@ struct bdaddr_list { u8 bdaddr_type; }; -struct bdaddr_list_with_irk { +struct codec_list { struct list_head list; - bdaddr_t bdaddr; - u8 bdaddr_type; - u8 peer_irk[16]; - u8 local_irk[16]; + u8 id; + __u16 cid; + __u16 vid; + u8 transport; + u8 num_caps; + u32 len; + struct hci_codec_caps caps[]; }; -struct bdaddr_list_with_flags { +struct bdaddr_list_with_irk { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; - u32 current_flags; + u8 peer_irk[16]; + u8 local_irk[16]; }; enum hci_conn_flags { HCI_CONN_FLAG_REMOTE_WAKEUP, - HCI_CONN_FLAG_MAX -}; + HCI_CONN_FLAG_DEVICE_PRIVACY, -#define hci_conn_test_flag(nr, flags) ((flags) & (1U << nr)) + __HCI_CONN_NUM_FLAGS, +}; /* Make sure number of flags doesn't exceed sizeof(current_flags) */ -static_assert(HCI_CONN_FLAG_MAX < 32); +static_assert(__HCI_CONN_NUM_FLAGS < 32); + +struct bdaddr_list_with_flags { + struct list_head list; + bdaddr_t bdaddr; + u8 bdaddr_type; + DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS); +}; struct bt_uuid { struct list_head list; @@ -340,8 +353,8 @@ struct hci_dev { __u16 lmp_subver; __u16 voice_setting; __u8 num_iac; - __u8 stored_max_keys; - __u8 stored_num_keys; + __u16 stored_max_keys; + __u16 stored_num_keys; __u8 io_capability; __s8 inq_tx_power; __u8 err_data_reporting; @@ -464,6 +477,10 @@ struct hci_dev { struct work_struct power_on; struct delayed_work power_off; struct work_struct error_reset; + struct work_struct cmd_sync_work; + struct list_head cmd_sync_work_list; + struct mutex cmd_sync_work_lock; + struct work_struct cmd_sync_cancel_work; __u16 discov_timeout; struct delayed_work discov_off; @@ -478,10 +495,7 @@ struct hci_dev { struct work_struct tx_work; struct work_struct discov_update; - struct work_struct bg_scan_update; struct work_struct scan_update; - struct work_struct connectable_update; - struct work_struct discoverable_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -508,7 +522,6 @@ struct hci_dev { bool advertising_paused; struct notifier_block suspend_notifier; - struct work_struct suspend_prepare; enum suspended_state suspend_state_next; enum suspended_state suspend_state; bool scanning_paused; @@ -517,9 +530,6 @@ struct hci_dev { bdaddr_t wake_addr; u8 wake_addr_type; - wait_queue_head_t suspend_wait_q; - DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS); - struct hci_conn_hash conn_hash; struct list_head mgmt_pending; @@ -536,6 +546,7 @@ struct hci_dev { struct list_head pend_le_conns; struct list_head pend_le_reports; struct list_head blocked_keys; + struct list_head local_codecs; struct hci_dev_stats stat; @@ -550,6 +561,7 @@ struct hci_dev { struct rfkill *rfkill; DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS); + DECLARE_BITMAP(conn_flags, __HCI_CONN_NUM_FLAGS); __s8 adv_tx_power; __u8 adv_data[HCI_MAX_EXT_AD_LENGTH]; @@ -591,6 +603,7 @@ struct hci_dev { #if IS_ENABLED(CONFIG_BT_AOSPEXT) bool aosp_capable; + bool aosp_quality_report; #endif int (*open)(struct hci_dev *hdev); @@ -605,7 +618,12 @@ struct hci_dev { int (*set_diag)(struct hci_dev *hdev, bool enable); int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr); void (*cmd_timeout)(struct hci_dev *hdev); - bool (*prevent_wake)(struct hci_dev *hdev); + bool (*wakeup)(struct hci_dev *hdev); + int (*set_quality_report)(struct hci_dev *hdev, bool enable); + int (*get_data_path_id)(struct hci_dev *hdev, __u8 *data_path); + int (*get_codec_config_data)(struct hci_dev *hdev, __u8 type, + struct bt_codec *codec, __u8 *vnd_len, + __u8 **vnd_data); }; #define HCI_PHY_HANDLE(handle) (handle & 0xff) @@ -699,6 +717,7 @@ struct hci_conn { struct amp_mgr *amp_mgr; struct hci_conn *link; + struct bt_codec codec; void (*connect_cfm_cb) (struct hci_conn *conn, u8 status); void (*security_cfm_cb) (struct hci_conn *conn, u8 status); @@ -738,7 +757,8 @@ struct hci_conn_params { struct hci_conn *conn; bool explicit_connect; - u32 current_flags; + DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS); + u8 privacy_mode; }; extern struct list_head hci_dev_list; @@ -760,8 +780,15 @@ extern struct mutex hci_cb_list_lock; hci_dev_clear_flag(hdev, HCI_LE_ADV); \ hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\ hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \ + hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \ } while (0) +#define hci_dev_le_state_simultaneous(hdev) \ + (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ + (hdev->le_states[4] & 0x08) && /* Central */ \ + (hdev->le_states[4] & 0x40) && /* Peripheral */ \ + (hdev->le_states[3] & 0x10)) /* Simultaneous */ + /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); int l2cap_disconn_ind(struct hci_conn *hcon); @@ -1099,13 +1126,13 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u16 conn_timeout, enum conn_reasons conn_reason); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, - u8 dst_type, u8 sec_level, u16 conn_timeout, - u8 role, bdaddr_t *direct_rpa); + u8 dst_type, bool dst_resolved, u8 sec_level, + u16 conn_timeout, u8 role); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, enum conn_reasons conn_reason); struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, - __u16 setting); + __u16 setting, struct bt_codec *codec); int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, @@ -1360,6 +1387,8 @@ int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, u16 scan_rsp_len, u8 *scan_rsp_data); int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired); +u32 hci_adv_instance_flags(struct hci_dev *hdev, u8 instance); +bool hci_adv_instance_is_scannable(struct hci_dev *hdev, u8 instance); void hci_adv_monitors_clear(struct hci_dev *hdev); void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); @@ -1439,8 +1468,17 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) +#define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) + /* Use LL Privacy based address resolution if supported */ -#define use_ll_privacy(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) +#define use_ll_privacy(dev) (ll_privacy_capable(dev) && \ + hci_dev_test_flag(dev, HCI_ENABLE_LL_PRIVACY)) + +#define privacy_mode_capable(dev) (use_ll_privacy(dev) && \ + (hdev->commands[39] & 0x04)) + +/* Use enhanced synchronous connection if command is supported */ +#define enhanced_sco_capable(dev) ((dev)->commands[29] & 0x08) /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ @@ -1609,43 +1647,6 @@ static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, mutex_unlock(&hci_cb_list_lock); } -static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type, - size_t *data_len) -{ - size_t parsed = 0; - - if (eir_len < 2) - return NULL; - - while (parsed < eir_len - 1) { - u8 field_len = eir[0]; - - if (field_len == 0) - break; - - parsed += field_len + 1; - - if (parsed > eir_len) - break; - - if (eir[1] != type) { - eir += field_len + 1; - continue; - } - - /* Zero length data */ - if (field_len == 1) - return NULL; - - if (data_len) - *data_len = field_len - 1; - - return &eir[2]; - } - - return NULL; -} - static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) { if (addr_type != ADDR_LE_DEV_RANDOM) @@ -1702,10 +1703,6 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, int hci_register_cb(struct hci_cb *hcb); int hci_unregister_cb(struct hci_cb *hcb); -struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u32 timeout); -struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u8 event, u32 timeout); int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); @@ -1716,9 +1713,6 @@ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); -struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u32 timeout); - u32 hci_conn_get_phy(struct hci_conn *conn); /* ----- HCI Sockets ----- */ @@ -1777,6 +1771,8 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_LE_FAST_ADV_INT_MIN 0x00A0 /* 100 msec */ #define DISCOV_LE_FAST_ADV_INT_MAX 0x00F0 /* 150 msec */ +#define NAME_RESOLVE_DURATION msecs_to_jiffies(10240) /* 10.24 sec */ + void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); @@ -1818,7 +1814,6 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 entered); void mgmt_auth_failed(struct hci_conn *conn, u8 status); void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status); -void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); @@ -1843,8 +1838,6 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u16 max_interval, u16 latency, u16 timeout); void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); -void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status); -void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); @@ -1867,4 +1860,9 @@ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, #define SCO_AIRMODE_CVSD 0x0000 #define SCO_AIRMODE_TRANSP 0x0003 +#define LOCAL_CODEC_ACL_MASK BIT(0) +#define LOCAL_CODEC_SCO_MASK BIT(1) + +#define TRANSPORT_TYPE_MAX 0x04 + #endif /* __HCI_CORE_H */ diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h new file mode 100644 index 000000000000..2492e3b46a8f --- /dev/null +++ b/include/net/bluetooth/hci_sync.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * BlueZ - Bluetooth protocol stack for Linux + * + * Copyright (C) 2021 Intel Corporation + */ + +typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data); +typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data, + int err); + +struct hci_cmd_sync_work_entry { + struct list_head list; + hci_cmd_sync_work_func_t func; + void *data; + hci_cmd_sync_work_destroy_t destroy; +}; + +/* Function with sync suffix shall not be called with hdev->lock held as they + * wait the command to complete and in the meantime an event could be received + * which could attempt to acquire hdev->lock causing a deadlock. + */ +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout); +struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout, + struct sock *sk); +int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout, + struct sock *sk); + +void hci_cmd_sync_init(struct hci_dev *hdev); +void hci_cmd_sync_clear(struct hci_dev *hdev); +void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); +void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); + +int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); + +int hci_update_eir_sync(struct hci_dev *hdev); +int hci_update_class_sync(struct hci_dev *hdev); + +int hci_update_eir_sync(struct hci_dev *hdev); +int hci_update_class_sync(struct hci_dev *hdev); +int hci_update_name_sync(struct hci_dev *hdev); +int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode); + +int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, + bool rpa, u8 *own_addr_type); + +int hci_update_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance); +int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance); +int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, + bool force); + +int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance); +int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance); +int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance); +int hci_enable_advertising_sync(struct hci_dev *hdev); +int hci_enable_advertising(struct hci_dev *hdev); + +int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, + u8 instance, bool force); +int hci_disable_advertising_sync(struct hci_dev *hdev); + +int hci_update_passive_scan_sync(struct hci_dev *hdev); +int hci_update_passive_scan(struct hci_dev *hdev); +int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle); +int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type); +int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val); +int hci_read_clock_sync(struct hci_dev *hdev, struct hci_cp_read_clock *cp); + +int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable); +int hci_update_scan_sync(struct hci_dev *hdev); + +int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul); +int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance, + struct sock *sk); +struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool ext, + struct sock *sk); + +int hci_reset_sync(struct hci_dev *hdev); +int hci_dev_open_sync(struct hci_dev *hdev); +int hci_dev_close_sync(struct hci_dev *hdev); + +int hci_powered_update_sync(struct hci_dev *hdev); +int hci_set_powered_sync(struct hci_dev *hdev, u8 val); + +int hci_update_discoverable_sync(struct hci_dev *hdev); +int hci_update_discoverable(struct hci_dev *hdev); + +int hci_update_connectable_sync(struct hci_dev *hdev); + +int hci_start_discovery_sync(struct hci_dev *hdev); +int hci_stop_discovery_sync(struct hci_dev *hdev); + +int hci_suspend_sync(struct hci_dev *hdev); +int hci_resume_sync(struct hci_dev *hdev); + +struct hci_conn; + +int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 23a0524061b7..107b25deae68 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -936,10 +936,11 @@ struct mgmt_ev_auth_failed { __u8 status; } __packed; -#define MGMT_DEV_FOUND_CONFIRM_NAME 0x01 -#define MGMT_DEV_FOUND_LEGACY_PAIRING 0x02 -#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04 -#define MGMT_DEV_FOUND_INITIATED_CONN 0x08 +#define MGMT_DEV_FOUND_CONFIRM_NAME 0x01 +#define MGMT_DEV_FOUND_LEGACY_PAIRING 0x02 +#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04 +#define MGMT_DEV_FOUND_INITIATED_CONN 0x08 +#define MGMT_DEV_FOUND_NAME_REQUEST_FAILED 0x10 #define MGMT_EV_DEVICE_FOUND 0x0012 struct mgmt_ev_device_found { diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index f6af76c87a6c..191c36afa1f4 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -126,7 +126,7 @@ struct tlb_slave_info { struct alb_bond_info { struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ u32 unbalanced_load; - int tx_rebalance_counter; + atomic_t tx_rebalance_counter; int lp_counter; /* -------- rlb parameters -------- */ int rlb_enabled; diff --git a/include/net/bond_options.h b/include/net/bond_options.h index e64833a674eb..dd75c071f67e 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -65,6 +65,7 @@ enum { BOND_OPT_NUM_PEER_NOTIF_ALIAS, BOND_OPT_PEER_NOTIF_DELAY, BOND_OPT_LACP_ACTIVE, + BOND_OPT_MISSED_MAX, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 15e083e18f75..83cfd2d70247 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -121,6 +121,7 @@ struct bond_params { int xmit_policy; int miimon; u8 num_peer_notif; + u8 missed_max; int arp_interval; int arp_validate; int arp_all_targets; @@ -345,7 +346,7 @@ static inline bool bond_uses_primary(struct bonding *bond) static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) { - struct slave *slave = rcu_dereference(bond->curr_active_slave); + struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave); return bond_uses_primary(bond) && slave ? slave->dev : NULL; } diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 40296ed976a9..c4898fcbf923 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -130,6 +130,20 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL + if (unlikely(READ_ONCE(sk->sk_napi_id) != skb->napi_id)) + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); +#endif + sk_rx_queue_update(sk, skb); +} + +/* Variant of sk_mark_napi_id() for passive flow setup, + * as sk->sk_napi_id and sk->sk_rx_queue_mapping content + * needs to be set. + */ +static inline void sk_mark_napi_id_set(struct sock *sk, + const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif sk_rx_queue_set(sk, skb); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 27336fc70467..d19e48f9fdc6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -740,6 +740,22 @@ struct cfg80211_tid_config { }; /** + * struct cfg80211_fils_aad - FILS AAD data + * @macaddr: STA MAC address + * @kek: FILS KEK + * @kek_len: FILS KEK length + * @snonce: STA Nonce + * @anonce: AP Nonce + */ +struct cfg80211_fils_aad { + const u8 *macaddr; + const u8 *kek; + u8 kek_len; + const u8 *snonce; + const u8 *anonce; +}; + +/** * cfg80211_get_chandef_type - return old channel type from chandef * @chandef: the channel definition * @@ -1041,6 +1057,36 @@ struct cfg80211_crypto_settings { }; /** + * struct cfg80211_mbssid_config - AP settings for multi bssid + * + * @tx_wdev: pointer to the transmitted interface in the MBSSID set + * @index: index of this AP in the multi bssid group. + * @ema: set to true if the beacons should be sent out in EMA mode. + */ +struct cfg80211_mbssid_config { + struct wireless_dev *tx_wdev; + u8 index; + bool ema; +}; + +/** + * struct cfg80211_mbssid_elems - Multiple BSSID elements + * + * @cnt: Number of elements in array %elems. + * + * @elem: Array of multiple BSSID element(s) to be added into Beacon frames. + * @elem.data: Data for multiple BSSID elements. + * @elem.len: Length of data. + */ +struct cfg80211_mbssid_elems { + u8 cnt; + struct { + const u8 *data; + size_t len; + } elem[]; +}; + +/** * struct cfg80211_beacon_data - beacon data * @head: head portion of beacon (before TIM IE) * or %NULL if not changed @@ -1058,6 +1104,7 @@ struct cfg80211_crypto_settings { * @assocresp_ies_len: length of assocresp_ies in octets * @probe_resp_len: length of probe response template (@probe_resp) * @probe_resp: probe response template (AP mode only) + * @mbssid_ies: multiple BSSID elements * @ftm_responder: enable FTM responder functionality; -1 for no change * (which also implies no change in LCI/civic location data) * @lci: Measurement Report element content, starting with Measurement Token @@ -1075,6 +1122,7 @@ struct cfg80211_beacon_data { const u8 *probe_resp; const u8 *lci; const u8 *civicloc; + struct cfg80211_mbssid_elems *mbssid_ies; s8 ftm_responder; size_t head_len, tail_len; @@ -1140,17 +1188,6 @@ struct cfg80211_unsol_bcast_probe_resp { }; /** - * enum cfg80211_ap_settings_flags - AP settings flags - * - * Used by cfg80211_ap_settings - * - * @AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external authentication - */ -enum cfg80211_ap_settings_flags { - AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = BIT(0), -}; - -/** * struct cfg80211_ap_settings - AP configuration * * Used to configure an AP interface. @@ -1189,6 +1226,7 @@ enum cfg80211_ap_settings_flags { * @he_oper: HE operation IE (or %NULL if HE isn't enabled) * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters + * @mbssid_config: AP settings for multiple bssid */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1221,6 +1259,7 @@ struct cfg80211_ap_settings { struct cfg80211_he_bss_color he_bss_color; struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; + struct cfg80211_mbssid_config mbssid_config; }; /** @@ -4018,6 +4057,19 @@ struct mgmt_frame_regs { * @set_sar_specs: Update the SAR (TX power) settings. * * @color_change: Initiate a color change. + * + * @set_fils_aad: Set FILS AAD data to the AP driver so that the driver can use + * those to decrypt (Re)Association Request and encrypt (Re)Association + * Response frame. + * + * @set_radar_background: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime + * switching to a different channel during CAC detection on the selected + * radar channel. + * The caller is expected to set chandef pointer to NULL in order to + * disable background CAC/radar detection. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4348,6 +4400,10 @@ struct cfg80211_ops { int (*color_change)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_color_change_settings *params); + int (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_fils_aad *fils_aad); + int (*set_radar_background)(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef); }; /* @@ -4981,6 +5037,13 @@ struct wiphy_iftype_akm_suites { * %NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes * @sar_capa: SAR control capabilities * @rfkill: a pointer to the rfkill structure + * + * @mbssid_max_interfaces: maximum number of interfaces supported by the driver + * in a multiple BSSID set. This field must be set to a non-zero value + * by the driver to advertise MBSSID support. + * @ema_max_profile_periodicity: maximum profile periodicity supported by + * the driver. Setting this field to a non-zero value indicates that the + * driver supports enhanced multi-BSSID advertisements (EMA AP). */ struct wiphy { struct mutex mtx; @@ -5125,6 +5188,9 @@ struct wiphy { struct rfkill *rfkill; + u8 mbssid_max_interfaces; + u8 ema_max_profile_periodicity; + char priv[] __aligned(NETDEV_ALIGN); }; @@ -5490,7 +5556,7 @@ struct wireless_dev { unsigned long unprot_beacon_reported; }; -static inline u8 *wdev_address(struct wireless_dev *wdev) +static inline const u8 *wdev_address(struct wireless_dev *wdev) { if (wdev->netdev) return wdev->netdev->dev_addr; @@ -6344,6 +6410,19 @@ enum cfg80211_bss_frame_type { }; /** + * cfg80211_get_ies_channel_number - returns the channel number from ies + * @ie: IEs + * @ielen: length of IEs + * @band: enum nl80211_band of the channel + * @ftype: frame type + * + * Returns the channel number, or -1 if none could be determined. + */ +int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, + enum nl80211_band band, + enum cfg80211_bss_frame_type ftype); + +/** * cfg80211_inform_bss_data - inform cfg80211 of a new BSS * * @wiphy: the wiphy reporting the BSS @@ -7517,15 +7596,33 @@ void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp); /** - * cfg80211_radar_event - radar detection event + * __cfg80211_radar_event - radar detection event * @wiphy: the wiphy * @chandef: chandef for the current channel + * @offchan: the radar has been detected on the offchannel chain * @gfp: context flags * * This function is called when a radar is detected on the current chanenl. */ -void cfg80211_radar_event(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, gfp_t gfp); +void __cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + bool offchan, gfp_t gfp); + +static inline void +cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + __cfg80211_radar_event(wiphy, chandef, false, gfp); +} + +static inline void +cfg80211_background_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + __cfg80211_radar_event(wiphy, chandef, true, gfp); +} /** * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event @@ -7556,6 +7653,14 @@ void cfg80211_cac_event(struct net_device *netdev, const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, gfp_t gfp); +/** + * cfg80211_background_cac_abort - Channel Availability Check offchan abort event + * @wiphy: the wiphy + * + * This function is called by the driver when a Channel Availability Check + * (CAC) is aborted by a offchannel dedicated chain. + */ +void cfg80211_background_cac_abort(struct wiphy *wiphy); /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying @@ -8173,6 +8278,18 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, bool is_4addr, u8 check_swif); +/** + * cfg80211_assoc_comeback - notification of association that was + * temporarly rejected with a comeback + * @netdev: network device + * @bss: the bss entry with which association is in progress. + * @timeout: timeout interval value TUs. + * + * this function may sleep. the caller must hold the corresponding wdev's mutex. + */ +void cfg80211_assoc_comeback(struct net_device *netdev, + struct cfg80211_bss *bss, u32 timeout); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/include/net/checksum.h b/include/net/checksum.h index 5b96d5bd6e54..5218041e5c8f 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -180,4 +180,8 @@ static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } +static inline __wsum wsum_negate(__wsum val) +{ + return (__force __wsum)-((__force u32)val); +} #endif diff --git a/include/net/codel.h b/include/net/codel.h index a6e428f80135..5fed2f16cb8d 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -44,8 +44,6 @@ #include <linux/types.h> #include <linux/ktime.h> #include <linux/skbuff.h> -#include <net/pkt_sched.h> -#include <net/inet_ecn.h> /* Controlling Queue Delay (CoDel) algorithm * ========================================= @@ -102,6 +100,9 @@ static inline u32 codel_time_to_us(codel_time_t val) * @interval: width of moving time window * @mtu: device mtu, or minimal queue backlog in bytes. * @ecn: is Explicit Congestion Notification enabled + * @ce_threshold_selector: apply ce_threshold to packets matching this value + * in the diffserv/ECN byte of the IP header + * @ce_threshold_mask: mask to apply to ce_threshold_selector comparison */ struct codel_params { codel_time_t target; @@ -109,6 +110,8 @@ struct codel_params { codel_time_t interval; u32 mtu; bool ecn; + u8 ce_threshold_selector; + u8 ce_threshold_mask; }; /** diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index d289b91dcd65..78a27ac73070 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -49,11 +49,15 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include <net/inet_ecn.h> + static void codel_params_init(struct codel_params *params) { params->interval = MS2TIME(100); params->target = MS2TIME(5); params->ce_threshold = CODEL_DISABLED_THRESHOLD; + params->ce_threshold_mask = 0; + params->ce_threshold_selector = 0; params->ecn = false; } @@ -246,9 +250,19 @@ static struct sk_buff *codel_dequeue(void *ctx, vars->rec_inv_sqrt); } end: - if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && - INET_ECN_set_ce(skb)) - stats->ce_mark++; + if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) { + bool set_ce = true; + + if (params->ce_threshold_mask) { + int dsfield = skb_get_dsfield(skb); + + set_ce = (dsfield >= 0 && + (((u8)dsfield & params->ce_threshold_mask) == + params->ce_threshold_selector)); + } + if (set_ce && INET_ECN_set_ce(skb)) + stats->ce_mark++; + } return skb; } diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h index 098630f83a55..58b6d0ebea10 100644 --- a/include/net/codel_qdisc.h +++ b/include/net/codel_qdisc.h @@ -49,6 +49,8 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include <net/pkt_sched.h> + /* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ struct codel_skb_cb { codel_time_t enqueue_time; diff --git a/include/net/datalink.h b/include/net/datalink.h index a9663229b913..d9b7faaa539f 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -12,7 +12,7 @@ struct datalink_proto { int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); int (*request)(struct datalink_proto *, struct sk_buff *, - unsigned char *); + const unsigned char *); struct list_head node; }; diff --git a/include/net/devlink.h b/include/net/devlink.h index 154cf0dbca37..8d5349d2fb68 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -21,45 +21,7 @@ #include <linux/xarray.h> #include <linux/firmware.h> -#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ - (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX) - -struct devlink_dev_stats { - u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; - u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; -}; - -struct devlink_ops; - -struct devlink { - u32 index; - struct list_head port_list; - struct list_head rate_list; - struct list_head sb_list; - struct list_head dpipe_table_list; - struct list_head resource_list; - struct list_head param_list; - struct list_head region_list; - struct list_head reporter_list; - struct mutex reporters_lock; /* protects reporter_list */ - struct devlink_dpipe_headers *dpipe_headers; - struct list_head trap_list; - struct list_head trap_group_list; - struct list_head trap_policer_list; - const struct devlink_ops *ops; - struct xarray snapshot_ids; - struct devlink_dev_stats stats; - struct device *dev; - possible_net_t _net; - struct mutex lock; /* Serializes access to devlink instance specific objects such as - * port, sb, dpipe, resource, params, region, traps and more. - */ - u8 reload_failed:1, - reload_enabled:1; - refcount_t refcount; - struct completion comp; - char priv[0] __aligned(NETDEV_ALIGN); -}; +struct devlink; struct devlink_port_phys_attrs { u32 port_number; /* Same value as "split group". @@ -399,33 +361,6 @@ devlink_resource_size_params_init(struct devlink_resource_size_params *size_para typedef u64 devlink_resource_occ_get_t(void *priv); -/** - * struct devlink_resource - devlink resource - * @name: name of the resource - * @id: id, per devlink instance - * @size: size of the resource - * @size_new: updated size of the resource, reload is needed - * @size_valid: valid in case the total size of the resource is valid - * including its children - * @parent: parent resource - * @size_params: size parameters - * @list: parent list - * @resource_list: list of child resources - */ -struct devlink_resource { - const char *name; - u64 id; - u64 size; - u64 size_new; - bool size_valid; - struct devlink_resource *parent; - struct devlink_resource_size_params size_params; - struct list_head list; - struct list_head resource_list; - devlink_resource_occ_get_t *occ_get; - void *occ_get_priv; -}; - #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 #define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports" @@ -506,7 +441,6 @@ struct devlink_param_item { const struct devlink_param *param; union devlink_param_value driverinit_value; bool driverinit_value_valid; - bool published; }; enum devlink_param_generic_id { @@ -524,6 +458,9 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -573,6 +510,15 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME "enable_vnet" #define DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME "enable_iwarp" +#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE DEVLINK_PARAM_TYPE_BOOL + +#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size" +#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 + +#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME "event_eq_size" +#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@ -726,13 +672,17 @@ struct devlink_health_reporter_ops { * @trap_name: Trap name. * @trap_group_name: Trap group name. * @input_dev: Input netdevice. + * @dev_tracker: refcount tracker for @input_dev. * @fa_cookie: Flow action user cookie. * @trap_type: Trap type. */ struct devlink_trap_metadata { const char *trap_name; const char *trap_group_name; + struct net_device *input_dev; + netdevice_tracker dev_tracker; + const struct flow_action_cookie *fa_cookie; enum devlink_trap_type trap_type; }; @@ -1224,6 +1174,11 @@ enum devlink_trap_group_generic_id { .min_burst = _min_burst, \ } +enum { + /* device supports reload operations */ + DEVLINK_F_RELOAD = 1UL << 0, +}; + struct devlink_ops { /** * @supported_flash_update_params: @@ -1520,34 +1475,9 @@ struct devlink_ops { struct netlink_ext_ack *extack); }; -static inline void *devlink_priv(struct devlink *devlink) -{ - BUG_ON(!devlink); - return &devlink->priv; -} - -static inline struct devlink *priv_to_devlink(void *priv) -{ - BUG_ON(!priv); - return container_of(priv, struct devlink, priv); -} - -static inline struct devlink_port * -netdev_to_devlink_port(struct net_device *dev) -{ - if (dev->netdev_ops->ndo_get_devlink_port) - return dev->netdev_ops->ndo_get_devlink_port(dev); - return NULL; -} - -static inline struct devlink *netdev_to_devlink(struct net_device *dev) -{ - struct devlink_port *devlink_port = netdev_to_devlink_port(dev); - - if (devlink_port) - return devlink_port->devlink; - return NULL; -} +void *devlink_priv(struct devlink *devlink); +struct devlink *priv_to_devlink(void *priv); +struct device *devlink_to_dev(const struct devlink *devlink); struct ib_device; @@ -1566,10 +1496,9 @@ static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, { return devlink_alloc_ns(ops, priv_size, &init_net, dev); } -int devlink_register(struct devlink *devlink); +void devlink_set_features(struct devlink *devlink, u64 features); +void devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); -void devlink_reload_enable(struct devlink *devlink); -void devlink_reload_disable(struct devlink *devlink); void devlink_free(struct devlink *devlink); int devlink_port_register(struct devlink *devlink, struct devlink_port *devlink_port, @@ -1627,8 +1556,7 @@ int devlink_resource_register(struct devlink *devlink, u64 resource_id, u64 parent_resource_id, const struct devlink_resource_size_params *size_params); -void devlink_resources_unregister(struct devlink *devlink, - struct devlink_resource *resource); +void devlink_resources_unregister(struct devlink *devlink); int devlink_resource_size_get(struct devlink *devlink, u64 resource_id, u64 *p_resource_size); @@ -1651,34 +1579,11 @@ int devlink_param_register(struct devlink *devlink, const struct devlink_param *param); void devlink_param_unregister(struct devlink *devlink, const struct devlink_param *param); -void devlink_params_publish(struct devlink *devlink); -void devlink_params_unpublish(struct devlink *devlink); -void devlink_param_publish(struct devlink *devlink, - const struct devlink_param *param); -void devlink_param_unpublish(struct devlink *devlink, - const struct devlink_param *param); -int devlink_port_params_register(struct devlink_port *devlink_port, - const struct devlink_param *params, - size_t params_count); -void devlink_port_params_unregister(struct devlink_port *devlink_port, - const struct devlink_param *params, - size_t params_count); int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *init_val); int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val); -int -devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, - u32 param_id, - union devlink_param_value *init_val); -int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port, - u32 param_id, - union devlink_param_value init_val); void devlink_param_value_changed(struct devlink *devlink, u32 param_id); -void devlink_port_param_value_changed(struct devlink_port *devlink_port, - u32 param_id); -void devlink_param_value_str_fill(union devlink_param_value *dst_val, - const char *src); struct devlink_region * devlink_region_create(struct devlink *devlink, const struct devlink_region_ops *ops, @@ -1723,10 +1628,7 @@ int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, const char *name); int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg); -int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value); -int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value); int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value); -int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value); int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value); int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, u16 value_len); @@ -1812,9 +1714,12 @@ devlink_trap_policers_unregister(struct devlink *devlink, #if IS_ENABLED(CONFIG_NET_DEVLINK) -void devlink_compat_running_version(struct net_device *dev, +struct devlink *__must_check devlink_try_get(struct devlink *devlink); +void devlink_put(struct devlink *devlink); + +void devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len); -int devlink_compat_flash_update(struct net_device *dev, const char *file_name); +int devlink_compat_flash_update(struct devlink *devlink, const char *file_name); int devlink_compat_phys_port_name_get(struct net_device *dev, char *name, size_t len); int devlink_compat_switch_id_get(struct net_device *dev, @@ -1822,13 +1727,22 @@ int devlink_compat_switch_id_get(struct net_device *dev, #else +static inline struct devlink *devlink_try_get(struct devlink *devlink) +{ + return NULL; +} + +static inline void devlink_put(struct devlink *devlink) +{ +} + static inline void -devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) +devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { } static inline int -devlink_compat_flash_update(struct net_device *dev, const char *file_name) +devlink_compat_flash_update(struct devlink *devlink, const char *file_name) { return -EOPNOTSUPP; } diff --git a/include/net/dn.h b/include/net/dn.h index 56ab0726c641..ba9655b0098a 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -166,7 +166,7 @@ struct dn_skb_cb { int iif; }; -static inline __le16 dn_eth2dn(unsigned char *ethaddr) +static inline __le16 dn_eth2dn(const unsigned char *ethaddr) { return get_unaligned((__le16 *)(ethaddr + 4)); } diff --git a/include/net/dsa.h b/include/net/dsa.h index d784e76113b8..57b3e4e7413b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -51,6 +51,7 @@ struct phylink_link_state; #define DSA_TAG_PROTO_SEVILLE_VALUE 21 #define DSA_TAG_PROTO_BRCM_LEGACY_VALUE 22 #define DSA_TAG_PROTO_SJA1110_VALUE 23 +#define DSA_TAG_PROTO_RTL8_4_VALUE 24 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -77,6 +78,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_OCELOT_8021Q = DSA_TAG_PROTO_OCELOT_8021Q_VALUE, DSA_TAG_PROTO_SEVILLE = DSA_TAG_PROTO_SEVILLE_VALUE, DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE, + DSA_TAG_PROTO_RTL8_4 = DSA_TAG_PROTO_RTL8_4_VALUE, }; struct dsa_switch; @@ -86,6 +88,8 @@ struct dsa_device_ops { struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); + int (*connect)(struct dsa_switch *ds); + void (*disconnect)(struct dsa_switch *ds); unsigned int needed_headroom; unsigned int needed_tailroom; const char *name; @@ -115,6 +119,9 @@ struct dsa_netdevice_ops { struct dsa_switch_tree { struct list_head list; + /* List of switch ports */ + struct list_head ports; + /* Notifier chain for switch-wide events */ struct raw_notifier_head nh; @@ -124,8 +131,10 @@ struct dsa_switch_tree { /* Number of switches attached to this tree */ struct kref refcount; - /* Has this tree been applied to the hardware? */ - bool setup; + /* Maps offloaded LAG netdevs to a zero-based linear ID for + * drivers that need it. + */ + struct net_device **lags; /* Tagging protocol operations */ const struct dsa_device_ops *tag_ops; @@ -135,22 +144,19 @@ struct dsa_switch_tree { */ enum dsa_tag_protocol default_proto; + /* Has this tree been applied to the hardware? */ + bool setup; + /* * Configuration data for the platform device that owns * this dsa switch tree instance. */ struct dsa_platform_data *pd; - /* List of switch ports */ - struct list_head ports; - /* List of DSA links composing the routing table */ struct list_head rtable; - /* Maps offloaded LAG netdevs to a zero-based linear ID for - * drivers that need it. - */ - struct net_device **lags; + /* Length of "lags" array */ unsigned int lags_len; /* Track the largest switch index within a tree */ @@ -217,6 +223,12 @@ struct dsa_mall_tc_entry { }; }; +struct dsa_bridge { + struct net_device *dev; + unsigned int num; + bool tx_fwd_offload; + refcount_t refcount; +}; struct dsa_port { /* A CPU port is physically connected to a master device. @@ -236,6 +248,10 @@ struct dsa_port { struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); + struct dsa_switch *ds; + + unsigned int index; + enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU, @@ -243,36 +259,40 @@ struct dsa_port { DSA_PORT_TYPE_USER, } type; - struct dsa_switch *ds; - unsigned int index; const char *name; struct dsa_port *cpu_dp; u8 mac[ETH_ALEN]; + + u8 stp_state; + + /* Warning: the following bit fields are not atomic, and updating them + * can only be done from code paths where concurrency is not possible + * (probe time or under rtnl_lock). + */ + u8 vlan_filtering:1; + + /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ + u8 learning:1; + + u8 lag_tx_enabled:1; + + u8 devlink_port_setup:1; + + u8 setup:1; + struct device_node *dn; unsigned int ageing_time; - bool vlan_filtering; - /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ - bool learning; - u8 stp_state; - struct net_device *bridge_dev; - int bridge_num; + + struct dsa_bridge *bridge; struct devlink_port devlink_port; - bool devlink_port_setup; struct phylink *pl; struct phylink_config pl_config; struct net_device *lag_dev; - bool lag_tx_enabled; struct net_device *hsr_dev; struct list_head list; /* - * Give the switch driver somewhere to hang its per-port private data - * structures (accessible from the tagger). - */ - void *priv; - - /* * Original copy of the master netdev ethtool_ops */ const struct ethtool_ops *orig_ethtool_ops; @@ -285,10 +305,9 @@ struct dsa_port { /* List of MAC addresses that must be forwarded on this port. * These are only valid on CPU ports and DSA links. */ + struct mutex addr_lists_lock; struct list_head fdbs; struct list_head mdbs; - - bool setup; }; /* TODO: ideally DSA ports would have a single dp->link_dp member, @@ -310,8 +329,6 @@ struct dsa_mac_addr { }; struct dsa_switch { - bool setup; - struct device *dev; /* @@ -320,6 +337,57 @@ struct dsa_switch { struct dsa_switch_tree *dst; unsigned int index; + /* Warning: the following bit fields are not atomic, and updating them + * can only be done from code paths where concurrency is not possible + * (probe time or under rtnl_lock). + */ + u32 setup:1; + + /* Disallow bridge core from requesting different VLAN awareness + * settings on ports if not hardware-supported + */ + u32 vlan_filtering_is_global:1; + + /* Keep VLAN filtering enabled on ports not offloading any upper */ + u32 needs_standalone_vlan_filtering:1; + + /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges + * that have vlan_filtering=0. All drivers should ideally set this (and + * then the option would get removed), but it is unknown whether this + * would break things or not. + */ + u32 configure_vlan_while_not_filtering:1; + + /* If the switch driver always programs the CPU port as egress tagged + * despite the VLAN configuration indicating otherwise, then setting + * @untag_bridge_pvid will force the DSA receive path to pop the + * bridge's default_pvid VLAN tagged frames to offer a consistent + * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge + * device. + */ + u32 untag_bridge_pvid:1; + + /* Let DSA manage the FDB entries towards the + * CPU, based on the software bridge database. + */ + u32 assisted_learning_on_cpu_port:1; + + /* In case vlan_filtering_is_global is set, the VLAN awareness state + * should be retrieved from here and not from the per-port settings. + */ + u32 vlan_filtering:1; + + /* MAC PCS does not provide link state change interrupt, and requires + * polling. Flag passed on to PHYLINK. + */ + u32 pcs_poll:1; + + /* For switches that only have the MRU configurable. To ensure the + * configured MTU is not exceeded, normalization of MRU on all bridged + * interfaces is needed. + */ + u32 mtu_enforcement_ingress:1; + /* Listener for switch fabric events */ struct notifier_block nb; @@ -329,6 +397,8 @@ struct dsa_switch { */ void *priv; + void *tagger_data; + /* * Configuration data for this switch. */ @@ -358,50 +428,6 @@ struct dsa_switch { /* Number of switch port queues */ unsigned int num_tx_queues; - /* Disallow bridge core from requesting different VLAN awareness - * settings on ports if not hardware-supported - */ - bool vlan_filtering_is_global; - - /* Keep VLAN filtering enabled on ports not offloading any upper. */ - bool needs_standalone_vlan_filtering; - - /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges - * that have vlan_filtering=0. All drivers should ideally set this (and - * then the option would get removed), but it is unknown whether this - * would break things or not. - */ - bool configure_vlan_while_not_filtering; - - /* If the switch driver always programs the CPU port as egress tagged - * despite the VLAN configuration indicating otherwise, then setting - * @untag_bridge_pvid will force the DSA receive path to pop the bridge's - * default_pvid VLAN tagged frames to offer a consistent behavior - * between a vlan_filtering=0 and vlan_filtering=1 bridge device. - */ - bool untag_bridge_pvid; - - /* Let DSA manage the FDB entries towards the CPU, based on the - * software bridge database. - */ - bool assisted_learning_on_cpu_port; - - /* In case vlan_filtering_is_global is set, the VLAN awareness state - * should be retrieved from here and not from the per-port settings. - */ - bool vlan_filtering; - - /* MAC PCS does not provide link state change interrupt, and requires - * polling. Flag passed on to PHYLINK. - */ - bool pcs_poll; - - /* For switches that only have the MRU configurable. To ensure the - * configured MTU is not exceeded, normalization of MRU on all bridged - * interfaces is needed. - */ - bool mtu_enforcement_ingress; - /* Drivers that benefit from having an ID associated with each * offloaded LAG should set this to the maximum number of * supported IDs. DSA will then maintain a mapping of _at @@ -410,14 +436,14 @@ struct dsa_switch { */ unsigned int num_lag_ids; - /* Drivers that support bridge forwarding offload should set this to - * the maximum number of bridges spanning the same switch tree (or all - * trees, in the case of cross-tree bridging support) that can be - * offloaded. + /* Drivers that support bridge forwarding offload or FDB isolation + * should set this to the maximum number of bridges spanning the same + * switch tree (or all trees, in the case of cross-tree bridging + * support) that can be offloaded. */ - unsigned int num_fwd_offloading_bridges; + unsigned int max_num_bridges; - size_t num_ports; + unsigned int num_ports; }; static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) @@ -472,14 +498,41 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_USER; } +#define dsa_tree_for_each_user_port(_dp, _dst) \ + list_for_each_entry((_dp), &(_dst)->ports, list) \ + if (dsa_port_is_user((_dp))) + +#define dsa_switch_for_each_port(_dp, _ds) \ + list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ + if ((_dp)->ds == (_ds)) + +#define dsa_switch_for_each_port_safe(_dp, _next, _ds) \ + list_for_each_entry_safe((_dp), (_next), &(_ds)->dst->ports, list) \ + if ((_dp)->ds == (_ds)) + +#define dsa_switch_for_each_port_continue_reverse(_dp, _ds) \ + list_for_each_entry_continue_reverse((_dp), &(_ds)->dst->ports, list) \ + if ((_dp)->ds == (_ds)) + +#define dsa_switch_for_each_available_port(_dp, _ds) \ + dsa_switch_for_each_port((_dp), (_ds)) \ + if (!dsa_port_is_unused((_dp))) + +#define dsa_switch_for_each_user_port(_dp, _ds) \ + dsa_switch_for_each_port((_dp), (_ds)) \ + if (dsa_port_is_user((_dp))) + +#define dsa_switch_for_each_cpu_port(_dp, _ds) \ + dsa_switch_for_each_port((_dp), (_ds)) \ + if (dsa_port_is_cpu((_dp))) + static inline u32 dsa_user_ports(struct dsa_switch *ds) { + struct dsa_port *dp; u32 mask = 0; - int p; - for (p = 0; p < ds->num_ports; p++) - if (dsa_is_user_port(ds, p)) - mask |= BIT(p); + dsa_switch_for_each_user_port(dp, ds) + mask |= BIT(dp->index); return mask; } @@ -558,7 +611,7 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) static inline struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) { - if (!dp->bridge_dev) + if (!dp->bridge) return NULL; if (dp->lag_dev) @@ -569,6 +622,76 @@ struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) return dp->slave; } +static inline struct net_device * +dsa_port_bridge_dev_get(const struct dsa_port *dp) +{ + return dp->bridge ? dp->bridge->dev : NULL; +} + +static inline unsigned int dsa_port_bridge_num_get(struct dsa_port *dp) +{ + return dp->bridge ? dp->bridge->num : 0; +} + +static inline bool dsa_port_bridge_same(const struct dsa_port *a, + const struct dsa_port *b) +{ + struct net_device *br_a = dsa_port_bridge_dev_get(a); + struct net_device *br_b = dsa_port_bridge_dev_get(b); + + /* Standalone ports are not in the same bridge with one another */ + return (!br_a || !br_b) ? false : (br_a == br_b); +} + +static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, + const struct net_device *dev) +{ + return dsa_port_to_bridge_port(dp) == dev; +} + +static inline bool +dsa_port_offloads_bridge_dev(struct dsa_port *dp, + const struct net_device *bridge_dev) +{ + /* DSA ports connected to a bridge, and event was emitted + * for the bridge. + */ + return dsa_port_bridge_dev_get(dp) == bridge_dev; +} + +static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, + const struct dsa_bridge *bridge) +{ + return dsa_port_bridge_dev_get(dp) == bridge->dev; +} + +/* Returns true if any port of this tree offloads the given net_device */ +static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, + const struct net_device *dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_bridge_port(dp, dev)) + return true; + + return false; +} + +/* Returns true if any port of this tree offloads the given bridge */ +static inline bool +dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst, + const struct net_device *bridge_dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_bridge_dev(dp, bridge_dev)) + return true; + + return false; +} + typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { @@ -584,6 +707,13 @@ struct dsa_switch_ops { enum dsa_tag_protocol mprot); int (*change_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol proto); + /* + * Method for switch drivers to connect to the tagging protocol driver + * in current use. The switch driver can provide handlers for certain + * types of packets for switch management. + */ + int (*connect_tag_protocol)(struct dsa_switch *ds, + enum dsa_tag_protocol proto); /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); @@ -615,6 +745,8 @@ struct dsa_switch_ops { /* * PHYLINK integration */ + void (*phylink_get_caps)(struct dsa_switch *ds, int port, + struct phylink_config *config); void (*phylink_validate)(struct dsa_switch *ds, int port, unsigned long *supported, struct phylink_link_state *state); @@ -645,6 +777,12 @@ struct dsa_switch_ops { int (*get_sset_count)(struct dsa_switch *ds, int port, int sset); void (*get_ethtool_phy_stats)(struct dsa_switch *ds, int port, uint64_t *data); + void (*get_eth_phy_stats)(struct dsa_switch *ds, int port, + struct ethtool_eth_phy_stats *phy_stats); + void (*get_eth_mac_stats)(struct dsa_switch *ds, int port, + struct ethtool_eth_mac_stats *mac_stats); + void (*get_eth_ctrl_stats)(struct dsa_switch *ds, int port, + struct ethtool_eth_ctrl_stats *ctrl_stats); void (*get_stats64)(struct dsa_switch *ds, int port, struct rtnl_link_stats64 *s); void (*self_test)(struct dsa_switch *ds, int port, @@ -710,17 +848,10 @@ struct dsa_switch_ops { */ int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, - struct net_device *bridge); + struct dsa_bridge bridge, + bool *tx_fwd_offload); void (*port_bridge_leave)(struct dsa_switch *ds, int port, - struct net_device *bridge); - /* Called right after .port_bridge_join() */ - int (*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port, - struct net_device *bridge, - int bridge_num); - /* Called right before .port_bridge_leave() */ - void (*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port, - struct net_device *bridge, - int bridge_num); + struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); @@ -792,10 +923,10 @@ struct dsa_switch_ops { */ int (*crosschip_bridge_join)(struct dsa_switch *ds, int tree_index, int sw_index, int port, - struct net_device *br); + struct dsa_bridge bridge); void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, - struct net_device *br); + struct dsa_bridge bridge); int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, int port); int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, diff --git a/include/net/dst.h b/include/net/dst.h index a057319aabef..6aa252c3fc55 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -77,6 +77,7 @@ struct dst_entry { #ifndef CONFIG_64BIT atomic_t __refcnt; /* 32-bit offset 64 */ #endif + netdevice_tracker dev_tracker; }; struct dst_metrics { diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index 67634675e919..df6622a5fe98 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -80,6 +80,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache) } /** + * dst_cache_reset_now - invalidate the cache contents immediately + * @dst_cache: the cache + * + * The caller must be sure there are no concurrent users, as this frees + * all dst_cache users immediately, rather than waiting for the next + * per-cpu usage like dst_cache_reset does. Most callers should use the + * higher speed lazily-freed dst_cache_reset function instead. + */ +void dst_cache_reset_now(struct dst_cache *dst_cache); + +/** * dst_cache_init - initialize the cache, allocating the required storage * @dst_cache: the cache * @gfp: allocation flags diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 14efa0ded75d..adab27ba1ecb 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -123,8 +123,20 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); +#ifdef CONFIG_DST_CACHE + /* Unclone the dst cache if there is one */ + if (new_md->u.tun_info.dst_cache.cache) { + int ret; + + ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); + if (ret) { + metadata_dst_free(new_md); + return ERR_PTR(ret); + } + } +#endif + skb_dst_drop(skb); - dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); return new_md; } diff --git a/include/net/failover.h b/include/net/failover.h index bb15438f39c7..f2b42b4b9cd6 100644 --- a/include/net/failover.h +++ b/include/net/failover.h @@ -25,6 +25,7 @@ struct failover_ops { struct failover { struct list_head list; struct net_device __rcu *failover_dev; + netdevice_tracker dev_tracker; struct failover_ops __rcu *ops; }; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 4b10676c69d1..82da359bca03 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -69,7 +69,7 @@ struct fib_rules_ops { int (*action)(struct fib_rule *, struct flowi *, int, struct fib_lookup_arg *); - bool (*suppress)(struct fib_rule *, + bool (*suppress)(struct fib_rule *, int, struct fib_lookup_arg *); int (*match)(struct fib_rule *, struct flowi *, int); @@ -91,7 +91,6 @@ struct fib_rules_ops { void (*flush_cache)(struct fib_rules_ops *ops); int nlgroup; - const struct nla_policy *policy; struct list_head rules_list; struct module *owner; struct net *fro_net; @@ -103,26 +102,6 @@ struct fib_rule_notifier_info { struct fib_rule *rule; }; -#define FRA_GENERIC_POLICY \ - [FRA_UNSPEC] = { .strict_start_type = FRA_DPORT_RANGE + 1 }, \ - [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ - [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ - [FRA_PRIORITY] = { .type = NLA_U32 }, \ - [FRA_FWMARK] = { .type = NLA_U32 }, \ - [FRA_TUN_ID] = { .type = NLA_U64 }, \ - [FRA_FWMASK] = { .type = NLA_U32 }, \ - [FRA_TABLE] = { .type = NLA_U32 }, \ - [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ - [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_L3MDEV] = { .type = NLA_U8 }, \ - [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \ - [FRA_PROTOCOL] = { .type = NLA_U8 }, \ - [FRA_IP_PROTO] = { .type = NLA_U8 }, \ - [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \ - [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) } - - static inline void fib_rule_get(struct fib_rule *rule) { refcount_inc(&rule->refcnt); @@ -218,7 +197,9 @@ INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg)); #endif diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index ffd386ea0dbb..aa33e1092e2c 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -287,6 +287,7 @@ enum flow_dissector_key_id { #define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0) #define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(1) #define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(2) +#define FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP BIT(3) struct flow_dissector_key { enum flow_dissector_key_id key_id; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 3961461d9c8b..5b8c54eb7a6b 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -197,6 +197,7 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie); struct flow_action_entry { enum flow_action_id id; + u32 hw_index; enum flow_action_hw_stats hw_stats; action_destr destructor; void *destructor_priv; @@ -232,7 +233,6 @@ struct flow_action_entry { bool truncate; } sample; struct { /* FLOW_ACTION_POLICE */ - u32 index; u32 burst; u64 rate_bytes_ps; u64 burst_pkt; @@ -267,7 +267,6 @@ struct flow_action_entry { u8 ttl; } mpls_mangle; struct { - u32 index; s32 prio; u64 basetime; u64 cycletime; @@ -552,6 +551,23 @@ struct flow_cls_offload { u32 classid; }; +enum offload_act_command { + FLOW_ACT_REPLACE, + FLOW_ACT_DESTROY, + FLOW_ACT_STATS, +}; + +struct flow_offload_action { + struct netlink_ext_ack *extack; /* NULL in FLOW_ACT_STATS process*/ + enum offload_act_command command; + enum flow_action_id id; + u32 index; + struct flow_stats stats; + struct flow_action action; +}; + +struct flow_offload_action *offload_action_alloc(unsigned int num_actions); + static inline struct flow_rule * flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd) { diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 1424e02cef90..7aa2b8e1fb29 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -7,14 +7,17 @@ #include <linux/rtnetlink.h> #include <linux/pkt_sched.h> -/* Note: this used to be in include/uapi/linux/gen_stats.h */ -struct gnet_stats_basic_packed { - __u64 bytes; - __u64 packets; -}; - -struct gnet_stats_basic_cpu { - struct gnet_stats_basic_packed bstats; +/* Throughput stats. + * Must be initialized beforehand with gnet_stats_basic_sync_init(). + * + * If no reads can ever occur parallel to writes (e.g. stack-allocated + * bstats), then the internal stat values can be written to and read + * from directly. Otherwise, use _bstats_set/update() for writes and + * gnet_stats_add_basic() for reads. + */ +struct gnet_stats_basic_sync { + u64_stats_t bytes; + u64_stats_t packets; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); @@ -34,6 +37,7 @@ struct gnet_dump { struct tc_stats tc_stats; }; +void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b); int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d, int padattr); @@ -42,41 +46,38 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d, int padattr); -int gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b); -void __gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b); -int gnet_stats_copy_basic_hw(const seqcount_t *running, - struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b); +int gnet_stats_copy_basic(struct gnet_dump *d, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b, bool running); +void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b, bool running); +int gnet_stats_copy_basic_hw(struct gnet_dump *d, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b, bool running); int gnet_stats_copy_rate_est(struct gnet_dump *d, struct net_rate_estimator __rcu **ptr); int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue __percpu *cpu_q, struct gnet_stats_queue *q, __u32 qlen); -void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *cpu_q, - const struct gnet_stats_queue *q, __u32 qlen); +void gnet_stats_add_queue(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *cpu_q, + const struct gnet_stats_queue *q); int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); int gnet_stats_finish_copy(struct gnet_dump *d); -int gen_new_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu_bstats, +int gen_new_estimator(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, - seqcount_t *running, struct nlattr *opt); + bool running, struct nlattr *opt); void gen_kill_estimator(struct net_rate_estimator __rcu **ptr); -int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu_bstats, +int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **ptr, spinlock_t *lock, - seqcount_t *running, struct nlattr *opt); + bool running, struct nlattr *opt); bool gen_estimator_active(struct net_rate_estimator __rcu **ptr); bool gen_estimator_read(struct net_rate_estimator __rcu **ptr, struct gnet_stats_rate_est64 *sample); diff --git a/include/net/gro.h b/include/net/gro.h index 01edaf3fdda0..8f75802d50fd 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -4,9 +4,368 @@ #define _NET_IPV6_GRO_H #include <linux/indirect_call_wrapper.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/ip6_checksum.h> +#include <linux/skbuff.h> +#include <net/udp.h> -struct list_head; -struct sk_buff; +struct napi_gro_cb { + /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ + void *frag0; + + /* Length of frag0. */ + unsigned int frag0_len; + + /* This indicates where we are processing relative to skb->data. */ + int data_offset; + + /* This is non-zero if the packet cannot be merged with the new skb. */ + u16 flush; + + /* Save the IP ID here and check when we get to the transport layer */ + u16 flush_id; + + /* Number of segments aggregated. */ + u16 count; + + /* Start offset for remote checksum offload */ + u16 gro_remcsum_start; + + /* jiffies when first packet was created/queued */ + unsigned long age; + + /* Used in ipv6_gro_receive() and foo-over-udp */ + u16 proto; + + /* This is non-zero if the packet may be of the same flow. */ + u8 same_flow:1; + + /* Used in tunnel GRO receive */ + u8 encap_mark:1; + + /* GRO checksum is valid */ + u8 csum_valid:1; + + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; + + /* Free the skb? */ + u8 free:2; +#define NAPI_GRO_FREE 1 +#define NAPI_GRO_FREE_STOLEN_HEAD 2 + + /* Used in foo-over-udp, set in udp[46]_gro_receive */ + u8 is_ipv6:1; + + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; + + /* Used to determine if flush_id can be ignored */ + u8 is_atomic:1; + + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; + + /* GRO is done by frag_list pointer chaining. */ + u8 is_flist:1; + + /* used to support CHECKSUM_COMPLETE for tunneling protocols */ + __wsum csum; + + /* used in skb_gro_receive() slow path */ + struct sk_buff *last; +}; + +#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) + +#define GRO_RECURSION_LIMIT 15 +static inline int gro_recursion_inc_test(struct sk_buff *skb) +{ + return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; +} + +typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); +static inline struct sk_buff *call_gro_receive(gro_receive_t cb, + struct list_head *head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb); +} + +typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, + struct sk_buff *); +static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, + struct sock *sk, + struct list_head *head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(sk, head, skb); +} + +static inline unsigned int skb_gro_offset(const struct sk_buff *skb) +{ + return NAPI_GRO_CB(skb)->data_offset; +} + +static inline unsigned int skb_gro_len(const struct sk_buff *skb) +{ + return skb->len - NAPI_GRO_CB(skb)->data_offset; +} + +static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) +{ + NAPI_GRO_CB(skb)->data_offset += len; +} + +static inline void *skb_gro_header_fast(struct sk_buff *skb, + unsigned int offset) +{ + return NAPI_GRO_CB(skb)->frag0 + offset; +} + +static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) +{ + return NAPI_GRO_CB(skb)->frag0_len < hlen; +} + +static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; +} + +static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, + unsigned int offset) +{ + if (!pskb_may_pull(skb, hlen)) + return NULL; + + skb_gro_frag0_invalidate(skb); + return skb->data + offset; +} + +static inline void *skb_gro_network_header(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + + skb_network_offset(skb); +} + +static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + + return csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), proto, 0); +} + +static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + if (NAPI_GRO_CB(skb)->csum_valid) + NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len, + wsum_negate(NAPI_GRO_CB(skb)->csum))); +} + +/* GRO checksum functions. These are logical equivalents of the normal + * checksum functions (in skbuff.h) except that they operate on the GRO + * offsets and fields in sk_buff. + */ + +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); + +static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); +} + +static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + return ((skb->ip_summed != CHECKSUM_PARTIAL || + skb_checksum_start_offset(skb) < + skb_gro_offset(skb)) && + !skb_at_gro_remcsum_start(skb) && + NAPI_GRO_CB(skb)->csum_cnt == 0 && + (!zero_okay || check)); +} + +static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, + __wsum psum) +{ + if (NAPI_GRO_CB(skb)->csum_valid && + !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) + return 0; + + NAPI_GRO_CB(skb)->csum = psum; + + return __skb_gro_checksum_complete(skb); +} + +static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->csum_cnt > 0) { + /* Consume a checksum from CHECKSUM_UNNECESSARY */ + NAPI_GRO_CB(skb)->csum_cnt--; + } else { + /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we + * verified a new top level checksum or an encapsulated one + * during GRO. This saves work if we fallback to normal path. + */ + __skb_incr_checksum_unnecessary(skb); + } +} + +#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ + compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_gro_checksum_validate_complete(skb, \ + compute_pseudo(skb, proto)); \ + if (!__ret) \ + skb_gro_incr_csum_unnecessary(skb); \ + __ret; \ +}) + +#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) + +#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) + +#define skb_gro_checksum_simple_validate(skb) \ + __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) + +static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid); +} + +static inline void __skb_gro_checksum_convert(struct sk_buff *skb, + __wsum pseudo) +{ + NAPI_GRO_CB(skb)->csum = ~pseudo; + NAPI_GRO_CB(skb)->csum_valid = 1; +} + +#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ +do { \ + if (__skb_gro_checksum_convert_check(skb)) \ + __skb_gro_checksum_convert(skb, \ + compute_pseudo(skb, proto)); \ +} while (0) + +struct gro_remcsum { + int offset; + __wsum delta; +}; + +static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) +{ + grc->offset = 0; + grc->delta = 0; +} + +static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, + unsigned int off, size_t hdrlen, + int start, int offset, + struct gro_remcsum *grc, + bool nopartial) +{ + __wsum delta; + size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); + + BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); + + if (!nopartial) { + NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; + return ptr; + } + + ptr = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, off + plen)) { + ptr = skb_gro_header_slow(skb, off + plen, off); + if (!ptr) + return NULL; + } + + delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, + start, offset); + + /* Adjust skb->csum since we changed the packet */ + NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + + grc->offset = off + hdrlen + offset; + grc->delta = delta; + + return ptr; +} + +static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, + struct gro_remcsum *grc) +{ + void *ptr; + size_t plen = grc->offset + sizeof(u16); + + if (!grc->delta) + return; + + ptr = skb_gro_header_fast(skb, grc->offset); + if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) { + ptr = skb_gro_header_slow(skb, plen, grc->offset); + if (!ptr) + return; + } + + remcsum_unadjust((__sum16 *)ptr, grc->delta); +} + +#ifdef CONFIG_XFRM_OFFLOAD +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) +{ + if (PTR_ERR(pp) != -EINPROGRESS) + NAPI_GRO_CB(skb)->flush |= flush; +} +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff *pp, + int flush, + struct gro_remcsum *grc) +{ + if (PTR_ERR(pp) != -EINPROGRESS) { + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; + } +} +#else +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) +{ + NAPI_GRO_CB(skb)->flush |= flush; +} +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff *pp, + int flush, + struct gro_remcsum *grc) +{ + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; +} +#endif INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, struct sk_buff *)); @@ -15,6 +374,14 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); + +INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); + #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ ({ \ unlikely(gro_recursion_inc_test(skb)) ? \ @@ -22,4 +389,54 @@ INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); INDIRECT_CALL_INET(cb, f2, f1, head, skb); \ }) +struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, + struct udphdr *uh, struct sock *sk); +int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); + +static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) +{ + struct udphdr *uh; + unsigned int hlen, off; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*uh); + uh = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) + uh = skb_gro_header_slow(skb, hlen, off); + + return uh; +} + +static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct ipv6hdr *iph = skb_gro_network_header(skb); + + return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, + skb_gro_len(skb), proto, 0)); +} + +int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); + +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static inline void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. + */ +static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) +{ + list_add_tail(&skb->list, &napi->rx_list); + napi->rx_count += segs; + if (napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); +} + + #endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 653e7d0f65cb..f026cf08a8e8 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -160,6 +160,7 @@ struct ipv6_devstat { struct inet6_dev { struct net_device *dev; + netdevice_tracker dev_tracker; struct list_head addr_list; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b06c2d02ec84..4ad47d9f9d27 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -289,7 +289,7 @@ static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) { /* The below has to be done to allow calling inet_csk_destroy_sock */ sock_set_flag(sk, SOCK_DEAD); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(*sk->sk_prot->orphan_count); } void inet_csk_destroy_sock(struct sock *sk); @@ -304,7 +304,7 @@ static inline __poll_t inet_csk_listen_poll(const struct sock *sk) (EPOLLIN | EPOLLRDNORM) : 0; } -int inet_csk_listen_start(struct sock *sk, int backlog); +int inet_csk_listen_start(struct sock *sk); void inet_csk_listen_stop(struct sock *sk); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index ba77f47ef61e..ea32393464a2 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -188,6 +188,23 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) return 0; } +static inline int skb_get_dsfield(struct sk_buff *skb) +{ + switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + break; + return ipv4_get_dsfield(ip_hdr(skb)); + + case cpu_to_be16(ETH_P_IPV6): + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) + break; + return ipv6_get_dsfield(ipv6_hdr(skb)); + } + + return -1; +} + static inline int INET_ECN_set_ect1(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 48cc5795ceda..63540be0fc34 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -117,8 +117,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); static inline void fqdir_pre_exit(struct fqdir *fqdir) { - fqdir->high_thresh = 0; /* prevent creation of new frags */ - fqdir->dead = true; + /* Prevent creation of new frags. + * Pairs with READ_ONCE() in inet_frag_find(). + */ + WRITE_ONCE(fqdir->high_thresh, 0); + + /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() + * and ip6frag_expire_frag_queue(). + */ + WRITE_ONCE(fqdir->dead, true); } void fqdir_exit(struct fqdir *fqdir); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 89163ef8cf4b..234d70ae5f4c 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -207,11 +207,10 @@ struct inet_sock { __be32 inet_saddr; __s16 uc_ttl; __u16 cmsg_flags; + struct ip_options_rcu __rcu *inet_opt; __be16 inet_sport; __u16 inet_id; - struct ip_options_rcu __rcu *inet_opt; - int rx_dst_ifindex; __u8 tos; __u8 min_ttl; __u8 mc_ttl; @@ -373,4 +372,16 @@ static inline bool inet_can_nonlocal_bind(struct net *net, inet->freebind || inet->transparent; } +static inline bool inet_addr_valid_or_nonlocal(struct net *net, + struct inet_sock *inet, + __be32 addr, + int addr_type) +{ + return inet_can_nonlocal_bind(net, inet) || + addr == htonl(INADDR_ANY) || + addr_type == RTN_LOCAL || + addr_type == RTN_MULTICAST || + addr_type == RTN_BROADCAST; +} + #endif /* _INET_SOCK_H */ diff --git a/include/net/ioam6.h b/include/net/ioam6.h index 3c2993bc48c8..3f45ba37a2c6 100644 --- a/include/net/ioam6.h +++ b/include/net/ioam6.h @@ -56,7 +56,8 @@ static inline struct ioam6_pernet_data *ioam6_pernet(struct net *net) struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id); void ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, - struct ioam6_trace_hdr *trace); + struct ioam6_trace_hdr *trace, + bool is_input); int ioam6_init(void); void ioam6_exit(void); diff --git a/include/net/ip.h b/include/net/ip.h index 9192444f2964..b51bae43b0dd 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -24,6 +24,7 @@ #include <linux/skbuff.h> #include <linux/jhash.h> #include <linux/sockptr.h> +#include <linux/static_key.h> #include <net/inet_sock.h> #include <net/route.h> @@ -291,7 +292,11 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, #define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) #define __NET_ADD_STATS(net, field, adnd) __SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) -u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct); +static inline u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offt) +{ + return *(((unsigned long *)per_cpu_ptr(mib, cpu)) + offt); +} + unsigned long snmp_fold_field(void __percpu *mib, int offt); #if BITS_PER_LONG==32 u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct, @@ -520,19 +525,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); + /* We had many attacks based on IPID, use the private + * generator as much as we can. + */ + if (sk && inet_sk(sk)->inet_daddr) { + iph->id = htons(inet_sk(sk)->inet_id); + inet_sk(sk)->inet_id += segs; + return; + } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { - /* This is only to work around buggy Windows95/2000 - * VJ compression implementations. If the ID field - * does not change, they drop every other packet in - * a TCP stream using header compression. - */ - if (sk && inet_sk(sk)->inet_daddr) { - iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += segs; - } else { - iph->id = 0; - } + iph->id = 0; } else { + /* Unfortunately we need the big hammer to get a suitable IPID */ __ip_select_ident(net, iph, segs); } } @@ -563,14 +567,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } -static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) -{ - const struct iphdr *iph = skb_gro_network_header(skb); - - return csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), proto, 0); -} - /* * Map a multicast IP onto multicast MAC for type ethernet. */ @@ -746,6 +742,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); +DECLARE_STATIC_KEY_FALSE(ip4_min_ttl); int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -785,5 +782,6 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val); void ip_sock_set_pktinfo(struct sock *sk); void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); +void __ip_sock_set_tos(struct sock *sk, int val); #endif /* _IP_H */ diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index b3f4eaa88672..c8a96b888277 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -43,14 +43,6 @@ static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto) skb->len, proto, 0)); } -static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) -{ - const struct ipv6hdr *iph = skb_gro_network_header(skb); - - return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), proto, 0)); -} - static __inline__ __sum16 tcp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, @@ -65,15 +57,9 @@ static inline void __tcp_v6_send_check(struct sk_buff *skb, { struct tcphdr *th = tcp_hdr(skb); - if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - } else { - th->check = tcp_v6_check(skb->len, saddr, daddr, - csum_partial(th, th->doff << 2, - skb->csum)); - } + th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); } static inline void tcp_v6_gso_csum_prep(struct sk_buff *skb) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c412dde4d67d..40ae8f1b18e5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -20,6 +20,7 @@ #include <net/inetpeer.h> #include <net/fib_notifier.h> #include <linux/indirect_call_wrapper.h> +#include <uapi/linux/bpf.h> #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 @@ -281,7 +282,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, fn = rcu_dereference(f6i->fib6_node); if (fn) { - *cookie = fn->fn_sernum; + *cookie = READ_ONCE(fn->fn_sernum); /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */ smp_rmb(); status = true; @@ -485,6 +486,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); +void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5efd0b71dc67..ca2d6b60e1ec 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -263,19 +263,19 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); -static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) +static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) { - unsigned int mtu; - - struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + const struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; + const struct dst_entry *dst = skb_dst(skb); + unsigned int mtu; if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { - mtu = READ_ONCE(skb_dst(skb)->dev->mtu); - mtu -= lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); - } else - mtu = dst_mtu(skb_dst(skb)); - + mtu = READ_ONCE(dst->dev->mtu); + mtu -= lwtunnel_headroom(dst->lwtstate, mtu); + } else { + mtu = dst_mtu(dst); + } return mtu; } diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 028eaea1c854..a38c4f1e4e5c 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -46,6 +46,7 @@ struct __ip6_tnl_parm { struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ + netdevice_tracker dev_tracker; struct net *net; /* netns for packet i/o */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ab5348e57db1..c4297704bbcb 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -79,6 +79,7 @@ struct fnhe_hash_bucket { struct fib_nh_common { struct net_device *nhc_dev; + netdevice_tracker nhc_dev_tracker; int nhc_oif; unsigned char nhc_scope; u8 nhc_family; @@ -111,6 +112,7 @@ struct fib_nh { int nh_saddr_genid; #define fib_nh_family nh_common.nhc_family #define fib_nh_dev nh_common.nhc_dev +#define fib_nh_dev_tracker nh_common.nhc_dev_tracker #define fib_nh_oif nh_common.nhc_oif #define fib_nh_flags nh_common.nhc_flags #define fib_nh_lws nh_common.nhc_lwtstate @@ -438,7 +440,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { - return net->ipv4.fib_num_tclassid_users; + return atomic_read(&net->ipv4.fib_num_tclassid_users); } #else static inline int fib_num_tclassid_users(struct net *net) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index bc3b13ec93c9..0219fe907b26 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -104,7 +104,10 @@ struct metadata_dst; struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; + struct net_device *dev; + netdevice_tracker dev_tracker; + struct net *net; /* netns for packet i/o */ unsigned long err_time; /* Time when the last ICMP error diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 7cb5a1aace40..ff1804a0c469 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -931,6 +931,7 @@ struct netns_ipvs { int sysctl_conn_reuse_mode; int sysctl_schedule_icmp; int sysctl_ignore_tunneled; + int sysctl_run_estimation; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1071,6 +1072,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) return ipvs->sysctl_cache_bypass; } +static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_run_estimation; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1163,6 +1169,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) return 0; } +static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) +{ + return 1; +} + #endif /* IPVS core functions diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f2d0ecc257bb..3afcb128e064 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -21,6 +21,8 @@ #include <net/snmp.h> #include <net/netns/hash.h> +struct ip_tunnel_info; + #define SIN6_LEN_RFC2133 24 #define IPV6_MAXPLEN 65535 @@ -345,9 +347,9 @@ struct ipcm6_cookie { struct sockcm_cookie sockc; __s16 hlimit; __s16 tclass; + __u16 gso_size; __s8 dontfrag; struct ipv6_txoptions *opt; - __u16 gso_size; }; static inline void ipcm6_init(struct ipcm6_cookie *ipc6) @@ -1092,6 +1094,7 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, /* * socket options (ipv6_sockglue.c) */ +DECLARE_STATIC_KEY_FALSE(ip6_min_hopcount); int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 851029ecff13..0a4779175a52 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) struct sk_buff *head; rcu_read_lock(); - if (fq->q.fqdir->dead) + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(fq->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&fq->q.lock); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index afbce90c4480..45e0339be6fa 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -47,6 +47,7 @@ struct ipv6_stub { struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void (*fib6_nh_release)(struct fib6_nh *fib6_nh); + void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh); void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify); void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index ff06246dbbb9..df85d19fbf84 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -112,10 +112,12 @@ enum iucv_tx_notify { struct iucv_sock { struct sock sk; - char src_user_id[8]; - char src_name[8]; - char dst_user_id[8]; - char dst_name[8]; + struct_group(init, + char src_user_id[8]; + char src_name[8]; + char dst_user_id[8]; + char dst_name[8]; + ); struct list_head accept_q; spinlock_t accept_q_lock; struct sock *parent; diff --git a/include/net/llc.h b/include/net/llc.h index df282d9b4017..e250dca03963 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -72,7 +72,9 @@ struct llc_sap { static inline struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex) { - return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES]; + u32 bucket = hash_32(ifindex, LLC_SK_DEV_HASH_BITS); + + return &sap->sk_dev_hash[bucket]; } static inline @@ -133,7 +135,7 @@ static inline void llc_sap_put(struct llc_sap *sap) struct llc_sap *llc_sap_find(unsigned char sap_value); int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb, - unsigned char *dmac, unsigned char dsap); + const unsigned char *dmac, unsigned char dsap); void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb); void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb); diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index ea985aa7a6c5..2c1ea3414640 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -38,6 +38,7 @@ struct llc_sock { struct llc_addr laddr; /* lsap/mac pair */ struct llc_addr daddr; /* dsap/mac pair */ struct net_device *dev; /* device to send to remote */ + netdevice_tracker dev_tracker; u32 copied_seq; /* head of yet unread data */ u8 retry_count; /* number of retries */ u8 ack_must_be_send; diff --git a/include/net/llc_if.h b/include/net/llc_if.h index 8d5c543cd620..c72570a21a4f 100644 --- a/include/net/llc_if.h +++ b/include/net/llc_if.h @@ -62,7 +62,8 @@ #define LLC_STATUS_CONFLICT 7 /* disconnect conn */ #define LLC_STATUS_RESET_DONE 8 /* */ -int llc_establish_connection(struct sock *sk, u8 *lmac, u8 *dmac, u8 dsap); +int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac, + u8 dsap); int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb); int llc_send_disc(struct sock *sk); #endif /* LLC_IF_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 618d1f427cb2..c50221d7e82c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -632,6 +632,10 @@ struct ieee80211_fils_discovery { * @s1g: BSS is S1G BSS (affects Association Request format). * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed * to driver when rate control is offloaded to firmware. + * @power_type: power type of BSS for 6 GHz + * @tx_pwr_env: transmit power envelope array of BSS. + * @tx_pwr_env_num: number of @tx_pwr_env. + * @pwr_reduction: power constraint of BSS. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -702,6 +706,10 @@ struct ieee80211_bss_conf { u32 unsol_bcast_probe_resp_interval; bool s1g; struct cfg80211_bitrate_mask beacon_tx_rate; + enum ieee80211_ap_reg_power power_type; + struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; + u8 tx_pwr_env_num; + u8 pwr_reduction; }; /** @@ -1197,12 +1205,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) /* clear the rate counts */ for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) info->status.rates[i].count = 0; - - BUILD_BUG_ON( - offsetof(struct ieee80211_tx_info, status.ack_signal) != 20); - memset(&info->status.ampdu_ack_len, 0, - sizeof(struct ieee80211_tx_info) - - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); + memset_after(&info->status, 0, rates); } @@ -1715,6 +1718,7 @@ enum ieee80211_offload_flags { * write-protected by sdata_lock and local->mtx so holding either is fine * for read access. * @color_change_color: the bss color that will be used after the change. + * @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled. */ struct ieee80211_vif { enum nl80211_iftype type; @@ -1746,6 +1750,8 @@ struct ieee80211_vif { bool color_change_active; u8 color_change_color; + struct ieee80211_vif *mbssid_tx_vif; + /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; @@ -3933,6 +3939,16 @@ struct ieee80211_prep_tx_info { * twt structure. * @twt_teardown_request: Update the hw with TWT teardown request received * from the peer. + * @set_radar_background: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime + * switching to a different channel during CAC detection on the selected + * radar channel. + * The caller is expected to set chandef pointer to NULL in order to + * disable background CAC/radar detection. + * @net_fill_forward_path: Called from .ndo_fill_forward_path in order to + * resolve a path for hardware flow offloading */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4261,6 +4277,13 @@ struct ieee80211_ops { struct ieee80211_twt_setup *twt); void (*twt_teardown_request)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, u8 flowid); + int (*set_radar_background)(struct ieee80211_hw *hw, + struct cfg80211_chan_def *chandef); + int (*net_fill_forward_path)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct net_device_path_ctx *ctx, + struct net_device_path *path); }; /** @@ -5591,6 +5614,9 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, * This function iterates over the interfaces associated with a given * hardware that are currently active and calls the callback for them. * This version can only be used while holding the wiphy mutex. + * The driver must not call this with a lock held that it can also take in + * response to callbacks from mac80211, and it must not call this within + * callbacks made by mac80211 - both would result in deadlocks. * * @hw: the hardware struct of which the interfaces should be iterated over * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags @@ -5605,6 +5631,24 @@ void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw, void *data); /** + * ieee80211_iterate_stations - iterate stations + * + * This function iterates over all stations associated with a given + * hardware that are currently uploaded to the driver and calls the callback + * function for them. + * This function allows the iterator function to sleep, when the iterator + * function is atomic @ieee80211_iterate_stations_atomic can be used. + * + * @hw: the hardware struct of which the interfaces should be iterated over + * @iterator: the iterator function to call, cannot sleep + * @data: first argument of the iterator function + */ +void ieee80211_iterate_stations(struct ieee80211_hw *hw, + void (*iterator)(void *data, + struct ieee80211_sta *sta), + void *data); + +/** * ieee80211_iterate_stations_atomic - iterate stations * * This function iterates over all stations associated with a given @@ -6053,6 +6097,18 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw); void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); /** + * ieee80211_channel_switch_disconnect - disconnect due to channel switch error + * @vif &struct ieee80211_vif pointer from the add_interface callback. + * @block_tx: if %true, do not send deauth frame. + * + * Instruct mac80211 to disconnect due to a channel switch error. The channel + * switch can request to block the tx and so, we need to make sure we do not send + * a deauth frame in this case. + */ +void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, + bool block_tx); + +/** * ieee80211_request_smps - request SM PS transition * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @smps_mode: new SM PS mode diff --git a/include/net/mctp.h b/include/net/mctp.h index ffd2c23bd76d..7e35ec79b909 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -11,6 +11,7 @@ #include <linux/bits.h> #include <linux/mctp.h> +#include <linux/netdevice.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -58,39 +59,53 @@ struct mctp_sock { mctp_eid_t bind_addr; __u8 bind_type; + /* sendmsg()/recvmsg() uses struct sockaddr_mctp_ext */ + bool addr_ext; + /* list of mctp_sk_key, for incoming tag lookup. updates protected * by sk->net->keys_lock */ struct hlist_head keys; + + /* mechanism for expiring allocated keys; will release an allocated + * tag, and any netdev state for a request/response pairing + */ + struct timer_list key_expiry; }; /* Key for matching incoming packets to sockets or reassembly contexts. * Packets are matched on (src,dest,tag). * - * Lifetime requirements: + * Lifetime / locking requirements: + * + * - individual key data (ie, the struct itself) is protected by key->lock; + * changes must be made with that lock held. + * + * - the lookup fields: peer_addr, local_addr and tag are set before the + * key is added to lookup lists, and never updated. * - * - keys are free()ed via RCU + * - A ref to the key must be held (throuh key->refs) if a pointer to the + * key is to be accessed after key->lock is released. * * - a mctp_sk_key contains a reference to a struct sock; this is valid * for the life of the key. On sock destruction (through unhash), the key is - * removed from lists (see below), and will not be observable after a RCU - * grace period. - * - * any RX occurring within that grace period may still queue to the socket, - * but will hit the SOCK_DEAD case before the socket is freed. + * removed from lists (see below), and marked invalid. * * - these mctp_sk_keys appear on two lists: * 1) the struct mctp_sock->keys list * 2) the struct netns_mctp->keys list * - * updates to either list are performed under the netns_mctp->keys - * lock. + * presences on these lists requires a (single) refcount to be held; both + * lists are updated as a single operation. + * + * Updates and lookups in either list are performed under the + * netns_mctp->keys lock. Lookup functions will need to lock the key and + * take a reference before unlocking the keys_lock. Consequently, the list's + * keys_lock *cannot* be acquired with the individual key->lock held. * * - a key may have a sk_buff attached as part of an in-progress message - * reassembly (->reasm_head). The reassembly context is protected by - * reasm_lock, which may be acquired with the keys lock (above) held, if - * necessary. Consequently, keys lock *cannot* be acquired with the - * reasm_lock held. + * reassembly (->reasm_head). The reasm data is protected by the individual + * key->lock. * * - there are two destruction paths for a mctp_sk_key: * @@ -101,6 +116,8 @@ struct mctp_sock { * the (complete) reply, or during reassembly errors. Here, we clean up * the reassembly context (marking reasm_dead, to prevent another from * starting), and remove the socket from the netns & socket lists. + * + * - through an expiry timeout, on a per-socket timer */ struct mctp_sk_key { mctp_eid_t peer_addr; @@ -116,20 +133,40 @@ struct mctp_sk_key { /* per-socket list */ struct hlist_node sklist; + /* lock protects against concurrent updates to the reassembly and + * expiry data below. + */ + spinlock_t lock; + + /* Keys are referenced during the output path, which may sleep */ + refcount_t refs; + /* incoming fragment reassembly context */ - spinlock_t reasm_lock; struct sk_buff *reasm_head; struct sk_buff **reasm_tailp; bool reasm_dead; u8 last_seq; - struct rcu_head rcu; + /* key validity */ + bool valid; + + /* expiry timeout; valid (above) cleared on expiry */ + unsigned long expiry; + + /* free to use for device flow state tracking. Initialised to + * zero on initial key creation + */ + unsigned long dev_flow_state; + struct mctp_dev *dev; }; struct mctp_skb_cb { unsigned int magic; unsigned int net; + int ifindex; /* extended/direct addressing if set */ mctp_eid_t src; + unsigned char halen; + unsigned char haddr[MAX_ADDR_LEN]; }; /* skb control-block accessors with a little extra debugging for initial @@ -153,10 +190,18 @@ static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb) { struct mctp_skb_cb *cb = (void *)skb->cb; + BUILD_BUG_ON(sizeof(struct mctp_skb_cb) > sizeof(skb->cb)); WARN_ON(cb->magic != 0x4d435450); return (void *)(skb->cb); } +/* If CONFIG_MCTP_FLOWS, we may add one of these as a SKB extension, + * indicating the flow to the device driver. + */ +struct mctp_flow { + struct mctp_sk_key *key; +}; + /* Route definition. * * These are held in the pernet->mctp.routes list, with RCU protection for @@ -165,8 +210,7 @@ static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb) * * Updates to the route table are performed under rtnl; all reads under RCU, * so routes cannot be referenced over a RCU grace period. Specifically: A - * caller cannot block between mctp_route_lookup and passing the route to - * mctp_do_route. + * caller cannot block between mctp_route_lookup and mctp_route_release() */ struct mctp_route { mctp_eid_t min, max; @@ -186,11 +230,11 @@ struct mctp_route { struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, mctp_eid_t daddr); -int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb); - int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); +void mctp_key_unref(struct mctp_sk_key *key); + /* routing <--> device interface */ unsigned int mctp_default_net(struct net *net); int mctp_default_net_set(struct net *net, unsigned int index); diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h index 71a11012fac7..5c0d04b5c12c 100644 --- a/include/net/mctpdevice.h +++ b/include/net/mctpdevice.h @@ -14,11 +14,17 @@ #include <linux/types.h> #include <linux/refcount.h> +struct mctp_sk_key; + struct mctp_dev { struct net_device *dev; + refcount_t refs; + unsigned int net; + const struct mctp_netdev_ops *ops; + /* Only modified under RTNL. Reads have addrs_lock held */ u8 *addrs; size_t num_addrs; @@ -27,9 +33,24 @@ struct mctp_dev { struct rcu_head rcu; }; +struct mctp_netdev_ops { + void (*release_flow)(struct mctp_dev *dev, + struct mctp_sk_key *key); +}; + #define MCTP_INITIAL_DEFAULT_NET 1 struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev); struct mctp_dev *__mctp_dev_get(const struct net_device *dev); +int mctp_register_netdev(struct net_device *dev, + const struct mctp_netdev_ops *ops); +void mctp_unregister_netdev(struct net_device *dev); + +void mctp_dev_hold(struct mctp_dev *mdev); +void mctp_dev_put(struct mctp_dev *mdev); + +void mctp_dev_set_key(struct mctp_dev *dev, struct mctp_sk_key *key); +void mctp_dev_release_key(struct mctp_dev *dev, struct mctp_sk_key *key); + #endif /* __NET_MCTPDEVICE_H */ diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 3214848402ec..a925349b4b89 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -12,6 +12,8 @@ #include <linux/tcp.h> #include <linux/types.h> +struct mptcp_info; +struct mptcp_sock; struct seq_file; /* MPTCP sk_buff extension data */ @@ -125,6 +127,8 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts); +void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info); + /* move the skb extension owership, with the assumption that 'to' is * newly allocated */ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 38e4094960ce..53cb8de0e589 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -137,7 +137,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, u8 *opt, int opt_len, struct ndisc_options *ndopts); -void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, +void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, int data_len, int pad); #define NDISC_OPS_REDIRECT_DATA_SPACE 2 @@ -411,13 +411,7 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref(dev, pkey); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } @@ -428,13 +422,7 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref_stub(dev, pkey); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 22ced1381ede..87419f7f5421 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -70,6 +70,7 @@ enum { struct neigh_parms { possible_net_t net; struct net_device *dev; + netdevice_tracker dev_tracker; struct list_head list; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; @@ -144,19 +145,21 @@ struct neighbour { struct timer_list timer; unsigned long used; atomic_t probes; - __u8 flags; - __u8 nud_state; - __u8 type; - __u8 dead; + u8 nud_state; + u8 type; + u8 dead; u8 protocol; + u32 flags; seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8); struct hh_cache hh; int (*output)(struct neighbour *, struct sk_buff *); const struct neigh_ops *ops; struct list_head gc_list; + struct list_head managed_list; struct rcu_head rcu; struct net_device *dev; + netdevice_tracker dev_tracker; u8 primary_key[0]; } __randomize_layout; @@ -172,7 +175,8 @@ struct pneigh_entry { struct pneigh_entry *next; possible_net_t net; struct net_device *dev; - u8 flags; + netdevice_tracker dev_tracker; + u32 flags; u8 protocol; u8 key[]; }; @@ -216,11 +220,13 @@ struct neigh_table { int gc_thresh3; unsigned long last_flush; struct delayed_work gc_work; + struct delayed_work managed_work; struct timer_list proxy_timer; struct sk_buff_head proxy_queue; atomic_t entries; atomic_t gc_entries; struct list_head gc_list; + struct list_head managed_list; rwlock_t lock; unsigned long last_rand; struct neigh_statistics __percpu *stats; @@ -250,12 +256,21 @@ static inline void *neighbour_priv(const struct neighbour *n) } /* flags for neigh_update() */ -#define NEIGH_UPDATE_F_OVERRIDE 0x00000001 -#define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 -#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 -#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 -#define NEIGH_UPDATE_F_ISROUTER 0x40000000 -#define NEIGH_UPDATE_F_ADMIN 0x80000000 +#define NEIGH_UPDATE_F_OVERRIDE BIT(0) +#define NEIGH_UPDATE_F_WEAK_OVERRIDE BIT(1) +#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER BIT(2) +#define NEIGH_UPDATE_F_USE BIT(3) +#define NEIGH_UPDATE_F_MANAGED BIT(4) +#define NEIGH_UPDATE_F_EXT_LEARNED BIT(5) +#define NEIGH_UPDATE_F_ISROUTER BIT(6) +#define NEIGH_UPDATE_F_ADMIN BIT(7) + +/* In-kernel representation for NDA_FLAGS_EXT flags: */ +#define NTF_OLD_MASK 0xff +#define NTF_EXT_SHIFT 8 +#define NTF_EXT_MASK (NTF_EXT_MANAGED) + +#define NTF_MANAGED (NTF_EXT_MANAGED << NTF_EXT_SHIFT) extern const struct nla_policy nda_policy[]; @@ -309,6 +324,17 @@ static inline struct neighbour *__neigh_lookup_noref(struct neigh_table *tbl, return ___neigh_lookup_noref(tbl, tbl->key_eq, tbl->hash, pkey, dev); } +static inline void neigh_confirm(struct neighbour *n) +{ + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); + } +} + void neigh_table_init(int index, struct neigh_table *tbl); int neigh_table_clear(int index, struct neigh_table *tbl); struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, @@ -324,7 +350,8 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl, return __neigh_create(tbl, pkey, dev, true); } void neigh_destroy(struct neighbour *neigh); -int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, + const bool immediate_ok); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); @@ -434,17 +461,24 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh) #define neigh_hold(n) refcount_inc(&(n)->refcnt) -static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +static __always_inline int neigh_event_send_probe(struct neighbour *neigh, + struct sk_buff *skb, + const bool immediate_ok) { unsigned long now = jiffies; - + if (READ_ONCE(neigh->used) != now) WRITE_ONCE(neigh->used, now); - if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) - return __neigh_event_send(neigh, skb); + if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) + return __neigh_event_send(neigh, skb, immediate_ok); return 0; } +static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +{ + return neigh_event_send_probe(neigh, skb, true); +} + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) { @@ -504,10 +538,15 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, { const struct hh_cache *hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache) + /* n->nud_state and hh->hh_len could be changed under us. + * neigh_hh_output() is taking care of the race later. + */ + if (!skip_cache && + (READ_ONCE(n->nud_state) & NUD_CONNECTED) && + READ_ONCE(hh->hh_len)) return neigh_hh_output(hh, skb); - else - return n->output(n, skb); + + return n->output(n, skb); } static inline struct neighbour * diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bb5fa5914032..5b61c462e534 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -34,6 +34,7 @@ #include <net/netns/smc.h> #include <net/netns/bpf.h> #include <net/netns/mctp.h> +#include <net/net_trackers.h> #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> @@ -87,6 +88,7 @@ struct net { struct idr netns_ids; struct ns_common ns; + struct ref_tracker_dir refcnt_tracker; struct list_head dev_base_head; struct proc_dir_entry *proc_net; @@ -240,6 +242,7 @@ void ipx_unregister_sysctl(void); #ifdef CONFIG_NET_NS void __put_net(struct net *net); +/* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { refcount_inc(&net->ns.count); @@ -258,6 +261,7 @@ static inline struct net *maybe_get_net(struct net *net) return net; } +/* Try using put_net_track() instead */ static inline void put_net(struct net *net) { if (refcount_dec_and_test(&net->ns.count)) @@ -308,6 +312,36 @@ static inline int check_net(const struct net *net) #endif +static inline void netns_tracker_alloc(struct net *net, + netns_tracker *tracker, gfp_t gfp) +{ +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_alloc(&net->refcnt_tracker, tracker, gfp); +#endif +} + +static inline void netns_tracker_free(struct net *net, + netns_tracker *tracker) +{ +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_free(&net->refcnt_tracker, tracker); +#endif +} + +static inline struct net *get_net_track(struct net *net, + netns_tracker *tracker, gfp_t gfp) +{ + get_net(net); + netns_tracker_alloc(net, tracker, gfp); + return net; +} + +static inline void put_net_track(struct net *net, netns_tracker *tracker) +{ + netns_tracker_free(net, tracker); + put_net(net); +} + typedef struct { #ifdef CONFIG_NET_NS struct net *net; diff --git a/include/net/net_trackers.h b/include/net/net_trackers.h new file mode 100644 index 000000000000..d94c76cf15a9 --- /dev/null +++ b/include/net/net_trackers.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_NET_TRACKERS_H +#define __NET_NET_TRACKERS_H +#include <linux/ref_tracker.h> + +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER +typedef struct ref_tracker *netdevice_tracker; +#else +typedef struct {} netdevice_tracker; +#endif + +#ifdef CONFIG_NET_NS_REFCNT_TRACKER +typedef struct ref_tracker *netns_tracker; +#else +typedef struct {} netns_tracker; +#endif + +#endif /* __NET_NET_TRACKERS_H */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cc663c68ddc4..8731d5bcb47d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -76,6 +76,8 @@ struct nf_conn { * Hint, SKB address this struct and refcnt via skb->_nfct and * helpers nf_conntrack_get() and nf_conntrack_put(). * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, + * except that the latter uses internal indirection and does not + * result in a conntrack module dependency. * beware nf_ct_get() is different and don't inc refcnt. */ struct nf_conntrack ct_general; @@ -95,6 +97,7 @@ struct nf_conn { unsigned long status; u16 cpu; + u16 local_origin:1; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) @@ -169,11 +172,13 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) return (struct nf_conn *)(nfct & NFCT_PTRMASK); } +void nf_ct_destroy(struct nf_conntrack *nfct); + /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { - WARN_ON(!ct); - nf_conntrack_put(&ct->ct_general); + if (ct && refcount_dec_and_test(&ct->ct_general.use)) + nf_ct_destroy(&ct->ct_general); } /* Protocol module loading */ @@ -276,14 +281,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { - s32 timeout = ct->timeout - nfct_time_stamp; + s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; - return timeout > 0 ? timeout : 0; + return max(timeout, 0); } static inline bool nf_ct_is_expired(const struct nf_conn *ct) { - return (__s32)(ct->timeout - nfct_time_stamp) <= 0; + return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; } /* use after obtaining a reference count */ @@ -302,7 +307,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) static inline void nf_ct_offload_timeout(struct nf_conn *ct) { if (nf_ct_expires(ct) < NF_CT_DAY / 2) - ct->timeout = nfct_time_stamp + NF_CT_DAY; + WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); } struct kernel_param; diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h new file mode 100644 index 000000000000..078d3c52c03f --- /dev/null +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_ACT_CT_H +#define _NF_CONNTRACK_ACT_CT_H + +#include <net/netfilter/nf_conntrack.h> +#include <linux/netfilter/nf_conntrack_common.h> +#include <net/netfilter/nf_conntrack_extend.h> + +struct nf_conn_act_ct_ext { + int ifindex[IP_CT_DIR_MAX]; +}; + +static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + return nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); +#else + return NULL; +#endif +} + +static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); + + if (act_ct) + return act_ct; + + act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + return act_ct; +#else + return NULL; +#endif +} + +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +#endif /* _NF_CONNTRACK_ACT_CT_H */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index e1e588387103..c7515d82ab06 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -28,6 +28,9 @@ enum nf_ct_ext_id { #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) NF_CT_EXT_SYNPROXY, #endif +#if IS_ENABLED(CONFIG_NET_ACT_CT) + NF_CT_EXT_ACT_CT, +#endif NF_CT_EXT_NUM, }; @@ -40,6 +43,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels #define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy +#define NF_CT_EXT_ACT_CT_TYPE struct nf_conn_act_ct_ext /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a16171c5fd9e..eaf55da9a205 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -21,13 +21,19 @@ struct module; #define NFT_JUMP_STACK_SIZE 16 +enum { + NFT_PKTINFO_L4PROTO = (1 << 0), + NFT_PKTINFO_INNER = (1 << 1), +}; + struct nft_pktinfo { struct sk_buff *skb; const struct nf_hook_state *state; - bool tprot_set; + u8 flags; u8 tprot; u16 fragoff; unsigned int thoff; + unsigned int inneroff; }; static inline struct sock *nft_sk(const struct nft_pktinfo *pkt) @@ -75,7 +81,7 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt) { - pkt->tprot_set = false; + pkt->flags = 0; pkt->tprot = 0; pkt->thoff = 0; pkt->fragoff = 0; @@ -99,6 +105,8 @@ struct nft_data { }; } __attribute__((aligned(__alignof__(u64)))); +#define NFT_REG32_NUM 20 + /** * struct nft_regs - nf_tables register set * @@ -109,11 +117,21 @@ struct nft_data { */ struct nft_regs { union { - u32 data[20]; + u32 data[NFT_REG32_NUM]; struct nft_verdict verdict; }; }; +struct nft_regs_track { + struct { + const struct nft_expr *selector; + const struct nft_expr *bitwise; + } regs[NFT_REG32_NUM]; + + const struct nft_expr *cur; + const struct nft_expr *last; +}; + /* Store/load an u8, u16 or u64 integer to/from the u32 data register. * * Note, when using concatenations, register allocation happens at 32-bit @@ -340,6 +358,8 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr); +bool nft_expr_reduce_bitwise(struct nft_regs_track *track, + const struct nft_expr *expr); struct nft_set_ext; @@ -878,6 +898,8 @@ struct nft_expr_ops { int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); + bool (*reduce)(struct nft_regs_track *track, + const struct nft_expr *expr); bool (*gc)(struct net *net, const struct nft_expr *expr); int (*offload)(struct nft_offload_ctx *ctx, @@ -968,6 +990,20 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, #define NFT_CHAIN_POLICY_UNSET U8_MAX +struct nft_rule_dp { + u64 is_last:1, + dlen:12, + handle:42; /* for tracing */ + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +struct nft_rule_blob { + unsigned long size; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_rule_dp)))); +}; + /** * struct nft_chain - nf_tables chain * @@ -981,8 +1017,8 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, * @name: name of the chain */ struct nft_chain { - struct nft_rule *__rcu *rules_gen_0; - struct nft_rule *__rcu *rules_gen_1; + struct nft_rule_blob __rcu *blob_gen_0; + struct nft_rule_blob __rcu *blob_gen_1; struct list_head rules; struct list_head list; struct rhlist_head rhlhead; @@ -997,7 +1033,7 @@ struct nft_chain { u8 *udata; /* Only used during control plane commit phase: */ - struct nft_rule **rules_next; + struct nft_rule_blob *blob_next; }; int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); @@ -1315,7 +1351,7 @@ struct nft_traceinfo { const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; - const struct nft_rule *rule; + const struct nft_rule_dp *rule; const struct nft_verdict *verdict; enum nft_trace_types type; bool packet_dumped; diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 0fa5a6d98a00..b6fb1fdff9b2 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -7,6 +7,7 @@ extern struct nft_expr_type nft_imm_type; extern struct nft_expr_type nft_cmp_type; +extern struct nft_expr_type nft_counter_type; extern struct nft_expr_type nft_lookup_type; extern struct nft_expr_type nft_bitwise_type; extern struct nft_expr_type nft_byteorder_type; @@ -21,6 +22,7 @@ extern struct nft_expr_type nft_last_type; #ifdef CONFIG_NETWORK_SECMARK extern struct nft_object_type nft_secmark_obj_type; #endif +extern struct nft_object_type nft_counter_obj_type; int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); @@ -120,6 +122,8 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); +void nft_counter_init_seqcount(void); + struct nft_expr; struct nft_regs; struct nft_pktinfo; @@ -143,4 +147,6 @@ void nft_dynset_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_rt_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index eb4c094cd54d..c4a6147b0ef8 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -10,7 +10,7 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt) struct iphdr *ip; ip = ip_hdr(pkt->skb); - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = ip->protocol; pkt->thoff = ip_hdrlen(pkt->skb); pkt->fragoff = ntohs(ip->frag_off) & IP_OFFSET; @@ -36,7 +36,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) else if (len < thoff) return -1; - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = iph->protocol; pkt->thoff = thoff; pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; @@ -71,7 +71,7 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt) goto inhdr_error; } - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = iph->protocol; pkt->thoff = thoff; pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; @@ -82,4 +82,5 @@ inhdr_error: __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INHDRERRORS); return -1; } + #endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 7595e02b00ba..ec7eaeaf4f04 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -18,7 +18,7 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt) return; } - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; pkt->thoff = thoff; pkt->fragoff = frag_off; @@ -50,7 +50,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) if (protohdr < 0) return -1; - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; pkt->thoff = thoff; pkt->fragoff = frag_off; @@ -96,7 +96,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) if (protohdr < 0) goto inhdr_error; - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; pkt->thoff = thoff; pkt->fragoff = frag_off; diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index 832ab69efda5..4c3809e141f4 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -6,7 +6,7 @@ struct xt_rateest { /* keep lock and bstats on same cache line to speedup xt_rateest_tg() */ - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; spinlock_t lock; diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h index 0ca6a1b87185..2c01a278d1eb 100644 --- a/include/net/netns/bpf.h +++ b/include/net/netns/bpf.h @@ -6,11 +6,18 @@ #ifndef __NETNS_BPF_H__ #define __NETNS_BPF_H__ -#include <linux/bpf-netns.h> +#include <linux/list.h> struct bpf_prog; struct bpf_prog_array; +enum netns_bpf_attach_type { + NETNS_BPF_INVALID = -1, + NETNS_BPF_FLOW_DISSECTOR = 0, + NETNS_BPF_SK_LOOKUP, + MAX_NETNS_BPF_ATTACH_TYPE +}; + struct netns_bpf { /* Array of programs to run compiled from progs or links */ struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 36c2d998a43c..552bc25b1933 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -12,7 +12,6 @@ struct netns_core { int sysctl_somaxconn; #ifdef CONFIG_PROC_FS - int __percpu *sock_inuse; struct prot_inuse __percpu *prot_inuse; #endif }; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2f65701a43c9..78557643526e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -65,7 +65,7 @@ struct netns_ipv4 { bool fib_has_custom_local_routes; bool fib_offload_disabled; #ifdef CONFIG_IP_ROUTE_CLASSID - int fib_num_tclassid_users; + atomic_t fib_num_tclassid_users; #endif struct hlist_head *fib_table_hash; struct sock *fibnl; @@ -85,6 +85,9 @@ struct netns_ipv4 { int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; + u32 ip_rt_min_pmtu; + int ip_rt_mtu_expires; + struct local_ports ip_local_ports; u8 sysctl_tcp_ecn; diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index a964daedc17b..ea8595651c38 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -30,6 +30,7 @@ enum nci_flag { NCI_UP, NCI_DATA_EXCHANGE, NCI_DATA_EXCHANGE_TO, + NCI_UNREG, }; /* NCI device states */ diff --git a/include/net/nl802154.h b/include/net/nl802154.h index ddcee128f5d9..145acb8f2509 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -19,6 +19,8 @@ * */ +#include <linux/types.h> + #define NL802154_GENL_NAME "nl802154" enum nl802154_commands { @@ -150,10 +152,9 @@ enum nl802154_attrs { }; enum nl802154_iftype { - /* for backwards compatibility TODO */ - NL802154_IFTYPE_UNSPEC = -1, + NL802154_IFTYPE_UNSPEC = (~(__u32)0), - NL802154_IFTYPE_NODE, + NL802154_IFTYPE_NODE = 0, NL802154_IFTYPE_MONITOR, NL802154_IFTYPE_COORD, diff --git a/include/net/page_pool.h b/include/net/page_pool.h index a4082406a003..79a805542d0f 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -80,6 +80,8 @@ struct page_pool_params { enum dma_data_direction dma_dir; /* DMA mapping direction */ unsigned int max_len; /* max DMA sync memory size */ unsigned int offset; /* DMA addr offset */ + void (*init_callback)(struct page *page, void *arg); + void *init_arg; }; struct page_pool { @@ -94,6 +96,7 @@ struct page_pool { unsigned int frag_offset; struct page *frag_page; long frag_users; + u32 xdp_mem_id; /* * Data structure for allocation side @@ -168,9 +171,12 @@ bool page_pool_return_skb_page(struct page *page); struct page_pool *page_pool_create(const struct page_pool_params *params); +struct xdp_mem_info; + #ifdef CONFIG_PAGE_POOL void page_pool_destroy(struct page_pool *pool); -void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), + struct xdp_mem_info *mem); void page_pool_release_page(struct page_pool *pool, struct page *page); void page_pool_put_page_bulk(struct page_pool *pool, void **data, int count); @@ -180,7 +186,8 @@ static inline void page_pool_destroy(struct page_pool *pool) } static inline void page_pool_use_xdp_mem(struct page_pool *pool, - void (*disconnect)(void *)) + void (*disconnect)(void *), + struct xdp_mem_info *mem) { } static inline void page_pool_release_page(struct page_pool *pool, diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 83a6d0792180..676cb8ea9e15 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -202,7 +202,8 @@ struct tcf_exts { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ int nr_actions; struct tc_action **actions; - struct net *net; + struct net *net; + netns_tracker ns_tracker; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -217,6 +218,9 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net, #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; + /* Note: we do not own yet a reference on net. + * This reference might be taken later from tcf_exts_get_net(). + */ exts->net = net; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); @@ -236,6 +240,8 @@ static inline bool tcf_exts_get_net(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT exts->net = maybe_get_net(exts->net); + if (exts->net) + netns_tracker_alloc(exts->net, &exts->ns_tracker, GFP_KERNEL); return exts->net != NULL; #else return true; @@ -246,7 +252,7 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT if (exts->net) - put_net(exts->net); + put_net_track(exts->net, &exts->ns_tracker); #endif } @@ -258,26 +264,31 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) for (; 0; (void)(i), (void)(a), (void)(exts)) #endif +#define tcf_act_for_each_action(i, a, actions) \ + for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++) + static inline void -tcf_exts_stats_update(const struct tcf_exts *exts, - u64 bytes, u64 packets, u64 drops, u64 lastuse, - u8 used_hw_stats, bool used_hw_stats_valid) +tcf_exts_hw_stats_update(const struct tcf_exts *exts, + u64 bytes, u64 packets, u64 drops, u64 lastuse, + u8 used_hw_stats, bool used_hw_stats_valid) { #ifdef CONFIG_NET_CLS_ACT int i; - preempt_disable(); - for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - tcf_action_stats_update(a, bytes, packets, drops, - lastuse, true); - a->used_hw_stats = used_hw_stats; - a->used_hw_stats_valid = used_hw_stats_valid; - } + /* if stats from hw, just skip */ + if (tcf_action_update_hw_stats(a)) { + preempt_disable(); + tcf_action_stats_update(a, bytes, packets, drops, + lastuse, true); + preempt_enable(); - preempt_enable(); + a->used_hw_stats = used_hw_stats; + a->used_hw_stats_valid = used_hw_stats_valid; + } + } #endif } @@ -321,6 +332,9 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, u32 flags, struct netlink_ext_ack *extack); +int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb, + struct nlattr *rate_tlv, struct tcf_exts *exts, + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); @@ -532,9 +546,11 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) return ifindex == skb->skb_iif; } -int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts); -void tc_cleanup_flow_action(struct flow_action *flow_action); +int tc_setup_offload_action(struct flow_action *flow_action, + const struct tcf_exts *exts); +void tc_cleanup_offload_action(struct flow_action *flow_action); +int tc_setup_action(struct flow_action *flow_action, + struct tc_action *actions[]); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); @@ -765,7 +781,7 @@ struct tc_cookie { }; struct tc_qopt_offload_stats { - struct gnet_stats_basic_packed *bstats; + struct gnet_stats_basic_sync *bstats; struct gnet_stats_queue *qstats; }; @@ -885,7 +901,7 @@ struct tc_gred_qopt_offload_params { }; struct tc_gred_qopt_offload_stats { - struct gnet_stats_basic_packed bstats[MAX_DPs]; + struct gnet_stats_basic_sync bstats[MAX_DPs]; struct gnet_stats_queue qstats[MAX_DPs]; struct red_stats *xstats[MAX_DPs]; }; @@ -977,6 +993,7 @@ enum tc_tbf_command { TC_TBF_REPLACE, TC_TBF_DESTROY, TC_TBF_STATS, + TC_TBF_GRAFT, }; struct tc_tbf_qopt_offload_replace_params { @@ -992,6 +1009,7 @@ struct tc_tbf_qopt_offload { union { struct tc_tbf_qopt_offload_replace_params replace_params; struct tc_qopt_offload_stats stats; + u32 child_handle; }; }; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index bf79f3a890af..9e7b21c0b3a6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -193,4 +193,22 @@ static inline void skb_txtime_consumed(struct sk_buff *skb) skb->tstamp = ktime_set(0, 0); } +struct tc_skb_cb { + struct qdisc_skb_cb qdisc_cb; + + u16 mru; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; + u16 zone; /* Only valid if post_ct = true */ +}; + +static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) +{ + struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; + + BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); + return cb; +} + #endif diff --git a/include/net/rose.h b/include/net/rose.h index cf517d306a28..0f0a4ce0fee7 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -162,8 +162,8 @@ extern int sysctl_rose_link_fail_timeout; extern int sysctl_rose_maximum_vcs; extern int sysctl_rose_window_size; -int rosecmp(rose_address *, rose_address *); -int rosecmpm(rose_address *, rose_address *, unsigned short); +int rosecmp(const rose_address *, const rose_address *); +int rosecmpm(const rose_address *, const rose_address *, unsigned short); char *rose2asc(char *buf, const rose_address *); struct sock *rose_find_socket(unsigned int, struct rose_neigh *); void rose_kill_by_neigh(struct rose_neigh *); @@ -205,8 +205,8 @@ extern const struct seq_operations rose_node_seqops; extern struct seq_operations rose_route_seqops; void rose_add_loopback_neigh(void); -int __must_check rose_add_loopback_node(rose_address *); -void rose_del_loopback_node(rose_address *); +int __must_check rose_add_loopback_node(const rose_address *); +void rose_del_loopback_node(const rose_address *); void rose_rt_device_down(struct net_device *); void rose_link_device_down(struct net_device *); struct net_device *rose_dev_first(void); diff --git a/include/net/route.h b/include/net/route.h index 2e6c0e153e3a..25404fc2b483 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -43,6 +43,7 @@ #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) +struct ip_tunnel_info; struct fib_nh; struct fib_info; struct uncached_list; @@ -369,7 +370,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, { struct neighbour *neigh; - neigh = __ipv4_neigh_lookup_noref(dev, daddr); + neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &daddr, dev, false); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c0069ac00e62..472843eedbae 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -40,6 +40,13 @@ enum qdisc_state_t { __QDISC_STATE_DRAINING, }; +enum qdisc_state2_t { + /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly. + * Use qdisc_run_begin/end() or qdisc_is_running() instead. + */ + __QDISC_STATE2_RUNNING, +}; + #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) #define QDISC_STATE_DRAINING BIT(__QDISC_STATE_DRAINING) @@ -97,7 +104,7 @@ struct Qdisc { struct netdev_queue *dev_queue; struct net_rate_estimator __rcu *rate_est; - struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_basic_sync __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; int pad; refcount_t refcnt; @@ -107,10 +114,10 @@ struct Qdisc { */ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; - struct gnet_stats_basic_packed bstats; - seqcount_t running; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; unsigned long state; + unsigned long state2; /* must be written under qdisc spinlock */ struct Qdisc *next_sched; struct sk_buff_head skb_bad_txq; @@ -118,7 +125,7 @@ struct Qdisc { spinlock_t seqlock; struct rcu_head rcu; - + netdevice_tracker dev_tracker; /* private data */ long privdata[] ____cacheline_aligned; }; @@ -143,11 +150,15 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) return NULL; } +/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc + * root_lock section, or provide their own memory barriers -- ordering + * against qdisc_run_begin/end() atomic bit operations. + */ static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); - return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; + return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); } static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) @@ -167,6 +178,9 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc) return !READ_ONCE(qdisc->q.qlen); } +/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with + * the qdisc root lock acquired. + */ static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { @@ -206,15 +220,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) * after it releases the lock at the end of qdisc_run_end(). */ return spin_trylock(&qdisc->seqlock); - } else if (qdisc_is_running(qdisc)) { - return false; } - /* Variant of write_seqcount_begin() telling lockdep a trylock - * was attempted. - */ - raw_write_seqcount_begin(&qdisc->running); - seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); - return true; + return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); } static inline void qdisc_run_end(struct Qdisc *qdisc) @@ -226,7 +233,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) &qdisc->state))) __netif_schedule(qdisc); } else { - write_seqcount_end(&qdisc->running); + __clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); } } @@ -308,6 +315,8 @@ struct Qdisc_ops { struct netlink_ext_ack *extack); void (*attach)(struct Qdisc *sch); int (*change_tx_queue_len)(struct Qdisc *, unsigned int); + void (*change_real_num_tx)(struct Qdisc *sch, + unsigned int new_real_tx); int (*dump)(struct Qdisc *, struct sk_buff *); int (*dump_stats)(struct Qdisc *, struct gnet_dump *); @@ -438,8 +447,6 @@ struct qdisc_skb_cb { }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; - u16 mru; - bool post_ct; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); @@ -590,14 +597,6 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) return qdisc_lock(root); } -static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) -{ - struct Qdisc *root = qdisc_root_sleeping(qdisc); - - ASSERT_RTNL(); - return &root->running; -} - static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) { return qdisc->dev_queue->dev; @@ -684,6 +683,8 @@ void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); void qdisc_class_hash_destroy(struct Qdisc_class_hash *); int dev_qdisc_change_tx_queue_len(struct net_device *dev); +void dev_qdisc_change_real_num_tx(struct net_device *dev, + unsigned int new_real_tx); void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); @@ -845,14 +846,16 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return sch->enqueue(skb, sch, to_free); } -static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, +static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, __u64 bytes, __u32 packets) { - bstats->bytes += bytes; - bstats->packets += packets; + u64_stats_update_begin(&bstats->syncp); + u64_stats_add(&bstats->bytes, bytes); + u64_stats_add(&bstats->packets, packets); + u64_stats_update_end(&bstats->syncp); } -static inline void bstats_update(struct gnet_stats_basic_packed *bstats, +static inline void bstats_update(struct gnet_stats_basic_sync *bstats, const struct sk_buff *skb) { _bstats_update(bstats, @@ -860,26 +863,10 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats, skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1); } -static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, - __u64 bytes, __u32 packets) -{ - u64_stats_update_begin(&bstats->syncp); - _bstats_update(&bstats->bstats, bytes, packets); - u64_stats_update_end(&bstats->syncp); -} - -static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, - const struct sk_buff *skb) -{ - u64_stats_update_begin(&bstats->syncp); - bstats_update(&bstats->bstats, skb); - u64_stats_update_end(&bstats->syncp); -} - static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, const struct sk_buff *skb) { - bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb); + bstats_update(this_cpu_ptr(sch->cpu_bstats), skb); } static inline void qdisc_bstats_update(struct Qdisc *sch, @@ -968,10 +955,9 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, __u32 *backlog) { struct gnet_stats_queue qstats = { 0 }; - __u32 len = qdisc_qlen_sum(sch); - __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len); - *qlen = qstats.qlen; + gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats); + *qlen = qstats.qlen + qdisc_qlen(sch); *backlog = qstats.backlog; } @@ -1258,6 +1244,7 @@ struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; + u16 mpu; u8 linklayer; u8 shift; }; @@ -1267,6 +1254,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, { len += r->overhead; + if (len < r->mpu) + len = r->mpu; + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; @@ -1289,6 +1279,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->rate = min_t(u64, r->rate_bytes_ps, ~0U); res->overhead = r->overhead; + res->mpu = r->mpu; res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } @@ -1312,15 +1303,15 @@ void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64); struct mini_Qdisc { struct tcf_proto *filter_list; struct tcf_block *block; - struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_basic_sync __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; - struct rcu_head rcu; + unsigned long rcu_state; }; static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq, const struct sk_buff *skb) { - bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb); + bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb); } static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq) @@ -1341,6 +1332,8 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, struct tcf_block *block); +void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx); + int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); #endif diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 69bab88ad66b..bf3716fe83e0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -105,19 +105,18 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_idx(struct net *net, struct rhashtable_iter *iter, int pos); -int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), - struct net *net, +int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p); +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); @@ -510,8 +509,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport) return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1); } -#define sctp_for_each_hentry(epb, head) \ - hlist_for_each_entry(epb, head, node) +#define sctp_for_each_hentry(ep, head) \ + hlist_for_each_entry(ep, head, node) /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) @@ -626,7 +625,8 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize) static inline int sctp_transport_pl_hlen(struct sctp_transport *t) { - return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0); + return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0) - + sizeof(struct sctphdr); } static inline void sctp_transport_pl_reset(struct sctp_transport *t) @@ -653,12 +653,10 @@ static inline void sctp_transport_pl_update(struct sctp_transport *t) if (t->pl.state == SCTP_PL_DISABLED) return; - if (del_timer(&t->probe_timer)) - sctp_transport_put(t); - t->pl.state = SCTP_PL_BASE; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pl.probe_size = SCTP_BASE_PLPMTU; + sctp_transport_reset_probe_timer(t); } static inline bool sctp_transport_pl_enabled(struct sctp_transport *t) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 651bba654d77..350f250b0dc7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -984,12 +984,10 @@ struct sctp_transport { } cacc; struct { - __u32 last_rtx_chunks; __u16 pmtu; __u16 probe_size; __u16 probe_high; - __u8 probe_count:3; - __u8 raise_count:5; + __u8 probe_count; __u8 state; } pl; /* plpmtud related */ @@ -1011,6 +1009,7 @@ void sctp_transport_reset_t3_rtx(struct sctp_transport *); void sctp_transport_reset_hb_timer(struct sctp_transport *); void sctp_transport_reset_reconf_timer(struct sctp_transport *transport); void sctp_transport_reset_probe_timer(struct sctp_transport *transport); +void sctp_transport_reset_raise_timer(struct sctp_transport *transport); int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); @@ -1025,7 +1024,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); -bool sctp_transport_pl_send(struct sctp_transport *t); +void sctp_transport_pl_send(struct sctp_transport *t); bool sctp_transport_pl_recv(struct sctp_transport *t); @@ -1244,10 +1243,6 @@ enum sctp_endpoint_type { */ struct sctp_ep_common { - /* Fields to help us manage our entries in the hash tables. */ - struct hlist_node node; - int hashent; - /* Runtime type information. What kind of endpoint is this? */ enum sctp_endpoint_type type; @@ -1299,6 +1294,10 @@ struct sctp_endpoint { /* Common substructure for endpoint and association. */ struct sctp_ep_common base; + /* Fields to help us manage our entries in the hash tables. */ + struct hlist_node node; + int hashent; + /* Associations: A list of current associations and mappings * to the data consumers for each association. This * may be in the form of a hash table or other @@ -1355,16 +1354,7 @@ struct sctp_endpoint { reconf_enable:1; __u8 strreset_enable; - - /* Security identifiers from incoming (INIT). These are set by - * security_sctp_assoc_request(). These will only be used by - * SCTP TCP type sockets and peeled off connections as they - * cause a new socket to be generated. security_sctp_sk_clone() - * will then plug these into the new socket. - */ - - u32 secid; - u32 peer_secid; + struct rcu_head rcu; }; /* Recover the outter endpoint structure. */ @@ -1380,7 +1370,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); -void sctp_endpoint_hold(struct sctp_endpoint *); +int sctp_endpoint_hold(struct sctp_endpoint *ep); void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, @@ -2104,6 +2094,16 @@ struct sctp_association { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + /* Security identifiers from incoming (INIT). These are set by + * security_sctp_assoc_request(). These will only be used by + * SCTP TCP type sockets and peeled off connections as they + * cause a new socket to be generated. security_sctp_sk_clone() + * will then plug these into the new socket. + */ + + u32 secid; + u32 peer_secid; + struct rcu_head rcu; }; diff --git a/include/net/seg6.h b/include/net/seg6.h index 9d19c15e8545..af668f17b398 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -58,9 +58,30 @@ extern int seg6_local_init(void); extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); +extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); +extern void seg6_icmp_srh(struct sk_buff *skb, struct inet6_skb_parm *opt); extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, u32 tbl_id); + +/* If the packet which invoked an ICMP error contains an SRH return + * the true destination address from within the SRH, otherwise use the + * destination address in the IP header. + */ +static inline const struct in6_addr *seg6_get_daddr(struct sk_buff *skb, + struct inet6_skb_parm *opt) +{ + struct ipv6_sr_hdr *srh; + + if (opt->flags & IP6SKB_SEG6) { + srh = (struct ipv6_sr_hdr *)(skb->data + opt->srhoff); + return &srh->segments[0]; + } + + return NULL; +} + + #endif diff --git a/include/net/sock.h b/include/net/sock.h index 463f390d90b3..ff9b508d9c5f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -56,13 +56,13 @@ #include <linux/wait.h> #include <linux/cgroup-defs.h> #include <linux/rbtree.h> -#include <linux/filter.h> #include <linux/rculist_nulls.h> #include <linux/poll.h> #include <linux/sockptr.h> #include <linux/indirect_call_wrapper.h> #include <linux/atomic.h> #include <linux/refcount.h> +#include <linux/llist.h> #include <net/dst.h> #include <net/checksum.h> #include <net/tcp_states.h> @@ -248,6 +248,7 @@ struct sock_common { }; struct bpf_local_storage; +struct sk_filter; /** * struct sock - network layer representation of sockets @@ -259,10 +260,11 @@ struct bpf_local_storage; * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux + * @sk_rx_dst_ifindex: ifindex for @sk_rx_dst + * @sk_rx_dst_cookie: cookie for @sk_rx_dst * @sk_dst_cache: destination cache * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy - * @sk_rx_skb_cache: cache copy of recently accessed RX skb * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed * @sk_tsq_flags: TCP Small Queues flags @@ -270,6 +272,7 @@ struct bpf_local_storage; * @sk_omem_alloc: "o" is "option" or "other" * @sk_wmem_queued: persistent queue size * @sk_forward_alloc: space allocated forward + * @sk_reserved_mem: space reserved and non-reclaimable for the socket * @sk_napi_id: id of the last napi context to receive data for sk * @sk_ll_usec: usecs to busypoll when there is no data * @sk_allocation: allocation mode @@ -282,15 +285,14 @@ struct bpf_local_storage; * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) - * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) - * @sk_route_forced_caps: static, forced route capabilities - * (set in tcp_init_sock()) + * @sk_gso_disabled: if set, NETIF_F_GSO_MASK is forbidden. * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments * @sk_pacing_shift: scaling factor for TCP Small Queues * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held + * @defer_list: head of llist storing skbs to be freed * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, @@ -329,7 +331,6 @@ struct bpf_local_storage; * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit * @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head] - * @sk_tx_skb_cache: cache copy of recently accessed TX skb * @sk_security: used by security modules * @sk_mark: generic packet mark * @sk_cgrp_data: cgroup data for this cgroup @@ -349,6 +350,7 @@ struct bpf_local_storage; * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags + * @ns_tracker: tracker for netns reference */ struct sock { /* @@ -390,11 +392,15 @@ struct sock { #define sk_flags __sk_common.skc_flags #define sk_rxhash __sk_common.skc_rxhash + /* early demux fields */ + struct dst_entry __rcu *sk_rx_dst; + int sk_rx_dst_ifindex; + u32 sk_rx_dst_cookie; + socket_lock_t sk_lock; atomic_t sk_drops; int sk_rcvlowat; struct sk_buff_head sk_error_queue; - struct sk_buff *sk_rx_skb_cache; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -410,9 +416,12 @@ struct sock { struct sk_buff *head; struct sk_buff *tail; } sk_backlog; + struct llist_head defer_list; + #define sk_rmem_alloc sk_backlog.rmem_alloc int sk_forward_alloc; + u32 sk_reserved_mem; #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sk_ll_usec; /* ===== mostly read cache line ===== */ @@ -430,7 +439,7 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct dst_entry *sk_rx_dst; + struct dst_entry __rcu *sk_dst_cache; atomic_t sk_omem_alloc; int sk_sndbuf; @@ -443,7 +452,6 @@ struct sock { struct sk_buff *sk_send_head; struct rb_root tcp_rtx_queue; }; - struct sk_buff *sk_tx_skb_cache; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; @@ -457,8 +465,6 @@ struct sock { unsigned long sk_max_pacing_rate; struct page_frag sk_frag; netdev_features_t sk_route_caps; - netdev_features_t sk_route_nocaps; - netdev_features_t sk_route_forced_caps; int sk_gso_type; unsigned int sk_gso_max_size; gfp_t sk_allocation; @@ -468,7 +474,7 @@ struct sock { * Because of non atomicity rules, all * changes are protected by socket lock. */ - u8 sk_padding : 1, + u8 sk_gso_disabled : 1, sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1, @@ -490,6 +496,7 @@ struct sock { u16 sk_busy_poll_budget; #endif spinlock_t sk_peer_lock; + int sk_bind_phc; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; @@ -499,7 +506,6 @@ struct sock { seqlock_t sk_stamp_seq; #endif u16 sk_tsflags; - int sk_bind_phc; u8 sk_shutdown; u32 sk_tskey; atomic_t sk_zckey; @@ -533,6 +539,7 @@ struct sock { struct bpf_local_storage __rcu *sk_bpf_storage; #endif struct rcu_head sk_rcu; + netns_tracker ns_tracker; }; enum sk_pacing { @@ -1019,12 +1026,18 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); +INDIRECT_CALLABLE_DECLARE(int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)); +INDIRECT_CALLABLE_DECLARE(int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)); + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { if (sk_memalloc_socks() && skb_pfmemalloc(skb)) return __sk_backlog_rcv(sk, skb); - return sk->sk_backlog_rcv(sk, skb); + return INDIRECT_CALL_INET(sk->sk_backlog_rcv, + tcp_v6_do_rcv, + tcp_v4_do_rcv, + sk, skb); } static inline void sk_incoming_cpu_update(struct sock *sk) @@ -1196,6 +1209,7 @@ struct proto { void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); + void (*put_port)(struct sock *sk); #ifdef CONFIG_BPF_SYSCALL int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, @@ -1207,6 +1221,10 @@ struct proto { unsigned int inuse_idx; #endif +#if IS_ENABLED(CONFIG_MPTCP) + int (*forward_alloc_get)(const struct sock *sk); +#endif + bool (*stream_memory_free)(const struct sock *sk, int wake); bool (*sock_is_readable)(struct sock *sk); /* Memory pressure */ @@ -1214,6 +1232,7 @@ struct proto { void (*leave_memory_pressure)(struct sock *sk); atomic_long_t *memory_allocated; /* Current allocated memory. */ struct percpu_counter *sockets_allocated; /* Current number of sockets. */ + /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. @@ -1237,7 +1256,7 @@ struct proto { unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ - struct percpu_counter *orphan_count; + unsigned int __percpu *orphan_count; struct request_sock_ops *rsk_prot; struct timewait_sock_ops *twsk_prot; @@ -1291,20 +1310,23 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake)); +static inline int sk_forward_alloc_get(const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_MPTCP) + if (sk->sk_prot->forward_alloc_get) + return sk->sk_prot->forward_alloc_get(sk); +#endif + return sk->sk_forward_alloc; +} + static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) return false; -#ifdef CONFIG_INET - return sk->sk_prot->stream_memory_free ? - INDIRECT_CALL_1(sk->sk_prot->stream_memory_free, - tcp_stream_memory_free, - sk, wake) : true; -#else return sk->sk_prot->stream_memory_free ? - sk->sk_prot->stream_memory_free(sk, wake) : true; -#endif + INDIRECT_CALL_INET_1(sk->sk_prot->stream_memory_free, + tcp_stream_memory_free, sk, wake) : true; } static inline bool sk_stream_memory_free(const struct sock *sk) @@ -1411,13 +1433,32 @@ proto_memory_pressure(struct proto *prot) #ifdef CONFIG_PROC_FS -/* Called with local bh disabled */ -void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); +#define PROTO_INUSE_NR 64 /* should be enough for the first time */ +struct prot_inuse { + int all; + int val[PROTO_INUSE_NR]; +}; + +static inline void sock_prot_inuse_add(const struct net *net, + const struct proto *prot, int val) +{ + this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); +} + +static inline void sock_inuse_add(const struct net *net, int val) +{ + this_cpu_add(net->core.prot_inuse->all, val); +} + int sock_prot_inuse_get(struct net *net, struct proto *proto); int sock_inuse_get(struct net *net); #else -static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, - int inc) +static inline void sock_prot_inuse_add(const struct net *net, + const struct proto *prot, int val) +{ +} + +static inline void sock_inuse_add(const struct net *net, int val) { } #endif @@ -1518,20 +1559,49 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) skb_pfmemalloc(skb); } +static inline int sk_unused_reserved_mem(const struct sock *sk) +{ + int unused_mem; + + if (likely(!sk->sk_reserved_mem)) + return 0; + + unused_mem = sk->sk_reserved_mem - sk->sk_wmem_queued - + atomic_read(&sk->sk_rmem_alloc); + + return unused_mem > 0 ? unused_mem : 0; +} + static inline void sk_mem_reclaim(struct sock *sk) { + int reclaimable; + if (!sk_has_account(sk)) return; - if (sk->sk_forward_alloc >= SK_MEM_QUANTUM) - __sk_mem_reclaim(sk, sk->sk_forward_alloc); + + reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); + + if (reclaimable >= SK_MEM_QUANTUM) + __sk_mem_reclaim(sk, reclaimable); +} + +static inline void sk_mem_reclaim_final(struct sock *sk) +{ + sk->sk_reserved_mem = 0; + sk_mem_reclaim(sk); } static inline void sk_mem_reclaim_partial(struct sock *sk) { + int reclaimable; + if (!sk_has_account(sk)) return; - if (sk->sk_forward_alloc > SK_MEM_QUANTUM) - __sk_mem_reclaim(sk, sk->sk_forward_alloc - 1); + + reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); + + if (reclaimable > SK_MEM_QUANTUM) + __sk_mem_reclaim(sk, reclaimable - 1); } static inline void sk_mem_charge(struct sock *sk, int size) @@ -1541,11 +1611,19 @@ static inline void sk_mem_charge(struct sock *sk, int size) sk->sk_forward_alloc -= size; } +/* the following macros control memory reclaiming in sk_mem_uncharge() + */ +#define SK_RECLAIM_THRESHOLD (1 << 21) +#define SK_RECLAIM_CHUNK (1 << 20) + static inline void sk_mem_uncharge(struct sock *sk, int size) { + int reclaimable; + if (!sk_has_account(sk)) return; sk->sk_forward_alloc += size; + reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); /* Avoid a possible overflow. * TCP send queues can make this happen, if sk_mem_reclaim() @@ -1554,33 +1632,8 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) * If we reach 2 MBytes, reclaim 1 MBytes right now, there is * no need to hold that much forward allocation anyway. */ - if (unlikely(sk->sk_forward_alloc >= 1 << 21)) - __sk_mem_reclaim(sk, 1 << 20); -} - -DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); -static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) -{ - sk_wmem_queued_add(sk, -skb->truesize); - sk_mem_uncharge(sk, skb->truesize); - if (static_branch_unlikely(&tcp_tx_skb_cache_key) && - !sk->sk_tx_skb_cache && !skb_cloned(skb)) { - skb_ext_reset(skb); - skb_zcopy_clear(skb, true); - sk->sk_tx_skb_cache = skb; - return; - } - __kfree_skb(skb); -} - -static inline void sock_release_ownership(struct sock *sk) -{ - if (sk->sk_lock.owned) { - sk->sk_lock.owned = 0; - - /* The sk_lock has mutex_unlock() semantics: */ - mutex_release(&sk->sk_lock.dep_map, _RET_IP_); - } + if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD)) + __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK); } /* @@ -1709,12 +1762,23 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk) return sk->sk_lock.owned; } +static inline void sock_release_ownership(struct sock *sk) +{ + if (sock_owned_by_user_nocheck(sk)) { + sk->sk_lock.owned = 0; + + /* The sk_lock has mutex_unlock() semantics: */ + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); + } +} + /* no reclassification while locks are held */ static inline bool sock_allow_reclassification(const struct sock *csk) { struct sock *sk = (struct sock *)csk; - return !sk->sk_lock.owned && !spin_is_locked(&sk->sk_lock.slock); + return !sock_owned_by_user_nocheck(sk) && + !spin_is_locked(&sk->sk_lock.slock); } struct sock *sk_alloc(struct net *net, int family, gfp_t priority, @@ -1883,32 +1947,47 @@ static inline int sk_tx_queue_get(const struct sock *sk) return -1; } -static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) +static inline void __sk_rx_queue_set(struct sock *sk, + const struct sk_buff *skb, + bool force_set) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING if (skb_rx_queue_recorded(skb)) { u16 rx_queue = skb_get_rx_queue(skb); - if (WARN_ON_ONCE(rx_queue == NO_QUEUE_MAPPING)) - return; - - sk->sk_rx_queue_mapping = rx_queue; + if (force_set || + unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue)) + WRITE_ONCE(sk->sk_rx_queue_mapping, rx_queue); } #endif } +static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) +{ + __sk_rx_queue_set(sk, skb, true); +} + +static inline void sk_rx_queue_update(struct sock *sk, const struct sk_buff *skb) +{ + __sk_rx_queue_set(sk, skb, false); +} + static inline void sk_rx_queue_clear(struct sock *sk) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING - sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING; + WRITE_ONCE(sk->sk_rx_queue_mapping, NO_QUEUE_MAPPING); #endif } static inline int sk_rx_queue_get(const struct sock *sk) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING - if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING) - return sk->sk_rx_queue_mapping; + if (sk) { + int res = READ_ONCE(sk->sk_rx_queue_mapping); + + if (res != NO_QUEUE_MAPPING) + return res; + } #endif return -1; @@ -2073,13 +2152,10 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) { if (skb_get_dst_pending_confirm(skb)) { struct sock *sk = skb->sk; - unsigned long now = jiffies; - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); if (sk && READ_ONCE(sk->sk_dst_pending_confirm)) WRITE_ONCE(sk->sk_dst_pending_confirm, 0); + neigh_confirm(n); } } @@ -2092,10 +2168,10 @@ static inline bool sk_can_gso(const struct sock *sk) void sk_setup_caps(struct sock *sk, struct dst_entry *dst); -static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) +static inline void sk_gso_disable(struct sock *sk) { - sk->sk_route_nocaps |= flags; - sk->sk_route_caps &= ~flags; + sk->sk_gso_disabled = 1; + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, @@ -2388,31 +2464,32 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) return; val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); + val = max_t(u32, val, sk_unused_reserved_mem(sk)); WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF)); } -struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, - bool force_schedule); - /** * sk_page_frag - return an appropriate page_frag * @sk: socket * * Use the per task page_frag instead of the per socket one for - * optimization when we know that we're in the normal context and owns + * optimization when we know that we're in process context and own * everything that's associated with %current. * - * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest - * inside other socket operations and end up recursing into sk_page_frag() - * while it's already in use. + * Both direct reclaim and page faults can nest inside other + * socket operations and end up recursing into sk_page_frag() + * while it's already in use: explicitly avoid task page_frag + * usage if the caller is potentially doing any of them. + * This assumes that page fault handlers use the GFP_NOFS flags. * * Return: a per task page_frag if context allows that, * otherwise a per socket one. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { - if (gfpflags_normal_context(sk->sk_allocation)) + if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) == + (__GFP_DIRECT_RECLAIM | __GFP_FS)) return ¤t->task_frag; return &sk->sk_frag; @@ -2608,7 +2685,11 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } -DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); +static inline bool sk_is_tcp(const struct sock *sk) +{ + return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from @@ -2620,12 +2701,6 @@ DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); - if (static_branch_unlikely(&tcp_rx_skb_cache_key) && - !sk->sk_rx_skb_cache) { - sk->sk_rx_skb_cache = skb; - skb_orphan(skb); - return; - } __kfree_skb(skb); } @@ -2820,6 +2895,10 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs); int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len); +int sock_get_timeout(long timeo, void *optval, bool old_timeval); +int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, + sockptr_t optval, int optlen, bool old_timeval); + static inline bool sk_is_readable(struct sock *sk) { if (sk->sk_prot->sock_is_readable) diff --git a/include/net/strparser.h b/include/net/strparser.h index 1d20b98493a1..732b7097d78e 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -54,10 +54,28 @@ struct strp_msg { int offset; }; +struct _strp_msg { + /* Internal cb structure. struct strp_msg must be first for passing + * to upper layer. + */ + struct strp_msg strp; + int accum_len; +}; + +struct sk_skb_cb { +#define SK_SKB_CB_PRIV_LEN 20 + unsigned char data[SK_SKB_CB_PRIV_LEN]; + struct _strp_msg strp; + /* temp_reg is a temporary register used for bpf_convert_data_end_access + * when dst_reg == src_reg. + */ + u64 temp_reg; +}; + static inline struct strp_msg *strp_msg(struct sk_buff *skb) { return (struct strp_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); + offsetof(struct sk_skb_cb, strp)); } /* Structure for an attached lower socket */ diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 60d806b6a5ae..d353793dfeb5 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -299,28 +299,16 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); -int switchdev_handle_fdb_add_to_device(struct net_device *dev, +int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event, const struct switchdev_notifier_fdb_info *fdb_info, bool (*check_cb)(const struct net_device *dev), bool (*foreign_dev_check_cb)(const struct net_device *dev, const struct net_device *foreign_dev), - int (*add_cb)(struct net_device *dev, - const struct net_device *orig_dev, const void *ctx, + int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, + unsigned long event, const void *ctx, const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_add_cb)(struct net_device *dev, - const struct net_device *orig_dev, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)); - -int switchdev_handle_fdb_del_to_device(struct net_device *dev, - const struct switchdev_notifier_fdb_info *fdb_info, - bool (*check_cb)(const struct net_device *dev), - bool (*foreign_dev_check_cb)(const struct net_device *dev, - const struct net_device *foreign_dev), - int (*del_cb)(struct net_device *dev, - const struct net_device *orig_dev, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_del_cb)(struct net_device *dev, - const struct net_device *orig_dev, const void *ctx, + int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, + unsigned long event, const void *ctx, const struct switchdev_notifier_fdb_info *fdb_info)); int switchdev_handle_port_obj_add(struct net_device *dev, @@ -426,32 +414,16 @@ call_switchdev_blocking_notifiers(unsigned long val, } static inline int -switchdev_handle_fdb_add_to_device(struct net_device *dev, - const struct switchdev_notifier_fdb_info *fdb_info, - bool (*check_cb)(const struct net_device *dev), - bool (*foreign_dev_check_cb)(const struct net_device *dev, - const struct net_device *foreign_dev), - int (*add_cb)(struct net_device *dev, - const struct net_device *orig_dev, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_add_cb)(struct net_device *dev, - const struct net_device *orig_dev, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)) -{ - return 0; -} - -static inline int -switchdev_handle_fdb_del_to_device(struct net_device *dev, +switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event, const struct switchdev_notifier_fdb_info *fdb_info, bool (*check_cb)(const struct net_device *dev), bool (*foreign_dev_check_cb)(const struct net_device *dev, const struct net_device *foreign_dev), - int (*del_cb)(struct net_device *dev, - const struct net_device *orig_dev, const void *ctx, + int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, + unsigned long event, const void *ctx, const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_del_cb)(struct net_device *dev, - const struct net_device *orig_dev, const void *ctx, + int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, + unsigned long event, const void *ctx, const struct switchdev_notifier_fdb_info *fdb_info)) { return 0; diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index 8bc6be81a7ad..c8fa11ebb397 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -60,11 +60,6 @@ static inline bool is_tcf_gate(const struct tc_action *a) return false; } -static inline u32 tcf_gate_index(const struct tc_action *a) -{ - return a->tcfa_index; -} - static inline s32 tcf_gate_prio(const struct tc_action *a) { s32 tcfg_prio; diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 1cace4c69e44..32ce8ea36950 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -10,6 +10,7 @@ struct tcf_mirred { int tcfm_eaction; bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; + netdevice_tracker tcfm_dev_tracker; struct list_head tcfm_list; }; #define to_mirred(a) ((struct tcf_mirred *)a) diff --git a/include/net/tcp.h b/include/net/tcp.h index 60c384569e9c..b9fc978fb2ca 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -48,7 +48,9 @@ extern struct inet_hashinfo tcp_hashinfo; -extern struct percpu_counter tcp_orphan_count; +DECLARE_PER_CPU(unsigned int, tcp_orphan_count); +int tcp_orphan_count_sum(void); + void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) @@ -288,21 +290,18 @@ static inline bool tcp_out_of_memory(struct sock *sk) return false; } -void sk_forced_mem_schedule(struct sock *sk, int size); - -static inline bool tcp_too_many_orphans(struct sock *sk, int shift) +static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { - struct percpu_counter *ocp = sk->sk_prot->orphan_count; - int orphans = percpu_counter_read_positive(ocp); - - if (orphans << shift > sysctl_tcp_max_orphans) { - orphans = percpu_counter_sum_positive(ocp); - if (orphans << shift > sysctl_tcp_max_orphans) - return true; - } - return false; + sk_wmem_queued_add(sk, -skb->truesize); + if (!skb_zcopy_pure(skb)) + sk_mem_uncharge(sk, skb->truesize); + else + sk_mem_uncharge(sk, SKB_TRUESIZE(skb_end_offset(skb))); + __kfree_skb(skb); } +void sk_forced_mem_schedule(struct sock *sk, int size); + bool tcp_check_oom(struct sock *sk, int shift); @@ -322,7 +321,7 @@ void tcp_shutdown(struct sock *sk, int how); int tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); -void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb); +void tcp_remove_empty_skb(struct sock *sk); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); @@ -330,8 +329,6 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); -struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, - struct page *page, int offset, size_t *size); ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tcp_send_mss(struct sock *sk, int *size_goal, int flags); @@ -350,6 +347,8 @@ void tcp_twsk_destructor(struct sock *sk); ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, + bool force_schedule); void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks); static inline void tcp_dec_quickack_mode(struct sock *sk, @@ -581,6 +580,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ +void tcp_skb_entail(struct sock *sk, struct sk_buff *skb); +void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); @@ -874,10 +875,11 @@ struct tcp_skb_cb { __u32 ack_seq; /* Sequence number ACK'd */ union { struct { +#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1) /* There is space for up to 24 bytes */ - __u32 in_flight:30,/* Bytes in flight at transmit */ - is_app_limited:1, /* cwnd not fully used? */ - unused:1; + __u32 is_app_limited:1, /* cwnd not fully used? */ + delivered_ce:20, + unused:11; /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ @@ -975,7 +977,8 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to, const struct sk_buff *from) { return likely(tcp_skb_can_collapse_to(to) && - mptcp_skb_can_collapse(to, from)); + mptcp_skb_can_collapse(to, from) && + skb_pure_zcopy_same(to, from)); } /* Events passed to congestion control interface */ @@ -1029,7 +1032,9 @@ struct ack_sample { struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ + u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ s32 delivered; /* number of packets delivered over interval */ + s32 delivered_ce; /* number of packets delivered w/ CE marks*/ long interval_us; /* time for tp->delivered to incr "delivered" */ u32 snd_interval_us; /* snd interval for delivered packets */ u32 rcv_interval_us; /* rcv interval for delivered packets */ @@ -1363,6 +1368,20 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) } bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); + +#ifdef CONFIG_INET +void __sk_defer_free_flush(struct sock *sk); + +static inline void sk_defer_free_flush(struct sock *sk) +{ + if (llist_empty(&sk->defer_list)) + return; + __sk_defer_free_flush(sk); +} +#else +static inline void sk_defer_free_flush(struct sock *sk) {} +#endif + int tcp_filter(struct sock *sk, struct sk_buff *skb); void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); @@ -1418,6 +1437,17 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } +static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) +{ + int unused_mem = sk_unused_reserved_mem(sk); + struct tcp_sock *tp = tcp_sk(sk); + + tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + if (unused_mem) + tp->rcv_ssthresh = max_t(u32, tp->rcv_ssthresh, + tcp_win_from_space(sk, unused_mem)); +} + void tcp_cleanup_rbuf(struct sock *sk, int copied); /* We provision sk_rcvbuf around 200% of sk_rcvlowat. @@ -1870,7 +1900,7 @@ static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct soc { list_del(&skb->tcp_tsorted_anchor); tcp_rtx_queue_unlink(skb, sk); - sk_wmem_free_skb(sk, skb); + tcp_wmem_free_skb(sk, skb); } static inline void tcp_push_pending_frames(struct sock *sk) @@ -2156,9 +2186,13 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) u16 segs_in; segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); - tp->segs_in += segs_in; + + /* We update these fields while other threads might + * read them from tcp_get_info() + */ + WRITE_ONCE(tp->segs_in, tp->segs_in + segs_in); if (skb->len > tcp_hdrlen(skb)) - tp->data_segs_in += segs_in; + WRITE_ONCE(tp->data_segs_in, tp->data_segs_in + segs_in); } /* diff --git a/include/net/tls.h b/include/net/tls.h index 1fffb206f09f..526cb2c3b724 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -66,7 +66,7 @@ #define MAX_IV_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 -/* For AES-CCM, the full 16-bytes of IV is made of '4' fields of given sizes. +/* For CCM mode, the full 16-bytes of IV is made of '4' fields of given sizes. * * IV[16] = b0[1] || implicit nonce[4] || explicit nonce[8] || length[3] * @@ -74,6 +74,7 @@ * Hence b0 contains (3 - 1) = 2. */ #define TLS_AES_CCM_IV_B0_BYTE 2 +#define TLS_SM4_CCM_IV_B0_BYTE 2 #define __TLS_INC_STATS(net, field) \ __SNMP_INC_STATS((net)->mib.tls_statistics, field) @@ -220,6 +221,8 @@ union tls_crypto_context { struct tls12_crypto_info_aes_gcm_128 aes_gcm_128; struct tls12_crypto_info_aes_gcm_256 aes_gcm_256; struct tls12_crypto_info_chacha20_poly1305 chacha20_poly1305; + struct tls12_crypto_info_sm4_gcm sm4_gcm; + struct tls12_crypto_info_sm4_ccm sm4_ccm; }; }; diff --git a/include/net/udp.h b/include/net/udp.h index 909ecf447e0f..f1c2a88c9005 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -167,36 +167,12 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); -struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, - struct udphdr *uh, struct sock *sk); -int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); - struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); -static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) -{ - struct udphdr *uh; - unsigned int hlen, off; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*uh); - uh = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) - uh = skb_gro_header_slow(skb, hlen, off); - - return uh; -} - /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline int udp_lib_hash(struct sock *sk) { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 08537aa14f7c..5a934bebe630 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -10,6 +10,7 @@ #include <net/nexthop.h> #define IANA_VXLAN_UDP_PORT 4789 +#define IANA_VXLAN_GPE_UDP_PORT 4790 /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ diff --git a/include/net/xdp.h b/include/net/xdp.h index ad5b02dcb6f4..8f0812e4996d 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -15,13 +15,13 @@ * level RX-ring queues. It is information that is specific to how * the driver have configured a given RX-ring queue. * - * Each xdp_buff frame received in the driver carry a (pointer) + * Each xdp_buff frame received in the driver carries a (pointer) * reference to this xdp_rxq_info structure. This provides the XDP * data-path read-access to RX-info for both kernel and bpf-side * (limited subset). * * For now, direct access is only safe while running in NAPI/softirq - * context. Contents is read-mostly and must not be updated during + * context. Contents are read-mostly and must not be updated during * driver NAPI/softirq poll. * * The driver usage API is a register and unregister API. @@ -30,8 +30,8 @@ * can be attached as long as it doesn't change the underlying * RX-ring. If the RX-ring does change significantly, the NIC driver * naturally need to stop the RX-ring before purging and reallocating - * memory. In that process the driver MUST call unregistor (which - * also apply for driver shutdown and unload). The register API is + * memory. In that process the driver MUST call unregister (which + * also applies for driver shutdown and unload). The register API is * also mandatory during RX-ring setup. */ @@ -260,6 +260,9 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, enum xdp_mem_type type, void *allocator); void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); +int xdp_reg_mem_model(struct xdp_mem_info *mem, + enum xdp_mem_type type, void *allocator); +void xdp_unreg_mem_model(struct xdp_mem_info *mem); /* Drivers not supporting XDP metadata can use this helper, which * rejects any room expansion for metadata as a result. diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h index a9d5b7603b89..a2d58b1a12e1 100644 --- a/include/net/xdp_priv.h +++ b/include/net/xdp_priv.h @@ -10,7 +10,6 @@ struct xdp_mem_allocator { union { void *allocator; struct page_pool *page_pool; - struct zero_copy_allocator *zc_alloc; }; struct rhash_head node; struct rcu_head rcu; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index fff069d2ed1b..3057e1a4a11c 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -6,6 +6,7 @@ #ifndef _LINUX_XDP_SOCK_H #define _LINUX_XDP_SOCK_H +#include <linux/bpf.h> #include <linux/workqueue.h> #include <linux/if_xdp.h> #include <linux/mutex.h> diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 4e295541e396..443d45951564 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -77,6 +77,12 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool) return xp_alloc(pool); } +/* Returns as many entries as possible up to max. 0 <= N <= max. */ +static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) +{ + return xp_alloc_batch(pool, xdp, max); +} + static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count) { return xp_can_alloc(pool, count); @@ -89,6 +95,13 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) xp_free(xskb); } +static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) +{ + xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; + xdp->data_meta = xdp->data; + xdp->data_end = xdp->data + size; +} + static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) { @@ -212,6 +225,11 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool) return NULL; } +static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) +{ + return 0; +} + static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count) { return false; @@ -221,6 +239,10 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) { } +static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) +{ +} + static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2308210793a0..fdb41e8bb626 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -128,6 +128,7 @@ struct xfrm_state_walk { struct xfrm_state_offload { struct net_device *dev; + netdevice_tracker dev_tracker; struct net_device *real_dev; unsigned long offload_handle; unsigned int num_exthdrs; @@ -200,6 +201,11 @@ struct xfrm_state { struct xfrm_algo_aead *aead; const char *geniv; + /* mapping change rate limiting */ + __be16 new_mapping_sport; + u32 new_mapping; /* seconds */ + u32 mapping_maxage; /* seconds for input SA */ + /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; struct sock __rcu *encap_sk; @@ -1162,7 +1168,7 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); - if (xfrm_default_allow(net, XFRM_POLICY_FWD)) + if (xfrm_default_allow(net, XFRM_POLICY_OUT)) return !net->xfrm.policy_count[XFRM_POLICY_OUT] || (skb_dst(skb)->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); @@ -1913,7 +1919,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) if (dev->xfrmdev_ops->xdo_dev_state_free) dev->xfrmdev_ops->xdo_dev_state_free(x); xso->dev = NULL; - dev_put(dev); + dev_put_track(dev, &xso->dev_tracker); } } #else diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 7a9a23e7a604..ddeefc4a1040 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -7,6 +7,7 @@ #include <linux/if_xdp.h> #include <linux/types.h> #include <linux/dma-mapping.h> +#include <linux/bpf.h> #include <net/xdp.h> struct xsk_buff_pool; @@ -23,7 +24,6 @@ struct xdp_buff_xsk { dma_addr_t dma; dma_addr_t frame_dma; struct xsk_buff_pool *pool; - bool unaligned; u64 orig_addr; struct list_head free_list_node; }; @@ -67,6 +67,7 @@ struct xsk_buff_pool { u32 free_heads_cnt; u32 headroom; u32 chunk_size; + u32 chunk_shift; u32 frame_len; u8 cached_need_wakeup; bool uses_need_wakeup; @@ -81,6 +82,13 @@ struct xsk_buff_pool { struct xdp_buff_xsk *free_heads[]; }; +/* Masks for xdp_umem_page flags. + * The low 12-bits of the addr will be 0 since this is the page address, so we + * can use them for flags. + */ +#define XSK_NEXT_PG_CONTIG_SHIFT 0 +#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT) + /* AF_XDP core. */ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem); @@ -89,7 +97,6 @@ int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, struct net_device *dev, u16 queue_id); void xp_destroy(struct xsk_buff_pool *pool); -void xp_release(struct xdp_buff_xsk *xskb); void xp_get_pool(struct xsk_buff_pool *pool); bool xp_put_pool(struct xsk_buff_pool *pool); void xp_clear_dev(struct xsk_buff_pool *pool); @@ -99,12 +106,28 @@ void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs); /* AF_XDP, and XDP core. */ void xp_free(struct xdp_buff_xsk *xskb); +static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool, + u64 addr) +{ + xskb->orig_addr = addr; + xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom; +} + +static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool, + dma_addr_t *dma_pages, u64 addr) +{ + xskb->frame_dma = (dma_pages[addr >> PAGE_SHIFT] & ~XSK_NEXT_PG_CONTIG_MASK) + + (addr & ~PAGE_MASK); + xskb->dma = xskb->frame_dma + pool->headroom + XDP_PACKET_HEADROOM; +} + /* AF_XDP ZC drivers, via xdp_sock_buff.h */ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq); int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages); void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs); struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool); +u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max); bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count); void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr); dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr); @@ -180,4 +203,25 @@ static inline u64 xp_unaligned_add_offset_to_addr(u64 addr) xp_unaligned_extract_offset(addr); } +static inline u32 xp_aligned_extract_idx(struct xsk_buff_pool *pool, u64 addr) +{ + return xp_aligned_extract_addr(pool, addr) >> pool->chunk_shift; +} + +static inline void xp_release(struct xdp_buff_xsk *xskb) +{ + if (xskb->pool->unaligned) + xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; +} + +static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb) +{ + u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start; + + offset += xskb->pool->headroom; + if (!xskb->pool->unaligned) + return xskb->orig_addr + offset; + return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); +} + #endif /* XSK_BUFF_POOL_H_ */ |
