diff options
Diffstat (limited to 'include/net')
118 files changed, 3348 insertions, 3349 deletions
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 60cad0d200a4..fd7a034b8278 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -24,6 +24,8 @@ * @P9_DEBUG_PKT: packet marshalling/unmarshalling * @P9_DEBUG_FSC: FS-cache tracing * @P9_DEBUG_VPKT: Verbose packet debugging (full packet dump) + * @P9_DEBUG_CACHE: cache operations tracing + * @P9_DEBUG_MMAP: memory-mapped I/O tracing * * These flags are passed at mount time to turn on various levels of * verbosity and tracing which will be output to the system logs. @@ -68,13 +70,39 @@ void _p9_debug(enum p9_debug_flags level, const char *func, * @P9_RSYMLINK: make symlink response * @P9_TMKNOD: create a special file object request * @P9_RMKNOD: create a special file object response + * @P9_TLOPEN: open a file for I/O (9P2000.L) + * @P9_RLOPEN: response with qid and iounit (9P2000.L) * @P9_TLCREATE: prepare a handle for I/O on an new file for 9P2000.L * @P9_RLCREATE: response with file access information for 9P2000.L * @P9_TRENAME: rename request * @P9_RRENAME: rename response - * @P9_TMKDIR: create a directory request - * @P9_RMKDIR: create a directory response - * @P9_TVERSION: version handshake request + * @P9_TREADLINK: read symbolic link target (9P2000.L) + * @P9_RREADLINK: response with symbolic link target (9P2000.L) + * @P9_TGETATTR: get file attributes request (9P2000.L) + * @P9_RGETATTR: get file attributes response (9P2000.L) + * @P9_TSETATTR: set file attributes request (9P2000.L) + * @P9_RSETATTR: set file attributes response (9P2000.L) + * @P9_TXATTRWALK: prepare to read/list extended attributes (9P2000.L) + * @P9_RXATTRWALK: response with extended attribute size (9P2000.L) + * @P9_TXATTRCREATE: prepare to set extended attribute (9P2000.L) + * @P9_RXATTRCREATE: set extended attribute response (9P2000.L) + * @P9_TREADDIR: read directory entries request (9P2000.L) + * @P9_RREADDIR: read directory entries response (9P2000.L) + * @P9_TFSYNC: flush cached file data to storage request 
(9P2000.L) + * @P9_RFSYNC: flush cached file data to storage response (9P2000.L) + * @P9_TLOCK: acquire or release a POSIX record lock (9P2000.L) + * @P9_RLOCK: POSIX record lock response (9P2000.L) + * @P9_TGETLOCK: test for existence of POSIX record lock (9P2000.L) + * @P9_RGETLOCK: POSIX record lock test response (9P2000.L) + * @P9_TLINK: create a hard link (9P2000.L) + * @P9_RLINK: hard link response (9P2000.L) + * @P9_TRENAMEAT: safely rename across directories (9P2000.L) + * @P9_RRENAMEAT: rename response (9P2000.L) + * @P9_TUNLINKAT: unlink a file or directory (9P2000.L) + * @P9_RUNLINKAT: unlink response (9P2000.L) + * @P9_TMKDIR: create a directory request (9P2000.L) + * @P9_RMKDIR: create a directory response (9P2000.L) + * @P9_TVERSION: negotiate protocol version and message size * @P9_RVERSION: version handshake response * @P9_TAUTH: request to establish authentication channel * @P9_RAUTH: response with authentication information @@ -194,6 +222,10 @@ enum p9_msg_t { * @P9_ORCLOSE: remove the file when the file is closed * @P9_OAPPEND: open the file and seek to the end * @P9_OEXCL: only create a file, do not open it + * @P9L_MODE_MASK: mask for protocol mode bits (client-side only) + * @P9L_DIRECT: disable client-side caching for this file + * @P9L_NOWRITECACHE: disable write caching for this file + * @P9L_LOOSE: enable loose cache consistency * * 9P open modes differ slightly from Posix standard modes. * In particular, there are extra modes which specify different diff --git a/include/net/Space.h b/include/net/Space.h index ef42629f4258..6a0b6674d930 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -3,10 +3,5 @@ * ethernet adaptor have the name "eth[0123...]". 
*/ -struct net_device *ultra_probe(int unit); -struct net_device *wd_probe(int unit); struct net_device *ne_probe(int unit); -struct net_device *smc_init(int unit); struct net_device *cs89x0_probe(int unit); -struct net_device *tc515_probe(int unit); -struct net_device *lance_probe(int unit); diff --git a/include/net/act_api.h b/include/net/act_api.h index 91a24b5e0b93..d11b79107930 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -70,6 +70,7 @@ struct tc_action { #define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2)) #define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3)) #define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4)) +#define TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT (1U << (TCA_ACT_FLAGS_USER_BITS + 5)) /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. @@ -159,7 +160,7 @@ int tc_action_net_init(struct net *net, struct tc_action_net *tn, { int err = 0; - tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL); + tn->idrinfo = kmalloc_obj(*tn->idrinfo); if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 78e8b877fb25..9e96776945e5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -8,7 +8,8 @@ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ -#define TEMP_VALID_LIFETIME (7*86400) /* 1 week */ +/* TEMP_VALID_LIFETIME default value as specified in RFC 8981 3.8 */ +#define TEMP_VALID_LIFETIME (2*86400) /* 2 days */ #define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */ #define REGEN_MIN_ADVANCE (2) /* 2 seconds */ #define REGEN_MAX_RETRY (3) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d40e978126e3..4e40063adab4 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/workqueue.h> +#include <net/netns/vsock.h> #include <net/sock.h> #include <uapi/linux/vm_sockets.h> 
@@ -124,7 +125,7 @@ struct vsock_transport { size_t len, int flags); int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, struct msghdr *, size_t len); - bool (*dgram_allow)(u32 cid, u32 port); + bool (*dgram_allow)(struct vsock_sock *vsk, u32 cid, u32 port); /* STREAM. */ /* TODO: stream_bind() */ @@ -136,14 +137,14 @@ struct vsock_transport { s64 (*stream_has_space)(struct vsock_sock *); u64 (*stream_rcvhiwat)(struct vsock_sock *); bool (*stream_is_active)(struct vsock_sock *); - bool (*stream_allow)(u32 cid, u32 port); + bool (*stream_allow)(struct vsock_sock *vsk, u32 cid, u32 port); /* SEQ_PACKET. */ ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, int flags); int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg, size_t len); - bool (*seqpacket_allow)(u32 remote_cid); + bool (*seqpacket_allow)(struct vsock_sock *vsk, u32 remote_cid); u32 (*seqpacket_has_data)(struct vsock_sock *vsk); /* Notification. */ @@ -178,6 +179,15 @@ struct vsock_transport { /* Addressing. */ u32 (*get_local_cid)(void); + /* Check if this transport serves a specific remote CID. + * For H2G transports: return true if the CID belongs to a registered + * guest. If not implemented, all CIDs > VMADDR_CID_HOST go to H2G. + * For G2H transports: return true if the transport can reach arbitrary + * CIDs via the hypervisor (i.e. supports the fallback overlay). VMCI + * does not implement this as it only serves CIDs 0 and 2. 
+ */ + bool (*has_remote_cid)(struct vsock_sock *vsk, u32 remote_cid); + /* Read a single skb */ int (*read_skb)(struct vsock_sock *, skb_read_actor_t); @@ -216,6 +226,11 @@ void vsock_remove_connected(struct vsock_sock *vsk); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); +struct sock *vsock_find_bound_socket_net(struct sockaddr_vm *addr, + struct net *net); +struct sock *vsock_find_connected_socket_net(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + struct net *net); void vsock_remove_sock(struct vsock_sock *vsk); void vsock_for_each_connected_socket(struct vsock_transport *transport, void (*fn)(struct sock *sk)); @@ -256,4 +271,62 @@ static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t) { return t->msgzerocopy_allow && t->msgzerocopy_allow(); } + +static inline enum vsock_net_mode vsock_net_mode(struct net *net) +{ + if (!net) + return VSOCK_NET_MODE_GLOBAL; + + return READ_ONCE(net->vsock.mode); +} + +static inline bool vsock_net_mode_global(struct vsock_sock *vsk) +{ + return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL; +} + +static inline bool vsock_net_set_child_mode(struct net *net, + enum vsock_net_mode mode) +{ + int new_locked = mode + 1; + int old_locked = 0; /* unlocked */ + + if (try_cmpxchg(&net->vsock.child_ns_mode_locked, + &old_locked, new_locked)) { + WRITE_ONCE(net->vsock.child_ns_mode, mode); + return true; + } + + return old_locked == new_locked; +} + +static inline enum vsock_net_mode vsock_net_child_mode(struct net *net) +{ + return READ_ONCE(net->vsock.child_ns_mode); +} + +/* Return true if two namespaces pass the mode rules. Otherwise, return false. + * + * A NULL namespace is treated as VSOCK_NET_MODE_GLOBAL. + * + * Read more about modes in the comment header of net/vmw_vsock/af_vsock.c. 
+ */ +static inline bool vsock_net_check_mode(struct net *ns0, struct net *ns1) +{ + enum vsock_net_mode mode0, mode1; + + /* Any vsocks within the same network namespace are always reachable, + * regardless of the mode. + */ + if (net_eq(ns0, ns1)) + return true; + + mode0 = vsock_net_mode(ns0); + mode1 = vsock_net_mode(ns1); + + /* Different namespaces are only reachable if they are both + * global mode. + */ + return mode0 == VSOCK_NET_MODE_GLOBAL && mode0 == mode1; +} #endif /* __AF_VSOCK_H__ */ diff --git a/include/net/atmclip.h b/include/net/atmclip.h deleted file mode 100644 index 70e350e0db3d..000000000000 --- a/include/net/atmclip.h +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* net/atm/atmarp.h - RFC1577 ATM ARP */ - -/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ - - -#ifndef _ATMCLIP_H -#define _ATMCLIP_H - -#include <linux/netdevice.h> -#include <linux/atm.h> -#include <linux/atmdev.h> -#include <linux/atmarp.h> -#include <linux/spinlock.h> -#include <net/neighbour.h> - - -#define CLIP_VCC(vcc) ((struct clip_vcc *) ((vcc)->user_back)) - -struct sk_buff; - -struct clip_vcc { - struct atm_vcc *vcc; /* VCC descriptor */ - struct atmarp_entry *entry; /* ATMARP table entry, NULL if IP addr. 
- isn't known yet */ - int xoff; /* 1 if send buffer is full */ - unsigned char encap; /* 0: NULL, 1: LLC/SNAP */ - unsigned long last_use; /* last send or receive operation */ - unsigned long idle_timeout; /* keep open idle for so many jiffies*/ - void (*old_push)(struct atm_vcc *vcc,struct sk_buff *skb); - /* keep old push fn for chaining */ - void (*old_pop)(struct atm_vcc *vcc,struct sk_buff *skb); - /* keep old pop fn for chaining */ - struct clip_vcc *next; /* next VCC */ -}; - - -struct atmarp_entry { - struct clip_vcc *vccs; /* active VCCs; NULL if resolution is - pending */ - unsigned long expires; /* entry expiration time */ - struct neighbour *neigh; /* neighbour back-pointer */ -}; - -#define PRIV(dev) ((struct clip_priv *) netdev_priv(dev)) - -struct clip_priv { - int number; /* for convenience ... */ - spinlock_t xoff_lock; /* ensures that pop is atomic (SMP) */ - struct net_device *next; /* next CLIP interface */ -}; - -#endif diff --git a/include/net/ax25.h b/include/net/ax25.h index a7bba42dde15..6b2f518facdb 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -1,486 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * Declarations of AX.25 type objects. 
- * - * Alan Cox (GW4PTS) 10/11/93 - */ #ifndef _AX25_H -#define _AX25_H +#define _AX25_H #include <linux/ax25.h> -#include <linux/spinlock.h> -#include <linux/timer.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <linux/refcount.h> -#include <net/neighbour.h> -#include <net/sock.h> -#include <linux/seq_file.h> -#define AX25_T1CLAMPLO 1 -#define AX25_T1CLAMPHI (30 * HZ) - -#define AX25_BPQ_HEADER_LEN 16 -#define AX25_KISS_HEADER_LEN 1 - -#define AX25_HEADER_LEN 17 -#define AX25_ADDR_LEN 7 -#define AX25_DIGI_HEADER_LEN (AX25_MAX_DIGIS * AX25_ADDR_LEN) -#define AX25_MAX_HEADER_LEN (AX25_HEADER_LEN + AX25_DIGI_HEADER_LEN) - -/* AX.25 Protocol IDs */ -#define AX25_P_ROSE 0x01 -#define AX25_P_VJCOMP 0x06 /* Compressed TCP/IP packet */ - /* Van Jacobsen (RFC 1144) */ -#define AX25_P_VJUNCOMP 0x07 /* Uncompressed TCP/IP packet */ - /* Van Jacobsen (RFC 1144) */ -#define AX25_P_SEGMENT 0x08 /* Segmentation fragment */ -#define AX25_P_TEXNET 0xc3 /* TEXTNET datagram protocol */ -#define AX25_P_LQ 0xc4 /* Link Quality Protocol */ -#define AX25_P_ATALK 0xca /* Appletalk */ -#define AX25_P_ATALK_ARP 0xcb /* Appletalk ARP */ -#define AX25_P_IP 0xcc /* ARPA Internet Protocol */ -#define AX25_P_ARP 0xcd /* ARPA Address Resolution */ -#define AX25_P_FLEXNET 0xce /* FlexNet */ -#define AX25_P_NETROM 0xcf /* NET/ROM */ -#define AX25_P_TEXT 0xF0 /* No layer 3 protocol impl. */ - -/* AX.25 Segment control values */ -#define AX25_SEG_REM 0x7F -#define AX25_SEG_FIRST 0x80 - -#define AX25_CBIT 0x80 /* Command/Response bit */ -#define AX25_EBIT 0x01 /* HDLC Address Extension bit */ -#define AX25_HBIT 0x80 /* Has been repeated bit */ - -#define AX25_SSSID_SPARE 0x60 /* Unused bits in SSID for standard AX.25 */ -#define AX25_ESSID_SPARE 0x20 /* Unused bits in SSID for extended AX.25 */ -#define AX25_DAMA_FLAG 0x20 /* Well, it is *NOT* unused! 
(dl1bke 951121 */ - -#define AX25_COND_ACK_PENDING 0x01 -#define AX25_COND_REJECT 0x02 -#define AX25_COND_PEER_RX_BUSY 0x04 -#define AX25_COND_OWN_RX_BUSY 0x08 -#define AX25_COND_DAMA_MODE 0x10 - -#ifndef _LINUX_NETDEVICE_H -#include <linux/netdevice.h> -#endif - -/* Upper sub-layer (LAPB) definitions */ - -/* Control field templates */ -#define AX25_I 0x00 /* Information frames */ -#define AX25_S 0x01 /* Supervisory frames */ -#define AX25_RR 0x01 /* Receiver ready */ -#define AX25_RNR 0x05 /* Receiver not ready */ -#define AX25_REJ 0x09 /* Reject */ -#define AX25_U 0x03 /* Unnumbered frames */ -#define AX25_SABM 0x2f /* Set Asynchronous Balanced Mode */ -#define AX25_SABME 0x6f /* Set Asynchronous Balanced Mode Extended */ -#define AX25_DISC 0x43 /* Disconnect */ -#define AX25_DM 0x0f /* Disconnected mode */ -#define AX25_UA 0x63 /* Unnumbered acknowledge */ -#define AX25_FRMR 0x87 /* Frame reject */ -#define AX25_UI 0x03 /* Unnumbered information */ -#define AX25_XID 0xaf /* Exchange information */ -#define AX25_TEST 0xe3 /* Test */ - -#define AX25_PF 0x10 /* Poll/final bit for standard AX.25 */ -#define AX25_EPF 0x01 /* Poll/final bit for extended AX.25 */ - -#define AX25_ILLEGAL 0x100 /* Impossible to be a real frame type */ - -#define AX25_POLLOFF 0 -#define AX25_POLLON 1 - -/* AX25 L2 C-bit */ -#define AX25_COMMAND 1 -#define AX25_RESPONSE 2 - -/* Define Link State constants. 
*/ - -enum { - AX25_STATE_0, /* Listening */ - AX25_STATE_1, /* SABM sent */ - AX25_STATE_2, /* DISC sent */ - AX25_STATE_3, /* Established */ - AX25_STATE_4 /* Recovery */ -}; - -#define AX25_MODULUS 8 /* Standard AX.25 modulus */ -#define AX25_EMODULUS 128 /* Extended AX.25 modulus */ - -enum { - AX25_PROTO_STD_SIMPLEX, - AX25_PROTO_STD_DUPLEX, -#ifdef CONFIG_AX25_DAMA_SLAVE - AX25_PROTO_DAMA_SLAVE, -#ifdef CONFIG_AX25_DAMA_MASTER - AX25_PROTO_DAMA_MASTER, -#define AX25_PROTO_MAX AX25_PROTO_DAMA_MASTER -#endif -#endif - __AX25_PROTO_MAX, - AX25_PROTO_MAX = __AX25_PROTO_MAX -1 -}; - -enum { - AX25_VALUES_IPDEFMODE, /* 0=DG 1=VC */ - AX25_VALUES_AXDEFMODE, /* 0=Normal 1=Extended Seq Nos */ - AX25_VALUES_BACKOFF, /* 0=None 1=Linear 2=Exponential */ - AX25_VALUES_CONMODE, /* Allow connected modes - 0=No 1=no "PID text" 2=all PIDs */ - AX25_VALUES_WINDOW, /* Default window size for standard AX.25 */ - AX25_VALUES_EWINDOW, /* Default window size for extended AX.25 */ - AX25_VALUES_T1, /* Default T1 timeout value */ - AX25_VALUES_T2, /* Default T2 timeout value */ - AX25_VALUES_T3, /* Default T3 timeout value */ - AX25_VALUES_IDLE, /* Connected mode idle timer */ - AX25_VALUES_N2, /* Default N2 value */ - AX25_VALUES_PACLEN, /* AX.25 MTU */ - AX25_VALUES_PROTOCOL, /* Std AX.25, DAMA Slave, DAMA Master */ -#ifdef CONFIG_AX25_DAMA_SLAVE - AX25_VALUES_DS_TIMEOUT, /* DAMA Slave timeout */ -#endif - AX25_MAX_VALUES /* THIS MUST REMAIN THE LAST ENTRY OF THIS LIST */ -}; - -#define AX25_DEF_IPDEFMODE 0 /* Datagram */ -#define AX25_DEF_AXDEFMODE 0 /* Normal */ -#define AX25_DEF_BACKOFF 1 /* Linear backoff */ -#define AX25_DEF_CONMODE 2 /* Connected mode allowed */ -#define AX25_DEF_WINDOW 2 /* Window=2 */ -#define AX25_DEF_EWINDOW 32 /* Module-128 Window=32 */ -#define AX25_DEF_T1 10000 /* T1=10s */ -#define AX25_DEF_T2 3000 /* T2=3s */ -#define AX25_DEF_T3 300000 /* T3=300s */ -#define AX25_DEF_N2 10 /* N2=10 */ -#define AX25_DEF_IDLE 0 /* Idle=None */ -#define AX25_DEF_PACLEN 
256 /* Paclen=256 */ -#define AX25_DEF_PROTOCOL AX25_PROTO_STD_SIMPLEX /* Standard AX.25 */ -#define AX25_DEF_DS_TIMEOUT 180000 /* DAMA timeout 3 minutes */ - -typedef struct ax25_uid_assoc { - struct hlist_node uid_node; - refcount_t refcount; - kuid_t uid; - ax25_address call; -} ax25_uid_assoc; - -#define ax25_uid_for_each(__ax25, list) \ - hlist_for_each_entry(__ax25, list, uid_node) - -#define ax25_uid_hold(ax25) \ - refcount_inc(&((ax25)->refcount)) - -static inline void ax25_uid_put(ax25_uid_assoc *assoc) -{ - if (refcount_dec_and_test(&assoc->refcount)) { - kfree(assoc); - } -} - -typedef struct { - ax25_address calls[AX25_MAX_DIGIS]; - unsigned char repeated[AX25_MAX_DIGIS]; - unsigned char ndigi; - signed char lastrepeat; -} ax25_digi; - -typedef struct ax25_route { - struct ax25_route *next; - ax25_address callsign; - struct net_device *dev; - ax25_digi *digipeat; - char ip_mode; -} ax25_route; - -void __ax25_put_route(ax25_route *ax25_rt); - -extern rwlock_t ax25_route_lock; - -static inline void ax25_route_lock_use(void) -{ - read_lock(&ax25_route_lock); -} - -static inline void ax25_route_lock_unuse(void) -{ - read_unlock(&ax25_route_lock); -} - -typedef struct { - char slave; /* slave_mode? */ - struct timer_list slave_timer; /* timeout timer */ - unsigned short slave_timeout; /* when? 
*/ -} ax25_dama_info; - -struct ctl_table; - -typedef struct ax25_dev { - struct list_head list; - - struct net_device *dev; - netdevice_tracker dev_tracker; - - struct net_device *forward; - struct ctl_table_header *sysheader; - int values[AX25_MAX_VALUES]; -#if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) - ax25_dama_info dama; -#endif - refcount_t refcount; - bool device_up; - struct rcu_head rcu; -} ax25_dev; - -typedef struct ax25_cb { - struct hlist_node ax25_node; - ax25_address source_addr, dest_addr; - ax25_digi *digipeat; - ax25_dev *ax25_dev; - netdevice_tracker dev_tracker; - unsigned char iamdigi; - unsigned char state, modulus, pidincl; - unsigned short vs, vr, va; - unsigned char condition, backoff; - unsigned char n2, n2count; - struct timer_list t1timer, t2timer, t3timer, idletimer; - unsigned long t1, t2, t3, idle, rtt; - unsigned short paclen, fragno, fraglen; - struct sk_buff_head write_queue; - struct sk_buff_head reseq_queue; - struct sk_buff_head ack_queue; - struct sk_buff_head frag_queue; - unsigned char window; - struct timer_list timer, dtimer; - struct sock *sk; /* Backlink to socket */ - refcount_t refcount; -} ax25_cb; - -struct ax25_sock { - struct sock sk; - struct ax25_cb *cb; -}; - -#define ax25_sk(ptr) container_of_const(ptr, struct ax25_sock, sk) - -static inline struct ax25_cb *sk_to_ax25(const struct sock *sk) -{ - return ax25_sk(sk)->cb; -} - -#define ax25_for_each(__ax25, list) \ - hlist_for_each_entry(__ax25, list, ax25_node) - -#define ax25_cb_hold(__ax25) \ - refcount_inc(&((__ax25)->refcount)) - -static __inline__ void ax25_cb_put(ax25_cb *ax25) -{ - if (refcount_dec_and_test(&ax25->refcount)) { - kfree(ax25->digipeat); - kfree(ax25); - } -} - -static inline void ax25_dev_hold(ax25_dev *ax25_dev) -{ - refcount_inc(&ax25_dev->refcount); -} - -static inline void ax25_dev_put(ax25_dev *ax25_dev) -{ - if (refcount_dec_and_test(&ax25_dev->refcount)) - kfree_rcu(ax25_dev, rcu); -} -static inline __be16 
ax25_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - skb->dev = dev; - skb_reset_mac_header(skb); - skb->pkt_type = PACKET_HOST; - return htons(ETH_P_AX25); -} - -/* af_ax25.c */ -extern struct hlist_head ax25_list; -extern spinlock_t ax25_list_lock; -void ax25_cb_add(ax25_cb *); -struct sock *ax25_find_listener(ax25_address *, int, struct net_device *, int); -struct sock *ax25_get_socket(ax25_address *, ax25_address *, int); -ax25_cb *ax25_find_cb(const ax25_address *, ax25_address *, ax25_digi *, - struct net_device *); -void ax25_send_to_raw(ax25_address *, struct sk_buff *, int); -void ax25_destroy_socket(ax25_cb *); -ax25_cb * __must_check ax25_create_cb(void); -void ax25_fillin_cb(ax25_cb *, ax25_dev *); -struct sock *ax25_make_new(struct sock *, struct ax25_dev *); - -/* ax25_addr.c */ -extern const ax25_address ax25_bcast; -extern const ax25_address ax25_defaddr; -extern const ax25_address null_ax25_address; -char *ax2asc(char *buf, const ax25_address *); -void asc2ax(ax25_address *addr, const char *callsign); -int ax25cmp(const ax25_address *, const ax25_address *); -int ax25digicmp(const ax25_digi *, const ax25_digi *); -const unsigned char *ax25_addr_parse(const unsigned char *, int, - ax25_address *, ax25_address *, ax25_digi *, int *, int *); -int ax25_addr_build(unsigned char *, const ax25_address *, - const ax25_address *, const ax25_digi *, int, int); -int ax25_addr_size(const ax25_digi *); -void ax25_digi_invert(const ax25_digi *, ax25_digi *); - -/* ax25_dev.c */ -extern spinlock_t ax25_dev_lock; - -#if IS_ENABLED(CONFIG_AX25) -static inline ax25_dev *ax25_dev_ax25dev(const struct net_device *dev) -{ - return rcu_dereference_rtnl(dev->ax25_ptr); -} -#endif - -ax25_dev *ax25_addr_ax25dev(ax25_address *); -void ax25_dev_device_up(struct net_device *); -void ax25_dev_device_down(struct net_device *); -int ax25_fwd_ioctl(unsigned int, struct ax25_fwd_struct *); -struct net_device *ax25_fwd_dev(struct net_device *); -void 
ax25_dev_free(void); - -/* ax25_ds_in.c */ -int ax25_ds_frame_in(ax25_cb *, struct sk_buff *, int); - -/* ax25_ds_subr.c */ -void ax25_ds_nr_error_recovery(ax25_cb *); -void ax25_ds_enquiry_response(ax25_cb *); -void ax25_ds_establish_data_link(ax25_cb *); -void ax25_dev_dama_off(ax25_dev *); -void ax25_dama_on(ax25_cb *); -void ax25_dama_off(ax25_cb *); - -/* ax25_ds_timer.c */ -void ax25_ds_setup_timer(ax25_dev *); -void ax25_ds_set_timer(ax25_dev *); -void ax25_ds_del_timer(ax25_dev *); -void ax25_ds_timer(ax25_cb *); -void ax25_ds_t1_timeout(ax25_cb *); -void ax25_ds_heartbeat_expiry(ax25_cb *); -void ax25_ds_t3timer_expiry(ax25_cb *); -void ax25_ds_idletimer_expiry(ax25_cb *); - -/* ax25_iface.c */ - -struct ax25_protocol { - struct ax25_protocol *next; - unsigned int pid; - int (*func)(struct sk_buff *, ax25_cb *); -}; - -void ax25_register_pid(struct ax25_protocol *ap); -void ax25_protocol_release(unsigned int); - -struct ax25_linkfail { - struct hlist_node lf_node; - void (*func)(ax25_cb *, int); -}; - -void ax25_linkfail_register(struct ax25_linkfail *lf); -void ax25_linkfail_release(struct ax25_linkfail *lf); -int __must_check ax25_listen_register(const ax25_address *, - struct net_device *); -void ax25_listen_release(const ax25_address *, struct net_device *); -int(*ax25_protocol_function(unsigned int))(struct sk_buff *, ax25_cb *); -int ax25_listen_mine(const ax25_address *, struct net_device *); -void ax25_link_failed(ax25_cb *, int); -int ax25_protocol_is_registered(unsigned int); - -/* ax25_in.c */ -int ax25_rx_iframe(ax25_cb *, struct sk_buff *); -int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, - struct net_device *); - -/* ax25_ip.c */ -netdev_tx_t ax25_ip_xmit(struct sk_buff *skb); -extern const struct header_ops ax25_header_ops; - -/* ax25_out.c */ -ax25_cb *ax25_send_frame(struct sk_buff *, int, const ax25_address *, - ax25_address *, ax25_digi *, struct net_device *); -void ax25_output(ax25_cb *, int, struct 
sk_buff *); -void ax25_kick(ax25_cb *); -void ax25_transmit_buffer(ax25_cb *, struct sk_buff *, int); -void ax25_queue_xmit(struct sk_buff *skb, struct net_device *dev); -int ax25_check_iframes_acked(ax25_cb *, unsigned short); - -/* ax25_route.c */ -void ax25_rt_device_down(struct net_device *); -int ax25_rt_ioctl(unsigned int, void __user *); -extern const struct seq_operations ax25_rt_seqops; -ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev); -struct sk_buff *ax25_rt_build_path(struct sk_buff *, ax25_address *, - ax25_address *, ax25_digi *); -void ax25_rt_free(void); - -/* ax25_std_in.c */ -int ax25_std_frame_in(ax25_cb *, struct sk_buff *, int); - -/* ax25_std_subr.c */ -void ax25_std_nr_error_recovery(ax25_cb *); -void ax25_std_establish_data_link(ax25_cb *); -void ax25_std_transmit_enquiry(ax25_cb *); -void ax25_std_enquiry_response(ax25_cb *); -void ax25_std_timeout_response(ax25_cb *); - -/* ax25_std_timer.c */ -void ax25_std_heartbeat_expiry(ax25_cb *); -void ax25_std_t1timer_expiry(ax25_cb *); -void ax25_std_t2timer_expiry(ax25_cb *); -void ax25_std_t3timer_expiry(ax25_cb *); -void ax25_std_idletimer_expiry(ax25_cb *); - -/* ax25_subr.c */ -void ax25_clear_queues(ax25_cb *); -void ax25_frames_acked(ax25_cb *, unsigned short); -void ax25_requeue_frames(ax25_cb *); -int ax25_validate_nr(ax25_cb *, unsigned short); -int ax25_decode(ax25_cb *, struct sk_buff *, int *, int *, int *); -void ax25_send_control(ax25_cb *, int, int, int); -void ax25_return_dm(struct net_device *, ax25_address *, ax25_address *, - ax25_digi *); -void ax25_calculate_t1(ax25_cb *); -void ax25_calculate_rtt(ax25_cb *); -void ax25_disconnect(ax25_cb *, int); - -/* ax25_timer.c */ -void ax25_setup_timers(ax25_cb *); -void ax25_start_heartbeat(ax25_cb *); -void ax25_start_t1timer(ax25_cb *); -void ax25_start_t2timer(ax25_cb *); -void ax25_start_t3timer(ax25_cb *); -void ax25_start_idletimer(ax25_cb *); -void ax25_stop_heartbeat(ax25_cb *); -void 
ax25_stop_t1timer(ax25_cb *); -void ax25_stop_t2timer(ax25_cb *); -void ax25_stop_t3timer(ax25_cb *); -void ax25_stop_idletimer(ax25_cb *); -int ax25_t1timer_running(ax25_cb *); -unsigned long ax25_display_timer(struct timer_list *); - -/* ax25_uid.c */ -extern int ax25_uid_policy; -ax25_uid_assoc *ax25_findbyuid(kuid_t); -int __must_check ax25_uid_ioctl(int, struct sockaddr_ax25 *); -extern const struct seq_operations ax25_uid_seqops; -void ax25_uid_free(void); - -/* sysctl_net_ax25.c */ -#ifdef CONFIG_SYSCTL -int ax25_register_dev_sysctl(ax25_dev *ax25_dev); -void ax25_unregister_dev_sysctl(ax25_dev *ax25_dev); -#else -static inline int ax25_register_dev_sysctl(ax25_dev *ax25_dev) { return 0; } -static inline void ax25_unregister_dev_sysctl(ax25_dev *ax25_dev) {} -#endif /* CONFIG_SYSCTL */ +#define AX25_ADDR_LEN 7 +#define AX25_P_IP 0xCC #endif diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index d46ed9011ee5..69eed69f7f26 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -130,21 +130,30 @@ struct bt_voice { #define BT_RCVMTU 13 #define BT_PHY 14 -#define BT_PHY_BR_1M_1SLOT 0x00000001 -#define BT_PHY_BR_1M_3SLOT 0x00000002 -#define BT_PHY_BR_1M_5SLOT 0x00000004 -#define BT_PHY_EDR_2M_1SLOT 0x00000008 -#define BT_PHY_EDR_2M_3SLOT 0x00000010 -#define BT_PHY_EDR_2M_5SLOT 0x00000020 -#define BT_PHY_EDR_3M_1SLOT 0x00000040 -#define BT_PHY_EDR_3M_3SLOT 0x00000080 -#define BT_PHY_EDR_3M_5SLOT 0x00000100 -#define BT_PHY_LE_1M_TX 0x00000200 -#define BT_PHY_LE_1M_RX 0x00000400 -#define BT_PHY_LE_2M_TX 0x00000800 -#define BT_PHY_LE_2M_RX 0x00001000 -#define BT_PHY_LE_CODED_TX 0x00002000 -#define BT_PHY_LE_CODED_RX 0x00004000 +#define BT_PHY_BR_1M_1SLOT BIT(0) +#define BT_PHY_BR_1M_3SLOT BIT(1) +#define BT_PHY_BR_1M_5SLOT BIT(2) +#define BT_PHY_EDR_2M_1SLOT BIT(3) +#define BT_PHY_EDR_2M_3SLOT BIT(4) +#define BT_PHY_EDR_2M_5SLOT BIT(5) +#define BT_PHY_EDR_3M_1SLOT BIT(6) +#define BT_PHY_EDR_3M_3SLOT 
BIT(7) +#define BT_PHY_EDR_3M_5SLOT BIT(8) +#define BT_PHY_LE_1M_TX BIT(9) +#define BT_PHY_LE_1M_RX BIT(10) +#define BT_PHY_LE_2M_TX BIT(11) +#define BT_PHY_LE_2M_RX BIT(12) +#define BT_PHY_LE_CODED_TX BIT(13) +#define BT_PHY_LE_CODED_RX BIT(14) + +#define BT_PHY_BREDR_MASK (BT_PHY_BR_1M_1SLOT | BT_PHY_BR_1M_3SLOT | \ + BT_PHY_BR_1M_5SLOT | BT_PHY_EDR_2M_1SLOT | \ + BT_PHY_EDR_2M_3SLOT | BT_PHY_EDR_2M_5SLOT | \ + BT_PHY_EDR_3M_1SLOT | BT_PHY_EDR_3M_3SLOT | \ + BT_PHY_EDR_3M_5SLOT) +#define BT_PHY_LE_MASK (BT_PHY_LE_1M_TX | BT_PHY_LE_1M_RX | \ + BT_PHY_LE_2M_TX | BT_PHY_LE_2M_RX | \ + BT_PHY_LE_CODED_TX | BT_PHY_LE_CODED_RX) #define BT_MODE 15 @@ -173,7 +182,7 @@ struct bt_iso_io_qos { __u32 interval; __u16 latency; __u16 sdu; - __u8 phy; + __u8 phys; __u8 rtn; }; @@ -212,9 +221,9 @@ struct bt_iso_qos { }; }; -#define BT_ISO_PHY_1M 0x01 -#define BT_ISO_PHY_2M 0x02 -#define BT_ISO_PHY_CODED 0x04 +#define BT_ISO_PHY_1M BIT(0) +#define BT_ISO_PHY_2M BIT(1) +#define BT_ISO_PHY_CODED BIT(2) #define BT_ISO_PHY_ANY (BT_ISO_PHY_1M | BT_ISO_PHY_2M | \ BT_ISO_PHY_CODED) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a27cd3626b87..572b1c620c5d 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -654,6 +654,8 @@ enum { #define HCI_LE_ISO_BROADCASTER 0x40 #define HCI_LE_ISO_SYNC_RECEIVER 0x80 #define HCI_LE_LL_EXT_FEATURE 0x80 +#define HCI_LE_CS 0x40 +#define HCI_LE_CS_HOST 0x80 /* Connection modes */ #define HCI_CM_ACTIVE 0x0000 @@ -1466,8 +1468,12 @@ struct hci_rp_read_data_block_size { } __packed; #define HCI_OP_READ_LOCAL_CODECS 0x100b -struct hci_std_codecs { +struct hci_std_codecs_hdr { __u8 num; +} __packed; + +struct hci_std_codecs { + struct hci_std_codecs_hdr; __u8 codec[]; } __packed; @@ -1485,7 +1491,7 @@ struct hci_vnd_codecs { struct hci_rp_read_local_supported_codecs { __u8 status; - struct hci_std_codecs std_codecs; + struct hci_std_codecs_hdr std_codecs; struct hci_vnd_codecs vnd_codecs; } __packed; @@ 
-1502,8 +1508,12 @@ struct hci_std_codec_v2 { __u8 transport; } __packed; -struct hci_std_codecs_v2 { +struct hci_std_codecs_v2_hdr { __u8 num; +} __packed; + +struct hci_std_codecs_v2 { + struct hci_std_codecs_v2_hdr; struct hci_std_codec_v2 codec[]; } __packed; @@ -1520,7 +1530,7 @@ struct hci_vnd_codecs_v2 { struct hci_rp_read_local_supported_codecs_v2 { __u8 status; - struct hci_std_codecs_v2 std_codecs; + struct hci_std_codecs_v2_hdr std_codecs; struct hci_vnd_codecs_v2 vendor_codecs; } __packed; @@ -1883,6 +1893,15 @@ struct hci_cp_le_set_default_phy { #define HCI_LE_SET_PHY_2M 0x02 #define HCI_LE_SET_PHY_CODED 0x04 +#define HCI_OP_LE_SET_PHY 0x2032 +struct hci_cp_le_set_phy { + __le16 handle; + __u8 all_phys; + __u8 tx_phys; + __u8 rx_phys; + __le16 phy_opts; +} __packed; + #define HCI_OP_LE_SET_EXT_SCAN_PARAMS 0x2041 struct hci_cp_le_set_ext_scan_params { __u8 own_addr_type; @@ -2136,8 +2155,8 @@ struct hci_cis_params { __u8 cis_id; __le16 c_sdu; __le16 p_sdu; - __u8 c_phy; - __u8 p_phy; + __u8 c_phys; + __u8 p_phys; __u8 c_rtn; __u8 p_rtn; } __packed; @@ -2269,6 +2288,204 @@ struct hci_cp_le_read_all_remote_features { __u8 pages; } __packed; +/* Channel Sounding Commands */ +#define HCI_OP_LE_CS_RD_LOCAL_SUPP_CAP 0x2089 +struct hci_rp_le_cs_rd_local_supp_cap { + __u8 status; + __u8 num_config_supported; + __le16 max_consecutive_procedures_supported; + __u8 num_antennas_supported; + __u8 max_antenna_paths_supported; + __u8 roles_supported; + __u8 modes_supported; + __u8 rtt_capability; + __u8 rtt_aa_only_n; + __u8 rtt_sounding_n; + __u8 rtt_random_payload_n; + __le16 nadm_sounding_capability; + __le16 nadm_random_capability; + __u8 cs_sync_phys_supported; + __le16 subfeatures_supported; + __le16 t_ip1_times_supported; + __le16 t_ip2_times_supported; + __le16 t_fcs_times_supported; + __le16 t_pm_times_supported; + __u8 t_sw_time_supported; + __u8 tx_snr_capability; +} __packed; + +#define HCI_OP_LE_CS_RD_RMT_SUPP_CAP 0x208A +struct 
hci_cp_le_cs_rd_local_supp_cap { + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_WR_CACHED_RMT_SUPP_CAP 0x208B +struct hci_cp_le_cs_wr_cached_rmt_supp_cap { + __le16 handle; + __u8 num_config_supported; + __le16 max_consecutive_procedures_supported; + __u8 num_antennas_supported; + __u8 max_antenna_paths_supported; + __u8 roles_supported; + __u8 modes_supported; + __u8 rtt_capability; + __u8 rtt_aa_only_n; + __u8 rtt_sounding_n; + __u8 rtt_random_payload_n; + __le16 nadm_sounding_capability; + __le16 nadm_random_capability; + __u8 cs_sync_phys_supported; + __le16 subfeatures_supported; + __le16 t_ip1_times_supported; + __le16 t_ip2_times_supported; + __le16 t_fcs_times_supported; + __le16 t_pm_times_supported; + __u8 t_sw_time_supported; + __u8 tx_snr_capability; +} __packed; + +struct hci_rp_le_cs_wr_cached_rmt_supp_cap { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_SEC_ENABLE 0x208C +struct hci_cp_le_cs_sec_enable { + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_SET_DEFAULT_SETTINGS 0x208D +struct hci_cp_le_cs_set_default_settings { + __le16 handle; + __u8 role_enable; + __u8 cs_sync_ant_sel; + __s8 max_tx_power; +} __packed; + +struct hci_rp_le_cs_set_default_settings { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_RD_RMT_FAE_TABLE 0x208E +struct hci_cp_le_cs_rd_rmt_fae_table { + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_WR_CACHED_RMT_FAE_TABLE 0x208F +struct hci_cp_le_cs_wr_rmt_cached_fae_table { + __le16 handle; + __u8 remote_fae_table[72]; +} __packed; + +struct hci_rp_le_cs_wr_rmt_cached_fae_table { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_CREATE_CONFIG 0x2090 +struct hci_cp_le_cs_create_config { + __le16 handle; + __u8 config_id; + __u8 create_context; + __u8 main_mode_type; + __u8 sub_mode_type; + __u8 min_main_mode_steps; + __u8 max_main_mode_steps; + __u8 main_mode_repetition; + __u8 mode_0_steps; + __u8 role; + __u8 rtt_type; + __u8 cs_sync_phy; + __u8 
channel_map[10]; + __u8 channel_map_repetition; + __u8 channel_selection_type; + __u8 ch3c_shape; + __u8 ch3c_jump; + __u8 reserved; +} __packed; + +#define HCI_OP_LE_CS_REMOVE_CONFIG 0x2091 +struct hci_cp_le_cs_remove_config { + __le16 handle; + __u8 config_id; +} __packed; + +#define HCI_OP_LE_CS_SET_CH_CLASSIFICATION 0x2092 +struct hci_cp_le_cs_set_ch_classification { + __u8 ch_classification[10]; +} __packed; + +struct hci_rp_le_cs_set_ch_classification { + __u8 status; +} __packed; + +#define HCI_OP_LE_CS_SET_PROC_PARAM 0x2093 +struct hci_cp_le_cs_set_proc_param { + __le16 handle; + __u8 config_id; + __le16 max_procedure_len; + __le16 min_procedure_interval; + __le16 max_procedure_interval; + __le16 max_procedure_count; + __u8 min_subevent_len[3]; + __u8 max_subevent_len[3]; + __u8 tone_antenna_config_selection; + __u8 phy; + __u8 tx_power_delta; + __u8 preferred_peer_antenna; + __u8 snr_control_initiator; + __u8 snr_control_reflector; +} __packed; + +struct hci_rp_le_cs_set_proc_param { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_SET_PROC_ENABLE 0x2094 +struct hci_cp_le_cs_set_proc_enable { + __le16 handle; + __u8 config_id; + __u8 enable; +} __packed; + +#define HCI_OP_LE_CS_TEST 0x2095 +struct hci_cp_le_cs_test { + __u8 main_mode_type; + __u8 sub_mode_type; + __u8 main_mode_repetition; + __u8 mode_0_steps; + __u8 role; + __u8 rtt_type; + __u8 cs_sync_phy; + __u8 cs_sync_antenna_selection; + __u8 subevent_len[3]; + __le16 subevent_interval; + __u8 max_num_subevents; + __u8 transmit_power_level; + __u8 t_ip1_time; + __u8 t_ip2_time; + __u8 t_fcs_time; + __u8 t_pm_time; + __u8 t_sw_time; + __u8 tone_antenna_config_selection; + __u8 reserved; + __u8 snr_control_initiator; + __u8 snr_control_reflector; + __le16 drbg_nonce; + __u8 channel_map_repetition; + __le16 override_config; + __u8 override_parameters_length; + __u8 override_parameters_data[]; +} __packed; + +struct hci_rp_le_cs_test { + __u8 status; +} __packed; + +#define 
HCI_OP_LE_CS_TEST_END 0x2096 + /* ---- HCI Events ---- */ struct hci_ev_status { __u8 status; @@ -2960,6 +3177,129 @@ struct hci_evt_le_read_all_remote_features_complete { __u8 features[248]; } __packed; +/* Channel Sounding Events */ +#define HCI_EVT_LE_CS_READ_RMT_SUPP_CAP_COMPLETE 0x2C +struct hci_evt_le_cs_read_rmt_supp_cap_complete { + __u8 status; + __le16 handle; + __u8 num_configs_supp; + __le16 max_consec_proc_supp; + __u8 num_ant_supp; + __u8 max_ant_path_supp; + __u8 roles_supp; + __u8 modes_supp; + __u8 rtt_cap; + __u8 rtt_aa_only_n; + __u8 rtt_sounding_n; + __u8 rtt_rand_payload_n; + __le16 nadm_sounding_cap; + __le16 nadm_rand_cap; + __u8 cs_sync_phys_supp; + __le16 sub_feat_supp; + __le16 t_ip1_times_supp; + __le16 t_ip2_times_supp; + __le16 t_fcs_times_supp; + __le16 t_pm_times_supp; + __u8 t_sw_times_supp; + __u8 tx_snr_cap; +} __packed; + +#define HCI_EVT_LE_CS_READ_RMT_FAE_TABLE_COMPLETE 0x2D +struct hci_evt_le_cs_read_rmt_fae_table_complete { + __u8 status; + __le16 handle; + __u8 remote_fae_table[72]; +} __packed; + +#define HCI_EVT_LE_CS_SECURITY_ENABLE_COMPLETE 0x2E +struct hci_evt_le_cs_security_enable_complete { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_EVT_LE_CS_CONFIG_COMPLETE 0x2F +struct hci_evt_le_cs_config_complete { + __u8 status; + __le16 handle; + __u8 config_id; + __u8 action; + __u8 main_mode_type; + __u8 sub_mode_type; + __u8 min_main_mode_steps; + __u8 max_main_mode_steps; + __u8 main_mode_rep; + __u8 mode_0_steps; + __u8 role; + __u8 rtt_type; + __u8 cs_sync_phy; + __u8 channel_map[10]; + __u8 channel_map_rep; + __u8 channel_sel_type; + __u8 ch3c_shape; + __u8 ch3c_jump; + __u8 reserved; + __u8 t_ip1_time; + __u8 t_ip2_time; + __u8 t_fcs_time; + __u8 t_pm_time; +} __packed; + +#define HCI_EVT_LE_CS_PROCEDURE_ENABLE_COMPLETE 0x30 +struct hci_evt_le_cs_procedure_enable_complete { + __u8 status; + __le16 handle; + __u8 config_id; + __u8 state; + __u8 tone_ant_config_sel; + __s8 sel_tx_pwr; + __u8 sub_evt_len[3]; 
+ __u8 sub_evts_per_evt; + __le16 sub_evt_intrvl; + __le16 evt_intrvl; + __le16 proc_intrvl; + __le16 proc_counter; + __le16 max_proc_len; +} __packed; + +#define HCI_EVT_LE_CS_SUBEVENT_RESULT 0x31 +struct hci_evt_le_cs_subevent_result { + __le16 handle; + __u8 config_id; + __le16 start_acl_conn_evt_counter; + __le16 proc_counter; + __le16 freq_comp; + __u8 ref_pwr_lvl; + __u8 proc_done_status; + __u8 subevt_done_status; + __u8 abort_reason; + __u8 num_ant_paths; + __u8 num_steps_reported; + __u8 step_mode[0]; /* depends on num_steps_reported */ + __u8 step_channel[0]; /* depends on num_steps_reported */ + __u8 step_data_length[0]; /* depends on num_steps_reported */ + __u8 step_data[0]; /* depends on num_steps_reported */ +} __packed; + +#define HCI_EVT_LE_CS_SUBEVENT_RESULT_CONTINUE 0x32 +struct hci_evt_le_cs_subevent_result_continue { + __le16 handle; + __u8 config_id; + __u8 proc_done_status; + __u8 subevt_done_status; + __u8 abort_reason; + __u8 num_ant_paths; + __u8 num_steps_reported; + __u8 step_mode[0]; /* depends on num_steps_reported */ + __u8 step_channel[0]; /* depends on num_steps_reported */ + __u8 step_data_length[0]; /* depends on num_steps_reported */ + __u8 step_data[0]; /* depends on num_steps_reported */ +} __packed; + +#define HCI_EVT_LE_CS_TEST_END_COMPLETE 0x33 +struct hci_evt_le_cs_test_end_complete { + __u8 status; +} __packed; + #define HCI_EV_VENDOR 0xff /* Internal events generated by Bluetooth stack */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4263e71a23ef..a7bffb908c1e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -730,6 +730,8 @@ struct hci_conn { __u16 le_per_adv_data_offset; __u8 le_adv_phy; __u8 le_adv_sec_phy; + __u8 le_tx_def_phys; + __u8 le_rx_def_phys; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; @@ -2071,6 +2073,12 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define ll_ext_feature_capable(dev) \ ((dev)->le_features[7] & 
HCI_LE_LL_EXT_FEATURE) +/* Channel sounding support */ +#define le_cs_capable(dev) \ + ((dev)->le_features[5] & HCI_LE_CS) +#define le_cs_host_capable(dev) \ + ((dev)->le_features[5] & HCI_LE_CS_HOST) + #define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \ (!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG))) @@ -2334,6 +2342,7 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); void *hci_recv_event_data(struct hci_dev *hdev, __u8 event); u32 hci_conn_get_phy(struct hci_conn *conn); +int hci_conn_set_phy(struct hci_conn *conn, u32 phys); /* ----- HCI Sockets ----- */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 56076bbc981d..73e494b2591d 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -191,3 +191,6 @@ int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_past_sync(struct hci_conn *conn, struct hci_conn *le); int hci_le_read_remote_features(struct hci_conn *conn); + +int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type); +int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys); diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 00e182a22720..5172afee5494 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -284,9 +284,9 @@ struct l2cap_conn_rsp { #define L2CAP_CR_LE_BAD_KEY_SIZE 0x0007 #define L2CAP_CR_LE_ENCRYPTION 0x0008 #define L2CAP_CR_LE_INVALID_SCID 0x0009 -#define L2CAP_CR_LE_SCID_IN_USE 0X000A -#define L2CAP_CR_LE_UNACCEPT_PARAMS 0X000B -#define L2CAP_CR_LE_INVALID_PARAMS 0X000C +#define L2CAP_CR_LE_SCID_IN_USE 0x000A +#define L2CAP_CR_LE_UNACCEPT_PARAMS 0x000B +#define L2CAP_CR_LE_INVALID_PARAMS 0x000C /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 @@ -493,6 +493,8 @@ struct l2cap_ecred_reconf_req { #define L2CAP_RECONF_SUCCESS 0x0000 #define 
L2CAP_RECONF_INVALID_MTU 0x0001 #define L2CAP_RECONF_INVALID_MPS 0x0002 +#define L2CAP_RECONF_INVALID_CID 0x0003 +#define L2CAP_RECONF_INVALID_PARAMS 0x0004 struct l2cap_ecred_reconf_rsp { __le16 result; @@ -655,8 +657,8 @@ struct l2cap_conn { struct sk_buff *rx_skb; __u32 rx_len; + struct ida tx_ida; __u8 tx_ident; - struct mutex ident_lock; struct sk_buff_head pending_rx; struct work_struct pending_rx_work; diff --git a/include/net/bonding.h b/include/net/bonding.h index 462078403557..edd1942dcd73 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -69,9 +69,6 @@ #define bond_first_slave_rcu(bond) \ netdev_lower_get_first_private_rcu(bond->dev) -#define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond)) -#define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond)) - /** * bond_for_each_slave - iterate over all slaves * @bond: the bond holding this list @@ -91,22 +88,22 @@ NETIF_F_GSO_ESP) #ifdef CONFIG_NET_POLL_CONTROLLER -extern atomic_t netpoll_block_tx; +DECLARE_STATIC_KEY_FALSE(netpoll_block_tx); static inline void block_netpoll_tx(void) { - atomic_inc(&netpoll_block_tx); + static_branch_inc(&netpoll_block_tx); } static inline void unblock_netpoll_tx(void) { - atomic_dec(&netpoll_block_tx); + static_branch_dec(&netpoll_block_tx); } static inline int is_netpoll_tx_blocked(struct net_device *dev) { - if (unlikely(netpoll_tx_running(dev))) - return atomic_read(&netpoll_block_tx); + if (static_branch_unlikely(&netpoll_block_tx)) + return netpoll_tx_running(dev); return 0; } #else @@ -254,6 +251,7 @@ struct bonding { struct delayed_work ad_work; struct delayed_work mcast_work; struct delayed_work slave_arr_work; + struct delayed_work peer_notify_work; #ifdef CONFIG_DEBUG_FS /* debugging support via debugfs */ struct dentry *debug_dir; @@ -698,6 +696,7 @@ void bond_debug_register(struct bonding *bond); void bond_debug_unregister(struct bonding *bond); void bond_debug_reregister(struct bonding *bond); const char *bond_mode_name(int 
mode); +bool __bond_xdp_check(int mode, int xmit_policy); bool bond_xdp_check(struct bonding *bond, int mode); void bond_setup(struct net_device *bond_dev); unsigned int bond_get_num_tx_queues(void); @@ -710,6 +709,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, int level); int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave); void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay); +void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay); void bond_work_init_all(struct bonding *bond); void bond_work_cancel_all(struct bonding *bond); diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h deleted file mode 100644 index b655d8666f55..000000000000 --- a/include/net/caif/caif_dev.h +++ /dev/null @@ -1,128 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#ifndef CAIF_DEV_H_ -#define CAIF_DEV_H_ - -#include <net/caif/caif_layer.h> -#include <net/caif/cfcnfg.h> -#include <net/caif/caif_device.h> -#include <linux/caif/caif_socket.h> -#include <linux/if.h> -#include <linux/net.h> - -/** - * struct caif_param - CAIF parameters. - * @size: Length of data - * @data: Binary Data Blob - */ -struct caif_param { - u16 size; - u8 data[256]; -}; - -/** - * struct caif_connect_request - Request data for CAIF channel setup. - * @protocol: Type of CAIF protocol to use (at, datagram etc) - * @sockaddr: Socket address to connect. - * @priority: Priority of the connection. - * @link_selector: Link selector (high bandwidth or low latency) - * @ifindex: kernel index of the interface. - * @param: Connect Request parameters (CAIF_SO_REQ_PARAM). - * - * This struct is used when connecting a CAIF channel. - * It contains all CAIF channel configuration options. 
- */ -struct caif_connect_request { - enum caif_protocol_type protocol; - struct sockaddr_caif sockaddr; - enum caif_channel_priority priority; - enum caif_link_selector link_selector; - int ifindex; - struct caif_param param; -}; - -/** - * caif_connect_client - Connect a client to CAIF Core Stack. - * @config: Channel setup parameters, specifying what address - * to connect on the Modem. - * @client_layer: User implementation of client layer. This layer - * MUST have receive and control callback functions - * implemented. - * @ifindex: Link layer interface index used for this connection. - * @headroom: Head room needed by CAIF protocol. - * @tailroom: Tail room needed by CAIF protocol. - * - * This function connects a CAIF channel. The Client must implement - * the struct cflayer. This layer represents the Client layer and holds - * receive functions and control callback functions. Control callback - * function will receive information about connect/disconnect responses, - * flow control etc (see enum caif_control). - * E.g. CAIF Socket will call this function for each socket it connects - * and have one client_layer instance for each socket. - */ -int caif_connect_client(struct net *net, - struct caif_connect_request *conn_req, - struct cflayer *client_layer, int *ifindex, - int *headroom, int *tailroom); - -/** - * caif_disconnect_client - Disconnects a client from the CAIF stack. - * - * @client_layer: Client layer to be disconnected. - */ -int caif_disconnect_client(struct net *net, struct cflayer *client_layer); - - -/** - * caif_client_register_refcnt - register ref-count functions provided by client. - * - * @adapt_layer: Client layer using CAIF Stack. - * @hold: Function provided by client layer increasing ref-count - * @put: Function provided by client layer decreasing ref-count - * - * Client of the CAIF Stack must register functions for reference counting. 
- * These functions are called by the CAIF Stack for every upstream packet, - * and must therefore be implemented efficiently. - * - * Client should call caif_free_client when reference count degrease to zero. - */ - -void caif_client_register_refcnt(struct cflayer *adapt_layer, - void (*hold)(struct cflayer *lyr), - void (*put)(struct cflayer *lyr)); -/** - * caif_free_client - Free memory used to manage the client in the CAIF Stack. - * - * @client_layer: Client layer to be removed. - * - * This function must be called from client layer in order to free memory. - * Caller must guarantee that no packets are in flight upstream when calling - * this function. - */ -void caif_free_client(struct cflayer *adap_layer); - -/** - * struct caif_enroll_dev - Enroll a net-device as a CAIF Link layer - * @dev: Network device to enroll. - * @caifdev: Configuration information from CAIF Link Layer - * @link_support: Link layer support layer - * @head_room: Head room needed by link support layer - * @layer: Lowest layer in CAIF stack - * @rcv_fun: Receive function for CAIF stack. - * - * This function enroll a CAIF link layer into CAIF Stack and - * expects the interface to be able to handle CAIF payload. - * The link_support layer is used to add any Link Layer specific - * framing. 
- */ -int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, - struct cflayer *link_support, int head_room, - struct cflayer **layer, int (**rcv_func)( - struct sk_buff *, struct net_device *, - struct packet_type *, struct net_device *)); - -#endif /* CAIF_DEV_H_ */ diff --git a/include/net/caif/caif_device.h b/include/net/caif/caif_device.h deleted file mode 100644 index 91d1fd5b44a4..000000000000 --- a/include/net/caif/caif_device.h +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#ifndef CAIF_DEVICE_H_ -#define CAIF_DEVICE_H_ -#include <linux/kernel.h> -#include <linux/net.h> -#include <linux/netdevice.h> -#include <linux/caif/caif_socket.h> -#include <net/caif/caif_device.h> - -/** - * struct caif_dev_common - data shared between CAIF drivers and stack. - * @flowctrl: Flow Control callback function. This function is - * supplied by CAIF Core Stack and is used by CAIF - * Link Layer to send flow-stop to CAIF Core. - * The flow information will be distributed to all - * clients of CAIF. - * - * @link_select: Profile of device, either high-bandwidth or - * low-latency. This member is set by CAIF Link - * Layer Device in order to indicate if this device - * is a high bandwidth or low latency device. - * - * @use_frag: CAIF Frames may be fragmented. - * Is set by CAIF Link Layer in order to indicate if the - * interface receives fragmented frames that must be - * assembled by CAIF Core Layer. - * - * @use_fcs: Indicate if Frame CheckSum (fcs) is used. - * Is set if the physical interface is - * using Frame Checksum on the CAIF Frames. - * - * @use_stx: Indicate STart of frame eXtension (stx) in use. - * Is set if the CAIF Link Layer expects - * CAIF Frames to start with the STX byte. - * - * This structure is shared between the CAIF drivers and the CAIF stack. - * It is used by the device to register its behavior. 
- * CAIF Core layer must set the member flowctrl in order to supply - * CAIF Link Layer with the flow control function. - * - */ - struct caif_dev_common { - void (*flowctrl)(struct net_device *net, int on); - enum caif_link_selector link_select; - int use_frag; - int use_fcs; - int use_stx; -}; - -#endif /* CAIF_DEVICE_H_ */ diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h deleted file mode 100644 index 053e7c6a6a66..000000000000 --- a/include/net/caif/caif_layer.h +++ /dev/null @@ -1,277 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#ifndef CAIF_LAYER_H_ -#define CAIF_LAYER_H_ - -#include <linux/list.h> - -struct cflayer; -struct cfpkt; -struct caif_payload_info; - -#define CAIF_LAYER_NAME_SZ 16 - -/** - * caif_assert() - Assert function for CAIF. - * @assert: expression to evaluate. - * - * This function will print a error message and a do WARN_ON if the - * assertion fails. Normally this will do a stack up at the current location. - */ -#define caif_assert(assert) \ -do { \ - if (!(assert)) { \ - pr_err("caif:Assert detected:'%s'\n", #assert); \ - WARN_ON(!(assert)); \ - } \ -} while (0) - -/** - * enum caif_ctrlcmd - CAIF Stack Control Signaling sent in layer.ctrlcmd(). - * - * @CAIF_CTRLCMD_FLOW_OFF_IND: Flow Control is OFF, transmit function - * should stop sending data - * - * @CAIF_CTRLCMD_FLOW_ON_IND: Flow Control is ON, transmit function - * can start sending data - * - * @CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: Remote end modem has decided to close - * down channel - * - * @CAIF_CTRLCMD_INIT_RSP: Called initially when the layer below - * has finished initialization - * - * @CAIF_CTRLCMD_DEINIT_RSP: Called when de-initialization is - * complete - * - * @CAIF_CTRLCMD_INIT_FAIL_RSP: Called if initialization fails - * - * @_CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND: CAIF Link layer temporarily cannot - * send more packets. 
- * @_CAIF_CTRLCMD_PHYIF_FLOW_ON_IND: Called if CAIF Link layer is able - * to send packets again. - * @_CAIF_CTRLCMD_PHYIF_DOWN_IND: Called if CAIF Link layer is going - * down. - * - * These commands are sent upwards in the CAIF stack to the CAIF Client. - * They are used for signaling originating from the modem or CAIF Link Layer. - * These are either responses (*_RSP) or events (*_IND). - */ -enum caif_ctrlcmd { - CAIF_CTRLCMD_FLOW_OFF_IND, - CAIF_CTRLCMD_FLOW_ON_IND, - CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, - CAIF_CTRLCMD_INIT_RSP, - CAIF_CTRLCMD_DEINIT_RSP, - CAIF_CTRLCMD_INIT_FAIL_RSP, - _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND, - _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND, - _CAIF_CTRLCMD_PHYIF_DOWN_IND, -}; - -/** - * enum caif_modemcmd - Modem Control Signaling, sent from CAIF Client - * to the CAIF Link Layer or modem. - * - * @CAIF_MODEMCMD_FLOW_ON_REQ: Flow Control is ON, transmit function - * can start sending data. - * - * @CAIF_MODEMCMD_FLOW_OFF_REQ: Flow Control is OFF, transmit function - * should stop sending data. - * - * @_CAIF_MODEMCMD_PHYIF_USEFULL: Notify physical layer that it is in use - * - * @_CAIF_MODEMCMD_PHYIF_USELESS: Notify physical layer that it is - * no longer in use. - * - * These are requests sent 'downwards' in the stack. - * Flow ON, OFF can be indicated to the modem. - */ -enum caif_modemcmd { - CAIF_MODEMCMD_FLOW_ON_REQ = 0, - CAIF_MODEMCMD_FLOW_OFF_REQ = 1, - _CAIF_MODEMCMD_PHYIF_USEFULL = 3, - _CAIF_MODEMCMD_PHYIF_USELESS = 4 -}; - -/** - * enum caif_direction - CAIF Packet Direction. - * Indicate if a packet is to be sent out or to be received in. - * @CAIF_DIR_IN: Incoming packet received. - * @CAIF_DIR_OUT: Outgoing packet to be transmitted. - */ -enum caif_direction { - CAIF_DIR_IN = 0, - CAIF_DIR_OUT = 1 -}; - -/** - * struct cflayer - CAIF Stack layer. - * Defines the framework for the CAIF Core Stack. - * @up: Pointer up to the layer above. - * @dn: Pointer down to the layer below. 
- * @node: List node used when layer participate in a list. - * @receive: Packet receive function. - * @transmit: Packet transmit function. - * @ctrlcmd: Used for control signalling upwards in the stack. - * @modemcmd: Used for control signaling downwards in the stack. - * @id: The identity of this layer - * @name: Name of the layer. - * - * This structure defines the layered structure in CAIF. - * - * It defines CAIF layering structure, used by all CAIF Layers and the - * layers interfacing CAIF. - * - * In order to integrate with CAIF an adaptation layer on top of the CAIF stack - * and PHY layer below the CAIF stack - * must be implemented. These layer must follow the design principles below. - * - * Principles for layering of protocol layers: - * - All layers must use this structure. If embedding it, then place this - * structure first in the layer specific structure. - * - * - Each layer should not depend on any others layer's private data. - * - * - In order to send data upwards do - * layer->up->receive(layer->up, packet); - * - * - In order to send data downwards do - * layer->dn->transmit(layer->dn, info, packet); - */ -struct cflayer { - struct cflayer *up; - struct cflayer *dn; - struct list_head node; - - /* - * receive() - Receive Function (non-blocking). - * Contract: Each layer must implement a receive function passing the - * CAIF packets upwards in the stack. - * Packet handling rules: - * - The CAIF packet (cfpkt) ownership is passed to the - * called receive function. This means that the - * packet cannot be accessed after passing it to the - * above layer using up->receive(). - * - * - If parsing of the packet fails, the packet must be - * destroyed and negative error code returned - * from the function. - * EXCEPTION: If the framing layer (cffrml) returns - * -EILSEQ, the packet is not freed. - * - * - If parsing succeeds (and above layers return OK) then - * the function must return a value >= 0. 
- * - * Returns result < 0 indicates an error, 0 or positive value - * indicates success. - * - * @layr: Pointer to the current layer the receive function is - * implemented for (this pointer). - * @cfpkt: Pointer to CaifPacket to be handled. - */ - int (*receive)(struct cflayer *layr, struct cfpkt *cfpkt); - - /* - * transmit() - Transmit Function (non-blocking). - * Contract: Each layer must implement a transmit function passing the - * CAIF packet downwards in the stack. - * Packet handling rules: - * - The CAIF packet (cfpkt) ownership is passed to the - * transmit function. This means that the packet - * cannot be accessed after passing it to the below - * layer using dn->transmit(). - * - * - Upon error the packet ownership is still passed on, - * so the packet shall be freed where error is detected. - * Callers of the transmit function shall not free packets, - * but errors shall be returned. - * - * - Return value less than zero means error, zero or - * greater than zero means OK. - * - * Returns result < 0 indicates an error, 0 or positive value - * indicates success. - * - * @layr: Pointer to the current layer the receive function - * isimplemented for (this pointer). - * @cfpkt: Pointer to CaifPacket to be handled. - */ - int (*transmit) (struct cflayer *layr, struct cfpkt *cfpkt); - - /* - * cttrlcmd() - Control Function upwards in CAIF Stack (non-blocking). - * Used for signaling responses (CAIF_CTRLCMD_*_RSP) - * and asynchronous events from the modem (CAIF_CTRLCMD_*_IND) - * - * @layr: Pointer to the current layer the receive function - * is implemented for (this pointer). - * @ctrl: Control Command. - */ - void (*ctrlcmd) (struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); - - /* - * modemctrl() - Control Function used for controlling the modem. - * Used to signal down-wards in the CAIF stack. - * Returns 0 on success, < 0 upon failure. - * - * @layr: Pointer to the current layer the receive function - * is implemented for (this pointer). 
- * @ctrl: Control Command. - */ - int (*modemcmd) (struct cflayer *layr, enum caif_modemcmd ctrl); - - unsigned int id; - char name[CAIF_LAYER_NAME_SZ]; -}; - -/** - * layer_set_up() - Set the up pointer for a specified layer. - * @layr: Layer where up pointer shall be set. - * @above: Layer above. - */ -#define layer_set_up(layr, above) ((layr)->up = (struct cflayer *)(above)) - -/** - * layer_set_dn() - Set the down pointer for a specified layer. - * @layr: Layer where down pointer shall be set. - * @below: Layer below. - */ -#define layer_set_dn(layr, below) ((layr)->dn = (struct cflayer *)(below)) - -/** - * struct dev_info - Physical Device info information about physical layer. - * @dev: Pointer to native physical device. - * @id: Physical ID of the physical connection used by the - * logical CAIF connection. Used by service layers to - * identify their physical id to Caif MUX (CFMUXL)so - * that the MUX can add the correct physical ID to the - * packet. - */ -struct dev_info { - void *dev; - unsigned int id; -}; - -/** - * struct caif_payload_info - Payload information embedded in packet (sk_buff). - * - * @dev_info: Information about the receiving device. - * - * @hdr_len: Header length, used to align pay load on 32bit boundary. - * - * @channel_id: Channel ID of the logical CAIF connection. - * Used by mux to insert channel id into the caif packet. 
- */ -struct caif_payload_info { - struct dev_info *dev_info; - unsigned short hdr_len; - unsigned short channel_id; -}; - -#endif /* CAIF_LAYER_H_ */ diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h deleted file mode 100644 index 8819ff4db35a..000000000000 --- a/include/net/caif/cfcnfg.h +++ /dev/null @@ -1,90 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#ifndef CFCNFG_H_ -#define CFCNFG_H_ -#include <linux/spinlock.h> -#include <linux/netdevice.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfctrl.h> - -struct cfcnfg; - -/** - * enum cfcnfg_phy_preference - Physical preference HW Abstraction - * - * @CFPHYPREF_UNSPECIFIED: Default physical interface - * - * @CFPHYPREF_LOW_LAT: Default physical interface for low-latency - * traffic - * @CFPHYPREF_HIGH_BW: Default physical interface for high-bandwidth - * traffic - * @CFPHYPREF_LOOP: TEST only Loopback interface simulating modem - * responses. - * - */ -enum cfcnfg_phy_preference { - CFPHYPREF_UNSPECIFIED, - CFPHYPREF_LOW_LAT, - CFPHYPREF_HIGH_BW, - CFPHYPREF_LOOP -}; - -/** - * cfcnfg_create() - Get the CAIF configuration object given network. - * @net: Network for the CAIF configuration object. - */ -struct cfcnfg *get_cfcnfg(struct net *net); - -/** - * cfcnfg_create() - Create the CAIF configuration object. - */ -struct cfcnfg *cfcnfg_create(void); - -/** - * cfcnfg_remove() - Remove the CFCNFG object - * @cfg: config object - */ -void cfcnfg_remove(struct cfcnfg *cfg); - -/** - * cfcnfg_add_phy_layer() - Adds a physical layer to the CAIF stack. - * @cnfg: Pointer to a CAIF configuration object, created by - * cfcnfg_create(). - * @dev: Pointer to link layer device - * @phy_layer: Specify the physical layer. The transmit function - * MUST be set in the structure. - * @pref: The phy (link layer) preference. - * @link_support: Protocol implementation for link layer specific protocol. 
- * @fcs: Specify if checksum is used in CAIF Framing Layer. - * @head_room: Head space needed by link specific protocol. - */ -int -cfcnfg_add_phy_layer(struct cfcnfg *cnfg, - struct net_device *dev, struct cflayer *phy_layer, - enum cfcnfg_phy_preference pref, - struct cflayer *link_support, - bool fcs, int head_room); - -/** - * cfcnfg_del_phy_layer - Deletes an phy layer from the CAIF stack. - * - * @cnfg: Pointer to a CAIF configuration object, created by - * cfcnfg_create(). - * @phy_layer: Adaptation layer to be removed. - */ -int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer); - -/** - * cfcnfg_set_phy_state() - Set the state of the physical interface device. - * @cnfg: Configuration object - * @phy_layer: Physical Layer representation - * @up: State of device - */ -int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer, - bool up); - -#endif /* CFCNFG_H_ */ diff --git a/include/net/caif/cfctrl.h b/include/net/caif/cfctrl.h deleted file mode 100644 index 86d17315c8a1..000000000000 --- a/include/net/caif/cfctrl.h +++ /dev/null @@ -1,130 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#ifndef CFCTRL_H_ -#define CFCTRL_H_ -#include <net/caif/caif_layer.h> -#include <net/caif/cfsrvl.h> - -/* CAIF Control packet commands */ -enum cfctrl_cmd { - CFCTRL_CMD_LINK_SETUP = 0, - CFCTRL_CMD_LINK_DESTROY = 1, - CFCTRL_CMD_LINK_ERR = 2, - CFCTRL_CMD_ENUM = 3, - CFCTRL_CMD_SLEEP = 4, - CFCTRL_CMD_WAKE = 5, - CFCTRL_CMD_LINK_RECONF = 6, - CFCTRL_CMD_START_REASON = 7, - CFCTRL_CMD_RADIO_SET = 8, - CFCTRL_CMD_MODEM_SET = 9, - CFCTRL_CMD_MASK = 0xf -}; - -/* Channel types */ -enum cfctrl_srv { - CFCTRL_SRV_DECM = 0, - CFCTRL_SRV_VEI = 1, - CFCTRL_SRV_VIDEO = 2, - CFCTRL_SRV_DBG = 3, - CFCTRL_SRV_DATAGRAM = 4, - CFCTRL_SRV_RFM = 5, - CFCTRL_SRV_UTIL = 6, - CFCTRL_SRV_MASK = 0xf -}; - -#define CFCTRL_RSP_BIT 0x20 -#define CFCTRL_ERR_BIT 0x10 - -struct 
cfctrl_rsp { - void (*linksetup_rsp)(struct cflayer *layer, u8 linkid, - enum cfctrl_srv serv, u8 phyid, - struct cflayer *adapt_layer); - void (*linkdestroy_rsp)(struct cflayer *layer, u8 linkid); - void (*linkerror_ind)(void); - void (*enum_rsp)(void); - void (*sleep_rsp)(void); - void (*wake_rsp)(void); - void (*restart_rsp)(void); - void (*radioset_rsp)(void); - void (*reject_rsp)(struct cflayer *layer, u8 linkid, - struct cflayer *client_layer); -}; - -/* Link Setup Parameters for CAIF-Links. */ -struct cfctrl_link_param { - enum cfctrl_srv linktype;/* (T3,T0) Type of Channel */ - u8 priority; /* (P4,P0) Priority of the channel */ - u8 phyid; /* (U2-U0) Physical interface to connect */ - u8 endpoint; /* (E1,E0) Endpoint for data channels */ - u8 chtype; /* (H1,H0) Channel-Type, applies to - * VEI, DEBUG */ - union { - struct { - u8 connid; /* (D7,D0) Video LinkId */ - } video; - - struct { - u32 connid; /* (N31,Ngit0) Connection ID used - * for Datagram */ - } datagram; - - struct { - u32 connid; /* Connection ID used for RFM */ - char volume[20]; /* Volume to mount for RFM */ - } rfm; /* Configuration for RFM */ - - struct { - u16 fifosize_kb; /* Psock FIFO size in KB */ - u16 fifosize_bufs; /* Psock # signal buffers */ - char name[16]; /* Name of the PSOCK service */ - u8 params[255]; /* Link setup Parameters> */ - u16 paramlen; /* Length of Link Setup - * Parameters */ - } utility; /* Configuration for Utility Links (Psock) */ - } u; -}; - -/* This structure is used internally in CFCTRL */ -struct cfctrl_request_info { - int sequence_no; - enum cfctrl_cmd cmd; - u8 channel_id; - struct cfctrl_link_param param; - struct cflayer *client_layer; - struct list_head list; -}; - -struct cfctrl { - struct cfsrvl serv; - struct cfctrl_rsp res; - atomic_t req_seq_no; - atomic_t rsp_seq_no; - struct list_head list; - /* Protects from simultaneous access to first_req list */ - spinlock_t info_list_lock; -#ifndef CAIF_NO_LOOP - u8 loop_linkid; - int loop_linkused[256]; 
- /* Protects simultaneous access to loop_linkid and loop_linkused */ - spinlock_t loop_linkid_lock; -#endif - -}; - -void cfctrl_enum_req(struct cflayer *cfctrl, u8 physlinkid); -int cfctrl_linkup_request(struct cflayer *cfctrl, - struct cfctrl_link_param *param, - struct cflayer *user_layer); -int cfctrl_linkdown_req(struct cflayer *cfctrl, u8 linkid, - struct cflayer *client); - -struct cflayer *cfctrl_create(void); -struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer); -int cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer); -void cfctrl_remove(struct cflayer *layr); - -#endif /* CFCTRL_H_ */ diff --git a/include/net/caif/cffrml.h b/include/net/caif/cffrml.h deleted file mode 100644 index 1ab8a80ede4d..000000000000 --- a/include/net/caif/cffrml.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#ifndef CFFRML_H_ -#define CFFRML_H_ -#include <net/caif/caif_layer.h> -#include <linux/netdevice.h> - -struct cffrml; -struct cflayer *cffrml_create(u16 phyid, bool use_fcs); -void cffrml_free(struct cflayer *layr); -void cffrml_set_uplayer(struct cflayer *this, struct cflayer *up); -void cffrml_set_dnlayer(struct cflayer *this, struct cflayer *dn); -void cffrml_put(struct cflayer *layr); -void cffrml_hold(struct cflayer *layr); -int cffrml_refcnt_read(struct cflayer *layr); - -#endif /* CFFRML_H_ */ diff --git a/include/net/caif/cfmuxl.h b/include/net/caif/cfmuxl.h deleted file mode 100644 index 92ccb2648309..000000000000 --- a/include/net/caif/cfmuxl.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#ifndef CFMUXL_H_ -#define CFMUXL_H_ -#include <net/caif/caif_layer.h> - -struct cfsrvl; -struct cffrml; - -struct cflayer *cfmuxl_create(void); -int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid); -struct cflayer 
*cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid); -int cfmuxl_set_dnlayer(struct cflayer *layr, struct cflayer *up, u8 phyid); -struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 linkid); - -#endif /* CFMUXL_H_ */ diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h deleted file mode 100644 index acf664227d96..000000000000 --- a/include/net/caif/cfpkt.h +++ /dev/null @@ -1,232 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#ifndef CFPKT_H_ -#define CFPKT_H_ -#include <net/caif/caif_layer.h> -#include <linux/types.h> -struct cfpkt; - -/* Create a CAIF packet. - * len: Length of packet to be created - * @return New packet. - */ -struct cfpkt *cfpkt_create(u16 len); - -/* - * Destroy a CAIF Packet. - * pkt Packet to be destroyed. - */ -void cfpkt_destroy(struct cfpkt *pkt); - -/* - * Extract header from packet. - * - * pkt Packet to extract header data from. - * data Pointer to copy the header data into. - * len Length of head data to copy. - * @return zero on success and error code upon failure - */ -int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len); - -static inline u8 cfpkt_extr_head_u8(struct cfpkt *pkt) -{ - u8 tmp; - - cfpkt_extr_head(pkt, &tmp, 1); - - return tmp; -} - -static inline u16 cfpkt_extr_head_u16(struct cfpkt *pkt) -{ - __le16 tmp; - - cfpkt_extr_head(pkt, &tmp, 2); - - return le16_to_cpu(tmp); -} - -static inline u32 cfpkt_extr_head_u32(struct cfpkt *pkt) -{ - __le32 tmp; - - cfpkt_extr_head(pkt, &tmp, 4); - - return le32_to_cpu(tmp); -} - -/* - * Peek header from packet. - * Reads data from packet without changing packet. - * - * pkt Packet to extract header data from. - * data Pointer to copy the header data into. - * len Length of head data to copy. - * @return zero on success and error code upon failure - */ -int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len); - -/* - * Extract header from trailer (end of packet). 
- * - * pkt Packet to extract header data from. - * data Pointer to copy the trailer data into. - * len Length of header data to copy. - * @return zero on success and error code upon failure - */ -int cfpkt_extr_trail(struct cfpkt *pkt, void *data, u16 len); - -/* - * Add header to packet. - * - * - * pkt Packet to add header data to. - * data Pointer to data to copy into the header. - * len Length of header data to copy. - * @return zero on success and error code upon failure - */ -int cfpkt_add_head(struct cfpkt *pkt, const void *data, u16 len); - -/* - * Add trailer to packet. - * - * - * pkt Packet to add trailer data to. - * data Pointer to data to copy into the trailer. - * len Length of trailer data to copy. - * @return zero on success and error code upon failure - */ -int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len); - -/* - * Pad trailer on packet. - * Moves data pointer in packet, no content copied. - * - * pkt Packet in which to pad trailer. - * len Length of padding to add. - * @return zero on success and error code upon failure - */ -int cfpkt_pad_trail(struct cfpkt *pkt, u16 len); - -/* - * Add a single byte to packet body (tail). - * - * pkt Packet in which to add byte. - * data Byte to add. - * @return zero on success and error code upon failure - */ -int cfpkt_addbdy(struct cfpkt *pkt, const u8 data); - -/* - * Add a data to packet body (tail). - * - * pkt Packet in which to add data. - * data Pointer to data to copy into the packet body. - * len Length of data to add. - * @return zero on success and error code upon failure - */ -int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len); - -/* - * Checks whether there are more data to process in packet. - * pkt Packet to check. - * @return true if more data are available in packet false otherwise - */ -bool cfpkt_more(struct cfpkt *pkt); - -/* - * Checks whether the packet is erroneous, - * i.e. 
if it has been attempted to extract more data than available in packet - * or writing more data than has been allocated in cfpkt_create(). - * pkt Packet to check. - * @return true on error false otherwise - */ -bool cfpkt_erroneous(struct cfpkt *pkt); - -/* - * Get the packet length. - * pkt Packet to get length from. - * @return Number of bytes in packet. - */ -u16 cfpkt_getlen(struct cfpkt *pkt); - -/* - * Set the packet length, by adjusting the trailer pointer according to length. - * pkt Packet to set length. - * len Packet length. - * @return Number of bytes in packet. - */ -int cfpkt_setlen(struct cfpkt *pkt, u16 len); - -/* - * cfpkt_append - Appends a packet's data to another packet. - * dstpkt: Packet to append data into, WILL BE FREED BY THIS FUNCTION - * addpkt: Packet to be appended and automatically released, - * WILL BE FREED BY THIS FUNCTION. - * expectlen: Packet's expected total length. This should be considered - * as a hint. - * NB: Input packets will be destroyed after appending and cannot be used - * after calling this function. - * @return The new appended packet. - */ -struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, struct cfpkt *addpkt, - u16 expectlen); - -/* - * cfpkt_split - Split a packet into two packets at the specified split point. - * pkt: Packet to be split (will contain the first part of the data on exit) - * pos: Position to split packet in two parts. - * @return The new packet, containing the second part of the data. - */ -struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos); - -/* - * Iteration function, iterates the packet buffers from start to end. - * - * Checksum iteration function used to iterate buffers - * (we may have packets consisting of a chain of buffers) - * pkt: Packet to calculate checksum for - * iter_func: Function pointer to iteration function - * chks: Checksum calculated so far. - * buf: Pointer to the buffer to checksum - * len: Length of buf. - * data: Initial checksum value. 
- * @return Checksum of buffer. - */ - -int cfpkt_iterate(struct cfpkt *pkt, - u16 (*iter_func)(u16 chks, void *buf, u16 len), - u16 data); - -/* Map from a "native" packet (e.g. Linux Socket Buffer) to a CAIF packet. - * dir - Direction indicating whether this packet is to be sent or received. - * nativepkt - The native packet to be transformed to a CAIF packet - * @return The mapped CAIF Packet CFPKT. - */ -struct cfpkt *cfpkt_fromnative(enum caif_direction dir, void *nativepkt); - -/* Map from a CAIF packet to a "native" packet (e.g. Linux Socket Buffer). - * pkt - The CAIF packet to be transformed into a "native" packet. - * @return The native packet transformed from a CAIF packet. - */ -void *cfpkt_tonative(struct cfpkt *pkt); - -/* - * Returns packet information for a packet. - * pkt Packet to get info from; - * @return Packet information - */ -struct caif_payload_info *cfpkt_info(struct cfpkt *pkt); - -/** cfpkt_set_prio - set priority for a CAIF packet. - * - * @pkt: The CAIF packet to be adjusted. - * @prio: one of TC_PRIO_ constants. 
- */ -void cfpkt_set_prio(struct cfpkt *pkt, int prio); - -#endif /* CFPKT_H_ */ diff --git a/include/net/caif/cfserl.h b/include/net/caif/cfserl.h deleted file mode 100644 index 67cce8757175..000000000000 --- a/include/net/caif/cfserl.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#ifndef CFSERL_H_ -#define CFSERL_H_ -#include <net/caif/caif_layer.h> - -struct cflayer *cfserl_create(int instance, bool use_stx); -void cfserl_release(struct cflayer *layer); -#endif diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h deleted file mode 100644 index a000dc45f966..000000000000 --- a/include/net/caif/cfsrvl.h +++ /dev/null @@ -1,61 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#ifndef CFSRVL_H_ -#define CFSRVL_H_ -#include <linux/list.h> -#include <linux/stddef.h> -#include <linux/types.h> -#include <linux/kref.h> -#include <linux/rculist.h> - -struct cfsrvl { - struct cflayer layer; - bool open; - bool phy_flow_on; - bool modem_flow_on; - bool supports_flowctrl; - void (*release)(struct cflayer *layer); - struct dev_info dev_info; - void (*hold)(struct cflayer *lyr); - void (*put)(struct cflayer *lyr); - struct rcu_head rcu; -}; - -struct cflayer *cfvei_create(u8 linkid, struct dev_info *dev_info); -struct cflayer *cfdgml_create(u8 linkid, struct dev_info *dev_info); -struct cflayer *cfutill_create(u8 linkid, struct dev_info *dev_info); -struct cflayer *cfvidl_create(u8 linkid, struct dev_info *dev_info); -struct cflayer *cfrfml_create(u8 linkid, struct dev_info *dev_info, - int mtu_size); -struct cflayer *cfdbgl_create(u8 linkid, struct dev_info *dev_info); - -bool cfsrvl_phyid_match(struct cflayer *layer, int phyid); - -void cfsrvl_init(struct cfsrvl *service, - u8 channel_id, - struct dev_info *dev_info, - bool supports_flowctrl); -bool cfsrvl_ready(struct 
cfsrvl *service, int *err); - -static inline void cfsrvl_get(struct cflayer *layr) -{ - struct cfsrvl *s = container_of(layr, struct cfsrvl, layer); - if (layr == NULL || layr->up == NULL || s->hold == NULL) - return; - - s->hold(layr->up); -} - -static inline void cfsrvl_put(struct cflayer *layr) -{ - struct cfsrvl *s = container_of(layr, struct cfsrvl, layer); - if (layr == NULL || layr->up == NULL || s->hold == NULL) - return; - - s->put(layr->up); -} -#endif /* CFSRVL_H_ */ diff --git a/include/net/can.h b/include/net/can.h new file mode 100644 index 000000000000..6db9e826f0e0 --- /dev/null +++ b/include/net/can.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * net/can.h + * + * Definitions for the CAN network socket buffer extensions + * + * Copyright (C) 2026 Oliver Hartkopp <socketcan@hartkopp.net> + * + */ + +#ifndef _NET_CAN_H +#define _NET_CAN_H + +/** + * struct can_skb_ext - skb extensions for CAN specific content + * @can_iif: ifindex of the first interface the CAN frame appeared on + * @can_framelen: cached echo CAN frame length for bql + * @can_gw_hops: can-gw CAN frame time-to-live counter + * @can_ext_flags: CAN skb extensions flags + */ +struct can_skb_ext { + int can_iif; + u16 can_framelen; + u8 can_gw_hops; + u8 can_ext_flags; +}; + +#endif /* _NET_CAN_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2900202588a5..9d3639ff9c28 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7,7 +7,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2025 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation */ #include <linux/ethtool.h> @@ -126,6 +126,7 @@ struct wiphy; * @IEEE80211_CHAN_NO_4MHZ: 4 MHz bandwidth is not permitted on this channel. * @IEEE80211_CHAN_NO_8MHZ: 8 MHz bandwidth is not permitted on this channel. 
* @IEEE80211_CHAN_NO_16MHZ: 16 MHz bandwidth is not permitted on this channel. + * @IEEE80211_CHAN_NO_UHR: UHR operation is not permitted on this channel. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = BIT(0), @@ -143,6 +144,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_10MHZ = BIT(12), IEEE80211_CHAN_NO_HE = BIT(13), /* can use free bits here */ + IEEE80211_CHAN_NO_UHR = BIT(18), IEEE80211_CHAN_NO_320MHZ = BIT(19), IEEE80211_CHAN_NO_EHT = BIT(20), IEEE80211_CHAN_DFS_CONCURRENT = BIT(21), @@ -188,6 +190,8 @@ enum ieee80211_channel_flags { * on this channel. * @dfs_state_entered: timestamp (jiffies) when the dfs state was entered. * @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels. + * @cac_start_time: timestamp (CLOCK_BOOTTIME, nanoseconds) when CAC was + * started on this channel. Zero when CAC is not in progress. * @psd: power spectral density (in dBm) */ struct ieee80211_channel { @@ -205,6 +209,7 @@ struct ieee80211_channel { enum nl80211_dfs_state dfs_state; unsigned long dfs_state_entered; unsigned int dfs_cac_ms; + u64 cac_start_time; s8 psd; }; @@ -429,6 +434,18 @@ struct ieee80211_sta_eht_cap { u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN]; }; +/** + * struct ieee80211_sta_uhr_cap - STA's UHR capabilities + * @has_uhr: true iff UHR is supported and data is valid + * @mac: fixed MAC capabilities + * @phy: fixed PHY capabilities + */ +struct ieee80211_sta_uhr_cap { + bool has_uhr; + struct ieee80211_uhr_cap_mac mac; + struct ieee80211_uhr_cap_phy phy; +}; + /* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ /* @@ -454,6 +471,7 @@ struct ieee80211_sta_eht_cap { * @he_6ghz_capa: HE 6 GHz capabilities, must be filled in for a * 6 GHz band channel (and 0 may be valid value). 
* @eht_cap: STA's EHT capabilities + * @uhr_cap: STA's UHR capabilities * @vendor_elems: vendor element(s) to advertise * @vendor_elems.data: vendor element(s) data * @vendor_elems.len: vendor element(s) length @@ -463,6 +481,7 @@ struct ieee80211_sband_iftype_data { struct ieee80211_sta_he_cap he_cap; struct ieee80211_he_6ghz_capa he_6ghz_capa; struct ieee80211_sta_eht_cap eht_cap; + struct ieee80211_sta_uhr_cap uhr_cap; struct { const u8 *data; unsigned int len; @@ -705,6 +724,26 @@ ieee80211_get_eht_iftype_cap(const struct ieee80211_supported_band *sband, } /** + * ieee80211_get_uhr_iftype_cap - return UHR capabilities for an sband's iftype + * @sband: the sband to search for the iftype on + * @iftype: enum nl80211_iftype + * + * Return: pointer to the struct ieee80211_sta_uhr_cap, or NULL is none found + */ +static inline const struct ieee80211_sta_uhr_cap * +ieee80211_get_uhr_iftype_cap(const struct ieee80211_supported_band *sband, + enum nl80211_iftype iftype) +{ + const struct ieee80211_sband_iftype_data *data = + ieee80211_get_sband_iftype_data(sband, iftype); + + if (data && data->uhr_cap.has_uhr) + return &data->uhr_cap; + + return NULL; +} + +/** * wiphy_read_of_freq_limits - read frequency limits from device tree * * @wiphy: the wireless device to get extra limits for @@ -1486,6 +1525,7 @@ struct cfg80211_s1g_short_beacon { * @he_cap: HE capabilities (or %NULL if HE isn't enabled) * @eht_cap: EHT capabilities (or %NULL if EHT isn't enabled) * @eht_oper: EHT operation IE (or %NULL if EHT isn't enabled) + * @uhr_oper: UHR operation (or %NULL if UHR isn't enabled) * @ht_required: stations must support HT * @vht_required: stations must support VHT * @twt_responder: Enable Target Wait Time @@ -1525,6 +1565,7 @@ struct cfg80211_ap_settings { const struct ieee80211_he_operation *he_oper; const struct ieee80211_eht_cap_elem *eht_cap; const struct ieee80211_eht_operation *eht_oper; + const struct ieee80211_uhr_operation *uhr_oper; bool ht_required, vht_required, 
he_required, sae_h2e_required; bool twt_responder; u32 flags; @@ -1698,6 +1739,8 @@ struct sta_txpwr { * @eht_capa: EHT capabilities of station * @eht_capa_len: the length of the EHT capabilities * @s1g_capa: S1G capabilities of station + * @uhr_capa: UHR capabilities of the station + * @uhr_capa_len: the length of the UHR capabilities */ struct link_station_parameters { const u8 *mld_mac; @@ -1717,6 +1760,8 @@ struct link_station_parameters { const struct ieee80211_eht_cap_elem *eht_capa; u8 eht_capa_len; const struct ieee80211_s1g_cap *s1g_capa; + const struct ieee80211_uhr_cap *uhr_capa; + u8 uhr_capa_len; }; /** @@ -1785,6 +1830,8 @@ struct cfg80211_ttlm_params { * present/updated * @eml_cap: EML capabilities of this station * @link_sta_params: link related params. + * @epp_peer: EPP peer indication + * @nmi_mac: MAC address of the NMI station of the NAN peer */ struct station_parameters { struct net_device *vlan; @@ -1811,6 +1858,8 @@ struct station_parameters { bool eml_cap_present; u16 eml_cap; struct link_station_parameters link_sta_params; + bool epp_peer; + const u8 *nmi_mac; }; /** @@ -1850,6 +1899,8 @@ struct station_del_parameters { * entry that is operating, has been marked authorized by userspace) * @CFG80211_STA_MESH_PEER_KERNEL: peer on mesh interface (kernel managed) * @CFG80211_STA_MESH_PEER_USER: peer on mesh interface (user managed) + * @CFG80211_STA_NAN_MGMT: NAN management interface station + * @CFG80211_STA_NAN_DATA: NAN data path station */ enum cfg80211_station_type { CFG80211_STA_AP_CLIENT, @@ -1861,6 +1912,8 @@ enum cfg80211_station_type { CFG80211_STA_TDLS_PEER_ACTIVE, CFG80211_STA_MESH_PEER_KERNEL, CFG80211_STA_MESH_PEER_USER, + CFG80211_STA_NAN_MGMT, + CFG80211_STA_NAN_DATA, }; /** @@ -1896,6 +1949,11 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS * @RATE_INFO_FLAGS_EHT_MCS: EHT MCS information * @RATE_INFO_FLAGS_S1G_MCS: MCS field filled with S1G MCS + * 
@RATE_INFO_FLAGS_UHR_MCS: UHR MCS information + * @RATE_INFO_FLAGS_UHR_ELR_MCS: UHR ELR MCS was used + * (set together with @RATE_INFO_FLAGS_UHR_MCS) + * @RATE_INFO_FLAGS_UHR_IM: UHR Interference Mitigation + * was used */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), @@ -1907,6 +1965,9 @@ enum rate_info_flags { RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6), RATE_INFO_FLAGS_EHT_MCS = BIT(7), RATE_INFO_FLAGS_S1G_MCS = BIT(8), + RATE_INFO_FLAGS_UHR_MCS = BIT(9), + RATE_INFO_FLAGS_UHR_ELR_MCS = BIT(10), + RATE_INFO_FLAGS_UHR_IM = BIT(11), }; /** @@ -1922,7 +1983,7 @@ enum rate_info_flags { * @RATE_INFO_BW_160: 160 MHz bandwidth * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation * @RATE_INFO_BW_320: 320 MHz bandwidth - * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT RU allocation + * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT/UHR RU allocation * @RATE_INFO_BW_1: 1 MHz bandwidth * @RATE_INFO_BW_2: 2 MHz bandwidth * @RATE_INFO_BW_4: 4 MHz bandwidth @@ -1953,7 +2014,7 @@ enum rate_info_bw { * * @flags: bitflag of flags from &enum rate_info_flags * @legacy: bitrate in 100kbit/s for 802.11abg - * @mcs: mcs index if struct describes an HT/VHT/HE/EHT/S1G rate + * @mcs: mcs index if struct describes an HT/VHT/HE/EHT/S1G/UHR rate * @nss: number of streams (VHT & HE only) * @bw: bandwidth (from &enum rate_info_bw) * @he_gi: HE guard interval (from &enum nl80211_he_gi) @@ -3260,6 +3321,7 @@ struct cfg80211_ml_reconf_req { * Drivers shall disable MLO features for the current association if this * flag is not set. 
* @ASSOC_REQ_SPP_AMSDU: SPP A-MSDUs will be used on this connection (if any) + * @ASSOC_REQ_DISABLE_UHR: Disable UHR */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), @@ -3270,6 +3332,7 @@ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_EHT = BIT(5), CONNECT_REQ_MLO_SUPPORT = BIT(6), ASSOC_REQ_SPP_AMSDU = BIT(7), + ASSOC_REQ_DISABLE_UHR = BIT(8), }; /** @@ -3924,6 +3987,77 @@ struct cfg80211_qos_map { }; /** + * DOC: Neighbor Awareness Networking (NAN) + * + * NAN uses two interface types: + * + * - %NL80211_IFTYPE_NAN: a non-netdev interface. This has two roles: (1) holds + * the configuration of all NAN activities (DE parameters, synchronisation + * parameters, local schedule, etc.), and (2) uses as the NAN Management + * Interface (NMI), which is used for NAN management communication. + * + * - %NL80211_IFTYPE_NAN_DATA: The NAN Data Interface (NDI), used for data + * communication with NAN peers. + * + * An NDI interface can only be started (IFF_UP) if the NMI one is running and + * NAN is started. Before NAN is stopped, all associated NDI interfaces + * must be stopped first. + * + * The local schedule specifies which channels the device is available on and + * when. Must be cancelled before NAN is stopped. + * + * NAN Stations + * ~~~~~~~~~~~~ + * + * There are two types of stations corresponding to the two interface types: + * + * - NMI station: Represents the NAN peer. Peer-specific data such as the peer's + * schedule and the HT, VHT and HE capabilities belongs to the NMI station. + * Also used for Tx/Rx of NAN management frames to/from the peer. + * Added on the %NL80211_IFTYPE_NAN interface. + * + * - NDI station: Used for Tx/Rx of data frames (and non-NAN management frames) + * for a specific NDP established with the NAN peer. Added on the + * %NL80211_IFTYPE_NAN_DATA interface. + * + * A peer may reuse its NMI address as the NDI address. 
In that case, two + * separate stations should be added even though they share the same MAC + * address. + * + * HT, VHT and HE capabilities should not changes after it was set. It is the + * driver's responsibility to check that. + * + * An NDI station can only be added if the corresponding NMI station has already + * been configured with HT (and possibly VHT and HE) capabilities. It is the + * driver's responsibility to check that. + * + * All NDI stations must be removed before corresponding NMI station is removed. + * Therefore, removing a NMI station implies that the associated NDI station(s) + * (if any) will be removed first. + * + * NAN Dependencies + * ~~~~~~~~~~~~~~~~ + * + * The following diagram shows the dependencies between NAN components. + * An arrow from A to B means A must be started/added before B, and B must be + * stopped/removed before A: + * + * +-------------+ + * | NMI iface |---(local schedule) + * +------+------+ + * / \ + * v v + * +-----------+ +-------------+ + * | NDI iface | | NMI sta |---(peer schedule) + * +-----+-----+ +------+------+ + * \ / + * v v + * +----------+ + * | NDI sta | + * +----------+ + */ + +/** * struct cfg80211_nan_band_config - NAN band specific configuration * * @chan: Pointer to the IEEE 802.11 channel structure. The channel to be used @@ -3966,7 +4100,6 @@ struct cfg80211_nan_band_config { * (i.e. BIT(NL80211_BAND_2GHZ)). * @cluster_id: cluster ID used for NAN synchronization. This is a MAC address * that can take a value from 50-6F-9A-01-00-00 to 50-6F-9A-01-FF-FF. - * If NULL, the device will pick a random Cluster ID. * @scan_period: period (in seconds) between NAN scans. * @scan_dwell_time: dwell time (in milliseconds) for NAN scans. * @discovery_beacon_interval: interval (in TUs) for discovery beacons. 
@@ -3982,7 +4115,7 @@ struct cfg80211_nan_band_config { struct cfg80211_nan_conf { u8 master_pref; u8 bands; - const u8 *cluster_id; + u8 cluster_id[ETH_ALEN] __aligned(2); u16 scan_period; u16 scan_dwell_time; u8 discovery_beacon_interval; @@ -3994,6 +4127,102 @@ struct cfg80211_nan_conf { u16 vendor_elems_len; }; +#define CFG80211_NAN_SCHED_NUM_TIME_SLOTS 32 + +/** + * struct cfg80211_nan_channel - NAN channel configuration + * + * This struct defines a NAN channel configuration + * + * @chandef: the channel definition + * @channel_entry: pointer to the Channel Entry blob as defined in Wi-Fi Aware + * (TM) 4.0 specification Table 100 (Channel Entry format for the NAN + * Availability attribute). + * @rx_nss: number of spatial streams supported on this channel + */ +struct cfg80211_nan_channel { + struct cfg80211_chan_def chandef; + const u8 *channel_entry; + u8 rx_nss; +}; + +/** + * struct cfg80211_nan_local_sched - NAN local schedule + * + * This struct defines NAN local schedule parameters + * + * @schedule: a mapping of time slots to chandef indexes in %nan_channels. + * An unscheduled slot will be set to %NL80211_NAN_SCHED_NOT_AVAIL_SLOT. + * @n_channels: number of channel definitions in %nan_channels. + * @nan_avail_blob: pointer to NAN Availability attribute blob. + * See %NL80211_ATTR_NAN_AVAIL_BLOB for more details. + * @nan_avail_blob_len: length of the @nan_avail_blob in bytes. + * @deferred: if true, the command containing this schedule configuration is a + * request from the device to perform an announced schedule update. This + * means that it needs to send the updated NAN availability to the peers, + * and do the actual switch on the right time (i.e. at the end of the slot + * after the slot in which the updated NAN Availability was sent). + * See %NL80211_ATTR_NAN_SCHED_DEFERRED for more details. + * If false, the schedule is applied immediately. + * @nan_channels: array of NAN channel definitions that can be scheduled. 
+ */ +struct cfg80211_nan_local_sched { + u8 schedule[CFG80211_NAN_SCHED_NUM_TIME_SLOTS]; + u8 n_channels; + const u8 *nan_avail_blob; + u16 nan_avail_blob_len; + bool deferred; + struct cfg80211_nan_channel nan_channels[] __counted_by(n_channels); +}; + +/** + * struct cfg80211_nan_peer_map - NAN peer schedule map + * + * This struct defines a single NAN peer schedule map + * + * @map_id: map ID of this schedule map + * @schedule: a mapping of time slots to chandef indexes in the schedule's + * @nan_channels. Each slot lasts 16TUs. An unscheduled slot will be + * set to %NL80211_NAN_SCHED_NOT_AVAIL_SLOT. + */ +struct cfg80211_nan_peer_map { + u8 map_id; + u8 schedule[CFG80211_NAN_SCHED_NUM_TIME_SLOTS]; +}; + +#define CFG80211_NAN_MAX_PEER_MAPS 2 +#define CFG80211_NAN_INVALID_MAP_ID 0xff + +/** + * struct cfg80211_nan_peer_sched - NAN peer schedule + * + * This struct defines NAN peer schedule parameters for a peer. + * + * @peer_addr: MAC address of the peer (NMI address) + * @seq_id: sequence ID of the peer schedule. + * @committed_dw: committed DW as published by the peer. + * See %NL80211_ATTR_NAN_COMMITTED_DW + * @max_chan_switch: maximum channel switch time in microseconds as published + * by the peer. See %NL80211_ATTR_NAN_MAX_CHAN_SWITCH_TIME. + * @init_ulw: initial ULWs as published by the peer. + * @ulw_size: number of bytes in @init_ulw. + * @n_channels: number of channel definitions in @nan_channels. + * @nan_channels: array of NAN channel definitions for this schedule. + * @maps: array of peer schedule maps. Unused entries have + * map_id = %CFG80211_NAN_INVALID_MAP_ID. 
+ */ +struct cfg80211_nan_peer_sched { + const u8 *peer_addr; + u8 seq_id; + u16 committed_dw; + u16 max_chan_switch; + const u8 *init_ulw; + u16 ulw_size; + u8 n_channels; + struct cfg80211_nan_channel *nan_channels; + struct cfg80211_nan_peer_map maps[CFG80211_NAN_MAX_PEER_MAPS]; +}; + /** * enum cfg80211_nan_conf_changes - indicates changed fields in NAN * configuration @@ -4187,6 +4416,7 @@ struct cfg80211_ftm_responder_stats { * @num_bursts_exp: actual number of bursts exponent negotiated * @burst_duration: actual burst duration negotiated * @ftms_per_burst: actual FTMs per burst negotiated + * @burst_period: actual burst period negotiated in units of 100ms * @lci_len: length of LCI information (if present) * @civicloc_len: length of civic location information (if present) * @lci: LCI data (may be %NULL) @@ -4228,6 +4458,7 @@ struct cfg80211_pmsr_ftm_result { u8 num_bursts_exp; u8 burst_duration; u8 ftms_per_burst; + u16 burst_period; s32 rssi_avg; s32 rssi_spread; struct rate_info tx_rate, rx_rate; @@ -4290,7 +4521,9 @@ struct cfg80211_pmsr_result { * @burst_period: burst period to use * @asap: indicates to use ASAP mode * @num_bursts_exp: number of bursts exponent - * @burst_duration: burst duration + * @burst_duration: burst duration. If @trigger_based or @non_trigger_based is + * set, this is the burst duration in milliseconds, and zero means the + * device should pick an appropriate value based on @ftms_per_burst. * @ftms_per_burst: number of FTMs per burst * @ftmr_retries: number of retries for FTM request * @request_lci: request LCI information @@ -4303,6 +4536,8 @@ struct cfg80211_pmsr_result { * EDCA based ranging will be used. * @lmr_feedback: negotiate for I2R LMR feedback. Only valid if either * @trigger_based or @non_trigger_based is set. + * @rsta: Operate as the RSTA in the measurement. Only valid if @lmr_feedback + * and either @trigger_based or @non_trigger_based is set. * @bss_color: the bss color of the responder. Optional. 
Set to zero to * indicate the driver should set the BSS color. Only valid if * @non_trigger_based or @trigger_based is set. @@ -4318,7 +4553,8 @@ struct cfg80211_pmsr_ftm_request_peer { request_civicloc:1, trigger_based:1, non_trigger_based:1, - lmr_feedback:1; + lmr_feedback:1, + rsta:1; u8 num_bursts_exp; u8 burst_duration; u8 ftms_per_burst; @@ -4767,6 +5003,19 @@ struct mgmt_frame_regs { * @nan_change_conf: changes NAN configuration. The changed parameters must * be specified in @changes (using &enum cfg80211_nan_conf_changes); * All other parameters must be ignored. + * @nan_set_local_sched: configure the local schedule for NAN. The schedule + * consists of an array of %cfg80211_nan_channel and the schedule itself, + * in which each entry maps each time slot to the channel on which the + * radio should operate on. If the chandef of a NAN channel is not + * changed, the channel entry must also remain unchanged. It is the + * driver's responsibility to verify this. + * @nan_set_peer_sched: configure the peer schedule for NAN. The schedule + * consists of an array of %cfg80211_nan_channel and the schedule itself, + * in which each entry maps each time slot to a channel on which the + * radio should operate on. In addition, it contains more peer's schedule + * information such as committed DW, etc. When updating an existing peer + * schedule, the full new schedule is provided - partial updates are not + * supported, and the new schedule completely replaces the previous one. 
* * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS * @@ -4861,24 +5110,24 @@ struct cfg80211_ops { struct wireless_dev *wdev, unsigned int link_id); - int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, + int (*add_key)(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params); - int (*get_key)(struct wiphy *wiphy, struct net_device *netdev, + int (*get_key)(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)); - int (*del_key)(struct wiphy *wiphy, struct net_device *netdev, + int (*del_key)(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool unicast, bool multicast); int (*set_default_mgmt_key)(struct wiphy *wiphy, - struct net_device *netdev, int link_id, + struct wireless_dev *wdev, int link_id, u8 key_index); int (*set_default_beacon_key)(struct wiphy *wiphy, - struct net_device *netdev, + struct wireless_dev *wdev, int link_id, u8 key_index); @@ -4890,17 +5139,17 @@ struct cfg80211_ops { unsigned int link_id); - int (*add_station)(struct wiphy *wiphy, struct net_device *dev, + int (*add_station)(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params); - int (*del_station)(struct wiphy *wiphy, struct net_device *dev, + int (*del_station)(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params); - int (*change_station)(struct wiphy *wiphy, struct net_device *dev, + int (*change_station)(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params); - int (*get_station)(struct wiphy *wiphy, struct net_device *dev, + int (*get_station)(struct wiphy *wiphy, 
struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo); - int (*dump_station)(struct wiphy *wiphy, struct net_device *dev, + int (*dump_station)(struct wiphy *wiphy, struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo); int (*add_mpath)(struct wiphy *wiphy, struct net_device *dev, @@ -5144,7 +5393,12 @@ struct cfg80211_ops { struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes); - + int (*nan_set_local_sched)(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched); + int (*nan_set_peer_sched)(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_peer_sched *sched); int (*set_multicast_to_unicast)(struct wiphy *wiphy, struct net_device *dev, const bool enabled); @@ -5638,6 +5892,18 @@ cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type); * not limited) * @ftm.trigger_based: trigger based ranging measurement is supported * @ftm.non_trigger_based: non trigger based ranging measurement is supported + * @ftm.support_6ghz: supports ranging in 6 GHz band + * @ftm.max_tx_ltf_rep: maximum number of TX LTF repetitions supported (0 means + * only one LTF, no repetitions) + * @ftm.max_rx_ltf_rep: maximum number of RX LTF repetitions supported (0 means + * only one LTF, no repetitions) + * @ftm.max_tx_sts: maximum number of TX STS supported (zero based) + * @ftm.max_rx_sts: maximum number of RX STS supported (zero based) + * @ftm.max_total_ltf_tx: maximum total number of LTFs that can be transmitted + * (0 means unknown) + * @ftm.max_total_ltf_rx: maximum total number of LTFs that can be received + * (0 means unknown) + * @ftm.support_rsta: supports operating as RSTA in PMSR FTM request */ struct cfg80211_pmsr_capabilities { unsigned int max_peers; @@ -5655,7 +5921,15 @@ struct cfg80211_pmsr_capabilities { request_lci:1, request_civicloc:1, trigger_based:1, - non_trigger_based:1; + non_trigger_based:1, + support_6ghz:1; + u8 max_tx_ltf_rep; + u8 
max_rx_ltf_rep; + u8 max_tx_sts; + u8 max_rx_sts; + u8 max_total_ltf_tx; + u8 max_total_ltf_rx; + u8 support_rsta:1; } ftm; }; @@ -5753,6 +6027,12 @@ enum wiphy_nan_flags { * @max_channel_switch_time: maximum channel switch time in milliseconds. * @dev_capabilities: NAN device capabilities as defined in Wi-Fi Aware (TM) * specification Table 79 (Capabilities field). + * @phy: Band-agnostic capabilities for NAN data interfaces. Since NAN + * operates on multiple channels simultaneously, these capabilities apply + * across all bands. Valid only if NL80211_IFTYPE_NAN_DATA is supported. + * @phy.ht: HT capabilities (mandatory for NAN data) + * @phy.vht: VHT capabilities (optional) + * @phy.he: HE capabilities (optional) */ struct wiphy_nan_capa { u32 flags; @@ -5760,6 +6040,11 @@ struct wiphy_nan_capa { u8 n_antennas; u16 max_channel_switch_time; u8 dev_capabilities; + struct { + struct ieee80211_sta_ht_cap ht; + struct ieee80211_sta_vht_cap vht; + struct ieee80211_sta_he_cap he; + } phy; }; #define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff @@ -6653,8 +6938,8 @@ enum ieee80211_ap_reg_power { * the P2P Device. 
* @ps: powersave mode is enabled * @ps_timeout: dynamic powersave timeout - * @ap_unexpected_nlportid: (private) netlink port ID of application - * registered for unexpected class 3 frames (AP mode) + * @unexpected_nlportid: (private) netlink port ID of application + * registered for unexpected frames (AP mode or NAN_DATA mode) * @conn: (private) cfg80211 software SME connection state machine data * @connect_keys: (private) keys to set after connection is established * @conn_bss_type: connecting/connected BSS type @@ -6716,7 +7001,7 @@ struct wireless_dev { bool ps; int ps_timeout; - u32 ap_unexpected_nlportid; + u32 unexpected_nlportid; u32 owner_nlportid; bool nl_owner_dead; @@ -6776,6 +7061,9 @@ struct wireless_dev { } ocb; struct { u8 cluster_id[ETH_ALEN] __aligned(2); + u8 n_channels; + struct cfg80211_chan_def *chandefs; + bool sched_update_pending; } nan; } u; @@ -8881,35 +9169,35 @@ static inline void cfg80211_sinfo_release_content(struct station_info *sinfo) /** * cfg80211_new_sta - notify userspace about station * - * @dev: the netdev + * @wdev: the wireless device * @mac_addr: the station's address * @sinfo: the station information * @gfp: allocation flags */ -void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, +void cfg80211_new_sta(struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** * cfg80211_del_sta_sinfo - notify userspace about deletion of a station - * @dev: the netdev + * @wdev: the wireless device * @mac_addr: the station's address. For MLD station, MLD address is used. * @sinfo: the station information/statistics * @gfp: allocation flags */ -void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, +void cfg80211_del_sta_sinfo(struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** * cfg80211_del_sta - notify userspace about deletion of a station * - * @dev: the netdev + * @wdev: the wireless device * @mac_addr: the station's address. 
For MLD station, MLD address is used. * @gfp: allocation flags */ -static inline void cfg80211_del_sta(struct net_device *dev, +static inline void cfg80211_del_sta(struct wireless_dev *wdev, const u8 *mac_addr, gfp_t gfp) { - cfg80211_del_sta_sinfo(dev, mac_addr, NULL, gfp); + cfg80211_del_sta_sinfo(wdev, mac_addr, NULL, gfp); } /** @@ -9284,9 +9572,10 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, * @addr: the transmitter address * @gfp: context flags * - * This function is used in AP mode (only!) to inform userspace that - * a spurious class 3 frame was received, to be able to deauth the - * sender. + * This function is used in AP mode to inform userspace that a spurious + * class 3 frame was received, to be able to deauth the sender. + * It is also used in NAN_DATA mode to report frames from unknown peers + * (A2 not assigned to any active NDP), per Wi-Fi Aware (TM) 4.0 specification 6.2.5. * Return: %true if the frame was passed to userspace (or this failed * for a reason other than not having a subscription.) */ @@ -9785,6 +10074,21 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, int cfg80211_get_radio_idx_by_chan(struct wiphy *wiphy, const struct ieee80211_channel *chan); +/** + * cfg80211_stop_link - stop AP/P2P_GO link if link_id is non-negative or stops + * all links on the interface. + * + * @wiphy: the wiphy + * @wdev: wireless device + * @link_id: valid link ID in case of MLO AP/P2P_GO Operation or else -1 + * @gfp: context flags + * + * If link_id is set during MLO operation, stops only the specified AP/P2P_GO + * link and if link_id is set to -1 or last link is stopped, the entire + * interface is stopped as if AP was stopped, IBSS/mesh left, STA disconnected. 
+ */ +void cfg80211_stop_link(struct wiphy *wiphy, struct wireless_dev *wdev, + int link_id, gfp_t gfp); /** * cfg80211_stop_iface - trigger interface disconnection @@ -9798,8 +10102,11 @@ int cfg80211_get_radio_idx_by_chan(struct wiphy *wiphy, * * Note: This doesn't need any locks and is asynchronous. */ -void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, - gfp_t gfp); +static inline void +cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, gfp_t gfp) +{ + cfg80211_stop_link(wiphy, wdev, -1, gfp); +} /** * cfg80211_shutdown_all_interfaces - shut down all interfaces for a wiphy @@ -9915,6 +10222,18 @@ void cfg80211_nan_func_terminated(struct wireless_dev *wdev, enum nl80211_nan_func_term_reason reason, u64 cookie, gfp_t gfp); +/** + * cfg80211_nan_sched_update_done - notify deferred schedule update completion + * @wdev: the wireless device reporting the event + * @success: whether or not the schedule update was successful + * @gfp: allocation flags + * + * This function notifies user space that a deferred local NAN schedule update + * (requested with %NL80211_ATTR_NAN_SCHED_DEFERRED) has been completed. 
+ */ +void cfg80211_nan_sched_update_done(struct wireless_dev *wdev, bool success, + gfp_t gfp); + /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); @@ -10147,9 +10466,9 @@ cfg80211_6ghz_power_type(u8 control, u32 client_flags) case IEEE80211_6GHZ_CTRL_REG_LPI_AP: case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP: case IEEE80211_6GHZ_CTRL_REG_AP_ROLE_NOT_RELEVANT: + case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD: return IEEE80211_REG_LPI_AP; case IEEE80211_6GHZ_CTRL_REG_SP_AP: - case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD: return IEEE80211_REG_SP_AP; case IEEE80211_6GHZ_CTRL_REG_VLP_AP: return IEEE80211_REG_VLP_AP; @@ -10255,6 +10574,39 @@ void cfg80211_nan_cluster_joined(struct wireless_dev *wdev, const u8 *cluster_id, bool new_cluster, gfp_t gfp); +/** + * cfg80211_nan_ulw_update - Notify user space about ULW update + * @wdev: Pointer to the wireless device structure + * @ulw: Pointer to the ULW blob data + * @ulw_len: Length of the ULW blob in bytes + * @gfp: Memory allocation flags + * + * This function is used by drivers to notify user space when the device's + * ULW (Unaligned Schedule) blob has been updated. User space can use this + * blob to attach to frames sent to peers. + */ +void cfg80211_nan_ulw_update(struct wireless_dev *wdev, + const u8 *ulw, size_t ulw_len, gfp_t gfp); + +/** + * cfg80211_nan_channel_evac - Notify user space about NAN channel evacuation + * @wdev: Pointer to the wireless device structure + * @chandef: Pointer to the channel definition of the NAN channel that was + * evacuated + * @gfp: Memory allocation flags + * + * This function is used by drivers to notify user space when a NAN + * channel has been evacuated (i.e. ULWed) due to channel resource conflicts + * with other interfaces. + * This can happen when another interface sharing the channel resource with NAN + * needs to move to a different channel (e.g. due to channel switch or link + * switch). 
User space may reconfigure the local schedule to exclude the + * evacuated channel. + */ +void cfg80211_nan_channel_evac(struct wireless_dev *wdev, + const struct cfg80211_chan_def *chandef, + gfp_t gfp); + #ifdef CONFIG_CFG80211_DEBUGFS /** * wiphy_locked_debugfs_read - do a locked read in debugfs @@ -10373,4 +10725,27 @@ cfg80211_s1g_get_primary_sibling(struct wiphy *wiphy, return ieee80211_get_channel_khz(wiphy, sibling_1mhz_khz); } + +/** + * cfg80211_incumbent_signal_notify - Notify userspace of incumbent signal detection + * @wiphy: the wiphy to use + * @chandef: channel definition in which the interference was detected + * @signal_interference_bitmap: bitmap indicating interference across 20 MHz segments + * @gfp: allocation context for message creation and multicast; pass GFP_ATOMIC + * if called from atomic context (e.g. firmware event handler), otherwise + * GFP_KERNEL + * + * Use this function to notify userspace when an incumbent signal is detected on + * the operating channel in the 6 GHz band. The notification includes the + * current channel definition and a bitmap representing interference across + * the operating bandwidth. Each bit in the bitmap corresponds to a 20 MHz + * segment, with the lowest bit representing the lowest frequency segment. + * Punctured sub-channels are included in the bitmap structure but are always + * set to zero since interference detection is not performed on them. 
+ */ +void cfg80211_incumbent_signal_notify(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + u32 signal_interference_bitmap, + gfp_t gfp); + #endif /* __NET_CFG80211_H */ diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index 78a27ac73070..2c1f0ec309e9 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -120,10 +120,10 @@ static bool codel_should_drop(const struct sk_buff *skb, } skb_len = skb_len_func(skb); - vars->ldelay = now - skb_time_func(skb); + WRITE_ONCE(vars->ldelay, now - skb_time_func(skb)); if (unlikely(skb_len > stats->maxpacket)) - stats->maxpacket = skb_len; + WRITE_ONCE(stats->maxpacket, skb_len); if (codel_time_before(vars->ldelay, params->target) || *backlog <= params->mtu) { @@ -158,7 +158,8 @@ static struct sk_buff *codel_dequeue(void *ctx, bool drop; if (!skb) { - vars->dropping = false; + vars->first_above_time = 0; + WRITE_ONCE(vars->dropping, false); return skb; } now = codel_get_time(); @@ -167,7 +168,7 @@ static struct sk_buff *codel_dequeue(void *ctx, if (vars->dropping) { if (!drop) { /* sojourn time below target - leave dropping state */ - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); } else if (codel_time_after_eq(now, vars->drop_next)) { /* It's time for the next drop. Drop the current * packet and dequeue the next. The dequeue might @@ -179,16 +180,18 @@ static struct sk_buff *codel_dequeue(void *ctx, */ while (vars->dropping && codel_time_after_eq(now, vars->drop_next)) { - vars->count++; /* dont care of possible wrap - * since there is no more divide - */ + /* dont care of possible wrap + * since there is no more divide. 
+ */ + WRITE_ONCE(vars->count, vars->count + 1); codel_Newton_step(vars); if (params->ecn && INET_ECN_set_ce(skb)) { - stats->ecn_mark++; - vars->drop_next = + WRITE_ONCE(stats->ecn_mark, + stats->ecn_mark + 1); + WRITE_ONCE(vars->drop_next, codel_control_law(vars->drop_next, params->interval, - vars->rec_inv_sqrt); + vars->rec_inv_sqrt)); goto end; } stats->drop_len += skb_len_func(skb); @@ -201,13 +204,13 @@ static struct sk_buff *codel_dequeue(void *ctx, skb_time_func, backlog, now)) { /* leave dropping state */ - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); } else { /* and schedule the next drop */ - vars->drop_next = + WRITE_ONCE(vars->drop_next, codel_control_law(vars->drop_next, params->interval, - vars->rec_inv_sqrt); + vars->rec_inv_sqrt)); } } } @@ -215,7 +218,7 @@ static struct sk_buff *codel_dequeue(void *ctx, u32 delta; if (params->ecn && INET_ECN_set_ce(skb)) { - stats->ecn_mark++; + WRITE_ONCE(stats->ecn_mark, stats->ecn_mark + 1); } else { stats->drop_len += skb_len_func(skb); drop_func(skb, ctx); @@ -226,7 +229,7 @@ static struct sk_buff *codel_dequeue(void *ctx, stats, skb_len_func, skb_time_func, backlog, now); } - vars->dropping = true; + WRITE_ONCE(vars->dropping, true); /* if min went above target close to when we last went below it * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. @@ -235,19 +238,20 @@ static struct sk_buff *codel_dequeue(void *ctx, if (delta > 1 && codel_time_before(now - vars->drop_next, 16 * params->interval)) { - vars->count = delta; + WRITE_ONCE(vars->count, delta); /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. 
*/ codel_Newton_step(vars); } else { - vars->count = 1; + WRITE_ONCE(vars->count, 1); vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; } - vars->lastcount = vars->count; - vars->drop_next = codel_control_law(now, params->interval, - vars->rec_inv_sqrt); + WRITE_ONCE(vars->lastcount, vars->count); + WRITE_ONCE(vars->drop_next, + codel_control_law(now, params->interval, + vars->rec_inv_sqrt)); } end: if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) { @@ -261,7 +265,7 @@ end: params->ce_threshold_selector)); } if (set_ce && INET_ECN_set_ce(skb)) - stats->ce_mark++; + WRITE_ONCE(stats->ce_mark, stats->ce_mark + 1); } return skb; } diff --git a/include/net/devlink.h b/include/net/devlink.h index cb839e0435a1..bcd31de1f890 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -129,6 +129,7 @@ struct devlink_rate { struct devlink_port { struct list_head list; struct list_head region_list; + struct list_head resource_list; struct devlink *devlink; const struct devlink_port_ops *ops; unsigned int index; @@ -1611,6 +1612,9 @@ struct devlink_ops { void *devlink_priv(struct devlink *devlink); struct devlink *priv_to_devlink(void *priv); struct device *devlink_to_dev(const struct devlink *devlink); +const char *devlink_bus_name(const struct devlink *devlink); +const char *devlink_dev_name(const struct devlink *devlink); +const char *devlink_dev_driver_name(const struct devlink *devlink); /* Devlink instance explicit locking */ void devl_lock(struct devlink *devlink); @@ -1644,6 +1648,13 @@ void devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_free(struct devlink *devlink); +struct devlink *devlink_shd_get(const char *id, + const struct devlink_ops *ops, + size_t priv_size, + const struct device_driver *driver); +void devlink_shd_put(struct devlink *devlink); +void *devlink_shd_get_priv(struct devlink *devlink); + /** * struct devlink_port_ops - Port operations * @port_split: Callback used to 
split the port into multiple ones. @@ -1875,12 +1886,19 @@ int devl_resource_register(struct devlink *devlink, u64 resource_size, u64 resource_id, u64 parent_resource_id, - const struct devlink_resource_size_params *size_params); + const struct devlink_resource_size_params *params); void devl_resources_unregister(struct devlink *devlink); void devlink_resources_unregister(struct devlink *devlink); int devl_resource_size_get(struct devlink *devlink, u64 resource_id, u64 *p_resource_size); +int +devl_port_resource_register(struct devlink_port *devlink_port, + const char *resource_name, + u64 resource_size, u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *params); +void devl_port_resources_unregister(struct devlink_port *devlink_port); int devl_dpipe_table_resource_set(struct devlink *devlink, const char *table_name, u64 resource_id, u64 resource_units); diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index a7b7abd66e21..e0ca3904ff8e 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -68,18 +68,15 @@ FN(SECURITY_HOOK) \ FN(QDISC_DROP) \ FN(QDISC_BURST_DROP) \ - FN(QDISC_OVERLIMIT) \ - FN(QDISC_CONGESTED) \ - FN(CAKE_FLOOD) \ - FN(FQ_BAND_LIMIT) \ - FN(FQ_HORIZON_LIMIT) \ - FN(FQ_FLOW_LIMIT) \ FN(CPU_BACKLOG) \ + FN(MACVLAN_BROADCAST_BACKLOG) \ + FN(IPVLAN_MULTICAST_BACKLOG) \ FN(XDP) \ FN(TC_INGRESS) \ FN(UNHANDLED_PROTO) \ FN(SKB_CSUM) \ FN(SKB_GSO_SEG) \ + FN(SKB_BAD_GSO) \ FN(SKB_UCOPY_FAULT) \ FN(DEV_HDR) \ FN(DEV_READY) \ @@ -127,9 +124,9 @@ FN(CANFD_RX_INVALID_FRAME) \ FN(CANXL_RX_INVALID_FRAME) \ FN(PFMEMALLOC) \ - FN(DUALPI2_STEP_DROP) \ FN(PSP_INPUT) \ FN(PSP_OUTPUT) \ + FN(RECURSION_LIMIT) \ FNe(MAX) /** @@ -371,8 +368,10 @@ enum skb_drop_reason { /** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */ SKB_DROP_REASON_SECURITY_HOOK, /** - * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting ( - * failed to enqueue to current qdisc) + 
* @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc during enqueue or + * dequeue. More specific drop reasons are available via the + * qdisc:qdisc_drop tracepoint, which also provides qdisc handle + * and name for identifying the source. */ SKB_DROP_REASON_QDISC_DROP, /** @@ -381,41 +380,21 @@ enum skb_drop_reason { */ SKB_DROP_REASON_QDISC_BURST_DROP, /** - * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc - * instance exceeds its total buffer size limit. - */ - SKB_DROP_REASON_QDISC_OVERLIMIT, - /** - * @SKB_DROP_REASON_QDISC_CONGESTED: dropped by a qdisc AQM algorithm - * due to congestion. - */ - SKB_DROP_REASON_QDISC_CONGESTED, - /** - * @SKB_DROP_REASON_CAKE_FLOOD: dropped by the flood protection part of - * CAKE qdisc AQM algorithm (BLUE). - */ - SKB_DROP_REASON_CAKE_FLOOD, - /** - * @SKB_DROP_REASON_FQ_BAND_LIMIT: dropped by fq qdisc when per band - * limit is reached. - */ - SKB_DROP_REASON_FQ_BAND_LIMIT, - /** - * @SKB_DROP_REASON_FQ_HORIZON_LIMIT: dropped by fq qdisc when packet - * timestamp is too far in the future. - */ - SKB_DROP_REASON_FQ_HORIZON_LIMIT, - /** - * @SKB_DROP_REASON_FQ_FLOW_LIMIT: dropped by fq qdisc when a flow - * exceeds its limits. - */ - SKB_DROP_REASON_FQ_FLOW_LIMIT, - /** * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU * backlog queue. This can be caused by backlog queue full (see * netdev_max_backlog in net.rst) or RPS flow limit */ SKB_DROP_REASON_CPU_BACKLOG, + /** + * @SKB_DROP_REASON_MACVLAN_BROADCAST_BACKLOG: failed to enqueue the skb + * to macvlan broadcast queue. + */ + SKB_DROP_REASON_MACVLAN_BROADCAST_BACKLOG, + /** + * @SKB_DROP_REASON_IPVLAN_MULTICAST_BACKLOG: failed to enqueue the skb + * to ipvlan multicast queue. 
+ */ + SKB_DROP_REASON_IPVLAN_MULTICAST_BACKLOG, /** @SKB_DROP_REASON_XDP: dropped by XDP in input path */ SKB_DROP_REASON_XDP, /** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */ @@ -426,6 +405,8 @@ enum skb_drop_reason { SKB_DROP_REASON_SKB_CSUM, /** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */ SKB_DROP_REASON_SKB_GSO_SEG, + /** @SKB_DROP_REASON_SKB_BAD_GSO: malicious gso packet. */ + SKB_DROP_REASON_SKB_BAD_GSO, /** * @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space, * e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx() @@ -613,15 +594,12 @@ enum skb_drop_reason { * reached a path or socket not eligible for use of memory reserves */ SKB_DROP_REASON_PFMEMALLOC, - /** - * @SKB_DROP_REASON_DUALPI2_STEP_DROP: dropped by the step drop - * threshold of DualPI2 qdisc. - */ - SKB_DROP_REASON_DUALPI2_STEP_DROP, /** @SKB_DROP_REASON_PSP_INPUT: PSP input checks failed */ SKB_DROP_REASON_PSP_INPUT, /** @SKB_DROP_REASON_PSP_OUTPUT: PSP output checks failed */ SKB_DROP_REASON_PSP_OUTPUT, + /** @SKB_DROP_REASON_RECURSION_LIMIT: Dead loop on virtual device. 
*/ + SKB_DROP_REASON_RECURSION_LIMIT, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/include/net/dropreason-qdisc.h b/include/net/dropreason-qdisc.h new file mode 100644 index 000000000000..fb151cd31751 --- /dev/null +++ b/include/net/dropreason-qdisc.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _LINUX_DROPREASON_QDISC_H +#define _LINUX_DROPREASON_QDISC_H +#include <net/dropreason.h> + +#define DEFINE_QDISC_DROP_REASON(FN, FNe) \ + FN(UNSPEC) \ + FN(GENERIC) \ + FN(OVERLIMIT) \ + FN(CONGESTED) \ + FN(MAXFLOWS) \ + FN(FLOOD_PROTECTION) \ + FN(BAND_LIMIT) \ + FN(HORIZON_LIMIT) \ + FN(FLOW_LIMIT) \ + FN(L4S_STEP_NON_ECN) \ + FNe(MAX) + +#undef FN +#undef FNe +#define FN(reason) QDISC_DROP_##reason, +#define FNe(reason) QDISC_DROP_##reason + +/** + * enum qdisc_drop_reason - reason why a qdisc dropped a packet + * + * Qdisc-specific drop reasons for packet drops that occur within the + * traffic control (TC) queueing discipline layer. These reasons provide + * detailed diagnostics about why packets were dropped by various qdisc + * algorithms, enabling fine-grained monitoring and troubleshooting of + * queue behavior. + */ +enum qdisc_drop_reason { + /** + * @QDISC_DROP_UNSPEC: unspecified/invalid qdisc drop reason. + * Value 0 serves as analogous to SKB_NOT_DROPPED_YET for enum skb_drop_reason. + * Used for catching zero-initialized drop_reason fields. + */ + QDISC_DROP_UNSPEC = 0, + /** + * @__QDISC_DROP_REASON: subsystem base value for qdisc drop reasons + */ + __QDISC_DROP_REASON = SKB_DROP_REASON_SUBSYS_QDISC << + SKB_DROP_REASON_SUBSYS_SHIFT, + /** + * @QDISC_DROP_GENERIC: generic/default qdisc drop, used when no + * more specific reason applies + */ + QDISC_DROP_GENERIC, + /** + * @QDISC_DROP_OVERLIMIT: packet dropped because the qdisc queue + * length exceeded its configured limit (sch->limit). 
This typically + * indicates the queue is full and cannot accept more packets. + */ + QDISC_DROP_OVERLIMIT, + /** + * @QDISC_DROP_CONGESTED: packet dropped due to active congestion + * control algorithms (e.g., CoDel, PIE, RED) detecting network + * congestion. The qdisc proactively dropped the packet to signal + * congestion to the sender and prevent bufferbloat. + */ + QDISC_DROP_CONGESTED, + /** + * @QDISC_DROP_MAXFLOWS: packet dropped because the qdisc's flow + * tracking table is full and no free slots are available to allocate + * for a new flow. This indicates flow table exhaustion in flow-based + * qdiscs that maintain per-flow state (e.g., SFQ). + */ + QDISC_DROP_MAXFLOWS, + /** + * @QDISC_DROP_FLOOD_PROTECTION: packet dropped by flood protection + * mechanism detecting unresponsive flows (potential DoS/flood). + * Used by qdiscs implementing probabilistic drop algorithms like + * BLUE (e.g., CAKE's Cobalt AQM). + */ + QDISC_DROP_FLOOD_PROTECTION, + /** + * @QDISC_DROP_BAND_LIMIT: packet dropped because the priority band's + * limit was reached. Used by qdiscs with priority bands that have + * per-band packet limits (e.g., FQ). + */ + QDISC_DROP_BAND_LIMIT, + /** + * @QDISC_DROP_HORIZON_LIMIT: packet dropped because its timestamp + * is too far in the future (beyond the configured horizon). + * Used by qdiscs with time-based scheduling (e.g., FQ). + */ + QDISC_DROP_HORIZON_LIMIT, + /** + * @QDISC_DROP_FLOW_LIMIT: packet dropped because an individual flow + * exceeded its per-flow packet/depth limit. Used by FQ and SFQ qdiscs + * to enforce per-flow fairness and prevent a single flow from + * monopolizing queue resources. + */ + QDISC_DROP_FLOW_LIMIT, + /** + * @QDISC_DROP_L4S_STEP_NON_ECN: DualPI2 qdisc dropped a non-ECN-capable + * packet because the L4S queue delay exceeded the step threshold. + * Since the packet cannot be ECN-marked, it must be dropped to signal + * congestion. See RFC 9332 for the DualQ Coupled AQM step mechanism. 
+ */ + QDISC_DROP_L4S_STEP_NON_ECN, + /** + * @QDISC_DROP_MAX: the maximum of qdisc drop reasons, which + * shouldn't be used as a real 'reason' - only for tracing code gen + */ + QDISC_DROP_MAX, +}; + +#undef FN +#undef FNe + +#endif diff --git a/include/net/dropreason.h b/include/net/dropreason.h index 7d3b1a2a6fec..1df60645fb27 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -23,6 +23,12 @@ enum skb_drop_reason_subsys { */ SKB_DROP_REASON_SUBSYS_OPENVSWITCH, + /** + * @SKB_DROP_REASON_SUBSYS_QDISC: TC qdisc drop reasons, + * see include/net/dropreason-qdisc.h + */ + SKB_DROP_REASON_SUBSYS_QDISC, + /** @SKB_DROP_REASON_SUBSYS_NUM: number of subsystems defined */ SKB_DROP_REASON_SUBSYS_NUM }; diff --git a/include/net/dsa.h b/include/net/dsa.h index 6b2b5ed64ea4..8b6d34e8a6f0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -57,6 +57,7 @@ struct tc_action; #define DSA_TAG_PROTO_BRCM_LEGACY_FCS_VALUE 29 #define DSA_TAG_PROTO_YT921X_VALUE 30 #define DSA_TAG_PROTO_MXL_GSW1XX_VALUE 31 +#define DSA_TAG_PROTO_MXL862_VALUE 32 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -91,6 +92,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_VSC73XX_8021Q = DSA_TAG_PROTO_VSC73XX_8021Q_VALUE, DSA_TAG_PROTO_YT921X = DSA_TAG_PROTO_YT921X_VALUE, DSA_TAG_PROTO_MXL_GSW1XX = DSA_TAG_PROTO_MXL_GSW1XX_VALUE, + DSA_TAG_PROTO_MXL862 = DSA_TAG_PROTO_MXL862_VALUE, }; struct dsa_switch; @@ -216,12 +218,6 @@ struct dsa_mall_mirror_tc_entry { bool ingress; }; -/* TC port policer entry */ -struct dsa_mall_policer_tc_entry { - u32 burst; - u64 rate_bytes_per_sec; -}; - /* TC matchall entry */ struct dsa_mall_tc_entry { struct list_head list; @@ -229,7 +225,7 @@ struct dsa_mall_tc_entry { enum dsa_port_mall_action_type type; union { struct dsa_mall_mirror_tc_entry mirror; - struct dsa_mall_policer_tc_entry policer; + struct flow_action_police policer; }; }; @@ -835,6 +831,22 @@ dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst, return false; 
} +#define dsa_switch_for_each_bridge_member(_dp, _ds, _bdev) \ + dsa_switch_for_each_user_port(_dp, _ds) \ + if (dsa_port_offloads_bridge_dev(_dp, _bdev)) + +static inline u32 +dsa_bridge_ports(struct dsa_switch *ds, const struct net_device *bdev) +{ + struct dsa_port *dp; + u32 mask = 0; + + dsa_switch_for_each_bridge_member(dp, ds, bdev) + mask |= BIT(dp->index); + + return mask; +} + static inline bool dsa_port_tree_same(const struct dsa_port *a, const struct dsa_port *b) { @@ -1110,7 +1122,7 @@ struct dsa_switch_ops { void (*port_mirror_del)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); int (*port_policer_add)(struct dsa_switch *ds, int port, - struct dsa_mall_policer_tc_entry *policer); + const struct flow_action_police *policer); void (*port_policer_del)(struct dsa_switch *ds, int port); int (*port_setup_tc)(struct dsa_switch *ds, int port, enum tc_setup_type type, void *type_data); diff --git a/include/net/dst.h b/include/net/dst.h index f8aa1239b4db..307073eae7f8 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -219,6 +219,12 @@ static inline u32 dst_mtu(const struct dst_entry *dst) return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst); } +/* Variant of dst_mtu() for IPv4 users. 
*/ +static inline u32 dst4_mtu(const struct dst_entry *dst) +{ + return INDIRECT_CALL_1(dst->ops->mtu, ipv4_mtu, dst); +} + /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric) { diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 596ab9791e4d..70a02ee14308 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -231,6 +231,21 @@ struct flow_action_cookie *flow_action_cookie_create(void *data, gfp_t gfp); void flow_action_cookie_destroy(struct flow_action_cookie *cookie); +struct flow_action_police { + u32 burst; + u64 rate_bytes_ps; + u64 peakrate_bytes_ps; + u32 avrate; + u16 overhead; + u64 burst_pkt; + u64 rate_pkt_ps; + u32 mtu; + struct { + enum flow_action_id act_id; + u32 extval; + } exceed, notexceed; +}; + struct flow_action_entry { enum flow_action_id id; u32 hw_index; @@ -275,20 +290,7 @@ struct flow_action_entry { u32 trunc_size; bool truncate; } sample; - struct { /* FLOW_ACTION_POLICE */ - u32 burst; - u64 rate_bytes_ps; - u64 peakrate_bytes_ps; - u32 avrate; - u16 overhead; - u64 burst_pkt; - u64 rate_pkt_ps; - u32 mtu; - struct { - enum flow_action_id act_id; - u32 extval; - } exceed, notexceed; - } police; + struct flow_action_police police; /* FLOW_ACTION_POLICE */ struct { /* FLOW_ACTION_CT */ int action; u16 zone; @@ -526,7 +528,7 @@ static inline bool flow_rule_has_enc_control_flags(const u32 enc_ctrl_flags, * * Return: true if control flags are set, false otherwise. 
*/ -static inline bool flow_rule_match_has_control_flags(struct flow_rule *rule, +static inline bool flow_rule_match_has_control_flags(const struct flow_rule *rule, struct netlink_ext_ack *extack) { struct flow_match_control match; @@ -718,7 +720,7 @@ struct flow_offload_action { struct flow_offload_action *offload_action_alloc(unsigned int num_actions); static inline struct flow_rule * -flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd) +flow_cls_offload_flow_rule(const struct flow_cls_offload *flow_cmd) { return flow_cmd->rule; } diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 9467e33dfb36..8aca881937da 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -358,7 +358,7 @@ static int fq_init(struct fq *fq, int flows_cnt) fq->limit = 8192; fq->memory_limit = 16 << 20; /* 16 MBytes */ - fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); + fq->flows = kvzalloc_objs(fq->flows[0], fq->flows_cnt); if (!fq->flows) return -ENOMEM; diff --git a/include/net/gro.h b/include/net/gro.h index b65f631c521d..2300b6da05b2 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -405,9 +405,8 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); +struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *); +int udp6_gro_complete(struct sk_buff *, int); #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ ({ \ diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 6632b1aa7584..62534d1f3c70 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -6,6 +6,9 @@ #include <linux/types.h> #include <linux/netdevice.h> #include <net/protocol.h> +#ifdef CONFIG_RPS +#include <net/rps-types.h> 
+#endif struct skb_defer_node { struct llist_head defer_list; @@ -33,7 +36,7 @@ struct net_hotdata { struct kmem_cache *skbuff_fclone_cache; struct kmem_cache *skb_small_head_cache; #ifdef CONFIG_RPS - struct rps_sock_flow_table __rcu *rps_sock_flow_table; + rps_tag_ptr rps_sock_flow_table; u32 rps_cpu_mask; #endif struct skb_defer_node __percpu *skb_defer_nodes; diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 745891d2e113..b814e1acc512 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -18,10 +18,14 @@ struct sk_buff; struct sock; struct sockaddr; -struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, +struct dst_entry *inet6_csk_route_socket(struct sock *sk, + struct flowi6 *fl6); + +struct dst_entry *inet6_csk_route_req(const struct sock *sk, + struct dst_entry *dst, + struct flowi6 *fl6, const struct request_sock *req, u8 proto); int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); -struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu); #endif /* _INET6_CONNECTION_SOCK_H */ diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 282e29237d93..2cc5d416bbb5 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -24,6 +24,8 @@ struct inet_hashinfo; +void inet6_init_ehash_secret(void); + static inline unsigned int __inet6_ehashfn(const u32 lhash, const u16 lport, const u32 fhash, @@ -175,7 +177,7 @@ static inline bool inet6_match(const struct net *net, const struct sock *sk, { if (!net_eq(sock_net(sk), net) || sk->sk_family != AF_INET6 || - sk->sk_portpair != ports || + READ_ONCE(sk->sk_portpair) != ports || !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return false; diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 5dd2bf24449e..3d747896be30 100644 --- a/include/net/inet_common.h +++ 
b/include/net/inet_common.h @@ -59,8 +59,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net); -int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, - int *addr_len); +int inet_recv_error(struct sock *sk, struct msghdr *msg, int len); struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb); int inet_gro_complete(struct sk_buff *skb, int nhoff); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ecb362025c4e..433c2df23076 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -34,7 +34,7 @@ struct tcp_congestion_ops; */ struct inet_connection_sock_af_ops { int (*queue_xmit)(struct sock *sk, struct sk_buff *skb, struct flowi *fl); - void (*send_check)(struct sock *sk, struct sk_buff *skb); + u16 net_header_len; int (*rebuild_header)(struct sock *sk); void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb); int (*conn_request)(struct sock *sk, struct sk_buff *skb); @@ -42,8 +42,9 @@ struct inet_connection_sock_af_ops { struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req); - u16 net_header_len; + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)); int (*setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int (*getsockopt)(struct sock *sk, int level, int optname, diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index ea32393464a2..827b87a95dab 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -51,11 +51,25 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) return outer; } +/* Apply either ECT(0) or ECT(1) */ +static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1) +{ + __u8 ect = use_ect_1 ? 
INET_ECN_ECT_1 : INET_ECN_ECT_0; + + /* Mask the complete byte in case the connection alternates between + * ECT(0) and ECT(1). + */ + inet_sk(sk)->tos &= ~INET_ECN_MASK; + inet_sk(sk)->tos |= ect; + if (inet6_sk(sk)) { + inet6_sk(sk)->tclass &= ~INET_ECN_MASK; + inet6_sk(sk)->tclass |= ect; + } +} + static inline void INET_ECN_xmit(struct sock *sk) { - inet_sk(sk)->tos |= INET_ECN_ECT_0; - if (inet6_sk(sk) != NULL) - inet6_sk(sk)->tclass |= INET_ECN_ECT_0; + __INET_ECN_xmit(sk, false); } static inline void INET_ECN_dontxmit(struct sock *sk) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ac05a52d9e13..6e2fe186d0dc 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -264,6 +264,20 @@ inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk, return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } +static inline bool inet_use_hash2_on_bind(const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + return false; + + if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + return true; + } +#endif + return sk->sk_rcv_saddr != htonl(INADDR_ANY); +} + struct inet_bind_hashbucket * inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port); @@ -286,7 +300,6 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, unsigned long high_limit); -int inet_hashinfo2_init_mod(struct inet_hashinfo *h); bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, @@ -345,7 +358,7 @@ static inline bool inet_match(const struct net *net, const struct sock *sk, int dif, int sdif) { if (!net_eq(sock_net(sk), net) || - sk->sk_portpair != ports || + READ_ONCE(sk->sk_portpair) != ports || sk->sk_addrpair != cookie) return false; diff --git 
a/include/net/inet_sock.h b/include/net/inet_sock.h index ac1c75975908..7cdcbed3e5cb 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -26,6 +26,8 @@ #include <net/tcp_states.h> #include <net/l3mdev.h> +#define IP_OPTIONS_DATA_FIXED_SIZE 40 + /** struct ip_options - IP Options * * @faddr - Saved first hop address @@ -58,12 +60,9 @@ struct ip_options { struct ip_options_rcu { struct rcu_head rcu; - struct ip_options opt; -}; -struct ip_options_data { - struct ip_options_rcu opt; - char data[40]; + /* Must be last as it ends in a flexible-array member. */ + struct ip_options opt; }; struct inet_request_sock { @@ -101,10 +100,7 @@ struct inet_request_sock { }; }; -static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) -{ - return (struct inet_request_sock *)sk; -} +#define inet_rsk(ptr) container_of_const(ptr, struct inet_request_sock, req) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { @@ -163,6 +159,13 @@ static inline bool inet_sk_bound_dev_eq(const struct net *net, #endif } +struct inet6_cork { + struct ipv6_txoptions *opt; + u8 hop_limit; + u8 tclass; + u8 dontfrag:1; +}; + struct inet_cork { unsigned int flags; __be32 addr; @@ -183,6 +186,9 @@ struct inet_cork { struct inet_cork_full { struct inet_cork base; struct flowi fl; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_cork base6; +#endif }; struct ip_mc_socklist; diff --git a/include/net/ioam6.h b/include/net/ioam6.h index 2cbbee6e806a..a75912fe247e 100644 --- a/include/net/ioam6.h +++ b/include/net/ioam6.h @@ -60,6 +60,8 @@ void ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_trace_hdr *trace, bool is_input); +u8 ioam6_trace_compute_nodelen(u32 trace_type); + int ioam6_init(void); void ioam6_exit(void); diff --git a/include/net/ip.h b/include/net/ip.h index 69d5cef46004..7f2fe1a8401b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -101,7 +101,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm->oif = 
READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; - ipcm->protocol = inet->inet_num; + ipcm->protocol = READ_ONCE(inet->inet_num); } #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) @@ -692,14 +692,6 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif -#if IS_MODULE(CONFIG_IPV6) -#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X) -#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X) -#else -#define EXPORT_IPV6_MOD(X) -#define EXPORT_IPV6_MOD_GPL(X) -#endif - static inline unsigned int ipv4_addr_hash(__be32 ip) { return (__force unsigned int) ip; @@ -812,7 +804,7 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index c8a96b888277..6677b3cc3972 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -82,6 +82,4 @@ static inline __sum16 udp_v6_check(int len, void udp6_set_csum(bool nocheck, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int len); - -int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto); #endif diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 88b0dd4d8e09..9cd27e1b9b69 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -486,11 +486,30 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) rcu_read_unlock(); } +#if IS_ENABLED(CONFIG_IPV6) int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct 
fib6_nh *fib6_nh); void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); +#else +static inline int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel"); + return -EAFNOSUPPORT; +} + +static inline void fib6_nh_release(struct fib6_nh *fib6_nh) +{ +} + +static inline void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) +{ +} +#endif + int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, @@ -502,17 +521,26 @@ int call_fib6_multipath_entry_notifiers(struct net *net, unsigned int nsiblings, struct netlink_ext_ack *extack); int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt); +#if IS_ENABLED(CONFIG_IPV6) void fib6_rt_update(struct net *net, struct fib6_info *rt, struct nl_info *info); +#else +static inline void fib6_rt_update(struct net *net, struct fib6_info *rt, + struct nl_info *info) +{ +} +#endif void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now); void fib6_run_gc(unsigned long expires, struct net *net, bool force); - void fib6_gc_cleanup(void); int fib6_init(void); +#if IS_ENABLED(CONFIG_IPV6) /* Add the route to the gc list if it is not already there * * The callers should hold f6i->fib6_table->tb6_lock. 
@@ -545,6 +573,23 @@ static inline void fib6_remove_gc_list(struct fib6_info *f6i) hlist_del_init(&f6i->gc_link); } +static inline void fib6_may_remove_gc_list(struct net *net, + struct fib6_info *f6i) +{ + struct fib6_gc_args gc_args; + + if (hlist_unhashed(&f6i->gc_link)) + return; + + gc_args.timeout = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_interval); + gc_args.more = 0; + + rcu_read_lock(); + fib6_age_exceptions(f6i, &gc_args, jiffies); + rcu_read_unlock(); +} +#endif + struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; @@ -569,8 +614,13 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); void fib6_update_sernum(struct net *net, struct fib6_info *rt); +#if IS_ENABLED(CONFIG_IPV6) void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); -void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i); +#else +static inline void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt) +{ +} +#endif void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) @@ -580,7 +630,7 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, bool offload, bool trap, bool offload_failed); -#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) +#if IS_ENABLED(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) struct bpf_iter__ipv6_route { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct fib6_info *, rt); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 7c5512baa4b2..09ffe0f13ce7 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -77,7 +77,14 @@ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) f6i->fib6_nh->fib_nh_gw_family; } +#if IS_ENABLED(CONFIG_IPV6) void ip6_route_input(struct sk_buff *skb); +#else +static inline void 
ip6_route_input(struct sk_buff *skb) +{ +} +#endif + struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, struct flowi6 *fl6, @@ -119,7 +126,15 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); int ip6_ins_rt(struct net *net, struct fib6_info *f6i); +#if IS_ENABLED(CONFIG_IPV6) int ip6_del_rt(struct net *net, struct fib6_info *f6i, bool skip_notify); +#else +static inline int ip6_del_rt(struct net *net, struct fib6_info *f6i, + bool skip_notify) +{ + return -EAFNOSUPPORT; +} +#endif void rt6_flush_exceptions(struct fib6_info *f6i); void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, @@ -252,19 +267,43 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) return rt->rt6i_flags & RTF_LOCAL; } +static inline bool __ipv6_anycast_destination(const struct rt6key *rt6i_dst, + u32 rt6i_flags, + const struct in6_addr *daddr) +{ + return rt6i_flags & RTF_ANYCAST || + (rt6i_dst->plen < 127 && + !(rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && + ipv6_addr_equal(&rt6i_dst->addr, daddr)); +} + static inline bool ipv6_anycast_destination(const struct dst_entry *dst, const struct in6_addr *daddr) { const struct rt6_info *rt = dst_rt6_info(dst); - return rt->rt6i_flags & RTF_ANYCAST || - (rt->rt6i_dst.plen < 127 && - !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && - ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); + return __ipv6_anycast_destination(&rt->rt6i_dst, rt->rt6i_flags, daddr); } +#if IS_ENABLED(CONFIG_IPV6) int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); +#else +static inline int ip6_fragment(struct net *net, struct sock *sk, + struct sk_buff *skb, + int (*output)(struct net *, struct sock *, + struct sk_buff *)) +{ + kfree_skb(skb); + return -EAFNOSUPPORT; +} +#endif + +/* Variant of dst_mtu() for IPv6 users */ 
+static inline u32 dst6_mtu(const struct dst_entry *dst) +{ + return INDIRECT_CALL_1(dst->ops->mtu, ip6_mtu, dst); +} static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) { diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 120db2865811..b99805ee2fd1 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -156,6 +156,18 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, { int pkt_len, err; + if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) { + if (dev) { + net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", + dev->name); + DEV_STATS_INC(dev, tx_errors); + } + kfree_skb_reason(skb, SKB_DROP_REASON_RECURSION_LIMIT); + return; + } + + dev_xmit_recursion_inc(); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); IP6CB(skb)->flags = ip6cb_flags; pkt_len = skb->len - skb_inner_network_offset(skb); @@ -166,6 +178,8 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, pkt_len = -1; iptunnel_xmit_stats(dev, pkt_len); } + + dev_xmit_recursion_dec(); } #endif #endif diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b4495c38e0a0..318593743b6e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -559,7 +559,7 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net, siphash_aligned_key_t hash_key; u32 mp_seed; - mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; + mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed); fib_multipath_hash_construct_key(&hash_key, mp_seed); return flow_hash_from_keys_seed(keys, &hash_key); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 4021e6a73e32..d708b66e55cd 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -27,6 +27,13 @@ #include <net/ip6_route.h> #endif +/* Recursion limit for tunnel xmit to detect routing loops. 
+ * Unlike XMIT_RECURSION_LIMIT (8) used in the no-qdisc path, tunnel + * recursion involves route lookups and full IP output, consuming much + * more stack per level, so a lower limit is needed. + */ +#define IP_TUNNEL_RECURSION_LIMIT 5 + /* Keep error state on tunnel for 30 sec */ #define IPTUNNEL_ERR_TIMEO (30*HZ) @@ -658,13 +665,29 @@ static inline int iptunnel_pull_offloads(struct sk_buff *skb) static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { if (pkt_len > 0) { - struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); - - u64_stats_update_begin(&tstats->syncp); - u64_stats_add(&tstats->tx_bytes, pkt_len); - u64_stats_inc(&tstats->tx_packets); - u64_stats_update_end(&tstats->syncp); - put_cpu_ptr(tstats); + if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_DSTATS) { + struct pcpu_dstats *dstats = get_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_add(&dstats->tx_bytes, pkt_len); + u64_stats_inc(&dstats->tx_packets); + u64_stats_update_end(&dstats->syncp); + put_cpu_ptr(dstats); + return; + } + if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) { + struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + u64_stats_add(&tstats->tx_bytes, pkt_len); + u64_stats_inc(&tstats->tx_packets); + u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); + return; + } + pr_err_once("iptunnel_xmit_stats pcpu_stat_type=%d\n", + dev->pcpu_stat_type); + WARN_ON_ONCE(1); return; } diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 29a36709e7f3..72d325c81313 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -11,6 +11,7 @@ #include <asm/types.h> /* for __uXX types */ #include <linux/list.h> /* for struct list_head */ +#include <linux/rculist_bl.h> /* for struct hlist_bl_head */ #include <linux/spinlock.h> /* for struct rwlock_t */ #include <linux/atomic.h> /* for struct atomic_t */ #include <linux/refcount.h> /* for struct refcount_t */ @@ -30,10 
+31,23 @@ #endif #include <net/net_namespace.h> /* Netw namespace */ #include <linux/sched/isolation.h> +#include <linux/siphash.h> #define IP_VS_HDR_INVERSE 1 #define IP_VS_HDR_ICMP 2 +/* conn_tab limits (as per Kconfig) */ +#define IP_VS_CONN_TAB_MIN_BITS 8 +#if BITS_PER_LONG > 32 +#define IP_VS_CONN_TAB_MAX_BITS 27 +#else +#define IP_VS_CONN_TAB_MAX_BITS 20 +#endif + +/* svc_table limits */ +#define IP_VS_SVC_TAB_MIN_BITS 4 +#define IP_VS_SVC_TAB_MAX_BITS 20 + /* Generic access of ipvs struct */ static inline struct netns_ipvs *net_ipvs(struct net* net) { @@ -43,8 +57,6 @@ static inline struct netns_ipvs *net_ipvs(struct net* net) /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; -extern struct mutex __ip_vs_mutex; - struct ip_vs_iphdr { int hdr_flags; /* ipvs flags */ __u32 off; /* Where IP or IPv4 header starts */ @@ -265,6 +277,29 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, pr_err(msg, ##__VA_ARGS__); \ } while (0) +struct ip_vs_aligned_lock { + spinlock_t l; /* Protect buckets */ +} ____cacheline_aligned_in_smp; + +/* For arrays per family */ +enum { + IP_VS_AF_INET, + IP_VS_AF_INET6, + IP_VS_AF_MAX +}; + +static inline int ip_vs_af_index(int af) +{ + return af == AF_INET6 ? IP_VS_AF_INET6 : IP_VS_AF_INET; +} + +/* work_flags */ +enum { + IP_VS_WORK_SVC_RESIZE, /* Schedule svc_resize_work */ + IP_VS_WORK_SVC_NORESIZE, /* Stopping svc_resize_work */ + IP_VS_WORK_CONN_RESIZE, /* Schedule conn_resize_work */ +}; + /* The port number of FTP service (in network order). 
*/ #define FTPPORT cpu_to_be16(21) #define FTPDATA cpu_to_be16(20) @@ -466,6 +501,198 @@ struct ip_vs_est_kt_data { int est_row; /* estimated row */ }; +/* IPVS resizable hash tables */ +struct ip_vs_rht { + struct hlist_bl_head *buckets; + struct ip_vs_rht __rcu *new_tbl; /* New/Same table */ + seqcount_t *seqc; /* Protects moves */ + struct ip_vs_aligned_lock *lock; /* Protect seqc */ + int mask; /* Buckets mask */ + int size; /* Buckets */ + int seqc_mask; /* seqc mask */ + int lock_mask; /* lock mask */ + u32 table_id; + int u_thresh; /* upper threshold */ + int l_thresh; /* lower threshold */ + int lfactor; /* Load Factor (shift)*/ + int bits; /* size = 1 << bits */ + siphash_key_t hash_key; + struct rcu_head rcu_head; +}; + +/** + * ip_vs_rht_for_each_table() - Walk the hash tables + * @table: struct ip_vs_rht __rcu *table + * @t: current table, used as cursor, struct ip_vs_rht *var + * @p: previous table, temp struct ip_vs_rht *var + * + * Walk tables assuming others can not change the installed tables + */ +#define ip_vs_rht_for_each_table(table, t, p) \ + for (p = NULL, t = rcu_dereference_protected(table, 1); \ + t != p; \ + p = t, t = rcu_dereference_protected(t->new_tbl, 1)) + +/** + * ip_vs_rht_for_each_table_rcu() - Walk the hash tables under RCU reader lock + * @table: struct ip_vs_rht __rcu *table + * @t: current table, used as cursor, struct ip_vs_rht *var + * @p: previous table, temp struct ip_vs_rht *var + * + * We usually search in one table and also in second table on resizing + */ +#define ip_vs_rht_for_each_table_rcu(table, t, p) \ + for (p = NULL, t = rcu_dereference(table); \ + t != p; \ + p = t, t = rcu_dereference(t->new_tbl)) + +/** + * ip_vs_rht_for_each_bucket() - Walk all table buckets + * @t: current table, used as cursor, struct ip_vs_rht *var + * @bucket: bucket index, used as cursor, u32 var + * @head: bucket address, used as cursor, struct hlist_bl_head *var + */ +#define ip_vs_rht_for_each_bucket(t, bucket, head) \ + for (bucket 
= 0, head = (t)->buckets; \ + bucket < t->size; bucket++, head++) + +/** + * ip_vs_rht_for_bucket_retry() - Retry bucket if entries are moved + * @t: current table, used as cursor, struct ip_vs_rht *var + * @bucket: index of current bucket or hash key + * @sc: temp seqcount_t *var + * @seq: temp unsigned int var for sequence count + * @retry: temp int var + */ +#define ip_vs_rht_for_bucket_retry(t, bucket, sc, seq, retry) \ + for (retry = 1, sc = &(t)->seqc[(bucket) & (t)->seqc_mask]; \ + retry && ({ seq = read_seqcount_begin(sc); 1; }); \ + retry = read_seqcount_retry(sc, seq)) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU() - Declare variables + * + * Variables for ip_vs_rht_walk_buckets_rcu + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU() \ + struct ip_vs_rht *_t, *_p; \ + unsigned int _seq; \ + seqcount_t *_sc; \ + u32 _bucket; \ + int _retry +/** + * ip_vs_rht_walk_buckets_rcu() - Walk all buckets under RCU read lock + * @table: struct ip_vs_rht __rcu *table + * @head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Can be used while others add/delete/move entries + * Not suitable if duplicates are not desired + * Possible cases for reader that uses cond_resched_rcu() in the loop: + * - new table can not be installed, no need to repeat + * - new table can be installed => check and repeat if new table is + * installed, needed for !PREEMPT_RCU + */ +#define ip_vs_rht_walk_buckets_rcu(table, head) \ + ip_vs_rht_for_each_table_rcu(table, _t, _p) \ + ip_vs_rht_for_each_bucket(_t, _bucket, head) \ + ip_vs_rht_for_bucket_retry(_t, _bucket, _sc, \ + _seq, _retry) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKET_RCU() - Declare variables + * + * Variables for ip_vs_rht_walk_bucket_rcu + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKET_RCU() \ + unsigned int _seq; \ + seqcount_t *_sc; \ + int _retry +/** + * ip_vs_rht_walk_bucket_rcu() - Walk bucket under RCU read lock + * @t: current table, struct ip_vs_rht *var + * @bucket: index of current bucket or hash key + * 
@head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Can be used while others add/delete/move entries + * Not suitable if duplicates are not desired + * Possible cases for reader that uses cond_resched_rcu() in the loop: + * - new table can not be installed, no need to repeat + * - new table can be installed => check and repeat if new table is + * installed, needed for !PREEMPT_RCU + */ +#define ip_vs_rht_walk_bucket_rcu(t, bucket, head) \ + if (({ head = (t)->buckets + ((bucket) & (t)->mask); 0; })) \ + {} \ + else \ + ip_vs_rht_for_bucket_retry(t, (bucket), _sc, _seq, _retry) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU() - Declare variables + * + * Variables for ip_vs_rht_walk_buckets_safe_rcu + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU() \ + struct ip_vs_rht *_t, *_p; \ + u32 _bucket +/** + * ip_vs_rht_walk_buckets_safe_rcu() - Walk all buckets under RCU read lock + * @table: struct ip_vs_rht __rcu *table + * @head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Can be used while others add/delete entries but moving is disabled + * Using cond_resched_rcu() should be safe if tables do not change + */ +#define ip_vs_rht_walk_buckets_safe_rcu(table, head) \ + ip_vs_rht_for_each_table_rcu(table, _t, _p) \ + ip_vs_rht_for_each_bucket(_t, _bucket, head) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKETS() - Declare variables + * + * Variables for ip_vs_rht_walk_buckets + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKETS() \ + struct ip_vs_rht *_t, *_p; \ + u32 _bucket + +/** + * ip_vs_rht_walk_buckets() - Walk all buckets + * @table: struct ip_vs_rht __rcu *table + * @head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Use if others can not add/delete/move entries + */ +#define ip_vs_rht_walk_buckets(table, head) \ + ip_vs_rht_for_each_table(table, _t, _p) \ + ip_vs_rht_for_each_bucket(_t, _bucket, head) + +/* Entries can be in one of two tables, so we flip bit when new table is + * created and store it as 
highest bit in hash keys + */ +#define IP_VS_RHT_TABLE_ID_MASK BIT(31) + +/* Check if hash key is from this table */ +static inline bool ip_vs_rht_same_table(struct ip_vs_rht *t, u32 hash_key) +{ + return !((t->table_id ^ hash_key) & IP_VS_RHT_TABLE_ID_MASK); +} + +/* Build per-table hash key from hash value */ +static inline u32 ip_vs_rht_build_hash_key(struct ip_vs_rht *t, u32 hash) +{ + return t->table_id | (hash & ~IP_VS_RHT_TABLE_ID_MASK); +} + +void ip_vs_rht_free(struct ip_vs_rht *t); +void ip_vs_rht_rcu_free(struct rcu_head *head); +struct ip_vs_rht *ip_vs_rht_alloc(int buckets, int scounts, int locks); +int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n, + int lfactor, int min_bits, int max_bits); +void ip_vs_rht_set_thresholds(struct ip_vs_rht *t, int size, int lfactor, + int min_bits, int max_bits); +u32 ip_vs_rht_hash_linfo(struct ip_vs_rht *t, int af, + const union nf_inet_addr *addr, u32 v1, u32 v2); + struct dst_entry; struct iphdr; struct ip_vs_conn; @@ -559,50 +786,48 @@ struct ip_vs_conn_param { __u8 pe_data_len; }; +/* Hash node in conn_tab */ +struct ip_vs_conn_hnode { + struct hlist_bl_node node; /* node in conn_tab */ + u32 hash_key; /* Key for the hash table */ + u8 dir; /* 0=out->in, 1=in->out */ +} __packed; + /* IP_VS structure allocated for each dynamically scheduled connection */ struct ip_vs_conn { - struct hlist_node c_list; /* hashed list heads */ - /* Protocol, addresses and port numbers */ + /* Cacheline for hash table nodes - rarely modified */ + + struct ip_vs_conn_hnode hn0; /* Original direction */ + u8 af; /* address family */ __be16 cport; + struct ip_vs_conn_hnode hn1; /* Reply direction */ + u8 daf; /* Address family of the dest */ __be16 dport; - __be16 vport; - u16 af; /* address family */ - union nf_inet_addr caddr; /* client address */ - union nf_inet_addr vaddr; /* virtual address */ - union nf_inet_addr daddr; /* destination address */ + struct ip_vs_dest *dest; /* real server */ + atomic_t 
n_control; /* Number of controlled ones */ volatile __u32 flags; /* status flags */ - __u16 protocol; /* Which protocol (TCP/UDP) */ - __u16 daf; /* Address family of the dest */ - struct netns_ipvs *ipvs; - - /* counter and timer */ - refcount_t refcnt; /* reference count */ - struct timer_list timer; /* Expiration timer */ - volatile unsigned long timeout; /* timeout */ + /* 44/64 */ - /* Flags and state transition */ - spinlock_t lock; /* lock for state transition */ + struct ip_vs_conn *control; /* Master control connection */ + const struct ip_vs_pe *pe; + char *pe_data; + __u8 pe_data_len; volatile __u16 state; /* state info */ volatile __u16 old_state; /* old state, to be used for * state transition triggered * synchronization */ - __u32 fwmark; /* Fire wall mark from skb */ - unsigned long sync_endtime; /* jiffies + sent_retries */ + /* 2-byte hole */ + /* 64/96 */ - /* Control members */ - struct ip_vs_conn *control; /* Master control connection */ - atomic_t n_control; /* Number of controlled ones */ - struct ip_vs_dest *dest; /* real server */ - atomic_t in_pkts; /* incoming packet counter */ + union nf_inet_addr caddr; /* client address */ + union nf_inet_addr vaddr; /* virtual address */ + /* 96/128 */ - /* Packet transmitter for different forwarding methods. If it - * mangles the packet, it must return NF_DROP or better NF_STOLEN, - * otherwise this must be changed to a sk_buff **. - * NF_ACCEPT can be returned when destination is local. 
- */ - int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); + union nf_inet_addr daddr; /* destination address */ + __u32 fwmark; /* Fire wall mark from skb */ + __be16 vport; + __u16 protocol; /* Which protocol (TCP/UDP) */ /* Note: we can group the following members into a structure, * in order to save more space, and the following members are @@ -610,14 +835,31 @@ struct ip_vs_conn { */ struct ip_vs_app *app; /* bound ip_vs_app object */ void *app_data; /* Application private data */ + /* 128/168 */ struct_group(sync_conn_opt, struct ip_vs_seq in_seq; /* incoming seq. struct */ struct ip_vs_seq out_seq; /* outgoing seq. struct */ ); + /* 152/192 */ - const struct ip_vs_pe *pe; - char *pe_data; - __u8 pe_data_len; + struct timer_list timer; /* Expiration timer */ + volatile unsigned long timeout; /* timeout */ + spinlock_t lock; /* lock for state transition */ + refcount_t refcnt; /* reference count */ + atomic_t in_pkts; /* incoming packet counter */ + /* 64-bit: 4-byte gap */ + + /* 188/256 */ + unsigned long sync_endtime; /* jiffies + sent_retries */ + struct netns_ipvs *ipvs; + + /* Packet transmitter for different forwarding methods. If it + * mangles the packet, it must return NF_DROP or better NF_STOLEN, + * otherwise this must be changed to a sk_buff **. + * NF_ACCEPT can be returned when destination is local. + */ + int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); struct rcu_head rcu_head; }; @@ -673,15 +915,15 @@ struct ip_vs_dest_user_kern { * forwarding entries. 
*/ struct ip_vs_service { - struct hlist_node s_list; /* for normal service table */ - struct hlist_node f_list; /* for fwmark-based service table */ - atomic_t refcnt; /* reference counter */ - + struct hlist_bl_node s_list; /* node in service table */ + u32 hash_key; /* Key for the hash table */ u16 af; /* address family */ __u16 protocol; /* which protocol (TCP/UDP) */ + union nf_inet_addr addr; /* IP address for virtual service */ - __be16 port; /* port number for the service */ __u32 fwmark; /* firewall mark of the service */ + atomic_t refcnt; /* reference counter */ + __be16 port; /* port number for the service */ unsigned int flags; /* service status flags */ unsigned int timeout; /* persistent timeout in ticks */ __be32 netmask; /* grouping granularity, mask/plen */ @@ -791,8 +1033,8 @@ struct ip_vs_pe { int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb); bool (*ct_match)(const struct ip_vs_conn_param *p, struct ip_vs_conn *ct); - u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, - bool inverse); + u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, + struct ip_vs_rht *t, bool inverse); int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); /* create connections for real-server outgoing packets */ struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc, @@ -931,21 +1173,27 @@ struct netns_ipvs { #endif /* ip_vs_conn */ atomic_t conn_count; /* connection counter */ + atomic_t no_cport_conns[IP_VS_AF_MAX]; + struct delayed_work conn_resize_work;/* resize conn_tab */ /* ip_vs_ctl */ struct ip_vs_stats_rcu *tot_stats; /* Statistics & est. 
*/ - int num_services; /* no of virtual services */ - int num_services6; /* IPv6 virtual services */ - /* Trash for destinations */ struct list_head dest_trash; spinlock_t dest_trash_lock; struct timer_list dest_trash_timer; /* expiration timer */ + struct mutex service_mutex; /* service reconfig */ + struct rw_semaphore svc_resize_sem; /* svc_table resizing */ + struct delayed_work svc_resize_work; /* resize svc_table */ + atomic_t svc_table_changes;/* ++ on new table */ /* Service counters */ - atomic_t ftpsvc_counter; - atomic_t nullsvc_counter; - atomic_t conn_out_counter; + atomic_t num_services[IP_VS_AF_MAX]; /* Services */ + atomic_t fwm_services[IP_VS_AF_MAX]; /* Services */ + atomic_t nonfwm_services[IP_VS_AF_MAX];/* Services */ + atomic_t ftpsvc_counter[IP_VS_AF_MAX]; /* FTPPORT */ + atomic_t nullsvc_counter[IP_VS_AF_MAX];/* Zero port */ + atomic_t conn_out_counter[IP_VS_AF_MAX];/* out conn */ #ifdef CONFIG_SYSCTL /* delayed work for expiring no dest connections */ @@ -956,6 +1204,7 @@ struct netns_ipvs { int drop_counter; int old_secure_tcp; atomic_t dropentry; + s8 dropentry_counters[8]; /* locks in ctl.c */ spinlock_t dropentry_lock; /* drop entry handling */ spinlock_t droppacket_lock; /* drop packet handling */ @@ -1002,6 +1251,8 @@ struct netns_ipvs { int sysctl_est_nice; /* kthread nice */ int est_stopped; /* stop tasks */ #endif + int sysctl_conn_lfactor; + int sysctl_svc_lfactor; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1011,6 +1262,7 @@ struct netns_ipvs { int sysctl_lblcr_expiration; struct ctl_table_header *lblcr_ctl_header; struct ctl_table *lblcr_ctl_table; + unsigned long work_flags; /* IP_VS_WORK_* flags */ /* ip_vs_est */ struct delayed_work est_reload_work;/* Reload kthread tasks */ struct mutex est_mutex; /* protect kthread tasks */ @@ -1041,6 +1293,10 @@ struct netns_ipvs { */ unsigned int mixed_address_family_dests; unsigned int hooks_afmask; /* &1=AF_INET, &2=AF_INET6 */ + + struct ip_vs_rht __rcu *svc_table; /* Services */ + 
struct ip_vs_rht __rcu *conn_tab; /* Connections */ + atomic_t conn_tab_changes;/* ++ on new table */ }; #define DEFAULT_SYNC_THRESHOLD 3 @@ -1290,6 +1546,24 @@ static inline int sysctl_est_nice(struct netns_ipvs *ipvs) #endif +/* Get load factor to map conn_count/u_thresh to t->size */ +static inline int sysctl_conn_lfactor(struct netns_ipvs *ipvs) +{ + return READ_ONCE(ipvs->sysctl_conn_lfactor); +} + +/* Get load factor to map num_services/u_thresh to t->size + * Smaller value decreases u_thresh to reduce collisions but increases + * the table size + * Returns factor where: + * - <0: u_thresh = size >> -factor, eg. lfactor -2 = 25% load + * - >=0: u_thresh = size << factor, eg. lfactor 1 = 200% load + */ +static inline int sysctl_svc_lfactor(struct netns_ipvs *ipvs) +{ + return READ_ONCE(ipvs->sysctl_svc_lfactor); +} + /* IPVS core functions * (from ip_vs_core.c) */ @@ -1363,6 +1637,23 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) } void ip_vs_conn_put(struct ip_vs_conn *cp); void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); +int ip_vs_conn_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, + int lfactor); +struct ip_vs_rht *ip_vs_conn_tab_alloc(struct netns_ipvs *ipvs, int buckets, + int lfactor); + +static inline struct ip_vs_conn * +ip_vs_hn0_to_conn(struct ip_vs_conn_hnode *hn) +{ + return container_of(hn, struct ip_vs_conn, hn0); +} + +static inline struct ip_vs_conn * +ip_vs_hn_to_conn(struct ip_vs_conn_hnode *hn) +{ + return hn->dir ? 
container_of(hn, struct ip_vs_conn, hn1) : + container_of(hn, struct ip_vs_conn, hn0); +} struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, const union nf_inet_addr *daddr, @@ -1716,6 +2007,13 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp) return fwd; } +/* Check if connection uses double hashing */ +static inline bool ip_vs_conn_use_hash2(struct ip_vs_conn *cp) +{ + return IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ && + !(cp->flags & IP_VS_CONN_F_TEMPLATE); +} + void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int dir); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 74fbf1ad8065..d042afe7a245 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -25,8 +25,6 @@ struct ip_tunnel_info; #define SIN6_LEN_RFC2133 24 -#define IPV6_MAXPLEN 65535 - /* * NextHeader field of IPv6 header */ @@ -151,17 +149,6 @@ struct frag_hdr { __be32 identification; }; -/* - * Jumbo payload option, as described in RFC 2675 2. - */ -struct hop_jumbo_hdr { - u8 nexthdr; - u8 hdrlen; - u8 tlv_type; /* IPV6_TLV_JUMBO, 0xC2 */ - u8 tlv_len; /* 4 */ - __be32 jumbo_payload_len; -}; - #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 @@ -464,72 +451,6 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt); -/* This helper is specialized for BIG TCP needs. - * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. - * It assumes headers are already in skb->head. - * Returns: 0, or IPPROTO_TCP if a BIG TCP packet is there. 
- */ -static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) -{ - const struct hop_jumbo_hdr *jhdr; - const struct ipv6hdr *nhdr; - - if (likely(skb->len <= GRO_LEGACY_MAX_SIZE)) - return 0; - - if (skb->protocol != htons(ETH_P_IPV6)) - return 0; - - if (skb_network_offset(skb) + - sizeof(struct ipv6hdr) + - sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) - return 0; - - nhdr = ipv6_hdr(skb); - - if (nhdr->nexthdr != NEXTHDR_HOP) - return 0; - - jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1); - if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 || - jhdr->nexthdr != IPPROTO_TCP) - return 0; - return jhdr->nexthdr; -} - -/* Return 0 if HBH header is successfully removed - * Or if HBH removal is unnecessary (packet is not big TCP) - * Return error to indicate dropping the packet - */ -static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) -{ - const int hophdr_len = sizeof(struct hop_jumbo_hdr); - int nexthdr = ipv6_has_hopopt_jumbo(skb); - struct ipv6hdr *h6; - - if (!nexthdr) - return 0; - - if (skb_cow_head(skb, 0)) - return -1; - - /* Remove the HBH header. 
- * Layout: [Ethernet header][IPv6 header][HBH][L4 Header] - */ - memmove(skb_mac_header(skb) + hophdr_len, skb_mac_header(skb), - skb_network_header(skb) - skb_mac_header(skb) + - sizeof(struct ipv6hdr)); - - __skb_pull(skb, hophdr_len); - skb->network_header += hophdr_len; - skb->mac_header += hophdr_len; - - h6 = ipv6_hdr(skb); - h6->nexthdr = nexthdr; - - return 0; -} - static inline bool ipv6_accept_ra(const struct inet6_dev *idev) { s32 accept_ra = READ_ONCE(idev->cnf.accept_ra); @@ -931,10 +852,10 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, #if IS_ENABLED(CONFIG_IPV6) -static inline bool ipv6_can_nonlocal_bind(struct net *net, - struct inet_sock *inet) +static inline bool ipv6_can_nonlocal_bind(const struct net *net, + const struct inet_sock *inet) { - return net->ipv6.sysctl.ip_nonlocal_bind || + return READ_ONCE(net->ipv6.sysctl.ip_nonlocal_bind) || test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) || test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags); } @@ -949,10 +870,12 @@ static inline bool ipv6_can_nonlocal_bind(struct net *net, #define IP6_DEFAULT_AUTO_FLOW_LABELS IP6_AUTO_FLOW_LABEL_OPTOUT -static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, +static inline __be32 ip6_make_flowlabel(const struct net *net, + struct sk_buff *skb, __be32 flowlabel, bool autolabel, struct flowi6 *fl6) { + u8 auto_flowlabels; u32 hash; /* @flowlabel may include more than a flow label, eg, the traffic class. 
@@ -960,10 +883,12 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, */ flowlabel &= IPV6_FLOWLABEL_MASK; - if (flowlabel || - net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF || - (!autolabel && - net->ipv6.sysctl.auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED)) + if (flowlabel) + return flowlabel; + + auto_flowlabels = READ_ONCE(net->ipv6.sysctl.auto_flowlabels); + if (auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF || + (!autolabel && auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED)) return flowlabel; hash = skb_get_hash_flowi6(skb, fl6); @@ -976,15 +901,15 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; - if (net->ipv6.sysctl.flowlabel_state_ranges) + if (READ_ONCE(net->ipv6.sysctl.flowlabel_state_ranges)) flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; return flowlabel; } -static inline int ip6_default_np_autolabel(struct net *net) +static inline int ip6_default_np_autolabel(const struct net *net) { - switch (net->ipv6.sysctl.auto_flowlabels) { + switch (READ_ONCE(net->ipv6.sysctl.auto_flowlabels)) { case IP6_AUTO_FLOW_LABEL_OFF: case IP6_AUTO_FLOW_LABEL_OPTIN: default: @@ -995,13 +920,13 @@ static inline int ip6_default_np_autolabel(struct net *net) } } #else -static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, +static inline __be32 ip6_make_flowlabel(const struct net *net, struct sk_buff *skb, __be32 flowlabel, bool autolabel, struct flowi6 *fl6) { return flowlabel; } -static inline int ip6_default_np_autolabel(struct net *net) +static inline int ip6_default_np_autolabel(const struct net *net) { return 0; } @@ -1010,11 +935,11 @@ static inline int ip6_default_np_autolabel(struct net *net) #if IS_ENABLED(CONFIG_IPV6) static inline int ip6_multipath_hash_policy(const struct net *net) { - return net->ipv6.sysctl.multipath_hash_policy; + return READ_ONCE(net->ipv6.sysctl.multipath_hash_policy); } static inline u32 
ip6_multipath_hash_fields(const struct net *net) { - return net->ipv6.sysctl.multipath_hash_fields; + return READ_ONCE(net->ipv6.sysctl.multipath_hash_fields); } #else static inline int ip6_multipath_hash_policy(const struct net *net) @@ -1103,8 +1028,7 @@ void ip6_flush_pending_frames(struct sock *sk); int ip6_send_skb(struct sk_buff *skb); struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, - struct inet_cork_full *cork, - struct inet6_cork *v6_cork); + struct inet_cork_full *cork); struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), @@ -1115,14 +1039,23 @@ struct sk_buff *ip6_make_skb(struct sock *sk, static inline struct sk_buff *ip6_finish_skb(struct sock *sk) { - return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork, - &inet6_sk(sk)->cork); + return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); } int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); +#if IS_ENABLED(CONFIG_IPV6) struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); +#else +static inline struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, + struct flowi6 *fl6, + const struct in6_addr *final_dst) +{ + return ERR_PTR(-EAFNOSUPPORT); +} +#endif + struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool connected); @@ -1147,11 +1080,11 @@ int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); * Extension header (options) processing */ -void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto, struct in6_addr **daddr_p, - struct in6_addr *saddr); -void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto); +u8 ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, + u8 proto, 
struct in6_addr **daddr_p, + struct in6_addr *saddr); +u8 ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, + u8 proto); int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp, __be16 *frag_offp); @@ -1170,9 +1103,19 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type); -struct in6_addr *fl6_update_dst(struct flowi6 *fl6, - const struct ipv6_txoptions *opt, - struct in6_addr *orig); +struct in6_addr *__fl6_update_dst(struct flowi6 *fl6, + const struct ipv6_txoptions *opt, + struct in6_addr *orig); + +static inline struct in6_addr * +fl6_update_dst(struct flowi6 *fl6, const struct ipv6_txoptions *opt, + struct in6_addr *orig) +{ + if (likely(!opt)) + return NULL; + + return __fl6_update_dst(fl6, opt, orig); +} /* * socket options (ipv6_sockglue.c) @@ -1196,10 +1139,8 @@ int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr_unsized *addr, int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr); void ip6_datagram_release_cb(struct sock *sk); -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, - int *addr_len); -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, - int *addr_len); +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len); +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len); void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); @@ -1208,6 +1149,8 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); void inet6_cleanup_sock(struct sock *sk); void inet6_sock_destruct(struct sock *sk); int inet6_release(struct socket *sock); +int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, + u32 flags); int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len); int 
inet6_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, @@ -1248,8 +1191,6 @@ int tcp6_proc_init(struct net *net); void tcp6_proc_exit(struct net *net); int udp6_proc_init(struct net *net); void udp6_proc_exit(struct net *net); -int udplite6_proc_init(void); -void udplite6_proc_exit(void); int ipv6_misc_proc_init(void); void ipv6_misc_proc_exit(void); int snmp6_register_dev(struct inet6_dev *idev); @@ -1280,12 +1221,15 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, static inline int ip6_sock_set_v6only(struct sock *sk) { - if (inet_sk(sk)->inet_num) - return -EINVAL; + int ret = 0; + lock_sock(sk); - sk->sk_ipv6only = true; + if (inet_sk(sk)->inet_num) + ret = -EINVAL; + else + sk->sk_ipv6only = true; release_sock(sk); - return 0; + return ret; } static inline void ip6_sock_set_recverr(struct sock *sk) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h deleted file mode 100644 index d3013e721b14..000000000000 --- a/include/net/ipv6_stubs.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _IPV6_STUBS_H -#define _IPV6_STUBS_H - -#include <linux/in6.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/dst.h> -#include <net/flow.h> -#include <net/neighbour.h> -#include <net/sock.h> -#include <net/ipv6.h> - -/* structs from net/ip6_fib.h */ -struct fib6_info; -struct fib6_nh; -struct fib6_config; -struct fib6_result; - -/* This is ugly, ideally these symbols should be built - * into the core kernel. 
- */ -struct ipv6_stub { - int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex, - const struct in6_addr *addr); - int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, - const struct in6_addr *addr); - struct dst_entry *(*ipv6_dst_lookup_flow)(struct net *net, - const struct sock *sk, - struct flowi6 *fl6, - const struct in6_addr *final_dst); - int (*ipv6_route_input)(struct sk_buff *skb); - - struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); - int (*fib6_lookup)(struct net *net, int oif, struct flowi6 *fl6, - struct fib6_result *res, int flags); - int (*fib6_table_lookup)(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, - struct fib6_result *res, int flags); - void (*fib6_select_path)(const struct net *net, struct fib6_result *res, - struct flowi6 *fl6, int oif, bool oif_match, - const struct sk_buff *skb, int strict); - u32 (*ip6_mtu_from_fib6)(const struct fib6_result *res, - const struct in6_addr *daddr, - const struct in6_addr *saddr); - - int (*fib6_nh_init)(struct net *net, struct fib6_nh *fib6_nh, - struct fib6_config *cfg, gfp_t gfp_flags, - struct netlink_ext_ack *extack); - void (*fib6_nh_release)(struct fib6_nh *fib6_nh); - void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh); - void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); - int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify); - void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, - struct nl_info *info); - - void (*udpv6_encap_enable)(void); - void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, - const struct in6_addr *solicited_addr, - bool router, bool solicited, bool override, bool inc_opt); -#if IS_ENABLED(CONFIG_XFRM) - void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu); - int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); - struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk, - struct list_head *head, - struct sk_buff *skb); - int 
(*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi, - int encap_type); -#endif - struct neigh_table *nd_tbl; - - int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, - int (*output)(struct net *, struct sock *, struct sk_buff *)); - struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr, - struct net_device *dev); - int (*ip6_xmit)(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, - __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority); -}; -extern const struct ipv6_stub *ipv6_stub __read_mostly; - -/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */ -struct ipv6_bpf_stub { - int (*inet6_bind)(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, - u32 flags); - struct sock *(*udp6_lib_lookup)(const struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif, int sdif, struct udp_table *tbl, - struct sk_buff *skb); - int (*ipv6_setsockopt)(struct sock *sk, int level, int optname, - sockptr_t optval, unsigned int optlen); - int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, - sockptr_t optval, sockptr_t optlen); - int (*ipv6_dev_get_saddr)(struct net *net, - const struct net_device *dst_dev, - const struct in6_addr *daddr, - unsigned int prefs, - struct in6_addr *saddr); -}; -extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; - -#endif diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h index 9804fa5d9c67..18f511da9a09 100644 --- a/include/net/iucv/iucv.h +++ b/include/net/iucv/iucv.h @@ -195,35 +195,15 @@ struct iucv_handler { struct list_head paths; }; -/** - * iucv_register: - * @handler: address of iucv handler structure - * @smp: != 0 indicates that the handler can deal with out of order messages - * - * Registers a driver with IUCV. 
- * - * Returns: 0 on success, -ENOMEM if the memory allocation for the pathid - * table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus. - */ int iucv_register(struct iucv_handler *handler, int smp); +void iucv_unregister(struct iucv_handler *handler, int smp); /** - * iucv_unregister - * @handler: address of iucv handler structure - * @smp: != 0 indicates that the handler can deal with out of order messages - * - * Unregister driver from IUCV. - */ -void iucv_unregister(struct iucv_handler *handle, int smp); - -/** - * iucv_path_alloc + * iucv_path_alloc - Allocate a new path structure for use with iucv_connect. * @msglim: initial message limit * @flags: initial flags * @gfp: kmalloc allocation flag * - * Allocate a new path structure for use with iucv_connect. - * * Returns: NULL if the memory allocation failed or a pointer to the * path structure. */ @@ -231,7 +211,7 @@ static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp) { struct iucv_path *path; - path = kzalloc(sizeof(struct iucv_path), gfp); + path = kzalloc_obj(struct iucv_path, gfp); if (path) { path->msglim = msglim; path->flags = flags; @@ -240,229 +220,48 @@ static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp) } /** - * iucv_path_free + * iucv_path_free - Frees a path structure. * @path: address of iucv path structure - * - * Frees a path structure. */ static inline void iucv_path_free(struct iucv_path *path) { kfree(path); } -/** - * iucv_path_accept - * @path: address of iucv path structure - * @handler: address of iucv handler structure - * @userdata: 16 bytes of data reflected to the communication partner - * @private: private data passed to interrupt handlers for this path - * - * This function is issued after the user received a connection pending - * external interrupt and now wishes to complete the IUCV communication path. - * - * Returns: the result of the CP IUCV call. 
- */ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, u8 *userdata, void *private); -/** - * iucv_path_connect - * @path: address of iucv path structure - * @handler: address of iucv handler structure - * @userid: 8-byte user identification - * @system: 8-byte target system identification - * @userdata: 16 bytes of data reflected to the communication partner - * @private: private data passed to interrupt handlers for this path - * - * This function establishes an IUCV path. Although the connect may complete - * successfully, you are not able to use the path until you receive an IUCV - * Connection Complete external interrupt. - * - * Returns: the result of the CP IUCV call. - */ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, u8 *userid, u8 *system, u8 *userdata, void *private); -/** - * iucv_path_quiesce: - * @path: address of iucv path structure - * @userdata: 16 bytes of data reflected to the communication partner - * - * This function temporarily suspends incoming messages on an IUCV path. - * You can later reactivate the path by invoking the iucv_resume function. - * - * Returns: the result from the CP IUCV call. - */ int iucv_path_quiesce(struct iucv_path *path, u8 *userdata); -/** - * iucv_path_resume: - * @path: address of iucv path structure - * @userdata: 16 bytes of data reflected to the communication partner - * - * This function resumes incoming messages on an IUCV path that has - * been stopped with iucv_path_quiesce. - * - * Returns: the result from the CP IUCV call. - */ int iucv_path_resume(struct iucv_path *path, u8 *userdata); -/** - * iucv_path_sever - * @path: address of iucv path structure - * @userdata: 16 bytes of data reflected to the communication partner - * - * This function terminates an IUCV path. - * - * Returns: the result from the CP IUCV call. 
- */ int iucv_path_sever(struct iucv_path *path, u8 *userdata); -/** - * iucv_message_purge - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @srccls: source class of message - * - * Cancels a message you have sent. - * - * Returns: the result from the CP IUCV call. - */ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, u32 srccls); -/** - * iucv_message_receive - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: flags that affect how the message is received (IUCV_IPBUFLST) - * @buffer: address of data buffer or address of struct iucv_array - * @size: length of data buffer - * @residual: - * - * This function receives messages that are being sent to you over - * established paths. This function will deal with RMDATA messages - * embedded in struct iucv_message as well. - * - * Locking: local_bh_enable/local_bh_disable - * - * Returns: the result from the CP IUCV call. - */ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *buffer, size_t size, size_t *residual); -/** - * __iucv_message_receive - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: flags that affect how the message is received (IUCV_IPBUFLST) - * @buffer: address of data buffer or address of struct iucv_array - * @size: length of data buffer - * @residual: - * - * This function receives messages that are being sent to you over - * established paths. This function will deal with RMDATA messages - * embedded in struct iucv_message as well. - * - * Locking: no locking. - * - * Returns: the result from the CP IUCV call. - */ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *buffer, size_t size, size_t *residual); -/** - * iucv_message_reject - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * - * The reject function refuses a specified message. 
Between the time you - * are notified of a message and the time that you complete the message, - * the message may be rejected. - * - * Returns: the result from the CP IUCV call. - */ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg); -/** - * iucv_message_reply - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: how the reply is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) - * @reply: address of data buffer or address of struct iucv_array - * @size: length of reply data buffer - * - * This function responds to the two-way messages that you receive. You - * must identify completely the message to which you wish to reply. ie, - * pathid, msgid, and trgcls. Prmmsg signifies the data is moved into - * the parameter list. - * - * Returns: the result from the CP IUCV call. - */ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *reply, size_t size); -/** - * iucv_message_send - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) - * @srccls: source class of message - * @buffer: address of data buffer or address of struct iucv_array - * @size: length of send buffer - * - * This function transmits data to another application. Data to be - * transmitted is in a buffer and this is a one-way message and the - * receiver will not reply to the message. - * - * Locking: local_bh_enable/local_bh_disable - * - * Returns: the result from the CP IUCV call. 
- */ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size); -/** - * __iucv_message_send - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) - * @srccls: source class of message - * @buffer: address of data buffer or address of struct iucv_array - * @size: length of send buffer - * - * This function transmits data to another application. Data to be - * transmitted is in a buffer and this is a one-way message and the - * receiver will not reply to the message. - * - * Locking: no locking. - * - * Returns: the result from the CP IUCV call. - */ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size); -/** - * iucv_message_send2way - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: how the message is sent and the reply is received - * (IUCV_IPRMDATA, IUCV_IPBUFLST, IUCV_IPPRTY, IUCV_ANSLST) - * @srccls: source class of message - * @buffer: address of data buffer or address of struct iucv_array - * @size: length of send buffer - * @ansbuf: address of answer buffer or address of struct iucv_array - * @asize: size of reply buffer - * - * This function transmits data to another application. Data to be - * transmitted is in a buffer. The receiver of the send is expected to - * reply to the message and a buffer is provided into which IUCV moves - * the reply to this message. - * - * Returns: the result from the CP IUCV call. 
- */ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size, void *answer, size_t asize, size_t *residual); diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 1eb8dad18f7e..710e98665eb3 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -207,18 +207,19 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) static inline struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) { - struct net_device *dev = skb_dst(skb)->dev; + struct net_device *dev; + rcu_read_lock(); + dev = skb_dst_dev_rcu(skb); if (netif_is_l3_slave(dev)) { struct net_device *master; - rcu_read_lock(); master = netdev_master_upper_dev_get_rcu(dev); if (master && master->l3mdev_ops->l3mdev_l3_out) skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, skb, proto); - rcu_read_unlock(); } + rcu_read_unlock(); return skb; } diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index 481a7b28e6f2..82b5d21aae87 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -597,6 +597,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp, * @pending: current number of XSkFQEs to refill * @thresh: threshold below which the queue is refilled * @buf_len: HW-writeable length per each buffer + * @truesize: step between consecutive buffers, 0 if none exists * @nid: ID of the closest NUMA node with memory */ struct libeth_xskfq { @@ -614,6 +615,8 @@ struct libeth_xskfq { u32 thresh; u32 buf_len; + u32 truesize; + int nid; }; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c2e49542626c..40cb20d9309c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7,7 +7,7 @@ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2025 Intel Corporation + * Copyright (C) 2018 - 2026 Intel Corporation */ #ifndef 
MAC80211_H @@ -365,6 +365,7 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. * @BSS_CHANGED_MLD_TTLM: negotiated TID to link mapping was changed * @BSS_CHANGED_TPE: transmit power envelope changed + * @BSS_CHANGED_NAN_LOCAL_SCHED: NAN local schedule changed (NAN mode only) */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -402,6 +403,7 @@ enum ieee80211_bss_change { BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), BSS_CHANGED_MLD_TTLM = BIT_ULL(34), BSS_CHANGED_TPE = BIT_ULL(35), + BSS_CHANGED_NAN_LOCAL_SCHED = BIT_ULL(36), /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -706,6 +708,7 @@ struct ieee80211_parsed_tpe { * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT * @epcs_support: does this BSS support EPCS + * @uhr_support: does this BSS support UHR * @csa_active: marks whether a channel switch is going on. * @mu_mimo_owner: indicates interface owns MU-MIMO capability * @chanctx_conf: The channel context this interface is assigned to, or %NULL @@ -832,6 +835,8 @@ struct ieee80211_bss_conf { u8 pwr_reduction; bool eht_support; bool epcs_support; + bool uhr_support; + bool csa_active; bool mu_mimo_owner; @@ -863,6 +868,74 @@ struct ieee80211_bss_conf { u8 s1g_long_beacon_period; }; +#define IEEE80211_NAN_MAX_CHANNELS 3 + +/** + * struct ieee80211_nan_channel - NAN channel information + * + * @chanreq: channel request for this NAN channel. Even though this chanreq::ap + * is irrelevant for NAN, still store it for convenience - some functions + * require it as an argument. + * @needed_rx_chains: number of RX chains needed for this NAN channel + * @chanctx_conf: chanctx_conf assigned to this NAN channel. + * If a local channel is being ULWed (because we needed this chanctx for + * something else), the local NAN channel that used this chanctx, + * will have this pointer set to %NULL. + * A peer NAN channel should never have this pointer set to %NULL. 
+ * @channel_entry: the Channel Entry blob as defined in Wi-Fi Aware + * (TM) 4.0 specification Table 100 (Channel Entry format for the NAN + * Availability attribute). + */ +struct ieee80211_nan_channel { + struct ieee80211_chan_req chanreq; + u8 needed_rx_chains; + struct ieee80211_chanctx_conf *chanctx_conf; + u8 channel_entry[6]; +}; + +/** + * struct ieee80211_nan_peer_map - NAN peer schedule map + * + * This stores a single map from a peer's schedule. Each peer can have + * multiple maps. + * + * @map_id: the map ID from the peer schedule, %CFG80211_NAN_INVALID_MAP_ID + * if unused + * @slots: mapping of time slots to channel configurations in the schedule's + * channels array + */ +struct ieee80211_nan_peer_map { + u8 map_id; + struct ieee80211_nan_channel *slots[CFG80211_NAN_SCHED_NUM_TIME_SLOTS]; +}; + +/** + * struct ieee80211_nan_peer_sched - NAN peer schedule + * + * This stores the complete schedule from a peer. Contains peer-level + * parameters and an array of schedule maps. + * + * @seq_id: the sequence ID from the peer schedule + * @committed_dw: committed DW as published by the peer + * @max_chan_switch: maximum channel switch time in microseconds + * @init_ulw: initial ULWs as published by the peer (copied) + * @ulw_size: number of bytes in @init_ulw + * @maps: array of peer schedule maps. Invalid slots have map_id set to + * %CFG80211_NAN_INVALID_MAP_ID. 
+ * @n_channels: number of valid channel entries in @channels + * @channels: flexible array of negotiated peer channels for this schedule + */ +struct ieee80211_nan_peer_sched { + u8 seq_id; + u16 committed_dw; + u16 max_chan_switch; + const u8 *init_ulw; + u16 ulw_size; + struct ieee80211_nan_peer_map maps[CFG80211_NAN_MAX_PEER_MAPS]; + u8 n_channels; + struct ieee80211_nan_channel channels[] __counted_by(n_channels); +}; + /** * enum mac80211_tx_info_flags - flags to describe transmission information/status * @@ -1598,6 +1671,7 @@ enum mac80211_rx_encoding { RX_ENC_VHT, RX_ENC_HE, RX_ENC_EHT, + RX_ENC_UHR, }; /** @@ -1631,7 +1705,7 @@ enum mac80211_rx_encoding { * @antenna: antenna used * @rate_idx: index of data rate into band's supported rates or MCS index if * HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT) - * @nss: number of streams (VHT, HE and EHT only) + * @nss: number of streams (VHT, HE, EHT and UHR only) * @flag: %RX_FLAG_\* * @encoding: &enum mac80211_rx_encoding * @bw: &enum rate_info_bw @@ -1642,6 +1716,11 @@ enum mac80211_rx_encoding { * @eht: EHT specific rate information * @eht.ru: EHT RU, from &enum nl80211_eht_ru_alloc * @eht.gi: EHT GI, from &enum nl80211_eht_gi + * @uhr: UHR specific rate information + * @uhr.ru: UHR RU, from &enum nl80211_eht_ru_alloc + * @uhr.gi: UHR GI, from &enum nl80211_eht_gi + * @uhr.elr: UHR ELR MCS was used + * @uhr.im: UHR interference mitigation was used * @rx_flags: internal RX flags for mac80211 * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU @@ -1673,6 +1752,12 @@ struct ieee80211_rx_status { u8 ru:4; u8 gi:2; } eht; + struct { + u8 ru:4; + u8 gi:2; + u8 elr:1; + u8 im:1; + } uhr; }; u8 rate_idx; u8 nss; @@ -1902,6 +1987,59 @@ enum ieee80211_offload_flags { IEEE80211_OFFLOAD_DECAP_ENABLED = BIT(2), }; +#define IEEE80211_NAN_AVAIL_BLOB_MAX_LEN 54 + +/** + * struct ieee80211_eml_params - EHT Operating mode notification 
parameters + * + * EML Operating mode notification parameters received in the Operating mode + * notification frame. This struct is used as a container to pass the info to + * the underlay driver. + * + * @link_id: the link ID where the Operating mode notification frame has been + * received. + * @control: EML control field defined in P802.11be section 9.4.1.76. + * @link_bitmap: eMLSR/eMLMR enabled links defined in P802.11be + * section 9.4.1.76. + * @emlmr_mcs_map_count: eMLMR number of valid mcs_map_bw fields according to + * P802.11be section 9.4.1.76 (valid if eMLMR mode control bit is set). + * @emlmr_mcs_map_bw: eMLMR supported MCS and NSS set subfields defined in + * P802.11be section 9.4.1.76 (valid if eMLMR mode control bit is set). + */ +struct ieee80211_eml_params { + u8 link_id; + u8 control; + u16 link_bitmap; + u8 emlmr_mcs_map_count; + u8 emlmr_mcs_map_bw[9]; +}; + +/** + * struct ieee80211_nan_sched_cfg - NAN schedule configuration + * @channels: array of NAN channels. A channel entry is in use if + * channels[i].chanreq.oper.chan is not NULL. + * @schedule: NAN local schedule - mapping of each 16TU time slot to + * the NAN channel on which the radio will operate. NULL if unscheduled. + * @avail_blob: NAN Availability attribute blob. + * @avail_blob_len: length of the @avail_blob in bytes. + * @deferred: indicates that the driver should notify peers before applying the + * new NAN schedule, and apply the new schedule the second NAN Slot + * boundary after it notified the peers, as defined in Wi-Fi Aware (TM) 4.0 + * specification, section 5.2.2. + * The driver must call ieee80211_nan_sched_update_done() after the + * schedule has been applied. + * If a HW restart happened while a deferred schedule update was pending, + * mac80211 will reconfigure the deferred schedule (and wait for the driver + * to notify that the schedule has been applied). 
+ */ +struct ieee80211_nan_sched_cfg { + struct ieee80211_nan_channel channels[IEEE80211_NAN_MAX_CHANNELS]; + struct ieee80211_nan_channel *schedule[CFG80211_NAN_SCHED_NUM_TIME_SLOTS]; + u8 avail_blob[IEEE80211_NAN_AVAIL_BLOB_MAX_LEN]; + u16 avail_blob_len; + bool deferred; +}; + /** * struct ieee80211_vif_cfg - interface configuration * @assoc: association status @@ -1930,6 +2068,7 @@ enum ieee80211_offload_flags { * your driver/device needs to do. * @ap_addr: AP MLD address, or BSSID for non-MLO connections * (station mode only) + * @nan_sched: NAN schedule parameters. &struct ieee80211_nan_sched_cfg */ struct ieee80211_vif_cfg { /* association related data */ @@ -1948,6 +2087,8 @@ struct ieee80211_vif_cfg { bool s1g; bool idle; u8 ap_addr[ETH_ALEN] __aligned(2); + /* Protected by the wiphy mutex */ + struct ieee80211_nan_sched_cfg nan_sched; }; #define IEEE80211_TTLM_NUM_TIDS 8 @@ -2034,6 +2175,7 @@ enum ieee80211_neg_ttlm_res { * @drv_priv: data area for driver use, will always be aligned to * sizeof(void \*). * @txq: the multicast data TX queue + * @txq_mgmt: the mgmt frame TX queue, currently only exists for NAN devices * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see * &enum ieee80211_offload_flags. */ @@ -2052,6 +2194,7 @@ struct ieee80211_vif { u8 hw_queue[IEEE80211_NUM_ACS]; struct ieee80211_txq *txq; + struct ieee80211_txq *txq_mgmt; netdev_features_t netdev_features; u32 driver_flags; @@ -2434,13 +2577,18 @@ struct ieee80211_sta_aggregates { * @he_cap: HE capabilities of this STA * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities * @eht_cap: EHT capabilities of this STA + * @uhr_cap: UHR capabilities of this STA * @s1g_cap: S1G capabilities of this STA * @agg: per-link data for multi-link aggregation - * @bandwidth: current bandwidth the station can receive with + * @bandwidth: current bandwidth the station can receive with. 
+ * This is the minimum between the peer's capabilities and our own + * operating channel width; Invalid for NAN since that is operating on + * multiple channels. * @rx_nss: in HT/VHT, the maximum number of spatial streams the * station can receive at the moment, changed by operating mode * notifications and capabilities. The value is only valid after - * the station moves to associated state. + * the station moves to associated state. Invalid for NAN since it + * operates on multiple configurations of rx_nss. * @txpwr: the station tx power configuration * */ @@ -2457,6 +2605,7 @@ struct ieee80211_link_sta { struct ieee80211_sta_he_cap he_cap; struct ieee80211_he_6ghz_capa he_6ghz_capa; struct ieee80211_sta_eht_cap eht_cap; + struct ieee80211_sta_uhr_cap uhr_cap; struct ieee80211_sta_s1g_cap s1g_cap; struct ieee80211_sta_aggregates agg; @@ -2520,6 +2669,9 @@ struct ieee80211_link_sta { * by the AP. * @valid_links: bitmap of valid links, or 0 for non-MLO * @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not. + * @epp_peer: indicates that the peer is an EPP peer. + * @nmi: For NDI stations, pointer to the NMI station of the peer. + * @nan_sched: NAN peer schedule for this station. Valid only for NMI stations. */ struct ieee80211_sta { u8 addr[ETH_ALEN] __aligned(2); @@ -2544,9 +2696,15 @@ struct ieee80211_sta { struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1]; u16 valid_links; + bool epp_peer; struct ieee80211_link_sta deflink; struct ieee80211_link_sta __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; + struct ieee80211_sta __rcu *nmi; + + /* should only be accessed with the wiphy mutex held */ + struct ieee80211_nan_peer_sched *nan_sched; + /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; @@ -2780,6 +2938,8 @@ struct ieee80211_txq { * station has a unique address, i.e. each station entry can be identified * by just its MAC address; this prevents, for example, the same station * from connecting to two virtual AP interfaces at the same time. 
+ * Note that this doesn't apply for NAN, in which the peer's NMI address + * can be equal to its NDI address. * * @IEEE80211_HW_SUPPORTS_REORDERING_BUFFER: Hardware (or driver) manages the * reordering buffer internally, guaranteeing mac80211 receives frames in @@ -2869,6 +3029,9 @@ struct ieee80211_txq { * HW flag so drivers can opt in according to their own control, e.g. in * testing. * + * @IEEE80211_HW_SUPPORTS_NDP_BLOCKACK: HW can transmit/receive S1G NDP + * BlockAck frames. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2929,6 +3092,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_DISALLOW_PUNCTURING, IEEE80211_HW_HANDLES_QUIET_CSA, IEEE80211_HW_STRICT, + IEEE80211_HW_SUPPORTS_NDP_BLOCKACK, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -4442,6 +4606,12 @@ struct ieee80211_prep_tx_info { * @del_nan_func: Remove a NAN function. The driver must call * ieee80211_nan_func_terminated() with * NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST reason code upon removal. + * @nan_peer_sched_changed: Notifies the driver that the peer NAN schedule + * has changed. The new schedule is available via sta->nan_sched. + * Note that the channel_entry blob might not match the actual chandef + * since the bandwidth of the chandef is the minimum of the local and peer + * bandwidth. It is the driver's responsibility to remove the peer schedule + * when the NMI station is removed. * @can_aggregate_in_amsdu: Called in order to determine if HW supports * aggregating two specific frames in the same A-MSDU. The relation * between the skbs should be symmetric and transitive. Note that while @@ -4511,6 +4681,9 @@ struct ieee80211_prep_tx_info { * interface with the specified type would be added, and thus drivers that * implement this callback need to handle such cases. The type is the full * &enum nl80211_iftype. 
+ * @set_eml_op_mode: Configure eMLSR/eMLMR operation mode in the underlay + * driver according to the parameter received in the EML Operating mode + * notification frame. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4844,6 +5017,8 @@ struct ieee80211_ops { void (*del_nan_func)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u8 instance_id); + int (*nan_peer_sched_changed)(struct ieee80211_hw *hw, + struct ieee80211_sta *sta); bool (*can_aggregate_in_amsdu)(struct ieee80211_hw *hw, struct sk_buff *head, struct sk_buff *skb); @@ -4906,6 +5081,10 @@ struct ieee80211_ops { struct ieee80211_neg_ttlm *ttlm); void (*prep_add_interface)(struct ieee80211_hw *hw, enum nl80211_iftype type); + int (*set_eml_op_mode)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_eml_params *eml_params); }; /** @@ -6272,6 +6451,30 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, struct ieee80211_vif *vif), void *data); +struct ieee80211_vif * +__ieee80211_iterate_interfaces(struct ieee80211_hw *hw, + struct ieee80211_vif *prev, + u32 iter_flags); + +/** + * for_each_interface - iterate interfaces under wiphy mutex + * @vif: the iterator variable + * @hw: the HW to iterate for + * @flags: the iteration flags, see &enum ieee80211_interface_iteration_flags + */ +#define for_each_interface(vif, hw, flags) \ + for (vif = __ieee80211_iterate_interfaces(hw, NULL, flags); \ + vif; \ + vif = __ieee80211_iterate_interfaces(hw, vif, flags)) + +/** + * for_each_active_interface - iterate active interfaces under wiphy mutex + * @vif: the iterator variable + * @hw: the HW to iterate for + */ +#define for_each_active_interface(vif, hw) \ + for_each_interface(vif, hw, IEEE80211_IFACE_ITER_ACTIVE) + /** * ieee80211_iterate_active_interfaces_mtx - iterate active interfaces * @@ -6284,12 +6487,18 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, * @iterator: the iterator function 
to call, cannot sleep * @data: first argument of the iterator function */ -void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw, - u32 iter_flags, - void (*iterator)(void *data, - u8 *mac, - struct ieee80211_vif *vif), - void *data); +static inline void +ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw, + u32 iter_flags, + void (*iterator)(void *data, u8 *mac, + struct ieee80211_vif *vif), + void *data) +{ + struct ieee80211_vif *vif; + + for_each_interface(vif, hw, iter_flags | IEEE80211_IFACE_ITER_ACTIVE) + iterator(data, vif->addr, vif); +} /** * ieee80211_iterate_stations_atomic - iterate stations @@ -6308,6 +6517,20 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, struct ieee80211_sta *sta), void *data); +struct ieee80211_sta * +__ieee80211_iterate_stations(struct ieee80211_hw *hw, + struct ieee80211_sta *prev); + +/** + * for_each_station - iterate stations under wiphy mutex + * @sta: the iterator variable + * @hw: the HW to iterate for + */ +#define for_each_station(sta, hw) \ + for (sta = __ieee80211_iterate_stations(hw, NULL); \ + sta; \ + sta = __ieee80211_iterate_stations(hw, sta)) + /** * ieee80211_iterate_stations_mtx - iterate stations * @@ -6320,10 +6543,17 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, * @iterator: the iterator function to call * @data: first argument of the iterator function */ -void ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw, - void (*iterator)(void *data, - struct ieee80211_sta *sta), - void *data); +static inline void +ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw, + void (*iterator)(void *data, + struct ieee80211_sta *sta), + void *data) +{ + struct ieee80211_sta *sta; + + for_each_station(sta, hw) + iterator(data, sta); +} /** * ieee80211_queue_work - add work onto the mac80211 workqueue @@ -7237,6 +7467,20 @@ ieee80211_get_eht_iftype_cap_vif(const struct ieee80211_supported_band *sband, } /** + * ieee80211_get_uhr_iftype_cap_vif - return 
UHR capabilities for sband/vif + * @sband: the sband to search for the iftype on + * @vif: the vif to get the iftype from + * + * Return: pointer to the struct ieee80211_sta_uhr_cap, or %NULL if none found + */ +static inline const struct ieee80211_sta_uhr_cap * +ieee80211_get_uhr_iftype_cap_vif(const struct ieee80211_supported_band *sband, + struct ieee80211_vif *vif) +{ + return ieee80211_get_uhr_iftype_cap(sband, ieee80211_vif_type_p2p(vif)); +} + +/** * ieee80211_update_mu_groups - set the VHT MU-MIMO groud data * * @vif: the specified virtual interface @@ -7272,6 +7516,24 @@ void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif); int ieee80211_ave_rssi(struct ieee80211_vif *vif, int link_id); /** + * ieee80211_calculate_rx_timestamp - calculate timestamp in frame + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @status: RX status + * @mpdu_len: total MPDU length (including FCS) + * @mpdu_offset: offset into MPDU to calculate timestamp at + * + * This function calculates the RX timestamp at the given MPDU offset, taking + * into account what the RX timestamp was. An offset of 0 will just normalize + * the timestamp to TSF at beginning of MPDU reception. + * + * Returns: the calculated timestamp + */ +u64 ieee80211_calculate_rx_timestamp(struct ieee80211_hw *hw, + struct ieee80211_rx_status *status, + unsigned int mpdu_len, + unsigned int mpdu_offset); + +/** * ieee80211_report_wowlan_wakeup - report WoWLAN wakeup * @vif: virtual interface * @wakeup: wakeup reason(s) @@ -7291,7 +7553,9 @@ void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, * @band: the band to transmit on * @sta: optional pointer to get the station to send the frame to * - * Return: %true if the skb was prepared, %false otherwise + * Return: %true if the skb was prepared, %false otherwise. + * On failure, the skb is freed by this function; callers must not + * free it again. 
* * Note: must be called under RCU lock */ @@ -7615,6 +7879,17 @@ void ieee80211_nan_func_match(struct ieee80211_vif *vif, gfp_t gfp); /** + * ieee80211_nan_sched_update_done - notify that NAN schedule update is done + * + * This function is called by the driver to notify mac80211 that the NAN + * schedule update has been applied. + * Must be called with wiphy mutex held. May sleep. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + */ +void ieee80211_nan_sched_update_done(struct ieee80211_vif *vif); + +/** * ieee80211_calc_rx_airtime - calculate estimated transmission airtime for RX. * * This function calculates the estimated airtime usage of a frame based on the @@ -7650,19 +7925,22 @@ u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw, * ieee80211_get_fils_discovery_tmpl - Get FILS discovery template. * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: valid link_id during MLO or 0 for non-MLO. * * The driver is responsible for freeing the returned skb. * * Return: FILS discovery template. %NULL on error. */ struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + unsigned int link_id); /** * ieee80211_get_unsol_bcast_probe_resp_tmpl - Get unsolicited broadcast * probe response template. * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: valid link_id during MLO or 0 for non-MLO. * * The driver is responsible for freeing the returned skb. 
* @@ -7670,7 +7948,8 @@ struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw, */ struct sk_buff * ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + unsigned int link_id); /** * ieee80211_obss_color_collision_notify - notify userland about a BSS color @@ -7846,4 +8125,11 @@ int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw, * Return: %true iff the vif is a NAN interface and NAN is started */ bool ieee80211_vif_nan_started(struct ieee80211_vif *vif); + +/** + * ieee80211_encrypt_tx_skb - Encrypt the transmit skb + * @skb: the skb + * Return: 0 if success and non-zero on error + */ +int ieee80211_encrypt_tx_skb(struct sk_buff *skb); #endif /* MAC80211_H */ diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index eaa27483f99b..6d836060976a 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -35,6 +35,8 @@ enum gdma_request_type { GDMA_CREATE_MR = 31, GDMA_DESTROY_MR = 32, GDMA_QUERY_HWC_TIMEOUT = 84, /* 0x54 */ + GDMA_ALLOC_DM = 96, /* 0x60 */ + GDMA_DESTROY_DM = 97, /* 0x61 */ }; #define GDMA_RESOURCE_DOORBELL_PAGE 27 @@ -213,6 +215,12 @@ enum gdma_page_type { #define GDMA_INVALID_DMA_REGION 0 +struct mana_serv_work { + struct work_struct serv_work; + struct pci_dev *pdev; + enum gdma_eqe_type type; +}; + struct gdma_mem_info { struct device *dev; @@ -384,6 +392,7 @@ struct gdma_irq_context { enum gdma_context_flags { GC_PROBE_SUCCEEDED = 0, + GC_IN_SERVICE = 1, }; struct gdma_context { @@ -409,14 +418,15 @@ struct gdma_context { u32 test_event_eq_id; bool is_pf; - bool in_service; phys_addr_t bar0_pa; void __iomem *bar0_va; + resource_size_t bar0_size; void __iomem *shm_base; void __iomem *db_page_base; phys_addr_t phys_db_page_base; - u32 db_page_size; + u64 db_page_off; + u64 db_page_size; int numa_node; /* Shared memory chanenl (used to bootstrap HWC) */ @@ -471,6 +481,8 @@ int mana_gd_poll_cq(struct gdma_queue *cq, struct 
gdma_comp *comp, int num_cqe); void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit); +int mana_schedule_serv_work(struct gdma_context *gc, enum gdma_eqe_type type); + struct gdma_wqe { u32 reserved :24; u32 last_vbytes :8; @@ -598,6 +610,10 @@ enum { /* Driver can self reset on FPGA Reconfig EQE notification */ #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17) + +/* Driver detects stalled send queues and recovers them */ +#define GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY BIT(18) + #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6) /* Driver supports linearizing the skb when num_sge exceeds hardware limit */ @@ -609,6 +625,9 @@ enum { /* Driver can handle hardware recovery events during probe */ #define GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY BIT(22) +/* Driver supports self recovery on Hardware Channel timeouts */ +#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECOVERY BIT(25) + #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ @@ -621,7 +640,9 @@ enum { GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \ GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \ GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE | \ - GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY) + GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY | \ + GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY | \ + GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECOVERY) #define GDMA_DRV_CAP_FLAGS2 0 @@ -771,6 +792,7 @@ enum gdma_mr_access_flags { GDMA_ACCESS_FLAG_REMOTE_READ = BIT_ULL(2), GDMA_ACCESS_FLAG_REMOTE_WRITE = BIT_ULL(3), GDMA_ACCESS_FLAG_REMOTE_ATOMIC = BIT_ULL(4), + GDMA_ACCESS_FLAG_BIND_MW = BIT_ULL(5), }; /* GDMA_CREATE_DMA_REGION */ @@ -861,6 +883,12 @@ enum gdma_mr_type { GDMA_MR_TYPE_GVA = 2, /* Guest zero-based address MRs */ GDMA_MR_TYPE_ZBVA = 4, + /* Device address MRs */ + GDMA_MR_TYPE_DM = 5, + /* Memory Window type 1 */ + GDMA_MR_TYPE_MW1 = 6, + /* Memory Window type 2 */ + GDMA_MR_TYPE_MW2 = 7, }; struct gdma_create_mr_params { @@ -876,6 +904,12 @@ struct gdma_create_mr_params { u64 
dma_region_handle; enum gdma_mr_access_flags access_flags; } zbva; + struct { + u64 dm_handle; + u64 offset; + u64 length; + enum gdma_mr_access_flags access_flags; + } da; }; }; @@ -890,13 +924,23 @@ struct gdma_create_mr_request { u64 dma_region_handle; u64 virtual_address; enum gdma_mr_access_flags access_flags; - } gva; + } __packed gva; struct { u64 dma_region_handle; enum gdma_mr_access_flags access_flags; - } zbva; - }; + } __packed zbva; + struct { + u64 dm_handle; + u64 offset; + enum gdma_mr_access_flags access_flags; + } __packed da; + } __packed; u32 reserved_2; + union { + struct { + u64 length; + } da_ext; + }; };/* HW DATA */ struct gdma_create_mr_response { @@ -915,6 +959,27 @@ struct gdma_destroy_mr_response { struct gdma_resp_hdr hdr; };/* HW DATA */ +struct gdma_alloc_dm_req { + struct gdma_req_hdr hdr; + u64 length; + u32 alignment; + u32 flags; +}; /* HW Data */ + +struct gdma_alloc_dm_resp { + struct gdma_resp_hdr hdr; + u64 dm_handle; +}; /* HW Data */ + +struct gdma_destroy_dm_req { + struct gdma_req_hdr hdr; + u64 dm_handle; +}; /* HW Data */ + +struct gdma_destroy_dm_resp { + struct gdma_resp_hdr hdr; +}; /* HW Data */ + int mana_gd_verify_vf_version(struct pci_dev *pdev); int mana_gd_register_device(struct gdma_dev *gd); diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index d7e089c6b694..8f721cd4e4a7 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -61,8 +61,11 @@ enum TRI_STATE { #define MAX_PORTS_IN_MANA_DEV 256 +/* Maximum number of packets per coalesced CQE */ +#define MANA_RXCOMP_OOB_NUM_PPI 4 + /* Update this count whenever the respective structures are changed */ -#define MANA_STATS_RX_COUNT 5 +#define MANA_STATS_RX_COUNT (6 + MANA_RXCOMP_OOB_NUM_PPI - 1) #define MANA_STATS_TX_COUNT 11 #define MANA_RX_FRAG_ALIGNMENT 64 @@ -73,6 +76,8 @@ struct mana_stats_rx { u64 xdp_drop; u64 xdp_tx; u64 xdp_redirect; + u64 pkt_len0_err; + u64 coalesced_cqe[MANA_RXCOMP_OOB_NUM_PPI - 1]; struct u64_stats_sync 
syncp; }; @@ -227,8 +232,6 @@ struct mana_rxcomp_perpkt_info { u32 pkt_hash; }; /* HW DATA */ -#define MANA_RXCOMP_OOB_NUM_PPI 4 - /* Receive completion OOB */ struct mana_rxcomp_oob { struct mana_cqe_header cqe_hdr; @@ -378,7 +381,6 @@ struct mana_ethtool_stats { u64 tx_cqe_err; u64 tx_cqe_unknown_type; u64 tx_linear_pkt_cnt; - u64 rx_coalesced_err; u64 rx_cqe_unknown_type; }; @@ -480,7 +482,7 @@ struct mana_context { struct mana_ethtool_hc_stats hc_stats; struct mana_eq *eqs; struct dentry *mana_eqs_debugfs; - + struct workqueue_struct *per_port_queue_reset_wq; /* Workqueue for querying hardware stats */ struct delayed_work gf_stats_work; bool hwc_timeout_occurred; @@ -495,6 +497,7 @@ struct mana_context { struct mana_port_context { struct mana_context *ac; struct net_device *ndev; + struct work_struct queue_reset_work; u8 mac_addr[ETH_ALEN]; @@ -556,6 +559,9 @@ struct mana_port_context { bool port_is_up; bool port_st_save; /* Saved port state */ + u8 cqe_coalescing_enable; + u32 cqe_coalescing_timeout_ns; + struct mana_ethtool_stats eth_stats; struct mana_ethtool_phy_stats phy_stats; @@ -567,6 +573,7 @@ struct mana_port_context { netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx, bool update_hash, bool update_tab); +int mana_disable_vport_rx(struct mana_port_context *apc); int mana_alloc_queues(struct net_device *ndev); int mana_attach(struct net_device *ndev); @@ -901,6 +908,10 @@ struct mana_cfg_rx_steer_req_v2 { struct mana_cfg_rx_steer_resp { struct gdma_resp_hdr hdr; + + /* V2 */ + u32 cqe_coalescing_timeout_ns; + u32 reserved1; }; /* HW DATA */ /* Register HW vPort */ @@ -997,6 +1008,7 @@ struct mana_deregister_filter_resp { #define STATISTICS_FLAGS_TX_ERRORS_GDMA_ERROR 0x0000000004000000 #define MANA_MAX_NUM_QUEUES 64 +#define MANA_DEF_NUM_QUEUES 16 #define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) diff --git a/include/net/mctp.h b/include/net/mctp.h index 
c3207ce98f07..d8bf9074110d 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -26,6 +26,9 @@ struct mctp_hdr { #define MCTP_VER_MIN 1 #define MCTP_VER_MAX 1 +/* Definitions for ver field */ +#define MCTP_HDR_VER_MASK GENMASK(3, 0) + /* Definitions for flags_seq_tag field */ #define MCTP_HDR_FLAG_SOM BIT(7) #define MCTP_HDR_FLAG_EOM BIT(6) @@ -270,6 +273,7 @@ struct mctp_dst { struct mctp_dev *dev; unsigned int mtu; mctp_eid_t nexthop; + mctp_eid_t saddr; /* set for direct addressing */ unsigned char halen; diff --git a/include/net/ndisc.h b/include/net/ndisc.h index d38783a2ce57..3da1a6f8d3f9 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -2,8 +2,6 @@ #ifndef _NDISC_H #define _NDISC_H -#include <net/ipv6_stubs.h> - /* * ICMP codes for neighbour discovery messages */ @@ -359,14 +357,6 @@ static inline struct neighbour *__ipv6_neigh_lookup_noref(struct net_device *dev return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev); } -static inline -struct neighbour *__ipv6_neigh_lookup_noref_stub(struct net_device *dev, - const void *pkey) -{ - return ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, - ndisc_hashfn, pkey, dev); -} - static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey) { struct neighbour *n; @@ -391,28 +381,20 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, rcu_read_unlock(); } -static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, - const void *pkey) -{ - struct neighbour *n; - - rcu_read_lock(); - n = __ipv6_neigh_lookup_noref_stub(dev, pkey); - neigh_confirm(n); - rcu_read_unlock(); -} - -/* uses ipv6_stub and is meant for use outside of IPv6 core */ static inline struct neighbour *ip_neigh_gw6(struct net_device *dev, const void *addr) { +#if IS_ENABLED(CONFIG_IPV6) struct neighbour *neigh; - neigh = __ipv6_neigh_lookup_noref_stub(dev, addr); + neigh = __ipv6_neigh_lookup_noref(dev, addr); if (unlikely(!neigh)) - neigh = 
__neigh_create(ipv6_stub->nd_tbl, addr, dev, false); + neigh = __neigh_create(&nd_tbl, addr, dev, false); return neigh; +#else + return ERR_PTR(-EAFNOSUPPORT); +#endif } int ndisc_init(void); @@ -434,6 +416,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); + void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index cb664f6e3558..80de5e98a66d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -37,6 +37,7 @@ #include <net/netns/smc.h> #include <net/netns/bpf.h> #include <net/netns/mctp.h> +#include <net/netns/vsock.h> #include <net/net_trackers.h> #include <linux/ns_common.h> #include <linux/idr.h> @@ -120,6 +121,7 @@ struct net { * it is critical that it is on a read_mostly cache line. */ u32 hash_mix; + bool is_dying; struct net_device *loopback_dev; /* The loopback */ @@ -196,6 +198,9 @@ struct net { /* Move to a better place when the config guard is removed. 
*/ struct mutex rtnl_mutex; #endif +#if IS_ENABLED(CONFIG_VSOCKETS) + struct netns_vsock vsock; +#endif } __randomize_layout; #include <linux/seq_file_net.h> @@ -259,14 +264,14 @@ void ipx_unregister_sysctl(void); #define ipx_unregister_sysctl() #endif -#ifdef CONFIG_NET_NS -void __put_net(struct net *net); - static inline struct net *to_net_ns(struct ns_common *ns) { return container_of(ns, struct net, ns); } +#ifdef CONFIG_NET_NS +void __put_net(struct net *net); + /* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { @@ -304,7 +309,7 @@ static inline int check_net(const struct net *net) return ns_ref_read(net) != 0; } -void net_drop_ns(void *); +void net_drop_ns(struct ns_common *); void net_passive_dec(struct net *net); #else diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index cd00e0406cf4..70c9fe9e83cc 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -14,6 +14,10 @@ struct netdev_config { u8 hds_config; }; +struct netdev_queue_config { + u32 rx_page_size; +}; + /* See the netdev.yaml spec for definition of each statistic */ struct netdev_queue_stats_rx { u64 bytes; @@ -111,6 +115,11 @@ void netdev_stat_queue_sum(struct net_device *netdev, int tx_start, int tx_end, struct netdev_queue_stats_tx *tx_sum); +enum { + /* The queue checks and honours the page size qcfg parameter */ + QCFG_RX_PAGE_SIZE = 0x1, +}; + /** * struct netdev_queue_mgmt_ops - netdev ops for queue management * @@ -130,28 +139,60 @@ void netdev_stat_queue_sum(struct net_device *netdev, * @ndo_queue_get_dma_dev: Get dma device for zero-copy operations to be used * for this queue. Return NULL on error. * + * @ndo_default_qcfg: (Optional) Populate queue config struct with defaults. + * Queue config structs are passed to this helper before + * the user-requested settings are applied. + * + * @ndo_validate_qcfg: (Optional) Check if queue config is supported. 
+ * Called when configuration affecting a queue may be + * changing, either due to NIC-wide config, or config + * scoped to the queue at a specified index. + * When NIC-wide config is changed the callback will + * be invoked for all queues. + * + * @ndo_queue_create: Create a new RX queue on a virtual device that will + * be paired with a physical device's queue via leasing. + * Return the new queue id on success, negative error + * on failure. + * + * @supported_params: Bitmask of supported parameters, see QCFG_*. + * * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only * be called for an interface which is open. */ struct netdev_queue_mgmt_ops { - size_t ndo_queue_mem_size; - int (*ndo_queue_mem_alloc)(struct net_device *dev, - void *per_queue_mem, - int idx); - void (*ndo_queue_mem_free)(struct net_device *dev, - void *per_queue_mem); - int (*ndo_queue_start)(struct net_device *dev, - void *per_queue_mem, - int idx); - int (*ndo_queue_stop)(struct net_device *dev, - void *per_queue_mem, - int idx); - struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, - int idx); + size_t ndo_queue_mem_size; + int (*ndo_queue_mem_alloc)(struct net_device *dev, + struct netdev_queue_config *qcfg, + void *per_queue_mem, + int idx); + void (*ndo_queue_mem_free)(struct net_device *dev, + void *per_queue_mem); + int (*ndo_queue_start)(struct net_device *dev, + struct netdev_queue_config *qcfg, + void *per_queue_mem, + int idx); + int (*ndo_queue_stop)(struct net_device *dev, + void *per_queue_mem, + int idx); + void (*ndo_default_qcfg)(struct net_device *dev, + struct netdev_queue_config *qcfg); + int (*ndo_validate_qcfg)(struct net_device *dev, + struct netdev_queue_config *qcfg, + struct netlink_ext_ack *extack); + struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, + int idx); + int (*ndo_queue_create)(struct net_device *dev, + struct netlink_ext_ack 
*extack); + + unsigned int supported_params; }; -bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx); +void netdev_queue_config(struct net_device *dev, int rxq, + struct netdev_queue_config *qcfg); + +bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx); /** * DOC: Lockless queue stopping / waking helpers. @@ -310,6 +351,17 @@ static inline void netif_subqueue_sent(const struct net_device *dev, netdev_tx_sent_queue(txq, bytes); } +static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq) +{ + unsigned long trans_start = READ_ONCE(txq->trans_start); + + if (netif_xmit_stopped(txq) && + time_after(jiffies, trans_start + txq->dev->watchdog_timeo)) + return jiffies_to_msecs(jiffies - trans_start); + + return 0; +} + #define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \ ({ \ struct netdev_queue *_txq; \ @@ -328,6 +380,14 @@ static inline void netif_subqueue_sent(const struct net_device *dev, get_desc, start_thrs); \ }) -struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx); - -#endif +struct device *netdev_queue_get_dma_dev(struct net_device *dev, + unsigned int idx, + enum netdev_queue_type type); +bool netdev_can_create_queue(const struct net_device *dev, + struct netlink_ext_ack *extack); +bool netdev_can_lease_queue(const struct net_device *dev, + struct netlink_ext_ack *extack); +bool netdev_queue_busy(struct net_device *dev, unsigned int idx, + enum netdev_queue_type type, + struct netlink_ext_ack *extack); +#endif /* _LINUX_NET_QUEUES_H */ diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 8cdcd138b33f..9415a94d333d 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -7,13 +7,15 @@ #include <linux/sysfs.h> #include <net/xdp.h> #include <net/page_pool/types.h> +#include <net/netdev_queues.h> +#include <net/rps-types.h> /* This structure contains an instance of an RX queue. 
*/ struct netdev_rx_queue { struct xdp_rxq_info xdp_rxq; #ifdef CONFIG_RPS struct rps_map __rcu *rps_map; - struct rps_dev_flow_table __rcu *rps_flow_table; + rps_tag_ptr rps_flow_table; #endif struct kobject kobj; const struct attribute_group **groups; @@ -27,7 +29,16 @@ struct netdev_rx_queue { struct xsk_buff_pool *pool; #endif struct napi_struct *napi; + struct netdev_queue_config qcfg; struct pp_memory_provider_params mp_params; + + /* If a queue is leased, then the lease pointer is always + * valid. From the physical device it points to the virtual + * queue, and from the virtual device it points to the + * physical queue. + */ + struct netdev_rx_queue *lease; + netdevice_tracker lease_tracker; } ____cacheline_aligned_in_smp; /* @@ -56,6 +67,18 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue) return index; } -int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); +enum netif_lease_dir { + NETIF_VIRT_TO_PHYS, + NETIF_PHYS_TO_VIRT, +}; -#endif +struct netdev_rx_queue * +__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq, + enum netif_lease_dir dir); + +int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); +void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst, + struct netdev_rx_queue *rxq_src); +void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst, + struct netdev_rx_queue *rxq_src); +#endif /* _LINUX_NETDEV_RX_QUEUE_H */ diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 8d65ffbf57de..b39417ad955e 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -16,9 +16,6 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_SCTP extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite; -#endif #ifdef 
CONFIG_NF_CT_PROTO_GRE extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre; #endif diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index aa0a7c82199e..bc42dd0e10e6 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -16,6 +16,7 @@ #include <linux/bitops.h> #include <linux/compiler.h> +#include <net/netns/generic.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_tcp.h> #include <linux/netfilter/nf_conntrack_sctp.h> diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3384859a8921..8883575adcc1 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock); extern spinlock_t nf_conntrack_expect_lock; +static inline void lockdep_nfct_expect_lock_held(void) +{ + lockdep_assert_held(&nf_conntrack_expect_lock); +} + /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout) diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index 52a06de41aa0..cf0166520cf3 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -13,6 +13,7 @@ struct nf_conncount_list { u32 last_gc; /* jiffies at most recent gc */ struct list_head head; /* connections with the same filtering key */ unsigned int count; /* length of list */ + unsigned int last_gc_count; /* length of list at most recent gc */ }; struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 165e7a03b8e9..e9a8350e7ccf 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -22,10 
+22,16 @@ struct nf_conntrack_expect { /* Hash member */ struct hlist_node hnode; + /* Network namespace */ + possible_net_t net; + /* We expect this tuple, with the following mask */ struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_mask mask; +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone zone; +#endif /* Usage count. */ refcount_t use; @@ -40,7 +46,7 @@ struct nf_conntrack_expect { struct nf_conntrack_expect *this); /* Helper to assign to new connection */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; /* The conntrack of the master connection */ struct nf_conn *master; @@ -62,7 +68,17 @@ struct nf_conntrack_expect { static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) { - return nf_ct_net(exp->master); + return read_pnet(&exp->net); +} + +static inline bool nf_ct_exp_zone_equal_any(const struct nf_conntrack_expect *a, + const struct nf_conntrack_zone *b) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + return a->zone.id == b->id; +#else + return true; +#endif } #define NF_CT_EXP_POLICY_NAME_LEN 16 diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index cd5020835a6d..fde2427ceb8f 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -107,11 +107,6 @@ int nf_conntrack_udp_packet(struct nf_conn *ct, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state); -int nf_conntrack_udplite_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state); int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, @@ -139,8 +134,6 @@ void nf_conntrack_icmpv6_init_net(struct net *net); /* Existing built-in generic protocol */ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; -#define MAX_NF_CT_PROTO IPPROTO_UDPLITE - const struct 
nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto); /* Generic netlink helpers */ diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 9fdaba911de6..3a66d4abb6d6 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -14,6 +14,7 @@ struct nf_ct_timeout { __u16 l3num; const struct nf_conntrack_l4proto *l4proto; + struct rcu_head rcu; char data[]; }; diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index f7dd950ff250..4d55b7325707 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -11,7 +11,7 @@ #ifndef _NF_CONNTRACK_TUPLE_H #define _NF_CONNTRACK_TUPLE_H -#include <linux/netfilter/x_tables.h> +#include <linux/netfilter.h> #include <linux/netfilter/nf_conntrack_tuple_common.h> #include <linux/list_nulls.h> diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 4aeffddb7586..d17035d14d96 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -6,11 +6,13 @@ #include <linux/ipv6.h> #include <linux/jhash.h> #include <linux/netfilter.h> +#include <linux/rhashtable-types.h> #include <linux/skbuff.h> /* Each queued (to userspace) skbuff has one of these. 
*/ struct nf_queue_entry { struct list_head list; + struct rhash_head hash_node; struct sk_buff *skb; unsigned int id; unsigned int hook_index; /* index in hook_entries->hook[] */ @@ -19,6 +21,7 @@ struct nf_queue_entry { struct net_device *physout; #endif struct nf_hook_state state; + bool nf_ct_is_unconfirmed; u16 size; /* sizeof(entry) + saved route keys */ /* extra space to store route keys */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0e266c2d0e7f..2c0173d9309c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -6,7 +6,6 @@ #include <linux/list.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> -#include <linux/netfilter/x_tables.h> #include <linux/netfilter/nf_tables.h> #include <linux/u64_stats_sync.h> #include <linux/rhashtable.h> @@ -32,7 +31,9 @@ struct nft_pktinfo { const struct nf_hook_state *state; u8 flags; u8 tprot; + __be16 ethertype; u16 fragoff; + u16 nhoff; u16 thoff; u16 inneroff; }; @@ -84,6 +85,8 @@ static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt) { pkt->flags = 0; pkt->tprot = 0; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = 0; pkt->thoff = 0; pkt->fragoff = 0; } @@ -123,17 +126,6 @@ struct nft_regs { }; }; -struct nft_regs_track { - struct { - const struct nft_expr *selector; - const struct nft_expr *bitwise; - u8 num_reg; - } regs[NFT_REG32_NUM]; - - const struct nft_expr *cur; - const struct nft_expr *last; -}; - /* Store/load an u8, u16 or u64 integer to/from the u32 data register. 
* * Note, when using concatenations, register allocation happens at 32-bit @@ -317,11 +309,13 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) * @NFT_ITER_UNSPEC: unspecified, to catch errors * @NFT_ITER_READ: read-only iteration over set elements * @NFT_ITER_UPDATE: iteration under mutex to update set element state + * @NFT_ITER_UPDATE_CLONE: clone set before iteration under mutex to update element */ enum nft_iter_type { NFT_ITER_UNSPEC, NFT_ITER_READ, NFT_ITER_UPDATE, + NFT_ITER_UPDATE_CLONE, }; struct nft_set; @@ -424,8 +418,6 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr, bool reset); -bool nft_expr_reduce_bitwise(struct nft_regs_track *track, - const struct nft_expr *expr); struct nft_set_ext; @@ -452,6 +444,7 @@ struct nft_set_ext; * @init: initialize private data of new set instance * @destroy: destroy private data of set instance * @gc_init: initialize garbage collection + * @abort_skip_removal: skip removal of elements from abort path * @elemsize: element private size * * Operations lookup, update and delete have simpler interfaces, are faster @@ -509,6 +502,7 @@ struct nft_set_ops { const struct nft_set *set); void (*gc_init)(const struct nft_set *set); + bool abort_skip_removal; unsigned int elemsize; }; @@ -871,6 +865,8 @@ struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, u64 timeout, u64 expiration, gfp_t gfp); int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]); +void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, + struct nft_set_elem_expr *elem_expr); void nft_set_elem_destroy(const struct nft_set *set, const struct nft_elem_priv *elem_priv, bool destroy_expr); @@ -936,7 +932,6 @@ struct nft_offload_ctx; * @destroy_clone: destruction clone function * @dump: 
function to dump parameters * @validate: validate expression, called during loop detection - * @reduce: reduce expression * @gc: garbage collection expression * @offload: hardware offload expression * @offload_action: function to report true/false to allocate one slot or not in the flow @@ -970,8 +965,6 @@ struct nft_expr_ops { bool reset); int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr); - bool (*reduce)(struct nft_regs_track *track, - const struct nft_expr *expr); bool (*gc)(struct net *net, const struct nft_expr *expr); int (*offload)(struct nft_offload_ctx *ctx, @@ -1856,6 +1849,11 @@ struct nft_trans_gc { struct rcu_head rcu; }; +static inline int nft_trans_gc_space(const struct nft_trans_gc *trans) +{ + return NFT_TRANS_GC_BATCHCOUNT - trans->count; +} + static inline void nft_ctx_update(struct nft_ctx *ctx, const struct nft_trans *trans) { @@ -1949,20 +1947,4 @@ static inline u64 nft_net_tstamp(const struct net *net) return nft_pernet(net)->tstamp; } -#define __NFT_REDUCE_READONLY 1UL -#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY - -void nft_reg_track_update(struct nft_regs_track *track, - const struct nft_expr *expr, u8 dreg, u8 len); -void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len); -void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg); - -static inline bool nft_reg_track_cmp(struct nft_regs_track *track, - const struct nft_expr *expr, u8 dreg) -{ - return track->regs[dreg].selector && - track->regs[dreg].selector->ops == expr->ops && - track->regs[dreg].num_reg == 0; -} - #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index fcf967286e37..e715405a73cb 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -12,16 +12,19 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt) ip = ip_hdr(pkt->skb); pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = 
ip->protocol; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = 0; pkt->thoff = ip_hdrlen(pkt->skb); pkt->fragoff = ntohs(ip->frag_off) & IP_OFFSET; } -static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) +static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + int nhoff) { struct iphdr *iph, _iph; u32 len, thoff, skb_len; - iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), + iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb) + nhoff, sizeof(*iph), &_iph); if (!iph) return -1; @@ -31,7 +34,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) len = iph_totlen(pkt->skb, iph); thoff = iph->ihl * 4; - skb_len = pkt->skb->len - skb_network_offset(pkt->skb); + skb_len = pkt->skb->len - skb_network_offset(pkt->skb) - nhoff; if (skb_len < len) return -1; @@ -42,7 +45,9 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = iph->protocol; - pkt->thoff = skb_network_offset(pkt->skb) + thoff; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = nhoff; + pkt->thoff = skb_network_offset(pkt->skb) + nhoff + thoff; pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; return 0; @@ -50,7 +55,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) { - if (__nft_set_pktinfo_ipv4_validate(pkt) < 0) + if (__nft_set_pktinfo_ipv4_validate(pkt, 0) < 0) nft_set_pktinfo_unspec(pkt); } @@ -78,6 +83,8 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt) } pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = 0; pkt->tprot = iph->protocol; pkt->thoff = thoff; pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index a0633eeaec97..d7b8c559b795 100644 --- 
a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -20,21 +20,23 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt) pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = 0; pkt->thoff = thoff; pkt->fragoff = frag_off; } -static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) +static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, int nhoff) { #if IS_ENABLED(CONFIG_IPV6) unsigned int flags = IP6_FH_F_AUTH; struct ipv6hdr *ip6h, _ip6h; - unsigned int thoff = 0; + unsigned int thoff = nhoff; unsigned short frag_off; u32 pkt_len, skb_len; int protohdr; - ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), + ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb) + nhoff, sizeof(*ip6h), &_ip6h); if (!ip6h) return -1; @@ -42,8 +44,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) if (ip6h->version != 6) return -1; - pkt_len = ntohs(ip6h->payload_len); - skb_len = pkt->skb->len - skb_network_offset(pkt->skb); + pkt_len = ipv6_payload_len(pkt->skb, ip6h); + skb_len = pkt->skb->len - skb_network_offset(pkt->skb) - nhoff; if (pkt_len + sizeof(*ip6h) > skb_len) return -1; @@ -53,6 +55,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = nhoff; pkt->thoff = thoff; pkt->fragoff = frag_off; @@ -64,7 +68,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) { - if (__nft_set_pktinfo_ipv6_validate(pkt) < 0) + if (__nft_set_pktinfo_ipv6_validate(pkt, 0) < 0) nft_set_pktinfo_unspec(pkt); } @@ -86,7 +90,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) if (ip6h->version != 6) goto inhdr_error; - pkt_len = 
ntohs(ip6h->payload_len); + pkt_len = ipv6_payload_len(pkt->skb, ip6h); if (pkt_len + sizeof(*ip6h) > pkt->skb->len) { idev = __in6_dev_get(nft_in(pkt)); __IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INTRUNCATEDPKTS); @@ -99,6 +103,8 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = 0; pkt->thoff = thoff; pkt->fragoff = frag_off; diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index 3568b6a2f5f0..14c427891ee6 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -67,6 +67,16 @@ struct nft_flow_rule { struct flow_rule *rule; }; +static inline struct flow_action_entry * +nft_flow_action_entry_next(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow) +{ + if (unlikely(ctx->num_actions >= flow->rule->action.num_entries)) + return NULL; + + return &flow->rule->action.entries[ctx->num_actions++]; +} + void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, enum flow_dissector_key_id addr_type); diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 7370fba844ef..e0422456f27b 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -66,6 +66,4 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct net_device *dev); -bool nft_fib_reduce(struct nft_regs_track *track, - const struct nft_expr *expr); #endif diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index d602263590fe..f74e63290603 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -43,9 +43,6 @@ void nft_meta_set_destroy(const struct nft_ctx *ctx, int nft_meta_set_validate(const struct nft_ctx *ctx, const struct nft_expr *expr); -bool 
nft_meta_get_reduce(struct nft_regs_track *track, - const struct nft_expr *expr); - struct nft_inner_tun_ctx; void nft_meta_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt, diff --git a/include/net/netlink.h b/include/net/netlink.h index 1a8356ca4b78..546d10586576 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -2265,6 +2265,25 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) } /** + * nla_nest_end_safe - Validate and finalize nesting of attributes + * @skb: socket buffer the attributes are stored in + * @start: container attribute + * + * Corrects the container attribute header to include all appended + * attributes. + * + * Returns: the total data length of the skb, or -EMSGSIZE if the + * nested attribute length exceeds U16_MAX. + */ +static inline int nla_nest_end_safe(struct sk_buff *skb, struct nlattr *start) +{ + if (skb_tail_pointer(skb) - (unsigned char *)start > U16_MAX) + return -EMSGSIZE; + + return nla_nest_end(skb, start); +} + +/** * nla_nest_cancel - Cancel nesting of attributes * @skb: socket buffer the message is stored in * @start: container attribute diff --git a/include/net/netmem.h b/include/net/netmem.h index 9e10f4ac50c3..507b74c9f52d 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -93,27 +93,21 @@ enum net_iov_type { * supported. */ struct net_iov { - union { - struct netmem_desc desc; - - /* XXX: The following part should be removed once all - * the references to them are converted so as to be - * accessed via netmem_desc e.g. niov->desc.pp instead - * of niov->pp. 
- */ - struct { - unsigned long _flags; - unsigned long pp_magic; - struct page_pool *pp; - unsigned long _pp_mapping_pad; - unsigned long dma_addr; - atomic_long_t pp_ref_count; - }; - }; - struct net_iov_area *owner; + struct netmem_desc desc; + unsigned int page_type; enum net_iov_type type; + struct net_iov_area *owner; }; +/* Make sure 'the offset of page_type in struct page == the offset of + * type in struct net_iov'. + */ +#define NET_IOV_ASSERT_OFFSET(pg, iov) \ + static_assert(offsetof(struct page, pg) == \ + offsetof(struct net_iov, iov)) +NET_IOV_ASSERT_OFFSET(page_type, page_type); +#undef NET_IOV_ASSERT_OFFSET + struct net_iov_area { /* Array of net_iovs for this area. */ struct net_iov *niovs; @@ -123,26 +117,6 @@ struct net_iov_area { unsigned long base_virtual; }; -/* net_iov is union'ed with struct netmem_desc mirroring struct page, so - * the page_pool can access these fields without worrying whether the - * underlying fields are accessed via netmem_desc or directly via - * net_iov, until all the references to them are converted so as to be - * accessed via netmem_desc e.g. niov->desc.pp instead of niov->pp. - * - * The non-net stack fields of struct page are private to the mm stack - * and must never be mirrored to net_iov. 
- */ -#define NET_IOV_ASSERT_OFFSET(desc, iov) \ - static_assert(offsetof(struct netmem_desc, desc) == \ - offsetof(struct net_iov, iov)) -NET_IOV_ASSERT_OFFSET(_flags, _flags); -NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic); -NET_IOV_ASSERT_OFFSET(pp, pp); -NET_IOV_ASSERT_OFFSET(_pp_mapping_pad, _pp_mapping_pad); -NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr); -NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count); -#undef NET_IOV_ASSERT_OFFSET - static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov) { return niov->owner; @@ -256,7 +230,7 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem) */ #define pp_page_to_nmdesc(p) \ ({ \ - DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p)); \ + DEBUG_NET_WARN_ON_ONCE(!PageNetpp(p)); \ __pp_page_to_nmdesc(p); \ }) @@ -389,8 +363,36 @@ static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) return netmem_to_nmdesc(netmem)->dma_addr; } -void get_netmem(netmem_ref netmem); -void put_netmem(netmem_ref netmem); +#if defined(CONFIG_NET_DEVMEM) +static inline bool net_is_devmem_iov(const struct net_iov *niov) +{ + return niov->type == NET_IOV_DMABUF; +} +#else +static inline bool net_is_devmem_iov(const struct net_iov *niov) +{ + return false; +} +#endif + +void __get_netmem(netmem_ref netmem); +void __put_netmem(netmem_ref netmem); + +static __always_inline void get_netmem(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + __get_netmem(netmem); + else + get_page(netmem_to_page(netmem)); +} + +static __always_inline void put_netmem(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + __put_netmem(netmem); + else + put_page(netmem_to_page(netmem)); +} #define netmem_dma_unmap_addr_set(NETMEM, PTR, ADDR_NAME, VAL) \ do { \ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2dbd46fc4734..80ccd4dda8e0 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -74,6 +74,7 @@ struct netns_ipv4 { /* TXRX readonly hotpath cache lines */ 
__cacheline_group_begin(netns_ipv4_read_txrx); + u8 sysctl_tcp_shrink_window; __cacheline_group_end(netns_ipv4_read_txrx); /* RX readonly hotpath cache line */ @@ -88,6 +89,12 @@ struct netns_ipv4 { int sysctl_tcp_rcvbuf_low_rtt; __cacheline_group_end(netns_ipv4_read_rx); + /* ICMP rate limiter hot cache line. */ + __cacheline_group_begin_aligned(icmp); + atomic_t icmp_global_credit; + u32 icmp_global_stamp; + __cacheline_group_end_aligned(icmp); + struct inet_timewait_death_row tcp_death_row; struct udp_table *udp_table; @@ -116,7 +123,6 @@ struct netns_ipv4 { #endif bool fib_has_custom_local_routes; bool fib_offload_disabled; - u8 sysctl_tcp_shrink_window; #ifdef CONFIG_IP_ROUTE_CLASSID atomic_t fib_num_tclassid_users; #endif @@ -141,8 +147,7 @@ struct netns_ipv4 { int sysctl_icmp_ratemask; int sysctl_icmp_msgs_per_sec; int sysctl_icmp_msgs_burst; - atomic_t icmp_global_credit; - u32 icmp_global_stamp; + u32 ip_rt_min_pmtu; int ip_rt_mtu_expires; int ip_rt_min_advmss; @@ -161,6 +166,7 @@ struct netns_ipv4 { u8 sysctl_ip_autobind_reuse; /* Shall we try to damage output packets if routing dev changes? 
*/ u8 sysctl_ip_dynaddr; + u32 sysctl_ip_local_port_step_width; #ifdef CONFIG_NET_L3_MASTER_DEV u8 sysctl_raw_l3mdev_accept; #endif @@ -274,6 +280,9 @@ struct netns_ipv4 { struct list_head mr_tables; struct fib_rules_ops *mr_rules_ops; #endif + struct fib_notifier_ops *ipmr_notifier_ops; + atomic_t ipmr_seq; + struct mutex mfc_mutex; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; @@ -285,9 +294,6 @@ struct netns_ipv4 { struct fib_notifier_ops *notifier_ops; unsigned int fib_seq; /* writes protected by rtnl_mutex */ - struct fib_notifier_ops *ipmr_notifier_ops; - unsigned int ipmr_seq; /* protected by rtnl_mutex */ - atomic_t rt_genid; siphash_key_t ip_id_key; struct hlist_head *inet_addr_lst; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 08d2ecc96e2b..499e4288170f 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -30,19 +30,23 @@ struct netns_sysctl_ipv6 { int ip6_rt_min_advmss; u32 multipath_hash_fields; u8 multipath_hash_policy; - u8 bindv6only; + + __cacheline_group_begin(sysctl_ipv6_flowlabel); u8 flowlabel_consistency; u8 auto_flowlabels; - int icmpv6_time; + u8 flowlabel_state_ranges; + __cacheline_group_end(sysctl_ipv6_flowlabel); + u8 icmpv6_echo_ignore_all; u8 icmpv6_echo_ignore_multicast; u8 icmpv6_echo_ignore_anycast; + int icmpv6_time; DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1); unsigned long *icmpv6_ratemask_ptr; u8 anycast_src_echo_reply; + u8 bindv6only; u8 ip_nonlocal_bind; u8 fwmark_reflect; - u8 flowlabel_state_ranges; int idgen_retries; int idgen_delay; int flowlabel_reflect; @@ -114,7 +118,7 @@ struct netns_ipv6 { struct seg6_pernet_data *seg6_data; struct fib_notifier_ops *notifier_ops; struct fib_notifier_ops *ip6mr_notifier_ops; - unsigned int ipmr_seq; /* protected by rtnl_mutex */ + atomic_t ipmr_seq; struct { struct hlist_head head; spinlock_t lock; diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index 
7e373664b1e7..dce05f8e6a33 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -28,11 +28,6 @@ struct netns_mib { DEFINE_SNMP_STAT(struct mptcp_mib, mptcp_statistics); #endif - DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics); -#if IS_ENABLED(CONFIG_IPV6) - DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6); -#endif - DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics); DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib, icmpmsg_statistics); #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index 6682e51513ef..2073cbac2afb 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -17,6 +17,7 @@ struct netns_mpls { size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; struct mutex platform_mutex; + seqcount_mutex_t platform_label_seq; struct ctl_table_header *ctl; }; diff --git a/include/net/netns/vsock.h b/include/net/netns/vsock.h new file mode 100644 index 000000000000..7f84aad92f57 --- /dev/null +++ b/include/net/netns/vsock.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_NET_NAMESPACE_VSOCK_H +#define __NET_NET_NAMESPACE_VSOCK_H + +#include <linux/types.h> + +enum vsock_net_mode { + VSOCK_NET_MODE_GLOBAL, + VSOCK_NET_MODE_LOCAL, +}; + +struct netns_vsock { + struct ctl_table_header *sysctl_hdr; + + /* protected by the vsock_table_lock in af_vsock.c */ + u32 port; + + enum vsock_net_mode mode; + enum vsock_net_mode child_ns_mode; + + /* 0 = unlocked, 1 = locked to global, 2 = locked to local */ + int child_ns_mode_locked; + + int g2h_fallback; +}; +#endif /* __NET_NET_NAMESPACE_VSOCK_H */ diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 23dd647fe024..b73983a17e08 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -59,7 +59,7 @@ struct netns_xfrm { struct list_head inexact_bins; - struct sock *nlsk; + struct sock __rcu *nlsk; struct sock *nlsk_stash; u32 sysctl_aevent_etime; diff --git 
a/include/net/netrom.h b/include/net/netrom.h deleted file mode 100644 index f0565a5987d1..000000000000 --- a/include/net/netrom.h +++ /dev/null @@ -1,273 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Declarations of NET/ROM type objects. - * - * Jonathan Naylor G4KLX 9/4/95 - */ - -#ifndef _NETROM_H -#define _NETROM_H - -#include <linux/netrom.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <net/sock.h> -#include <linux/refcount.h> -#include <linux/seq_file.h> -#include <net/ax25.h> - -#define NR_NETWORK_LEN 15 -#define NR_TRANSPORT_LEN 5 - -#define NR_PROTO_IP 0x0C - -#define NR_PROTOEXT 0x00 -#define NR_CONNREQ 0x01 -#define NR_CONNACK 0x02 -#define NR_DISCREQ 0x03 -#define NR_DISCACK 0x04 -#define NR_INFO 0x05 -#define NR_INFOACK 0x06 -#define NR_RESET 0x07 - -#define NR_CHOKE_FLAG 0x80 -#define NR_NAK_FLAG 0x40 -#define NR_MORE_FLAG 0x20 - -/* Define Link State constants. */ -enum { - NR_STATE_0, - NR_STATE_1, - NR_STATE_2, - NR_STATE_3 -}; - -#define NR_COND_ACK_PENDING 0x01 -#define NR_COND_REJECT 0x02 -#define NR_COND_PEER_RX_BUSY 0x04 -#define NR_COND_OWN_RX_BUSY 0x08 - -#define NR_DEFAULT_T1 120000 /* Outstanding frames - 120 seconds */ -#define NR_DEFAULT_T2 5000 /* Response delay - 5 seconds */ -#define NR_DEFAULT_N2 3 /* Number of Retries - 3 */ -#define NR_DEFAULT_T4 180000 /* Busy Delay - 180 seconds */ -#define NR_DEFAULT_IDLE 0 /* No Activity Timeout - none */ -#define NR_DEFAULT_WINDOW 4 /* Default Window Size - 4 */ -#define NR_DEFAULT_OBS 6 /* Default Obsolescence Count - 6 */ -#define NR_DEFAULT_QUAL 10 /* Default Neighbour Quality - 10 */ -#define NR_DEFAULT_TTL 16 /* Default Time To Live - 16 */ -#define NR_DEFAULT_ROUTING 1 /* Is routing enabled ? */ -#define NR_DEFAULT_FAILS 2 /* Link fails until route fails */ -#define NR_DEFAULT_RESET 0 /* Sent / accept reset cmds? 
*/ - -#define NR_MODULUS 256 -#define NR_MAX_WINDOW_SIZE 127 /* Maximum Window Allowable - 127 */ -#define NR_MAX_PACKET_SIZE 236 /* Maximum Packet Length - 236 */ - -struct nr_sock { - struct sock sock; - ax25_address user_addr, source_addr, dest_addr; - struct net_device *device; - unsigned char my_index, my_id; - unsigned char your_index, your_id; - unsigned char state, condition, bpqext, window; - unsigned short vs, vr, va, vl; - unsigned char n2, n2count; - unsigned long t1, t2, t4, idle; - unsigned short fraglen; - struct timer_list t1timer; - struct timer_list t2timer; - struct timer_list t4timer; - struct timer_list idletimer; - struct sk_buff_head ack_queue; - struct sk_buff_head reseq_queue; - struct sk_buff_head frag_queue; -}; - -#define nr_sk(sk) ((struct nr_sock *)(sk)) - -struct nr_neigh { - struct hlist_node neigh_node; - ax25_address callsign; - ax25_digi *digipeat; - ax25_cb *ax25; - struct net_device *dev; - unsigned char quality; - unsigned char locked; - unsigned short count; - unsigned int number; - unsigned char failed; - refcount_t refcount; -}; - -struct nr_route { - unsigned char quality; - unsigned char obs_count; - struct nr_neigh *neighbour; -}; - -struct nr_node { - struct hlist_node node_node; - ax25_address callsign; - char mnemonic[7]; - unsigned char which; - unsigned char count; - struct nr_route routes[3]; - refcount_t refcount; - spinlock_t node_lock; -}; - -/********************************************************************* - * nr_node & nr_neigh lists, refcounting and locking - *********************************************************************/ - -#define nr_node_hold(__nr_node) \ - refcount_inc(&((__nr_node)->refcount)) - -static __inline__ void nr_node_put(struct nr_node *nr_node) -{ - if (refcount_dec_and_test(&nr_node->refcount)) { - kfree(nr_node); - } -} - -#define nr_neigh_hold(__nr_neigh) \ - refcount_inc(&((__nr_neigh)->refcount)) - -static __inline__ void nr_neigh_put(struct nr_neigh *nr_neigh) -{ - if 
(refcount_dec_and_test(&nr_neigh->refcount)) { - if (nr_neigh->ax25) - ax25_cb_put(nr_neigh->ax25); - kfree(nr_neigh->digipeat); - kfree(nr_neigh); - } -} - -/* nr_node_lock and nr_node_unlock also hold/put the node's refcounter. - */ -static __inline__ void nr_node_lock(struct nr_node *nr_node) -{ - nr_node_hold(nr_node); - spin_lock_bh(&nr_node->node_lock); -} - -static __inline__ void nr_node_unlock(struct nr_node *nr_node) -{ - spin_unlock_bh(&nr_node->node_lock); - nr_node_put(nr_node); -} - -#define nr_neigh_for_each(__nr_neigh, list) \ - hlist_for_each_entry(__nr_neigh, list, neigh_node) - -#define nr_neigh_for_each_safe(__nr_neigh, node2, list) \ - hlist_for_each_entry_safe(__nr_neigh, node2, list, neigh_node) - -#define nr_node_for_each(__nr_node, list) \ - hlist_for_each_entry(__nr_node, list, node_node) - -#define nr_node_for_each_safe(__nr_node, node2, list) \ - hlist_for_each_entry_safe(__nr_node, node2, list, node_node) - - -/*********************************************************************/ - -/* af_netrom.c */ -extern int sysctl_netrom_default_path_quality; -extern int sysctl_netrom_obsolescence_count_initialiser; -extern int sysctl_netrom_network_ttl_initialiser; -extern int sysctl_netrom_transport_timeout; -extern int sysctl_netrom_transport_maximum_tries; -extern int sysctl_netrom_transport_acknowledge_delay; -extern int sysctl_netrom_transport_busy_delay; -extern int sysctl_netrom_transport_requested_window_size; -extern int sysctl_netrom_transport_no_activity_timeout; -extern int sysctl_netrom_routing_control; -extern int sysctl_netrom_link_fails_count; -extern int sysctl_netrom_reset_circuit; - -int nr_rx_frame(struct sk_buff *, struct net_device *); -void nr_destroy_socket(struct sock *); - -/* nr_dev.c */ -int nr_rx_ip(struct sk_buff *, struct net_device *); -void nr_setup(struct net_device *); - -/* nr_in.c */ -int nr_process_rx_frame(struct sock *, struct sk_buff *); - -/* nr_loopback.c */ -void nr_loopback_init(void); -void 
nr_loopback_clear(void); -int nr_loopback_queue(struct sk_buff *); - -/* nr_out.c */ -void nr_output(struct sock *, struct sk_buff *); -void nr_send_nak_frame(struct sock *); -void nr_kick(struct sock *); -void nr_transmit_buffer(struct sock *, struct sk_buff *); -void nr_establish_data_link(struct sock *); -void nr_enquiry_response(struct sock *); -void nr_check_iframes_acked(struct sock *, unsigned short); - -/* nr_route.c */ -void nr_rt_device_down(struct net_device *); -struct net_device *nr_dev_first(void); -struct net_device *nr_dev_get(ax25_address *); -int nr_rt_ioctl(unsigned int, void __user *); -void nr_link_failed(ax25_cb *, int); -int nr_route_frame(struct sk_buff *, ax25_cb *); -extern const struct seq_operations nr_node_seqops; -extern const struct seq_operations nr_neigh_seqops; -void nr_rt_free(void); - -/* nr_subr.c */ -void nr_clear_queues(struct sock *); -void nr_frames_acked(struct sock *, unsigned short); -void nr_requeue_frames(struct sock *); -int nr_validate_nr(struct sock *, unsigned short); -int nr_in_rx_window(struct sock *, unsigned short); -void nr_write_internal(struct sock *, int); - -void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags); - -/* - * This routine is called when a Connect Acknowledge with the Choke Flag - * set is needed to refuse a connection. - */ -#define nr_transmit_refusal(skb, mine) \ -do { \ - __nr_transmit_reply((skb), (mine), NR_CONNACK | NR_CHOKE_FLAG); \ -} while (0) - -/* - * This routine is called when we don't have a circuit matching an incoming - * NET/ROM packet. This is an G8PZT Xrouter extension. 
- */ -#define nr_transmit_reset(skb, mine) \ -do { \ - __nr_transmit_reply((skb), (mine), NR_RESET); \ -} while (0) - -void nr_disconnect(struct sock *, int); - -/* nr_timer.c */ -void nr_init_timers(struct sock *sk); -void nr_start_heartbeat(struct sock *); -void nr_start_t1timer(struct sock *); -void nr_start_t2timer(struct sock *); -void nr_start_t4timer(struct sock *); -void nr_start_idletimer(struct sock *); -void nr_stop_heartbeat(struct sock *); -void nr_stop_t1timer(struct sock *); -void nr_stop_t2timer(struct sock *); -void nr_stop_t4timer(struct sock *); -void nr_stop_idletimer(struct sock *); -int nr_t1timer_running(struct sock *); - -/* sysctl_net_netrom.c */ -int nr_register_sysctl(void); -void nr_unregister_sysctl(void); - -#endif diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h index ada4f968960a..255ce4cfd975 100644 --- a/include/net/page_pool/memory_provider.h +++ b/include/net/page_pool/memory_provider.h @@ -23,14 +23,10 @@ bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr); void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov); void net_mp_niov_clear_page_pool(struct net_iov *niov); -int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *p); -int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, +int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *p, struct netlink_ext_ack *extack); -void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *old_p); -void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, +void netif_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *old_p); /** diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 1509a536cb85..03da138722f5 100644 --- a/include/net/page_pool/types.h +++ 
b/include/net/page_pool/types.h @@ -44,6 +44,8 @@ * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX * ring is usually refilled and the max consumed elements will be 64, * thus a natural max size of objects needed in the cache. + * The refill watermark is set to 64 for 4KB pages, + * and scales to balance its size in bytes across page sizes. * * Keeping room for more objects, is due to XDP_DROP use-case. As * XDP_DROP allows the opportunity to recycle objects directly into @@ -51,8 +53,15 @@ * cache is already full (or partly full) then the XDP_DROP recycles * would have to take a slower code path. */ -#define PP_ALLOC_CACHE_SIZE 128 +#if PAGE_SIZE >= SZ_64K +#define PP_ALLOC_CACHE_REFILL 4 +#elif PAGE_SIZE >= SZ_16K +#define PP_ALLOC_CACHE_REFILL 16 +#else #define PP_ALLOC_CACHE_REFILL 64 +#endif + +#define PP_ALLOC_CACHE_SIZE (PP_ALLOC_CACHE_REFILL * 2) struct pp_alloc_cache { u32 count; netmem_ref cache[PP_ALLOC_CACHE_SIZE]; @@ -161,6 +170,7 @@ struct memory_provider_ops; struct pp_memory_provider_params { void *mp_priv; const struct memory_provider_ops *mp_ops; + u32 rx_page_size; }; struct page_pool { @@ -246,7 +256,7 @@ struct page_pool { /* User-facing fields, protected by page_pools_lock */ struct { struct hlist_node list; - u64 detach_time; + ktime_t detach_time; u32 id; } user; }; diff --git a/include/net/phy/realtek_phy.h b/include/net/phy/realtek_phy.h new file mode 100644 index 000000000000..d683bc1b0659 --- /dev/null +++ b/include/net/phy/realtek_phy.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _REALTEK_PHY_H +#define _REALTEK_PHY_H + +#define PHY_ID_RTL_DUMMY_SFP 0x001ccbff + +#endif /* _REALTEK_PHY_H */ diff --git a/include/net/pie.h b/include/net/pie.h index 01cbc66825a4..1f3db0c35514 100644 --- a/include/net/pie.h +++ b/include/net/pie.h @@ -104,7 +104,7 @@ static inline void pie_vars_init(struct pie_vars *vars) vars->dq_tstamp = DTIME_INVALID; vars->accu_prob = 0; vars->dq_count = DQCOUNT_INVALID; - 
vars->avg_dq_rate = 0; + WRITE_ONCE(vars->avg_dq_rate, 0); } static inline struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb) diff --git a/include/net/ping.h b/include/net/ping.h index 05bfd594a64c..bcbdb5a136e3 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -20,8 +20,7 @@ /* Compatibility glue so we can support IPv6 when it's compiled as a module */ struct pingv6_ops { - int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len, - int *addr_len); + int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len); void (*ip6_datagram_recv_common_ctl)(struct sock *sk, struct msghdr *msg, struct sk_buff *skb); @@ -64,7 +63,7 @@ int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *); int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len); + int flags); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index e703c507d0da..18a419cd9d94 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -308,4 +308,28 @@ static inline unsigned int qdisc_peek_len(struct Qdisc *sch) return len; } +static inline void qdisc_lock_init(struct Qdisc *sch, + const struct Qdisc_ops *ops) +{ + spin_lock_init(&sch->q.lock); + + /* Skip dynamic keys if nesting is not possible */ + if (ops->static_flags & TCQ_F_INGRESS || + ops == &noqueue_qdisc_ops) + return; + + lockdep_register_key(&sch->root_lock_key); + lockdep_set_class(&sch->q.lock, &sch->root_lock_key); +} + +static inline void qdisc_lock_uninit(struct Qdisc *sch, + const struct Qdisc_ops *ops) +{ + if (ops->static_flags & TCQ_F_INGRESS || + ops == &noqueue_qdisc_ops) + return; + + lockdep_unregister_key(&sch->root_lock_key); +} + #endif diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 9b9e04f6bb89..5a9c826a7092 
100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -123,14 +123,7 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb, return sk; } -static inline void __reqsk_free(struct request_sock *req) -{ - req->rsk_ops->destructor(req); - if (req->rsk_listener) - sock_put(req->rsk_listener); - kfree(req->saved_syn); - kmem_cache_free(req->rsk_ops->slab, req); -} +void __reqsk_free(struct request_sock *req); static inline void reqsk_free(struct request_sock *req) { @@ -196,8 +189,6 @@ struct request_sock_queue { */ }; -void reqsk_queue_alloc(struct request_sock_queue *queue); - void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, bool reset); diff --git a/include/net/rose.h b/include/net/rose.h index 2b5491bbf39a..41bfcb224f0b 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -1,266 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * Declarations of Rose type objects. - * - * Jonathan Naylor G4KLX 25/8/96 - */ - #ifndef _ROSE_H -#define _ROSE_H - -#include <linux/refcount.h> -#include <linux/rose.h> -#include <net/ax25.h> -#include <net/sock.h> - -#define ROSE_ADDR_LEN 5 - -#define ROSE_MIN_LEN 3 - -#define ROSE_CALL_REQ_ADDR_LEN_OFF 3 -#define ROSE_CALL_REQ_ADDR_LEN_VAL 0xAA /* each address is 10 digits */ -#define ROSE_CALL_REQ_DEST_ADDR_OFF 4 -#define ROSE_CALL_REQ_SRC_ADDR_OFF 9 -#define ROSE_CALL_REQ_FACILITIES_OFF 14 - -#define ROSE_GFI 0x10 -#define ROSE_Q_BIT 0x80 -#define ROSE_D_BIT 0x40 -#define ROSE_M_BIT 0x10 - -#define ROSE_CALL_REQUEST 0x0B -#define ROSE_CALL_ACCEPTED 0x0F -#define ROSE_CLEAR_REQUEST 0x13 -#define ROSE_CLEAR_CONFIRMATION 0x17 -#define ROSE_DATA 0x00 -#define ROSE_INTERRUPT 0x23 -#define ROSE_INTERRUPT_CONFIRMATION 0x27 -#define ROSE_RR 0x01 -#define ROSE_RNR 0x05 -#define ROSE_REJ 0x09 -#define ROSE_RESET_REQUEST 0x1B -#define ROSE_RESET_CONFIRMATION 0x1F -#define ROSE_REGISTRATION_REQUEST 0xF3 -#define ROSE_REGISTRATION_CONFIRMATION 0xF7 -#define ROSE_RESTART_REQUEST 0xFB 
-#define ROSE_RESTART_CONFIRMATION 0xFF -#define ROSE_DIAGNOSTIC 0xF1 -#define ROSE_ILLEGAL 0xFD - -/* Define Link State constants. */ - -enum { - ROSE_STATE_0, /* Ready */ - ROSE_STATE_1, /* Awaiting Call Accepted */ - ROSE_STATE_2, /* Awaiting Clear Confirmation */ - ROSE_STATE_3, /* Data Transfer */ - ROSE_STATE_4, /* Awaiting Reset Confirmation */ - ROSE_STATE_5 /* Deferred Call Acceptance */ -}; - -#define ROSE_DEFAULT_T0 180000 /* Default T10 T20 value */ -#define ROSE_DEFAULT_T1 200000 /* Default T11 T21 value */ -#define ROSE_DEFAULT_T2 180000 /* Default T12 T22 value */ -#define ROSE_DEFAULT_T3 180000 /* Default T13 T23 value */ -#define ROSE_DEFAULT_HB 5000 /* Default Holdback value */ -#define ROSE_DEFAULT_IDLE 0 /* No Activity Timeout - none */ -#define ROSE_DEFAULT_ROUTING 1 /* Default routing flag */ -#define ROSE_DEFAULT_FAIL_TIMEOUT 120000 /* Time until link considered usable */ -#define ROSE_DEFAULT_MAXVC 50 /* Maximum number of VCs per neighbour */ -#define ROSE_DEFAULT_WINDOW_SIZE 7 /* Default window size */ - -#define ROSE_MODULUS 8 -#define ROSE_MAX_PACKET_SIZE 251 /* Maximum packet size */ - -#define ROSE_COND_ACK_PENDING 0x01 -#define ROSE_COND_PEER_RX_BUSY 0x02 -#define ROSE_COND_OWN_RX_BUSY 0x04 - -#define FAC_NATIONAL 0x00 -#define FAC_CCITT 0x0F - -#define FAC_NATIONAL_RAND 0x7F -#define FAC_NATIONAL_FLAGS 0x3F -#define FAC_NATIONAL_DEST_DIGI 0xE9 -#define FAC_NATIONAL_SRC_DIGI 0xEB -#define FAC_NATIONAL_FAIL_CALL 0xED -#define FAC_NATIONAL_FAIL_ADD 0xEE -#define FAC_NATIONAL_DIGIS 0xEF - -#define FAC_CCITT_DEST_NSAP 0xC9 -#define FAC_CCITT_SRC_NSAP 0xCB - -struct rose_neigh { - struct rose_neigh *next; - ax25_address callsign; - ax25_digi *digipeat; - ax25_cb *ax25; - struct net_device *dev; - unsigned short count; - refcount_t use; - unsigned int number; - char restarted; - char dce_mode; - char loopback; - struct sk_buff_head queue; - struct timer_list t0timer; - struct timer_list ftimer; -}; - -struct rose_node { - struct rose_node 
*next; - rose_address address; - unsigned short mask; - unsigned char count; - char loopback; - struct rose_neigh *neighbour[3]; -}; - -struct rose_route { - struct rose_route *next; - unsigned int lci1, lci2; - rose_address src_addr, dest_addr; - ax25_address src_call, dest_call; - struct rose_neigh *neigh1, *neigh2; - unsigned int rand; -}; - -struct rose_sock { - struct sock sock; - rose_address source_addr, dest_addr; - ax25_address source_call, dest_call; - unsigned char source_ndigis, dest_ndigis; - ax25_address source_digis[ROSE_MAX_DIGIS]; - ax25_address dest_digis[ROSE_MAX_DIGIS]; - struct rose_neigh *neighbour; - struct net_device *device; - netdevice_tracker dev_tracker; - unsigned int lci, rand; - unsigned char state, condition, qbitincl, defer; - unsigned char cause, diagnostic; - unsigned short vs, vr, va, vl; - unsigned long t1, t2, t3, hb, idle; -#ifdef M_BIT - unsigned short fraglen; - struct sk_buff_head frag_queue; -#endif - struct sk_buff_head ack_queue; - struct rose_facilities_struct facilities; - struct timer_list timer; - struct timer_list idletimer; -}; - -#define rose_sk(sk) ((struct rose_sock *)(sk)) - -static inline void rose_neigh_hold(struct rose_neigh *rose_neigh) -{ - refcount_inc(&rose_neigh->use); -} - -static inline void rose_neigh_put(struct rose_neigh *rose_neigh) -{ - if (refcount_dec_and_test(&rose_neigh->use)) { - if (rose_neigh->ax25) - ax25_cb_put(rose_neigh->ax25); - kfree(rose_neigh->digipeat); - kfree(rose_neigh); - } -} - -/* af_rose.c */ -extern ax25_address rose_callsign; -extern int sysctl_rose_restart_request_timeout; -extern int sysctl_rose_call_request_timeout; -extern int sysctl_rose_reset_request_timeout; -extern int sysctl_rose_clear_request_timeout; -extern int sysctl_rose_no_activity_timeout; -extern int sysctl_rose_ack_hold_back_timeout; -extern int sysctl_rose_routing_control; -extern int sysctl_rose_link_fail_timeout; -extern int sysctl_rose_maximum_vcs; -extern int sysctl_rose_window_size; - -int 
rosecmp(const rose_address *, const rose_address *); -int rosecmpm(const rose_address *, const rose_address *, unsigned short); -char *rose2asc(char *buf, const rose_address *); -struct sock *rose_find_socket(unsigned int, struct rose_neigh *); -void rose_kill_by_neigh(struct rose_neigh *); -unsigned int rose_new_lci(struct rose_neigh *); -int rose_rx_call_request(struct sk_buff *, struct net_device *, - struct rose_neigh *, unsigned int); -void rose_destroy_socket(struct sock *); - -/* rose_dev.c */ -void rose_setup(struct net_device *); - -/* rose_in.c */ -int rose_process_rx_frame(struct sock *, struct sk_buff *); - -/* rose_link.c */ -void rose_start_ftimer(struct rose_neigh *); -void rose_stop_ftimer(struct rose_neigh *); -void rose_stop_t0timer(struct rose_neigh *); -int rose_ftimer_running(struct rose_neigh *); -void rose_link_rx_restart(struct sk_buff *, struct rose_neigh *, - unsigned short); -void rose_transmit_clear_request(struct rose_neigh *, unsigned int, - unsigned char, unsigned char); -void rose_transmit_link(struct sk_buff *, struct rose_neigh *); - -/* rose_loopback.c */ -void rose_loopback_init(void); -void rose_loopback_clear(void); -int rose_loopback_queue(struct sk_buff *, struct rose_neigh *); - -/* rose_out.c */ -void rose_kick(struct sock *); -void rose_enquiry_response(struct sock *); - -/* rose_route.c */ -extern struct rose_neigh *rose_loopback_neigh; -extern const struct seq_operations rose_neigh_seqops; -extern const struct seq_operations rose_node_seqops; -extern struct seq_operations rose_route_seqops; - -void rose_add_loopback_neigh(void); -int __must_check rose_add_loopback_node(const rose_address *); -void rose_del_loopback_node(const rose_address *); -void rose_rt_device_down(struct net_device *); -void rose_link_device_down(struct net_device *); -struct net_device *rose_dev_first(void); -struct net_device *rose_dev_get(rose_address *); -struct rose_route *rose_route_free_lci(unsigned int, struct rose_neigh *); -struct 
rose_neigh *rose_get_neigh(rose_address *, unsigned char *, - unsigned char *, int); -int rose_rt_ioctl(unsigned int, void __user *); -void rose_link_failed(ax25_cb *, int); -int rose_route_frame(struct sk_buff *, ax25_cb *); -void rose_rt_free(void); - -/* rose_subr.c */ -void rose_clear_queues(struct sock *); -void rose_frames_acked(struct sock *, unsigned short); -void rose_requeue_frames(struct sock *); -int rose_validate_nr(struct sock *, unsigned short); -void rose_write_internal(struct sock *, int); -int rose_decode(struct sk_buff *, int *, int *, int *, int *, int *); -int rose_parse_facilities(unsigned char *, unsigned int, - struct rose_facilities_struct *); -void rose_disconnect(struct sock *, int, int, int); - -/* rose_timer.c */ -void rose_start_heartbeat(struct sock *); -void rose_start_t1timer(struct sock *); -void rose_start_t2timer(struct sock *); -void rose_start_t3timer(struct sock *); -void rose_start_hbtimer(struct sock *); -void rose_start_idletimer(struct sock *); -void rose_stop_heartbeat(struct sock *); -void rose_stop_timer(struct sock *); -void rose_stop_idletimer(struct sock *); +#define _ROSE_H -/* sysctl_net_rose.c */ -void rose_register_sysctl(void); -void rose_unregister_sysctl(void); +#define ROSE_ADDR_LEN 5 #endif diff --git a/include/net/rps-types.h b/include/net/rps-types.h new file mode 100644 index 000000000000..6b90a66866c1 --- /dev/null +++ b/include/net/rps-types.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_RPS_TYPES_H +#define _NET_RPS_TYPES_H + +/* Define a rps_tag_ptr: + * Low order 5 bits are used to store the ilog2(size) of an RPS table. 
+ */ +typedef unsigned long rps_tag_ptr; + +static inline u8 rps_tag_to_log(rps_tag_ptr tag_ptr) +{ + return tag_ptr & 31U; +} + +static inline u32 rps_tag_to_mask(rps_tag_ptr tag_ptr) +{ + return (1U << rps_tag_to_log(tag_ptr)) - 1; +} + +static inline void *rps_tag_to_table(rps_tag_ptr tag_ptr) +{ + return (void *)(tag_ptr & ~31UL); +} +#endif /* _NET_RPS_TYPES_H */ diff --git a/include/net/rps.h b/include/net/rps.h index f1794cd2e7fb..e33c6a2fa8bb 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -8,6 +8,7 @@ #include <net/hotdata.h> #ifdef CONFIG_RPS +#include <net/rps-types.h> extern struct static_key_false rps_needed; extern struct static_key_false rfs_needed; @@ -39,17 +40,6 @@ struct rps_dev_flow { #define RPS_NO_FILTER 0xffff /* - * The rps_dev_flow_table structure contains a table of flow mappings. - */ -struct rps_dev_flow_table { - u8 log; - struct rcu_head rcu; - struct rps_dev_flow flows[]; -}; -#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ - ((_num) * sizeof(struct rps_dev_flow))) - -/* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). * Each entry is a 32bit value. Upper part is the high-order bits @@ -60,41 +50,38 @@ struct rps_dev_flow_table { * meaning we use 32-6=26 bits for the hash. 
*/ struct rps_sock_flow_table { - struct rcu_head rcu; - u32 mask; - - u32 ents[] ____cacheline_aligned_in_smp; + u32 ent; }; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) #define RPS_NO_CPU 0xffff -static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, - u32 hash) +static inline void rps_record_sock_flow(rps_tag_ptr tag_ptr, u32 hash) { - unsigned int index = hash & table->mask; + unsigned int index = hash & rps_tag_to_mask(tag_ptr); u32 val = hash & ~net_hotdata.rps_cpu_mask; + struct rps_sock_flow_table *table; /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); + table = rps_tag_to_table(tag_ptr); /* The following WRITE_ONCE() is paired with the READ_ONCE() * here, and another one in get_rps_cpu(). */ - if (READ_ONCE(table->ents[index]) != val) - WRITE_ONCE(table->ents[index], val); + if (READ_ONCE(table[index].ent) != val) + WRITE_ONCE(table[index].ent, val); } static inline void _sock_rps_record_flow_hash(__u32 hash) { - struct rps_sock_flow_table *sock_flow_table; + rps_tag_ptr tag_ptr; if (!hash) return; rcu_read_lock(); - sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); - if (sock_flow_table) - rps_record_sock_flow(sock_flow_table, hash); + tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table); + if (tag_ptr) + rps_record_sock_flow(tag_ptr, hash); rcu_read_unlock(); } @@ -121,6 +108,7 @@ static inline void _sock_rps_record_flow(const struct sock *sk) static inline void _sock_rps_delete_flow(const struct sock *sk) { struct rps_sock_flow_table *table; + rps_tag_ptr tag_ptr; u32 hash, index; hash = READ_ONCE(sk->sk_rxhash); @@ -128,11 +116,12 @@ static inline void _sock_rps_delete_flow(const struct sock *sk) return; rcu_read_lock(); - table = rcu_dereference(net_hotdata.rps_sock_flow_table); - if (table) { - index = hash & table->mask; - if (READ_ONCE(table->ents[index]) != RPS_NO_CPU) - WRITE_ONCE(table->ents[index], RPS_NO_CPU); + 
tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table); + if (tag_ptr) { + index = hash & rps_tag_to_mask(tag_ptr); + table = rps_tag_to_table(tag_ptr); + if (READ_ONCE(table[index].ent) != RPS_NO_CPU) + WRITE_ONCE(table[index].ent, RPS_NO_CPU); } rcu_read_unlock(); } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c3a7268b567e..11159a50d6a1 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -20,12 +20,15 @@ #include <net/rtnetlink.h> #include <net/flow_offload.h> #include <linux/xarray.h> +#include <net/dropreason-qdisc.h> struct Qdisc_ops; struct qdisc_walker; struct tcf_walker; struct module; struct bpf_flow_keys; +struct Qdisc; +struct netdev_queue; struct qdisc_rate_table { struct tc_ratespec rate; @@ -707,8 +710,8 @@ void dev_qdisc_change_real_num_tx(struct net_device *dev, void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); -void dev_deactivate(struct net_device *dev); -void dev_deactivate_many(struct list_head *head); +void dev_deactivate(struct net_device *dev, bool reset_needed); +void dev_deactivate_many(struct list_head *head, bool reset_needed); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); @@ -716,6 +719,34 @@ void qdisc_destroy(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len); + +static inline void dev_reset_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + bool nolock; + + qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); + if (!qdisc) + return; + + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); + spin_lock_bh(qdisc_lock(qdisc)); + + qdisc_reset(qdisc); + + spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) { + 
clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); + spin_unlock_bh(&qdisc->seqlock); + } +} + #ifdef CONFIG_NET_SCHED int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, void *type_data); @@ -778,13 +809,23 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb) static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { struct Qdisc *qdisc; + bool nolock; for (; i < dev->num_tx_queues; i++) { qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); if (qdisc) { + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); spin_lock_bh(qdisc_lock(qdisc)); qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); + spin_unlock_bh(&qdisc->seqlock); + } } } } @@ -1106,38 +1147,62 @@ static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) return cb; } +/* TC classifier accessors - use enum skb_drop_reason */ static inline enum skb_drop_reason tcf_get_drop_reason(const struct sk_buff *skb) { - return tc_skb_cb(skb)->drop_reason; + return (enum skb_drop_reason)tc_skb_cb(skb)->drop_reason; } static inline void tcf_set_drop_reason(const struct sk_buff *skb, enum skb_drop_reason reason) { - tc_skb_cb(skb)->drop_reason = reason; + tc_skb_cb(skb)->drop_reason = (enum qdisc_drop_reason)reason; } -static inline void tcf_kfree_skb_list(struct sk_buff *skb) +/* Qdisc accessors - use enum qdisc_drop_reason */ +static inline enum qdisc_drop_reason +tcf_get_qdisc_drop_reason(const struct sk_buff *skb) { - while (unlikely(skb)) { - struct sk_buff *next = skb->next; + return tc_skb_cb(skb)->drop_reason; +} - prefetch(next); - kfree_skb_reason(skb, tcf_get_drop_reason(skb)); - skb = next; - } +static inline void tcf_set_qdisc_drop_reason(const struct sk_buff *skb, + enum qdisc_drop_reason reason) +{ + tc_skb_cb(skb)->drop_reason 
= reason; +} + +void __tcf_kfree_skb_list(struct sk_buff *skb, struct Qdisc *q, + struct netdev_queue *txq, struct net_device *dev); + +static inline void tcf_kfree_skb_list(struct sk_buff *skb, struct Qdisc *q, + struct netdev_queue *txq, + struct net_device *dev) +{ + if (unlikely(skb)) + __tcf_kfree_skb_list(skb, q, txq, dev); } static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb, - enum skb_drop_reason reason) + enum qdisc_drop_reason reason) { + struct Qdisc *root; + DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS)); DEBUG_NET_WARN_ON_ONCE(q->flags & TCQ_F_NOLOCK); - tcf_set_drop_reason(skb, reason); - skb->next = q->to_free; - q->to_free = skb; + rcu_read_lock(); + root = qdisc_root_sleeping(q); + + if (root->flags & TCQ_F_DEQUEUE_DROPS) { + tcf_set_qdisc_drop_reason(skb, reason); + skb->next = root->to_free; + root->to_free = skb; + } else { + kfree_skb_reason(skb, (enum skb_drop_reason)reason); + } + rcu_read_unlock(); } /* Instead of calling kfree_skb() while root qdisc lock is held, @@ -1312,9 +1377,9 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, static inline int qdisc_drop_reason(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free, - enum skb_drop_reason reason) + enum qdisc_drop_reason reason) { - tcf_set_drop_reason(skb, reason); + tcf_set_qdisc_drop_reason(skb, reason); return qdisc_drop(skb, sch, to_free); } @@ -1419,6 +1484,11 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, struct tcf_block *block); +static inline bool mini_qdisc_pair_inited(struct mini_Qdisc_pair *miniqp) +{ + return !!miniqp->p_miniq; +} + void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx); int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); diff --git a/include/net/sch_priv.h b/include/net/sch_priv.h new file mode 100644 index 000000000000..4789f668ae87 --- /dev/null 
+++ b/include/net/sch_priv.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_SCHED_PRIV_H +#define __NET_SCHED_PRIV_H + +#include <net/sch_generic.h> + +struct mq_sched { + struct Qdisc **qdiscs; +}; + +int mq_init_common(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack, + const struct Qdisc_ops *qdisc_ops); +void mq_destroy_common(struct Qdisc *sch); +void mq_attach(struct Qdisc *sch); +void mq_dump_common(struct Qdisc *sch, struct sk_buff *skb); +struct netdev_queue *mq_select_queue(struct Qdisc *sch, + struct tcmsg *tcm); +struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl); +unsigned long mq_find(struct Qdisc *sch, u32 classid); +int mq_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm); +int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d); +void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg); + +#endif diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index cddebafb9f77..6f996229167b 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -5,16 +5,47 @@ #include <linux/types.h> struct net; +extern struct net init_net; + +union tcp_seq_and_ts_off { + struct { + u32 seq; + u32 ts_off; + }; + u64 hash64; +}; u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -u32 secure_tcp_seq(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr); -u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport); -u32 secure_tcpv6_ts_off(const struct net *net, - const __be32 *saddr, const __be32 *daddr); +union tcp_seq_and_ts_off +secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); + +static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + 
__be16 sport, __be16 dport) +{ + union tcp_seq_and_ts_off ts; + + ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr, + sport, dport); + + return ts.seq; +} + +union tcp_seq_and_ts_off +secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr, + const __be32 *daddr, + __be16 sport, __be16 dport); + +static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport) +{ + union tcp_seq_and_ts_off ts; + + ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr, + sport, dport); + return ts.seq; +} #endif /* _NET_SECURE_SEQ */ diff --git a/include/net/sock.h b/include/net/sock.h index aafe8bdb2c0f..dccd3738c368 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -81,8 +81,13 @@ * mini-semaphore synchronizes multiple users amongst themselves. */ typedef struct { - spinlock_t slock; - int owned; + union { + struct slock_owned { + int owned; + spinlock_t slock; + }; + long combined; + }; wait_queue_head_t wq; /* * We express the mutex-alike socket_lock semantics @@ -121,14 +126,14 @@ typedef __u64 __bitwise __addrpair; * @skc_bypass_prot_mem: bypass the per-protocol memory accounting for skb * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables - * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol + * @skc_portaddr_node: second hash linkage for UDP * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket * @skc_v6_daddr: IPV6 destination address * @skc_v6_rcv_saddr: IPV6 source address * @skc_cookie: socket's cookie value * @skc_node: main hash linkage for various protocol lookup tables - * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol + * @skc_nulls_node: main hash linkage for TCP * @skc_tx_queue_mapping: tx queue number for this connection * @skc_rx_queue_mapping: rx queue number for this connection * @skc_flags: place holder for sk_flags @@ 
-341,6 +346,7 @@ struct sk_filter; * @sk_reuseport_cb: reuseport group container * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage * @sk_rcu: used during RCU grace period + * @sk_freeptr: used for SLAB_TYPESAFE_BY_RCU managed sockets * @sk_clockid: clockid used by time-based scheduling (SO_TXTIME) * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME * @sk_txtime_report_errors: set report errors mode for SO_TXTIME @@ -536,7 +542,7 @@ struct sock { rwlock_t sk_callback_lock; u32 sk_ack_backlog; u32 sk_max_ack_backlog; - unsigned long sk_ino; + u64 sk_ino; spinlock_t sk_peer_lock; int sk_bind_phc; struct pid *sk_peer_pid; @@ -582,7 +588,14 @@ struct sock { struct bpf_local_storage __rcu *sk_bpf_storage; #endif struct numa_drop_counters *sk_drop_counters; - struct rcu_head sk_rcu; + /* sockets using SLAB_TYPESAFE_BY_RCU can use sk_freeptr. + * By the time kfree() is called, sk_rcu can not be in + * use and can be mangled. + */ + union { + struct rcu_head sk_rcu; + freeptr_t sk_freeptr; + }; netns_tracker ns_tracker; struct xarray sk_user_frags; @@ -1308,7 +1321,7 @@ struct proto { int (*sendmsg)(struct sock *sk, struct msghdr *msg, size_t len); int (*recvmsg)(struct sock *sk, struct msghdr *msg, - size_t len, int flags, int *addr_len); + size_t len, int flags); void (*splice_eof)(struct socket *sock); int (*bind)(struct sock *sk, struct sockaddr_unsized *addr, int addr_len); @@ -1368,6 +1381,7 @@ struct proto { struct kmem_cache *slab; unsigned int obj_size; + unsigned int freeptr_offset; unsigned int ipv6_pinfo_offset; slab_flags_t slab_flags; unsigned int useroffset; /* Usercopy region offset */ @@ -1378,7 +1392,6 @@ struct proto { union { struct inet_hashinfo *hashinfo; - struct udp_table *udp_table; struct raw_hashinfo *raw_hash; struct smc_hashinfo *smc_hash; } h; @@ -1700,7 +1713,6 @@ static inline void lock_sock(struct sock *sk) lock_sock_nested(sk, 0); } -void __lock_sock(struct sock *sk); void __release_sock(struct sock *sk); void 
release_sock(struct sock *sk); @@ -2089,7 +2101,7 @@ static inline int sk_rx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { - sk->sk_socket = sock; + WRITE_ONCE(sk->sk_socket, sock); if (sock) { WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid); WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino); @@ -2131,7 +2143,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) write_unlock_bh(&sk->sk_callback_lock); } -static inline unsigned long sock_i_ino(const struct sock *sk) +static inline u64 sock_i_ino(const struct sock *sk) { /* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */ return READ_ONCE(sk->sk_ino); @@ -2490,12 +2502,23 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, struct sk_buff *skb)); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason *reason); +enum skb_drop_reason +sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb); static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { - return sock_queue_rcv_skb_reason(sk, skb, NULL); + enum skb_drop_reason drop_reason = sock_queue_rcv_skb_reason(sk, skb); + + switch (drop_reason) { + case SKB_DROP_REASON_SOCKET_RCVBUFF: + return -ENOMEM; + case SKB_DROP_REASON_PROTO_MEM: + return -ENOBUFS; + case 0: + return 0; + default: + return -EPERM; + } } int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8346b0d29542..ee500706496b 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -15,6 +15,7 @@ #define SWITCHDEV_F_NO_RECURSE BIT(0) #define SWITCHDEV_F_SKIP_EOPNOTSUPP BIT(1) #define SWITCHDEV_F_DEFER BIT(2) +#define SWITCHDEV_F_NO_FOREIGN BIT(3) enum switchdev_attr_id { SWITCHDEV_ATTR_ID_UNDEFINED, diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index 
c1a67149c6b6..e0fded18e18c 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -32,6 +32,7 @@ struct tcf_gate_params { s32 tcfg_clockid; size_t num_entries; struct list_head entries; + struct rcu_head rcu; }; #define GATE_ACT_GATE_OPEN BIT(0) @@ -39,7 +40,7 @@ struct tcf_gate_params { struct tcf_gate { struct tc_action common; - struct tcf_gate_params param; + struct tcf_gate_params __rcu *param; u8 current_gate_status; ktime_t current_close_time; u32 current_entry_octets; @@ -51,47 +52,65 @@ struct tcf_gate { #define to_gate(a) ((struct tcf_gate *)a) +static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a) +{ + struct tcf_gate *gact = to_gate(a); + + return rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); +} + static inline s32 tcf_gate_prio(const struct tc_action *a) { + struct tcf_gate_params *p; s32 tcfg_prio; - tcfg_prio = to_gate(a)->param.tcfg_priority; + p = tcf_gate_params_locked(a); + tcfg_prio = p->tcfg_priority; return tcfg_prio; } static inline u64 tcf_gate_basetime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_basetime; - tcfg_basetime = to_gate(a)->param.tcfg_basetime; + p = tcf_gate_params_locked(a); + tcfg_basetime = p->tcfg_basetime; return tcfg_basetime; } static inline u64 tcf_gate_cycletime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletime; - tcfg_cycletime = to_gate(a)->param.tcfg_cycletime; + p = tcf_gate_params_locked(a); + tcfg_cycletime = p->tcfg_cycletime; return tcfg_cycletime; } static inline u64 tcf_gate_cycletimeext(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletimeext; - tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext; + p = tcf_gate_params_locked(a); + tcfg_cycletimeext = p->tcfg_cycletime_ext; return tcfg_cycletimeext; } static inline u32 tcf_gate_num_entries(const struct tc_action *a) { + struct tcf_gate_params *p; u32 num_entries; - num_entries = 
to_gate(a)->param.num_entries; + p = tcf_gate_params_locked(a); + num_entries = p->num_entries; return num_entries; } @@ -105,7 +124,7 @@ static inline struct action_gate_entry u32 num_entries; int i = 0; - p = &to_gate(a)->param; + p = tcf_gate_params_locked(a); num_entries = p->num_entries; list_for_each_entry(entry, &p->entries, list) @@ -114,7 +133,7 @@ static inline struct action_gate_entry if (i != num_entries) return NULL; - oe = kcalloc(num_entries, sizeof(*oe), GFP_ATOMIC); + oe = kzalloc_objs(*oe, num_entries, GFP_ATOMIC); if (!oe) return NULL; diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index c7f24a2da1ca..24d4d5a62b3c 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -13,15 +13,13 @@ struct tcf_ife_params { u8 eth_src[ETH_ALEN]; u16 eth_type; u16 flags; - + struct list_head metalist; struct rcu_head rcu; }; struct tcf_ife_info { struct tc_action common; struct tcf_ife_params __rcu *params; - /* list of metaids allowed */ - struct list_head metalist; }; #define to_ife(a) ((struct tcf_ife_info *)a) diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h index ffe58a02537c..4ebb053bb0dd 100644 --- a/include/net/tc_wrapper.h +++ b/include/net/tc_wrapper.h @@ -12,7 +12,8 @@ #define TC_INDIRECT_SCOPE -extern struct static_key_false tc_skip_wrapper; +extern struct static_key_false tc_skip_wrapper_act; +extern struct static_key_false tc_skip_wrapper_cls; /* TC Actions */ #ifdef CONFIG_NET_CLS_ACT @@ -46,7 +47,7 @@ TC_INDIRECT_ACTION_DECLARE(tunnel_key_act); static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { - if (static_branch_likely(&tc_skip_wrapper)) + if (static_branch_likely(&tc_skip_wrapper_act)) goto skip; #if IS_BUILTIN(CONFIG_NET_ACT_GACT) @@ -153,7 +154,7 @@ TC_INDIRECT_FILTER_DECLARE(u32_classify); static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - if 
(static_branch_likely(&tc_skip_wrapper)) + if (static_branch_likely(&tc_skip_wrapper_cls)) goto skip; #if IS_BUILTIN(CONFIG_NET_CLS_BPF) @@ -202,8 +203,44 @@ skip: static inline void tc_wrapper_init(void) { #ifdef CONFIG_X86 - if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE)) - static_branch_enable(&tc_skip_wrapper); + int cnt_cls = IS_BUILTIN(CONFIG_NET_CLS_BPF) + + IS_BUILTIN(CONFIG_NET_CLS_U32) + + IS_BUILTIN(CONFIG_NET_CLS_FLOWER) + + IS_BUILTIN(CONFIG_NET_CLS_FW) + + IS_BUILTIN(CONFIG_NET_CLS_MATCHALL) + + IS_BUILTIN(CONFIG_NET_CLS_BASIC) + + IS_BUILTIN(CONFIG_NET_CLS_CGROUP) + + IS_BUILTIN(CONFIG_NET_CLS_FLOW) + + IS_BUILTIN(CONFIG_NET_CLS_ROUTE4); + + int cnt_act = IS_BUILTIN(CONFIG_NET_ACT_GACT) + + IS_BUILTIN(CONFIG_NET_ACT_MIRRED) + + IS_BUILTIN(CONFIG_NET_ACT_PEDIT) + + IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT) + + IS_BUILTIN(CONFIG_NET_ACT_SKBMOD) + + IS_BUILTIN(CONFIG_NET_ACT_POLICE) + + IS_BUILTIN(CONFIG_NET_ACT_BPF) + + IS_BUILTIN(CONFIG_NET_ACT_CONNMARK) + + IS_BUILTIN(CONFIG_NET_ACT_CSUM) + + IS_BUILTIN(CONFIG_NET_ACT_CT) + + IS_BUILTIN(CONFIG_NET_ACT_CTINFO) + + IS_BUILTIN(CONFIG_NET_ACT_GATE) + + IS_BUILTIN(CONFIG_NET_ACT_MPLS) + + IS_BUILTIN(CONFIG_NET_ACT_NAT) + + IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY) + + IS_BUILTIN(CONFIG_NET_ACT_VLAN) + + IS_BUILTIN(CONFIG_NET_ACT_IFE) + + IS_BUILTIN(CONFIG_NET_ACT_SIMP) + + IS_BUILTIN(CONFIG_NET_ACT_SAMPLE); + + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) + return; + + if (cnt_cls > 1) + static_branch_enable(&tc_skip_wrapper_cls); + + if (cnt_act > 1) + static_branch_enable(&tc_skip_wrapper_act); #endif } diff --git a/include/net/tcp.h b/include/net/tcp.h index 0deb5e9dd911..ecbadcb3a744 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -43,6 +43,7 @@ #include <net/dst.h> #include <net/mptcp.h> #include <net/xfrm.h> +#include <net/secure_seq.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> @@ -347,13 +348,21 @@ extern struct proto tcp_prot; #define TCP_DEC_STATS(net, field) 
SNMP_DEC_STATS((net)->mib.tcp_statistics, field) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) +/* + * TCP splice context + */ +struct tcp_splice_state { + struct pipe_inode_info *pipe; + size_t len; + unsigned int flags; +}; + void tcp_tsq_work_init(void); int tcp_v4_err(struct sk_buff *skb, u32); void tcp_shutdown(struct sock *sk, int how); -int tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); void tcp_remove_empty_skb(struct sock *sk); @@ -366,7 +375,21 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags); int tcp_wmem_schedule(struct sock *sk, int copy); void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, int size_goal); + void tcp_release_cb(struct sock *sk); + +static inline bool tcp_release_cb_cond(struct sock *sk) +{ +#ifdef CONFIG_INET + if (likely(sk->sk_prot->release_cb == tcp_release_cb)) { + if (unlikely(smp_load_acquire(&sk->sk_tsq_flags) & TCP_DEFERRED_ALL)) + tcp_release_cb(sk); + return true; + } +#endif + return false; +} + void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); void tcp_delack_timer_handler(struct sock *sk); @@ -378,6 +401,8 @@ void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); void tcp_twsk_purge(struct list_head *net_exit_list); +int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, + unsigned int offset, size_t len); ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); @@ -489,11 +514,19 @@ void tcp_reset_keepalive_timer(struct sock *sk, unsigned long timeout); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len); + int flags); int tcp_set_rcvlowat(struct sock *sk, 
int val); +void tcp_set_rcvbuf(struct sock *sk, int val); int tcp_set_window_clamp(struct sock *sk, int val); -void tcp_update_recv_tstamps(struct sk_buff *skb, - struct scm_timestamping_internal *tss); + +static inline void +tcp_update_recv_tstamps(struct sk_buff *skb, + struct scm_timestamping_internal *tss) +{ + tss->ts[0] = skb->tstamp; + tss->ts[2] = skb_hwtstamps(skb)->hwtstamp; +} + void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, struct scm_timestamping_internal *tss); void tcp_data_ready(struct sock *sk); @@ -520,7 +553,6 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, * TCP v4 functions exported for the inet6 API */ -void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); void tcp_v4_mtu_reduced(struct sock *sk); void tcp_req_err(struct sock *sk, u32 seq, bool abort); void tcp_ld_RTO_revert(struct sock *sk, u32 seq); @@ -533,7 +565,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req); + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)); int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int tcp_connect(struct sock *sk); @@ -541,6 +575,7 @@ enum tcp_synack_type { TCP_SYNACK_NORMAL, TCP_SYNACK_FASTOPEN, TCP_SYNACK_COOKIE, + TCP_SYNACK_RETRANS, }; struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, @@ -751,7 +786,15 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_done_with_error(struct sock *sk, int err); void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_fin(struct sock *sk); -void tcp_check_space(struct sock *sk); +void __tcp_check_space(struct sock *sk); +static inline void tcp_check_space(struct sock *sk) +{ + /* pairs with tcp_poll() */ + smp_mb(); + + if (sk->sk_socket 
&& test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + __tcp_check_space(sk); +} void tcp_sack_compress_send_ack(struct sock *sk); static inline void tcp_cleanup_skb(struct sk_buff *skb) @@ -809,6 +852,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) /* tcp.c */ void tcp_get_info(struct sock *, struct tcp_info *); +void tcp_rate_check_app_limited(struct sock *sk); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, @@ -891,6 +935,28 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp) return (u32) win; } +/* Compute the maximum receive window we ever advertised. + * Rcv_nxt can be after the window if our peer push more data + * than the offered window. + */ +static inline u32 tcp_max_receive_window(const struct tcp_sock *tp) +{ + s32 win = tp->rcv_mwnd_seq - tp->rcv_nxt; + + if (win < 0) + win = 0; + return (u32) win; +} + +/* Check if we need to update the maximum receive window sequence number */ +static inline void tcp_update_max_rcv_wnd_seq(struct tcp_sock *tp) +{ + u32 wre = tp->rcv_wup + tp->rcv_wnd; + + if (after(wre, tp->rcv_mwnd_seq)) + tp->rcv_mwnd_seq = wre; +} + /* Choose a new window, without checks for shrinking, and without * scaling applied to the result. The caller does these things * if necessary. This is a "raw" window selection. 
@@ -1111,9 +1177,7 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb) extern const struct inet_connection_sock_af_ops ipv6_specific; -INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); -void tcp_v6_early_demux(struct sk_buff *skb); #endif @@ -1203,7 +1267,15 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NON_RESTRICTED BIT(0) /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN BIT(1) -#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) +/* Require successfully negotiated AccECN capability */ +#define TCP_CONG_NEEDS_ACCECN BIT(2) +/* Use ECT(1) instead of ECT(0) while the CA is uninitialized */ +#define TCP_CONG_ECT_1_NEGOTIATION BIT(3) +/* Cannot fallback to RFC3168 during AccECN negotiation */ +#define TCP_CONG_NO_FALLBACK_RFC3168 BIT(4) +#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN | \ + TCP_CONG_NEEDS_ACCECN | TCP_CONG_ECT_1_NEGOTIATION | \ + TCP_CONG_NO_FALLBACK_RFC3168) union tcp_cc_info; @@ -1243,33 +1315,45 @@ struct rate_sample { struct tcp_congestion_ops { /* fast path fields are put first to fill one cache line */ + /* A congestion control (CC) must provide one of either: + * + * (a) a cong_avoid function, if the CC wants to use the core TCP + * stack's default functionality to implement a "classic" + * (Reno/CUBIC-style) response to packet loss, RFC3168 ECN, + * idle periods, pacing rate computations, etc. + * + * (b) a cong_control function, if the CC wants custom behavior and + * complete control of all congestion control behaviors. + */ + /* (a) "classic" response: calculate new cwnd. + */ + void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked); + /* (b) "custom" response: call when packets are delivered to update + * cwnd and pacing rate, after all the ca_state processing. 
+ */ + void (*cong_control)(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs); + /* return slow start threshold (required) */ u32 (*ssthresh)(struct sock *sk); - /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked); - /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); + /* call when CA_EVENT_TX_START cwnd event occurs (optional) */ + void (*cwnd_event_tx_start)(struct sock *sk); + /* call when ack arrives (optional) */ void (*in_ack_event)(struct sock *sk, u32 flags); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* override sysctl_tcp_min_tso_segs */ + /* override sysctl_tcp_min_tso_segs (optional) */ u32 (*min_tso_segs)(struct sock *sk); - /* call when packets are delivered to update cwnd and pacing rate, - * after all the ca_state processing. 
(optional) - */ - void (*cong_control)(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs); - - /* new value of cwnd after loss (required) */ u32 (*undo_cwnd)(struct sock *sk); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ @@ -1335,10 +1419,36 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk) return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; } +static inline bool tcp_ca_needs_accecn(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ACCECN; +} + +static inline bool tcp_ca_ect_1_negotiation(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & TCP_CONG_ECT_1_NEGOTIATION; +} + +static inline bool tcp_ca_no_fallback_rfc3168(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & TCP_CONG_NO_FALLBACK_RFC3168; +} + static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { const struct inet_connection_sock *icsk = inet_csk(sk); + if (event == CA_EVENT_TX_START) { + if (icsk->icsk_ca_ops->cwnd_event_tx_start) + icsk->icsk_ca_ops->cwnd_event_tx_start(sk); + return; + } if (icsk->icsk_ca_ops->cwnd_event) icsk->icsk_ca_ops->cwnd_event(sk, event); } @@ -1346,13 +1456,6 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) /* From tcp_cong.c */ void tcp_set_ca_state(struct sock *sk, const u8 ca_state); -/* From tcp_rate.c */ -void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); -void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, - struct rate_sample *rs); -void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - bool is_sack_reneg, struct rate_sample *rs); -void tcp_rate_check_app_limited(struct sock *sk); static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) { @@ -1410,7 +1513,7 @@ static inline u32 
tcp_snd_cwnd(const struct tcp_sock *tp) static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) { WARN_ON_ONCE((int)val <= 0); - tp->snd_cwnd = val; + WRITE_ONCE(tp->snd_cwnd, val); } static inline bool tcp_in_slow_start(const struct tcp_sock *tp) @@ -1578,11 +1681,16 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) __skb_checksum_complete(skb); } -bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason *reason); +enum skb_drop_reason tcp_add_backlog(struct sock *sk, struct sk_buff *skb); + +static inline enum skb_drop_reason +tcp_filter(struct sock *sk, struct sk_buff *skb) +{ + const struct tcphdr *th = (const struct tcphdr *)skb->data; + return sk_filter_trim_cap(sk, skb, __tcp_hdrlen(th)); +} -int tcp_filter(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); int tcp_abort(struct sock *sk, int err); @@ -2095,7 +2203,34 @@ enum tcp_chrono { __TCP_CHRONO_MAX, }; -void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type); +static inline void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) +{ + const u32 now = tcp_jiffies32; + enum tcp_chrono old = tp->chrono_type; + + /* Following WRITE_ONCE()s pair with READ_ONCE()s in + * tcp_get_info_chrono_stats(). + */ + if (old > TCP_CHRONO_UNSPEC) + WRITE_ONCE(tp->chrono_stat[old - 1], + tp->chrono_stat[old - 1] + now - tp->chrono_start); + WRITE_ONCE(tp->chrono_start, now); + WRITE_ONCE(tp->chrono_type, new); +} + +static inline void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* If there are multiple conditions worthy of tracking in a + * chronograph then the highest priority enum takes precedence + * over the other conditions. So that if something "more interesting" + * starts happening, stop the previous chrono and start a new one. 
+ */ + if (type > tp->chrono_type) + tcp_chrono_set(tp, type); +} + void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type); /* This helper is needed, because skb->tcp_tsorted_anchor uses @@ -2318,15 +2453,21 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, struct tcphdr *th); INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); -INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)); #ifdef CONFIG_INET void tcp_gro_complete(struct sk_buff *skb); #else static inline void tcp_gro_complete(struct sk_buff *skb) { } #endif -void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); +static inline void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, + __be32 daddr) +{ + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); +} static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { @@ -2406,8 +2547,9 @@ struct tcp_request_sock_ops { struct flowi *fl, struct request_sock *req, u32 tw_isn); - u32 (*init_seq)(const struct sk_buff *skb); - u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); + union tcp_seq_and_ts_off (*init_seq_and_ts_off)( + const struct net *net, + const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, @@ -2513,10 +2655,7 @@ void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced); extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd); extern bool tcp_rack_mark_lost(struct sock *sk); -extern void 
tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, - u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); -extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs); /* tcp_plb.c */ @@ -2942,4 +3081,18 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk, const void *saddr, const void *daddr, int family, int dif, int sdif); +static inline int tcp_recv_should_stop(struct sock *sk) +{ + return sk->sk_err || + sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || + signal_pending(current); +} + +INDIRECT_CALLABLE_DECLARE(union tcp_seq_and_ts_off + tcp_v4_init_seq_and_ts_off(const struct net *net, + const struct sk_buff *skb)); +INDIRECT_CALLABLE_DECLARE(union tcp_seq_and_ts_off + tcp_v6_init_seq_and_ts_off(const struct net *net, + const struct sk_buff *skb)); #endif /* _TCP_H */ diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index f13e5cd2b1ac..865d5c5a7718 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -29,8 +29,15 @@ enum tcp_accecn_option { TCP_ACCECN_OPTION_DISABLED = 0, TCP_ACCECN_OPTION_MINIMUM = 1, TCP_ACCECN_OPTION_FULL = 2, + TCP_ACCECN_OPTION_PERSIST = 3, }; +/* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */ +static inline void INET_ECN_xmit_ect_1_negotiation(struct sock *sk) +{ + __INET_ECN_xmit(sk, tcp_ca_ect_1_negotiation(sk)); +} + static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) { /* Do not set CWR if in AccECN mode! 
*/ @@ -60,12 +67,6 @@ static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; } -/* tp->accecn_fail_mode */ -#define TCP_ACCECN_ACE_FAIL_SEND BIT(0) -#define TCP_ACCECN_ACE_FAIL_RECV BIT(1) -#define TCP_ACCECN_OPT_FAIL_SEND BIT(2) -#define TCP_ACCECN_OPT_FAIL_RECV BIT(3) - static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp) { return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND; @@ -91,11 +92,6 @@ static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode) tp->accecn_fail_mode |= mode; } -#define TCP_ACCECN_OPT_NOT_SEEN 0x0 -#define TCP_ACCECN_OPT_EMPTY_SEEN 0x1 -#define TCP_ACCECN_OPT_COUNTER_SEEN 0x2 -#define TCP_ACCECN_OPT_FAIL_SEEN 0x3 - static inline u8 tcp_accecn_ace(const struct tcphdr *th) { return (th->ae << 2) | (th->cwr << 1) | th->ece; @@ -169,7 +165,9 @@ static inline void tcp_accecn_third_ack(struct sock *sk, switch (ace) { case 0x0: /* Invalid value */ - tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV); + if (!TCP_SKB_CB(skb)->sacked) + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV | + TCP_ACCECN_OPT_FAIL_RECV); break; case 0x7: case 0x5: @@ -183,7 +181,7 @@ static inline void tcp_accecn_third_ack(struct sock *sk, tcp_accecn_validate_syn_feedback(sk, ace, sent_ect)) { if ((tcp_accecn_extract_syn_ect(ace) == INET_ECN_CE) && !tp->delivered_ce) - tp->delivered_ce++; + WRITE_ONCE(tp->delivered_ce, 1); } break; } @@ -398,6 +396,7 @@ static inline void tcp_accecn_init_counters(struct tcp_sock *tp) tp->received_ce_pending = 0; __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes); __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes); + tp->accecn_opt_sent_w_dsack = 0; tp->accecn_minlen = 0; tp->accecn_opt_demand = 0; tp->est_ecnfield = 0; @@ -467,6 +466,26 @@ static inline u8 tcp_accecn_option_init(const struct sk_buff *skb, return TCP_ACCECN_OPT_COUNTER_SEEN; } +static inline void tcp_ecn_rcv_synack_accecn(struct sock *sk, + const struct sk_buff *skb, u8 dsf) 
+{ + struct tcp_sock *tp = tcp_sk(sk); + + tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); + tp->syn_ect_rcv = dsf & INET_ECN_MASK; + /* Demand Accurate ECN option in response to the SYN on the SYN/ACK + * and the TCP server will try to send one more packet with an AccECN + * Option at a later point during the connection. + */ + if (tp->rx_opt.accecn && + tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { + u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); + + tcp_accecn_saw_opt_fail_recv(tp, saw_opt); + tp->accecn_opt_demand = 2; + } +} + /* See Table 2 of the AccECN draft */ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb, const struct tcphdr *th, u8 ip_dsfield) @@ -489,32 +508,32 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); break; case 0x1: - case 0x5: /* +========+========+============+=============+ * | A | B | SYN/ACK | Feedback | * | | | B->A | Mode of A | * | | | AE CWR ECE | | * +========+========+============+=============+ - * | AccECN | Nonce | 1 0 1 | (Reserved) | * | AccECN | ECN | 0 0 1 | Classic ECN | * | Nonce | AccECN | 0 0 1 | Classic ECN | * | ECN | AccECN | 0 0 1 | Classic ECN | * +========+========+============+=============+ */ - if (tcp_ecn_mode_pending(tp)) - /* Downgrade from AccECN, or requested initially */ + if (tcp_ca_no_fallback_rfc3168(sk)) + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); + else tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); break; - default: - tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); - tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK; - if (tp->rx_opt.accecn && - tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { - u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); - - tcp_accecn_saw_opt_fail_recv(tp, saw_opt); - tp->accecn_opt_demand = 2; + case 0x5: + if (tcp_ecn_mode_pending(tp)) { + tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield); + if (INET_ECN_is_ce(ip_dsfield)) { + tp->received_ce++; + 
tp->received_ce_pending++; + } } + break; + default: + tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield); if (INET_ECN_is_ce(ip_dsfield) && tcp_accecn_validate_syn_feedback(sk, ace, tp->syn_ect_snt)) { @@ -525,9 +544,11 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb } } -static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th, +static inline void tcp_ecn_rcv_syn(struct sock *sk, const struct tcphdr *th, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + if (tcp_ecn_mode_pending(tp)) { if (!tcp_accecn_syn_requested(th)) { /* Downgrade to classic ECN feedback */ @@ -539,7 +560,8 @@ static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th, tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); } } - if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr)) + if (tcp_ecn_mode_rfc3168(tp) && + (!th->ece || !th->cwr || tcp_ca_no_fallback_rfc3168(sk))) tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); } @@ -561,7 +583,7 @@ static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; else if (tcp_ca_needs_ecn(sk) || tcp_bpf_ca_needs_ecn(sk)) - INET_ECN_xmit(sk); + INET_ECN_xmit_ect_1_negotiation(sk); if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) { TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE; @@ -579,7 +601,8 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) bool use_ecn, use_accecn; u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); - use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN; + use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN || + tcp_ca_needs_accecn(sk); use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN || tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN || tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn; @@ -595,7 +618,7 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) if (use_ecn) { if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) - INET_ECN_xmit(sk); + INET_ECN_xmit_ect_1_negotiation(sk); 
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; if (use_accecn) { @@ -619,12 +642,22 @@ static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) } static inline void -tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) +tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, + enum tcp_synack_type synack_type) { - if (tcp_rsk(req)->accecn_ok) - tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv); - else if (inet_rsk(req)->ecn_ok) - th->ece = 1; + /* Accurate ECN shall retransmit SYN/ACK with ACE=0 if the + * previously retransmitted SYN/ACK also times out. + */ + if (!req->num_timeout || synack_type != TCP_SYNACK_RETRANS) { + if (tcp_rsk(req)->accecn_ok) + tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv); + else if (inet_rsk(req)->ecn_ok) + th->ece = 1; + } else if (tcp_rsk(req)->accecn_ok) { + th->ae = 0; + th->cwr = 0; + th->ece = 0; + } } static inline bool tcp_accecn_option_beacon_check(const struct sock *sk) diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 1a97e3f32029..c0a421fe0c2a 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -8,7 +8,6 @@ /* IPv6 transport protocols */ extern struct proto rawv6_prot; extern struct proto udpv6_prot; -extern struct proto udplitev6_prot; extern struct proto tcpv6_prot; extern struct proto pingv6_prot; @@ -28,8 +27,6 @@ int rawv6_init(void); void rawv6_exit(void); int udpv6_init(void); void udpv6_exit(void); -int udplitev6_init(void); -void udplitev6_exit(void); int tcpv6_init(void); void tcpv6_exit(void); diff --git a/include/net/tso.h b/include/net/tso.h index e7e157ae0526..da82aabd1d48 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -3,6 +3,7 @@ #define _TSO_H #include <linux/skbuff.h> +#include <linux/dma-mapping.h> #include <net/ip.h> #define TSO_HEADER_SIZE 256 @@ -28,4 +29,103 @@ void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, void tso_build_data(const struct sk_buff *skb, 
struct tso_t *tso, int size); int tso_start(struct sk_buff *skb, struct tso_t *tso); +/** + * struct tso_dma_map - DMA mapping state for GSO payload + * @dev: device used for DMA mapping + * @skb: the GSO skb being mapped + * @hdr_len: per-segment header length + * @iova_state: DMA IOVA state (when IOMMU available) + * @iova_offset: global byte offset into IOVA range (IOVA path only) + * @total_len: total payload length + * @frag_idx: current region (-1 = linear, 0..nr_frags-1 = frag) + * @offset: byte offset within current region + * @linear_dma: DMA address of the linear payload + * @linear_len: length of the linear payload + * @nr_frags: number of frags successfully DMA-mapped + * @frags: per-frag DMA address and length + * + * DMA-maps the payload regions of a GSO skb (linear data + frags). + * Prefers the DMA IOVA API for a single contiguous mapping with one + * IOTLB sync; falls back to per-region dma_map_phys() otherwise. + */ +struct tso_dma_map { + struct device *dev; + const struct sk_buff *skb; + unsigned int hdr_len; + /* IOVA path */ + struct dma_iova_state iova_state; + size_t iova_offset; + size_t total_len; + /* Fallback path if IOVA path fails */ + int frag_idx; + unsigned int offset; + dma_addr_t linear_dma; + unsigned int linear_len; + unsigned int nr_frags; + struct { + dma_addr_t dma; + unsigned int len; + } frags[MAX_SKB_FRAGS]; +}; + +/** + * struct tso_dma_map_completion_state - Completion-time cleanup state + * @iova_state: DMA IOVA state (when IOMMU available) + * @total_len: total payload length of the IOVA mapping + * + * Drivers store this on their SW ring at xmit time via + * tso_dma_map_completion_save(), then call tso_dma_map_complete() at + * completion time. 
+ */ +struct tso_dma_map_completion_state { + struct dma_iova_state iova_state; + size_t total_len; +}; + +int tso_dma_map_init(struct tso_dma_map *map, struct device *dev, + const struct sk_buff *skb, unsigned int hdr_len); +void tso_dma_map_cleanup(struct tso_dma_map *map); +unsigned int tso_dma_map_count(struct tso_dma_map *map, unsigned int len); +bool tso_dma_map_next(struct tso_dma_map *map, dma_addr_t *addr, + unsigned int *chunk_len, unsigned int *mapping_len, + unsigned int seg_remaining); + +/** + * tso_dma_map_completion_save - save state needed for completion-time cleanup + * @map: the xmit-time DMA map + * @cstate: driver-owned storage that persists until completion + * + * Should be called at xmit time to update the completion state and later passed + * to tso_dma_map_complete(). + */ +static inline void +tso_dma_map_completion_save(const struct tso_dma_map *map, + struct tso_dma_map_completion_state *cstate) +{ + cstate->iova_state = map->iova_state; + cstate->total_len = map->total_len; +} + +/** + * tso_dma_map_complete - tear down mapping at completion time + * @dev: the device that owns the mapping + * @cstate: state saved by tso_dma_map_completion_save() + * + * Return: true if the IOVA path was used and the mapping has been + * destroyed; false if the fallback per-region path was used and the + * driver must unmap via its normal completion path. 
+ */ +static inline bool +tso_dma_map_complete(struct device *dev, + struct tso_dma_map_completion_state *cstate) +{ + if (dma_use_iova(&cstate->iova_state)) { + dma_iova_destroy(dev, &cstate->iova_state, cstate->total_len, + DMA_TO_DEVICE, 0); + return true; + } + + return false; +} + #endif /* _TSO_H */ diff --git a/include/net/udp.h b/include/net/udp.h index a061d1b22ddc..8262e2b215b4 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -29,13 +29,12 @@ #include <linux/seq_file.h> #include <linux/poll.h> #include <linux/indirect_call_wrapper.h> +#include <linux/math.h> /** - * struct udp_skb_cb - UDP(-Lite) private variables + * struct udp_skb_cb - UDP private variables * * @header: private variables used by IPv4/IPv6 - * @cscov: checksum coverage length (UDP-Lite only) - * @partial_cov: if set indicates partial csum coverage */ struct udp_skb_cb { union { @@ -44,8 +43,6 @@ struct udp_skb_cb { struct inet6_skb_parm h6; #endif } header; - __u16 cscov; - __u8 partial_cov; }; #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) @@ -104,7 +101,7 @@ struct udp_table { unsigned int log; }; extern struct udp_table udp_table; -void udp_table_init(struct udp_table *, const char *); + static inline struct udp_hslot *udp_hashslot(struct udp_table *table, const struct net *net, unsigned int num) @@ -215,13 +212,11 @@ extern int sysctl_udp_wmem_min; struct sk_buff; /* - * Generic checksumming routines for UDP(-Lite) v4 and v6 + * Generic checksumming routines for UDP v4 and v6 */ static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb) { - return (UDP_SKB_CB(skb)->cscov == skb->len ? 
- __skb_checksum_complete(skb) : - __skb_checksum_complete_head(skb, UDP_SKB_CB(skb)->cscov)); + return __skb_checksum_complete(skb); } static inline int udp_lib_checksum_complete(struct sk_buff *skb) @@ -272,7 +267,6 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) skb->csum = csum_partial(skb->data, sizeof(struct udphdr), skb->csum); skb_pull_rcsum(skb, sizeof(struct udphdr)); - UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); } typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, @@ -281,6 +275,10 @@ typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, void udp_v6_early_demux(struct sk_buff *skb); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); +INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int flags)); + struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); @@ -294,8 +292,7 @@ static inline int udp_lib_init_sock(struct sock *sk) up->forward_threshold = sk->sk_rcvbuf >> 2; set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); - up->udp_prod_queue = kcalloc(nr_node_ids, sizeof(*up->udp_prod_queue), - GFP_KERNEL); + up->udp_prod_queue = kzalloc_objs(*up->udp_prod_queue, nr_node_ids); if (!up->udp_prod_queue) return -ENOMEM; for (int i = 0; i < nr_node_ids; i++) @@ -308,7 +305,7 @@ static inline void udp_drops_inc(struct sock *sk) numa_drop_add(&udp_sk(sk)->drop_counters, 1); } -/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ +/* hash routines shared between UDPv4/6 */ static inline int udp_lib_hash(struct sock *sk) { BUG(); @@ -377,7 +374,7 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, */ hash ^= hash << 16; - return htons((((u64) hash * (max - min)) >> 32) + min); + return htons(reciprocal_scale(hash, max - min + 1) + min); } static inline int udp_rqueue_get(struct sock *sk) @@ 
-416,6 +413,8 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_err(struct sk_buff *, u32); int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); +INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int flags)); void udp_splice_eof(struct socket *sock); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); @@ -423,7 +422,6 @@ int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, int *karg); -int udp_init_sock(struct sock *sk); int udp_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); @@ -439,9 +437,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, - __be16 sport, - __be32 daddr, __be16 dport, int dif, int sdif, - struct udp_table *tbl, struct sk_buff *skb); + __be16 sport, __be32 daddr, __be16 dport, + int dif, int sdif, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); struct sock *udp6_lib_lookup(const struct net *net, @@ -451,8 +448,7 @@ struct sock *udp6_lib_lookup(const struct net *net, struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, int sdif, struct udp_table *tbl, - struct sk_buff *skb); + int dif, int sdif, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); int udp_read_skb(struct sock *sk, 
skb_read_actor_t recv_actor); @@ -524,38 +520,28 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, } /* - * SNMP statistics for UDP and UDP-Lite + * SNMP statistics for UDP */ -#define UDP_INC_STATS(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ - else SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) -#define __UDP_INC_STATS(net, field, is_udplite) do { \ - if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ - else __SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) - -#define __UDP6_INC_STATS(net, field, is_udplite) do { \ - if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field);\ - else __SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ -} while(0) -#define UDP6_INC_STATS(net, field, __lite) do { \ - if (__lite) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \ - else SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ -} while(0) +#define __UDP_INC_STATS(net, field) \ + __SNMP_INC_STATS((net)->mib.udp_statistics, field) +#define UDP_INC_STATS(net, field) \ + SNMP_INC_STATS((net)->mib.udp_statistics, field) +#define __UDP6_INC_STATS(net, field) \ + __SNMP_INC_STATS((net)->mib.udp_stats_in6, field) +#define UDP6_INC_STATS(net, field) \ + SNMP_INC_STATS((net)->mib.udp_stats_in6, field) #if IS_ENABLED(CONFIG_IPV6) -#define __UDPX_MIB(sk, ipv4) \ -({ \ - ipv4 ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ - sock_net(sk)->mib.udp_statistics) : \ - (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_stats_in6 : \ - sock_net(sk)->mib.udp_stats_in6); \ -}) +#define __UDPX_MIB(sk, ipv4) \ + ({ \ + ipv4 ? sock_net(sk)->mib.udp_statistics : \ + sock_net(sk)->mib.udp_stats_in6; \ + }) #else -#define __UDPX_MIB(sk, ipv4) \ -({ \ - IS_UDPLITE(sk) ? 
sock_net(sk)->mib.udplite_statistics : \ - sock_net(sk)->mib.udp_statistics; \ -}) +#define __UDPX_MIB(sk, ipv4) \ + ({ \ + sock_net(sk)->mib.udp_statistics; \ + }) #endif #define __UDPX_INC_STATS(sk, field) \ @@ -564,7 +550,6 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, #ifdef CONFIG_PROC_FS struct udp_seq_afinfo { sa_family_t family; - struct udp_table *udp_table; }; struct udp_iter_state { @@ -576,9 +561,6 @@ void *udp_seq_start(struct seq_file *seq, loff_t *pos); void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos); void udp_seq_stop(struct seq_file *seq, void *v); -extern const struct seq_operations udp_seq_ops; -extern const struct seq_operations udp6_seq_ops; - int udp4_proc_init(void); void udp4_proc_exit(void); #endif /* CONFIG_PROC_FS */ @@ -649,9 +631,6 @@ drop: static inline void udp_post_segment_fix_csum(struct sk_buff *skb) { - /* UDP-lite can't land here - no GRO */ - WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov); - /* UDP packets generated with UDP_SEGMENT and traversing: * * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx) @@ -665,7 +644,6 @@ static inline void udp_post_segment_fix_csum(struct sk_buff *skb) * a valid csum after the segmentation. * Additionally fixup the UDP CB. 
*/ - UDP_SKB_CB(skb)->cscov = skb->len; if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid) skb->csum_valid = 1; } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 9acef2fbd2fd..47c23d4a1740 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -7,9 +7,13 @@ #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> -#include <net/ipv6_stubs.h> #endif +#define UDP_TUNNEL_PARTIAL_FEATURES NETIF_F_GSO_ENCAP_ALL +#define UDP_TUNNEL_STRIPPED_GSO_TYPES ((UDP_TUNNEL_PARTIAL_FEATURES | \ + NETIF_F_GSO_PARTIAL) >> \ + NETIF_F_GSO_SHIFT) + struct udp_port_cfg { u8 family; @@ -47,7 +51,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { - return 0; + return -EPFNOSUPPORT; } #endif @@ -145,6 +149,33 @@ void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, __be16 src_port, __be16 dst_port, bool nocheck, u16 ip6cb_flags); +static inline bool udp_tunnel_handle_partial(struct sk_buff *skb) +{ + bool double_encap = !!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL); + + /* + * If the skb went through partial segmentation, lower devices + * will not need to offload the related features - except for + * UDP_TUNNEL, that will be re-added by the later + * udp_tunnel_handle_offloads(). + */ + if (double_encap) + skb_shinfo(skb)->gso_type &= ~UDP_TUNNEL_STRIPPED_GSO_TYPES; + return double_encap; +} + +static inline void udp_tunnel_set_inner_protocol(struct sk_buff *skb, + bool double_encap, + __be16 inner_proto) +{ + /* + * The inner protocol has been set by the nested tunnel, don't + * override it. 
+ */ + if (!double_encap) + skb_set_inner_protocol(skb, inner_proto); +} + void udp_tunnel_sock_release(struct socket *sock); struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, @@ -198,7 +229,7 @@ static inline void udp_tunnel_encap_enable(struct sock *sk) #if IS_ENABLED(CONFIG_IPV6) if (READ_ONCE(sk->sk_family) == PF_INET6) - ipv6_stub->udpv6_encap_enable(); + udpv6_encap_enable(); #endif udp_encap_enable(); } diff --git a/include/net/udplite.h b/include/net/udplite.h deleted file mode 100644 index 786919d29f8d..000000000000 --- a/include/net/udplite.h +++ /dev/null @@ -1,88 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Definitions for the UDP-Lite (RFC 3828) code. - */ -#ifndef _UDPLITE_H -#define _UDPLITE_H - -#include <net/ip6_checksum.h> -#include <net/udp.h> - -/* UDP-Lite socket options */ -#define UDPLITE_SEND_CSCOV 10 /* sender partial coverage (as sent) */ -#define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */ - -extern struct proto udplite_prot; -extern struct udp_table udplite_table; - -/* - * Checksum computation is all in software, hence simpler getfrag. - */ -static __inline__ int udplite_getfrag(void *from, char *to, int offset, - int len, int odd, struct sk_buff *skb) -{ - struct msghdr *msg = from; - return copy_from_iter_full(to, len, &msg->msg_iter) ? 0 : -EFAULT; -} - -/* - * Checksumming routines - */ -static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) -{ - u16 cscov; - - /* In UDPv4 a zero checksum means that the transmitter generated no - * checksum. UDP-Lite (like IPv6) mandates checksums, hence packets - * with a zero checksum field are illegal. */ - if (uh->check == 0) { - net_dbg_ratelimited("UDPLite: zeroed checksum field\n"); - return 1; - } - - cscov = ntohs(uh->len); - - if (cscov == 0) /* Indicates that full coverage is required. */ - ; - else if (cscov < 8 || cscov > skb->len) { - /* - * Coverage length violates RFC 3828: log and discard silently. 
- */ - net_dbg_ratelimited("UDPLite: bad csum coverage %d/%d\n", - cscov, skb->len); - return 1; - - } else if (cscov < skb->len) { - UDP_SKB_CB(skb)->partial_cov = 1; - UDP_SKB_CB(skb)->cscov = cscov; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; - skb->csum_valid = 0; - } - - return 0; -} - -/* Fast-path computation of checksum. Socket may not be locked. */ -static inline __wsum udplite_csum(struct sk_buff *skb) -{ - const int off = skb_transport_offset(skb); - const struct sock *sk = skb->sk; - int len = skb->len - off; - - if (udp_test_bit(UDPLITE_SEND_CC, sk)) { - u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen); - - if (pcslen < len) { - if (pcslen > 0) - len = pcslen; - udp_hdr(skb)->len = htons(pcslen); - } - } - skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ - - return skb_checksum(skb, off, len, 0); -} - -void udplite4_register(void); -#endif /* _UDPLITE_H */ diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 23e8861e8b25..ebac60a3d8a1 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -14,7 +14,7 @@ #include <linux/mm.h> #include <net/sock.h> -#define XDP_UMEM_SG_FLAG (1 << 1) +#define XDP_UMEM_SG_FLAG BIT(3) struct net_device; struct xsk_queue; diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 242e34f771cc..46797645a0c2 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -41,16 +41,42 @@ static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool) return XDP_PACKET_HEADROOM + pool->headroom; } +static inline u32 xsk_pool_get_tailroom(bool mbuf) +{ + return mbuf ? 
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : 0; +} + static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool) { return pool->chunk_size; } -static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) +static inline u32 __xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) { return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool); } +static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) +{ + u32 frame_size = __xsk_pool_get_rx_frame_size(pool); + struct xdp_umem *umem = pool->umem; + bool mbuf; + + /* Reserve tailroom only for zero-copy pools that opted into + * multi-buffer. The reserved area is used for skb_shared_info, + * matching the XDP core's xdp_data_hard_end() layout. + */ + mbuf = pool->dev && (umem->flags & XDP_UMEM_SG_FLAG); + frame_size -= xsk_pool_get_tailroom(mbuf); + + return ALIGN_DOWN(frame_size, 128); +} + +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool); +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { @@ -122,7 +148,7 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) goto out; list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { - list_del(&pos->list_node); + list_del_init(&pos->list_node); xp_free(pos); } @@ -157,7 +183,7 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) frag = list_first_entry_or_null(&xskb->pool->xskb_list, struct xdp_buff_xsk, list_node); if (frag) { - list_del(&frag->list_node); + list_del_init(&frag->list_node); ret = &frag->xdp; } @@ -168,7 +194,7 @@ static inline void xsk_buff_del_frag(struct xdp_buff *xdp) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); - list_del(&xskb->list_node); + list_del_init(&xskb->list_node); } static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first) @@ -337,6 +363,11 @@ static inline u32 
xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) return 0; } +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return 0; +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0a14daaa5dd4..10d3edde6b2f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1156,19 +1156,19 @@ struct xfrm_offload { #define CRYPTO_INVALID_PROTOCOL 128 /* Used to keep whole l2 header for transport mode GRO */ - __u32 orig_mac_len; + __u16 orig_mac_len; __u8 proto; __u8 inner_ipproto; }; struct sec_path { - int len; - int olen; - int verified_cnt; - struct xfrm_state *xvec[XFRM_MAX_DEPTH]; struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH]; + + u8 len; + u8 olen; + u8 verified_cnt; }; struct sec_path *secpath_set(struct sk_buff *skb); diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 92a2358c6ce3..ccb3b350001f 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -90,11 +90,6 @@ struct xsk_buff_pool { * destructor callback. */ spinlock_t cq_prod_lock; - /* Mutual exclusion of the completion ring in the SKB mode. - * Protect: when sockets share a single cq when the same netdev - * and queue id is shared. - */ - spinlock_t cq_cached_prod_lock; struct xdp_buff_xsk *free_heads[]; }; @@ -179,13 +174,6 @@ static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool, dma_sync_single_for_device(pool->dev, dma, size, DMA_BIDIRECTIONAL); } -/* Masks for xdp_umem_page flags. - * The low 12-bits of the addr will be 0 since this is the page address, so we - * can use them for flags. - */ -#define XSK_NEXT_PG_CONTIG_SHIFT 0 -#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT) - static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool, u64 addr, u32 len) { |
