From a36283e2b683f172aa1760c77325e50b16c0f792 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 7 Apr 2025 17:45:41 +0200
Subject: udp_tunnel: create a fastpath GRO lookup.

Most UDP tunnels bind a socket to a local port, with ANY address, no
peer and no interface index specified.
Additionally it's quite common to have a single tunnel device per
namespace.

Track in each namespace the UDP tunnel socket respecting the above.
When only a single one is present, store a reference in the netns.

When such reference is not NULL, UDP tunnel GRO lookup just need to
match the incoming packet destination port vs the socket local port.

The tunnel socket never sets the reuse[port] flag[s]. When bound to no
address and interface, no other socket can exist in the same netns
matching the specified local port.

Matching packets with non-local destination addresses will be
aggregated, and eventually segmented as needed - no behavior changes
intended.

Restrict the optimization to kernel sockets only: it covers all the
relevant use-cases, and user-space owned sockets could be disconnected
and rebound after setup_udp_tunnel_sock(), breaking the uniqueness
assumption

Note that the UDP tunnel socket reference is stored into struct
netns_ipv4 for both IPv4 and IPv6 tunnels. That is intentional to keep
all the fastpath-related netns fields in the same struct and allow
cacheline-based optimization. Currently both the IPv4 and IPv6 socket
pointer share the same cacheline as the `udp_table` field.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/41d16bc8d1257d567f9344c445b4ae0b4a91ede4.1744040675.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/udp.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 0807e21cfec9..895240177f4f 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -101,6 +101,13 @@ struct udp_sock {
 
 	/* Cache friendly copy of sk->sk_peek_off >= 0 */
 	bool		peeking_with_offset;
+
+	/*
+	 * Accounting for the tunnel GRO fastpath.
+	 * Unprotected by compilers guard, as it uses space available in
+	 * the last UDP socket cacheline.
+	 */
+	struct hlist_node	tunnel_list;
 };
 
 #define udp_test_bit(nr, sk)			\
@@ -219,4 +226,13 @@ static inline void udp_allow_gso(struct sock *sk)
 
 #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE)
 
+static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6)
+{
+#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
+	return rcu_dereference(net->ipv4.udp_tunnel_gro[is_ipv6].sk);
+#else
+	return NULL;
+#endif
+}
+
 #endif	/* _LINUX_UDP_H */
-- 
cgit v1.2.3


From 420aabef3ab5fa743afb4d3d391f03ef0e777ca8 Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Mon, 7 Apr 2025 21:01:02 +0200
Subject: net: Drop unused @sk of __skb_try_recv_from_queue()

__skb_try_recv_from_queue() deals with a queue, @sk is not used
since commit  e427cad6eee4 ("net: datagram: drop 'destructor'
argument from several helpers"). Remove sk from function parameters,
adapt callers.

No functional change intended.

Signed-off-by: Michal Luczaj <mhal@rbox.co>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Link: https://patch.msgid.link/20250407-cleanup-drop-param-sk-v1-1-cd076979afac@rbox.co
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b974a277975a..f1381aff0f89 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4105,8 +4105,7 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
 int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
 				int *err, long *timeo_p,
 				const struct sk_buff *skb);
-struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
-					  struct sk_buff_head *queue,
+struct sk_buff *__skb_try_recv_from_queue(struct sk_buff_head *queue,
 					  unsigned int flags,
 					  int *off, int *err,
 					  struct sk_buff **last);
-- 
cgit v1.2.3


From a82dc19db13649aa4232ce37cb6f4ceff851e2fe Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 8 Apr 2025 12:59:48 -0700
Subject: net: avoid potential race between netdev_get_by_index_lock() and
 netns switch

netdev_get_by_index_lock() performs following steps:

  rcu_lock();
  dev = lookup(netns, ifindex);
  dev_get(dev);
  rcu_unlock();
  [... lock & validate the dev ...]
  return dev

Validation right now only checks if the device is registered but since
the lookup is netns-aware we must also protect against the device
switching netns right after we dropped the RCU lock. Otherwise
the caller in netns1 may get a pointer to a device which has just
switched to netns2.

We can't hold the lock for the entire netns change process (because of
the NETDEV_UNREGISTER notifier), and there's no existing marking to
indicate that the netns is unlisted because of netns move, so add one.

AFAIU none of the existing netdev_get_by_index_lock() callers can
suffer from this problem (NAPI code double checks the netns membership
and other callers are either under rtnl_lock or not ns-sensitive),
so this patch does not have to be treated as a fix.

Reviewed-by: Joe Damato <jdamato@fastly.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250408195956.412733-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf3b6445817b..8e9be80bc167 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1952,6 +1952,7 @@ enum netdev_reg_state {
  *	@priv_destructor:	Called from unregister
  *	@npinfo:		XXX: need comments on this one
  * 	@nd_net:		Network namespace this network device is inside
+ *				protected by @lock
  *
  * 	@ml_priv:	Mid-layer private
  *	@ml_priv_type:  Mid-layer private type
@@ -2359,6 +2360,9 @@ struct net_device {
 
 	bool dismantle;
 
+	/** @moving_ns: device is changing netns, protected by @lock */
+	bool moving_ns;
+
 	enum {
 		RTNL_LINK_INITIALIZED,
 		RTNL_LINK_INITIALIZING,
@@ -2521,7 +2525,7 @@ struct net_device {
 	 *	@net_shaper_hierarchy, @reg_state, @threaded
 	 *
 	 * Double protects:
-	 *	@up
+	 *	@up, @moving_ns, @nd_net
 	 *
 	 * Double ops protects:
 	 *	@real_num_rx_queues, @real_num_tx_queues
-- 
cgit v1.2.3


From 606048cbd8346e616cfaee01b0143d072534136d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 8 Apr 2025 12:59:49 -0700
Subject: net: designate XSK pool pointers in queues as "ops protected"

Read accesses go via xsk_get_pool_from_qid(), the call coming
from the core and gve look safe (other "ops locked" drivers
don't support XSK).

Write accesses go via xsk_reg_pool_at_qid() and xsk_clear_pool_at_qid().
Former is already under the ops lock, latter is not (both coming from
the workqueue via xp_clear_dev() and NETDEV_UNREGISTER via xsk_notifier()).

Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250408195956.412733-3-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8e9be80bc167..7242fb8a22fc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -688,6 +688,7 @@ struct netdev_queue {
 	/* Subordinate device that the queue has been assigned to */
 	struct net_device	*sb_dev;
 #ifdef CONFIG_XDP_SOCKETS
+	/* "ops protected", see comment about net_device::lock */
 	struct xsk_buff_pool    *pool;
 #endif
 
-- 
cgit v1.2.3


From 03df156dd3a6d5992f17682cd5c3b11e5ffdae02 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 8 Apr 2025 12:59:52 -0700
Subject: xdp: double protect netdev->xdp_flags with netdev->lock

Protect xdp_features with netdev->lock. This way pure readers
no longer have to take rtnl_lock to access the field.

This includes calling NETDEV_XDP_FEAT_CHANGE under the lock.
Looks like that's fine for bonding, the only "real" listener,
it's the same as ethtool feature change.

In terms of normal drivers - only GVE need special consideration
(other drivers don't use instance lock or don't support XDP).
It calls xdp_set_features_flag() helper from gve_init_priv() which
in turn is called from gve_reset_recovery() (locked), or prior
to netdev registration. So switch to _locked.

Reviewed-by: Joe Damato <jdamato@fastly.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Acked-by: Harshitha Ramamurthy <hramamurthy@google.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/20250408195956.412733-6-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7242fb8a22fc..dece2ae396a1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2526,7 +2526,7 @@ struct net_device {
 	 *	@net_shaper_hierarchy, @reg_state, @threaded
 	 *
 	 * Double protects:
-	 *	@up, @moving_ns, @nd_net
+	 *	@up, @moving_ns, @nd_net, @xdp_flags
 	 *
 	 * Double ops protects:
 	 *	@real_num_rx_queues, @real_num_tx_queues
-- 
cgit v1.2.3


From 229671ac60e298b85c2644f52d7e487e9f487d06 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 8 Apr 2025 20:27:42 +0000
Subject: net: remove cpu stall in txq_trans_update()

txq_trans_update() currently uses txq->xmit_lock_owner
to conditionally update txq->trans_start.

For regular devices, txq->xmit_lock_owner is updated
from HARD_TX_LOCK() and HARD_TX_UNLOCK(), and this apparently
causes cpu stalls.

Using dev->lltx, which sits in a read-mostly cache-line,
and already used in HARD_TX_LOCK() and HARD_TX_UNLOCK()
helps cpu prediction.

On an AMD EPYC 7B12 dual socket server, tcp_rr with 128 threads
and 30,000 flows gets a 5 % increase in throughput.

As explained in commit 95ecba62e2fd ("net: fix races in
netdev_tx_sent_queue()/dev_watchdog()") I am planning
to no longer update txq->trans_start in the fast path
in a followup patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250408202742.2145516-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dece2ae396a1..a28a08046615 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4693,9 +4693,10 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
 /*
  * txq->trans_start can be read locklessly from dev_watchdog()
  */
-static inline void txq_trans_update(struct netdev_queue *txq)
+static inline void txq_trans_update(const struct net_device *dev,
+				    struct netdev_queue *txq)
 {
-	if (txq->xmit_lock_owner != -1)
+	if (!dev->lltx)
 		WRITE_ONCE(txq->trans_start, jiffies);
 }
 
@@ -5214,7 +5215,7 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi
 
 	rc = __netdev_start_xmit(ops, skb, dev, more);
 	if (rc == NETDEV_TX_OK)
-		txq_trans_update(txq);
+		txq_trans_update(dev, txq);
 
 	return rc;
 }
-- 
cgit v1.2.3


From b1e904999542ad6764eafa54545f1c55776006d1 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Tue, 8 Apr 2025 11:32:01 -0700
Subject: net: pass const to msg_data_left()

The msg_data_left() function doesn't modify the struct msghdr parameter,
so mark it as const. This allows the function to be used with const
references, improving type safety and making the API more flexible.

Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250408-tcpsendmsg-v3-1-208b87064c28@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/socket.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index c3322eb3d686..3b262487ec06 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr
 	return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
 }
 
-static inline size_t msg_data_left(struct msghdr *msg)
+static inline size_t msg_data_left(const struct msghdr *msg)
 {
 	return iov_iter_count(&msg->msg_iter);
 }
-- 
cgit v1.2.3


From 2a63dd0edf388802074f1d4d6b588a3b4c380688 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Wed, 9 Apr 2025 19:36:45 -0700
Subject: net: Retire DCCP socket.

DCCP was orphaned in 2021 by commit 054c4610bd05 ("MAINTAINERS: dccp:
move Gerrit Renker to CREDITS"), which noted that the last maintainer
had been inactive for five years.

In recent years, it has become a playground for syzbot, and most changes
to DCCP have been odd bug fixes triggered by syzbot.  Apart from that,
the only changes have been driven by treewide or networking API updates
or adjustments related to TCP.

Thus, in 2023, we announced we would remove DCCP in 2025 via commit
b144fcaf46d4 ("dccp: Print deprecation notice.").

Since then, only one individual has contacted the netdev mailing list. [0]

There is ongoing research for Multipath DCCP.  The repository is hosted
on GitHub [1], and development is not taking place through the upstream
community.  While the repository is published under the GPLv2 license,
the scheduling part remains proprietary, with a LICENSE file [2] stating:

  "This is not Open Source software."

The researcher mentioned a plan to address the licensing issue, upstream
the patches, and step up as a maintainer, but there has been no further
communication since then.

Maintaining DCCP for a decade without any real users has become a burden.

Therefore, it's time to remove it.

Removing DCCP will also provide significant benefits to TCP.  It allows
us to freely reorganize the layout of struct inet_connection_sock, which
is currently shared with DCCP, and optimize it to reduce the number of
cachelines accessed in the TCP fast path.

Note that we keep DCCP netfilter modules as requested.  [3]

Link: https://lore.kernel.org/netdev/20230710182253.81446-1-kuniyu@amazon.com/T/#u #[0]
Link: https://github.com/telekom/mp-dccp #[1]
Link: https://github.com/telekom/mp-dccp/blob/mpdccp_v03_k5.10/net/dccp/non_gpl_scheduler/LICENSE #[2]
Link: https://lore.kernel.org/netdev/Z_VQ0KlCRkqYWXa-@calendula/ #[3]
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Acked-by: Paul Moore <paul@paul-moore.com> (LSM and SELinux)
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Link: https://patch.msgid.link/20250410023921.11307-3-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dccp.h | 289 ---------------------------------------------------
 include/linux/tfrc.h |  51 ---------
 2 files changed, 340 deletions(-)
 delete mode 100644 include/linux/tfrc.h

(limited to 'include/linux')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 325af611909f..0b61b8b996d4 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -2,79 +2,8 @@
 #ifndef _LINUX_DCCP_H
 #define _LINUX_DCCP_H
 
-
-#include <linux/in.h>
-#include <linux/interrupt.h>
-#include <linux/ktime.h>
-#include <linux/list.h>
-#include <linux/uio.h>
-#include <linux/workqueue.h>
-
-#include <net/inet_connection_sock.h>
-#include <net/inet_sock.h>
-#include <net/inet_timewait_sock.h>
-#include <net/tcp_states.h>
 #include <uapi/linux/dccp.h>
 
-enum dccp_state {
-	DCCP_OPEN	     = TCP_ESTABLISHED,
-	DCCP_REQUESTING	     = TCP_SYN_SENT,
-	DCCP_LISTEN	     = TCP_LISTEN,
-	DCCP_RESPOND	     = TCP_SYN_RECV,
-	/*
-	 * States involved in closing a DCCP connection:
-	 * 1) ACTIVE_CLOSEREQ is entered by a server sending a CloseReq.
-	 *
-	 * 2) CLOSING can have three different meanings (RFC 4340, 8.3):
-	 *  a. Client has performed active-close, has sent a Close to the server
-	 *     from state OPEN or PARTOPEN, and is waiting for the final Reset
-	 *     (in this case, SOCK_DONE == 1).
-	 *  b. Client is asked to perform passive-close, by receiving a CloseReq
-	 *     in (PART)OPEN state. It sends a Close and waits for final Reset
-	 *     (in this case, SOCK_DONE == 0).
-	 *  c. Server performs an active-close as in (a), keeps TIMEWAIT state.
-	 *
-	 * 3) The following intermediate states are employed to give passively
-	 *    closing nodes a chance to process their unread data:
-	 *    - PASSIVE_CLOSE    (from OPEN => CLOSED) and
-	 *    - PASSIVE_CLOSEREQ (from (PART)OPEN to CLOSING; case (b) above).
-	 */
-	DCCP_ACTIVE_CLOSEREQ = TCP_FIN_WAIT1,
-	DCCP_PASSIVE_CLOSE   = TCP_CLOSE_WAIT,	/* any node receiving a Close */
-	DCCP_CLOSING	     = TCP_CLOSING,
-	DCCP_TIME_WAIT	     = TCP_TIME_WAIT,
-	DCCP_CLOSED	     = TCP_CLOSE,
-	DCCP_NEW_SYN_RECV    = TCP_NEW_SYN_RECV,
-	DCCP_PARTOPEN	     = TCP_MAX_STATES,
-	DCCP_PASSIVE_CLOSEREQ,			/* clients receiving CloseReq */
-	DCCP_MAX_STATES
-};
-
-enum {
-	DCCPF_OPEN	      = TCPF_ESTABLISHED,
-	DCCPF_REQUESTING      = TCPF_SYN_SENT,
-	DCCPF_LISTEN	      = TCPF_LISTEN,
-	DCCPF_RESPOND	      = TCPF_SYN_RECV,
-	DCCPF_ACTIVE_CLOSEREQ = TCPF_FIN_WAIT1,
-	DCCPF_CLOSING	      = TCPF_CLOSING,
-	DCCPF_TIME_WAIT	      = TCPF_TIME_WAIT,
-	DCCPF_CLOSED	      = TCPF_CLOSE,
-	DCCPF_NEW_SYN_RECV    = TCPF_NEW_SYN_RECV,
-	DCCPF_PARTOPEN	      = (1 << DCCP_PARTOPEN),
-};
-
-static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
-{
-	return (struct dccp_hdr *)skb_transport_header(skb);
-}
-
-static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen)
-{
-	skb_push(skb, headlen);
-	skb_reset_transport_header(skb);
-	return memset(skb_transport_header(skb), 0, headlen);
-}
-
 static inline struct dccp_hdr_ext *dccp_hdrx(const struct dccp_hdr *dh)
 {
 	return (struct dccp_hdr_ext *)((unsigned char *)dh + sizeof(*dh));
@@ -85,12 +14,6 @@ static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh)
 	return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0);
 }
 
-static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb)
-{
-	const struct dccp_hdr *dh = dccp_hdr(skb);
-	return __dccp_basic_hdr_len(dh);
-}
-
 static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh)
 {
 	__u64 seq_nr =  ntohs(dh->dccph_seq);
@@ -103,222 +26,10 @@ static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh)
 	return seq_nr;
 }
 
-static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
-{
-	return (struct dccp_hdr_request *)(skb_transport_header(skb) +
-					   dccp_basic_hdr_len(skb));
-}
-
-static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
-{
-	return (struct dccp_hdr_ack_bits *)(skb_transport_header(skb) +
-					    dccp_basic_hdr_len(skb));
-}
-
-static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
-{
-	const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb);
-	return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + ntohl(dhack->dccph_ack_nr_low);
-}
-
-static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
-{
-	return (struct dccp_hdr_response *)(skb_transport_header(skb) +
-					    dccp_basic_hdr_len(skb));
-}
-
-static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
-{
-	return (struct dccp_hdr_reset *)(skb_transport_header(skb) +
-					 dccp_basic_hdr_len(skb));
-}
-
 static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh)
 {
 	return __dccp_basic_hdr_len(dh) +
 	       dccp_packet_hdr_len(dh->dccph_type);
 }
 
-static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
-{
-	return __dccp_hdr_len(dccp_hdr(skb));
-}
-
-/**
- * struct dccp_request_sock  -  represent DCCP-specific connection request
- * @dreq_inet_rsk: structure inherited from
- * @dreq_iss: initial sequence number, sent on the first Response (RFC 4340, 7.1)
- * @dreq_gss: greatest sequence number sent (for retransmitted Responses)
- * @dreq_isr: initial sequence number received in the first Request
- * @dreq_gsr: greatest sequence number received (for retransmitted Request(s))
- * @dreq_service: service code present on the Request (there is just one)
- * @dreq_featneg: feature negotiation options for this connection
- * The following two fields are analogous to the ones in dccp_sock:
- * @dreq_timestamp_echo: last received timestamp to echo (13.1)
- * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo
- */
-struct dccp_request_sock {
-	struct inet_request_sock dreq_inet_rsk;
-	__u64			 dreq_iss;
-	__u64			 dreq_gss;
-	__u64			 dreq_isr;
-	__u64			 dreq_gsr;
-	__be32			 dreq_service;
-	spinlock_t		 dreq_lock;
-	struct list_head	 dreq_featneg;
-	__u32			 dreq_timestamp_echo;
-	__u32			 dreq_timestamp_time;
-};
-
-static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req)
-{
-	return (struct dccp_request_sock *)req;
-}
-
-extern struct inet_timewait_death_row dccp_death_row;
-
-extern int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
-			      struct sk_buff *skb);
-
-struct dccp_options_received {
-	u64	dccpor_ndp:48;
-	u32	dccpor_timestamp;
-	u32	dccpor_timestamp_echo;
-	u32	dccpor_elapsed_time;
-};
-
-struct ccid;
-
-enum dccp_role {
-	DCCP_ROLE_UNDEFINED,
-	DCCP_ROLE_LISTEN,
-	DCCP_ROLE_CLIENT,
-	DCCP_ROLE_SERVER,
-};
-
-struct dccp_service_list {
-	__u32	dccpsl_nr;
-	__be32	dccpsl_list[];
-};
-
-#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1)
-#define DCCP_SERVICE_CODE_IS_ABSENT		0
-
-static inline bool dccp_list_has_service(const struct dccp_service_list *sl,
-					const __be32 service)
-{
-	if (likely(sl != NULL)) {
-		u32 i = sl->dccpsl_nr;
-		while (i--)
-			if (sl->dccpsl_list[i] == service)
-				return true;
-	}
-	return false;
-}
-
-struct dccp_ackvec;
-
-/**
- * struct dccp_sock - DCCP socket state
- *
- * @dccps_swl - sequence number window low
- * @dccps_swh - sequence number window high
- * @dccps_awl - acknowledgement number window low
- * @dccps_awh - acknowledgement number window high
- * @dccps_iss - initial sequence number sent
- * @dccps_isr - initial sequence number received
- * @dccps_osr - first OPEN sequence number received
- * @dccps_gss - greatest sequence number sent
- * @dccps_gsr - greatest valid sequence number received
- * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss
- * @dccps_service - first (passive sock) or unique (active sock) service code
- * @dccps_service_list - second .. last service code on passive socket
- * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
- * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo
- * @dccps_l_ack_ratio - feature-local Ack Ratio
- * @dccps_r_ack_ratio - feature-remote Ack Ratio
- * @dccps_l_seq_win - local Sequence Window (influences ack number validity)
- * @dccps_r_seq_win - remote Sequence Window (influences seq number validity)
- * @dccps_pcslen - sender   partial checksum coverage (via sockopt)
- * @dccps_pcrlen - receiver partial checksum coverage (via sockopt)
- * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2)
- * @dccps_ndp_count - number of Non Data Packets since last data packet
- * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
- * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4)
- * @dccps_featneg - tracks feature-negotiation state (mostly during handshake)
- * @dccps_hc_rx_ackvec - rx half connection ack vector
- * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
- * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
- * @dccps_options_received - parsed set of retrieved options
- * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy
- * @dccps_tx_qlen - maximum length of the TX queue
- * @dccps_role - role of this sock, one of %dccp_role
- * @dccps_hc_rx_insert_options - receiver wants to add options when acking
- * @dccps_hc_tx_insert_options - sender wants to add options when sending
- * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
- * @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
- * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets
- * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing)
- * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
- */
-struct dccp_sock {
-	/* inet_connection_sock has to be the first member of dccp_sock */
-	struct inet_connection_sock	dccps_inet_connection;
-#define dccps_syn_rtt			dccps_inet_connection.icsk_ack.lrcvtime
-	__u64				dccps_swl;
-	__u64				dccps_swh;
-	__u64				dccps_awl;
-	__u64				dccps_awh;
-	__u64				dccps_iss;
-	__u64				dccps_isr;
-	__u64				dccps_osr;
-	__u64				dccps_gss;
-	__u64				dccps_gsr;
-	__u64				dccps_gar;
-	__be32				dccps_service;
-	__u32				dccps_mss_cache;
-	struct dccp_service_list	*dccps_service_list;
-	__u32				dccps_timestamp_echo;
-	__u32				dccps_timestamp_time;
-	__u16				dccps_l_ack_ratio;
-	__u16				dccps_r_ack_ratio;
-	__u64				dccps_l_seq_win:48;
-	__u64				dccps_r_seq_win:48;
-	__u8				dccps_pcslen:4;
-	__u8				dccps_pcrlen:4;
-	__u8				dccps_send_ndp_count:1;
-	__u64				dccps_ndp_count:48;
-	unsigned long			dccps_rate_last;
-	struct list_head		dccps_featneg;
-	struct dccp_ackvec		*dccps_hc_rx_ackvec;
-	struct ccid			*dccps_hc_rx_ccid;
-	struct ccid			*dccps_hc_tx_ccid;
-	struct dccp_options_received	dccps_options_received;
-	__u8				dccps_qpolicy;
-	__u32				dccps_tx_qlen;
-	enum dccp_role			dccps_role:2;
-	__u8				dccps_hc_rx_insert_options:1;
-	__u8				dccps_hc_tx_insert_options:1;
-	__u8				dccps_server_timewait:1;
-	__u8				dccps_sync_scheduled:1;
-	struct tasklet_struct		dccps_xmitlet;
-	struct timer_list		dccps_xmit_timer;
-};
-
-#define dccp_sk(ptr)	container_of_const(ptr, struct dccp_sock, \
-					   dccps_inet_connection.icsk_inet.sk)
-
-static inline const char *dccp_role(const struct sock *sk)
-{
-	switch (dccp_sk(sk)->dccps_role) {
-	case DCCP_ROLE_UNDEFINED: return "undefined";
-	case DCCP_ROLE_LISTEN:	  return "listen";
-	case DCCP_ROLE_SERVER:	  return "server";
-	case DCCP_ROLE_CLIENT:	  return "client";
-	}
-	return NULL;
-}
-
-extern void dccp_syn_ack_timeout(const struct request_sock *req);
-
 #endif /* _LINUX_DCCP_H */
diff --git a/include/linux/tfrc.h b/include/linux/tfrc.h
deleted file mode 100644
index a5acc768085d..000000000000
--- a/include/linux/tfrc.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _LINUX_TFRC_H_
-#define _LINUX_TFRC_H_
-/*
- *  TFRC - Data Structures for the TCP-Friendly Rate Control congestion
- *         control mechanism as specified in RFC 3448.
- *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
- *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
- */
-#include <linux/types.h>
-
-/** 	tfrc_rx_info    -    TFRC Receiver Data Structure
- *
- * 	@tfrcrx_x_recv:	receiver estimate of sending rate (3.2.2)
- * 	@tfrcrx_rtt:	round-trip-time (communicated by sender)
- * 	@tfrcrx_p:	current estimate of loss event rate (3.2.2)
- */
-struct tfrc_rx_info {
-  	__u32 tfrcrx_x_recv;
-	__u32 tfrcrx_rtt;
-  	__u32 tfrcrx_p;
-};
-
-/** 	tfrc_tx_info    -    TFRC Sender Data Structure
- *
- * 	@tfrctx_x:	computed transmit rate (4.3 (4))
- * 	@tfrctx_x_recv: receiver estimate of send rate (4.3)
- * 	@tfrctx_x_calc:	return value of throughput equation (3.1)
- * 	@tfrctx_rtt:	(moving average) estimate of RTT (4.3)
- * 	@tfrctx_p:	current loss event rate (5.4)
- * 	@tfrctx_rto:	estimate of RTO, equals 4*RTT (4.3)
- * 	@tfrctx_ipi:	inter-packet interval (4.6)
- *
- *  Note: X and X_recv are both maintained in units of 64 * bytes/second. This
- *        enables a finer resolution of sending rates and avoids problems with
- *        integer arithmetic; u32 is not sufficient as scaling consumes 6 bits.
- */
-struct tfrc_tx_info {
-	__u64 tfrctx_x;
-	__u64 tfrctx_x_recv;
-	__u32 tfrctx_x_calc;
-	__u32 tfrctx_rtt;
-	__u32 tfrctx_p;
-	__u32 tfrctx_rto;
-	__u32 tfrctx_ipi;
-};
-
-#endif /* _LINUX_TFRC_H_ */
-- 
cgit v1.2.3


From 097f171f98289cf737437599c40b0d1e81266e9e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Apr 2025 18:42:46 -0700
Subject: net: convert dev->rtnl_link_state to a bool

netdevice reg_state was split into two 16 bit enums back in 2010
in commit a2835763e130 ("rtnetlink: handle rtnl_link netlink
notifications manually"). Since the split the fields have been
moved apart, and last year we converted reg_state to a normal
u8 in commit 4d42b37def70 ("net: convert dev->reg_state to u8").

rtnl_link_state being a 16 bitfield makes no sense. Convert it
to a single bool, it seems very unlikely after 15 years that
we'll need more values in it.

We could drop dev->rtnl_link_ops from the conditions but feels
like having it there more clearly points at the reason for this
hack.

Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250410014246.780885-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d8544f6a680c..e6036b82ef4c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1946,9 +1946,6 @@ enum netdev_reg_state {
  *
  *	@reg_state:		Register/unregister state machine
  *	@dismantle:		Device is going to be freed
- *	@rtnl_link_state:	This enum represents the phases of creating
- *				a new link
- *
  *	@needs_free_netdev:	Should unregister perform free_netdev?
  *	@priv_destructor:	Called from unregister
  *	@npinfo:		XXX: need comments on this one
@@ -2363,11 +2360,8 @@ struct net_device {
 
 	/** @moving_ns: device is changing netns, protected by @lock */
 	bool moving_ns;
-
-	enum {
-		RTNL_LINK_INITIALIZED,
-		RTNL_LINK_INITIALIZING,
-	} rtnl_link_state:16;
+	/** @rtnl_link_initializing: Device being created, suppress events */
+	bool rtnl_link_initializing;
 
 	bool needs_free_netdev;
 	void (*priv_destructor)(struct net_device *dev);
-- 
cgit v1.2.3


From cd3c93167da0e760b5819246eae7a4ea30fd014b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 9 Apr 2025 12:41:36 +0200
Subject: page_pool: Move pp_magic check into helper functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we are about to stash some more information into the pp_magic
field, let's move the magic signature checks into a pair of helper
functions so it can be changed in one place.

Reviewed-by: Mina Almasry <almasrymina@google.com>
Tested-by: Yonglong Liu <liuyonglong@huawei.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Reviewed-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20250409-page-pool-track-dma-v9-1-6a9ef2e0cba8@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mm.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index b7f13f087954..56c47f4a38ca 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4248,4 +4248,24 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
 #define VM_SEALED_SYSMAP	VM_NONE
 #endif
 
+/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
+ * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
+ * the head page of compound page and bit 1 for pfmemalloc page.
+ * page_is_pfmemalloc() is checked in __page_pool_put_page() to avoid recycling
+ * the pfmemalloc page.
+ */
+#define PP_MAGIC_MASK ~0x3UL
+
+#ifdef CONFIG_PAGE_POOL
+static inline bool page_pool_page_is_pp(struct page *page)
+{
+	return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
+}
+#else
+static inline bool page_pool_page_is_pp(struct page *page)
+{
+	return false;
+}
+#endif
+
 #endif /* _LINUX_MM_H */
-- 
cgit v1.2.3


From ee62ce7a1d909ccba0399680a03c2dee83bcae95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 9 Apr 2025 12:41:37 +0200
Subject: page_pool: Track DMA-mapped pages and unmap them when destroying the
 pool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When enabling DMA mapping in page_pool, pages are kept DMA mapped until
they are released from the pool, to avoid the overhead of re-mapping the
pages every time they are used. This causes resource leaks and/or
crashes when there are pages still outstanding while the device is torn
down, because page_pool will attempt an unmap through a non-existent DMA
device on the subsequent page return.

To fix this, implement a simple tracking of outstanding DMA-mapped pages
in page pool using an xarray. This was first suggested by Mina[0], and
turns out to be fairly straight forward: We simply store pointers to
pages directly in the xarray with xa_alloc() when they are first DMA
mapped, and remove them from the array on unmap. Then, when a page pool
is torn down, it can simply walk the xarray and unmap all pages still
present there before returning, which also allows us to get rid of the
get/put_device() calls in page_pool. Using xa_cmpxchg(), no additional
synchronisation is needed, as a page will only ever be unmapped once.

To avoid having to walk the entire xarray on unmap to find the page
reference, we stash the ID assigned by xa_alloc() into the page
structure itself, using the upper bits of the pp_magic field. This
requires a couple of defines to avoid conflicting with the
POINTER_POISON_DELTA define, but this is all evaluated at compile-time,
so does not affect run-time performance. The bitmap calculations in this
patch gives the following number of bits for different architectures:

- 23 bits on 32-bit architectures
- 21 bits on PPC64 (because of the definition of ILLEGAL_POINTER_VALUE)
- 32 bits on other 64-bit architectures

Stashing a value into the unused bits of pp_magic does have the effect
that it can make the value stored there lie outside the unmappable
range (as governed by the mmap_min_addr sysctl), for architectures that
don't define ILLEGAL_POINTER_VALUE. This means that if one of the
pointers that is aliased to the pp_magic field (such as page->lru.next)
is dereferenced while the page is owned by page_pool, that could lead to
a dereference into userspace, which is a security concern. The risk of
this is mitigated by the fact that (a) we always clear pp_magic before
releasing a page from page_pool, and (b) this would need a
use-after-free bug for struct page, which can have many other risks
since page->lru.next is used as a generic list pointer in multiple
places in the kernel. As such, with this patch we take the position that
this risk is negligible in practice. For more discussion, see[1].

Since all the tracking added in this patch is performed on DMA
map/unmap, no additional code is needed in the fast path, meaning the
performance overhead of this tracking is negligible there. A
micro-benchmark shows that the total overhead of the tracking itself is
about 400 ns (39 cycles(tsc) 395.218 ns; sum for both map and unmap[2]).
Since this cost is only paid on DMA map and unmap, it seems like an
acceptable cost to fix the late unmap issue. Further optimisation can
narrow the cases where this cost is paid (for instance by eliding the
tracking when DMA map/unmap is a no-op).

The extra memory needed to track the pages is neatly encapsulated inside
xarray, which uses the 'struct xa_node' structure to track items. This
structure is 576 bytes long, with slots for 64 items, meaning that a
full node occurs only 9 bytes of overhead per slot it tracks (in
practice, it probably won't be this efficient, but in any case it should
be an acceptable overhead).

[0] https://lore.kernel.org/all/CAHS8izPg7B5DwKfSuzz-iOop_YRbk3Sd6Y4rX7KBG9DcVJcyWg@mail.gmail.com/
[1] https://lore.kernel.org/r/20250320023202.GA25514@openwall.com
[2] https://lore.kernel.org/r/ae07144c-9295-4c9d-a400-153bb689fe9e@huawei.com

Reported-by: Yonglong Liu <liuyonglong@huawei.com>
Closes: https://lore.kernel.org/r/8743264a-9700-4227-a556-5f931c720211@huawei.com
Fixes: ff7d6b27f894 ("page_pool: refurbish version of page_pool code")
Suggested-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Jesper Dangaard Brouer <hawk@kernel.org>
Tested-by: Jesper Dangaard Brouer <hawk@kernel.org>
Tested-by: Qiuling Ren <qren@redhat.com>
Tested-by: Yuying Ma <yuma@redhat.com>
Tested-by: Yonglong Liu <liuyonglong@huawei.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20250409-page-pool-track-dma-v9-2-6a9ef2e0cba8@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mm.h     | 46 ++++++++++++++++++++++++++++++++++++++++++----
 include/linux/poison.h |  4 ++++
 2 files changed, 46 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 56c47f4a38ca..130d3c9d2ee4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4248,13 +4248,51 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
 #define VM_SEALED_SYSMAP	VM_NONE
 #endif
 
+/*
+ * DMA mapping IDs for page_pool
+ *
+ * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and
+ * stashes it in the upper bits of page->pp_magic. We always want to be able to
+ * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP
+ * pages can have arbitrary kernel pointers stored in the same field as pp_magic
+ * (since it overlaps with page->lru.next), so we must ensure that we cannot
+ * mistake a valid kernel pointer with any of the values we write into this
+ * field.
+ *
+ * On architectures that set POISON_POINTER_DELTA, this is already ensured,
+ * since this value becomes part of PP_SIGNATURE; meaning we can just use the
+ * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the
+ * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is
+ * 0, we make sure that we leave the two topmost bits empty, as that guarantees
+ * we won't mistake a valid kernel pointer for a value we set, regardless of the
+ * VMSPLIT setting.
+ *
+ * Altogether, this means that the number of bits available is constrained by
+ * the size of an unsigned long (at the upper end, subtracting two bits per the
+ * above), and the definition of PP_SIGNATURE (with or without
+ * POISON_POINTER_DELTA).
+ */
+#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA))
+#if POISON_POINTER_DELTA > 0
+/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA
+ * index to not overlap with that if set
+ */
+#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT)
+#else
+/* Always leave out the topmost two; see above. */
+#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2)
+#endif
+
+#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
+				  PP_DMA_INDEX_SHIFT)
+
 /* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
  * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
- * the head page of compound page and bit 1 for pfmemalloc page.
- * page_is_pfmemalloc() is checked in __page_pool_put_page() to avoid recycling
- * the pfmemalloc page.
+ * the head page of compound page and bit 1 for pfmemalloc page, as well as the
+ * bits used for the DMA index. page_is_pfmemalloc() is checked in
+ * __page_pool_put_page() to avoid recycling the pfmemalloc page.
  */
-#define PP_MAGIC_MASK ~0x3UL
+#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL)
 
 #ifdef CONFIG_PAGE_POOL
 static inline bool page_pool_page_is_pp(struct page *page)
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 331a9a996fa8..8ca2235f78d5 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -70,6 +70,10 @@
 #define KEY_DESTROY		0xbd
 
 /********** net/core/page_pool.c **********/
+/*
+ * page_pool uses additional free bits within this value to store data, see the
+ * definition of PP_DMA_INDEX_MASK in mm.h
+ */
 #define PP_SIGNATURE		(0x40 + POISON_POINTER_DELTA)
 
 /********** net/core/skbuff.c **********/
-- 
cgit v1.2.3


From ceaceaf79ea0fe337344fc5c1fb10a421a362410 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Fri, 11 Apr 2025 23:04:19 +0100
Subject: net: ethtool: fix get_ts_stats() documentation

Commit 0e9c127729be ("ethtool: add interface to read Tx hardware
timestamping statistics") added documentation for timestamping
statistics, but added the detailed explanation for this method to
the get_ts_info() rather than get_ts_stats(). Move it to the correct
entry.

Cc: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1u3MTz-000Crx-IW@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 8210ece94fa6..013d25858642 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -926,10 +926,11 @@ struct kernel_ethtool_ts_info {
  * @get_ts_info: Get the time stamping and PTP hardware clock capabilities.
  *	It may be called with RCU, or rtnl or reference on the device.
  *	Drivers supporting transmit time stamps in software should set this to
- *	ethtool_op_get_ts_info(). Drivers must not zero statistics which they
- *	don't report. The stats	structure is initialized to ETHTOOL_STAT_NOT_SET
- *	indicating driver does not report statistics.
- * @get_ts_stats: Query the device hardware timestamping statistics.
+ *	ethtool_op_get_ts_info().
+ * @get_ts_stats: Query the device hardware timestamping statistics. Drivers
+ *	must not zero statistics which they don't report. The stats structure
+ *	is initialized to ETHTOOL_STAT_NOT_SET indicating driver does not
+ *	report statistics.
  * @get_module_info: Get the size and type of the eeprom contained within
  *	a plug-in module.
  * @get_module_eeprom: Get the eeprom information from the plug-in module
-- 
cgit v1.2.3


From 651f88cb046c5e002f7c11de2cebf207787d2346 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Sat, 12 Apr 2025 09:08:45 +0100
Subject: net: stmmac: remove eee_usecs_rate

plat_dat->eee_users_rate is now unused, so remove this member.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1u3Vuv-000E7y-9k@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index c4ec8bb8144e..8aed09d65b4a 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -276,7 +276,6 @@ struct plat_stmmacenet_data {
 	int mac_port_sel_speed;
 	int has_xgmac;
 	u8 vlan_fail_q;
-	unsigned long eee_usecs_rate;
 	struct pci_dev *pdev;
 	int int_snapshot_num;
 	int msi_mac_vec;
-- 
cgit v1.2.3


From f99564688f38458d86b64f099ebf03f19517cf77 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 13 Apr 2025 16:09:40 +0200
Subject: net: phy: remove device_phy_find_device

AFAICS this function has never had a user.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/ab7b8094-2eea-4e82-a047-fd60117f220b@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index a2bfae80c449..fb755358d965 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1757,7 +1757,6 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id,
 int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id);
 struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode);
 struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode);
-struct phy_device *device_phy_find_device(struct device *dev);
 struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode);
 struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
 int phy_device_register(struct phy_device *phy);
@@ -1779,11 +1778,6 @@ struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode)
 	return NULL;
 }
 
-static inline struct phy_device *device_phy_find_device(struct device *dev)
-{
-	return NULL;
-}
-
 static inline
 struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode)
 {
-- 
cgit v1.2.3


From 7c571ac57d9d97190dcba18212fabf99888b0c48 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 14 Apr 2025 14:26:30 -0700
Subject: net: ptp: introduce .supported_extts_flags to ptp_clock_info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PTP_EXTTS_REQUEST(2) ioctl has a flags field which specifies how the
external timestamp request should behave. This includes which edge of the
signal to timestamp, as well as a specialized "offset" mode. It is expected
that more flags will be added in the future.

Driver authors routinely do not check the flags, often accepting requests
with flags which they do not support. Even drivers which do check flags may
not be future-proofed to reject flags not yet defined. Thus, any future
flag additions often require manually updating drivers to reject these
flags.

This approach of hoping we catch flag checks during review, or playing
whack-a-mole after the fact is the wrong approach.

Introduce the "supported_extts_flags" field to the ptp_clock_info
structure. This field defines the set of flags the device actually
supports.

Update the core character device logic to check this field and reject
unsupported requests. Getting this right is somewhat tricky. First, to
avoid unnecessary repetition and make basic functionality work when
.supported_extts_flags is 0, the core always accepts the PTP_ENABLE_FEATURE
flag. This flag is used to set the 'on' parameter to the .enable function
and is thus always 'supported' by all drivers.

For backwards compatibility, the PTP_RISING_EDGE and PTP_FALLING_EDGE flags
are merely "hints" when using the old PTP_EXTTS_REQUEST ioctl, and are not
expected to be enforced. If the user issues PTP_EXTTS_REQUEST2, the
PTP_STRICT_FLAGS flag is added which is supposed to inform the driver to
strictly validate the flags and reject unsupported requests. To handle
this, first check if the driver reports PTP_STRICT_FLAGS support. If it
does not, then always allow the PTP_RISING_EDGE and PTP_FALLING_EDGE flags.
This keeps backwards compatibility with the original PTP_EXTTS_REQUEST
ioctl where these flags are not guaranteed to be honored.

This way, drivers which do not set the supported_extts_flags will continue
to accept requests for the original PTP_EXTTS_REQUEST ioctl. The core will
automatically reject requests with new flags, and correctly reject requests
with PTP_STRICT_FLAGS, where the driver is supposed to strictly validate
the flags.

Update the various drivers, refactoring their validation logic into the
.supported_extts_flags field. For consistency and readability,
PTP_ENABLE_FEATURE is not set in the supported flags list, and
PTP_EXTTS_EDGES is expanded to PTP_RISING_EDGE | PTP_FALLING_EDGE in all
cases.

Note the following driver files set n_ext_ts to a non-zero value but did
not check flags at all:

 • drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
 • drivers/net/ethernet/freescale/enetc/enetc_ptp.c
 • drivers/net/ethernet/intel/i40e/i40e_ptp.c
 • drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
 • drivers/net/ethernet/renesas/ravb_ptp.c
 • drivers/net/ethernet/renesas/rtsn.c
 • drivers/net/ethernet/renesas/rtsn.h
 • drivers/net/ethernet/ti/am65-cpts.c
 • drivers/net/ethernet/ti/cpts.h
 • drivers/net/ethernet/ti/icssg/icss_iep.c
 • drivers/net/ethernet/xscale/ptp_ixp46x.c
 • drivers/net/phy/bcm-phy-ptp.c
 • drivers/ptp/ptp_ocp.c
 • drivers/ptp/ptp_pch.c
 • drivers/ptp/ptp_qoriq.c

These drivers behavior does change slightly: they will now reject the
PTP_EXTTS_REQUEST2 ioctl, because they do not strictly validate their
flags. This also makes them no longer incorrectly accept PTP_EXT_OFFSET.

Also note that the renesas ravb driver does not support PTP_STRICT_FLAGS.
We could leave the .supported_extts_flags as 0, but I added the
PTP_RISING_EDGE | PTP_FALLING_EDGE since the driver previously manually
validated these flags. This is equivalent to 0 because the core will allow
these flags regardless unless PTP_STRICT_FLAGS is also set.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20250414-jk-supported-perout-flags-v2-1-f6b17d15475c@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ptp_clock_kernel.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 0d68d09bedd1..25cba2e5ee69 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -68,6 +68,17 @@ struct ptp_system_timestamp {
  * @n_per_out: The number of programmable periodic signals.
  * @n_pins:    The number of programmable pins.
  * @pps:       Indicates whether the clock supports a PPS callback.
+ *
+ * @supported_extts_flags:  The set of flags the driver supports for the
+ *                          PTP_EXTTS_REQUEST ioctl. The PTP core will use
+ *                          this list to reject unsupported requests.
+ *                          PTP_ENABLE_FEATURE is assumed and does not need to
+ *                          be included. If PTP_STRICT_FLAGS is *not* set,
+ *                          then both PTP_RISING_EDGE and PTP_FALLING_EDGE
+ *                          will be assumed. Note that PTP_STRICT_FLAGS must
+ *                          be set if the drivers wants to honor
+ *                          PTP_EXTTS_REQUEST2 and any future flags.
+ *
  * @pin_config: Array of length 'n_pins'. If the number of
  *              programmable pins is nonzero, then drivers must
  *              allocate and initialize this array.
@@ -174,6 +185,7 @@ struct ptp_clock_info {
 	int n_per_out;
 	int n_pins;
 	int pps;
+	unsigned int supported_extts_flags;
 	struct ptp_pin_desc *pin_config;
 	int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm);
 	int (*adjphase)(struct ptp_clock_info *ptp, s32 phase);
-- 
cgit v1.2.3


From d9f3e9ecc4562ae07aaf614cf0a6690ef7ca0e10 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 14 Apr 2025 14:26:31 -0700
Subject: net: ptp: introduce .supported_perout_flags to ptp_clock_info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PTP_PEROUT_REQUEST2 ioctl has gained support for flags specifying
specific output behavior including PTP_PEROUT_ONE_SHOT,
PTP_PEROUT_DUTY_CYCLE, PTP_PEROUT_PHASE.

Driver authors are notorious for not checking the flags of the request.
This results in misinterpreting the request, generating an output signal
that does not match the requested value. It is anticipated that even more
flags will be added in the future, resulting in even more broken requests.

Expecting these issues to be caught during review or playing whack-a-mole
after the fact is not a great solution.

Instead, introduce the supported_perout_flags field in the ptp_clock_info
structure. Update the core character device logic to explicitly reject any
request which has a flag not on this list.

This ensures that drivers must 'opt in' to the flags they support. Drivers
which don't set the .supported_perout_flags field will not need to check
that unsupported flags aren't passed, as the core takes care of this.

Update the drivers which do support flags to set this new field.

Note the following driver files set n_per_out to a non-zero value but did
not check the flags at all:

 • drivers/ptp/ptp_clockmatrix.c
 • drivers/ptp/ptp_idt82p33.c
 • drivers/ptp/ptp_fc3.c
 • drivers/net/ethernet/ti/am65-cpts.c
 • drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
 • drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
 • drivers/net/dsa/sja1105/sja1105_ptp.c
 • drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
 • drivers/net/ethernet/mscc/ocelot_vsc7514.c
 • drivers/net/ethernet/intel/i40e/i40e_ptp.c

Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20250414-jk-supported-perout-flags-v2-2-f6b17d15475c@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ptp_clock_kernel.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 25cba2e5ee69..eced7e9bf69a 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -69,6 +69,11 @@ struct ptp_system_timestamp {
  * @n_pins:    The number of programmable pins.
  * @pps:       Indicates whether the clock supports a PPS callback.
  *
+ * @supported_perout_flags:  The set of flags the driver supports for the
+ *                           PTP_PEROUT_REQUEST ioctl. The PTP core will
+ *                           reject a request with any flag not specified
+ *                           here.
+ *
  * @supported_extts_flags:  The set of flags the driver supports for the
  *                          PTP_EXTTS_REQUEST ioctl. The PTP core will use
  *                          this list to reject unsupported requests.
@@ -185,6 +190,7 @@ struct ptp_clock_info {
 	int n_per_out;
 	int n_pins;
 	int pps;
+	unsigned int supported_perout_flags;
 	unsigned int supported_extts_flags;
 	struct ptp_pin_desc *pin_config;
 	int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm);
-- 
cgit v1.2.3


From 43eca05b6a3b917c600e10cc6b06bfa57fa57401 Mon Sep 17 00:00:00 2001
From: Cosmin Ratiu <cratiu@nvidia.com>
Date: Fri, 11 Apr 2025 10:49:56 +0300
Subject: xfrm: Add explicit dev to .xdo_dev_state_{add,delete,free}

Previously, device driver IPSec offload implementations would fall into
two categories:
1. Those that used xso.dev to determine the offload device.
2. Those that used xso.real_dev to determine the offload device.

The first category didn't work with bonding while the second did.
In a non-bonding setup the two pointers are the same.

This commit adds explicit pointers for the offload netdevice to
.xdo_dev_state_add() / .xdo_dev_state_delete() / .xdo_dev_state_free()
which eliminates the confusion and allows drivers from the first
category to work with bonding.

xso.real_dev now becomes a private pointer managed by the bonding
driver.

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/linux/netdevice.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d8544f6a680c..88dfb8aeed3c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1013,9 +1013,13 @@ struct netdev_bpf {
 
 #ifdef CONFIG_XFRM_OFFLOAD
 struct xfrmdev_ops {
-	int	(*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack);
-	void	(*xdo_dev_state_delete) (struct xfrm_state *x);
-	void	(*xdo_dev_state_free) (struct xfrm_state *x);
+	int	(*xdo_dev_state_add)(struct net_device *dev,
+				     struct xfrm_state *x,
+				     struct netlink_ext_ack *extack);
+	void	(*xdo_dev_state_delete)(struct net_device *dev,
+					struct xfrm_state *x);
+	void	(*xdo_dev_state_free)(struct net_device *dev,
+				      struct xfrm_state *x);
 	bool	(*xdo_dev_offload_ok) (struct sk_buff *skb,
 				       struct xfrm_state *x);
 	void	(*xdo_dev_state_advance_esn) (struct xfrm_state *x);
-- 
cgit v1.2.3


From 17240749f26e07cafa676688d8a3326086498447 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@openvpn.net>
Date: Tue, 15 Apr 2025 13:17:29 +0200
Subject: skb: implement skb_send_sock_locked_with_flags()

When sending an skb over a socket using skb_send_sock_locked(),
it is currently not possible to specify any flag to be set in
msghdr->msg_flags.

However, we may want to pass flags the user may have specified,
like MSG_NOSIGNAL.

Extend __skb_send_sock() with a new argument 'flags' and add a
new interface named skb_send_sock_locked_with_flags().

Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>
Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
Link: https://patch.msgid.link/20250415-b4-ovpn-v26-12-577f6097b964@openvpn.net
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/skbuff.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f1381aff0f89..beb084ee4f4d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4145,6 +4145,8 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
 		    unsigned int flags);
 int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
 			 int len);
+int skb_send_sock_locked_with_flags(struct sock *sk, struct sk_buff *skb,
+				    int offset, int len, int flags);
 int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
-- 
cgit v1.2.3


From a1b669ea16c4d7c1a1a8fc7e25aaf651ea0078c3 Mon Sep 17 00:00:00 2001
From: Amery Hung <ameryhung@gmail.com>
Date: Wed, 9 Apr 2025 14:45:57 -0700
Subject: bpf: Prepare to reuse get_ctx_arg_idx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename get_ctx_arg_idx to bpf_ctx_arg_idx, and allow others to call it.
No functional change.

Signed-off-by: Amery Hung <ameryhung@gmail.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20250409214606.2000194-2-ameryhung@gmail.com
---
 include/linux/btf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index ebc0c0c9b944..b2983706292f 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -522,6 +522,7 @@ bool btf_param_match_suffix(const struct btf *btf,
 			    const char *suffix);
 int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto,
 		       u32 arg_no);
+u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, int off);
 
 struct bpf_verifier_log;
 
-- 
cgit v1.2.3


From 151e13ece86d234213b7f224f0e26a957c0eeb3e Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Tue, 15 Apr 2025 18:02:15 -0700
Subject: net: ethtool: Adjust exactly ETH_GSTRING_LEN-long stats to use memcpy

Many drivers populate the stats buffer using C-String based APIs (e.g.
ethtool_sprintf() and ethtool_puts()), usually when building up the
list of stats individually (i.e. with a for() loop). This, however,
requires that the source strings be populated in such a way as to have
a terminating NUL byte in the source.

Other drivers populate the stats buffer directly using one big memcpy()
of an entire array of strings. No NUL termination is needed here, as the
bytes are being directly passed through. Yet others will build up the
stats buffer individually, but also use memcpy(). This, too, does not
need NUL termination of the source strings.

However, there are cases where the strings that populate the
source stats strings are exactly ETH_GSTRING_LEN long, and GCC
15's -Wunterminated-string-initialization option complains that the
trailing NUL byte has been truncated. This situation is fine only if the
driver is using the memcpy() approach. If the C-String APIs are used,
the destination string name will have its final byte truncated by the
required trailing NUL byte applied by the C-string API.

For drivers that are already using memcpy() but have initializers that
truncate the NUL terminator, mark their source strings as __nonstring to
silence the GCC warnings.

For drivers that have initializers that truncate the NUL terminator and
are using the C-String APIs, switch to memcpy() to avoid destination
string truncation and mark their source strings as __nonstring to silence
the GCC warnings. (Also introduce ethtool_cpy() as a helper to make this
an easy replacement).

Specifically the following warnings were investigated and addressed:

../drivers/net/ethernet/chelsio/cxgb/cxgb2.c:364:9: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  364 |         "TxFramesAbortedDueToXSCollisions",
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/net/ethernet/freescale/enetc/enetc_ethtool.c:165:33: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  165 |         { ENETC_PM_R1523X(0),   "MAC rx 1523 to max-octet packets" },
      |                                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/net/ethernet/freescale/enetc/enetc_ethtool.c:190:33: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  190 |         { ENETC_PM_T1523X(0),   "MAC tx 1523 to max-octet packets" },
      |                                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/net/ethernet/google/gve/gve_ethtool.c:76:9: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
   76 |         "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt",
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c:117:53: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  117 |         STMMAC_STAT(ptp_rx_msg_type_pdelay_follow_up),
      |                                                     ^
../drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c:46:12: note: in definition of macro 'STMMAC_STAT'
   46 |         { #m, sizeof_field(struct stmmac_extra_stats, m),       \
      |            ^
../drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c:328:24: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  328 |                 .str = "a_mac_control_frames_transmitted",
      |                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c:340:24: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  340 |                 .str = "a_pause_mac_ctrl_frames_received",
      |                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Kees Cook <kees@kernel.org>
Reviewed-by: Petr Machata <petrm@nvidia.com> # for mlxsw
Reviewed-by: Harshitha Ramamurthy <hramamurthy@google.com>
Link: https://patch.msgid.link/20250416010210.work.904-kees@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 013d25858642..7edb5f5e7134 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -1330,6 +1330,17 @@ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...);
  */
 extern void ethtool_puts(u8 **data, const char *str);
 
+/**
+ * ethtool_cpy - Write possibly-not-NUL-terminated string to ethtool string data
+ * @data: Pointer to a pointer to the start of string to write into
+ * @str: NUL-byte padded char array of size ETH_GSTRING_LEN to copy from
+ */
+#define ethtool_cpy(data, str)	do {				\
+	BUILD_BUG_ON(sizeof(str) != ETH_GSTRING_LEN);		\
+	memcpy(*(data), str, ETH_GSTRING_LEN);			\
+	*(data) += ETH_GSTRING_LEN;				\
+} while (0)
+
 /* Link mode to forced speed capabilities maps */
 struct ethtool_forced_speed_map {
 	u32		speed;
-- 
cgit v1.2.3


From 22cbc1ee268b7ec0000848708944daa61c6e4909 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 15 Apr 2025 20:04:47 -0700
Subject: netdev: fix the locking for netdev notifications

Kuniyuki reports that the assert for netdev lock fires when
there are netdev event listeners (otherwise we skip the netlink
event generation).

Correct the locking when coming from the notifier.

The NETDEV_XDP_FEAT_CHANGE notifier is already fully locked,
it's the documentation that's incorrect.

Fixes: 99e44f39a8f7 ("netdev: depend on netdev->lock for xdp features")
Reported-by: syzkaller <syzkaller@googlegroups.com>
Reported-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/20250410171019.62128-1-kuniyu@amazon.com
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250416030447.1077551-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e6036b82ef4c..0321fd952f70 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2520,7 +2520,7 @@ struct net_device {
 	 *	@net_shaper_hierarchy, @reg_state, @threaded
 	 *
 	 * Double protects:
-	 *	@up, @moving_ns, @nd_net, @xdp_flags
+	 *	@up, @moving_ns, @nd_net, @xdp_features
 	 *
 	 * Double ops protects:
 	 *	@real_num_rx_queues, @real_num_tx_queues
-- 
cgit v1.2.3


From 9ff2aa4206eff40a202e425f232036bc84ad4c0e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 17 Mar 2025 23:07:30 -0400
Subject: net: ethtool: mm: extract stmmac verification logic into common
 library

It appears that stmmac is not the only hardware which requires a
software-driven verification state machine for the MAC Merge layer.

While on the one hand it's good to encourage hardware implementations,
on the other hand it's quite difficult to tolerate multiple drivers
implementing independently fairly non-trivial logic.

Extract the hardware-independent logic from stmmac into library code and
put it in ethtool. Name the state structure "mmsv" for MAC Merge
Software Verification. Let this expose an operations structure for
executing the hardware stuff: sync hardware with the tx_active boolean
(result of verification process), enable/disable the pMAC, send mPackets,
notify library of external events (reception of mPackets), as well as
link state changes.

Note that it is assumed that the external events are received in hardirq
context. If they are not, it is probably a good idea to disable hardirqs
when calling ethtool_mmsv_event_handle(), because the library does not
do so.

Also, the MM software verification process has no business with the
tx_min_frag_size, that is all the driver's to handle.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Co-developed-by: Choong Yong Liang <yong.liang.choong@linux.intel.com>
Signed-off-by: Choong Yong Liang <yong.liang.choong@linux.intel.com>
Tested-by: Choong Yong Liang <yong.liang.choong@linux.intel.com>
Tested-by: Furong Xu <0x1207@gmail.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/ethtool.h | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 7edb5f5e7134..117718c24814 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -17,9 +17,13 @@
 #include <linux/compat.h>
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
+#include <linux/timer_types.h>
 #include <uapi/linux/ethtool.h>
 #include <uapi/linux/net_tstamp.h>
 
+#define ETHTOOL_MM_MAX_VERIFY_TIME_MS		128
+#define ETHTOOL_MM_MAX_VERIFY_RETRIES		3
+
 struct compat_ethtool_rx_flow_spec {
 	u32		flow_type;
 	union ethtool_flow_union h_u;
@@ -718,6 +722,75 @@ struct ethtool_mm_stats {
 	u64 MACMergeHoldCount;
 };
 
+enum ethtool_mmsv_event {
+	ETHTOOL_MMSV_LP_SENT_VERIFY_MPACKET,
+	ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET,
+	ETHTOOL_MMSV_LP_SENT_RESPONSE_MPACKET,
+};
+
+/* MAC Merge verification mPacket type */
+enum ethtool_mpacket {
+	ETHTOOL_MPACKET_VERIFY,
+	ETHTOOL_MPACKET_RESPONSE,
+};
+
+struct ethtool_mmsv;
+
+/**
+ * struct ethtool_mmsv_ops - Operations for MAC Merge Software Verification
+ * @configure_tx: Driver callback for the event where the preemptible TX
+ *		  becomes active or inactive. Preemptible traffic
+ *		  classes must be committed to hardware only while
+ *		  preemptible TX is active.
+ * @configure_pmac: Driver callback for the event where the pMAC state
+ *		    changes as result of an administrative setting
+ *		    (ethtool) or a call to ethtool_mmsv_link_state_handle().
+ * @send_mpacket: Driver-provided method for sending a Verify or a Response
+ *		  mPacket.
+ */
+struct ethtool_mmsv_ops {
+	void (*configure_tx)(struct ethtool_mmsv *mmsv, bool tx_active);
+	void (*configure_pmac)(struct ethtool_mmsv *mmsv, bool pmac_enabled);
+	void (*send_mpacket)(struct ethtool_mmsv *mmsv, enum ethtool_mpacket mpacket);
+};
+
+/**
+ * struct ethtool_mmsv - MAC Merge Software Verification
+ * @ops: operations for MAC Merge Software Verification
+ * @dev: pointer to net_device structure
+ * @lock: serialize access to MAC Merge state between
+ *	  ethtool requests and link state updates.
+ * @status: current verification FSM state
+ * @verify_timer: timer for verification in local TX direction
+ * @verify_enabled: indicates if verification is enabled
+ * @verify_retries: number of retries for verification
+ * @pmac_enabled: indicates if the preemptible MAC is enabled
+ * @verify_time: time for verification in milliseconds
+ * @tx_enabled: indicates if transmission is enabled
+ */
+struct ethtool_mmsv {
+	const struct ethtool_mmsv_ops *ops;
+	struct net_device *dev;
+	spinlock_t lock;
+	enum ethtool_mm_verify_status status;
+	struct timer_list verify_timer;
+	bool verify_enabled;
+	int verify_retries;
+	bool pmac_enabled;
+	u32 verify_time;
+	bool tx_enabled;
+};
+
+void ethtool_mmsv_stop(struct ethtool_mmsv *mmsv);
+void ethtool_mmsv_link_state_handle(struct ethtool_mmsv *mmsv, bool up);
+void ethtool_mmsv_event_handle(struct ethtool_mmsv *mmsv,
+			       enum ethtool_mmsv_event event);
+void ethtool_mmsv_get_mm(struct ethtool_mmsv *mmsv,
+			 struct ethtool_mm_state *state);
+void ethtool_mmsv_set_mm(struct ethtool_mmsv *mmsv, struct ethtool_mm_cfg *cfg);
+void ethtool_mmsv_init(struct ethtool_mmsv *mmsv, struct net_device *dev,
+		       const struct ethtool_mmsv_ops *ops);
+
 /**
  * struct ethtool_rxfh_param - RXFH (RSS) parameters
  * @hfunc: Defines the current RSS hash function used by HW (or to be set to).
-- 
cgit v1.2.3


From 145436ae01193c0a379fd3ea9c4fbdf32863db1f Mon Sep 17 00:00:00 2001
From: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
Date: Wed, 16 Apr 2025 19:14:49 +0200
Subject: net: phy: Add helper for getting MAC termination resistance

Add helper which returns the MAC termination resistance value. Modifying
the resistance to an appropriate value can reduce signal reflections and
therefore improve signal quality.

Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
Link: https://patch.msgid.link/20250416-dp83822-mac-impedance-v3-3-028ac426cddb@liebherr.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index fb755358d965..066a28a4b64b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -2040,6 +2040,9 @@ int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev,
 			      enum ethtool_link_mode_bit_indices linkmode,
 			      u32 *val);
 
+int phy_get_mac_termination(struct phy_device *phydev, struct device *dev,
+			    u32 *val);
+
 void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv,
 		       bool *tx_pause, bool *rx_pause);
 
-- 
cgit v1.2.3


From 76a853f86c97b348dc96e75a6e6f94d8750715ee Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 13 Mar 2025 13:49:39 +0100
Subject: wifi: free SKBTX_WIFI_STATUS skb tx_flags flag

Jason mentioned at netdevconf that we've run out of tx_flags in
the skb_shinfo(). Gain one bit back by removing the wifi bit.

We can do that because the only userspace application for it
(hostapd) doesn't change the setting on the socket, it just
uses different sockets, and normally doesn't even use this any
more, sending the frames over nl80211 instead.

Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Link: https://patch.msgid.link/20250313134942.52ff54a140ec.If390bbdc46904cf451256ba989d7a056c457af6e@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/skbuff.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b974a277975a..9ee39670e8f4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -481,9 +481,6 @@ enum {
 	/* generate software time stamp on packet tx completion */
 	SKBTX_COMPLETION_TSTAMP = 1 << 3,
 
-	/* generate wifi status information (where possible) */
-	SKBTX_WIFI_STATUS = 1 << 4,
-
 	/* determine hardware time stamp based on time or cycles */
 	SKBTX_HW_TSTAMP_NETDEV = 1 << 5,
 
-- 
cgit v1.2.3


From fcc2d3e11bcc8f01d52a8c419f49f86ff8343b7c Mon Sep 17 00:00:00 2001
From: Karthikeyan Kathirvel <karthikeyan.kathirvel@oss.qualcomm.com>
Date: Mon, 21 Apr 2025 16:45:05 +0530
Subject: wifi: ieee80211: define beacon protection bit field

An AP supporting Beacon Protection should set bit 84 in
the extended capabilities IE (9.4.2.25 in the 802.11be D7 spec).
So the *4th* bit of the 10th byte should be checked to figure out
whether beacon protection is enabled or disabled.

Signed-off-by: Karthikeyan Kathirvel <karthikeyan.kathirvel@oss.qualcomm.com>
Reviewed-by: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
Link: https://patch.msgid.link/20250421111505.3633992-1-karthikeyan.kathirvel@oss.qualcomm.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 508d466de1cc..cbc3928aa504 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -4087,6 +4087,9 @@ enum ieee80211_tdls_actioncode {
 /* Defines support for enhanced multi-bssid advertisement*/
 #define WLAN_EXT_CAPA11_EMA_SUPPORT	BIT(3)
 
+/* Enable Beacon Protection */
+#define WLAN_EXT_CAPA11_BCN_PROTECT	BIT(4)
+
 /* TDLS specific payload type in the LLC/SNAP header */
 #define WLAN_TDLS_SNAP_RFTYPE	0x2
 
-- 
cgit v1.2.3


From 91ea0489dc97bfda72ed74f98ab66dc0ab4235c7 Mon Sep 17 00:00:00 2001
From: Rameshkumar Sundaram <quic_ramess@quicinc.com>
Date: Thu, 27 Mar 2025 10:43:19 +0530
Subject: wifi: ieee80211: Add helpers to fetch EMLSR delay and timeout values

Add helpers to get EMLSR transition delay, padding delay and transition
timeout values from EML capabilities field of Multi-link Element.

Signed-off-by: Rameshkumar Sundaram <quic_ramess@quicinc.com>
Link: https://patch.msgid.link/20250327051320.3253783-4-quic_ramess@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 74 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index cbc3928aa504..15a87f522017 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -5618,6 +5618,80 @@ static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len)
 	return len >= fixed + elem_len;
 }
 
+/**
+ * ieee80211_emlsr_pad_delay_in_us - Fetch the EMLSR Padding delay
+ *	in microseconds
+ * @eml_cap: EML capabilities field value from common info field of
+ *	the Multi-link element
+ * Return: the EMLSR Padding delay (in microseconds) encoded in the
+ *	EML Capabilities field
+ */
+
+static inline u32 ieee80211_emlsr_pad_delay_in_us(u16 eml_cap)
+{
+	/* IEEE Std 802.11be-2024 Table 9-417i—Encoding of the EMLSR
+	 * Padding Delay subfield.
+	 */
+	u32 pad_delay = u16_get_bits(eml_cap,
+				     IEEE80211_EML_CAP_EMLSR_PADDING_DELAY);
+
+	if (!pad_delay ||
+	    pad_delay > IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US)
+		return 0;
+
+	return 32 * (1 << (pad_delay - 1));
+}
+
+/**
+ * ieee80211_emlsr_trans_delay_in_us - Fetch the EMLSR Transition
+ *	delay in microseconds
+ * @eml_cap: EML capabilities field value from common info field of
+ *	the Multi-link element
+ * Return: the EMLSR Transition delay (in microseconds) encoded in the
+ *	EML Capabilities field
+ */
+
+static inline u32 ieee80211_emlsr_trans_delay_in_us(u16 eml_cap)
+{
+	/* IEEE Std 802.11be-2024 Table 9-417j—Encoding of the EMLSR
+	 * Transition Delay subfield.
+	 */
+	u32 trans_delay =
+		u16_get_bits(eml_cap,
+			     IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY);
+
+	/* invalid values also just use 0 */
+	if (!trans_delay ||
+	    trans_delay > IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US)
+		return 0;
+
+	return 16 * (1 << (trans_delay - 1));
+}
+
+/**
+ * ieee80211_eml_trans_timeout_in_us - Fetch the EMLSR Transition
+ *	timeout value in microseconds
+ * @eml_cap: EML capabilities field value from common info field of
+ *	the Multi-link element
+ * Return: the EMLSR Transition timeout (in microseconds) encoded in
+ *	the EML Capabilities field
+ */
+
+static inline u32 ieee80211_eml_trans_timeout_in_us(u16 eml_cap)
+{
+	/* IEEE Std 802.11be-2024 Table 9-417m—Encoding of the
+	 * Transition Timeout subfield.
+	 */
+	u8 timeout = u16_get_bits(eml_cap,
+				  IEEE80211_EML_CAP_TRANSITION_TIMEOUT);
+
+	/* invalid values also just use 0 */
+	if (!timeout || timeout > IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU)
+		return 0;
+
+	return 128 * (1 << (timeout - 1));
+}
+
 #define for_each_mle_subelement(_elem, _data, _len)			\
 	if (ieee80211_mle_size_ok(_data, _len))				\
 		for_each_element(_elem,					\
-- 
cgit v1.2.3


From 52358dd63e348c3b6c488acc105be1aeda8fb923 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 18 Apr 2025 11:04:01 +0200
Subject: net: phy: remove function stubs

All callers of these functions depend on PHYLIB or select it directly
or indirectly by selecting PHYLINK. Stubs make sense for optional
functionality, but that's not the case here.

MDIO_XGENE usually is selected by NET_XGENE which also selects PHYLIB.
Add a dependency to PHYLIB nevertheless, in order not to break
randconfig builds.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/f7a69a1f-60e9-4ac0-8b7c-481e0cc850e7@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 37 -------------------------------------
 1 file changed, 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 066a28a4b64b..3beaf225ee88 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1753,7 +1753,6 @@ int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum,
 struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id,
 				     bool is_c45,
 				     struct phy_c45_device_ids *c45_ids);
-#if IS_ENABLED(CONFIG_PHYLIB)
 int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id);
 struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode);
 struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode);
@@ -1761,42 +1760,6 @@ struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode);
 struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
 int phy_device_register(struct phy_device *phy);
 void phy_device_free(struct phy_device *phydev);
-#else
-static inline int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id)
-{
-	return 0;
-}
-static inline
-struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode)
-{
-	return 0;
-}
-
-static inline
-struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode)
-{
-	return NULL;
-}
-
-static inline
-struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode)
-{
-	return NULL;
-}
-
-static inline
-struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45)
-{
-	return NULL;
-}
-
-static inline int phy_device_register(struct phy_device *phy)
-{
-	return 0;
-}
-
-static inline void phy_device_free(struct phy_device *phydev) { }
-#endif /* CONFIG_PHYLIB */
 void phy_device_remove(struct phy_device *phydev);
 int phy_get_c45_ids(struct phy_device *phydev);
 int phy_init_hw(struct phy_device *phydev);
-- 
cgit v1.2.3


From bc2550b4e195754fbb24aac1f012d3dd9e3b4edc Mon Sep 17 00:00:00 2001
From: Jeremy Harris <jgh@exim.org>
Date: Wed, 23 Apr 2025 13:43:33 +0100
Subject: tcp: fastopen: note that a child socket was created

tcp: fastopen: note that a child socket was created

This uses up the last bit in a field of tcp_sock.

Signed-off-by: Jeremy Harris <jgh@exim.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Link: https://patch.msgid.link/20250423124334.4916-2-jgh@exim.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/tcp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1669d95bb0f9..a8af71623ba7 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -385,7 +385,8 @@ struct tcp_sock {
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
 		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
-		syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
+		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+		syn_fastopen_child:1; /* created TFO passive child socket */
 
 	u8	keepalive_probes; /* num of allowed keep alive probes	*/
 	u32	tcp_tx_delay;	/* delay (in usec) added to TX packets */
-- 
cgit v1.2.3


From d57ee99831e336576359beb26e2b140511c99106 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 23 Apr 2025 17:08:08 +0200
Subject: net: ethernet: mtk_wed: annotate RCU release in attach()

There are some sparse warnings in wifi, and it seems that
it's actually possible to annotate a function pointer with
__releases(), making the sparse warnings go away. In a way
that also serves as documentation that rcu_read_unlock()
must be called in the attach method, so add that annotation.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://patch.msgid.link/20250423150811.456205-2-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/soc/mediatek/mtk_wed.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
index a476648858a6..d8949a4ed0dc 100644
--- a/include/linux/soc/mediatek/mtk_wed.h
+++ b/include/linux/soc/mediatek/mtk_wed.h
@@ -192,7 +192,7 @@ struct mtk_wed_device {
 };
 
 struct mtk_wed_ops {
-	int (*attach)(struct mtk_wed_device *dev);
+	int (*attach)(struct mtk_wed_device *dev) __releases(RCU);
 	int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring,
 			     void __iomem *regs, bool reset);
 	int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring,
-- 
cgit v1.2.3


From 144530c15ec7fa95b29812d86f4be527338ea204 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Fri, 25 Apr 2025 13:46:16 -0700
Subject: pds_core: remove extra name description

Fix the kernel-doc complaint
include/linux/pds/pds_adminq.h:481: warning: Excess struct member 'name' description in 'pds_core_lif_getattr_comp'

Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pds/pds_adminq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
index ddd111f04ca0..339156113fa5 100644
--- a/include/linux/pds/pds_adminq.h
+++ b/include/linux/pds/pds_adminq.h
@@ -463,7 +463,6 @@ struct pds_core_lif_getattr_cmd {
  * @rsvd:       Word boundary padding
  * @comp_index: Index in the descriptor ring for which this is the completion
  * @state:	LIF state (enum pds_core_lif_state)
- * @name:	LIF name string, 0 terminated
  * @features:	Features (enum pds_core_hw_features)
  * @rsvd2:      Word boundary padding
  * @color:	Color bit
-- 
cgit v1.2.3


From 7c4f4c4fa9b6fbb7e483bebd02f7b9cbc20ca5cc Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Fri, 25 Apr 2025 13:46:17 -0700
Subject: pds_core: smaller adminq poll starting interval

Shorten the adminq poll starting interval in order to notice
any transaction errors more quickly.

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pds/pds_adminq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
index 339156113fa5..40ff0ec2b879 100644
--- a/include/linux/pds/pds_adminq.h
+++ b/include/linux/pds/pds_adminq.h
@@ -4,7 +4,7 @@
 #ifndef _PDS_CORE_ADMINQ_H_
 #define _PDS_CORE_ADMINQ_H_
 
-#define PDSC_ADMINQ_MAX_POLL_INTERVAL	256
+#define PDSC_ADMINQ_MAX_POLL_INTERVAL	256000	/* usecs */
 
 enum pds_core_adminq_flags {
 	PDS_AQ_FLAG_FASTPOLL	= BIT(1),	/* completion poll at 1ms */
-- 
cgit v1.2.3


From 468d8b462ac64659caec53eff34f02963d5f52c8 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Tue, 15 Apr 2025 21:15:45 -0500
Subject: iidc/ice/irdma: Rename IDC header file

To prepare for the IDC upgrade to support different CORE
PCI drivers, rename header file from iidc.h to iidc_rdma.h
since this files functionality is specifically for RDMA support.

Use net/dscp.h include in irdma osdep.h and DSCP_MAX type.h,
instead of iidc header and define.

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/iidc.h      | 109 ------------------------------------
 include/linux/net/intel/iidc_rdma.h | 109 ++++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+), 109 deletions(-)
 delete mode 100644 include/linux/net/intel/iidc.h
 create mode 100644 include/linux/net/intel/iidc_rdma.h

(limited to 'include/linux')

diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h
deleted file mode 100644
index 13274c3def66..000000000000
--- a/include/linux/net/intel/iidc.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2021, Intel Corporation. */
-
-#ifndef _IIDC_H_
-#define _IIDC_H_
-
-#include <linux/auxiliary_bus.h>
-#include <linux/dcbnl.h>
-#include <linux/device.h>
-#include <linux/if_ether.h>
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
-
-enum iidc_event_type {
-	IIDC_EVENT_BEFORE_MTU_CHANGE,
-	IIDC_EVENT_AFTER_MTU_CHANGE,
-	IIDC_EVENT_BEFORE_TC_CHANGE,
-	IIDC_EVENT_AFTER_TC_CHANGE,
-	IIDC_EVENT_CRIT_ERR,
-	IIDC_EVENT_NBITS		/* must be last */
-};
-
-enum iidc_reset_type {
-	IIDC_PFR,
-	IIDC_CORER,
-	IIDC_GLOBR,
-};
-
-enum iidc_rdma_protocol {
-	IIDC_RDMA_PROTOCOL_IWARP = BIT(0),
-	IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1),
-};
-
-#define IIDC_MAX_USER_PRIORITY		8
-#define IIDC_MAX_DSCP_MAPPING		64
-#define IIDC_DSCP_PFC_MODE		0x1
-
-/* Struct to hold per RDMA Qset info */
-struct iidc_rdma_qset_params {
-	/* Qset TEID returned to the RDMA driver in
-	 * ice_add_rdma_qset and used by RDMA driver
-	 * for calls to ice_del_rdma_qset
-	 */
-	u32 teid;	/* Qset TEID */
-	u16 qs_handle; /* RDMA driver provides this */
-	u16 vport_id; /* VSI index */
-	u8 tc; /* TC branch the Qset should belong to */
-};
-
-struct iidc_qos_info {
-	u64 tc_ctx;
-	u8 rel_bw;
-	u8 prio_type;
-	u8 egress_virt_up;
-	u8 ingress_virt_up;
-};
-
-/* Struct to pass QoS info */
-struct iidc_qos_params {
-	struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
-	u8 up2tc[IIDC_MAX_USER_PRIORITY];
-	u8 vport_relative_bw;
-	u8 vport_priority_type;
-	u8 num_tc;
-	u8 pfc_mode;
-	u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
-};
-
-struct iidc_event {
-	DECLARE_BITMAP(type, IIDC_EVENT_NBITS);
-	u32 reg;
-};
-
-struct ice_pf;
-
-int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type);
-int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
-void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos);
-int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
-void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
-
-/* Structure representing auxiliary driver tailored information about the core
- * PCI dev, each auxiliary driver using the IIDC interface will have an
- * instance of this struct dedicated to it.
- */
-
-struct iidc_auxiliary_dev {
-	struct auxiliary_device adev;
-	struct ice_pf *pf;
-};
-
-/* structure representing the auxiliary driver. This struct is to be
- * allocated and populated by the auxiliary driver's owner. The core PCI
- * driver will access these ops by performing a container_of on the
- * auxiliary_device->dev.driver.
- */
-struct iidc_auxiliary_drv {
-	struct auxiliary_driver adrv;
-	/* This event_handler is meant to be a blocking call.  For instance,
-	 * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not
-	 * return until the auxiliary driver is ready for the MTU change to
-	 * happen.
-	 */
-	void (*event_handler)(struct ice_pf *pf, struct iidc_event *event);
-};
-
-#endif /* _IIDC_H_*/
diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h
new file mode 100644
index 000000000000..0cd75404e459
--- /dev/null
+++ b/include/linux/net/intel/iidc_rdma.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _IIDC_RDMA_H_
+#define _IIDC_RDMA_H_
+
+#include <linux/auxiliary_bus.h>
+#include <linux/dcbnl.h>
+#include <linux/device.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+
+enum iidc_event_type {
+	IIDC_EVENT_BEFORE_MTU_CHANGE,
+	IIDC_EVENT_AFTER_MTU_CHANGE,
+	IIDC_EVENT_BEFORE_TC_CHANGE,
+	IIDC_EVENT_AFTER_TC_CHANGE,
+	IIDC_EVENT_CRIT_ERR,
+	IIDC_EVENT_NBITS		/* must be last */
+};
+
+enum iidc_reset_type {
+	IIDC_PFR,
+	IIDC_CORER,
+	IIDC_GLOBR,
+};
+
+enum iidc_rdma_protocol {
+	IIDC_RDMA_PROTOCOL_IWARP = BIT(0),
+	IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1),
+};
+
+#define IIDC_MAX_USER_PRIORITY		8
+#define IIDC_MAX_DSCP_MAPPING		64
+#define IIDC_DSCP_PFC_MODE		0x1
+
+/* Struct to hold per RDMA Qset info */
+struct iidc_rdma_qset_params {
+	/* Qset TEID returned to the RDMA driver in
+	 * ice_add_rdma_qset and used by RDMA driver
+	 * for calls to ice_del_rdma_qset
+	 */
+	u32 teid;	/* Qset TEID */
+	u16 qs_handle; /* RDMA driver provides this */
+	u16 vport_id; /* VSI index */
+	u8 tc; /* TC branch the Qset should belong to */
+};
+
+struct iidc_qos_info {
+	u64 tc_ctx;
+	u8 rel_bw;
+	u8 prio_type;
+	u8 egress_virt_up;
+	u8 ingress_virt_up;
+};
+
+/* Struct to pass QoS info */
+struct iidc_qos_params {
+	struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
+	u8 up2tc[IIDC_MAX_USER_PRIORITY];
+	u8 vport_relative_bw;
+	u8 vport_priority_type;
+	u8 num_tc;
+	u8 pfc_mode;
+	u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
+};
+
+struct iidc_event {
+	DECLARE_BITMAP(type, IIDC_EVENT_NBITS);
+	u32 reg;
+};
+
+struct ice_pf;
+
+int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
+int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
+int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type);
+int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
+void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos);
+int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
+void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
+
+/* Structure representing auxiliary driver tailored information about the core
+ * PCI dev, each auxiliary driver using the IIDC interface will have an
+ * instance of this struct dedicated to it.
+ */
+
+struct iidc_auxiliary_dev {
+	struct auxiliary_device adev;
+	struct ice_pf *pf;
+};
+
+/* structure representing the auxiliary driver. This struct is to be
+ * allocated and populated by the auxiliary driver's owner. The core PCI
+ * driver will access these ops by performing a container_of on the
+ * auxiliary_device->dev.driver.
+ */
+struct iidc_auxiliary_drv {
+	struct auxiliary_driver adrv;
+	/* This event_handler is meant to be a blocking call.  For instance,
+	 * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not
+	 * return until the auxiliary driver is ready for the MTU change to
+	 * happen.
+	 */
+	void (*event_handler)(struct ice_pf *pf, struct iidc_event *event);
+};
+
+#endif /* _IIDC_RDMA_H_*/
-- 
cgit v1.2.3


From 97b5631aae6896369712d6b7131afbc95c753587 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Tue, 15 Apr 2025 21:15:46 -0500
Subject: iidc/ice/irdma: Rename to iidc_* convention

In preparation of supporting more than a single core PCI driver
for RDMA, homogenize naming to iidc_rdma_* and IIDC_RDMA_*
form.

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/iidc_rdma.h | 38 +++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h
index 0cd75404e459..2b24a9912fa0 100644
--- a/include/linux/net/intel/iidc_rdma.h
+++ b/include/linux/net/intel/iidc_rdma.h
@@ -11,16 +11,16 @@
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 
-enum iidc_event_type {
-	IIDC_EVENT_BEFORE_MTU_CHANGE,
-	IIDC_EVENT_AFTER_MTU_CHANGE,
-	IIDC_EVENT_BEFORE_TC_CHANGE,
-	IIDC_EVENT_AFTER_TC_CHANGE,
-	IIDC_EVENT_CRIT_ERR,
-	IIDC_EVENT_NBITS		/* must be last */
+enum iidc_rdma_event_type {
+	IIDC_RDMA_EVENT_BEFORE_MTU_CHANGE,
+	IIDC_RDMA_EVENT_AFTER_MTU_CHANGE,
+	IIDC_RDMA_EVENT_BEFORE_TC_CHANGE,
+	IIDC_RDMA_EVENT_AFTER_TC_CHANGE,
+	IIDC_RDMA_EVENT_CRIT_ERR,
+	IIDC_RDMA_EVENT_NBITS		/* must be last */
 };
 
-enum iidc_reset_type {
+enum iidc_rdma_reset_type {
 	IIDC_PFR,
 	IIDC_CORER,
 	IIDC_GLOBR,
@@ -47,7 +47,7 @@ struct iidc_rdma_qset_params {
 	u8 tc; /* TC branch the Qset should belong to */
 };
 
-struct iidc_qos_info {
+struct iidc_rdma_qos_info {
 	u64 tc_ctx;
 	u8 rel_bw;
 	u8 prio_type;
@@ -56,8 +56,8 @@ struct iidc_qos_info {
 };
 
 /* Struct to pass QoS info */
-struct iidc_qos_params {
-	struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
+struct iidc_rdma_qos_params {
+	struct iidc_rdma_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
 	u8 up2tc[IIDC_MAX_USER_PRIORITY];
 	u8 vport_relative_bw;
 	u8 vport_priority_type;
@@ -66,8 +66,8 @@ struct iidc_qos_params {
 	u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
 };
 
-struct iidc_event {
-	DECLARE_BITMAP(type, IIDC_EVENT_NBITS);
+struct iidc_rdma_event {
+	DECLARE_BITMAP(type, IIDC_RDMA_EVENT_NBITS);
 	u32 reg;
 };
 
@@ -75,9 +75,11 @@ struct ice_pf;
 
 int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
 int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type);
+int ice_rdma_request_reset(struct ice_pf *pf,
+			   enum iidc_rdma_reset_type reset_type);
 int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
-void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos);
+void ice_get_qos_params(struct ice_pf *pf,
+			struct iidc_rdma_qos_params *qos);
 int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
 void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
 
@@ -86,7 +88,7 @@ void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
  * instance of this struct dedicated to it.
  */
 
-struct iidc_auxiliary_dev {
+struct iidc_rdma_core_auxiliary_dev {
 	struct auxiliary_device adev;
 	struct ice_pf *pf;
 };
@@ -96,14 +98,14 @@ struct iidc_auxiliary_dev {
  * driver will access these ops by performing a container_of on the
  * auxiliary_device->dev.driver.
  */
-struct iidc_auxiliary_drv {
+struct iidc_rdma_core_auxiliary_drv {
 	struct auxiliary_driver adrv;
 	/* This event_handler is meant to be a blocking call.  For instance,
 	 * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not
 	 * return until the auxiliary driver is ready for the MTU change to
 	 * happen.
 	 */
-	void (*event_handler)(struct ice_pf *pf, struct iidc_event *event);
+	void (*event_handler)(struct ice_pf *pf, struct iidc_rdma_event *event);
 };
 
 #endif /* _IIDC_RDMA_H_*/
-- 
cgit v1.2.3


From d9251a560ba67bbedd53b81aee32e1ad95f42000 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Tue, 15 Apr 2025 21:15:47 -0500
Subject: iidc/ice/irdma: Break iidc.h into two headers

In preparation of supporting more than a single core PCI driver
for RDMA, break the iidc_rdma.h header file into two more focused
headers.

Only the elements universal to all Intel drivers will remain in
the generic iidc_rdma.h header. Move the ice specific information
to an ice specific header file named iidc_rdma_ice.h.

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/iidc_rdma.h     | 14 +-------------
 include/linux/net/intel/iidc_rdma_ice.h | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 13 deletions(-)
 create mode 100644 include/linux/net/intel/iidc_rdma_ice.h

(limited to 'include/linux')

diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h
index 2b24a9912fa0..1e8136395154 100644
--- a/include/linux/net/intel/iidc_rdma.h
+++ b/include/linux/net/intel/iidc_rdma.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2021, Intel Corporation. */
+/* Copyright (C) 2021-2025, Intel Corporation. */
 
 #ifndef _IIDC_RDMA_H_
 #define _IIDC_RDMA_H_
@@ -71,18 +71,6 @@ struct iidc_rdma_event {
 	u32 reg;
 };
 
-struct ice_pf;
-
-int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_rdma_request_reset(struct ice_pf *pf,
-			   enum iidc_rdma_reset_type reset_type);
-int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
-void ice_get_qos_params(struct ice_pf *pf,
-			struct iidc_rdma_qos_params *qos);
-int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
-void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
-
 /* Structure representing auxiliary driver tailored information about the core
  * PCI dev, each auxiliary driver using the IIDC interface will have an
  * instance of this struct dedicated to it.
diff --git a/include/linux/net/intel/iidc_rdma_ice.h b/include/linux/net/intel/iidc_rdma_ice.h
new file mode 100644
index 000000000000..78d10003d776
--- /dev/null
+++ b/include/linux/net/intel/iidc_rdma_ice.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021-2025, Intel Corporation. */
+
+#ifndef _IIDC_RDMA_ICE_H_
+#define _IIDC_RDMA_ICE_H_
+
+struct ice_pf;
+
+int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
+int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
+int ice_rdma_request_reset(struct ice_pf *pf,
+			   enum iidc_rdma_reset_type reset_type);
+int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
+void ice_get_qos_params(struct ice_pf *pf,
+			struct iidc_rdma_qos_params *qos);
+int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
+void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
+
+#endif /* _IIDC_RDMA_ICE_H_*/
-- 
cgit v1.2.3


From 8239b771b94b639556c1987185fd82b2a896c923 Mon Sep 17 00:00:00 2001
From: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Date: Tue, 15 Apr 2025 21:15:48 -0500
Subject: ice: Replace ice specific DSCP mapping num with a kernel define

Replace ice driver specific DSCP mapping number defines
ICE_DSCP_NUM_VAL and IIDC_MAX_DSCP_MAPPING with
an equivalent kernel define DSCP_MAX.

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/iidc_rdma.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h
index 1e8136395154..7f1910289534 100644
--- a/include/linux/net/intel/iidc_rdma.h
+++ b/include/linux/net/intel/iidc_rdma.h
@@ -10,6 +10,7 @@
 #include <linux/if_ether.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <net/dscp.h>
 
 enum iidc_rdma_event_type {
 	IIDC_RDMA_EVENT_BEFORE_MTU_CHANGE,
@@ -32,7 +33,6 @@ enum iidc_rdma_protocol {
 };
 
 #define IIDC_MAX_USER_PRIORITY		8
-#define IIDC_MAX_DSCP_MAPPING		64
 #define IIDC_DSCP_PFC_MODE		0x1
 
 /* Struct to hold per RDMA Qset info */
@@ -63,7 +63,7 @@ struct iidc_rdma_qos_params {
 	u8 vport_priority_type;
 	u8 num_tc;
 	u8 pfc_mode;
-	u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
+	u8 dscp_map[DSCP_MAX];
 };
 
 struct iidc_rdma_event {
-- 
cgit v1.2.3


From a3e1c0ad835702555d90565584ab6f723adf7f94 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 29 Apr 2025 08:04:46 +0200
Subject: net: phy: factor out provider part from mdio_bus.c

After 52358dd63e34 ("net: phy: remove function stubs") there's a
problem if CONFIG_MDIO_BUS is set, but CONFIG_PHYLIB is not.
mdiobus_scan() uses phylib functions like get_phy_device().
Bringing back the stub wouldn't make much sense, because it would
allow to compile mdiobus_scan(), but the function would be unusable.
The stub returned NULL, and we have the following in mdiobus_scan():

phydev = get_phy_device(bus, addr, c45);
        if (IS_ERR(phydev))
                return phydev;

So calling mdiobus_scan() w/o CONFIG_PHYLIB would cause a crash later in
mdiobus_scan(). In general the PHYLIB functionality isn't optional here.
Consequently, MDIO bus providers depend on PHYLIB.
Therefore factor it out and build it together with the libphy core
modules. In addition make all MDIO bus providers under /drivers/net/mdio
depend on PHYLIB. Same applies to enetc MDIO bus provider. Note that
PHYLIB selects MDIO_DEVRES, therefore we can omit this here.

Fixes: 52358dd63e34 ("net: phy: remove function stubs")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202504270639.mT0lh2o1-lkp@intel.com/
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/c74772a9-dab6-44bf-a657-389df89d85c2@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 3beaf225ee88..d62d292024bc 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -2062,6 +2062,7 @@ int __phy_hwtstamp_set(struct phy_device *phydev,
 		       struct netlink_ext_ack *extack);
 
 extern const struct bus_type mdio_bus_type;
+extern const struct class mdio_bus_class;
 
 struct mdio_board_info {
 	const char	*bus_id;
-- 
cgit v1.2.3


From 66d454e99d71857faf249486912e381ec83760b4 Mon Sep 17 00:00:00 2001
From: Jordan Rife <jordan@jrife.io>
Date: Fri, 2 May 2025 09:15:21 -0700
Subject: bpf: udp: Make sure iter->batch always contains a full bucket
 snapshot

Require that iter->batch always contains a full bucket snapshot. This
invariant is important to avoid skipping or repeating sockets during
iteration when combined with the next few patches. Before, there were
two cases where a call to bpf_iter_udp_batch may only capture part of a
bucket:

1. When bpf_iter_udp_realloc_batch() returns -ENOMEM [1].
2. When more sockets are added to the bucket while calling
   bpf_iter_udp_realloc_batch(), making the updated batch size
   insufficient [2].

In cases where the batch size only covers part of a bucket, it is
possible to forget which sockets were already visited, especially if we
have to process a bucket in more than two batches. This forces us to
choose between repeating or skipping sockets, so don't allow this:

1. Stop iteration and propagate -ENOMEM up to userspace if reallocation
   fails instead of continuing with a partial batch.
2. Try bpf_iter_udp_realloc_batch() with GFP_USER just as before, but if
   we still aren't able to capture the full bucket, call
   bpf_iter_udp_realloc_batch() again while holding the bucket lock to
   guarantee the bucket does not change. On the second attempt use
   GFP_NOWAIT since we hold onto the spin lock.

Introduce the udp_portaddr_for_each_entry_from macro and use it instead
of udp_portaddr_for_each_entry to make it possible to continue iteration
from an arbitrary socket. This is required for this patch in the
GFP_NOWAIT case to allow us to fill the rest of a batch starting from
the middle of a bucket and the later patch which skips sockets that were
already seen.

Testing all scenarios directly is a bit difficult, but I did some manual
testing to exercise the code paths where GFP_NOWAIT is used and where
ERR_PTR(err) is returned. I used the realloc test case included later
in this series to trigger a scenario where a realloc happens inside
bpf_iter_udp_batch and made a small code tweak to force the first
realloc attempt to allocate a too-small batch, thus requiring
another attempt with GFP_NOWAIT. Some printks showed both reallocs with
the tests passing:

Apr 25 23:16:24 crow kernel: go again GFP_USER
Apr 25 23:16:24 crow kernel: go again GFP_NOWAIT

With this setup, I also forced each of the bpf_iter_udp_realloc_batch
calls to return -ENOMEM to ensure that iteration ends and that the
read() in userspace fails.

[1]: https://lore.kernel.org/bpf/CABi4-ogUtMrH8-NVB6W8Xg_F_KDLq=yy-yu-tKr2udXE2Mu1Lg@mail.gmail.com/
[2]: https://lore.kernel.org/bpf/7ed28273-a716-4638-912d-f86f965e54bb@linux.dev/

Signed-off-by: Jordan Rife <jordan@jrife.io>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/linux/udp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 895240177f4f..4e1a672af4c5 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -216,6 +216,9 @@ static inline void udp_allow_gso(struct sock *sk)
 #define udp_portaddr_for_each_entry(__sk, list) \
 	hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
 
+#define udp_portaddr_for_each_entry_from(__sk) \
+	hlist_for_each_entry_from(__sk, __sk_common.skc_portaddr_node)
+
 #define udp_portaddr_for_each_entry_rcu(__sk, list) \
 	hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
 
-- 
cgit v1.2.3


From ca732e990fc8222a2d6782ae750304719e212fe8 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 1 May 2025 12:45:11 +0100
Subject: net: stmmac: add get_interfaces() platform method

Add a get_interfaces() platform method to allow platforms to indicate
to phylink which interface modes they support - which then allows
phylink to validate on initialisation that the configured PHY interface
mode is actually supported.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1uASLn-0021Qd-Mi@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 8aed09d65b4a..537bced69c46 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -233,6 +233,8 @@ struct plat_stmmacenet_data {
 	u8 tx_sched_algorithm;
 	struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES];
 	struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES];
+	void (*get_interfaces)(struct stmmac_priv *priv, void *bsp_priv,
+			       unsigned long *interfaces);
 	int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i,
 			       phy_interface_t interface, int speed);
 	void (*fix_mac_speed)(void *priv, int speed, unsigned int mode);
-- 
cgit v1.2.3


From 9d165dc58055d98658941a33fef9e5da866af3e9 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 1 May 2025 12:45:27 +0100
Subject: net: stmmac: remove speed_mode_2500() method

Remove the speed_mode_2500() platform method which is no longer used
or necessary, being superseded by the more flexible get_interfaces()
method.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/E1uASM3-0021R3-2B@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 537bced69c46..26ddf95d23f9 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -241,7 +241,6 @@ struct plat_stmmacenet_data {
 	int (*fix_soc_reset)(void *priv, void __iomem *ioaddr);
 	int (*serdes_powerup)(struct net_device *ndev, void *priv);
 	void (*serdes_powerdown)(struct net_device *ndev, void *priv);
-	void (*speed_mode_2500)(struct net_device *ndev, void *priv);
 	int (*mac_finish)(struct net_device *ndev,
 			  void *priv,
 			  unsigned int mode,
-- 
cgit v1.2.3


From 22c64f37e1d4e757b0073a72f1439c2c3509c5cb Mon Sep 17 00:00:00 2001
From: Mohan Kumar G <quic_mkumarg@quicinc.com>
Date: Mon, 5 May 2025 20:58:36 +0530
Subject: wifi: mac80211: Update MCS15 support in link_conf

As per IEEE 802.11be-2024 - 9.4.2.321, EHT operation element
contains MCS15 Disable subfield as the sixth bit, which is set when
MCS15 support is not enabled.

Get MCS15 support from EHT operation params and add it in link_conf
so that driver can use this value to know if EHT-MCS 15 reception
is enabled.

Co-developed-by: Dhanavandhana Kannan <quic_dhanavan1@quicinc.com>
Signed-off-by: Dhanavandhana Kannan <quic_dhanavan1@quicinc.com>
Signed-off-by: Mohan Kumar G <quic_mkumarg@quicinc.com>
Link: https://patch.msgid.link/20250505152836.3266829-1-quic_mkumarg@quicinc.com
[remove pointless !! for bool assignment]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 17f917cb4540..420c7f9aa6ee 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2325,6 +2325,7 @@ struct ieee80211_eht_cap_elem {
 #define IEEE80211_EHT_OPER_EHT_DEF_PE_DURATION	                0x04
 #define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_LIMIT         0x08
 #define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_EXP_MASK      0x30
+#define IEEE80211_EHT_OPER_MCS15_DISABLE                        0x40
 
 /**
  * struct ieee80211_eht_operation - eht operation element
-- 
cgit v1.2.3


From 4701073c3debd16d7f534f3eb808bd9b50601c0c Mon Sep 17 00:00:00 2001
From: Wei Fang <wei.fang@nxp.com>
Date: Tue, 6 May 2025 16:07:22 +0800
Subject: net: enetc: add initial netc-lib driver to support NTMP

Some NETC functionality is controlled using control messages sent to the
hardware using BD ring interface with 32B descriptor similar to transmit
BD ring used on ENETC. This BD ring interface is referred to as command
BD ring. It is used to configure functionality where the underlying
resources may be shared between different entities or being too large to
configure using direct registers. Therefore, a messaging protocol called
NETC Table Management Protocol (NTMP) is provided for exchanging
configuration and management information between the software and the
hardware using the command BD ring interface.

For the management protocol of LS1028A has been retroactively named NTMP
1.0, and its implementation is in enetc_cbdr.c and enetc_qos.c. However,
NTMP of i.MX95 has been upgraded to version 2.0, which is incompatible
with LS1028A, because the message formats have been changed. Therefore,
add the netc-lib driver to support NTMP 2.0 to operate various tables.
Note that, only MAC address filter table and RSS table are supported at
the moment. More tables will be supported in subsequent patches.

It is worth mentioning that the purpose of the netc-lib driver is to
provide some NTMP-based generic interfaces for ENETC and NETC Switch
drivers. Currently, it only supports the configurations of some tables.
Interfaces such as tc flower and debugfs will be added in the future.

Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20250506080735.3444381-2-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/fsl/ntmp.h | 121 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 include/linux/fsl/ntmp.h

(limited to 'include/linux')

diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h
new file mode 100644
index 000000000000..916dc4fe7de3
--- /dev/null
+++ b/include/linux/fsl/ntmp.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2025 NXP */
+#ifndef __NETC_NTMP_H
+#define __NETC_NTMP_H
+
+#include <linux/bitops.h>
+#include <linux/if_ether.h>
+
+struct maft_keye_data {
+	u8 mac_addr[ETH_ALEN];
+	__le16 resv;
+};
+
+struct maft_cfge_data {
+	__le16 si_bitmap;
+	__le16 resv;
+};
+
+struct netc_cbdr_regs {
+	void __iomem *pir;
+	void __iomem *cir;
+	void __iomem *mr;
+
+	void __iomem *bar0;
+	void __iomem *bar1;
+	void __iomem *lenr;
+};
+
+struct netc_tbl_vers {
+	u8 maft_ver;
+	u8 rsst_ver;
+};
+
+struct netc_cbdr {
+	struct device *dev;
+	struct netc_cbdr_regs regs;
+
+	int bd_num;
+	int next_to_use;
+	int next_to_clean;
+
+	int dma_size;
+	void *addr_base;
+	void *addr_base_align;
+	dma_addr_t dma_base;
+	dma_addr_t dma_base_align;
+
+	/* Serialize the order of command BD ring */
+	spinlock_t ring_lock;
+};
+
+struct ntmp_user {
+	int cbdr_num;	/* number of control BD ring */
+	struct device *dev;
+	struct netc_cbdr *ring;
+	struct netc_tbl_vers tbl;
+};
+
+struct maft_entry_data {
+	struct maft_keye_data keye;
+	struct maft_cfge_data cfge;
+};
+
+#if IS_ENABLED(CONFIG_NXP_NETC_LIB)
+int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev,
+		   const struct netc_cbdr_regs *regs);
+void ntmp_free_cbdr(struct netc_cbdr *cbdr);
+
+/* NTMP APIs */
+int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id,
+			struct maft_entry_data *maft);
+int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id,
+			  struct maft_entry_data *maft);
+int ntmp_maft_delete_entry(struct ntmp_user *user, u32 entry_id);
+int ntmp_rsst_update_entry(struct ntmp_user *user, const u32 *table,
+			   int count);
+int ntmp_rsst_query_entry(struct ntmp_user *user,
+			  u32 *table, int count);
+#else
+static inline int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev,
+				 const struct netc_cbdr_regs *regs)
+{
+	return 0;
+}
+
+static inline void ntmp_free_cbdr(struct netc_cbdr *cbdr)
+{
+}
+
+static inline int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id,
+				      struct maft_entry_data *maft)
+{
+	return 0;
+}
+
+static inline int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id,
+					struct maft_entry_data *maft)
+{
+	return 0;
+}
+
+static inline int ntmp_maft_delete_entry(struct ntmp_user *user, u32 entry_id)
+{
+	return 0;
+}
+
+static inline int ntmp_rsst_update_entry(struct ntmp_user *user,
+					 const u32 *table, int count)
+{
+	return 0;
+}
+
+static inline int ntmp_rsst_query_entry(struct ntmp_user *user,
+					u32 *table, int count)
+{
+	return 0;
+}
+
+#endif
+
+#endif
-- 
cgit v1.2.3


From c24a65b6a27c78d8540409800886b6622ea86ebf Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Tue, 15 Apr 2025 21:15:49 -0500
Subject: iidc/ice/irdma: Update IDC to support multiple consumers

In preparation of supporting more than a single core PCI driver
for RDMA, move ice specific structs like qset_params, qos_info
and qos_params from iidc_rdma.h to iidc_rdma_ice.h.

Previously, the ice driver was just exporting its entire PF struct
to the auxiliary driver, but since each core driver will have its own
different PF struct, implement a universal struct that all core drivers
can provide to the auxiliary driver through the probe call.

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Co-developed-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Co-developed-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Co-developed-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/iidc_rdma.h     | 67 +++++++++-----------------------
 include/linux/net/intel/iidc_rdma_ice.h | 69 ++++++++++++++++++++++++++++-----
 2 files changed, 78 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h
index 7f1910289534..8baad1082042 100644
--- a/include/linux/net/intel/iidc_rdma.h
+++ b/include/linux/net/intel/iidc_rdma.h
@@ -5,7 +5,6 @@
 #define _IIDC_RDMA_H_
 
 #include <linux/auxiliary_bus.h>
-#include <linux/dcbnl.h>
 #include <linux/device.h>
 #include <linux/if_ether.h>
 #include <linux/kernel.h>
@@ -17,14 +16,19 @@ enum iidc_rdma_event_type {
 	IIDC_RDMA_EVENT_AFTER_MTU_CHANGE,
 	IIDC_RDMA_EVENT_BEFORE_TC_CHANGE,
 	IIDC_RDMA_EVENT_AFTER_TC_CHANGE,
+	IIDC_RDMA_EVENT_WARN_RESET,
 	IIDC_RDMA_EVENT_CRIT_ERR,
 	IIDC_RDMA_EVENT_NBITS		/* must be last */
 };
 
+struct iidc_rdma_event {
+	DECLARE_BITMAP(type, IIDC_RDMA_EVENT_NBITS);
+	u32 reg;
+};
+
 enum iidc_rdma_reset_type {
-	IIDC_PFR,
-	IIDC_CORER,
-	IIDC_GLOBR,
+	IIDC_FUNC_RESET,
+	IIDC_DEV_RESET,
 };
 
 enum iidc_rdma_protocol {
@@ -32,53 +36,22 @@ enum iidc_rdma_protocol {
 	IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1),
 };
 
-#define IIDC_MAX_USER_PRIORITY		8
-#define IIDC_DSCP_PFC_MODE		0x1
-
-/* Struct to hold per RDMA Qset info */
-struct iidc_rdma_qset_params {
-	/* Qset TEID returned to the RDMA driver in
-	 * ice_add_rdma_qset and used by RDMA driver
-	 * for calls to ice_del_rdma_qset
-	 */
-	u32 teid;	/* Qset TEID */
-	u16 qs_handle; /* RDMA driver provides this */
-	u16 vport_id; /* VSI index */
-	u8 tc; /* TC branch the Qset should belong to */
-};
-
-struct iidc_rdma_qos_info {
-	u64 tc_ctx;
-	u8 rel_bw;
-	u8 prio_type;
-	u8 egress_virt_up;
-	u8 ingress_virt_up;
-};
-
-/* Struct to pass QoS info */
-struct iidc_rdma_qos_params {
-	struct iidc_rdma_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
-	u8 up2tc[IIDC_MAX_USER_PRIORITY];
-	u8 vport_relative_bw;
-	u8 vport_priority_type;
-	u8 num_tc;
-	u8 pfc_mode;
-	u8 dscp_map[DSCP_MAX];
-};
-
-struct iidc_rdma_event {
-	DECLARE_BITMAP(type, IIDC_RDMA_EVENT_NBITS);
-	u32 reg;
+/* Structure to be populated by core LAN PCI driver */
+struct iidc_rdma_core_dev_info {
+	struct pci_dev *pdev; /* PCI device of corresponding to main function */
+	struct auxiliary_device *adev;
+	/* Current active RDMA protocol */
+	enum iidc_rdma_protocol rdma_protocol;
+	void *iidc_priv; /* elements unique to each driver */
 };
 
 /* Structure representing auxiliary driver tailored information about the core
  * PCI dev, each auxiliary driver using the IIDC interface will have an
  * instance of this struct dedicated to it.
  */
-
 struct iidc_rdma_core_auxiliary_dev {
 	struct auxiliary_device adev;
-	struct ice_pf *pf;
+	struct iidc_rdma_core_dev_info *cdev_info;
 };
 
 /* structure representing the auxiliary driver. This struct is to be
@@ -88,12 +61,8 @@ struct iidc_rdma_core_auxiliary_dev {
  */
 struct iidc_rdma_core_auxiliary_drv {
 	struct auxiliary_driver adrv;
-	/* This event_handler is meant to be a blocking call.  For instance,
-	 * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not
-	 * return until the auxiliary driver is ready for the MTU change to
-	 * happen.
-	 */
-	void (*event_handler)(struct ice_pf *pf, struct iidc_rdma_event *event);
+	void (*event_handler)(struct iidc_rdma_core_dev_info *cdev,
+			      struct iidc_rdma_event *event);
 };
 
 #endif /* _IIDC_RDMA_H_*/
diff --git a/include/linux/net/intel/iidc_rdma_ice.h b/include/linux/net/intel/iidc_rdma_ice.h
index 78d10003d776..b40eed0e13fe 100644
--- a/include/linux/net/intel/iidc_rdma_ice.h
+++ b/include/linux/net/intel/iidc_rdma_ice.h
@@ -4,16 +4,67 @@
 #ifndef _IIDC_RDMA_ICE_H_
 #define _IIDC_RDMA_ICE_H_
 
-struct ice_pf;
+#include <linux/dcbnl.h>
 
-int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_rdma_request_reset(struct ice_pf *pf,
+#define IIDC_MAX_USER_PRIORITY         8
+#define IIDC_DSCP_PFC_MODE             0x1
+
+/**
+ * struct iidc_rdma_qset_params - Struct to hold per RDMA Qset info
+ * @teid: TEID of the Qset node
+ * @qs_handle: SW index of the Qset, RDMA provides this
+ * @vport_id: VSI index
+ * @tc: Traffic Class branch the QSet should belong to
+ */
+struct iidc_rdma_qset_params {
+	/* Qset TEID returned to the RDMA driver in
+	 * ice_add_rdma_qset and used by RDMA driver
+	 * for calls to ice_del_rdma_qset
+	 */
+	u32 teid;
+	u16 qs_handle;
+	u16 vport_id;
+	u8 tc;
+};
+
+struct iidc_rdma_qos_info {
+	u64 tc_ctx;
+	u8 rel_bw;
+	u8 prio_type;
+	u8 egress_virt_up;
+	u8 ingress_virt_up;
+};
+
+/* Struct to pass QoS info */
+struct iidc_rdma_qos_params {
+	struct iidc_rdma_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
+	u8 up2tc[IIDC_MAX_USER_PRIORITY];
+	u8 vport_relative_bw;
+	u8 vport_priority_type;
+	u8 num_tc;
+	u8 pfc_mode;
+	u8 dscp_map[DSCP_MAX];
+};
+
+struct iidc_rdma_priv_dev_info {
+	u8 pf_id;
+	u16 vport_id;
+	struct net_device *netdev;
+	struct iidc_rdma_qos_params qos_info;
+	u8 __iomem *hw_addr;
+};
+
+int ice_add_rdma_qset(struct iidc_rdma_core_dev_info *cdev,
+		      struct iidc_rdma_qset_params *qset);
+int ice_del_rdma_qset(struct iidc_rdma_core_dev_info *cdev,
+		      struct iidc_rdma_qset_params *qset);
+int ice_rdma_request_reset(struct iidc_rdma_core_dev_info *cdev,
 			   enum iidc_rdma_reset_type reset_type);
-int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
-void ice_get_qos_params(struct ice_pf *pf,
-			struct iidc_rdma_qos_params *qos);
-int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
-void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
+int ice_rdma_update_vsi_filter(struct iidc_rdma_core_dev_info *cdev, u16 vsi_id,
+			       bool enable);
+int ice_alloc_rdma_qvector(struct iidc_rdma_core_dev_info *cdev,
+			   struct msix_entry *entry);
+void ice_free_rdma_qvector(struct iidc_rdma_core_dev_info *cdev,
+			   struct msix_entry *entry);
 
 #endif /* _IIDC_RDMA_ICE_H_*/
-- 
cgit v1.2.3


From e9f3d61db5cb29b3f17f0dc40c3ec2cda2ee93e5 Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Thu, 8 May 2025 00:48:22 +0000
Subject: net: add get_netmem/put_netmem support

Currently net_iovs support only pp ref counts, and do not support a
page ref equivalent.

This is fine for the RX path as net_iovs are used exclusively with the
pp and only pp refcounting is needed there. The TX path however does not
use pp ref counts, thus, support for get_page/put_page equivalent is
needed for netmem.

Support get_netmem/put_netmem. Check the type of the netmem before
passing it to page or net_iov specific code to obtain a page ref
equivalent.

For dmabuf net_iovs, we obtain a ref on the underlying binding. This
ensures the entire binding doesn't disappear until all the net_iovs have
been put_netmem'ed. We do not need to track the refcount of individual
dmabuf net_iovs as we don't allocate/free them from a pool similar to
what the buddy allocator does for pages.

This code is written to be extensible by other net_iov implementers.
get_netmem/put_netmem will check the type of the netmem and route it to
the correct helper:

pages -> [get|put]_page()
dmabuf net_iovs -> net_devmem_[get|put]_net_iov()
new net_iovs ->	new helpers

Signed-off-by: Mina Almasry <almasrymina@google.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250508004830.4100853-3-almasrymina@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/skbuff_ref.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h
index 0f3c58007488..9e49372ef1a0 100644
--- a/include/linux/skbuff_ref.h
+++ b/include/linux/skbuff_ref.h
@@ -17,7 +17,7 @@
  */
 static inline void __skb_frag_ref(skb_frag_t *frag)
 {
-	get_page(skb_frag_page(frag));
+	get_netmem(skb_frag_netmem(frag));
 }
 
 /**
@@ -40,7 +40,7 @@ static inline void skb_page_unref(netmem_ref netmem, bool recycle)
 	if (recycle && napi_pp_put_page(netmem))
 		return;
 #endif
-	put_page(netmem_to_page(netmem));
+	put_netmem(netmem);
 }
 
 /**
-- 
cgit v1.2.3


From bd61848900bff597764238f3a8ec67c815cd316e Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Thu, 8 May 2025 00:48:24 +0000
Subject: net: devmem: Implement TX path

Augment dmabuf binding to be able to handle TX. Additional to all the RX
binding, we also create tx_vec needed for the TX path.

Provide API for sendmsg to be able to send dmabufs bound to this device:

- Provide a new dmabuf_tx_cmsg which includes the dmabuf to send from.
- MSG_ZEROCOPY with SCM_DEVMEM_DMABUF cmsg indicates send from dma-buf.

Devmem is uncopyable, so piggyback off the existing MSG_ZEROCOPY
implementation, while disabling instances where MSG_ZEROCOPY falls back
to copying.

We additionally pipe the binding down to the new
zerocopy_fill_skb_from_devmem which fills a TX skb with net_iov netmems
instead of the traditional page netmems.

We also special case skb_frag_dma_map to return the dma-address of these
dmabuf net_iovs instead of attempting to map pages.

The TX path may release the dmabuf in a context where we cannot wait.
This happens when the user unbinds a TX dmabuf while there are still
references to its netmems in the TX path. In that case, the netmems will
be put_netmem'd from a context where we can't unmap the dmabuf, Resolve
this by making __net_devmem_dmabuf_binding_free schedule_work'd.

Based on work by Stanislav Fomichev <sdf@fomichev.me>. A lot of the meat
of the implementation came from devmem TCP RFC v1[1], which included the
TX path, but Stan did all the rebasing on top of netmem/net_iov.

Cc: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Kaiyuan Zhang <kaiyuanz@google.com>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250508004830.4100853-5-almasrymina@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/skbuff.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f3e72be6f634..c7397b17bb08 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1707,13 +1707,16 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
 extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops;
 
 struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
-				       struct ubuf_info *uarg);
+				       struct ubuf_info *uarg, bool devmem);
 
 void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
 
+struct net_devmem_dmabuf_binding;
+
 int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 			    struct sk_buff *skb, struct iov_iter *from,
-			    size_t length);
+			    size_t length,
+			    struct net_devmem_dmabuf_binding *binding);
 
 int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
 				struct iov_iter *from, size_t length);
@@ -1721,12 +1724,14 @@ int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
 static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb,
 					  struct msghdr *msg, int len)
 {
-	return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len);
+	return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len,
+				       NULL);
 }
 
 int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 			     struct msghdr *msg, int len,
-			     struct ubuf_info *uarg);
+			     struct ubuf_info *uarg,
+			     struct net_devmem_dmabuf_binding *binding);
 
 /* Internal */
 #define skb_shinfo(SKB)	((struct skb_shared_info *)(skb_end_pointer(SKB)))
@@ -3697,6 +3702,10 @@ static inline dma_addr_t __skb_frag_dma_map(struct device *dev,
 					    size_t offset, size_t size,
 					    enum dma_data_direction dir)
 {
+	if (skb_frag_is_net_iov(frag)) {
+		return netmem_to_net_iov(frag->netmem)->dma_addr + offset +
+		       frag->offset;
+	}
 	return dma_map_page(dev, skb_frag_page(frag),
 			    skb_frag_off(frag) + offset, size, dir);
 }
-- 
cgit v1.2.3


From 383faec0fd64b9bff15eb5f700f023ec35520a96 Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Thu, 8 May 2025 00:48:26 +0000
Subject: net: enable driver support for netmem TX

Drivers need to make sure not to pass netmem dma-addrs to the
dma-mapping API in order to support netmem TX.

Add helpers and netmem_dma_*() helpers that enables special handling of
netmem dma-addrs that drivers can use.

Document in netmem.rst what drivers need to do to support netmem TX.

Signed-off-by: Mina Almasry <almasrymina@google.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250508004830.4100853-7-almasrymina@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 773167508c82..32a1e41636a9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1772,6 +1772,7 @@ enum netdev_reg_state {
  *	@lltx:		device supports lockless Tx. Deprecated for real HW
  *			drivers. Mainly used by logical interfaces, such as
  *			bonding and tunnels
+ *	@netmem_tx:	device support netmem_tx.
  *
  *	@name:	This is the first field of the "visible" part of this structure
  *		(i.e. as seen by users in the "Space.c" file).  It is the name
@@ -2087,6 +2088,7 @@ struct net_device {
 	struct_group(priv_flags_fast,
 		unsigned long		priv_flags:32;
 		unsigned long		lltx:1;
+		unsigned long		netmem_tx:1;
 	);
 	const struct net_device_ops *netdev_ops;
 	const struct header_ops *header_ops;
-- 
cgit v1.2.3


From dc75c3ced10c611f524e9e444303a0249ce32e43 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 12 May 2025 22:20:59 +0200
Subject: net: phy: remove stub for mdiobus_register_board_info

The functionality of mdiobus_register_board_info() typically isn't
optional for the caller. Therefore remove the stub.

Note: Currently we have only one caller of mdiobus_register_board_info(),
in a DSA/PHYLINK context. Therefore CONFIG_MDIO_DEVICE is selected anyway.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/410a2222-c4e8-45b0-9091-d49674caeb00@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index d62d292024bc..7c29d346d4b3 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -2071,17 +2071,8 @@ struct mdio_board_info {
 	const void	*platform_data;
 };
 
-#if IS_ENABLED(CONFIG_MDIO_DEVICE)
 int mdiobus_register_board_info(const struct mdio_board_info *info,
 				unsigned int n);
-#else
-static inline int mdiobus_register_board_info(const struct mdio_board_info *i,
-					      unsigned int n)
-{
-	return 0;
-}
-#endif
-
 
 /**
  * phy_module_driver() - Helper macro for registering PHY drivers
-- 
cgit v1.2.3


From c16608005ccb99fbde3a4cd96eab28e16f148abf Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 13 May 2025 11:19:22 +0300
Subject: net: Look for bonding slaves in the bond's network namespace

Update the for_each_netdev_in_bond_rcu macro to iterate through network
devices in the bond's network namespace instead of always using
init_net. This change is safe because:

1. **Bond-Slave Namespace Relationship**: A bond device and its slaves
   must reside in the same network namespace. The bond device's
   namespace is established at creation time and cannot change.

2. **Slave Movement Implications**: Any attempt to move a slave device
   to a different namespace automatically removes it from the bond, as
   per kernel networking stack rules.
   This maintains the invariant that slaves must exist in the same
   namespace as their bond.

This change is part of an effort to enable Link Aggregation (LAG) to
work properly inside custom network namespaces. Previously, the macro
would only find slave devices in the initial network namespace,
preventing proper bonding functionality in custom namespaces.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20250513081922.525716-1-mbloch@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 32a1e41636a9..9e3a2d8452d6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3268,7 +3268,7 @@ int call_netdevice_notifiers_info(unsigned long val,
 #define for_each_netdev_continue_rcu(net, d)		\
 	list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list)
 #define for_each_netdev_in_bond_rcu(bond, slave)	\
-		for_each_netdev_rcu(&init_net, slave)	\
+		for_each_netdev_rcu(dev_net_rcu(bond), slave)	\
 			if (netdev_master_upper_dev_get_rcu(slave) == (bond))
 #define net_device_entry(lh)	list_entry(lh, struct net_device, dev_list)
 
-- 
cgit v1.2.3


From b9eef3391de028fdd88fd7a2f81a4834fc98c9ac Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 May 2025 11:27:26 +0200
Subject: xdp: Use nested-BH locking for system_page_pool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

system_page_pool is a per-CPU variable and relies on disabled BH for its
locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.

Make a struct with a page_pool member (original system_page_pool) and a
local_lock_t and use local_lock_nested_bh() for locking. This change
adds only lockdep coverage and does not alter the functional behaviour
for !PREEMPT_RT.

Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jesper Dangaard Brouer <hawk@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20250512092736.229935-6-bigeasy@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9e3a2d8452d6..73a97cf1bbce 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3503,7 +3503,12 @@ struct softnet_data {
 };
 
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
-DECLARE_PER_CPU(struct page_pool *, system_page_pool);
+
+struct page_pool_bh {
+	struct page_pool *pool;
+	local_lock_t bh_lock;
+};
+DECLARE_PER_CPU(struct page_pool_bh, system_page_pool);
 
 #ifndef CONFIG_PREEMPT_RT
 static inline int dev_recursion_level(void)
-- 
cgit v1.2.3


From 7fe70c06a182a140be9996b02256d907e114479a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 May 2025 11:27:31 +0200
Subject: net/sched: act_mirred: Move the recursion counter struct netdev_xmit

mirred_nest_level is a per-CPU variable and relies on disabled BH for its
locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.

Move mirred_nest_level to struct netdev_xmit as u8, provide wrappers.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://patch.msgid.link/20250512092736.229935-11-bigeasy@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice_xmit.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h
index 38325e070296..848735b3a7c0 100644
--- a/include/linux/netdevice_xmit.h
+++ b/include/linux/netdevice_xmit.h
@@ -8,6 +8,9 @@ struct netdev_xmit {
 #ifdef CONFIG_NET_EGRESS
 	u8  skip_txqueue;
 #endif
+#if IS_ENABLED(CONFIG_NET_ACT_MIRRED)
+	u8 sched_mirred_nest;
+#endif
 };
 
 #endif
-- 
cgit v1.2.3


From ea33537d82921e71f852ea2ed985acc562125efe Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 13 May 2025 19:39:12 +0000
Subject: tcp: add receive queue awareness in tcp_rcv_space_adjust()

If the application can not drain fast enough a TCP socket queue,
tcp_rcv_space_adjust() can overestimate tp->rcvq_space.space.

Then sk->sk_rcvbuf can grow and hit tcp_rmem[2] for no good reason.

Fix this by taking into acount the number of available bytes.

Keeping sk->sk_rcvbuf at the right size allows better cache efficiency.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Wei Wang <weiwan@google.com>
Link: https://patch.msgid.link/20250513193919.1089692-5-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index a8af71623ba7..29f59d50dc73 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -340,7 +340,7 @@ struct tcp_sock {
 	} rcv_rtt_est;
 /* Receiver queue space */
 	struct {
-		u32	space;
+		int	space;
 		u32	seq;
 		u64	time;
 	} rcvq_space;
-- 
cgit v1.2.3


From 7b151e4efdde7cc7cfaae66e497d12487a70c6e9 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 14 May 2025 20:54:29 +0200
Subject: net: phy: fixed_phy: remove fixed_phy_register_with_gpiod

Since its introduction 6 yrs ago this functions has never had a user.
So remove it.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/ccbeef28-65ae-4e28-b1db-816c44338dee@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy_fixed.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 1acafd86ab13..3392c09b5d24 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -13,7 +13,6 @@ struct fixed_phy_status {
 };
 
 struct device_node;
-struct gpio_desc;
 struct net_device;
 
 #if IS_ENABLED(CONFIG_FIXED_PHY)
@@ -24,11 +23,6 @@ extern struct phy_device *fixed_phy_register(unsigned int irq,
 					     struct fixed_phy_status *status,
 					     struct device_node *np);
 
-extern struct phy_device *
-fixed_phy_register_with_gpiod(unsigned int irq,
-			      struct fixed_phy_status *status,
-			      struct gpio_desc *gpiod);
-
 extern void fixed_phy_unregister(struct phy_device *phydev);
 extern int fixed_phy_set_link_update(struct phy_device *phydev,
 			int (*link_update)(struct net_device *,
@@ -46,14 +40,6 @@ static inline struct phy_device *fixed_phy_register(unsigned int irq,
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct phy_device *
-fixed_phy_register_with_gpiod(unsigned int irq,
-			      struct fixed_phy_status *status,
-			      struct gpio_desc *gpiod)
-{
-	return ERR_PTR(-ENODEV);
-}
-
 static inline void fixed_phy_unregister(struct phy_device *phydev)
 {
 }
-- 
cgit v1.2.3


From a462903fa22541f212134fba81084315ad843e6e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 16 May 2025 13:59:27 +0200
Subject: net: netlink: reduce extack cookie size

Seems like the extack cookie hasn't found any users outside
of wireless, which always uses nl_set_extack_cookie_u64().
Thus, allocating 20 bytes for it is pointless, reduce that
to 8 bytes, and add a BUILD_BUG_ON() to ensure it's enough
(obviously it is, for a u64, but in case it changes again.)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://patch.msgid.link/20250516115927.38209-2-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netlink.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index c3ae84a77e16..882e9c1b6c1d 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -63,7 +63,7 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
 }
 
 /* this can be increased when necessary - don't expose to userland */
-#define NETLINK_MAX_COOKIE_LEN	20
+#define NETLINK_MAX_COOKIE_LEN	8
 #define NETLINK_MAX_FMTMSG_LEN	80
 
 /**
@@ -212,6 +212,7 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
 {
 	if (!extack)
 		return;
+	BUILD_BUG_ON(sizeof(extack->cookie) < sizeof(cookie));
 	memcpy(extack->cookie, &cookie, sizeof(cookie));
 	extack->cookie_len = sizeof(cookie);
 }
-- 
cgit v1.2.3


From 31be641d74267d98317ef5a2b90e6200511cabb3 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 15 May 2025 10:11:54 +0200
Subject: net: phy: make mdio consumer / device layer a separate module

After having factored out the provider part from mdio_bus.c, we can
make the mdio consumer / device layer a separate module. This also
allows to remove Kconfig symbol MDIO_DEVICE.
The module init / exit functions from mdio_bus.c no longer have to be
called from phy_device.c. The link order defined in
drivers/net/phy/Makefile ensures that init / exit functions are called
in the right order.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/dba6b156-5748-44ce-b5e2-e8dc2fcee5a7@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/phy.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 7c29d346d4b3..92a88b5ce356 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -2033,9 +2033,6 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev,
 				   const struct ethtool_link_ksettings *cmd);
 int phy_ethtool_nway_reset(struct net_device *ndev);
 
-int __init mdio_bus_init(void);
-void mdio_bus_exit(void);
-
 int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data);
 int phy_ethtool_get_sset_count(struct phy_device *phydev);
 int phy_ethtool_get_stats(struct phy_device *phydev,
-- 
cgit v1.2.3


From 3f1716ee0f6c63795e6d225e3f5ec3825cd2bd57 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 17 May 2025 22:34:32 +0200
Subject: net: phy: fixed_phy: remove irq argument from fixed_phy_add

All callers pass PHY_POLL, therefore remove irq argument from
fixed_phy_add().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Acked-by: Greg Ungerer <gerg@linux-m68k.org>
Link: https://patch.msgid.link/b3b9b3bc-c310-4a54-b376-c909c83575de@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy_fixed.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 3392c09b5d24..316bb4deda37 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -17,8 +17,7 @@ struct net_device;
 
 #if IS_ENABLED(CONFIG_FIXED_PHY)
 extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier);
-extern int fixed_phy_add(unsigned int irq, int phy_id,
-			 struct fixed_phy_status *status);
+int fixed_phy_add(int phy_id, struct fixed_phy_status *status);
 extern struct phy_device *fixed_phy_register(unsigned int irq,
 					     struct fixed_phy_status *status,
 					     struct device_node *np);
@@ -28,7 +27,7 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev,
 			int (*link_update)(struct net_device *,
 					   struct fixed_phy_status *));
 #else
-static inline int fixed_phy_add(unsigned int irq, int phy_id,
+static inline int fixed_phy_add(int phy_id,
 				struct fixed_phy_status *status)
 {
 	return -ENODEV;
-- 
cgit v1.2.3


From d23b4af5df3900fb0b4e1a05cb8119dd1c395519 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 17 May 2025 22:35:56 +0200
Subject: net: phy: fixed_phy: remove irq argument from fixed_phy_register

All callers pass PHY_POLL, therefore remove irq argument from
fixed_phy_register().

Note: I keep the irq argument in fixed_phy_add_gpiod() for now,
for the case that somebody may want to use a GPIO interrupt in
the future, by e.g. adding a call to fwnode_irq_get() to
fixed_phy_get_gpiod().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/31cdb232-a5e9-4997-a285-cb9a7d208124@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy_fixed.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 316bb4deda37..634149a73c2a 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -18,9 +18,8 @@ struct net_device;
 #if IS_ENABLED(CONFIG_FIXED_PHY)
 extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier);
 int fixed_phy_add(int phy_id, struct fixed_phy_status *status);
-extern struct phy_device *fixed_phy_register(unsigned int irq,
-					     struct fixed_phy_status *status,
-					     struct device_node *np);
+struct phy_device *fixed_phy_register(struct fixed_phy_status *status,
+				      struct device_node *np);
 
 extern void fixed_phy_unregister(struct phy_device *phydev);
 extern int fixed_phy_set_link_update(struct phy_device *phydev,
@@ -32,9 +31,9 @@ static inline int fixed_phy_add(int phy_id,
 {
 	return -ENODEV;
 }
-static inline struct phy_device *fixed_phy_register(unsigned int irq,
-						struct fixed_phy_status *status,
-						struct device_node *np)
+static inline struct phy_device *
+fixed_phy_register(struct fixed_phy_status *status,
+		   struct device_node *np)
 {
 	return ERR_PTR(-ENODEV);
 }
-- 
cgit v1.2.3


From 4ba1c5bb4811f560a86697311cb4e9741e047a5d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 17 May 2025 22:37:29 +0200
Subject: net: phy: fixed_phy: constify status argument where possible

Constify the passed struct fixed_phy_status *status where possible.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/d1764b62-8538-408b-a4e3-b63715481a38@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy_fixed.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 634149a73c2a..5399b9e41e35 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -17,8 +17,8 @@ struct net_device;
 
 #if IS_ENABLED(CONFIG_FIXED_PHY)
 extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier);
-int fixed_phy_add(int phy_id, struct fixed_phy_status *status);
-struct phy_device *fixed_phy_register(struct fixed_phy_status *status,
+int fixed_phy_add(int phy_id, const struct fixed_phy_status *status);
+struct phy_device *fixed_phy_register(const struct fixed_phy_status *status,
 				      struct device_node *np);
 
 extern void fixed_phy_unregister(struct phy_device *phydev);
@@ -27,12 +27,12 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev,
 					   struct fixed_phy_status *));
 #else
 static inline int fixed_phy_add(int phy_id,
-				struct fixed_phy_status *status)
+				const struct fixed_phy_status *status)
 {
 	return -ENODEV;
 }
 static inline struct phy_device *
-fixed_phy_register(struct fixed_phy_status *status,
+fixed_phy_register(const struct fixed_phy_status *status,
 		   struct device_node *np)
 {
 	return ERR_PTR(-ENODEV);
-- 
cgit v1.2.3


From b803c4a4f78834b31ebfbbcea350473333760559 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Fri, 2 May 2025 02:12:10 +0900
Subject: can: dev: add struct data_bittiming_params to group FD parameters

This is a preparation patch for the introduction of CAN XL.

CAN FD and CAN XL uses similar bittiming parameters. Add one level of
nesting for all the CAN FD parameters. Typically:

  priv->can.data_bittiming;

becomes:

  priv->can.fd.data_bittiming;

This way, the CAN XL equivalent (to be introduced later) would be:

  priv->can.xl.data_bittiming;

Add the new struct data_bittiming_params which contains all the data
bittiming parameters, including the TDC and the callback functions.

This done, update all the CAN FD drivers to make use of the new
layout.

Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Link: https://patch.msgid.link/20250501171213.2161572-2-mailhol.vincent@wanadoo.fr
[mkl: fix rcar_canfd]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 23492213ea35..492d23bec7be 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -38,6 +38,17 @@ enum can_termination_gpio {
 	CAN_TERMINATION_GPIO_MAX,
 };
 
+struct data_bittiming_params {
+	const struct can_bittiming_const *data_bittiming_const;
+	struct can_bittiming data_bittiming;
+	const struct can_tdc_const *tdc_const;
+	struct can_tdc tdc;
+	const u32 *data_bitrate_const;
+	unsigned int data_bitrate_const_cnt;
+	int (*do_set_data_bittiming)(struct net_device *dev);
+	int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv);
+};
+
 /*
  * CAN common private data
  */
@@ -45,16 +56,11 @@ struct can_priv {
 	struct net_device *dev;
 	struct can_device_stats can_stats;
 
-	const struct can_bittiming_const *bittiming_const,
-		*data_bittiming_const;
-	struct can_bittiming bittiming, data_bittiming;
-	const struct can_tdc_const *tdc_const;
-	struct can_tdc tdc;
-
+	const struct can_bittiming_const *bittiming_const;
+	struct can_bittiming bittiming;
+	struct data_bittiming_params fd;
 	unsigned int bitrate_const_cnt;
 	const u32 *bitrate_const;
-	const u32 *data_bitrate_const;
-	unsigned int data_bitrate_const_cnt;
 	u32 bitrate_max;
 	struct can_clock clock;
 
@@ -77,14 +83,12 @@ struct can_priv {
 	struct delayed_work restart_work;
 
 	int (*do_set_bittiming)(struct net_device *dev);
-	int (*do_set_data_bittiming)(struct net_device *dev);
 	int (*do_set_mode)(struct net_device *dev, enum can_mode mode);
 	int (*do_set_termination)(struct net_device *dev, u16 term);
 	int (*do_get_state)(const struct net_device *dev,
 			    enum can_state *state);
 	int (*do_get_berr_counter)(const struct net_device *dev,
 				   struct can_berr_counter *bec);
-	int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv);
 };
 
 static inline bool can_tdc_is_enabled(const struct can_priv *priv)
@@ -114,11 +118,11 @@ static inline bool can_tdc_is_enabled(const struct can_priv *priv)
  */
 static inline s32 can_get_relative_tdco(const struct can_priv *priv)
 {
-	const struct can_bittiming *dbt = &priv->data_bittiming;
+	const struct can_bittiming *dbt = &priv->fd.data_bittiming;
 	s32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg +
 				  dbt->phase_seg1) * dbt->brp;
 
-	return (s32)priv->tdc.tdco - sample_point_in_tc;
+	return (s32)priv->fd.tdc.tdco - sample_point_in_tc;
 }
 
 /* helper to define static CAN controller features at device creation time */
-- 
cgit v1.2.3


From a5bd029c733b8ae790d5873e2afeb88b58e3a151 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 19 May 2025 10:50:04 -0700
Subject: net: add skb_crc32c()

Add skb_crc32c(), which calculates the CRC32C of a sk_buff.  It will
replace __skb_checksum(), which unnecessarily supports arbitrary
checksums.  Compared to __skb_checksum(), skb_crc32c():

   - Uses the correct type for CRC32C values (u32, not __wsum).

   - Does not require the caller to provide a skb_checksum_ops struct.

   - Is faster because it does not use indirect calls and does not use
     the very slow crc32c_combine().

According to commit 2817a336d4d5 ("net: skb_checksum: allow custom
update/combine for walking skb") which added __skb_checksum(), the
original motivation for the abstraction layer was to avoid code
duplication for CRC32C and other checksums in the future.  However:

   - No additional checksums showed up after CRC32C.  __skb_checksum()
     is only used with the "regular" net checksum and CRC32C.

   - Indirect calls are expensive.  Commit 2544af0344ba ("net: avoid
     indirect calls in L4 checksum calculation") worked around this
     using the INDIRECT_CALL_1 macro. But that only avoided the indirect
     call for the net checksum, and at the cost of an extra branch.

   - The checksums use different types (__wsum and u32), causing casts
     to be needed.

   - It made the checksums of fragments be combined (rather than
     chained) for both checksums, despite this being highly
     counterproductive for CRC32C due to how slow crc32c_combine() is.
     This can clearly be seen in commit 4c2f24549644 ("sctp: linearize
     early if it's not GSO") which tried to work around this performance
     bug.  With a dedicated function for each checksum, we can instead
     just use the proper strategy for each checksum.

As shown by the following tables, the new function skb_crc32c() is
faster than __skb_checksum(), with the improvement varying greatly from
5% to 2500% depending on the case.  The largest improvements come from
fragmented packets, mainly due to eliminating the inefficient
crc32c_combine().  But linear packets are improved too, especially
shorter ones, mainly due to eliminating indirect calls.  These
benchmarks were done on AMD Zen 5.  On that CPU, Linux uses IBRS instead
of retpoline; an even greater improvement might be seen with retpoline:

    Linear sk_buffs

        Length in bytes    __skb_checksum cycles    skb_crc32c cycles
        ===============    =====================    =================
                     64                       43                   18
                    256                       94                   77
                   1420                      204                  161
                  16384                     1735                 1642

    Nonlinear sk_buffs (even split between head and one fragment)

        Length in bytes    __skb_checksum cycles    skb_crc32c cycles
        ===============    =====================    =================
                     64                      579                   22
                    256                      829                   77
                   1420                     1506                  194
                  16384                     4365                 1682

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://patch.msgid.link/20250519175012.36581-3-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c7397b17bb08..7ccc6356acac 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4203,6 +4203,7 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 		      __wsum csum, const struct skb_checksum_ops *ops);
 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
+u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc);
 
 static inline void * __must_check
 __skb_header_pointer(const struct sk_buff *skb, int offset, int len,
-- 
cgit v1.2.3


From 70c96c7cb9f035d5b960021f2450afa6240e66b4 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 19 May 2025 10:50:08 -0700
Subject: net: fold __skb_checksum() into skb_checksum()

Now that the only remaining caller of __skb_checksum() is
skb_checksum(), fold __skb_checksum() into skb_checksum().  This makes
struct skb_checksum_ops unnecessary, so remove that too and simply do
the "regular" net checksum.  It also makes the wrapper functions
csum_partial_ext() and csum_block_add_ext() unnecessary, so remove those
too and just use the underlying functions.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://patch.msgid.link/20250519175012.36581-7-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7ccc6356acac..018c07230513 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4192,15 +4192,6 @@ static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len)
 	return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT;
 }
 
-struct skb_checksum_ops {
-	__wsum (*update)(const void *mem, int len, __wsum wsum);
-	__wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len);
-};
-
-extern const struct skb_checksum_ops *crc32c_csum_stub __read_mostly;
-
-__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
-		      __wsum csum, const struct skb_checksum_ops *ops);
 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
 u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc);
-- 
cgit v1.2.3


From b82f72292ab4c65250bd734281464a6ab1ff4133 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 19 May 2025 10:50:09 -0700
Subject: lib/crc32: remove unused support for CRC32C combination

crc32c_combine() and crc32c_shift() are no longer used (except by the
KUnit test that tests them), and their current implementation is very
slow.  Remove them.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://patch.msgid.link/20250519175012.36581-8-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/crc32.h | 23 -----------------------
 1 file changed, 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 69c2e8bb3782..7f7d0be8a0ac 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -76,29 +76,6 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
 	return crc32_le_shift(crc1, len2) ^ crc2;
 }
 
-u32 crc32c_shift(u32 crc, size_t len);
-
-/**
- * crc32c_combine - Combine two crc32c check values into one. For two sequences
- *		    of bytes, seq1 and seq2 with lengths len1 and len2, crc32c()
- *		    check values were calculated for each, crc1 and crc2.
- *
- * @crc1: crc32c of the first block
- * @crc2: crc32c of the second block
- * @len2: length of the second block
- *
- * Return: The crc32c() check value of seq1 and seq2 concatenated, requiring
- *	   only crc1, crc2, and len2. Note: If seq_full denotes the concatenated
- *	   memory area of seq1 with seq2, and crc_full the crc32c() value of
- *	   seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when
- *	   crc_full was seeded with the same initializer as crc1, and crc2 seed
- *	   was 0. See also crc_combine_test().
- */
-static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2)
-{
-	return crc32c_shift(crc1, len2) ^ crc2;
-}
-
 #define crc32(seed, data, length)  crc32_le(seed, (unsigned char const *)(data), length)
 
 /*
-- 
cgit v1.2.3


From ea6342d98928e243f2024fb97a9b4d42ee55dfba Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 19 May 2025 10:50:10 -0700
Subject: net: add skb_copy_and_crc32c_datagram_iter()

Since skb_copy_and_hash_datagram_iter() is used only with CRC32C, the
crypto_ahash abstraction provides no value.  Add
skb_copy_and_crc32c_datagram_iter() which just calls crc32c() directly.

This is faster and simpler.  It also doesn't have the weird dependency
issue where skb_copy_and_hash_datagram_iter() depends on
CONFIG_CRYPTO_HASH=y without that being expressed explicitly in the
kconfig (presumably because it was too heavyweight for NET to select).
The new function is conditional on the hidden boolean symbol NET_CRC32C,
which selects CRC32.  So it gets compiled only when something that
actually needs CRC32C packet checksums is enabled, it has no implicit
dependency, and it doesn't depend on the heavyweight crypto layer.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://patch.msgid.link/20250519175012.36581-9-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 018c07230513..510adf63c211 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4137,6 +4137,8 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
 int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
 			   struct iov_iter *to, int len,
 			   struct ahash_request *hash);
+int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset,
+				      struct iov_iter *to, int len, u32 *crcp);
 int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
 				 struct iov_iter *from, int len);
 int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
-- 
cgit v1.2.3


From c93f75b2d755c35b596084ddd3feb3528284a53f Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 19 May 2025 10:50:12 -0700
Subject: net: remove skb_copy_and_hash_datagram_iter()

Now that skb_copy_and_hash_datagram_iter() is no longer used, remove it.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://patch.msgid.link/20250519175012.36581-11-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 510adf63c211..5520524c93bf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -274,7 +274,6 @@
 			 SKB_DATA_ALIGN(sizeof(struct sk_buff)) +	\
 			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
-struct ahash_request;
 struct net_device;
 struct scatterlist;
 struct pipe_inode_info;
@@ -4134,9 +4133,6 @@ static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
 }
 int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
 				   struct msghdr *msg);
-int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
-			   struct iov_iter *to, int len,
-			   struct ahash_request *hash);
 int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset,
 				      struct iov_iter *to, int len, u32 *crcp);
 int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
-- 
cgit v1.2.3


From 31afd6bc55cc0093c3e5b0a368319e423d4de8ea Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Sat, 17 May 2025 22:13:45 +0200
Subject: net: phy: pass PHY driver to .match_phy_device OP

Pass PHY driver pointer to .match_phy_device OP in addition to phydev.
Having access to the PHY driver struct might be useful to check the
PHY ID of the driver is being matched for in case the PHY ID scanned in
the phydev is not consistent.

A scenario for this is a PHY that change PHY ID after a firmware is
loaded, in such case, the PHY ID stored in PHY device struct is not
valid anymore and PHY will manually scan the ID in the match_phy_device
function.

Having the PHY driver info is also useful for those PHY driver that
implement multiple simple .match_phy_device OP to match specific MMD PHY
ID. With this extra info if the parsing logic is the same, the matching
function can be generalized by using the phy_id in the PHY driver
instead of hardcoding.

Rust wrapper callback is updated to align to the new match_phy_device
arguments.

Suggested-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Reviewed-by: Benno Lossin <lossin@kernel.org> # for Rust
Reviewed-by: FUJITA Tomonori <fujita.tomonori@gmail.com>
Link: https://patch.msgid.link/20250517201353.5137-2-ansuelsmth@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 92a88b5ce356..10e66d45a8e8 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -990,7 +990,8 @@ struct phy_driver {
 	 * driver for the given phydev.	 If NULL, matching is based on
 	 * phy_id and phy_id_mask.
 	 */
-	int (*match_phy_device)(struct phy_device *phydev);
+	int (*match_phy_device)(struct phy_device *phydev,
+				const struct phy_driver *phydrv);
 
 	/**
 	 * @set_wol: Some devices (e.g. qnap TS-119P II) require PHY
-- 
cgit v1.2.3


From d6c45707ac84c2d9f274ece1cea4dddb97996bde Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Sat, 17 May 2025 22:13:48 +0200
Subject: net: phy: introduce genphy_match_phy_device()

Introduce new API, genphy_match_phy_device(), to provide a way to check
to match a PHY driver for a PHY device based on the info stored in the
PHY device struct.

The function generalize the logic used in phy_bus_match() to check the
PHY ID whether if C45 or C22 ID should be used for matching.

This is useful for custom .match_phy_device function that wants to use
the generic logic under some condition. (example a PHY is already setup
and provide the correct PHY ID)

Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Link: https://patch.msgid.link/20250517201353.5137-5-ansuelsmth@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 10e66d45a8e8..32b9da274115 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1868,6 +1868,9 @@ char *phy_attached_info_irq(struct phy_device *phydev)
 	__malloc;
 void phy_attached_info(struct phy_device *phydev);
 
+int genphy_match_phy_device(struct phy_device *phydev,
+			    const struct phy_driver *phydrv);
+
 /* Clause 22 PHY */
 int genphy_read_abilities(struct phy_device *phydev);
 int genphy_setup_forced(struct phy_device *phydev);
-- 
cgit v1.2.3


From 4ff4d86f6cceb6bea583bdb230e5439655778cce Mon Sep 17 00:00:00 2001
From: Kory Maincent <kory.maincent@bootlin.com>
Date: Mon, 19 May 2025 10:45:05 +0200
Subject: net: Add support for providing the PTP hardware source in tsinfo

Multi-PTP source support within a network topology has been merged,
but the hardware timestamp source is not yet exposed to users.
Currently, users only see the PTP index, which does not indicate
whether the timestamp comes from a PHY or a MAC.

Add support for reporting the hwtstamp source using a
hwtstamp-source field, alongside hwtstamp-phyindex, to describe
the origin of the hardware timestamp.

Remove HWTSTAMP_SOURCE_UNSPEC enum value as it is not used at all.

Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20250519-feature_ptp_source-v4-1-5d10e19a0265@bootlin.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/ethtool.h    | 5 +++++
 include/linux/net_tstamp.h | 7 +------
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 117718c24814..5e0dd333ad1f 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -19,6 +19,7 @@
 #include <linux/netlink.h>
 #include <linux/timer_types.h>
 #include <uapi/linux/ethtool.h>
+#include <uapi/linux/ethtool_netlink_generated.h>
 #include <uapi/linux/net_tstamp.h>
 
 #define ETHTOOL_MM_MAX_VERIFY_TIME_MS		128
@@ -830,6 +831,8 @@ struct ethtool_rxfh_param {
  * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags
  * @phc_index: device index of the associated PHC, or -1 if there is none
  * @phc_qualifier: qualifier of the associated PHC
+ * @phc_source: source device of the associated PHC
+ * @phc_phyindex: index of PHY device source of the associated PHC
  * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values
  * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values
  */
@@ -838,6 +841,8 @@ struct kernel_ethtool_ts_info {
 	u32 so_timestamping;
 	int phc_index;
 	enum hwtstamp_provider_qualifier phc_qualifier;
+	enum hwtstamp_source phc_source;
+	int phc_phyindex;
 	enum hwtstamp_tx_types tx_types;
 	enum hwtstamp_rx_filters rx_filters;
 };
diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h
index ff0758e88ea1..f4936d9c2b3c 100644
--- a/include/linux/net_tstamp.h
+++ b/include/linux/net_tstamp.h
@@ -4,6 +4,7 @@
 #define _LINUX_NET_TIMESTAMPING_H_
 
 #include <uapi/linux/net_tstamp.h>
+#include <uapi/linux/ethtool_netlink_generated.h>
 
 #define SOF_TIMESTAMPING_SOFTWARE_MASK	(SOF_TIMESTAMPING_RX_SOFTWARE | \
 					 SOF_TIMESTAMPING_TX_SOFTWARE | \
@@ -13,12 +14,6 @@
 					 SOF_TIMESTAMPING_TX_HARDWARE | \
 					 SOF_TIMESTAMPING_RAW_HARDWARE)
 
-enum hwtstamp_source {
-	HWTSTAMP_SOURCE_UNSPEC,
-	HWTSTAMP_SOURCE_NETDEV,
-	HWTSTAMP_SOURCE_PHYLIB,
-};
-
 /**
  * struct hwtstamp_provider_desc - hwtstamp provider description
  *
-- 
cgit v1.2.3


From 0e81cfd971dc4833c699dcd8924e54a5021bc4e8 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Mon, 19 May 2025 13:57:57 -0700
Subject: af_unix: Move SOCK_PASS{CRED,PIDFD,SEC} to struct sock.

As explained in the next patch, SO_PASSRIGHTS would have a problem
if we assigned a corresponding bit to socket->flags, so it must be
managed in struct sock.

Mixing socket->flags and sk->sk_flags for similar options will look
confusing, and sk->sk_flags does not have enough space on 32bit system.

Also, as mentioned in commit 16e572626961 ("af_unix: dont send
SCM_CREDENTIALS by default"), SOCK_PASSCRED and SOCK_PASSPID handling
is known to be slow, and managing the flags in struct socket cannot
avoid that for embryo sockets.

Let's move SOCK_PASS{CRED,PIDFD,SEC} to struct sock.

While at it, other SOCK_XXX flags in net.h are grouped as enum.

Note that assign_bit() was atomic, so the writer side is moved down
after lock_sock() in setsockopt(), but the bit is only read once
in sendmsg() and recvmsg(), so lock_sock() is not needed there.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 0ff950eecc6b..f8418d6e33e0 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -36,14 +36,13 @@ struct net;
  * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected.
  * Eventually all flags will be in sk->sk_wq->flags.
  */
-#define SOCKWQ_ASYNC_NOSPACE	0
-#define SOCKWQ_ASYNC_WAITDATA	1
-#define SOCK_NOSPACE		2
-#define SOCK_PASSCRED		3
-#define SOCK_PASSSEC		4
-#define SOCK_SUPPORT_ZC		5
-#define SOCK_CUSTOM_SOCKOPT	6
-#define SOCK_PASSPIDFD		7
+enum socket_flags {
+	SOCKWQ_ASYNC_NOSPACE,
+	SOCKWQ_ASYNC_WAITDATA,
+	SOCK_NOSPACE,
+	SOCK_SUPPORT_ZC,
+	SOCK_CUSTOM_SOCKOPT,
+};
 
 #ifndef ARCH_HAS_SOCKET_TYPES
 /**
-- 
cgit v1.2.3


From a1f1acb9c5db9b385c9b3eb1f27f897c06df49ae Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 May 2025 12:28:44 +0200
Subject: netfilter: nf_dup{4, 6}: Move duplication check to task_struct

nf_skb_duplicated is a per-CPU variable and relies on disabled BH for its
locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.

Due to the recursion involved, the simplest change is to make it a
per-task variable.

Move the per-CPU variable nf_skb_duplicated to task_struct and name it
in_nf_duplicate. Add it to the existing bitfield so it doesn't use
additional memory.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Valentin Schneider <vschneid@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 11 -----------
 include/linux/sched.h     |  1 +
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 2b8aac2c70ad..892d12823ed4 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -497,17 +497,6 @@ struct nf_defrag_hook {
 extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook;
 extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook;
 
-/*
- * nf_skb_duplicated - TEE target has sent a packet
- *
- * When a xtables target sends a packet, the OUTPUT and POSTROUTING
- * hooks are traversed again, i.e. nft and xtables are invoked recursively.
- *
- * This is used by xtables TEE target to prevent the duplicated skb from
- * being duplicated again.
- */
-DECLARE_PER_CPU(bool, nf_skb_duplicated);
-
 /*
  * Contains bitmask of ctnetlink event subscribers, if any.
  * Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac1982893..52d9c52dc8f2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1044,6 +1044,7 @@ struct task_struct {
 	/* delay due to memory thrashing */
 	unsigned                        in_thrashing:1;
 #endif
+	unsigned			in_nf_duplicate:1;
 #ifdef CONFIG_PREEMPT_RT
 	struct netdev_xmit		net_xmit;
 #endif
-- 
cgit v1.2.3


From f37ad91270397a6d053e8623bdb3cf79859691d2 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 May 2025 12:28:46 +0200
Subject: netfilter: nf_dup_netdev: Move the recursion counter struct
 netdev_xmit

nf_dup_skb_recursion is a per-CPU variable and relies on disabled BH for its
locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.

Move nf_dup_skb_recursion to struct netdev_xmit, provide wrappers.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netdevice_xmit.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h
index 848735b3a7c0..813a19122ebb 100644
--- a/include/linux/netdevice_xmit.h
+++ b/include/linux/netdevice_xmit.h
@@ -11,6 +11,9 @@ struct netdev_xmit {
 #if IS_ENABLED(CONFIG_NET_ACT_MIRRED)
 	u8 sched_mirred_nest;
 #endif
+#if IS_ENABLED(CONFIG_NF_DUP_NETDEV)
+	u8 nf_dup_skb_recursion;
+#endif
 };
 
 #endif
-- 
cgit v1.2.3


From 90869f43d06dfc836def2f53850a878f829e443e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 22 May 2025 15:49:33 +0200
Subject: netfilter: conntrack: make nf_conntrack_id callable without a module
 dependency

While nf_conntrack_id() doesn't need any functionaliy from conntrack, it
does reside in nf_conntrack_core.c -- callers add a module
dependency on conntrack.

Followup patch will need to compute the conntrack id from nf_tables_trace.c
to include it in nf_trace messages emitted to userspace via netlink.

I don't want to introduce a module dependency between nf_tables and
conntrack for this.

Since trace is slowpath, the added indirection is ok.

One alternative is to move nf_conntrack_id to the netfilter/core.c,
but I don't see a compelling reason so far.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 892d12823ed4..20947f2c685b 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -470,6 +470,7 @@ struct nf_ct_hook {
 	void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
 	void (*set_closing)(struct nf_conntrack *nfct);
 	int (*confirm)(struct sk_buff *skb);
+	u32 (*get_id)(const struct nf_conntrack *nfct);
 };
 extern const struct nf_ct_hook __rcu *nf_ct_hook;
 
-- 
cgit v1.2.3


From 73319a8ee18b9cf0b2dac87f8521595e0381ba0c Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 21 May 2025 22:44:26 +0200
Subject: netfilter: nf_tables: Have a list of nf_hook_ops in nft_hook

Supporting a 1:n relationship between nft_hook and nf_hook_ops is
convenient since a chain's or flowtable's nft_hooks may remain in place
despite matching interfaces disappearing. This stabilizes ruleset dumps
in that regard and opens the possibility to claim newly added interfaces
which match the spec. Also it prepares for wildcard interface specs
since these will potentially match multiple interfaces.

All spots dealing with hook registration are updated to handle a list of
multiple nf_hook_ops, but nft_netdev_hook_alloc() only adds a single
item for now to retain the old behaviour. The only expected functional
change here is how vanishing interfaces are handled: Instead of dropping
the respective nft_hook, only the matching nf_hook_ops are dropped.

To safely remove individual ops from the list in netdev handlers, an
rcu_head is added to struct nf_hook_ops so kfree_rcu() may be used.
There is at least nft_flowtable_find_dev() which may be iterating
through the list at the same time.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 20947f2c685b..5f896fcc074d 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -95,6 +95,9 @@ enum nf_hook_ops_type {
 };
 
 struct nf_hook_ops {
+	struct list_head	list;
+	struct rcu_head		rcu;
+
 	/* User fills in from here down. */
 	nf_hookfn		*hook;
 	struct net_device	*dev;
-- 
cgit v1.2.3


From 384492c48e6a88c9a7f0376d8e8ac7f557988e92 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <stfomichev@gmail.com>
Date: Tue, 20 May 2025 13:30:42 -0700
Subject: net: devmem: support single IOV with sendmsg

sendmsg() with a single iov becomes ITER_UBUF, sendmsg() with multiple
iovs becomes ITER_IOVEC. iter_iov_len does not return correct
value for UBUF, so teach to treat UBUF differently.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Pavel Begunkov <asml.silence@gmail.com>
Cc: Mina Almasry <almasrymina@google.com>
Fixes: bd61848900bf ("net: devmem: Implement TX path")
Signed-off-by: Stanislav Fomichev <stfomichev@gmail.com>
Acked-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/uio.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 49ece9e1888f..393d0622cc28 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -99,7 +99,13 @@ static inline const struct iovec *iter_iov(const struct iov_iter *iter)
 }
 
 #define iter_iov_addr(iter)	(iter_iov(iter)->iov_base + (iter)->iov_offset)
-#define iter_iov_len(iter)	(iter_iov(iter)->iov_len - (iter)->iov_offset)
+
+static inline size_t iter_iov_len(const struct iov_iter *i)
+{
+	if (i->iter_type == ITER_UBUF)
+		return i->count;
+	return iter_iov(i)->iov_len - i->iov_offset;
+}
 
 static inline enum iter_type iov_iter_type(const struct iov_iter *i)
 {
-- 
cgit v1.2.3


From 45ca7e9f0730ae36fc610e675b990e9cc9ca0714 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Wed, 21 May 2025 14:17:05 +0200
Subject: vsock/virtio: fix `rx_bytes` accounting for stream sockets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In `struct virtio_vsock_sock`, we maintain two counters:
- `rx_bytes`: used internally to track how many bytes have been read.
  This supports mechanisms like .stream_has_data() and sock_rcvlowat().
- `fwd_cnt`: used for the credit mechanism to inform available receive
  buffer space to the remote peer.

These counters are updated via virtio_transport_inc_rx_pkt() and
virtio_transport_dec_rx_pkt().

Since the beginning with commit 06a8fc78367d ("VSOCK: Introduce
virtio_vsock_common.ko"), we call virtio_transport_dec_rx_pkt() in
virtio_transport_stream_do_dequeue() only when we consume the entire
packet, so partial reads, do not update `rx_bytes` and `fwd_cnt`.

This is fine for `fwd_cnt`, because we still have space used for the
entire packet, and we don't want to update the credit for the other
peer until we free the space of the entire packet. However, this
causes `rx_bytes` to be stale on partial reads.

Previously, this didn’t cause issues because `rx_bytes` was used only by
.stream_has_data(), and any unread portion of a packet implied data was
still available. However, since commit 93b808876682
("virtio/vsock: fix logic which reduces credit update messages"), we now
rely on `rx_bytes` to determine if a credit update should be sent when
the data in the RX queue drops below SO_RCVLOWAT value.

This patch fixes the accounting by updating `rx_bytes` with the number
of bytes actually read, even on partial reads, while leaving `fwd_cnt`
untouched until the packet is fully consumed. Also introduce a new
`buf_used` counter to check that the remote peer is honoring the given
credit; this was previously done via `rx_bytes`.

Fixes: 93b808876682 ("virtio/vsock: fix logic which reduces credit update messages")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://patch.msgid.link/20250521121705.196379-1-sgarzare@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/virtio_vsock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 0387d64e2c66..36fb3edfa403 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -140,6 +140,7 @@ struct virtio_vsock_sock {
 	u32 last_fwd_cnt;
 	u32 rx_bytes;
 	u32 buf_alloc;
+	u32 buf_used;
 	struct sk_buff_head rx_queue;
 	u32 msg_count;
 };
-- 
cgit v1.2.3


From ed449ddbd867f2cc02d6890c231431f264a876eb Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 21 May 2025 13:46:09 -0700
Subject: net: core: Convert inet_addr_is_any() to sockaddr_storage

All the callers of inet_addr_is_any() have a sockaddr_storage-backed
sockaddr. Avoid casts and switch prototype to the actual object being
used.

Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> # SCSI
Signed-off-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20250521204619.2301870-1-kees@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/inet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/inet.h b/include/linux/inet.h
index bd8276e96e60..9158772f3559 100644
--- a/include/linux/inet.h
+++ b/include/linux/inet.h
@@ -55,6 +55,6 @@ extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char
 
 extern int inet_pton_with_scope(struct net *net, unsigned short af,
 		const char *src, const char *port, struct sockaddr_storage *addr);
-extern bool inet_addr_is_any(struct sockaddr *addr);
+bool inet_addr_is_any(struct sockaddr_storage *addr);
 
 #endif	/* _LINUX_INET_H */
-- 
cgit v1.2.3


From 161972650d6795ea00f8b72557cf3c3e593ed250 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 21 May 2025 13:46:10 -0700
Subject: net: core: Switch netif_set_mac_address() to struct sockaddr_storage

In order to avoid passing around struct sockaddr that has a size the
compiler cannot reason about (nor track at runtime), convert
netif_set_mac_address() to take struct sockaddr_storage. This is just a
cast conversion, so there is are no binary changes. Following patches
will make actual allocation changes.

Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20250521204619.2301870-2-kees@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ea9d335de130..47200a394a02 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4212,7 +4212,7 @@ int netif_set_mtu(struct net_device *dev, int new_mtu);
 int dev_set_mtu(struct net_device *, int);
 int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
 			      struct netlink_ext_ack *extack);
-int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
 			  struct netlink_ext_ack *extack);
 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
 			struct netlink_ext_ack *extack);
-- 
cgit v1.2.3


From 9ca6804ab7c34f65fcf2e29333a39e7807c30b60 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 21 May 2025 13:46:14 -0700
Subject: net: core: Convert dev_set_mac_address() to struct sockaddr_storage

All users of dev_set_mac_address() are now using a struct sockaddr_storage.
Convert the internal data type to struct sockaddr_storage, drop the casts,
and update pointer types.

Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20250521204619.2301870-6-kees@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 47200a394a02..b4242b997373 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4214,7 +4214,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
 			      struct netlink_ext_ack *extack);
 int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
 			  struct netlink_ext_ack *extack);
-int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
 			struct netlink_ext_ack *extack);
 int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
 			     struct netlink_ext_ack *extack);
-- 
cgit v1.2.3


From ae9fcd5a0f8ab7e12619e1c66312a03b842935c3 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 21 May 2025 13:46:16 -0700
Subject: net: core: Convert dev_set_mac_address_user() to use struct
 sockaddr_storage

Convert callers of dev_set_mac_address_user() to use struct
sockaddr_storage. Add sanity checks on dev->addr_len usage.

Signed-off-by: Kees Cook <kees@kernel.org>
Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://patch.msgid.link/20250521204619.2301870-8-kees@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4242b997373..adb14db25798 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4216,7 +4216,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
 			  struct netlink_ext_ack *extack);
 int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
 			struct netlink_ext_ack *extack);
-int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
+int dev_set_mac_address_user(struct net_device *dev, struct sockaddr_storage *ss,
 			     struct netlink_ext_ack *extack);
 int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
 int dev_get_port_parent_id(struct net_device *dev,
-- 
cgit v1.2.3


From e9cb929670a1e98b592b30f03f06e9e20110f318 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 22 May 2025 13:21:47 +0200
Subject: net: phy: fix up const issues in to_mdio_device() and to_phy_device()

Both to_mdio_device() and to_phy_device() "throw away" the const pointer
attribute passed to them and return a non-const pointer, which generally
is not a good thing overall.  Fix this up by using container_of_const()
which was designed for this very problem.

Cc: Alexander Lobakin <alobakin@pm.me>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: Russell King <linux@armlinux.org.uk>
Fixes: 7eab14de73a8 ("mdio, phy: fix -Wshadow warnings triggered by nested container_of()")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2025052246-conduit-glory-8fc9@gregkh
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mdio.h | 5 +----
 include/linux/phy.h  | 5 +----
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 3c3deac57894..e43ff9f980a4 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -45,10 +45,7 @@ struct mdio_device {
 	unsigned int reset_deassert_delay;
 };
 
-static inline struct mdio_device *to_mdio_device(const struct device *dev)
-{
-	return container_of(dev, struct mdio_device, dev);
-}
+#define to_mdio_device(__dev)	container_of_const(__dev, struct mdio_device, dev)
 
 /* struct mdio_driver_common: Common to all MDIO drivers */
 struct mdio_driver_common {
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 32b9da274115..e194dad1623d 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -744,10 +744,7 @@ struct phy_device {
 #define PHY_F_NO_IRQ		0x80000000
 #define PHY_F_RXC_ALWAYS_ON	0x40000000
 
-static inline struct phy_device *to_phy_device(const struct device *dev)
-{
-	return container_of(to_mdio_device(dev), struct phy_device, mdio);
-}
+#define to_phy_device(__dev)	container_of_const(to_mdio_device(__dev), struct phy_device, mdio)
 
 /**
  * struct phy_tdr_config - Configuration of a TDR raw test
-- 
cgit v1.2.3