Diffstat (limited to 'include')
-rw-r--r--  include/linux/atmdev.h | 6
-rw-r--r--  include/linux/avf/virtchnl.h | 23
-rw-r--r--  include/linux/can/bittiming.h | 2
-rw-r--r--  include/linux/can/dev.h | 4
-rw-r--r--  include/linux/dpll.h | 8
-rw-r--r--  include/linux/ethtool.h | 25
-rw-r--r--  include/linux/ethtool_netlink.h | 7
-rw-r--r--  include/linux/execmem.h | 8
-rw-r--r--  include/linux/fs.h | 4
-rw-r--r--  include/linux/ieee80211.h | 18
-rw-r--r--  include/linux/if_vlan.h | 23
-rw-r--r--  include/linux/ipv6.h | 1
-rw-r--r--  include/linux/libata.h | 7
-rw-r--r--  include/linux/mdio.h | 1
-rw-r--r--  include/linux/module.h | 5
-rw-r--r--  include/linux/mroute6.h | 7
-rw-r--r--  include/linux/msi.h | 2
-rw-r--r--  include/linux/net/intel/libie/pctype.h | 41
-rw-r--r--  include/linux/netdevice.h | 12
-rw-r--r--  include/linux/netpoll.h | 10
-rw-r--r--  include/linux/phy.h | 29
-rw-r--r--  include/linux/pse-pd/pse.h | 106
-rw-r--r--  include/linux/skbuff.h | 12
-rw-r--r--  include/linux/soc/marvell/silicons.h | 25
-rw-r--r--  include/linux/tcp.h | 3
-rw-r--r--  include/linux/usb/usbnet.h | 2
-rw-r--r--  include/net/devlink.h | 4
-rw-r--r--  include/net/dropreason-core.h | 18
-rw-r--r--  include/net/dsa.h | 2
-rw-r--r--  include/net/inet_hashtables.h | 6
-rw-r--r--  include/net/ip.h | 2
-rw-r--r--  include/net/ip6_tunnel.h | 3
-rw-r--r--  include/net/ip_tunnels.h | 2
-rw-r--r--  include/net/libeth/rx.h | 28
-rw-r--r--  include/net/libeth/tx.h | 36
-rw-r--r--  include/net/libeth/types.h | 106
-rw-r--r--  include/net/libeth/xdp.h | 1879
-rw-r--r--  include/net/libeth/xsk.h | 685
-rw-r--r--  include/net/mana/gdma.h | 17
-rw-r--r--  include/net/mana/mana.h | 173
-rw-r--r--  include/net/neighbour.h | 1
-rw-r--r--  include/net/netdev_queues.h | 9
-rw-r--r--  include/net/netmem.h | 23
-rw-r--r--  include/net/page_pool/helpers.h | 7
-rw-r--r--  include/net/pfcp.h | 2
-rw-r--r--  include/net/sock.h | 5
-rw-r--r--  include/net/tcp.h | 8
-rw-r--r--  include/net/udp_tunnel.h | 104
-rw-r--r--  include/net/vxlan.h | 5
-rw-r--r--  include/trace/events/tcp.h | 2
-rw-r--r--  include/uapi/linux/dpll.h | 12
-rw-r--r--  include/uapi/linux/ethtool_netlink.h | 4
-rw-r--r--  include/uapi/linux/ethtool_netlink_generated.h | 44
-rw-r--r--  include/uapi/linux/if_link.h | 1
-rw-r--r--  include/uapi/linux/in6.h | 4
55 files changed, 3426 insertions, 157 deletions
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 9b02961d65ee..45f2f278b50a 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -249,6 +249,12 @@ static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb)
ATM_SKB(skb)->atm_options = vcc->atm_options;
}
+static inline void atm_return_tx(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+ WARN_ON_ONCE(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize,
+ &sk_atm(vcc)->sk_wmem_alloc));
+}
+
static inline void atm_force_charge(struct atm_vcc *vcc,int truesize)
{
atomic_add(truesize, &sk_atm(vcc)->sk_rmem_alloc);
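The new atm_return_tx() is the completion-side counterpart of atm_account_tx() above: it subtracts the truesize recorded in ATM_SKB(skb)->acct_truesize from the socket's write-memory counter, warning if the subtraction would drop sk_wmem_alloc to zero, which would indicate unbalanced accounting. A minimal sketch of how a driver's TX-completion path might pair the two (the mydrv_pop() name is hypothetical):

	/* Hypothetical ATM driver completion handler: undo the accounting
	 * done by atm_account_tx() at transmit time, then free the skb.
	 */
	static void mydrv_pop(struct atm_vcc *vcc, struct sk_buff *skb)
	{
		atm_return_tx(vcc, skb);
		dev_kfree_skb_any(skb);
	}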
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index cf0afa60e4a7..5be1881abbb6 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -132,8 +132,8 @@ enum virtchnl_ops {
VIRTCHNL_OP_RELEASE_RDMA_IRQ_MAP = VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP,
VIRTCHNL_OP_CONFIG_RSS_KEY = 23,
VIRTCHNL_OP_CONFIG_RSS_LUT = 24,
- VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25,
- VIRTCHNL_OP_SET_RSS_HENA = 26,
+ VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS = 25,
+ VIRTCHNL_OP_SET_RSS_HASHCFG = 26,
VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27,
VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28,
VIRTCHNL_OP_REQUEST_QUEUES = 29,
@@ -974,18 +974,19 @@ struct virtchnl_rss_lut {
VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rss_lut);
#define virtchnl_rss_lut_LEGACY_SIZEOF 6
-/* VIRTCHNL_OP_GET_RSS_HENA_CAPS
- * VIRTCHNL_OP_SET_RSS_HENA
- * VF sends these messages to get and set the hash filter enable bits for RSS.
+/* VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS
+ * VIRTCHNL_OP_SET_RSS_HASHCFG
+ * VF sends these messages to get and set the hash filter configuration for RSS.
* By default, the PF sets these to all possible traffic types that the
* hardware supports. The VF can query this value if it wants to change the
* traffic types that are hashed by the hardware.
*/
-struct virtchnl_rss_hena {
- u64 hena;
+struct virtchnl_rss_hashcfg {
+ /* Bits defined by enum libie_filter_pctype */
+ u64 hashcfg;
};
-VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena);
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hashcfg);
/* Type of RSS algorithm */
enum virtchnl_rss_algorithm {
@@ -1779,10 +1780,10 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
case VIRTCHNL_OP_CONFIG_RSS_HFUNC:
valid_len = sizeof(struct virtchnl_rss_hfunc);
break;
- case VIRTCHNL_OP_GET_RSS_HENA_CAPS:
+ case VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS:
break;
- case VIRTCHNL_OP_SET_RSS_HENA:
- valid_len = sizeof(struct virtchnl_rss_hena);
+ case VIRTCHNL_OP_SET_RSS_HASHCFG:
+ valid_len = sizeof(struct virtchnl_rss_hashcfg);
break;
case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
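The HENA ("hash enable") terminology is replaced with HASHCFG throughout; the wire format is unchanged, and the u64 bit positions are now documented via enum libie_filter_pctype (see include/linux/net/intel/libie/pctype.h below). A hedged sketch of how a VF driver might build the message; the transport call is driver-specific and not shown:

	struct virtchnl_rss_hashcfg cfg = {
		/* Hash on plain IPv4/IPv6 TCP and UDP flows only. */
		.hashcfg = BIT_ULL(LIBIE_FILTER_PCTYPE_NONF_IPV4_TCP) |
			   BIT_ULL(LIBIE_FILTER_PCTYPE_NONF_IPV4_UDP) |
			   BIT_ULL(LIBIE_FILTER_PCTYPE_NONF_IPV6_TCP) |
			   BIT_ULL(LIBIE_FILTER_PCTYPE_NONF_IPV6_UDP),
	};

	/* Sent to the PF as VIRTCHNL_OP_SET_RSS_HASHCFG with a
	 * sizeof(cfg) payload, matching virtchnl_vc_validate_vf_msg().
	 */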
diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h
index 9b8a9c39614b..5dfdbb63b1d5 100644
--- a/include/linux/can/bittiming.h
+++ b/include/linux/can/bittiming.h
@@ -14,7 +14,7 @@
#define CAN_BITRATE_UNSET 0
#define CAN_BITRATE_UNKNOWN (-1U)
-#define CAN_CTRLMODE_TDC_MASK \
+#define CAN_CTRLMODE_FD_TDC_MASK \
(CAN_CTRLMODE_TDC_AUTO | CAN_CTRLMODE_TDC_MANUAL)
/*
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 492d23bec7be..9a92cbe5b2cb 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -91,9 +91,9 @@ struct can_priv {
struct can_berr_counter *bec);
};
-static inline bool can_tdc_is_enabled(const struct can_priv *priv)
+static inline bool can_fd_tdc_is_enabled(const struct can_priv *priv)
{
- return !!(priv->ctrlmode & CAN_CTRLMODE_TDC_MASK);
+ return !!(priv->ctrlmode & CAN_CTRLMODE_FD_TDC_MASK);
}
/*
diff --git a/include/linux/dpll.h b/include/linux/dpll.h
index 5e4f9ab1cf75..6ad6c2968a28 100644
--- a/include/linux/dpll.h
+++ b/include/linux/dpll.h
@@ -30,6 +30,14 @@ struct dpll_device_ops {
void *dpll_priv,
unsigned long *qls,
struct netlink_ext_ack *extack);
+ int (*phase_offset_monitor_set)(const struct dpll_device *dpll,
+ void *dpll_priv,
+ enum dpll_feature_state state,
+ struct netlink_ext_ack *extack);
+ int (*phase_offset_monitor_get)(const struct dpll_device *dpll,
+ void *dpll_priv,
+ enum dpll_feature_state *state,
+ struct netlink_ext_ack *extack);
};
struct dpll_pin_ops {
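The two new ops let a driver expose the DPLL phase-offset monitor feature. A minimal sketch, assuming a driver-private struct, a hypothetical register-write helper, and the DPLL_FEATURE_STATE_{ENABLE,DISABLE} values from the matching uapi/linux/dpll.h change (listed in the diffstat, not shown here):

	static int mydpll_pom_set(const struct dpll_device *dpll, void *dpll_priv,
				  enum dpll_feature_state state,
				  struct netlink_ext_ack *extack)
	{
		struct mydpll *md = dpll_priv;

		md->pom_on = state == DPLL_FEATURE_STATE_ENABLE;
		return mydpll_write_cfg(md);	/* hypothetical */
	}

	static int mydpll_pom_get(const struct dpll_device *dpll, void *dpll_priv,
				  enum dpll_feature_state *state,
				  struct netlink_ext_ack *extack)
	{
		struct mydpll *md = dpll_priv;

		*state = md->pom_on ? DPLL_FEATURE_STATE_ENABLE :
				      DPLL_FEATURE_STATE_DISABLE;
		return 0;
	}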
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 5e0dd333ad1f..59877fd2a1d3 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -536,7 +536,7 @@ struct ethtool_rmon_hist_range {
u16 high;
};
-#define ETHTOOL_RMON_HIST_MAX 10
+#define ETHTOOL_RMON_HIST_MAX 11
/**
* struct ethtool_rmon_stats - selected RMON (RFC 2819) statistics
@@ -826,6 +826,19 @@ struct ethtool_rxfh_param {
};
/**
+ * struct ethtool_rxfh_fields - Rx Flow Hashing (RXFH) header field config
+ * @data: which header fields are used for hashing, bitmask of RXH_* defines
+ * @flow_type: L2-L4 network traffic flow type
+ * @rss_context: RSS context, will only be used if rxfh_per_ctx_fields is
+ * set in struct ethtool_ops
+ */
+struct ethtool_rxfh_fields {
+ u32 data;
+ u32 flow_type;
+ u32 rss_context;
+};
+
+/**
* struct kernel_ethtool_ts_info - kernel copy of struct ethtool_ts_info
* @cmd: command number = %ETHTOOL_GET_TS_INFO
* @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags
@@ -855,6 +868,8 @@ struct kernel_ethtool_ts_info {
* @cap_rss_ctx_supported: indicates if the driver supports RSS
* contexts via legacy API, drivers implementing @create_rxfh_context
* do not have to set this bit.
+ * @rxfh_per_ctx_fields: device supports selecting different header fields
+ * for Rx hash calculation and RSS for each additional context.
* @rxfh_per_ctx_key: device supports setting different RSS key for each
* additional context. Netlink API should report hfunc, key, and input_xfrm
* for every context, not just context 0.
@@ -968,6 +983,8 @@ struct kernel_ethtool_ts_info {
* will remain unchanged.
* Returns a negative error code or zero. An error code must be returned
* if at least one unsupported change was requested.
+ * @get_rxfh_fields: Get header fields used for flow hashing.
+ * @set_rxfh_fields: Set header fields used for flow hashing.
* @create_rxfh_context: Create a new RSS context with the specified RX flow
* hash indirection table, hash key, and hash function.
* The &struct ethtool_rxfh_context for this context is passed in @ctx;
@@ -1084,6 +1101,7 @@ struct ethtool_ops {
u32 supported_input_xfrm:8;
u32 cap_link_lanes_supported:1;
u32 cap_rss_ctx_supported:1;
+ u32 rxfh_per_ctx_fields:1;
u32 rxfh_per_ctx_key:1;
u32 cap_rss_rxnfc_adds:1;
u32 rxfh_indir_space;
@@ -1153,6 +1171,11 @@ struct ethtool_ops {
int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *);
int (*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *,
struct netlink_ext_ack *extack);
+ int (*get_rxfh_fields)(struct net_device *,
+ struct ethtool_rxfh_fields *);
+ int (*set_rxfh_fields)(struct net_device *,
+ const struct ethtool_rxfh_fields *,
+ struct netlink_ext_ack *extack);
int (*create_rxfh_context)(struct net_device *,
struct ethtool_rxfh_context *ctx,
const struct ethtool_rxfh_param *rxfh,
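get_rxfh_fields()/set_rxfh_fields() carve the flow-hash configuration out of the heavily overloaded .get_rxnfc/.set_rxnfc ops. A minimal sketch of a driver implementation, assuming the driver keeps one RXH_* bitmap per flow type; all names other than the ops themselves are illustrative:

	static int mydrv_get_rxfh_fields(struct net_device *dev,
					 struct ethtool_rxfh_fields *fields)
	{
		struct mydrv_priv *priv = netdev_priv(dev);

		fields->data = priv->flow_hash[fields->flow_type];
		return 0;
	}

	static int mydrv_set_rxfh_fields(struct net_device *dev,
					 const struct ethtool_rxfh_fields *fields,
					 struct netlink_ext_ack *extack)
	{
		struct mydrv_priv *priv = netdev_priv(dev);

		/* Per-context fields require rxfh_per_ctx_fields in ethtool_ops */
		if (fields->rss_context)
			return -EOPNOTSUPP;

		priv->flow_hash[fields->flow_type] = fields->data;
		return mydrv_commit_rss(priv);	/* hypothetical HW write */
	}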
diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h
index aba91335273a..1dcc4059b5ab 100644
--- a/include/linux/ethtool_netlink.h
+++ b/include/linux/ethtool_netlink.h
@@ -43,6 +43,8 @@ void ethtool_aggregate_rmon_stats(struct net_device *dev,
struct ethtool_rmon_stats *rmon_stats);
bool ethtool_dev_mm_supported(struct net_device *dev);
+void ethnl_pse_send_ntf(struct net_device *netdev, unsigned long notif);
+
#else
static inline int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd)
{
@@ -120,6 +122,11 @@ static inline bool ethtool_dev_mm_supported(struct net_device *dev)
return false;
}
+static inline void ethnl_pse_send_ntf(struct net_device *netdev,
+ unsigned long notif)
+{
+}
+
#endif /* IS_ENABLED(CONFIG_ETHTOOL_NETLINK) */
static inline int ethnl_cable_test_result(struct phy_device *phydev, u8 pair,
diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index ca42d5e46ccc..3be35680a54f 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -54,7 +54,7 @@ enum execmem_range_flags {
EXECMEM_ROX_CACHE = (1 << 1),
};
-#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM)
+#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX
/**
* execmem_fill_trapping_insns - set memory to contain instructions that
* will trap
@@ -94,15 +94,9 @@ int execmem_make_temp_rw(void *ptr, size_t size);
* Return: 0 on success or negative error code on failure.
*/
int execmem_restore_rox(void *ptr, size_t size);
-
-/*
- * Called from mark_readonly(), where the system transitions to ROX.
- */
-void execmem_cache_make_ro(void);
#else
static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; }
static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; }
-static inline void execmem_cache_make_ro(void) { }
#endif
/**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4ec77da65f14..b085f161ed22 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -399,7 +399,9 @@ struct readahead_control;
{ IOCB_WAITQ, "WAITQ" }, \
{ IOCB_NOIO, "NOIO" }, \
{ IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \
- { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }
+ { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \
+ { IOCB_AIO_RW, "AIO_RW" }, \
+ { IOCB_HAS_METADATA, "AIO_HAS_METADATA" }
struct kiocb {
struct file *ki_filp;
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ce377f7fb912..22f39e5e2ff1 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1278,7 +1278,7 @@ struct ieee80211_ext {
u8 sa[ETH_ALEN];
__le32 timestamp;
u8 change_seq;
- u8 variable[0];
+ u8 variable[];
} __packed s1g_beacon;
} u;
} __packed __aligned(2);
@@ -1536,7 +1536,7 @@ struct ieee80211_mgmt {
u8 action_code;
u8 dialog_token;
__le16 capability;
- u8 variable[0];
+ u8 variable[];
} __packed tdls_discover_resp;
struct {
u8 action_code;
@@ -1721,35 +1721,35 @@ struct ieee80211_tdls_data {
struct {
u8 dialog_token;
__le16 capability;
- u8 variable[0];
+ u8 variable[];
} __packed setup_req;
struct {
__le16 status_code;
u8 dialog_token;
__le16 capability;
- u8 variable[0];
+ u8 variable[];
} __packed setup_resp;
struct {
__le16 status_code;
u8 dialog_token;
- u8 variable[0];
+ u8 variable[];
} __packed setup_cfm;
struct {
__le16 reason_code;
- u8 variable[0];
+ u8 variable[];
} __packed teardown;
struct {
u8 dialog_token;
- u8 variable[0];
+ u8 variable[];
} __packed discover_req;
struct {
u8 target_channel;
u8 oper_class;
- u8 variable[0];
+ u8 variable[];
} __packed chan_switch_req;
struct {
__le16 status_code;
- u8 variable[0];
+ u8 variable[];
} __packed chan_switch_resp;
} u;
} __packed;
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 38456b42cdb5..15e01935d3fa 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -79,11 +79,6 @@ static inline struct vlan_ethhdr *skb_vlan_eth_hdr(const struct sk_buff *skb)
/* found in socket.c */
extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
-static inline bool is_vlan_dev(const struct net_device *dev)
-{
- return dev->priv_flags & IFF_802_1Q_VLAN;
-}
-
#define skb_vlan_tag_present(__skb) (!!(__skb)->vlan_all)
#define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci)
#define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK)
@@ -136,7 +131,7 @@ struct vlan_pcpu_stats {
u32 tx_dropped;
};
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
extern struct net_device *__vlan_find_dev_deep_rcu(struct net_device *real_dev,
__be16 vlan_proto, u16 vlan_id);
@@ -200,6 +195,11 @@ struct vlan_dev_priv {
#endif
};
+static inline bool is_vlan_dev(const struct net_device *dev)
+{
+ return dev->priv_flags & IFF_802_1Q_VLAN;
+}
+
static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
{
return netdev_priv(dev);
@@ -237,6 +237,11 @@ extern void vlan_vids_del_by_dev(struct net_device *dev,
extern bool vlan_uses_dev(const struct net_device *dev);
#else
+static inline bool is_vlan_dev(const struct net_device *dev)
+{
+ return false;
+}
+
static inline struct net_device *
__vlan_find_dev_deep_rcu(struct net_device *real_dev,
__be16 vlan_proto, u16 vlan_id)
@@ -254,19 +259,19 @@ vlan_for_each(struct net_device *dev,
static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev)
{
- BUG();
+ WARN_ON_ONCE(1);
return NULL;
}
static inline u16 vlan_dev_vlan_id(const struct net_device *dev)
{
- BUG();
+ WARN_ON_ONCE(1);
return 0;
}
static inline __be16 vlan_dev_vlan_proto(const struct net_device *dev)
{
- BUG();
+ WARN_ON_ONCE(1);
return 0;
}
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5aeeed22f35b..db0eb0d86b64 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -156,6 +156,7 @@ struct inet6_skb_parm {
#define IP6SKB_SEG6 256
#define IP6SKB_FAKEJUMBO 512
#define IP6SKB_MULTIPATH 1024
+#define IP6SKB_MCROUTE 2048
};
#if defined(CONFIG_NET_L3_MASTER_DEV)
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 31be45fd47a6..1e5aec839041 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1352,7 +1352,7 @@ int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm);
int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm);
unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev,
const struct ata_acpi_gtm *gtm);
-int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm);
+int ata_acpi_cbl_pata_type(struct ata_port *ap);
#else
static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap)
{
@@ -1377,10 +1377,9 @@ static inline unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev,
return 0;
}
-static inline int ata_acpi_cbl_80wire(struct ata_port *ap,
- const struct ata_acpi_gtm *gtm)
+static inline int ata_acpi_cbl_pata_type(struct ata_port *ap)
{
- return 0;
+ return ATA_CBL_PATA40;
}
#endif
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index e43ff9f980a4..c640ba44dd6e 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -95,7 +95,6 @@ void mdio_device_remove(struct mdio_device *mdiodev);
void mdio_device_reset(struct mdio_device *mdiodev, int value);
int mdio_driver_register(struct mdio_driver *drv);
void mdio_driver_unregister(struct mdio_driver *drv);
-int mdio_device_bus_match(struct device *dev, const struct device_driver *drv);
static inline void mdio_device_get(struct mdio_device *mdiodev)
{
diff --git a/include/linux/module.h b/include/linux/module.h
index 92e1420fccdf..5faa1fb1f4b4 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -586,11 +586,6 @@ struct module {
atomic_t refcnt;
#endif
-#ifdef CONFIG_MITIGATION_ITS
- int its_num_pages;
- void **its_page_array;
-#endif
-
#ifdef CONFIG_CONSTRUCTORS
/* Constructor functions. */
ctor_fn_t *ctors;
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 63ef5191cc57..fddafdc168f7 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -31,6 +31,7 @@ extern int ip6_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t);
extern int ip6_mr_input(struct sk_buff *skb);
extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
extern int ip6_mr_init(void);
+extern int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb);
extern void ip6_mr_cleanup(void);
int ip6mr_ioctl(struct sock *sk, int cmd, void *arg);
#else
@@ -58,6 +59,12 @@ static inline int ip6_mr_init(void)
return 0;
}
+static inline int
+ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ return ip6_output(net, sk, skb);
+}
+
static inline void ip6_mr_cleanup(void)
{
return;
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 6863540f4b71..7f254bde5426 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -706,6 +706,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
struct irq_domain *parent);
u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev);
struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev);
+void pci_msix_prepare_desc(struct irq_domain *domain, msi_alloc_info_t *arg,
+ struct msi_desc *desc);
#else /* CONFIG_PCI_MSI */
static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev)
{
diff --git a/include/linux/net/intel/libie/pctype.h b/include/linux/net/intel/libie/pctype.h
new file mode 100644
index 000000000000..d783417fbf36
--- /dev/null
+++ b/include/linux/net/intel/libie/pctype.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2025 Intel Corporation */
+
+#ifndef __LIBIE_PCTYPE_H
+#define __LIBIE_PCTYPE_H
+
+/* Packet Classifier Type indexes, used to set the xxQF_HENA registers. Also
+ * communicated over the virtchnl API as part of struct virtchnl_rss_hashcfg.
+ */
+enum libie_filter_pctype {
+ /* Note: Values 0-28 are reserved for future use.
+ * Values 29, 30, and 32 are not supported on XL710 and X710.
+ */
+ LIBIE_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP = 29,
+ LIBIE_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP = 30,
+ LIBIE_FILTER_PCTYPE_NONF_IPV4_UDP = 31,
+ LIBIE_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK = 32,
+ LIBIE_FILTER_PCTYPE_NONF_IPV4_TCP = 33,
+ LIBIE_FILTER_PCTYPE_NONF_IPV4_SCTP = 34,
+ LIBIE_FILTER_PCTYPE_NONF_IPV4_OTHER = 35,
+ LIBIE_FILTER_PCTYPE_FRAG_IPV4 = 36,
+ /* Note: Values 37-38 are reserved for future use.
+ * Values 39, 40, and 42 are not supported on XL710 and X710.
+ */
+ LIBIE_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP = 39,
+ LIBIE_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP = 40,
+ LIBIE_FILTER_PCTYPE_NONF_IPV6_UDP = 41,
+ LIBIE_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK = 42,
+ LIBIE_FILTER_PCTYPE_NONF_IPV6_TCP = 43,
+ LIBIE_FILTER_PCTYPE_NONF_IPV6_SCTP = 44,
+ LIBIE_FILTER_PCTYPE_NONF_IPV6_OTHER = 45,
+ LIBIE_FILTER_PCTYPE_FRAG_IPV6 = 46,
+ /* Note: Value 47 is reserved for future use */
+ LIBIE_FILTER_PCTYPE_FCOE_OX = 48,
+ LIBIE_FILTER_PCTYPE_FCOE_RX = 49,
+ LIBIE_FILTER_PCTYPE_FCOE_OTHER = 50,
+ /* Note: Values 51-62 are reserved for future use */
+ LIBIE_FILTER_PCTYPE_L2_PAYLOAD = 63
+};
+
+#endif /* __LIBIE_PCTYPE_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index adb14db25798..03c26bb0fbbe 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2388,7 +2388,7 @@ struct net_device {
struct dm_hw_stat_delta __rcu *dm_private;
#endif
struct device dev;
- const struct attribute_group *sysfs_groups[4];
+ const struct attribute_group *sysfs_groups[5];
const struct attribute_group *sysfs_rx_queue_group;
const struct rtnl_link_ops *rtnl_link_ops;
@@ -3016,6 +3016,16 @@ static inline void dev_dstats_rx_dropped(struct net_device *dev)
u64_stats_update_end(&dstats->syncp);
}
+static inline void dev_dstats_rx_dropped_add(struct net_device *dev,
+ unsigned int packets)
+{
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+ u64_stats_add(&dstats->rx_drops, packets);
+ u64_stats_update_end(&dstats->syncp);
+}
+
static inline void dev_dstats_tx_add(struct net_device *dev,
unsigned int len)
{
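dev_dstats_rx_dropped_add() batches what previously required one dev_dstats_rx_dropped() call (one syncp begin/end) per packet. E.g. in a hypothetical bulk Rx path (sketch):

	/* After processing a batch: fold all filtered-out frames into the
	 * per-CPU drop counter under a single u64_stats critical section.
	 */
	if (unlikely(n_dropped))
		dev_dstats_rx_dropped_add(dev, n_dropped);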
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 0477208ed9ff..735e65c3cc11 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -42,6 +42,13 @@ struct netpoll {
struct work_struct refill_wq;
};
+#define np_info(np, fmt, ...) \
+ pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
+#define np_err(np, fmt, ...) \
+ pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
+#define np_notice(np, fmt, ...) \
+ pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)
+
struct netpoll_info {
refcount_t refcnt;
@@ -65,11 +72,8 @@ static inline void netpoll_poll_enable(struct net_device *dev) { return; }
#endif
int netpoll_send_udp(struct netpoll *np, const char *msg, int len);
-void netpoll_print_options(struct netpoll *np);
-int netpoll_parse_options(struct netpoll *np, char *opt);
int __netpoll_setup(struct netpoll *np, struct net_device *ndev);
int netpoll_setup(struct netpoll *np);
-void __netpoll_cleanup(struct netpoll *np);
void __netpoll_free(struct netpoll *np);
void netpoll_cleanup(struct netpoll *np);
void do_netpoll_cleanup(struct netpoll *np);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e194dad1623d..b037aab7b71d 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -269,8 +269,10 @@ static inline const char *phy_modes(phy_interface_t interface)
* rgmii_clock - map link speed to the clock rate
* @speed: link speed value
*
- * Description: maps RGMII supported link speeds
- * into the clock rates.
+ * Description: maps RGMII supported link speeds into the clock rates.
+ * This can also be used for MII, GMII, and RMII interface modes as the
+ * clock rates are identical, but the caller must be aware that errors
+ * for unsupported clock rates will not be signalled.
*
* Returns: clock rate or negative errno
*/
@@ -526,6 +528,7 @@ struct macsec_ops;
* @mac_managed_pm: Set true if MAC driver takes care of suspending/resuming PHY
* @wol_enabled: Set to true if the PHY or the attached MAC have Wake-on-LAN
* enabled.
+ * @is_genphy_driven: PHY is driven by one of the generic PHY drivers
* @state: State of the PHY for management purposes
* @dev_flags: Device-specific flags used by the PHY driver.
*
@@ -629,6 +632,7 @@ struct phy_device {
unsigned is_on_sfp_module:1;
unsigned mac_managed_pm:1;
unsigned wol_enabled:1;
+ unsigned is_genphy_driven:1;
unsigned autoneg:1;
/* The most recently read link state */
@@ -1292,6 +1296,17 @@ static inline bool phy_is_started(struct phy_device *phydev)
}
/**
+ * phy_driver_is_genphy - Convenience function to check whether PHY is driven
+ * by one of the generic PHY drivers
+ * @phydev: The phy_device struct
+ * Return: true if PHY is driven by one of the genphy drivers
+ */
+static inline bool phy_driver_is_genphy(struct phy_device *phydev)
+{
+ return phydev->is_genphy_driven;
+}
+
+/**
* phy_disable_eee_mode - Don't advertise an EEE mode.
* @phydev: The phy_device struct
* @link_mode: The EEE mode to be disabled
@@ -1941,9 +1956,6 @@ int genphy_c45_ethtool_set_eee(struct phy_device *phydev,
struct ethtool_keee *data);
int genphy_c45_an_config_eee_aneg(struct phy_device *phydev);
-/* Generic C45 PHY driver */
-extern struct phy_driver genphy_c45_driver;
-
/* The gen10g_* functions are the old Clause 45 stub */
int gen10g_config_aneg(struct phy_device *phydev);
@@ -1997,8 +2009,8 @@ bool phy_validate_pause(struct phy_device *phydev,
struct ethtool_pauseparam *pp);
void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause);
-s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
- const int *delay_values, int size, bool is_rx);
+s32 phy_get_internal_delay(struct phy_device *phydev, const int *delay_values,
+ int size, bool is_rx);
int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev,
enum ethtool_link_mode_bit_indices linkmode,
@@ -2096,7 +2108,4 @@ module_exit(phy_module_exit)
#define module_phy_driver(__phy_drivers) \
phy_module_driver(__phy_drivers, ARRAY_SIZE(__phy_drivers))
-bool phy_driver_is_genphy(struct phy_device *phydev);
-bool phy_driver_is_genphy_10g(struct phy_device *phydev);
-
#endif /* __PHY_H */
diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h
index c773eeb92d04..e5f305cef82e 100644
--- a/include/linux/pse-pd/pse.h
+++ b/include/linux/pse-pd/pse.h
@@ -6,13 +6,18 @@
#define _LINUX_PSE_CONTROLLER_H
#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/kfifo.h>
#include <uapi/linux/ethtool.h>
+#include <uapi/linux/ethtool_netlink_generated.h>
+#include <linux/regulator/driver.h>
/* Maximum current in uA according to IEEE 802.3-2022 Table 145-1 */
#define MAX_PI_CURRENT 1920000
/* Maximum power in mW according to IEEE 802.3-2022 Table 145-16 */
#define MAX_PI_PW 99900
+struct net_device;
struct phy_device;
struct pse_controller_dev;
struct netlink_ext_ack;
@@ -38,6 +43,19 @@ struct ethtool_c33_pse_pw_limit_range {
};
/**
+ * struct pse_irq_desc - notification sender description for IRQ based events.
+ *
+ * @name: the visible name for the IRQ
+ * @map_event: driver callback to map IRQ status into PSE devices with events.
+ */
+struct pse_irq_desc {
+ const char *name;
+ int (*map_event)(int irq, struct pse_controller_dev *pcdev,
+ unsigned long *notifs,
+ unsigned long *notifs_mask);
+};
+
+/**
* struct pse_control_config - PSE control/channel configuration.
*
* @podl_admin_control: set PoDL PSE admin control as described in
@@ -98,6 +116,7 @@ struct pse_pw_limit_ranges {
/**
* struct ethtool_pse_control_status - PSE control/channel status.
*
+ * @pw_d_id: PSE power domain index.
* @podl_admin_state: operational state of the PoDL PSE
* functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
* @podl_pw_status: power detection status of the PoDL PSE.
@@ -117,8 +136,12 @@ struct pse_pw_limit_ranges {
* is in charge of the memory allocation
* @c33_pw_limit_nb_ranges: number of supported power limit configuration
* ranges
+ * @prio_max: max priority allowed for the @prio variable value.
+ * @prio: priority of the PSE. Managed by the PSE core in case of the static
+ * budget evaluation strategy.
*/
struct ethtool_pse_control_status {
+ u32 pw_d_id;
enum ethtool_podl_pse_admin_state podl_admin_state;
enum ethtool_podl_pse_pw_d_status podl_pw_status;
enum ethtool_c33_pse_admin_state c33_admin_state;
@@ -129,6 +152,8 @@ struct ethtool_pse_control_status {
u32 c33_avail_pw_limit;
struct ethtool_c33_pse_pw_limit_range *c33_pw_limit_ranges;
u32 c33_pw_limit_nb_ranges;
+ u32 prio_max;
+ u32 prio;
};
/**
@@ -152,6 +177,11 @@ struct ethtool_pse_control_status {
* range. The driver is in charge of the memory
* allocation and should return the number of
* ranges.
+ * @pi_get_prio: Get the PSE PI priority.
+ * @pi_set_prio: Configure the PSE PI priority.
+ * @pi_get_pw_req: Get the power requested by a PD before enabling the PSE PI.
+ * This is only relevant when an interrupt is registered using
+ * devm_pse_irq_helper().
*/
struct pse_controller_ops {
int (*setup_pi_matrix)(struct pse_controller_dev *pcdev);
@@ -172,6 +202,10 @@ struct pse_controller_ops {
int id, int max_mW);
int (*pi_get_pw_limit_ranges)(struct pse_controller_dev *pcdev, int id,
struct pse_pw_limit_ranges *pw_limit_ranges);
+ int (*pi_get_prio)(struct pse_controller_dev *pcdev, int id);
+ int (*pi_set_prio)(struct pse_controller_dev *pcdev, int id,
+ unsigned int prio);
+ int (*pi_get_pw_req)(struct pse_controller_dev *pcdev, int id);
};
struct module;
@@ -206,12 +240,35 @@ struct pse_pi_pairset {
* @np: device node pointer of the PSE PI node
* @rdev: regulator represented by the PSE PI
* @admin_state_enabled: PI enabled state
+ * @pw_d: Power domain of the PSE PI
+ * @prio: Priority of the PSE PI. Used in static budget evaluation strategy
+ * @isr_pd_detected: PSE PI detection status managed by the interrupt
+ * handler. This variable is relevant when power enablement
+ * is managed in software, as in the static budget
+ * evaluation strategy.
+ * @pw_allocated_mW: Power allocated to a PSE PI to manage the power budget
+ * in the static budget evaluation strategy.
*/
struct pse_pi {
struct pse_pi_pairset pairset[2];
struct device_node *np;
struct regulator_dev *rdev;
bool admin_state_enabled;
+ struct pse_power_domain *pw_d;
+ int prio;
+ bool isr_pd_detected;
+ int pw_allocated_mW;
+};
+
+/**
+ * struct pse_ntf - PSE notification element
+ *
+ * @id: ID of the PSE control
+ * @notifs: PSE notifications to be reported
+ */
+struct pse_ntf {
+ int id;
+ unsigned long notifs;
};
/**
@@ -228,6 +285,13 @@ struct pse_pi {
* @types: types of the PSE controller
* @pi: table of PSE PIs described in this controller device
* @no_of_pse_pi: flag set if the pse_pis devicetree node is not used
+ * @irq: PSE interrupt
+ * @pis_prio_max: Maximum value allowed for the PSE PIs priority
+ * @supp_budget_eval_strategies: budget evaluation strategies supported
+ * by the PSE
+ * @ntf_work: workqueue for PSE notification management
+ * @ntf_fifo: PSE notifications FIFO
+ * @ntf_fifo_lock: protect @ntf_fifo writer
*/
struct pse_controller_dev {
const struct pse_controller_ops *ops;
@@ -241,6 +305,30 @@ struct pse_controller_dev {
enum ethtool_pse_types types;
struct pse_pi *pi;
bool no_of_pse_pi;
+ int irq;
+ unsigned int pis_prio_max;
+ u32 supp_budget_eval_strategies;
+ struct work_struct ntf_work;
+ DECLARE_KFIFO_PTR(ntf_fifo, struct pse_ntf);
+ spinlock_t ntf_fifo_lock; /* Protect @ntf_fifo writer */
+};
+
+/**
+ * enum pse_budget_eval_strategies - PSE budget evaluation strategies.
+ * @PSE_BUDGET_EVAL_STRAT_DISABLED: Budget evaluation strategy disabled.
+ * @PSE_BUDGET_EVAL_STRAT_STATIC: PSE static budget evaluation strategy.
+ * Budget evaluation strategy based on the power requested during PD
+ * classification. This strategy is managed by the PSE core.
+ * @PSE_BUDGET_EVAL_STRAT_DYNAMIC: PSE dynamic budget evaluation
+ * strategy. Budget evaluation strategy based on the current consumption
+ * per port compared to the total power budget. This mode is managed by
+ * the PSE controller.
+ */
+enum pse_budget_eval_strategies {
+ PSE_BUDGET_EVAL_STRAT_DISABLED = 1 << 0,
+ PSE_BUDGET_EVAL_STRAT_STATIC = 1 << 1,
+ PSE_BUDGET_EVAL_STRAT_DYNAMIC = 1 << 2,
};
#if IS_ENABLED(CONFIG_PSE_CONTROLLER)
@@ -249,8 +337,11 @@ void pse_controller_unregister(struct pse_controller_dev *pcdev);
struct device;
int devm_pse_controller_register(struct device *dev,
struct pse_controller_dev *pcdev);
+int devm_pse_irq_helper(struct pse_controller_dev *pcdev, int irq,
+ int irq_flags, const struct pse_irq_desc *d);
-struct pse_control *of_pse_control_get(struct device_node *node);
+struct pse_control *of_pse_control_get(struct device_node *node,
+ struct phy_device *phydev);
void pse_control_put(struct pse_control *psec);
int pse_ethtool_get_status(struct pse_control *psec,
@@ -262,13 +353,17 @@ int pse_ethtool_set_config(struct pse_control *psec,
int pse_ethtool_set_pw_limit(struct pse_control *psec,
struct netlink_ext_ack *extack,
const unsigned int pw_limit);
+int pse_ethtool_set_prio(struct pse_control *psec,
+ struct netlink_ext_ack *extack,
+ unsigned int prio);
bool pse_has_podl(struct pse_control *psec);
bool pse_has_c33(struct pse_control *psec);
#else
-static inline struct pse_control *of_pse_control_get(struct device_node *node)
+static inline struct pse_control *of_pse_control_get(struct device_node *node,
+ struct phy_device *phydev)
{
return ERR_PTR(-ENOENT);
}
@@ -298,6 +393,13 @@ static inline int pse_ethtool_set_pw_limit(struct pse_control *psec,
return -EOPNOTSUPP;
}
+static inline int pse_ethtool_set_prio(struct pse_control *psec,
+ struct netlink_ext_ack *extack,
+ unsigned int prio)
+{
+ return -EOPNOTSUPP;
+}
+
static inline bool pse_has_podl(struct pse_control *psec)
{
return false;
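Taken together, these additions wire up interrupt-driven PSE events and power-budget management: a driver registers its IRQ through devm_pse_irq_helper() with a &pse_irq_desc whose map_event() translates hardware status into notification bits; the core queues &pse_ntf entries into @ntf_fifo from @ntf_work and reports them through ethnl_pse_send_ntf() (see the ethtool_netlink.h hunk above). A heavily hedged sketch of the driver side; the MYPSE_* register accessors are hypothetical, the notification bit is only an example (the real bits come from ethtool_netlink_generated.h, in the diffstat but not shown), and the per-PI indexing of @notifs is an assumption:

	static int mypse_map_event(int irq, struct pse_controller_dev *pcdev,
				   unsigned long *notifs,
				   unsigned long *notifs_mask)
	{
		u32 sts = mypse_read(pcdev, MYPSE_IRQ_STATUS);	/* hypothetical */

		if (sts & MYPSE_PI0_OVERCURRENT) {
			notifs[0] |= ETHTOOL_PSE_EVENT_OVER_CURRENT; /* example */
			notifs_mask[0] |= ETHTOOL_PSE_EVENT_OVER_CURRENT;
		}

		return 0;
	}

	static const struct pse_irq_desc mypse_irq_desc = {
		.name		= "mypse-event",
		.map_event	= mypse_map_event,
	};

	/* in probe, after devm_pse_controller_register(): */
	ret = devm_pse_irq_helper(pcdev, irq, 0, &mypse_irq_desc);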
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5520524c93bf..9508968cb300 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3873,20 +3873,26 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l
bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i)
__must_check;
-static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
- const struct page *page, int off)
+static inline bool skb_can_coalesce_netmem(struct sk_buff *skb, int i,
+ netmem_ref netmem, int off)
{
if (skb_zcopy(skb))
return false;
if (i) {
const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
- return page == skb_frag_page(frag) &&
+ return netmem == skb_frag_netmem(frag) &&
off == skb_frag_off(frag) + skb_frag_size(frag);
}
return false;
}
+static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
+ const struct page *page, int off)
+{
+ return skb_can_coalesce_netmem(skb, i, page_to_netmem(page), off);
+}
+
static inline int __skb_linearize(struct sk_buff *skb)
{
return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM;
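The netmem variant generalizes the coalescing check beyond page-backed frags, so it also holds for unreadable/zero-copy memory; skb_can_coalesce() is kept as a thin page_to_netmem() wrapper, leaving existing callers unaffected. Typical use when appending payload (sketch; skb->len/data_len/truesize bookkeeping omitted):

	int i = skb_shinfo(skb)->nr_frags;

	if (skb_can_coalesce_netmem(skb, i, netmem, off)) {
		/* Contiguous with the last frag: just extend it. */
		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
	} else {
		/* Fills the frag and bumps nr_frags. */
		skb_fill_netmem_desc(skb, i, netmem, off, copy);
	}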
diff --git a/include/linux/soc/marvell/silicons.h b/include/linux/soc/marvell/silicons.h
new file mode 100644
index 000000000000..66bb9bfaf17d
--- /dev/null
+++ b/include/linux/soc/marvell/silicons.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * Copyright (C) 2024 Marvell.
+ */
+
+#ifndef __SOC_SILICON_H
+#define __SOC_SILICON_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+
+#if defined(CONFIG_ARM64)
+
+#define CN20K_CHIPID 0x20
+/*
+ * Silicon check for CN20K family
+ */
+static inline bool is_cn20k(struct pci_dev *pdev)
+{
+ return (pdev->subsystem_device & 0xFF) == CN20K_CHIPID;
+}
+#else
+#define is_cn20k(pdev) ((void)(pdev), 0)
+#endif
+
+#endif /* __SOC_SILICON_H */
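Gating on CONFIG_ARM64 keeps the PCI-based check out of builds that can never see this silicon, while the stub still evaluates its argument to avoid unused-variable warnings. Usage sketch for an RVU-family driver probe (field names illustrative):

	/* Select a register layout once at probe time. */
	if (is_cn20k(pdev))
		priv->blk_addr = CN20K_BLK_ADDR;	/* hypothetical */
	else
		priv->blk_addr = DEFAULT_BLK_ADDR;	/* hypothetical */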
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 29f59d50dc73..1a5737b3753d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -208,7 +208,6 @@ struct tcp_sock {
u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */
u16 gso_segs; /* Max number of segs per GSO packet */
/* from STCP, retrans queue hinting */
- struct sk_buff *lost_skb_hint;
struct sk_buff *retransmit_skb_hint;
__cacheline_group_end(tcp_sock_read_tx);
@@ -419,8 +418,6 @@ struct tcp_sock {
struct tcp_sack_block recv_sack_cache[4];
- int lost_cnt_hint;
-
u32 prior_ssthresh; /* ssthresh saved at recovery start */
u32 high_seq; /* snd_nxt at onset of congestion */
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 0b9f1e598e3a..208682f77179 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -58,7 +58,7 @@ struct usbnet {
unsigned interrupt_count;
struct mutex interrupt_mutex;
struct usb_anchor deferred;
- struct tasklet_struct bh;
+ struct work_struct bh_work;
struct work_struct kevent;
unsigned long flags;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 0091f23a40f7..63517646a497 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -520,6 +520,7 @@ enum devlink_param_generic_id {
DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC,
/* add new param generic ids above here*/
__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -578,6 +579,9 @@ enum devlink_param_generic_id {
#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME "event_eq_size"
#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32
+#define DEVLINK_PARAM_GENERIC_ENABLE_PHC_NAME "enable_phc"
+#define DEVLINK_PARAM_GENERIC_ENABLE_PHC_TYPE DEVLINK_PARAM_TYPE_BOOL
+
#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \
{ \
.id = DEVLINK_PARAM_GENERIC_ID_##_id, \
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index bcf9d7467e1a..b9e78290269e 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -121,6 +121,9 @@
FN(ARP_PVLAN_DISABLE) \
FN(MAC_IEEE_MAC_CONTROL) \
FN(BRIDGE_INGRESS_STP_STATE) \
+ FN(CAN_RX_INVALID_FRAME) \
+ FN(CANFD_RX_INVALID_FRAME) \
+ FN(CANXL_RX_INVALID_FRAME) \
FNe(MAX)
/**
@@ -574,6 +577,21 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE,
/**
+ * @SKB_DROP_REASON_CAN_RX_INVALID_FRAME: received
+ * non-conformant CAN frame (or device is unable to receive CAN frames)
+ */
+ SKB_DROP_REASON_CAN_RX_INVALID_FRAME,
+ /**
+ * @SKB_DROP_REASON_CANFD_RX_INVALID_FRAME: received
+ * non-conformant CAN-FD frame (or device is unable to receive CAN frames)
+ */
+ SKB_DROP_REASON_CANFD_RX_INVALID_FRAME,
+ /**
+ * @SKB_DROP_REASON_CANXL_RX_INVALID_FRAME: received
+ * non-conformant CAN-XL frame (or device is unable to receive CAN frames)
+ */
+ SKB_DROP_REASON_CANXL_RX_INVALID_FRAME,
+ /**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
*/
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 55e2d97f247e..d73ea0880066 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -54,11 +54,13 @@ struct tc_action;
#define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26
#define DSA_TAG_PROTO_LAN937X_VALUE 27
#define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28
+#define DSA_TAG_PROTO_BRCM_LEGACY_FCS_VALUE 29
enum dsa_tag_protocol {
DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE,
DSA_TAG_PROTO_BRCM = DSA_TAG_PROTO_BRCM_VALUE,
DSA_TAG_PROTO_BRCM_LEGACY = DSA_TAG_PROTO_BRCM_LEGACY_VALUE,
+ DSA_TAG_PROTO_BRCM_LEGACY_FCS = DSA_TAG_PROTO_BRCM_LEGACY_FCS_VALUE,
DSA_TAG_PROTO_BRCM_PREPEND = DSA_TAG_PROTO_BRCM_PREPEND_VALUE,
DSA_TAG_PROTO_DSA = DSA_TAG_PROTO_DSA_VALUE,
DSA_TAG_PROTO_EDSA = DSA_TAG_PROTO_EDSA_VALUE,
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 4564b5d348b1..ae09e91398a5 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -202,12 +202,6 @@ static inline spinlock_t *inet_ehash_lockp(
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo);
-static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h)
-{
- kfree(h->lhash2);
- h->lhash2 = NULL;
-}
-
static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
{
kvfree(hashinfo->ehash_locks);
diff --git a/include/net/ip.h b/include/net/ip.h
index 47ed6d23853d..375304bb99f6 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -59,6 +59,7 @@ struct inet_skb_parm {
#define IPSKB_L3SLAVE BIT(7)
#define IPSKB_NOPOLICY BIT(8)
#define IPSKB_MULTIPATH BIT(9)
+#define IPSKB_MCROUTE BIT(10)
u16 frag_max_size;
};
@@ -167,6 +168,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
int ip_local_deliver(struct sk_buff *skb);
void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int proto);
int ip_mr_input(struct sk_buff *skb);
+int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 399592405c72..dd163495f353 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -152,11 +152,12 @@ int ip6_tnl_get_iflink(const struct net_device *dev);
int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu);
static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
- struct net_device *dev)
+ struct net_device *dev, u16 ip6cb_flags)
{
int pkt_len, err;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+ IP6CB(skb)->flags = ip6cb_flags;
pkt_len = skb->len - skb_inner_network_offset(skb);
err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 0c3d571a04a1..8cf1380f3656 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -603,7 +603,7 @@ static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, u8 proto,
- u8 tos, u8 ttl, __be16 df, bool xnet);
+ u8 tos, u8 ttl, __be16 df, bool xnet, u16 ipcb_flags);
struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
gfp_t flags);
int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h
index ab05024be518..5d991404845e 100644
--- a/include/net/libeth/rx.h
+++ b/include/net/libeth/rx.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright (C) 2024 Intel Corporation */
+/* Copyright (C) 2024-2025 Intel Corporation */
#ifndef __LIBETH_RX_H
#define __LIBETH_RX_H
@@ -13,8 +13,10 @@
/* Space reserved in front of each frame */
#define LIBETH_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)
+#define LIBETH_XDP_HEADROOM (ALIGN(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
+ NET_IP_ALIGN)
/* Maximum headroom for worst-case calculations */
-#define LIBETH_MAX_HEADROOM LIBETH_SKB_HEADROOM
+#define LIBETH_MAX_HEADROOM LIBETH_XDP_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
/* Maximum supported L2-L4 header length */
@@ -31,7 +33,7 @@
/**
* struct libeth_fqe - structure representing an Rx buffer (fill queue element)
- * @page: page holding the buffer
+ * @netmem: network memory reference holding the buffer
* @offset: offset from the page start (to the headroom)
* @truesize: total space occupied by the buffer (w/ headroom and tailroom)
*
@@ -40,7 +42,7 @@
* former, @offset is always 0 and @truesize is always ```PAGE_SIZE```.
*/
struct libeth_fqe {
- struct page *page;
+ netmem_ref netmem;
u32 offset;
u32 truesize;
} __aligned_largest;
@@ -66,6 +68,7 @@ enum libeth_fqe_type {
* @count: number of descriptors/buffers the queue has
* @type: type of the buffers this queue has
* @hsplit: flag whether header split is enabled
+ * @xdp: flag indicating whether XDP is enabled
* @buf_len: HW-writeable length per each buffer
* @nid: ID of the closest NUMA node with memory
*/
@@ -81,6 +84,7 @@ struct libeth_fq {
/* Cold fields */
enum libeth_fqe_type type:2;
bool hsplit:1;
+ bool xdp:1;
u32 buf_len;
int nid;
@@ -102,15 +106,16 @@ static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
struct libeth_fqe *buf = &fq->fqes[i];
buf->truesize = fq->truesize;
- buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize);
- if (unlikely(!buf->page))
+ buf->netmem = page_pool_dev_alloc_netmem(fq->pp, &buf->offset,
+ &buf->truesize);
+ if (unlikely(!buf->netmem))
return DMA_MAPPING_ERROR;
- return page_pool_get_dma_addr(buf->page) + buf->offset +
+ return page_pool_get_dma_addr_netmem(buf->netmem) + buf->offset +
fq->pp->p.offset;
}
-void libeth_rx_recycle_slow(struct page *page);
+void libeth_rx_recycle_slow(netmem_ref netmem);
/**
* libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
@@ -126,18 +131,19 @@ void libeth_rx_recycle_slow(struct page *page);
static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
u32 len)
{
- struct page *page = fqe->page;
+ netmem_ref netmem = fqe->netmem;
/* Very rare, but possible case. The most common reason:
* the last fragment contained FCS only, which was then
* stripped by the HW.
*/
if (unlikely(!len)) {
- libeth_rx_recycle_slow(page);
+ libeth_rx_recycle_slow(netmem);
return false;
}
- page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len);
+ page_pool_dma_sync_netmem_for_cpu(netmem_get_pp(netmem), netmem,
+ fqe->offset, len);
return true;
}
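After the conversion the whole fill-queue path is netmem-based: libeth_rx_alloc() stores a netmem_ref plus offset, and libeth_rx_sync_for_cpu() syncs (or recycles) it without ever touching a struct page. A sketch of a driver Rx completion using it; the descriptor layout and offset math are driver-specific:

	struct libeth_fqe *fqe = &fq->fqes[ntc];
	u32 len = le16_to_cpu(desc->pkt_len);	/* hypothetical descriptor */

	if (!libeth_rx_sync_for_cpu(fqe, len))
		return;	/* FCS-only tail: the buffer was already recycled */

	/* Attach as a frag (headroom offset omitted for brevity). */
	skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, fqe->netmem,
			       fqe->offset, len, fqe->truesize);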
diff --git a/include/net/libeth/tx.h b/include/net/libeth/tx.h
index 35614f9523f6..c3db5c6f1641 100644
--- a/include/net/libeth/tx.h
+++ b/include/net/libeth/tx.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright (C) 2024 Intel Corporation */
+/* Copyright (C) 2024-2025 Intel Corporation */
#ifndef __LIBETH_TX_H
#define __LIBETH_TX_H
@@ -12,11 +12,17 @@
/**
* enum libeth_sqe_type - type of &libeth_sqe to act on Tx completion
- * @LIBETH_SQE_EMPTY: unused/empty, no action required
+ * @LIBETH_SQE_EMPTY: unused/empty OR XDP_TX/XSk frame, no action required
* @LIBETH_SQE_CTX: context descriptor with empty SQE, no action required
* @LIBETH_SQE_SLAB: kmalloc-allocated buffer, unmap and kfree()
* @LIBETH_SQE_FRAG: mapped skb frag, only unmap DMA
* @LIBETH_SQE_SKB: &sk_buff, unmap and napi_consume_skb(), update stats
+ * @__LIBETH_SQE_XDP_START: separator between skb and XDP types
+ * @LIBETH_SQE_XDP_TX: &skb_shared_info, libeth_xdp_return_buff_bulk(), stats
+ * @LIBETH_SQE_XDP_XMIT: &xdp_frame, unmap and xdp_return_frame_bulk(), stats
+ * @LIBETH_SQE_XDP_XMIT_FRAG: &xdp_frame frag, only unmap DMA
+ * @LIBETH_SQE_XSK_TX: &libeth_xdp_buff on XSk queue, xsk_buff_free(), stats
+ * @LIBETH_SQE_XSK_TX_FRAG: &libeth_xdp_buff frag on XSk queue, xsk_buff_free()
*/
enum libeth_sqe_type {
LIBETH_SQE_EMPTY = 0U,
@@ -24,6 +30,13 @@ enum libeth_sqe_type {
LIBETH_SQE_SLAB,
LIBETH_SQE_FRAG,
LIBETH_SQE_SKB,
+
+ __LIBETH_SQE_XDP_START,
+ LIBETH_SQE_XDP_TX = __LIBETH_SQE_XDP_START,
+ LIBETH_SQE_XDP_XMIT,
+ LIBETH_SQE_XDP_XMIT_FRAG,
+ LIBETH_SQE_XSK_TX,
+ LIBETH_SQE_XSK_TX_FRAG,
};
/**
@@ -32,6 +45,9 @@ enum libeth_sqe_type {
* @rs_idx: index of the last buffer from the batch this one was sent in
* @raw: slab buffer to free via kfree()
* @skb: &sk_buff to consume
+ * @sinfo: skb shared info of an XDP_TX frame
+ * @xdpf: XDP frame from ::ndo_xdp_xmit()
+ * @xsk: XSk Rx frame from XDP_TX action
* @dma: DMA address to unmap
* @len: length of the mapped region to unmap
* @nr_frags: number of frags in the frame this buffer belongs to
@@ -46,6 +62,9 @@ struct libeth_sqe {
union {
void *raw;
struct sk_buff *skb;
+ struct skb_shared_info *sinfo;
+ struct xdp_frame *xdpf;
+ struct libeth_xdp_buff *xsk;
};
DEFINE_DMA_UNMAP_ADDR(dma);
@@ -71,7 +90,10 @@ struct libeth_sqe {
/**
* struct libeth_cq_pp - completion queue poll params
* @dev: &device to perform DMA unmapping
+ * @bq: XDP frame bulk to combine return operations
* @ss: onstack NAPI stats to fill
+ * @xss: onstack XDPSQ NAPI stats to fill
+ * @xdp_tx: number of XDP-not-XSk frames processed
* @napi: whether it's called from the NAPI context
*
* libeth uses this structure to access objects needed for performing full
@@ -80,7 +102,13 @@ struct libeth_sqe {
*/
struct libeth_cq_pp {
struct device *dev;
- struct libeth_sq_napi_stats *ss;
+ struct xdp_frame_bulk *bq;
+
+ union {
+ struct libeth_sq_napi_stats *ss;
+ struct libeth_xdpsq_napi_stats *xss;
+ };
+ u32 xdp_tx;
bool napi;
};
@@ -126,4 +154,6 @@ static inline void libeth_tx_complete(struct libeth_sqe *sqe,
sqe->type = LIBETH_SQE_EMPTY;
}
+void libeth_tx_complete_any(struct libeth_sqe *sqe, struct libeth_cq_pp *cp);
+
#endif /* __LIBETH_TX_H */
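libeth_tx_complete_any() dispatches on &libeth_sqe::type, so one completion loop can retire skbs, ``XDP_TX`` buffers, .ndo_xdp_xmit() frames, and XSk buffers alike. A sketch of the XDP-capable completion loop; ring-index wrap handling is driver-specific and omitted:

	struct xdp_frame_bulk bq;
	struct libeth_cq_pp cp = {
		.dev	= sq->dev,
		.bq	= &bq,
		.xss	= &xdp_stats,	/* struct libeth_xdpsq_napi_stats */
		.napi	= true,
	};

	xdp_frame_bulk_init(&bq);

	while (sq->next_to_clean != done)
		libeth_tx_complete_any(&sq->sqes[sq->next_to_clean++], &cp);

	xdp_flush_frame_bulk(&bq);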
diff --git a/include/net/libeth/types.h b/include/net/libeth/types.h
index 603825e45133..cf1d78a9dc38 100644
--- a/include/net/libeth/types.h
+++ b/include/net/libeth/types.h
@@ -1,10 +1,32 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright (C) 2024 Intel Corporation */
+/* Copyright (C) 2024-2025 Intel Corporation */
#ifndef __LIBETH_TYPES_H
#define __LIBETH_TYPES_H
-#include <linux/types.h>
+#include <linux/workqueue.h>
+
+/* Stats */
+
+/**
+ * struct libeth_rq_napi_stats - "hot" counters to update in Rx polling loop
+ * @packets: received frames counter
+ * @bytes: sum of bytes of received frames above
+ * @fragments: sum of fragments of received S/G frames
+ * @hsplit: number of frames the device performed the header split for
+ * @raw: alias to access all the fields as an array
+ */
+struct libeth_rq_napi_stats {
+ union {
+ struct {
+ u32 packets;
+ u32 bytes;
+ u32 fragments;
+ u32 hsplit;
+ };
+ DECLARE_FLEX_ARRAY(u32, raw);
+ };
+};
/**
* struct libeth_sq_napi_stats - "hot" counters to update in Tx completion loop
@@ -22,4 +44,84 @@ struct libeth_sq_napi_stats {
};
};
+/**
+ * struct libeth_xdpsq_napi_stats - "hot" counters to update in XDP Tx
+ * completion loop
+ * @packets: completed frames counter
+ * @bytes: sum of bytes of completed frames above
+ * @fragments: sum of fragments of completed S/G frames
+ * @raw: alias to access all the fields as an array
+ */
+struct libeth_xdpsq_napi_stats {
+ union {
+ struct {
+ u32 packets;
+ u32 bytes;
+ u32 fragments;
+ };
+ DECLARE_FLEX_ARRAY(u32, raw);
+ };
+};
+
+/* XDP */
+
+/*
+ * The following structures should be embedded into driver's queue structure
+ * and passed to the libeth_xdp helpers, never used directly.
+ */
+
+/* XDPSQ sharing */
+
+/**
+ * struct libeth_xdpsq_lock - locking primitive for sharing XDPSQs
+ * @lock: spinlock for locking the queue
+ * @share: whether this particular queue is shared
+ */
+struct libeth_xdpsq_lock {
+ spinlock_t lock;
+ bool share;
+};
+
+/* XDPSQ clean-up timers */
+
+/**
+ * struct libeth_xdpsq_timer - timer for cleaning up XDPSQs w/o interrupts
+ * @xdpsq: queue this timer belongs to
+ * @lock: lock for the queue
+ * @dwork: work performing cleanups
+ *
+ * XDPSQs not using interrupts but lazy cleaning, i.e. only when there's no
+ * space for sending the current queued frame/bulk, must fire up timers to
+ * make sure there are no stale buffers to free.
+ */
+struct libeth_xdpsq_timer {
+ void *xdpsq;
+ struct libeth_xdpsq_lock *lock;
+
+ struct delayed_work dwork;
+};
+
+/* Rx polling path */
+
+/**
+ * struct libeth_xdp_buff_stash - struct for stashing &xdp_buff onto a queue
+ * @data: pointer to the start of the frame, xdp_buff.data
+ * @headroom: frame headroom, xdp_buff.data - xdp_buff.data_hard_start
+ * @len: frame linear space length, xdp_buff.data_end - xdp_buff.data
+ * @frame_sz: truesize occupied by the frame, xdp_buff.frame_sz
+ * @flags: xdp_buff.flags
+ *
+ * &xdp_buff is 56 bytes long on x64, &libeth_xdp_buff is 64 bytes. This
+ * structure carries only necessary fields to save/restore a partially built
+ * frame on the queue structure to finish it during the next NAPI poll.
+ */
+struct libeth_xdp_buff_stash {
+ void *data;
+ u16 headroom;
+ u16 len;
+
+ u32 frame_sz:24;
+ u32 flags:8;
+} __aligned_largest;
+
#endif /* __LIBETH_TYPES_H */
diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h
new file mode 100644
index 000000000000..6ce6aec6884c
--- /dev/null
+++ b/include/net/libeth/xdp.h
@@ -0,0 +1,1879 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2025 Intel Corporation */
+
+#ifndef __LIBETH_XDP_H
+#define __LIBETH_XDP_H
+
+#include <linux/bpf_trace.h>
+#include <linux/unroll.h>
+
+#include <net/libeth/rx.h>
+#include <net/libeth/tx.h>
+#include <net/xsk_buff_pool.h>
+
+/*
+ * Defined as bits to be able to use them as a mask on Rx.
+ * Also used as internal return values on Tx.
+ */
+enum {
+ LIBETH_XDP_PASS = 0U,
+ LIBETH_XDP_DROP = BIT(0),
+ LIBETH_XDP_ABORTED = BIT(1),
+ LIBETH_XDP_TX = BIT(2),
+ LIBETH_XDP_REDIRECT = BIT(3),
+};
+
+/*
+ * &xdp_buff_xsk is the largest structure &libeth_xdp_buff gets casted to,
+ * pick maximum pointer-compatible alignment.
+ */
+#define __LIBETH_XDP_BUFF_ALIGN \
+ (IS_ALIGNED(sizeof(struct xdp_buff_xsk), 16) ? 16 : \
+ IS_ALIGNED(sizeof(struct xdp_buff_xsk), 8) ? 8 : \
+ sizeof(long))
+
+/**
+ * struct libeth_xdp_buff - libeth extension over &xdp_buff
+ * @base: main &xdp_buff
+ * @data: shortcut for @base.data
+ * @desc: RQ descriptor containing metadata for this buffer
+ * @priv: driver-private scratchspace
+ *
+ * The main reason for this is to have a pointer to the descriptor to be able
+ * to quickly get frame metadata from xdpmo and driver buff-to-xdp callbacks
+ * (as well as bigger alignment).
+ * Pointer/layout-compatible with &xdp_buff and &xdp_buff_xsk.
+ */
+struct libeth_xdp_buff {
+ union {
+ struct xdp_buff base;
+ void *data;
+ };
+
+ const void *desc;
+ unsigned long priv[]
+ __aligned(__LIBETH_XDP_BUFF_ALIGN);
+} __aligned(__LIBETH_XDP_BUFF_ALIGN);
+static_assert(offsetof(struct libeth_xdp_buff, data) ==
+ offsetof(struct xdp_buff_xsk, xdp.data));
+static_assert(offsetof(struct libeth_xdp_buff, desc) ==
+ offsetof(struct xdp_buff_xsk, cb));
+static_assert(IS_ALIGNED(sizeof(struct xdp_buff_xsk),
+ __alignof(struct libeth_xdp_buff)));
+
+/**
+ * __LIBETH_XDP_ONSTACK_BUFF - declare a &libeth_xdp_buff on the stack
+ * @name: name of the variable to declare
+ * @...: sizeof() of the driver-private data
+ */
+#define __LIBETH_XDP_ONSTACK_BUFF(name, ...) \
+ ___LIBETH_XDP_ONSTACK_BUFF(name, ##__VA_ARGS__)
+/**
+ * LIBETH_XDP_ONSTACK_BUFF - declare a &libeth_xdp_buff on the stack
+ * @name: name of the variable to declare
+ * @...: type or variable name of the driver-private data
+ */
+#define LIBETH_XDP_ONSTACK_BUFF(name, ...) \
+ __LIBETH_XDP_ONSTACK_BUFF(name, __libeth_xdp_priv_sz(__VA_ARGS__))
+
+#define ___LIBETH_XDP_ONSTACK_BUFF(name, ...) \
+ __DEFINE_FLEX(struct libeth_xdp_buff, name, priv, \
+ LIBETH_XDP_PRIV_SZ(__VA_ARGS__ + 0), \
+ __uninitialized); \
+ LIBETH_XDP_ASSERT_PRIV_SZ(__VA_ARGS__ + 0)
+
+#define __libeth_xdp_priv_sz(...) \
+ CONCATENATE(__libeth_xdp_psz, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)
+
+#define __libeth_xdp_psz0(...)
+#define __libeth_xdp_psz1(...) sizeof(__VA_ARGS__)
+
+#define LIBETH_XDP_PRIV_SZ(sz) \
+ (ALIGN(sz, __alignof(struct libeth_xdp_buff)) / sizeof(long))
+
+/* Performs XSK_CHECK_PRIV_TYPE() */
+#define LIBETH_XDP_ASSERT_PRIV_SZ(sz) \
+ static_assert(offsetofend(struct xdp_buff_xsk, cb) >= \
+ struct_size_t(struct libeth_xdp_buff, priv, \
+ LIBETH_XDP_PRIV_SZ(sz)))
+
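Usage: the flexible @priv area is sized at compile time, and the assert guarantees it still fits into xdp_buff_xsk::cb, keeping the XSk cast valid. For instance, with a driver scratch struct (illustrative):

	struct mydrv_xdp_priv {
		u32 rx_bytes;	/* illustrative per-frame scratch */
	};

	LIBETH_XDP_ONSTACK_BUFF(xdp, struct mydrv_xdp_priv);

	/* xdp is a &libeth_xdp_buff pointer whose ->priv has room for
	 * the struct above, aligned to __LIBETH_XDP_BUFF_ALIGN.
	 */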
+/* XDPSQ sharing */
+
+DECLARE_STATIC_KEY_FALSE(libeth_xdpsq_share);
+
+/**
+ * libeth_xdpsq_num - calculate optimal number of XDPSQs for this device + sys
+ * @rxq: current number of active Rx queues
+ * @txq: current number of active Tx queues
+ * @max: maximum number of Tx queues
+ *
+ * Each RQ must have its own XDPSQ for XSk pairs, and each CPU must have its
+ * own XDPSQ for lockless sending (``XDP_TX``, .ndo_xdp_xmit()). Cap the
+ * maximum of these two with the number of SQs the device can have (minus
+ * the ones already in use).
+ *
+ * Return: number of XDP Tx queues the device needs to use.
+ */
+static inline u32 libeth_xdpsq_num(u32 rxq, u32 txq, u32 max)
+{
+ return min(max(nr_cpu_ids, rxq), max - txq);
+}
+
+/**
+ * libeth_xdpsq_shared - whether XDPSQs can be shared between several CPUs
+ * @num: number of active XDPSQs
+ *
+ * Return: true if there's no 1:1 XDPSQ/CPU association, false otherwise.
+ */
+static inline bool libeth_xdpsq_shared(u32 num)
+{
+ return num < nr_cpu_ids;
+}
+
+/**
+ * libeth_xdpsq_id - get XDPSQ index corresponding to this CPU
+ * @num: number of active XDPSQs
+ *
+ * Helper for libeth_xdp routines, do not use in drivers directly.
+ *
+ * Return: XDPSQ index to be used on this CPU.
+ */
+static inline u32 libeth_xdpsq_id(u32 num)
+{
+ u32 ret = raw_smp_processor_id();
+
+ if (static_branch_unlikely(&libeth_xdpsq_share) &&
+ libeth_xdpsq_shared(num))
+ ret %= num;
+
+ return ret;
+}
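Worked example of how the three helpers interact: on a 64-CPU system with 16 RQs and a device exposing 128 SQs, of which 32 carry regular traffic,

	u32 num = libeth_xdpsq_num(16, 32, 128);
	/* min(max(64, 16), 128 - 32) = 64: one XDPSQ per CPU, no sharing */

	/* On a 128-CPU system the same call yields min(128, 96) = 96, so
	 * libeth_xdpsq_shared(96) is true and libeth_xdpsq_id() folds the
	 * CPU id modulo 96 onto a (locked) shared queue.
	 */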
+
+void __libeth_xdpsq_get(struct libeth_xdpsq_lock *lock,
+ const struct net_device *dev);
+void __libeth_xdpsq_put(struct libeth_xdpsq_lock *lock,
+ const struct net_device *dev);
+
+/**
+ * libeth_xdpsq_get - initialize &libeth_xdpsq_lock
+ * @lock: lock to initialize
+ * @dev: netdev which this lock belongs to
+ * @share: whether XDPSQs can be shared
+ *
+ * Tracks the current XDPSQ association and enables the static lock
+ * if needed.
+ */
+static inline void libeth_xdpsq_get(struct libeth_xdpsq_lock *lock,
+ const struct net_device *dev,
+ bool share)
+{
+ if (unlikely(share))
+ __libeth_xdpsq_get(lock, dev);
+}
+
+/**
+ * libeth_xdpsq_put - deinitialize &libeth_xdpsq_lock
+ * @lock: lock to deinitialize
+ * @dev: netdev which this lock belongs to
+ *
+ * Tracks the current XDPSQ association and disables the static lock
+ * if needed.
+ */
+static inline void libeth_xdpsq_put(struct libeth_xdpsq_lock *lock,
+ const struct net_device *dev)
+{
+ if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share)
+ __libeth_xdpsq_put(lock, dev);
+}
+
+void __libeth_xdpsq_lock(struct libeth_xdpsq_lock *lock);
+void __libeth_xdpsq_unlock(struct libeth_xdpsq_lock *lock);
+
+/**
+ * libeth_xdpsq_lock - grab &libeth_xdpsq_lock if needed
+ * @lock: lock to take
+ *
+ * Touches the underlying spinlock only if the static key is enabled
+ * and the queue itself is marked as shareable.
+ */
+static inline void libeth_xdpsq_lock(struct libeth_xdpsq_lock *lock)
+{
+ if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share)
+ __libeth_xdpsq_lock(lock);
+}
+
+/**
+ * libeth_xdpsq_unlock - free &libeth_xdpsq_lock if needed
+ * @lock: lock to free
+ *
+ * Touches the underlying spinlock only if the static key is enabled
+ * and the queue itself is marked as shareable.
+ */
+static inline void libeth_xdpsq_unlock(struct libeth_xdpsq_lock *lock)
+{
+ if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share)
+ __libeth_xdpsq_unlock(lock);
+}
+
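+/*
+ * A sketch of how a driver might guard a shared send path (``my_*`` and
+ * ``q->*`` names are hypothetical):
+ *
+ * libeth_xdpsq_lock(&q->xdpsq_lock);
+ * // write HW descriptors, bump the tail etc.
+ * libeth_xdpsq_unlock(&q->xdpsq_lock);
+ *
+ * Both calls compile down to no-ops as long as the static key is disabled,
+ * i.e. while every CPU owns its own XDPSQ.
+ */
+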
+/* XDPSQ clean-up timers */
+
+void libeth_xdpsq_init_timer(struct libeth_xdpsq_timer *timer, void *xdpsq,
+ struct libeth_xdpsq_lock *lock,
+ void (*poll)(struct work_struct *work));
+
+/**
+ * libeth_xdpsq_deinit_timer - deinitialize &libeth_xdpsq_timer
+ * @timer: timer to deinitialize
+ *
+ * Flush and cancel the underlying delayed work.
+ */
+static inline void libeth_xdpsq_deinit_timer(struct libeth_xdpsq_timer *timer)
+{
+ cancel_delayed_work_sync(&timer->dwork);
+}
+
+/**
+ * libeth_xdpsq_queue_timer - run &libeth_xdpsq_timer
+ * @timer: timer to queue
+ *
+ * Should be called after the queue was filled and the transmission was run
+ * to complete the pending buffers if no further sending will be done in a
+ * second (-> lazy cleaning won't happen).
+ * If the timer was already queued, its timeout will be reset back to one
+ * second.
+ */
+static inline void libeth_xdpsq_queue_timer(struct libeth_xdpsq_timer *timer)
+{
+ mod_delayed_work_on(raw_smp_processor_id(), system_bh_highpri_wq,
+ &timer->dwork, HZ);
+}
+
+/**
+ * libeth_xdpsq_run_timer - wrapper to run a queue clean-up on a timer event
+ * @work: workqueue belonging to the corresponding timer
+ * @poll: driver-specific completion queue poll function
+ *
+ * Run the polling function on the locked queue and requeue the timer if
+ * there's more work to do.
+ * Designed to be used via LIBETH_XDP_DEFINE_TIMER() below.
+ */
+static __always_inline void
+libeth_xdpsq_run_timer(struct work_struct *work,
+ u32 (*poll)(void *xdpsq, u32 budget))
+{
+ struct libeth_xdpsq_timer *timer = container_of(work, typeof(*timer),
+ dwork.work);
+
+ libeth_xdpsq_lock(timer->lock);
+
+ if (poll(timer->xdpsq, U32_MAX))
+ libeth_xdpsq_queue_timer(timer);
+
+ libeth_xdpsq_unlock(timer->lock);
+}
+
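+/*
+ * Possible driver wiring for the timer (sketch, ``my_*`` and ``q->*`` names
+ * are hypothetical):
+ *
+ * static u32 my_xdpsq_poll(void *xdpsq, u32 budget);
+ *
+ * LIBETH_XDP_DEFINE_TIMER(static my_xdpsq_poll_timer, my_xdpsq_poll);
+ *
+ * // at queue init time:
+ * libeth_xdpsq_init_timer(&q->timer, q, &q->xdpsq_lock,
+ *			    my_xdpsq_poll_timer);
+ *
+ * // after filling the queue in the sending path:
+ * libeth_xdpsq_queue_timer(&q->timer);
+ */
+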
+/* Common Tx bits */
+
+/**
+ * enum - libeth_xdp internal Tx flags
+ * @LIBETH_XDP_TX_BULK: bulk size at which the queued frames are flushed
+ * @LIBETH_XDP_TX_BATCH: batch size for which the queue fill loop is unrolled
+ * @LIBETH_XDP_TX_DROP: indicates the send function must drop frames not sent
+ * @LIBETH_XDP_TX_NDO: whether the send function is called from .ndo_xdp_xmit()
+ * @LIBETH_XDP_TX_XSK: whether the function is called for ``XDP_TX`` for XSk
+ */
+enum {
+ LIBETH_XDP_TX_BULK = DEV_MAP_BULK_SIZE,
+ LIBETH_XDP_TX_BATCH = 8,
+
+ LIBETH_XDP_TX_DROP = BIT(0),
+ LIBETH_XDP_TX_NDO = BIT(1),
+ LIBETH_XDP_TX_XSK = BIT(2),
+};
+
+/**
+ * enum - &libeth_xdp_tx_frame and &libeth_xdp_tx_desc flags
+ * @LIBETH_XDP_TX_LEN: only for ``XDP_TX``, [15:0] of ::len_fl is actual length
+ * @LIBETH_XDP_TX_CSUM: for XSk xmit, enable checksum offload
+ * @LIBETH_XDP_TX_XSKMD: for XSk xmit, mask of the metadata bits
+ * @LIBETH_XDP_TX_FIRST: indicates the frag is the first one of the frame
+ * @LIBETH_XDP_TX_LAST: whether the frag is the last one of the frame
+ * @LIBETH_XDP_TX_MULTI: whether the frame contains several frags
+ * @LIBETH_XDP_TX_FLAGS: only for ``XDP_TX``, [31:16] of ::len_fl is flags
+ */
+enum {
+ LIBETH_XDP_TX_LEN = GENMASK(15, 0),
+
+ LIBETH_XDP_TX_CSUM = XDP_TXMD_FLAGS_CHECKSUM,
+ LIBETH_XDP_TX_XSKMD = LIBETH_XDP_TX_LEN,
+
+ LIBETH_XDP_TX_FIRST = BIT(16),
+ LIBETH_XDP_TX_LAST = BIT(17),
+ LIBETH_XDP_TX_MULTI = BIT(18),
+
+ LIBETH_XDP_TX_FLAGS = GENMASK(31, 16),
+};
+
+/**
+ * struct libeth_xdp_tx_frame - represents one XDP Tx element
+ * @data: frame start pointer for ``XDP_TX``
+ * @len_fl: ``XDP_TX``, combined flags [31:16] and len [15:0] field for speed
+ * @soff: ``XDP_TX``, offset from @data to the start of &skb_shared_info
+ * @frag: one (non-head) frag for ``XDP_TX``
+ * @xdpf: &xdp_frame for the head frag for .ndo_xdp_xmit()
+ * @dma: DMA address of the non-head frag for .ndo_xdp_xmit()
+ * @xsk: ``XDP_TX`` for XSk, XDP buffer for any frag
+ * @len: frag length for XSk ``XDP_TX`` and .ndo_xdp_xmit()
+ * @flags: Tx flags for the above
+ * @opts: combined @len + @flags for the above for speed
+ * @desc: XSk xmit descriptor for direct casting
+ */
+struct libeth_xdp_tx_frame {
+ union {
+ /* ``XDP_TX`` */
+ struct {
+ void *data;
+ u32 len_fl;
+ u32 soff;
+ };
+
+ /* ``XDP_TX`` frag */
+ skb_frag_t frag;
+
+ /* .ndo_xdp_xmit(), XSk ``XDP_TX`` */
+ struct {
+ union {
+ struct xdp_frame *xdpf;
+ dma_addr_t dma;
+
+ struct libeth_xdp_buff *xsk;
+ };
+ union {
+ struct {
+ u32 len;
+ u32 flags;
+ };
+ aligned_u64 opts;
+ };
+ };
+
+ /* XSk xmit */
+ struct xdp_desc desc;
+ };
+} __aligned(sizeof(struct xdp_desc));
+static_assert(offsetof(struct libeth_xdp_tx_frame, frag.len) ==
+ offsetof(struct libeth_xdp_tx_frame, len_fl));
+static_assert(sizeof(struct libeth_xdp_tx_frame) == sizeof(struct xdp_desc));
+
+/**
+ * struct libeth_xdp_tx_bulk - XDP Tx frame bulk for bulk sending
+ * @prog: corresponding active XDP program, %NULL for .ndo_xdp_xmit()
+ * @dev: &net_device which the frames are transmitted on
+ * @xdpsq: shortcut to the corresponding driver-specific XDPSQ structure
+ * @act_mask: Rx only, mask of all the XDP prog verdicts for that NAPI session
+ * @count: current number of frames in @bulk
+ * @bulk: array of queued frames for bulk Tx
+ *
+ * All XDP Tx operations except XSk xmit queue each frame to the bulk first
+ * and flush it when @count reaches the array end. Bulk is always placed on
+ * the stack for performance. One bulk element contains all the data necessary
+ * for sending a frame and then freeing it on completion.
+ * For XSk xmit, the Tx descriptor array from &xsk_buff_pool is cast directly
+ * to &libeth_xdp_tx_frame as they are compatible, and the bulk structure is
+ * not used.
+ */
+struct libeth_xdp_tx_bulk {
+ const struct bpf_prog *prog;
+ struct net_device *dev;
+ void *xdpsq;
+
+ u32 act_mask;
+ u32 count;
+ struct libeth_xdp_tx_frame bulk[LIBETH_XDP_TX_BULK];
+} __aligned(sizeof(struct libeth_xdp_tx_frame));
+
+/**
+ * LIBETH_XDP_ONSTACK_BULK - declare &libeth_xdp_tx_bulk on the stack
+ * @bq: name of the variable to declare
+ *
+ * Helper to declare a bulk on the stack with a compiler hint that it should
+ * not be initialized automatically (with ``CONFIG_INIT_STACK_ALL_*``) for
+ * performance reasons.
+ */
+#define LIBETH_XDP_ONSTACK_BULK(bq) \
+ struct libeth_xdp_tx_bulk bq __uninitialized
+
+/**
+ * struct libeth_xdpsq - abstraction for an XDPSQ
+ * @pool: XSk buffer pool for XSk ``XDP_TX`` and xmit
+ * @sqes: array of Tx buffers from the actual queue struct
+ * @descs: opaque pointer to the HW descriptor array
+ * @ntu: pointer to the next free descriptor index
+ * @count: number of descriptors on that queue
+ * @pending: pointer to the number of sent-not-completed descs on that queue
+ * @xdp_tx: pointer to the above, but only for non-XSk-xmit frames
+ * @lock: corresponding XDPSQ lock
+ *
+ * Abstraction for driver-independent implementation of Tx. Placed on the stack
+ * and filled by the driver before the transmission, so that the generic
+ * functions can access and modify driver-specific resources.
+ */
+struct libeth_xdpsq {
+ struct xsk_buff_pool *pool;
+ struct libeth_sqe *sqes;
+ void *descs;
+
+ u32 *ntu;
+ u32 count;
+
+ u32 *pending;
+ u32 *xdp_tx;
+ struct libeth_xdpsq_lock *lock;
+};
+
+/**
+ * struct libeth_xdp_tx_desc - abstraction for an XDP Tx descriptor
+ * @addr: DMA address of the frame
+ * @len: length of the frame
+ * @flags: XDP Tx flags
+ * @opts: combined @len + @flags for speed
+ *
+ * Filled by the generic functions and then passed to driver-specific functions
+ * to fill a HW Tx descriptor, always placed on the [function] stack.
+ */
+struct libeth_xdp_tx_desc {
+ dma_addr_t addr;
+ union {
+ struct {
+ u32 len;
+ u32 flags;
+ };
+ aligned_u64 opts;
+ };
+} __aligned_largest;
+
+/**
+ * libeth_xdp_ptr_to_priv - convert pointer to a libeth_xdp u64 priv
+ * @ptr: pointer to convert
+ *
+ * The main sending function passes private data as the largest scalar, u64.
+ * Use this helper when you want to pass a pointer there.
+ */
+#define libeth_xdp_ptr_to_priv(ptr) ({ \
+ typecheck_pointer(ptr); \
+ ((u64)(uintptr_t)(ptr)); \
+})
+/**
+ * libeth_xdp_priv_to_ptr - convert libeth_xdp u64 priv to a pointer
+ * @priv: private data to convert
+ *
+ * The main sending function passes private data as the largest scalar, u64.
+ * Use this helper when your callback takes this u64 and you want to convert
+ * it back to a pointer.
+ */
+#define libeth_xdp_priv_to_ptr(priv) ({ \
+ static_assert(__same_type(priv, u64)); \
+ ((const void *)(uintptr_t)(priv)); \
+})
+
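+/*
+ * Round-trip sketch: pass a pointer (here the XSk Tx metadata ops) through
+ * the u64 @priv argument of the sending functions:
+ *
+ * u64 priv = libeth_xdp_ptr_to_priv(tmo);
+ * ...
+ * tmo = libeth_xdp_priv_to_ptr(priv);
+ */
+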
+/*
+ * On 64-bit systems, assigning one u64 is faster than two u32s. When ::len
+ * occupies the lowest 32 bits (LE), the whole ::opts can be assigned directly.
+ */
+#ifdef __LITTLE_ENDIAN
+#define __LIBETH_WORD_ACCESS 1
+#endif
+#ifdef __LIBETH_WORD_ACCESS
+#define __libeth_xdp_tx_len(flen, ...) \
+ .opts = ((flen) | FIELD_PREP(GENMASK_ULL(63, 32), (__VA_ARGS__ + 0)))
+#else
+#define __libeth_xdp_tx_len(flen, ...) \
+ .len = (flen), .flags = (__VA_ARGS__ + 0)
+#endif
+
+/**
+ * libeth_xdp_tx_xmit_bulk - main XDP Tx function
+ * @bulk: array of frames to send
+ * @xdpsq: pointer to the driver-specific XDPSQ struct
+ * @n: number of frames to send
+ * @unroll: whether to unroll the queue filling loop for speed
+ * @priv: driver-specific private data
+ * @prep: callback for cleaning the queue and filling abstract &libeth_xdpsq
+ * @fill: internal callback for filling &libeth_sqe and &libeth_xdp_tx_desc
+ * @xmit: callback for filling a HW descriptor with the frame info
+ *
+ * Internal abstraction for placing @n XDP Tx frames on the HW XDPSQ. Used for
+ * all types of frames: ``XDP_TX``, .ndo_xdp_xmit(), XSk ``XDP_TX``, and XSk
+ * xmit.
+ * @prep must lock the queue as this function releases it at the end. @unroll
+ * greatly increases the object code size, but also greatly increases XSk xmit
+ * performance; for other types of frames, it's not enabled.
+ * The compilers inline all those onstack abstractions to direct data accesses.
+ *
+ * Return: number of frames actually placed on the queue, <= @n. The function
+ * can't fail, but can send fewer frames than requested if there are not
+ * enough free descriptors available. The actual free space is returned by
+ * @prep from the driver.
+ */
+static __always_inline u32
+libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq,
+ u32 n, bool unroll, u64 priv,
+ u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq),
+ struct libeth_xdp_tx_desc
+ (*fill)(struct libeth_xdp_tx_frame frm, u32 i,
+ const struct libeth_xdpsq *sq, u64 priv),
+ void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i,
+ const struct libeth_xdpsq *sq, u64 priv))
+{
+ struct libeth_xdpsq sq __uninitialized;
+ u32 this, batched, off = 0;
+ u32 ntu, i = 0;
+
+ n = min(n, prep(xdpsq, &sq));
+ if (unlikely(!n))
+ goto unlock;
+
+ ntu = *sq.ntu;
+
+ this = sq.count - ntu;
+ if (likely(this > n))
+ this = n;
+
+again:
+ if (!unroll)
+ goto linear;
+
+ batched = ALIGN_DOWN(this, LIBETH_XDP_TX_BATCH);
+
+ for ( ; i < off + batched; i += LIBETH_XDP_TX_BATCH) {
+ u32 base = ntu + i - off;
+
+ unrolled_count(LIBETH_XDP_TX_BATCH)
+ for (u32 j = 0; j < LIBETH_XDP_TX_BATCH; j++)
+ xmit(fill(bulk[i + j], base + j, &sq, priv),
+ base + j, &sq, priv);
+ }
+
+ if (batched < this) {
+linear:
+ for ( ; i < off + this; i++)
+ xmit(fill(bulk[i], ntu + i - off, &sq, priv),
+ ntu + i - off, &sq, priv);
+ }
+
+ ntu += this;
+ if (likely(ntu < sq.count))
+ goto out;
+
+ ntu = 0;
+
+ if (i < n) {
+ this = n - i;
+ off = i;
+
+ goto again;
+ }
+
+out:
+ *sq.ntu = ntu;
+ *sq.pending += n;
+ if (sq.xdp_tx)
+ *sq.xdp_tx += n;
+
+unlock:
+ libeth_xdpsq_unlock(sq.lock);
+
+ return n;
+}
+
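+/*
+ * A sketch of a driver-side @xmit callback for the function above (the HW
+ * descriptor layout and ``my_*`` names are hypothetical):
+ *
+ * static void my_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i,
+ *				 const struct libeth_xdpsq *sq, u64 priv)
+ * {
+ *	struct my_tx_desc *hw = &((struct my_tx_desc *)sq->descs)[i];
+ *
+ *	hw->addr = cpu_to_le64(desc.addr);
+ *	hw->len = cpu_to_le16(desc.len);
+ *	hw->eop = !!(desc.flags & LIBETH_XDP_TX_LAST);
+ * }
+ */
+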
+/* ``XDP_TX`` bulking */
+
+void libeth_xdp_return_buff_slow(struct libeth_xdp_buff *xdp);
+
+/**
+ * libeth_xdp_tx_queue_head - internal helper for queueing one ``XDP_TX`` head
+ * @bq: XDP Tx bulk to queue the head frag to
+ * @xdp: XDP buffer with the head to queue
+ *
+ * Return: false if it's the only frag of the frame, true if it's an S/G frame.
+ */
+static inline bool libeth_xdp_tx_queue_head(struct libeth_xdp_tx_bulk *bq,
+ const struct libeth_xdp_buff *xdp)
+{
+ const struct xdp_buff *base = &xdp->base;
+
+ bq->bulk[bq->count++] = (typeof(*bq->bulk)){
+ .data = xdp->data,
+ .len_fl = (base->data_end - xdp->data) | LIBETH_XDP_TX_FIRST,
+ .soff = xdp_data_hard_end(base) - xdp->data,
+ };
+
+ if (!xdp_buff_has_frags(base))
+ return false;
+
+ bq->bulk[bq->count - 1].len_fl |= LIBETH_XDP_TX_MULTI;
+
+ return true;
+}
+
+/**
+ * libeth_xdp_tx_queue_frag - internal helper for queueing one ``XDP_TX`` frag
+ * @bq: XDP Tx bulk to queue the frag to
+ * @frag: frag to queue
+ */
+static inline void libeth_xdp_tx_queue_frag(struct libeth_xdp_tx_bulk *bq,
+ const skb_frag_t *frag)
+{
+ bq->bulk[bq->count++].frag = *frag;
+}
+
+/**
+ * libeth_xdp_tx_queue_bulk - internal helper for queueing one ``XDP_TX`` frame
+ * @bq: XDP Tx bulk to queue the frame to
+ * @xdp: XDP buffer to queue
+ * @flush_bulk: driver callback to flush the bulk to the HW queue
+ *
+ * Return: true on success, false on flush error.
+ */
+static __always_inline bool
+libeth_xdp_tx_queue_bulk(struct libeth_xdp_tx_bulk *bq,
+ struct libeth_xdp_buff *xdp,
+ bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
+ u32 flags))
+{
+ const struct skb_shared_info *sinfo;
+ bool ret = true;
+ u32 nr_frags;
+
+ if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
+ unlikely(!flush_bulk(bq, 0))) {
+ libeth_xdp_return_buff_slow(xdp);
+ return false;
+ }
+
+ if (!libeth_xdp_tx_queue_head(bq, xdp))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_buff(&xdp->base);
+ nr_frags = sinfo->nr_frags;
+
+ for (u32 i = 0; i < nr_frags; i++) {
+ if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
+ unlikely(!flush_bulk(bq, 0))) {
+ ret = false;
+ break;
+ }
+
+ libeth_xdp_tx_queue_frag(bq, &sinfo->frags[i]);
+ }
+
+out:
+ bq->bulk[bq->count - 1].len_fl |= LIBETH_XDP_TX_LAST;
+ xdp->data = NULL;
+
+ return ret;
+}
+
+/**
+ * libeth_xdp_tx_fill_stats - fill &libeth_sqe with ``XDP_TX`` frame stats
+ * @sqe: SQ element to fill
+ * @desc: libeth_xdp Tx descriptor
+ * @sinfo: &skb_shared_info for this frame
+ *
+ * Internal helper for filling an SQE with the frame stats; do not use in
+ * drivers directly. Fills the number of frags and bytes for this frame.
+ */
+#define libeth_xdp_tx_fill_stats(sqe, desc, sinfo) \
+ __libeth_xdp_tx_fill_stats(sqe, desc, sinfo, __UNIQUE_ID(sqe_), \
+ __UNIQUE_ID(desc_), __UNIQUE_ID(sinfo_))
+
+#define __libeth_xdp_tx_fill_stats(sqe, desc, sinfo, ue, ud, us) do { \
+ const struct libeth_xdp_tx_desc *ud = (desc); \
+ const struct skb_shared_info *us; \
+ struct libeth_sqe *ue = (sqe); \
+ \
+ ue->nr_frags = 1; \
+ ue->bytes = ud->len; \
+ \
+ if (ud->flags & LIBETH_XDP_TX_MULTI) { \
+ us = (sinfo); \
+ ue->nr_frags += us->nr_frags; \
+ ue->bytes += us->xdp_frags_size; \
+ } \
+} while (0)
+
+/**
+ * libeth_xdp_tx_fill_buf - internal helper to fill one ``XDP_TX`` &libeth_sqe
+ * @frm: XDP Tx frame from the bulk
+ * @i: index on the HW queue
+ * @sq: XDPSQ abstraction for the queue
+ * @priv: private data
+ *
+ * Return: XDP Tx descriptor with the synced DMA and other info to pass to
+ * the driver callback.
+ */
+static inline struct libeth_xdp_tx_desc
+libeth_xdp_tx_fill_buf(struct libeth_xdp_tx_frame frm, u32 i,
+ const struct libeth_xdpsq *sq, u64 priv)
+{
+ struct libeth_xdp_tx_desc desc;
+ struct skb_shared_info *sinfo;
+ skb_frag_t *frag = &frm.frag;
+ struct libeth_sqe *sqe;
+ netmem_ref netmem;
+
+ if (frm.len_fl & LIBETH_XDP_TX_FIRST) {
+ sinfo = frm.data + frm.soff;
+ skb_frag_fill_netmem_desc(frag, virt_to_netmem(frm.data),
+ offset_in_page(frm.data),
+ frm.len_fl);
+ } else {
+ sinfo = NULL;
+ }
+
+ netmem = skb_frag_netmem(frag);
+ desc = (typeof(desc)){
+ .addr = page_pool_get_dma_addr_netmem(netmem) +
+ skb_frag_off(frag),
+ .len = skb_frag_size(frag) & LIBETH_XDP_TX_LEN,
+ .flags = skb_frag_size(frag) & LIBETH_XDP_TX_FLAGS,
+ };
+
+ dma_sync_single_for_device(__netmem_get_pp(netmem)->p.dev, desc.addr,
+ desc.len, DMA_BIDIRECTIONAL);
+
+ if (!sinfo)
+ return desc;
+
+ sqe = &sq->sqes[i];
+ sqe->type = LIBETH_SQE_XDP_TX;
+ sqe->sinfo = sinfo;
+ libeth_xdp_tx_fill_stats(sqe, &desc, sinfo);
+
+ return desc;
+}
+
+void libeth_xdp_tx_exception(struct libeth_xdp_tx_bulk *bq, u32 sent,
+ u32 flags);
+
+/**
+ * __libeth_xdp_tx_flush_bulk - internal helper to flush one XDP Tx bulk
+ * @bq: bulk to flush
+ * @flags: XDP TX flags (.ndo_xdp_xmit(), XSk etc.)
+ * @prep: driver-specific callback to prepare the queue for sending
+ * @fill: libeth_xdp callback to fill &libeth_sqe and &libeth_xdp_tx_desc
+ * @xmit: driver callback to fill a HW descriptor
+ *
+ * Internal abstraction to create bulk flush functions for drivers. Used for
+ * everything except XSk xmit.
+ *
+ * Return: true if anything was sent, false otherwise.
+ */
+static __always_inline bool
+__libeth_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags,
+ u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq),
+ struct libeth_xdp_tx_desc
+ (*fill)(struct libeth_xdp_tx_frame frm, u32 i,
+ const struct libeth_xdpsq *sq, u64 priv),
+ void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i,
+ const struct libeth_xdpsq *sq,
+ u64 priv))
+{
+ u32 sent, drops;
+ int err = 0;
+
+ sent = libeth_xdp_tx_xmit_bulk(bq->bulk, bq->xdpsq,
+ min(bq->count, LIBETH_XDP_TX_BULK),
+ false, 0, prep, fill, xmit);
+ drops = bq->count - sent;
+
+ if (unlikely(drops)) {
+ libeth_xdp_tx_exception(bq, sent, flags);
+ err = -ENXIO;
+ } else {
+ bq->count = 0;
+ }
+
+ trace_xdp_bulk_tx(bq->dev, sent, drops, err);
+
+ return likely(sent);
+}
+
+/**
+ * libeth_xdp_tx_flush_bulk - wrapper to define flush of one ``XDP_TX`` bulk
+ * @bq: bulk to flush
+ * @flags: Tx flags, see above
+ * @prep: driver callback to prepare the queue
+ * @xmit: driver callback to fill a HW descriptor
+ *
+ * Use via LIBETH_XDP_DEFINE_FLUSH_TX() to define an ``XDP_TX`` driver
+ * callback.
+ */
+#define libeth_xdp_tx_flush_bulk(bq, flags, prep, xmit) \
+ __libeth_xdp_tx_flush_bulk(bq, flags, prep, libeth_xdp_tx_fill_buf, \
+ xmit)
+
+/* .ndo_xdp_xmit() implementation */
+
+/**
+ * libeth_xdp_xmit_init_bulk - internal helper to initialize bulk for XDP xmit
+ * @bq: bulk to initialize
+ * @dev: target &net_device
+ * @xdpsqs: array of driver-specific XDPSQ structs
+ * @num: number of active XDPSQs (the above array length)
+ */
+#define libeth_xdp_xmit_init_bulk(bq, dev, xdpsqs, num) \
+ __libeth_xdp_xmit_init_bulk(bq, dev, (xdpsqs)[libeth_xdpsq_id(num)])
+
+static inline void __libeth_xdp_xmit_init_bulk(struct libeth_xdp_tx_bulk *bq,
+ struct net_device *dev,
+ void *xdpsq)
+{
+ bq->dev = dev;
+ bq->xdpsq = xdpsq;
+ bq->count = 0;
+}
+
+/**
+ * libeth_xdp_xmit_frame_dma - internal helper to access DMA of an &xdp_frame
+ * @xf: pointer to the XDP frame
+ *
+ * There's no place in &libeth_xdp_tx_frame to store the DMA address of an
+ * &xdp_frame head. The headroom is used instead: the address is placed right
+ * after the frame struct, naturally aligned.
+ *
+ * Return: pointer to the DMA address to use.
+ */
+#define libeth_xdp_xmit_frame_dma(xf) \
+ _Generic((xf), \
+ const struct xdp_frame *: \
+ (const dma_addr_t *)__libeth_xdp_xmit_frame_dma(xf), \
+ struct xdp_frame *: \
+ (dma_addr_t *)__libeth_xdp_xmit_frame_dma(xf) \
+ )
+
+static inline void *__libeth_xdp_xmit_frame_dma(const struct xdp_frame *xdpf)
+{
+ void *addr = (void *)(xdpf + 1);
+
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+ __alignof(*xdpf) < sizeof(dma_addr_t))
+ addr = PTR_ALIGN(addr, sizeof(dma_addr_t));
+
+ return addr;
+}
+
+/**
+ * libeth_xdp_xmit_queue_head - internal helper for queueing one XDP xmit head
+ * @bq: XDP Tx bulk to queue the head frag to
+ * @xdpf: XDP frame with the head to queue
+ * @dev: device to perform DMA mapping
+ *
+ * Return: ``LIBETH_XDP_DROP`` on DMA mapping error,
+ * ``LIBETH_XDP_PASS`` if it's the only frag in the frame,
+ * ``LIBETH_XDP_TX`` if it's an S/G frame.
+ */
+static inline u32 libeth_xdp_xmit_queue_head(struct libeth_xdp_tx_bulk *bq,
+ struct xdp_frame *xdpf,
+ struct device *dev)
+{
+ dma_addr_t dma;
+
+ dma = dma_map_single(dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma))
+ return LIBETH_XDP_DROP;
+
+ *libeth_xdp_xmit_frame_dma(xdpf) = dma;
+
+ bq->bulk[bq->count++] = (typeof(*bq->bulk)){
+ .xdpf = xdpf,
+ __libeth_xdp_tx_len(xdpf->len, LIBETH_XDP_TX_FIRST),
+ };
+
+ if (!xdp_frame_has_frags(xdpf))
+ return LIBETH_XDP_PASS;
+
+ bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_MULTI;
+
+ return LIBETH_XDP_TX;
+}
+
+/**
+ * libeth_xdp_xmit_queue_frag - internal helper for queueing one XDP xmit frag
+ * @bq: XDP Tx bulk to queue the frag to
+ * @frag: frag to queue
+ * @dev: device to perform DMA mapping
+ *
+ * Return: true on success, false on DMA mapping error.
+ */
+static inline bool libeth_xdp_xmit_queue_frag(struct libeth_xdp_tx_bulk *bq,
+ const skb_frag_t *frag,
+ struct device *dev)
+{
+ dma_addr_t dma;
+
+ dma = skb_frag_dma_map(dev, frag);
+ if (dma_mapping_error(dev, dma))
+ return false;
+
+ bq->bulk[bq->count++] = (typeof(*bq->bulk)){
+ .dma = dma,
+ __libeth_xdp_tx_len(skb_frag_size(frag)),
+ };
+
+ return true;
+}
+
+/**
+ * libeth_xdp_xmit_queue_bulk - internal helper for queueing one XDP xmit frame
+ * @bq: XDP Tx bulk to queue the frame to
+ * @xdpf: XDP frame to queue
+ * @flush_bulk: driver callback to flush the bulk to the HW queue
+ *
+ * Return: ``LIBETH_XDP_TX`` on success,
+ * ``LIBETH_XDP_DROP`` if the frame should be dropped by the stack,
+ * ``LIBETH_XDP_ABORTED`` if the frame will be dropped by libeth_xdp.
+ */
+static __always_inline u32
+libeth_xdp_xmit_queue_bulk(struct libeth_xdp_tx_bulk *bq,
+ struct xdp_frame *xdpf,
+ bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
+ u32 flags))
+{
+ u32 head, nr_frags, i, ret = LIBETH_XDP_TX;
+ struct device *dev = bq->dev->dev.parent;
+ const struct skb_shared_info *sinfo;
+
+ if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
+ unlikely(!flush_bulk(bq, LIBETH_XDP_TX_NDO)))
+ return LIBETH_XDP_DROP;
+
+ head = libeth_xdp_xmit_queue_head(bq, xdpf, dev);
+ if (head == LIBETH_XDP_PASS)
+ goto out;
+ else if (head == LIBETH_XDP_DROP)
+ return LIBETH_XDP_DROP;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ nr_frags = sinfo->nr_frags;
+
+ for (i = 0; i < nr_frags; i++) {
+ if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
+ unlikely(!flush_bulk(bq, LIBETH_XDP_TX_NDO)))
+ break;
+
+ if (!libeth_xdp_xmit_queue_frag(bq, &sinfo->frags[i], dev))
+ break;
+ }
+
+ if (unlikely(i < nr_frags))
+ ret = LIBETH_XDP_ABORTED;
+
+out:
+ bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_LAST;
+
+ return ret;
+}
+
+/**
+ * libeth_xdp_xmit_fill_buf - internal helper to fill one XDP xmit &libeth_sqe
+ * @frm: XDP Tx frame from the bulk
+ * @i: index on the HW queue
+ * @sq: XDPSQ abstraction for the queue
+ * @priv: private data
+ *
+ * Return: XDP Tx descriptor with the mapped DMA and other info to pass to
+ * the driver callback.
+ */
+static inline struct libeth_xdp_tx_desc
+libeth_xdp_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i,
+ const struct libeth_xdpsq *sq, u64 priv)
+{
+ struct libeth_xdp_tx_desc desc;
+ struct libeth_sqe *sqe;
+ struct xdp_frame *xdpf;
+
+ if (frm.flags & LIBETH_XDP_TX_FIRST) {
+ xdpf = frm.xdpf;
+ desc.addr = *libeth_xdp_xmit_frame_dma(xdpf);
+ } else {
+ xdpf = NULL;
+ desc.addr = frm.dma;
+ }
+ desc.opts = frm.opts;
+
+ sqe = &sq->sqes[i];
+ dma_unmap_addr_set(sqe, dma, desc.addr);
+ dma_unmap_len_set(sqe, len, desc.len);
+
+ if (!xdpf) {
+ sqe->type = LIBETH_SQE_XDP_XMIT_FRAG;
+ return desc;
+ }
+
+ sqe->type = LIBETH_SQE_XDP_XMIT;
+ sqe->xdpf = xdpf;
+ libeth_xdp_tx_fill_stats(sqe, &desc,
+ xdp_get_shared_info_from_frame(xdpf));
+
+ return desc;
+}
+
+/**
+ * libeth_xdp_xmit_flush_bulk - wrapper to define flush of one XDP xmit bulk
+ * @bq: bulk to flush
+ * @flags: Tx flags, see __libeth_xdp_tx_flush_bulk()
+ * @prep: driver callback to prepare the queue
+ * @xmit: driver callback to fill a HW descriptor
+ *
+ * Use via LIBETH_XDP_DEFINE_FLUSH_XMIT() to define an XDP xmit driver
+ * callback.
+ */
+#define libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit) \
+ __libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_NDO, prep, \
+ libeth_xdp_xmit_fill_buf, xmit)
+
+u32 libeth_xdp_xmit_return_bulk(const struct libeth_xdp_tx_frame *bq,
+ u32 count, const struct net_device *dev);
+
+/**
+ * __libeth_xdp_xmit_do_bulk - internal function to implement .ndo_xdp_xmit()
+ * @bq: XDP Tx bulk to queue frames to
+ * @frames: XDP frames passed by the stack
+ * @n: number of frames
+ * @flags: flags passed by the stack
+ * @flush_bulk: driver callback to flush an XDP xmit bulk
+ * @finalize: driver callback to finalize sending XDP Tx frames on the queue
+ *
+ * Perform common checks, map the frags and queue them to the bulk, then flush
+ * the bulk to the XDPSQ. If requested by the stack, finalize the queue.
+ *
+ * Return: number of frames sent or -errno on error.
+ */
+static __always_inline int
+__libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq,
+ struct xdp_frame **frames, u32 n, u32 flags,
+ bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
+ u32 flags),
+ void (*finalize)(void *xdpsq, bool sent, bool flush))
+{
+ u32 nxmit = 0;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ for (u32 i = 0; likely(i < n); i++) {
+ u32 ret;
+
+ ret = libeth_xdp_xmit_queue_bulk(bq, frames[i], flush_bulk);
+ if (unlikely(ret != LIBETH_XDP_TX)) {
+ nxmit += ret == LIBETH_XDP_ABORTED;
+ break;
+ }
+
+ nxmit++;
+ }
+
+ if (bq->count) {
+ flush_bulk(bq, LIBETH_XDP_TX_NDO);
+ if (unlikely(bq->count))
+ nxmit -= libeth_xdp_xmit_return_bulk(bq->bulk,
+ bq->count,
+ bq->dev);
+ }
+
+ finalize(bq->xdpsq, nxmit, flags & XDP_XMIT_FLUSH);
+
+ return nxmit;
+}
+
+/**
+ * libeth_xdp_xmit_do_bulk - implement full .ndo_xdp_xmit() in driver
+ * @dev: target &net_device
+ * @n: number of frames to send
+ * @fr: XDP frames to send
+ * @f: flags passed by the stack
+ * @xqs: array of XDPSQs driver structs
+ * @nqs: number of active XDPSQs, the above array length
+ * @fl: driver callback to flush an XDP xmit bulk
+ * @fin: driver callback to finalize the queue
+ *
+ * If the driver has active XDPSQs, perform common checks and send the frames.
+ * Finalize the queue, if requested.
+ *
+ * Return: number of frames sent or -errno on error.
+ */
+#define libeth_xdp_xmit_do_bulk(dev, n, fr, f, xqs, nqs, fl, fin) \
+ _libeth_xdp_xmit_do_bulk(dev, n, fr, f, xqs, nqs, fl, fin, \
+ __UNIQUE_ID(bq_), __UNIQUE_ID(ret_), \
+ __UNIQUE_ID(nqs_))
+
+#define _libeth_xdp_xmit_do_bulk(d, n, fr, f, xqs, nqs, fl, fin, ub, ur, un) \
+({ \
+ u32 un = (nqs); \
+ int ur; \
+ \
+ if (likely(un)) { \
+ LIBETH_XDP_ONSTACK_BULK(ub); \
+ \
+ libeth_xdp_xmit_init_bulk(&ub, d, xqs, un); \
+ ur = __libeth_xdp_xmit_do_bulk(&ub, fr, n, f, fl, fin); \
+ } else { \
+ ur = -ENXIO; \
+ } \
+ \
+ ur; \
+})
+
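+/*
+ * With the helpers above, a complete .ndo_xdp_xmit() may boil down to
+ * (sketch, ``my_*`` names are hypothetical):
+ *
+ * static int my_xdp_xmit(struct net_device *dev, int n,
+ *			   struct xdp_frame **frames, u32 flags)
+ * {
+ *	struct my_priv *priv = netdev_priv(dev);
+ *
+ *	return libeth_xdp_xmit_do_bulk(dev, n, frames, flags, priv->xdpsqs,
+ *				       priv->num_xdpsqs, my_xmit_flush_bulk,
+ *				       my_xdpsq_finalize);
+ * }
+ */
+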
+/* Rx polling path */
+
+/**
+ * libeth_xdp_tx_init_bulk - initialize an XDP Tx bulk for Rx NAPI poll
+ * @bq: bulk to initialize
+ * @prog: RCU pointer to the XDP program (can be %NULL)
+ * @dev: target &net_device
+ * @xdpsqs: array of driver XDPSQ structs
+ * @num: number of active XDPSQs, the above array length
+ *
+ * Should be called on an onstack XDP Tx bulk before the NAPI polling loop.
+ * Initializes all the needed fields to run libeth_xdp functions. If @num == 0,
+ * assumes XDP is not enabled.
+ * Do not use for XSk, it has its own optimized helper.
+ */
+#define libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num) \
+ __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, false, \
+ __UNIQUE_ID(bq_), __UNIQUE_ID(nqs_))
+
+#define __libeth_xdp_tx_init_bulk(bq, pr, d, xdpsqs, num, xsk, ub, un) do { \
+ typeof(bq) ub = (bq); \
+ u32 un = (num); \
+ \
+ rcu_read_lock(); \
+ \
+ if (un || (xsk)) { \
+ ub->prog = rcu_dereference(pr); \
+ ub->dev = (d); \
+ ub->xdpsq = (xdpsqs)[libeth_xdpsq_id(un)]; \
+ } else { \
+ ub->prog = NULL; \
+ } \
+ \
+ ub->act_mask = 0; \
+ ub->count = 0; \
+} while (0)
+
+void libeth_xdp_load_stash(struct libeth_xdp_buff *dst,
+ const struct libeth_xdp_buff_stash *src);
+void libeth_xdp_save_stash(struct libeth_xdp_buff_stash *dst,
+ const struct libeth_xdp_buff *src);
+void __libeth_xdp_return_stash(struct libeth_xdp_buff_stash *stash);
+
+/**
+ * libeth_xdp_init_buff - initialize a &libeth_xdp_buff for Rx NAPI poll
+ * @dst: onstack buffer to initialize
+ * @src: XDP buffer stash placed on the queue
+ * @rxq: registered &xdp_rxq_info corresponding to this queue
+ *
+ * Should be called before the main NAPI polling loop. Loads the content of
+ * the previously saved stash or initializes the buffer from scratch.
+ * Do not use for XSk.
+ */
+static inline void
+libeth_xdp_init_buff(struct libeth_xdp_buff *dst,
+ const struct libeth_xdp_buff_stash *src,
+ struct xdp_rxq_info *rxq)
+{
+ if (likely(!src->data))
+ dst->data = NULL;
+ else
+ libeth_xdp_load_stash(dst, src);
+
+ dst->base.rxq = rxq;
+}
+
+/**
+ * libeth_xdp_save_buff - save a partially built buffer on a queue
+ * @dst: XDP buffer stash placed on the queue
+ * @src: onstack buffer to save
+ *
+ * Should be called after the main NAPI polling loop. If the loop exited before
+ * the buffer was finished, saves its content on the queue, so that it can be
+ * completed during the next poll. Otherwise, clears the stash.
+ */
+static inline void libeth_xdp_save_buff(struct libeth_xdp_buff_stash *dst,
+ const struct libeth_xdp_buff *src)
+{
+ if (likely(!src->data))
+ dst->data = NULL;
+ else
+ libeth_xdp_save_stash(dst, src);
+}
+
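+/*
+ * Typical placement around the NAPI polling loop (sketch, ``my_*`` names
+ * are hypothetical):
+ *
+ * LIBETH_XDP_ONSTACK_BULK(bq);
+ * LIBETH_XDP_ONSTACK_BUFF(xdp);
+ *
+ * libeth_xdp_tx_init_bulk(&bq, priv->xdp_prog, dev, priv->xdpsqs,
+ *			    priv->num_xdpsqs);
+ * libeth_xdp_init_buff(xdp, &rq->xdp_stash, &rq->xdp_rxq);
+ *
+ * // the polling loop itself
+ *
+ * libeth_xdp_save_buff(&rq->xdp_stash, xdp);
+ * libeth_xdp_finalize_rx(&bq, my_xdp_flush_tx, my_xdpsq_finalize);
+ */
+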
+/**
+ * libeth_xdp_return_stash - free an XDP buffer stash from a queue
+ * @stash: stash to free
+ *
+ * If the queue is about to be destroyed, but it still has an incomplete
+ * buffer stash, this helper should be called to free it.
+ */
+static inline void libeth_xdp_return_stash(struct libeth_xdp_buff_stash *stash)
+{
+ if (stash->data)
+ __libeth_xdp_return_stash(stash);
+}
+
+static inline void libeth_xdp_return_va(const void *data, bool napi)
+{
+ netmem_ref netmem = virt_to_netmem(data);
+
+ page_pool_put_full_netmem(__netmem_get_pp(netmem), netmem, napi);
+}
+
+static inline void libeth_xdp_return_frags(const struct skb_shared_info *sinfo,
+ bool napi)
+{
+ for (u32 i = 0; i < sinfo->nr_frags; i++) {
+ netmem_ref netmem = skb_frag_netmem(&sinfo->frags[i]);
+
+ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, napi);
+ }
+}
+
+/**
+ * libeth_xdp_return_buff - free/recycle &libeth_xdp_buff
+ * @xdp: buffer to free
+ *
+ * Hotpath helper to free &libeth_xdp_buff. Compared to xdp_return_buff(),
+ * it's faster as it gets inlined and always assumes order-0 pages and safe
+ * direct recycling. Zeroes @xdp->data to avoid UAFs.
+ */
+#define libeth_xdp_return_buff(xdp) __libeth_xdp_return_buff(xdp, true)
+
+static inline void __libeth_xdp_return_buff(struct libeth_xdp_buff *xdp,
+ bool napi)
+{
+ if (!xdp_buff_has_frags(&xdp->base))
+ goto out;
+
+ libeth_xdp_return_frags(xdp_get_shared_info_from_buff(&xdp->base),
+ napi);
+
+out:
+ libeth_xdp_return_va(xdp->data, napi);
+ xdp->data = NULL;
+}
+
+bool libeth_xdp_buff_add_frag(struct libeth_xdp_buff *xdp,
+ const struct libeth_fqe *fqe,
+ u32 len);
+
+/**
+ * libeth_xdp_prepare_buff - fill &libeth_xdp_buff with head FQE data
+ * @xdp: XDP buffer to attach the head to
+ * @fqe: FQE containing the head buffer
+ * @len: buffer len passed from HW
+ *
+ * Internal, use libeth_xdp_process_buff() instead. Initializes XDP buffer
+ * head with the Rx buffer data: data pointer, length, headroom, and
+ * truesize/tailroom. Zeroes the flags.
+ * Uses faster single u64 write instead of per-field access.
+ */
+static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp,
+ const struct libeth_fqe *fqe,
+ u32 len)
+{
+ const struct page *page = __netmem_to_page(fqe->netmem);
+
+#ifdef __LIBETH_WORD_ACCESS
+ static_assert(offsetofend(typeof(xdp->base), flags) -
+ offsetof(typeof(xdp->base), frame_sz) ==
+ sizeof(u64));
+
+ *(u64 *)&xdp->base.frame_sz = fqe->truesize;
+#else
+ xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq);
+#endif
+ xdp_prepare_buff(&xdp->base, page_address(page) + fqe->offset,
+ page->pp->p.offset, len, true);
+}
+
+/**
+ * libeth_xdp_process_buff - attach Rx buffer to &libeth_xdp_buff
+ * @xdp: XDP buffer to attach the Rx buffer to
+ * @fqe: Rx buffer to process
+ * @len: received data length from the descriptor
+ *
+ * If the XDP buffer is empty, attaches the Rx buffer as head and initializes
+ * the required fields. Otherwise, attaches the buffer as a frag.
+ * Already performs DMA sync-for-CPU and frame start prefetch
+ * (for head buffers only).
+ *
+ * Return: true on success, false if the descriptor must be skipped (empty or
+ * no space for a new frag).
+ */
+static inline bool libeth_xdp_process_buff(struct libeth_xdp_buff *xdp,
+ const struct libeth_fqe *fqe,
+ u32 len)
+{
+ if (!libeth_rx_sync_for_cpu(fqe, len))
+ return false;
+
+ if (xdp->data)
+ return libeth_xdp_buff_add_frag(xdp, fqe, len);
+
+ libeth_xdp_prepare_buff(xdp, fqe, len);
+
+ prefetch(xdp->data);
+
+ return true;
+}
+
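+/*
+ * Inside the polling loop, one Rx descriptor may then be handled as follows
+ * (sketch, ``my_*`` names are hypothetical):
+ *
+ * if (!libeth_xdp_process_buff(xdp, &rq->fqes[ntc], len))
+ *	continue;	// empty buffer, skip the descriptor
+ * if (!my_desc_is_eop(desc))
+ *	continue;	// frag attached, wait for the rest of the frame
+ *
+ * my_xdp_run(xdp, &bq, napi, &rs, desc);
+ * // with my_xdp_run built via LIBETH_XDP_DEFINE_RUN() below
+ */
+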
+/**
+ * libeth_xdp_buff_stats_frags - update onstack RQ stats with XDP frags info
+ * @ss: onstack stats to update
+ * @xdp: buffer to account
+ *
+ * Internal helper used by __libeth_xdp_run_pass(); do not call directly.
+ * Adds buffer's frags count and total len to the onstack stats.
+ */
+static inline void
+libeth_xdp_buff_stats_frags(struct libeth_rq_napi_stats *ss,
+ const struct libeth_xdp_buff *xdp)
+{
+ const struct skb_shared_info *sinfo;
+
+ sinfo = xdp_get_shared_info_from_buff(&xdp->base);
+ ss->bytes += sinfo->xdp_frags_size;
+ ss->fragments += sinfo->nr_frags + 1;
+}
+
+u32 libeth_xdp_prog_exception(const struct libeth_xdp_tx_bulk *bq,
+ struct libeth_xdp_buff *xdp,
+ enum xdp_action act, int ret);
+
+/**
+ * __libeth_xdp_run_prog - run XDP program on an XDP buffer
+ * @xdp: XDP buffer to run the prog on
+ * @bq: buffer bulk for ``XDP_TX`` queueing
+ *
+ * Internal inline abstraction to run XDP program. Handles ``XDP_DROP``
+ * and ``XDP_REDIRECT`` only; the rest is processed levels up.
+ * Reports an XDP prog exception on errors.
+ *
+ * Return: libeth_xdp prog verdict depending on the prog's verdict.
+ */
+static __always_inline u32
+__libeth_xdp_run_prog(struct libeth_xdp_buff *xdp,
+ const struct libeth_xdp_tx_bulk *bq)
+{
+ enum xdp_action act;
+
+ act = bpf_prog_run_xdp(bq->prog, &xdp->base);
+ if (unlikely(act < XDP_DROP || act > XDP_REDIRECT))
+ goto out;
+
+ switch (act) {
+ case XDP_PASS:
+ return LIBETH_XDP_PASS;
+ case XDP_DROP:
+ libeth_xdp_return_buff(xdp);
+
+ return LIBETH_XDP_DROP;
+ case XDP_TX:
+ return LIBETH_XDP_TX;
+ case XDP_REDIRECT:
+ if (unlikely(xdp_do_redirect(bq->dev, &xdp->base, bq->prog)))
+ break;
+
+ xdp->data = NULL;
+
+ return LIBETH_XDP_REDIRECT;
+ default:
+ break;
+ }
+
+out:
+ return libeth_xdp_prog_exception(bq, xdp, act, 0);
+}
+
+/**
+ * __libeth_xdp_run_flush - run XDP program and handle ``XDP_TX`` verdict
+ * @xdp: XDP buffer to run the prog on
+ * @bq: buffer bulk for ``XDP_TX`` queueing
+ * @run: internal callback for running XDP program
+ * @queue: internal callback for queuing ``XDP_TX`` frame
+ * @flush_bulk: driver callback for flushing a bulk
+ *
+ * Internal inline abstraction to run XDP program and additionally handle
+ * ``XDP_TX`` verdict. Used by both XDP and XSk, hence @run and @queue.
+ * Do not use directly.
+ *
+ * Return: libeth_xdp prog verdict depending on the prog's verdict.
+ */
+static __always_inline u32
+__libeth_xdp_run_flush(struct libeth_xdp_buff *xdp,
+ struct libeth_xdp_tx_bulk *bq,
+ u32 (*run)(struct libeth_xdp_buff *xdp,
+ const struct libeth_xdp_tx_bulk *bq),
+ bool (*queue)(struct libeth_xdp_tx_bulk *bq,
+ struct libeth_xdp_buff *xdp,
+ bool (*flush_bulk)
+ (struct libeth_xdp_tx_bulk *bq,
+ u32 flags)),
+ bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
+ u32 flags))
+{
+ u32 act;
+
+ act = run(xdp, bq);
+ if (act == LIBETH_XDP_TX && unlikely(!queue(bq, xdp, flush_bulk)))
+ act = LIBETH_XDP_DROP;
+
+ bq->act_mask |= act;
+
+ return act;
+}
+
+/**
+ * libeth_xdp_run_prog - run XDP program (non-XSk path) and handle all verdicts
+ * @xdp: XDP buffer to process
+ * @bq: XDP Tx bulk to queue ``XDP_TX`` buffers
+ * @fl: driver ``XDP_TX`` bulk flush callback
+ *
+ * Run the attached XDP program and handle all possible verdicts. XSk has its
+ * own version.
+ * Prefer using it via LIBETH_XDP_DEFINE_RUN{,_PASS,_PROG}().
+ *
+ * Return: true if the buffer should be passed up the stack, false if the poll
+ * should go to the next buffer.
+ */
+#define libeth_xdp_run_prog(xdp, bq, fl) \
+ (__libeth_xdp_run_flush(xdp, bq, __libeth_xdp_run_prog, \
+ libeth_xdp_tx_queue_bulk, \
+ fl) == LIBETH_XDP_PASS)
+
+/**
+ * __libeth_xdp_run_pass - helper to run XDP program and handle the result
+ * @xdp: XDP buffer to process
+ * @bq: XDP Tx bulk to queue ``XDP_TX`` frames
+ * @napi: NAPI to build an skb and pass it up the stack
+ * @rs: onstack libeth RQ stats
+ * @md: metadata that should be filled to the XDP buffer
+ * @prep: callback for filling the metadata
+ * @run: driver wrapper to run XDP program
+ * @populate: driver callback to populate an skb with the HW descriptor data
+ *
+ * Inline abstraction that does the following (non-XSk path):
+ * 1) adds frame size and frag number (if needed) to the onstack stats;
+ * 2) fills the descriptor metadata to the onstack &libeth_xdp_buff;
+ * 3) runs XDP program if present;
+ * 4) handles all possible verdicts;
+ * 5) on ``XDP_PASS``, builds an skb from the buffer;
+ * 6) populates it with the descriptor metadata;
+ * 7) passes it up the stack.
+ *
+ * In most cases, step 2 means just writing the pointer to the HW descriptor
+ * to the XDP buffer. If so, please use LIBETH_XDP_DEFINE_RUN{,_PASS}()
+ * wrappers to build a driver function.
+ */
+static __always_inline void
+__libeth_xdp_run_pass(struct libeth_xdp_buff *xdp,
+ struct libeth_xdp_tx_bulk *bq, struct napi_struct *napi,
+ struct libeth_rq_napi_stats *rs, const void *md,
+ void (*prep)(struct libeth_xdp_buff *xdp,
+ const void *md),
+ bool (*run)(struct libeth_xdp_buff *xdp,
+ struct libeth_xdp_tx_bulk *bq),
+ bool (*populate)(struct sk_buff *skb,
+ const struct libeth_xdp_buff *xdp,
+ struct libeth_rq_napi_stats *rs))
+{
+ struct sk_buff *skb;
+
+ rs->bytes += xdp->base.data_end - xdp->data;
+ rs->packets++;
+
+ if (xdp_buff_has_frags(&xdp->base))
+ libeth_xdp_buff_stats_frags(rs, xdp);
+
+ if (prep && (!__builtin_constant_p(!!md) || md))
+ prep(xdp, md);
+
+ if (!bq || !run || !bq->prog)
+ goto build;
+
+ if (!run(xdp, bq))
+ return;
+
+build:
+ skb = xdp_build_skb_from_buff(&xdp->base);
+ if (unlikely(!skb)) {
+ libeth_xdp_return_buff_slow(xdp);
+ return;
+ }
+
+ xdp->data = NULL;
+
+ if (unlikely(!populate(skb, xdp, rs))) {
+ napi_consume_skb(skb, true);
+ return;
+ }
+
+ napi_gro_receive(napi, skb);
+}
+
+static inline void libeth_xdp_prep_desc(struct libeth_xdp_buff *xdp,
+ const void *desc)
+{
+ xdp->desc = desc;
+}
+
+/**
+ * libeth_xdp_run_pass - helper to run XDP program and handle the result
+ * @xdp: XDP buffer to process
+ * @bq: XDP Tx bulk to queue ``XDP_TX`` frames
+ * @napi: NAPI to build an skb and pass it up the stack
+ * @ss: onstack libeth RQ stats
+ * @desc: pointer to the HW descriptor for that frame
+ * @run: driver wrapper to run XDP program
+ * @populate: driver callback to populate an skb with the HW descriptor data
+ *
+ * Wrapper around the underscored version when "fill the descriptor metadata"
+ * means just writing the pointer to the HW descriptor as @xdp->desc.
+ */
+#define libeth_xdp_run_pass(xdp, bq, napi, ss, desc, run, populate) \
+ __libeth_xdp_run_pass(xdp, bq, napi, ss, desc, libeth_xdp_prep_desc, \
+ run, populate)
+
+/**
+ * libeth_xdp_finalize_rx - finalize XDPSQ after a NAPI polling loop (non-XSk)
+ * @bq: ``XDP_TX`` frame bulk
+ * @flush: driver callback to flush the bulk
+ * @finalize: driver callback to start sending the frames and run the timer
+ *
+ * Flush the bulk if there are frames left to send, kick the queue and flush
+ * the XDP maps.
+ */
+#define libeth_xdp_finalize_rx(bq, flush, finalize) \
+ __libeth_xdp_finalize_rx(bq, 0, flush, finalize)
+
+static __always_inline void
+__libeth_xdp_finalize_rx(struct libeth_xdp_tx_bulk *bq, u32 flags,
+ bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
+ u32 flags),
+ void (*finalize)(void *xdpsq, bool sent, bool flush))
+{
+ if (bq->act_mask & LIBETH_XDP_TX) {
+ if (bq->count)
+ flush_bulk(bq, flags | LIBETH_XDP_TX_DROP);
+ finalize(bq->xdpsq, true, true);
+ }
+ if (bq->act_mask & LIBETH_XDP_REDIRECT)
+ xdp_do_flush();
+
+ rcu_read_unlock();
+}
+
+/*
+ * Helpers to reduce boilerplate code in drivers.
+ *
+ * Typical driver Rx flow would be (excl. bulk and buff init, frag attach):
+ *
+ * LIBETH_XDP_DEFINE_START();
+ * LIBETH_XDP_DEFINE_FLUSH_TX(static driver_xdp_flush_tx, driver_xdp_tx_prep,
+ * driver_xdp_xmit);
+ * LIBETH_XDP_DEFINE_RUN(static driver_xdp_run, driver_xdp_run_prog,
+ * driver_xdp_flush_tx, driver_populate_skb);
+ * LIBETH_XDP_DEFINE_FINALIZE(static driver_xdp_finalize_rx,
+ * driver_xdp_flush_tx, driver_xdp_finalize_sq);
+ * LIBETH_XDP_DEFINE_END();
+ *
+ * This will build a set of 4 static functions. The compiler is free to decide
+ * whether to inline them.
+ * Then, in the NAPI polling function:
+ *
+ * while (packets < budget) {
+ * // ...
+ * driver_xdp_run(xdp, &bq, napi, &rs, desc);
+ * }
+ * driver_xdp_finalize_rx(&bq);
+ */
+
+#define LIBETH_XDP_DEFINE_START() \
+ __diag_push(); \
+ __diag_ignore(GCC, 8, "-Wold-style-declaration", \
+ "Allow specifying \'static\' after the return type")
+
+/**
+ * LIBETH_XDP_DEFINE_TIMER - define a driver XDPSQ cleanup timer callback
+ * @name: name of the function to define
+ * @poll: Tx polling/completion function
+ */
+#define LIBETH_XDP_DEFINE_TIMER(name, poll) \
+void name(struct work_struct *work) \
+{ \
+ libeth_xdpsq_run_timer(work, poll); \
+}
+
+/**
+ * LIBETH_XDP_DEFINE_FLUSH_TX - define a driver ``XDP_TX`` bulk flush function
+ * @name: name of the function to define
+ * @prep: driver callback to clean an XDPSQ
+ * @xmit: driver callback to write a HW Tx descriptor
+ */
+#define LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit) \
+ __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, xdp)
+
+#define __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, pfx) \
+bool name(struct libeth_xdp_tx_bulk *bq, u32 flags) \
+{ \
+ return libeth_##pfx##_tx_flush_bulk(bq, flags, prep, xmit); \
+}
+
+/**
+ * LIBETH_XDP_DEFINE_FLUSH_XMIT - define a driver XDP xmit bulk flush function
+ * @name: name of the function to define
+ * @prep: driver callback to clean an XDPSQ
+ * @xmit: driver callback to write a HW Tx descriptor
+ */
+#define LIBETH_XDP_DEFINE_FLUSH_XMIT(name, prep, xmit) \
+bool name(struct libeth_xdp_tx_bulk *bq, u32 flags) \
+{ \
+ return libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit); \
+}
+
+/**
+ * LIBETH_XDP_DEFINE_RUN_PROG - define a driver XDP program run function
+ * @name: name of the function to define
+ * @flush: driver callback to flush an ``XDP_TX`` bulk
+ */
+#define LIBETH_XDP_DEFINE_RUN_PROG(name, flush) \
+ bool __LIBETH_XDP_DEFINE_RUN_PROG(name, flush, xdp)
+
+#define __LIBETH_XDP_DEFINE_RUN_PROG(name, flush, pfx) \
+name(struct libeth_xdp_buff *xdp, struct libeth_xdp_tx_bulk *bq) \
+{ \
+ return libeth_##pfx##_run_prog(xdp, bq, flush); \
+}
+
+/**
+ * LIBETH_XDP_DEFINE_RUN_PASS - define a driver buffer process + pass function
+ * @name: name of the function to define
+ * @run: driver callback to run XDP program (above)
+ * @populate: driver callback to fill an skb with HW descriptor info
+ */
+#define LIBETH_XDP_DEFINE_RUN_PASS(name, run, populate) \
+ void __LIBETH_XDP_DEFINE_RUN_PASS(name, run, populate, xdp)
+
+#define __LIBETH_XDP_DEFINE_RUN_PASS(name, run, populate, pfx) \
+name(struct libeth_xdp_buff *xdp, struct libeth_xdp_tx_bulk *bq, \
+ struct napi_struct *napi, struct libeth_rq_napi_stats *ss, \
+ const void *desc) \
+{ \
+ return libeth_##pfx##_run_pass(xdp, bq, napi, ss, desc, run, \
+ populate); \
+}
+
+/**
+ * LIBETH_XDP_DEFINE_RUN - define a driver buffer process, run + pass function
+ * @name: name of the function to define
+ * @run: name of the XDP prog run function to define
+ * @flush: driver callback to flush an ``XDP_TX`` bulk
+ * @populate: driver callback to fill an skb with HW descriptor info
+ */
+#define LIBETH_XDP_DEFINE_RUN(name, run, flush, populate) \
+ __LIBETH_XDP_DEFINE_RUN(name, run, flush, populate, XDP)
+
+#define __LIBETH_XDP_DEFINE_RUN(name, run, flush, populate, pfx) \
+ LIBETH_##pfx##_DEFINE_RUN_PROG(static run, flush); \
+ LIBETH_##pfx##_DEFINE_RUN_PASS(name, run, populate)
+
+/**
+ * LIBETH_XDP_DEFINE_FINALIZE - define a driver Rx NAPI poll finalize function
+ * @name: name of the function to define
+ * @flush: driver callback to flush an ``XDP_TX`` bulk
+ * @finalize: driver callback to finalize an XDPSQ and run the timer
+ */
+#define LIBETH_XDP_DEFINE_FINALIZE(name, flush, finalize) \
+ __LIBETH_XDP_DEFINE_FINALIZE(name, flush, finalize, xdp)
+
+#define __LIBETH_XDP_DEFINE_FINALIZE(name, flush, finalize, pfx) \
+void name(struct libeth_xdp_tx_bulk *bq) \
+{ \
+ libeth_##pfx##_finalize_rx(bq, flush, finalize); \
+}
+
+#define LIBETH_XDP_DEFINE_END() __diag_pop()
+
+/* XMO */
+
+/**
+ * libeth_xdp_buff_to_rq - get RQ pointer from an XDP buffer pointer
+ * @xdp: &libeth_xdp_buff corresponding to the queue
+ * @type: typeof() of the driver Rx queue structure
+ * @member: name of &xdp_rxq_info inside @type
+ *
+ * Oftentimes, a pointer to the RQ is needed when reading/filling metadata
+ * from HW descriptors. The helper can be used to quickly jump from an XDP
+ * buffer to the queue corresponding to its &xdp_rxq_info without introducing
+ * additional fields (&libeth_xdp_buff is precisely 1 cacheline long on x64).
+ */
+#define libeth_xdp_buff_to_rq(xdp, type, member) \
+ container_of_const((xdp)->base.rxq, type, member)
+
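+/*
+ * For example (the queue structure and its field are hypothetical):
+ *
+ * struct my_rq *rq = libeth_xdp_buff_to_rq(xdp, struct my_rq, xdp_rxq);
+ */
+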
+/**
+ * libeth_xdpmo_rx_hash - convert &libeth_rx_pt to an XDP RSS hash metadata
+ * @hash: pointer to the variable to write the hash to
+ * @rss_type: pointer to the variable to write the hash type to
+ * @val: hash value from the HW descriptor
+ * @pt: libeth parsed packet type
+ *
+ * Handle zeroed/non-available hash and convert libeth parsed packet type to
+ * the corresponding XDP RSS hash type. To be called at the end of
+ * xdp_metadata_ops idpf_xdpmo::xmo_rx_hash() implementation.
+ * Note that if the driver doesn't use a constant packet type lookup table but
+ * generates it at runtime, it must call libeth_rx_pt_gen_hash_type(pt) to
+ * generate XDP RSS hash type for each packet type.
+ *
+ * Return: 0 on success, -ENODATA when the hash is not available.
+ */
+static inline int libeth_xdpmo_rx_hash(u32 *hash,
+ enum xdp_rss_hash_type *rss_type,
+ u32 val, struct libeth_rx_pt pt)
+{
+ if (unlikely(!val))
+ return -ENODATA;
+
+ *hash = val;
+ *rss_type = pt.hash_type;
+
+ return 0;
+}
+
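+/*
+ * A driver's &xdp_metadata_ops.xmo_rx_hash may then look like (sketch, the
+ * descriptor layout and ``my_*`` names are hypothetical):
+ *
+ * static int my_xdpmo_rx_hash(const struct xdp_md *ctx, u32 *hash,
+ *			        enum xdp_rss_hash_type *rss_type)
+ * {
+ *	const struct libeth_xdp_buff *xdp = (typeof(xdp))ctx;
+ *	const struct my_rx_desc *desc = xdp->desc;
+ *
+ *	return libeth_xdpmo_rx_hash(hash, rss_type,
+ *				    le32_to_cpu(desc->rss_hash),
+ *				    my_rx_pt(desc));
+ * }
+ */
+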
+/* Tx buffer completion */
+
+void libeth_xdp_return_buff_bulk(const struct skb_shared_info *sinfo,
+ struct xdp_frame_bulk *bq, bool frags);
+void libeth_xsk_buff_free_slow(struct libeth_xdp_buff *xdp);
+
+/**
+ * __libeth_xdp_complete_tx - complete sent XDPSQE
+ * @sqe: SQ element / Tx buffer to complete
+ * @cp: Tx polling/completion params
+ * @bulk: internal callback to bulk-free ``XDP_TX`` buffers
+ * @xsk: internal callback to free XSk ``XDP_TX`` buffers
+ *
+ * Use the non-underscored version in drivers instead. This one is shared
+ * internally with libeth_tx_complete_any().
+ * Complete an XDPSQE of any type of XDP frame. This includes DMA unmapping
+ * when needed, buffer freeing, stats update, and SQE invalidation.
+ */
+static __always_inline void
+__libeth_xdp_complete_tx(struct libeth_sqe *sqe, struct libeth_cq_pp *cp,
+ typeof(libeth_xdp_return_buff_bulk) bulk,
+ typeof(libeth_xsk_buff_free_slow) xsk)
+{
+ enum libeth_sqe_type type = sqe->type;
+
+ switch (type) {
+ case LIBETH_SQE_EMPTY:
+ return;
+ case LIBETH_SQE_XDP_XMIT:
+ case LIBETH_SQE_XDP_XMIT_FRAG:
+ dma_unmap_page(cp->dev, dma_unmap_addr(sqe, dma),
+ dma_unmap_len(sqe, len), DMA_TO_DEVICE);
+ break;
+ default:
+ break;
+ }
+
+ switch (type) {
+ case LIBETH_SQE_XDP_TX:
+ bulk(sqe->sinfo, cp->bq, sqe->nr_frags != 1);
+ break;
+ case LIBETH_SQE_XDP_XMIT:
+ xdp_return_frame_bulk(sqe->xdpf, cp->bq);
+ break;
+ case LIBETH_SQE_XSK_TX:
+ case LIBETH_SQE_XSK_TX_FRAG:
+ xsk(sqe->xsk);
+ break;
+ default:
+ break;
+ }
+
+ switch (type) {
+ case LIBETH_SQE_XDP_TX:
+ case LIBETH_SQE_XDP_XMIT:
+ case LIBETH_SQE_XSK_TX:
+ cp->xdp_tx -= sqe->nr_frags;
+
+ cp->xss->packets++;
+ cp->xss->bytes += sqe->bytes;
+ break;
+ default:
+ break;
+ }
+
+ sqe->type = LIBETH_SQE_EMPTY;
+}
+
+static inline void libeth_xdp_complete_tx(struct libeth_sqe *sqe,
+ struct libeth_cq_pp *cp)
+{
+ __libeth_xdp_complete_tx(sqe, cp, libeth_xdp_return_buff_bulk,
+ libeth_xsk_buff_free_slow);
+}
+
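+/*
+ * A driver completion loop may then be as simple as (sketch, the locals are
+ * illustrative; the fields shown are the ones used by
+ * __libeth_xdp_complete_tx() above):
+ *
+ * struct libeth_cq_pp cp = {
+ *	.dev	= q->dev,
+ *	.bq	= &bq,
+ *	.xss	= &xss,
+ * };
+ *
+ * while (ntc != ntu)
+ *	libeth_xdp_complete_tx(&q->sqes[ntc++], &cp);
+ */
+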
+/* Misc */
+
+u32 libeth_xdp_queue_threshold(u32 count);
+
+void __libeth_xdp_set_features(struct net_device *dev,
+ const struct xdp_metadata_ops *xmo,
+ u32 zc_segs,
+ const struct xsk_tx_metadata_ops *tmo);
+void libeth_xdp_set_redirect(struct net_device *dev, bool enable);
+
+/**
+ * libeth_xdp_set_features - set XDP features for netdev
+ * @dev: &net_device to configure
+ * @...: optional params, see __libeth_xdp_set_features()
+ *
+ * Set all the features libeth_xdp supports, including .ndo_xdp_xmit(). As
+ * such, it should be used only when XDPSQs are always available regardless
+ * of whether an XDP prog is attached to @dev.
+ */
+#define libeth_xdp_set_features(dev, ...) \
+ CONCATENATE(__libeth_xdp_feat, \
+ COUNT_ARGS(__VA_ARGS__))(dev, ##__VA_ARGS__)
+
+#define __libeth_xdp_feat0(dev) \
+ __libeth_xdp_set_features(dev, NULL, 0, NULL)
+#define __libeth_xdp_feat1(dev, xmo) \
+ __libeth_xdp_set_features(dev, xmo, 0, NULL)
+#define __libeth_xdp_feat2(dev, xmo, zc_segs) \
+ __libeth_xdp_set_features(dev, xmo, zc_segs, NULL)
+#define __libeth_xdp_feat3(dev, xmo, zc_segs, tmo) \
+ __libeth_xdp_set_features(dev, xmo, zc_segs, tmo)
+
+/**
+ * libeth_xdp_set_features_noredir - enable all libeth_xdp features w/o redir
+ * @dev: target &net_device
+ * @...: optional params, see __libeth_xdp_set_features()
+ *
+ * Enable everything except the .ndo_xdp_xmit() feature, use when XDPSQs are
+ * not available right after netdev registration.
+ */
+#define libeth_xdp_set_features_noredir(dev, ...) \
+ __libeth_xdp_set_features_noredir(dev, __UNIQUE_ID(dev_), \
+ ##__VA_ARGS__)
+
+#define __libeth_xdp_set_features_noredir(dev, ud, ...) do { \
+ struct net_device *ud = (dev); \
+ \
+ libeth_xdp_set_features(ud, ##__VA_ARGS__); \
+ libeth_xdp_set_redirect(ud, false); \
+} while (0)
+
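+/*
+ * E.g. in the netdev setup path (sketch; the ops and the number of ZC segs
+ * are illustrative):
+ *
+ * libeth_xdp_set_features_noredir(dev, &my_xdpmo, 4, libeth_xsktmo);
+ * // ... later, once the XDPSQs are allocated:
+ * libeth_xdp_set_redirect(dev, true);
+ */
+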
+#define libeth_xsktmo ((const void *)GOLDEN_RATIO_PRIME)
+
+#endif /* __LIBETH_XDP_H */
diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h
new file mode 100644
index 000000000000..481a7b28e6f2
--- /dev/null
+++ b/include/net/libeth/xsk.h
@@ -0,0 +1,685 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2025 Intel Corporation */
+
+#ifndef __LIBETH_XSK_H
+#define __LIBETH_XSK_H
+
+#include <net/libeth/xdp.h>
+#include <net/xdp_sock_drv.h>
+
+/* ``XDP_TXMD_FLAGS_VALID`` is defined only under ``CONFIG_XDP_SOCKETS`` */
+#ifdef XDP_TXMD_FLAGS_VALID
+static_assert(XDP_TXMD_FLAGS_VALID <= LIBETH_XDP_TX_XSKMD);
+#endif
+
+/* ``XDP_TX`` bulking */
+
+/**
+ * libeth_xsk_tx_queue_head - internal helper for queueing XSk ``XDP_TX`` head
+ * @bq: XDP Tx bulk to queue the head frag to
+ * @xdp: XSk buffer with the head to queue
+ *
+ * Return: false if it's the only frag of the frame, true if it's an S/G frame.
+ */
+static inline bool libeth_xsk_tx_queue_head(struct libeth_xdp_tx_bulk *bq,
+ struct libeth_xdp_buff *xdp)
+{
+ bq->bulk[bq->count++] = (typeof(*bq->bulk)){
+ .xsk = xdp,
+ __libeth_xdp_tx_len(xdp->base.data_end - xdp->data,
+ LIBETH_XDP_TX_FIRST),
+ };
+
+ if (likely(!xdp_buff_has_frags(&xdp->base)))
+ return false;
+
+ bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_MULTI;
+
+ return true;
+}
+
+/**
+ * libeth_xsk_tx_queue_frag - internal helper for queueing XSk ``XDP_TX`` frag
+ * @bq: XDP Tx bulk to queue the frag to
+ * @frag: XSk frag to queue
+ */
+static inline void libeth_xsk_tx_queue_frag(struct libeth_xdp_tx_bulk *bq,
+ struct libeth_xdp_buff *frag)
+{
+ bq->bulk[bq->count++] = (typeof(*bq->bulk)){
+ .xsk = frag,
+ __libeth_xdp_tx_len(frag->base.data_end - frag->data),
+ };
+}
+
+/**
+ * libeth_xsk_tx_queue_bulk - internal helper for queueing XSk ``XDP_TX`` frame
+ * @bq: XDP Tx bulk to queue the frame to
+ * @xdp: XSk buffer to queue
+ * @flush_bulk: driver callback to flush the bulk to the HW queue
+ *
+ * Return: true on success, false on flush error.
+ */
+static __always_inline bool
+libeth_xsk_tx_queue_bulk(struct libeth_xdp_tx_bulk *bq,
+ struct libeth_xdp_buff *xdp,
+ bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq,
+ u32 flags))
+{
+ bool ret = true;
+
+ if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
+ unlikely(!flush_bulk(bq, LIBETH_XDP_TX_XSK))) {
+ libeth_xsk_buff_free_slow(xdp);
+ return false;
+ }
+
+ if (!libeth_xsk_tx_queue_head(bq, xdp))
+ goto out;
+
+ for (const struct libeth_xdp_buff *head = xdp; ; ) {
+ xdp = container_of(xsk_buff_get_frag(&head->base),
+ typeof(*xdp), base);
+ if (!xdp)
+ break;
+
+ if (unlikely(bq->count == LIBETH_XDP_TX_BULK) &&
+ unlikely(!flush_bulk(bq, LIBETH_XDP_TX_XSK))) {
+ ret = false;
+ break;
+ }
+
+ libeth_xsk_tx_queue_frag(bq, xdp);
+ }
+
+out:
+ bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_LAST;
+
+ return ret;
+}
+
+/**
+ * libeth_xsk_tx_fill_buf - internal helper to fill XSk ``XDP_TX`` &libeth_sqe
+ * @frm: XDP Tx frame from the bulk
+ * @i: index on the HW queue
+ * @sq: XDPSQ abstraction for the queue
+ * @priv: private data
+ *
+ * Return: XDP Tx descriptor with the synced DMA and other info to pass to
+ * the driver callback.
+ */
+static inline struct libeth_xdp_tx_desc
+libeth_xsk_tx_fill_buf(struct libeth_xdp_tx_frame frm, u32 i,
+ const struct libeth_xdpsq *sq, u64 priv)
+{
+ struct libeth_xdp_buff *xdp = frm.xsk;
+ struct libeth_xdp_tx_desc desc = {
+ .addr = xsk_buff_xdp_get_dma(&xdp->base),
+ .opts = frm.opts,
+ };
+ struct libeth_sqe *sqe;
+
+ xsk_buff_raw_dma_sync_for_device(sq->pool, desc.addr, desc.len);
+
+ sqe = &sq->sqes[i];
+ sqe->xsk = xdp;
+
+ if (!(desc.flags & LIBETH_XDP_TX_FIRST)) {
+ sqe->type = LIBETH_SQE_XSK_TX_FRAG;
+ return desc;
+ }
+
+ sqe->type = LIBETH_SQE_XSK_TX;
+ libeth_xdp_tx_fill_stats(sqe, &desc,
+ xdp_get_shared_info_from_buff(&xdp->base));
+
+ return desc;
+}
+
+/**
+ * libeth_xsk_tx_flush_bulk - wrapper to define flush of XSk ``XDP_TX`` bulk
+ * @bq: bulk to flush
+ * @flags: Tx flags, see __libeth_xdp_tx_flush_bulk()
+ * @prep: driver callback to prepare the queue
+ * @xmit: driver callback to fill a HW descriptor
+ *
+ * Use via LIBETH_XSK_DEFINE_FLUSH_TX() to define an XSk ``XDP_TX`` driver
+ * callback.
+ */
+#define libeth_xsk_tx_flush_bulk(bq, flags, prep, xmit) \
+ __libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_XSK, prep, \
+ libeth_xsk_tx_fill_buf, xmit)
+
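+/*
+ * E.g. (sketch, ``my_*`` names are hypothetical):
+ *
+ * LIBETH_XSK_DEFINE_FLUSH_TX(static my_xsk_flush_tx, my_xdp_tx_prep,
+ *			       my_xdp_xmit_desc);
+ */
+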
+/* XSk TMO */
+
+/**
+ * libeth_xsktmo_req_csum - XSk Tx metadata op to request checksum offload
+ * @csum_start: unused
+ * @csum_offset: unused
+ * @priv: &libeth_xdp_tx_desc from the filling helper
+ *
+ * Generic implementation of ::tmo_request_checksum. Works only when HW doesn't
+ * require filling checksum offsets and other parameters besides the checksum
+ * request bit.
+ * Consider using within @libeth_xsktmo unless the driver requires HW-specific
+ * callbacks.
+ */
+static inline void libeth_xsktmo_req_csum(u16 csum_start, u16 csum_offset,
+ void *priv)
+{
+ ((struct libeth_xdp_tx_desc *)priv)->flags |= LIBETH_XDP_TX_CSUM;
+}
+
+/* Only to inline the callbacks below, use @libeth_xsktmo in drivers instead */
+static const struct xsk_tx_metadata_ops __libeth_xsktmo = {
+ .tmo_request_checksum = libeth_xsktmo_req_csum,
+};
+
+/**
+ * __libeth_xsk_xmit_fill_buf_md - internal helper to prepare XSk xmit w/meta
+ * @xdesc: &xdp_desc from the XSk buffer pool
+ * @sq: XDPSQ abstraction for the queue
+ * @priv: XSk Tx metadata ops
+ *
+ * Same as __libeth_xsk_xmit_fill_buf(), but requests metadata pointer and
+ * fills additional fields in &libeth_xdp_tx_desc to ask for metadata offload.
+ *
+ * Return: XDP Tx descriptor with the DMA, metadata request bits, and other
+ * info to pass to the driver callback.
+ */
+static __always_inline struct libeth_xdp_tx_desc
+__libeth_xsk_xmit_fill_buf_md(const struct xdp_desc *xdesc,
+ const struct libeth_xdpsq *sq,
+ u64 priv)
+{
+ const struct xsk_tx_metadata_ops *tmo = libeth_xdp_priv_to_ptr(priv);
+ struct libeth_xdp_tx_desc desc;
+ struct xdp_desc_ctx ctx;
+
+ ctx = xsk_buff_raw_get_ctx(sq->pool, xdesc->addr);
+ desc = (typeof(desc)){
+ .addr = ctx.dma,
+ __libeth_xdp_tx_len(xdesc->len),
+ };
+
+ BUILD_BUG_ON(!__builtin_constant_p(tmo == libeth_xsktmo));
+ tmo = tmo == libeth_xsktmo ? &__libeth_xsktmo : tmo;
+
+ xsk_tx_metadata_request(ctx.meta, tmo, &desc);
+
+ return desc;
+}
+
+/* XSk xmit implementation */
+
+/**
+ * __libeth_xsk_xmit_fill_buf - internal helper to prepare XSk xmit w/o meta
+ * @xdesc: &xdp_desc from the XSk buffer pool
+ * @sq: XDPSQ abstraction for the queue
+ *
+ * Return: XDP Tx descriptor with the DMA and other info to pass to
+ * the driver callback.
+ */
+static inline struct libeth_xdp_tx_desc
+__libeth_xsk_xmit_fill_buf(const struct xdp_desc *xdesc,
+ const struct libeth_xdpsq *sq)
+{
+ return (struct libeth_xdp_tx_desc){
+ .addr = xsk_buff_raw_get_dma(sq->pool, xdesc->addr),
+ __libeth_xdp_tx_len(xdesc->len),
+ };
+}
+
+/**
+ * libeth_xsk_xmit_fill_buf - internal helper to prepare an XSk xmit
+ * @frm: &xdp_desc from the XSk buffer pool
+ * @i: index on the HW queue
+ * @sq: XDPSQ abstraction for the queue
+ * @priv: XSk Tx metadata ops
+ *
+ * Depending on the metadata ops presence (determined at compile time), calls
+ * the quickest helper to build a libeth XDP Tx descriptor.
+ *
+ * Return: XDP Tx descriptor with the synced DMA, metadata request bits,
+ * and other info to pass to the driver callback.
+ */
+static __always_inline struct libeth_xdp_tx_desc
+libeth_xsk_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i,
+ const struct libeth_xdpsq *sq, u64 priv)
+{
+ struct libeth_xdp_tx_desc desc;
+
+ if (priv)
+ desc = __libeth_xsk_xmit_fill_buf_md(&frm.desc, sq, priv);
+ else
+ desc = __libeth_xsk_xmit_fill_buf(&frm.desc, sq);
+
+ desc.flags |= xsk_is_eop_desc(&frm.desc) ? LIBETH_XDP_TX_LAST : 0;
+
+ xsk_buff_raw_dma_sync_for_device(sq->pool, desc.addr, desc.len);
+
+ return desc;
+}
+
+/**
+ * libeth_xsk_xmit_do_bulk - send XSk xmit frames
+ * @pool: XSk buffer pool containing the frames to send
+ * @xdpsq: opaque pointer to driver's XDPSQ struct
+ * @budget: maximum number of frames that can be sent
+ * @tmo: optional XSk Tx metadata ops
+ * @prep: driver callback to build a &libeth_xdpsq
+ * @xmit: driver callback to put frames to a HW queue
+ * @finalize: driver callback to start a transmission
+ *
+ * Implements generic XSk xmit. Always turns on XSk Tx wakeup as it's assumed
+ * lazy cleaning is used and interrupts are disabled for the queue.
+ * HW descriptor filling is unrolled by ``LIBETH_XDP_TX_BATCH`` to optimize
+ * writes.
+ * Note that unlike other XDP Tx ops, the queue must be locked and cleaned
+ * prior to calling this function, so that the available @budget is already
+ * known. @prep must only build a &libeth_xdpsq and return ``U32_MAX``.
+ *
+ * Return: false if @budget was exhausted, true otherwise.
+ */
+static __always_inline bool
+libeth_xsk_xmit_do_bulk(struct xsk_buff_pool *pool, void *xdpsq, u32 budget,
+ const struct xsk_tx_metadata_ops *tmo,
+ u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq),
+ void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i,
+ const struct libeth_xdpsq *sq, u64 priv),
+ void (*finalize)(void *xdpsq, bool sent, bool flush))
+{
+ const struct libeth_xdp_tx_frame *bulk;
+ bool wake;
+ u32 n;
+
+ wake = xsk_uses_need_wakeup(pool);
+ if (wake)
+ xsk_clear_tx_need_wakeup(pool);
+
+ n = xsk_tx_peek_release_desc_batch(pool, budget);
+ bulk = container_of(&pool->tx_descs[0], typeof(*bulk), desc);
+
+ libeth_xdp_tx_xmit_bulk(bulk, xdpsq, n, true,
+ libeth_xdp_ptr_to_priv(tmo), prep,
+ libeth_xsk_xmit_fill_buf, xmit);
+ finalize(xdpsq, n, true);
+
+ if (wake)
+ xsk_set_tx_need_wakeup(pool);
+
+ return n < budget;
+}
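+
+/*
+ * A minimal usage sketch (all drv_* names below are hypothetical, not part
+ * of libeth): the driver cleans the SQ first and passes the number of freed
+ * descriptors as @budget; @prep only fills a &libeth_xdpsq and returns
+ * ``U32_MAX``.
+ *
+ *	free = drv_clean_xdpsq(xq);
+ *	done = libeth_xsk_xmit_do_bulk(xq->pool, xq, free, libeth_xsktmo,
+ *				       drv_xmit_prep, drv_xmit_desc,
+ *				       drv_finalize_sq);
+ */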
+
+/* Rx polling path */
+
+/**
+ * libeth_xsk_tx_init_bulk - initialize XDP Tx bulk for an XSk Rx NAPI poll
+ * @bq: bulk to initialize
+ * @prog: RCU pointer to the XDP program (never %NULL)
+ * @dev: target &net_device
+ * @xdpsqs: array of driver XDPSQ structs
+ * @num: number of active XDPSQs, the above array length
+ *
+ * Should be called on an onstack XDP Tx bulk before the XSk NAPI polling loop.
+ * Initializes all the needed fields to run libeth_xdp functions.
+ * Never checks if @prog is %NULL or @num == 0 as XDP must always be enabled
+ * when hitting this path.
+ */
+#define libeth_xsk_tx_init_bulk(bq, prog, dev, xdpsqs, num) \
+ __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, true, \
+ __UNIQUE_ID(bq_), __UNIQUE_ID(nqs_))
+
+struct libeth_xdp_buff *libeth_xsk_buff_add_frag(struct libeth_xdp_buff *head,
+ struct libeth_xdp_buff *xdp);
+
+/**
+ * libeth_xsk_process_buff - attach XSk Rx buffer to &libeth_xdp_buff
+ * @head: head XSk buffer to attach the XSk buffer to (or %NULL)
+ * @xdp: XSk buffer to process
+ * @len: received data length from the descriptor
+ *
+ * If @head == %NULL, treats the XSk buffer as head and initializes
+ * the required fields. Otherwise, attaches the buffer as a frag.
+ * Already performs DMA sync-for-CPU and frame start prefetch
+ * (for head buffers only).
+ *
+ * Return: head XSk buffer on success or if the descriptor must be skipped
+ * (empty), %NULL if there is no space for a new frag.
+ */
+static inline struct libeth_xdp_buff *
+libeth_xsk_process_buff(struct libeth_xdp_buff *head,
+ struct libeth_xdp_buff *xdp, u32 len)
+{
+ if (unlikely(!len)) {
+ libeth_xsk_buff_free_slow(xdp);
+ return head;
+ }
+
+ xsk_buff_set_size(&xdp->base, len);
+ xsk_buff_dma_sync_for_cpu(&xdp->base);
+
+ if (head)
+ return libeth_xsk_buff_add_frag(head, xdp);
+
+ prefetch(xdp->data);
+
+ return xdp;
+}
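+
+/*
+ * A hedged Rx-loop sketch (descriptor parsing and the drv_* helper are
+ * driver-specific assumptions). @xdp accumulates the packet being built
+ * across descriptors:
+ *
+ *	xdp = libeth_xsk_process_buff(xdp, fq->fqes[ntc], len);
+ *	if (!xdp)
+ *		break;		// no space for a new frag
+ *	if (!drv_desc_is_eop(desc))
+ *		continue;	// keep accumulating frags
+ */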
+
+void libeth_xsk_buff_stats_frags(struct libeth_rq_napi_stats *rs,
+ const struct libeth_xdp_buff *xdp);
+
+u32 __libeth_xsk_run_prog_slow(struct libeth_xdp_buff *xdp,
+ const struct libeth_xdp_tx_bulk *bq,
+ enum xdp_action act, int ret);
+
+/**
+ * __libeth_xsk_run_prog - run XDP program on XSk buffer
+ * @xdp: XSk buffer to run the prog on
+ * @bq: buffer bulk for ``XDP_TX`` queueing
+ *
+ * Internal inline abstraction to run XDP program on the XSk Rx path. Handles
+ * only the most common ``XDP_REDIRECT`` verdict inline; the rest is processed
+ * externally.
+ * Reports an XDP prog exception on errors.
+ *
+ * Return: libeth_xdp prog verdict depending on the prog's verdict.
+ */
+static __always_inline u32
+__libeth_xsk_run_prog(struct libeth_xdp_buff *xdp,
+ const struct libeth_xdp_tx_bulk *bq)
+{
+ enum xdp_action act;
+ int ret = 0;
+
+ act = bpf_prog_run_xdp(bq->prog, &xdp->base);
+ if (unlikely(act != XDP_REDIRECT))
+rest:
+ return __libeth_xsk_run_prog_slow(xdp, bq, act, ret);
+
+ ret = xdp_do_redirect(bq->dev, &xdp->base, bq->prog);
+ if (unlikely(ret))
+ goto rest;
+
+ return LIBETH_XDP_REDIRECT;
+}
+
+/**
+ * libeth_xsk_run_prog - run XDP program on XSk path and handle all verdicts
+ * @xdp: XSk buffer to process
+ * @bq: XDP Tx bulk to queue ``XDP_TX`` buffers
+ * @fl: driver ``XDP_TX`` bulk flush callback
+ *
+ * Run the attached XDP program and handle all possible verdicts.
+ * Prefer using it via LIBETH_XSK_DEFINE_RUN{,_PASS,_PROG}().
+ *
+ * Return: libeth_xdp prog verdict depending on the prog's verdict.
+ */
+#define libeth_xsk_run_prog(xdp, bq, fl) \
+ __libeth_xdp_run_flush(xdp, bq, __libeth_xsk_run_prog, \
+ libeth_xsk_tx_queue_bulk, fl)
+
+/**
+ * __libeth_xsk_run_pass - helper to run XDP program and handle the result
+ * @xdp: XSk buffer to process
+ * @bq: XDP Tx bulk to queue ``XDP_TX`` frames
+ * @napi: NAPI to build an skb and pass it up the stack
+ * @rs: onstack libeth RQ stats
+ * @md: metadata that should be filled to the XSk buffer
+ * @prep: callback for filling the metadata
+ * @run: driver wrapper to run XDP program
+ * @populate: driver callback to populate an skb with the HW descriptor data
+ *
+ * Inline abstraction, XSk's counterpart of __libeth_xdp_run_pass(), see its
+ * doc for details.
+ *
+ * Return: false if the polling loop must be exited due to lack of free
+ * buffers, true otherwise.
+ */
+static __always_inline bool
+__libeth_xsk_run_pass(struct libeth_xdp_buff *xdp,
+ struct libeth_xdp_tx_bulk *bq, struct napi_struct *napi,
+ struct libeth_rq_napi_stats *rs, const void *md,
+ void (*prep)(struct libeth_xdp_buff *xdp,
+ const void *md),
+ u32 (*run)(struct libeth_xdp_buff *xdp,
+ struct libeth_xdp_tx_bulk *bq),
+ bool (*populate)(struct sk_buff *skb,
+ const struct libeth_xdp_buff *xdp,
+ struct libeth_rq_napi_stats *rs))
+{
+ struct sk_buff *skb;
+ u32 act;
+
+ rs->bytes += xdp->base.data_end - xdp->data;
+ rs->packets++;
+
+ if (unlikely(xdp_buff_has_frags(&xdp->base)))
+ libeth_xsk_buff_stats_frags(rs, xdp);
+
+ if (prep && (!__builtin_constant_p(!!md) || md))
+ prep(xdp, md);
+
+ act = run(xdp, bq);
+ if (likely(act == LIBETH_XDP_REDIRECT))
+ return true;
+
+ if (act != LIBETH_XDP_PASS)
+ return act != LIBETH_XDP_ABORTED;
+
+ skb = xdp_build_skb_from_zc(&xdp->base);
+ if (unlikely(!skb)) {
+ libeth_xsk_buff_free_slow(xdp);
+ return true;
+ }
+
+ if (unlikely(!populate(skb, xdp, rs))) {
+ napi_consume_skb(skb, true);
+ return true;
+ }
+
+ napi_gro_receive(napi, skb);
+
+ return true;
+}
+
+/**
+ * libeth_xsk_run_pass - helper to run XDP program and handle the result
+ * @xdp: XSk buffer to process
+ * @bq: XDP Tx bulk to queue ``XDP_TX`` frames
+ * @napi: NAPI to build an skb and pass it up the stack
+ * @rs: onstack libeth RQ stats
+ * @desc: pointer to the HW descriptor for that frame
+ * @run: driver wrapper to run XDP program
+ * @populate: driver callback to populate an skb with the HW descriptor data
+ *
+ * Wrapper around the underscored version when "fill the descriptor metadata"
+ * means just writing the pointer to the HW descriptor as @xdp->desc.
+ */
+#define libeth_xsk_run_pass(xdp, bq, napi, rs, desc, run, populate) \
+ __libeth_xsk_run_pass(xdp, bq, napi, rs, desc, libeth_xdp_prep_desc, \
+ run, populate)
+
+/**
+ * libeth_xsk_finalize_rx - finalize XDPSQ after an XSk NAPI polling loop
+ * @bq: ``XDP_TX`` frame bulk
+ * @flush: driver callback to flush the bulk
+ * @finalize: driver callback to start sending the frames and run the timer
+ *
+ * Flush the bulk if there are frames left to send, kick the queue and flush
+ * the XDP maps.
+ */
+#define libeth_xsk_finalize_rx(bq, flush, finalize) \
+ __libeth_xdp_finalize_rx(bq, LIBETH_XDP_TX_XSK, flush, finalize)
+
+/*
+ * Helpers to reduce boilerplate code in drivers.
+ *
+ * Typical driver XSk Rx flow would be (excl. bulk and buff init, frag attach):
+ *
+ * LIBETH_XDP_DEFINE_START();
+ * LIBETH_XSK_DEFINE_FLUSH_TX(static driver_xsk_flush_tx, driver_xsk_tx_prep,
+ * driver_xdp_xmit);
+ * LIBETH_XSK_DEFINE_RUN(static driver_xsk_run, driver_xsk_run_prog,
+ * driver_xsk_flush_tx, driver_populate_skb);
+ * LIBETH_XSK_DEFINE_FINALIZE(static driver_xsk_finalize_rx,
+ * driver_xsk_flush_tx, driver_xdp_finalize_sq);
+ * LIBETH_XDP_DEFINE_END();
+ *
+ * This will build a set of 4 static functions. The compiler is free to decide
+ * whether to inline them.
+ * Then, in the NAPI polling function:
+ *
+ * while (packets < budget) {
+ * // ...
+ * if (!driver_xsk_run(xdp, &bq, napi, &rs, desc))
+ * break;
+ * }
+ * driver_xsk_finalize_rx(&bq);
+ */
+
+/**
+ * LIBETH_XSK_DEFINE_FLUSH_TX - define a driver XSk ``XDP_TX`` flush function
+ * @name: name of the function to define
+ * @prep: driver callback to clean an XDPSQ
+ * @xmit: driver callback to write a HW Tx descriptor
+ */
+#define LIBETH_XSK_DEFINE_FLUSH_TX(name, prep, xmit) \
+ __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, xsk)
+
+/**
+ * LIBETH_XSK_DEFINE_RUN_PROG - define a driver XDP program run function
+ * @name: name of the function to define
+ * @flush: driver callback to flush an XSk ``XDP_TX`` bulk
+ */
+#define LIBETH_XSK_DEFINE_RUN_PROG(name, flush) \
+ u32 __LIBETH_XDP_DEFINE_RUN_PROG(name, flush, xsk)
+
+/**
+ * LIBETH_XSK_DEFINE_RUN_PASS - define a driver buffer process + pass function
+ * @name: name of the function to define
+ * @run: driver callback to run XDP program (above)
+ * @populate: driver callback to fill an skb with HW descriptor info
+ */
+#define LIBETH_XSK_DEFINE_RUN_PASS(name, run, populate) \
+ bool __LIBETH_XDP_DEFINE_RUN_PASS(name, run, populate, xsk)
+
+/**
+ * LIBETH_XSK_DEFINE_RUN - define a driver buffer process, run + pass function
+ * @name: name of the function to define
+ * @run: name of the XDP prog run function to define
+ * @flush: driver callback to flush an XSk ``XDP_TX`` bulk
+ * @populate: driver callback to fill an skb with HW descriptor info
+ */
+#define LIBETH_XSK_DEFINE_RUN(name, run, flush, populate) \
+ __LIBETH_XDP_DEFINE_RUN(name, run, flush, populate, XSK)
+
+/**
+ * LIBETH_XSK_DEFINE_FINALIZE - define a driver XSk NAPI poll finalize function
+ * @name: name of the function to define
+ * @flush: driver callback to flush an XSk ``XDP_TX`` bulk
+ * @finalize: driver callback to finalize an XDPSQ and run the timer
+ */
+#define LIBETH_XSK_DEFINE_FINALIZE(name, flush, finalize) \
+ __LIBETH_XDP_DEFINE_FINALIZE(name, flush, finalize, xsk)
+
+/* Refilling */
+
+/**
+ * struct libeth_xskfq - structure representing an XSk buffer (fill) queue
+ * @fp: hotpath part of the structure
+ * @pool: &xsk_buff_pool for buffer management
+ * @fqes: array of XSk buffer pointers
+ * @descs: opaque pointer to the HW descriptor array
+ * @ntu: index of the next buffer to poll
+ * @count: number of descriptors/buffers the queue has
+ * @pending: current number of XSkFQEs to refill
+ * @thresh: threshold below which the queue is refilled
+ * @buf_len: HW-writeable length per buffer
+ * @nid: ID of the closest NUMA node with memory
+ */
+struct libeth_xskfq {
+ struct_group_tagged(libeth_xskfq_fp, fp,
+ struct xsk_buff_pool *pool;
+ struct libeth_xdp_buff **fqes;
+ void *descs;
+
+ u32 ntu;
+ u32 count;
+ );
+
+ /* Cold fields */
+ u32 pending;
+ u32 thresh;
+
+ u32 buf_len;
+ int nid;
+};
+
+int libeth_xskfq_create(struct libeth_xskfq *fq);
+void libeth_xskfq_destroy(struct libeth_xskfq *fq);
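+
+/*
+ * A setup sketch: ::pool, ::count and ::nid are caller-provided here (an
+ * assumption; the remaining fields are expected to be set up by
+ * libeth_xskfq_create()).
+ *
+ *	struct libeth_xskfq fq = {
+ *		.pool	= pool,
+ *		.count	= rxq->desc_count,
+ *		.nid	= NUMA_NO_NODE,
+ *	};
+ *
+ *	err = libeth_xskfq_create(&fq);
+ */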
+
+/**
+ * libeth_xsk_buff_xdp_get_dma - get DMA address of XSk &libeth_xdp_buff
+ * @xdp: buffer to get the DMA addr for
+ */
+#define libeth_xsk_buff_xdp_get_dma(xdp) \
+ xsk_buff_xdp_get_dma(&(xdp)->base)
+
+/**
+ * libeth_xskfqe_alloc - allocate @n XSk Rx buffers
+ * @fq: hotpath part of the XSkFQ, usually onstack
+ * @n: number of buffers to allocate
+ * @fill: driver callback to write DMA addresses to HW descriptors
+ *
+ * Note that @fq->ntu gets updated, but ::pending must be recalculated
+ * by the caller.
+ *
+ * Return: number of buffers refilled.
+ */
+static __always_inline u32
+libeth_xskfqe_alloc(struct libeth_xskfq_fp *fq, u32 n,
+ void (*fill)(const struct libeth_xskfq_fp *fq, u32 i))
+{
+ u32 this, ret, done = 0;
+ struct xdp_buff **xskb;
+
+ this = fq->count - fq->ntu;
+ if (likely(this > n))
+ this = n;
+
+again:
+ xskb = (typeof(xskb))&fq->fqes[fq->ntu];
+ ret = xsk_buff_alloc_batch(fq->pool, xskb, this);
+
+ for (u32 i = 0, ntu = fq->ntu; likely(i < ret); i++)
+ fill(fq, ntu + i);
+
+ done += ret;
+ fq->ntu += ret;
+
+ if (likely(fq->ntu < fq->count) || unlikely(ret < this))
+ goto out;
+
+ fq->ntu = 0;
+
+ if (this < n) {
+ this = n - this;
+ goto again;
+ }
+
+out:
+ return done;
+}
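+
+/*
+ * A refill sketch under the struct doc above: refill once ::pending crosses
+ * ::thresh and recalculate ::pending from the return value (drv_fill_fqe is
+ * a hypothetical callback writing one DMA address to a HW descriptor).
+ *
+ *	if (fq->pending >= fq->thresh)
+ *		fq->pending -= libeth_xskfqe_alloc(&fq->fp, fq->pending,
+ *						   drv_fill_fqe);
+ */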
+
+/* .ndo_xsk_wakeup */
+
+void libeth_xsk_init_wakeup(call_single_data_t *csd, struct napi_struct *napi);
+void libeth_xsk_wakeup(call_single_data_t *csd, u32 qid);
+
+/* Pool setup */
+
+int libeth_xsk_setup_pool(struct net_device *dev, u32 qid, bool enable);
+
+#endif /* __LIBETH_XSK_H */
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 3ce56a816425..92ab85061df0 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -10,6 +10,7 @@
#include "shm_channel.h"
#define GDMA_STATUS_MORE_ENTRIES 0x00000105
+#define GDMA_STATUS_CMD_UNSUPPORTED 0xffffffff
/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
* them are naturally aligned and hence don't need __packed.
@@ -58,7 +59,7 @@ enum gdma_eqe_type {
GDMA_EQE_HWC_INIT_EQ_ID_DB = 129,
GDMA_EQE_HWC_INIT_DATA = 130,
GDMA_EQE_HWC_INIT_DONE = 131,
- GDMA_EQE_HWC_SOC_RECONFIG = 132,
+ GDMA_EQE_HWC_FPGA_RECONFIG = 132,
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
GDMA_EQE_HWC_SOC_SERVICE = 134,
GDMA_EQE_RNIC_QP_FATAL = 176,
@@ -388,7 +389,7 @@ struct gdma_context {
unsigned int max_num_queues;
unsigned int max_num_msix;
unsigned int num_msix_usable;
- struct gdma_irq_context *irq_contexts;
+ struct xarray irq_contexts;
/* L2 MTU */
u16 adapter_mtu;
@@ -403,6 +404,8 @@ struct gdma_context {
u32 test_event_eq_id;
bool is_pf;
+ bool in_service;
+
phys_addr_t bar0_pa;
void __iomem *bar0_va;
void __iomem *shm_base;
@@ -578,12 +581,20 @@ enum {
/* Driver can handle holes (zeros) in the device list */
#define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
+/* Driver supports dynamic MSI-X vector allocation */
+#define GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT BIT(13)
+
+/* Driver can self reset on FPGA Reconfig EQE notification */
+#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
- GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP)
+ GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
+ GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
+ GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
#define GDMA_DRV_CAP_FLAGS2 0
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 9abb66461211..e1030a7d2daa 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -5,6 +5,7 @@
#define _MANA_H
#include <net/xdp.h>
+#include <net/net_shaper.h>
#include "gdma.h"
#include "hw_channel.h"
@@ -404,6 +405,65 @@ struct mana_ethtool_stats {
u64 rx_cqe_unknown_type;
};
+struct mana_ethtool_phy_stats {
+ /* Drop Counters */
+ u64 rx_pkt_drop_phy;
+ u64 tx_pkt_drop_phy;
+
+ /* Per TC traffic Counters */
+ u64 rx_pkt_tc0_phy;
+ u64 tx_pkt_tc0_phy;
+ u64 rx_pkt_tc1_phy;
+ u64 tx_pkt_tc1_phy;
+ u64 rx_pkt_tc2_phy;
+ u64 tx_pkt_tc2_phy;
+ u64 rx_pkt_tc3_phy;
+ u64 tx_pkt_tc3_phy;
+ u64 rx_pkt_tc4_phy;
+ u64 tx_pkt_tc4_phy;
+ u64 rx_pkt_tc5_phy;
+ u64 tx_pkt_tc5_phy;
+ u64 rx_pkt_tc6_phy;
+ u64 tx_pkt_tc6_phy;
+ u64 rx_pkt_tc7_phy;
+ u64 tx_pkt_tc7_phy;
+
+ u64 rx_byte_tc0_phy;
+ u64 tx_byte_tc0_phy;
+ u64 rx_byte_tc1_phy;
+ u64 tx_byte_tc1_phy;
+ u64 rx_byte_tc2_phy;
+ u64 tx_byte_tc2_phy;
+ u64 rx_byte_tc3_phy;
+ u64 tx_byte_tc3_phy;
+ u64 rx_byte_tc4_phy;
+ u64 tx_byte_tc4_phy;
+ u64 rx_byte_tc5_phy;
+ u64 tx_byte_tc5_phy;
+ u64 rx_byte_tc6_phy;
+ u64 tx_byte_tc6_phy;
+ u64 rx_byte_tc7_phy;
+ u64 tx_byte_tc7_phy;
+
+ /* Per TC pause Counters */
+ u64 rx_pause_tc0_phy;
+ u64 tx_pause_tc0_phy;
+ u64 rx_pause_tc1_phy;
+ u64 tx_pause_tc1_phy;
+ u64 rx_pause_tc2_phy;
+ u64 tx_pause_tc2_phy;
+ u64 rx_pause_tc3_phy;
+ u64 tx_pause_tc3_phy;
+ u64 rx_pause_tc4_phy;
+ u64 tx_pause_tc4_phy;
+ u64 rx_pause_tc5_phy;
+ u64 tx_pause_tc5_phy;
+ u64 rx_pause_tc6_phy;
+ u64 tx_pause_tc6_phy;
+ u64 rx_pause_tc7_phy;
+ u64 tx_pause_tc7_phy;
+};
+
struct mana_context {
struct gdma_dev *gdma_dev;
@@ -467,13 +527,22 @@ struct mana_port_context {
struct mutex vport_mutex;
int vport_use_count;
+	/* Net shaper handle */
+ struct net_shaper_handle handle;
+
u16 port_idx;
+	/* Currently configured speed (Mbps) */
+	u32 speed;
+	/* Maximum speed supported by the SKU (Mbps) */
+	u32 max_speed;
bool port_is_up;
bool port_st_save; /* Saved port state */
struct mana_ethtool_stats eth_stats;
+ struct mana_ethtool_phy_stats phy_stats;
+
/* Debugfs */
struct dentry *mana_port_debugfs;
};
@@ -501,6 +570,10 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
void mana_query_gf_stats(struct mana_port_context *apc);
+int mana_query_link_cfg(struct mana_port_context *apc);
+int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
+ int enable_clamping);
+void mana_query_phy_stats(struct mana_port_context *apc);
int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
@@ -527,6 +600,9 @@ enum mana_command_code {
MANA_FENCE_RQ = 0x20006,
MANA_CONFIG_VPORT_RX = 0x20007,
MANA_QUERY_VPORT_CONFIG = 0x20008,
+ MANA_QUERY_LINK_CONFIG = 0x2000A,
+ MANA_SET_BW_CLAMP = 0x2000B,
+ MANA_QUERY_PHY_STAT = 0x2000c,
/* Privileged commands for the PF mode */
MANA_REGISTER_FILTER = 0x28000,
@@ -535,6 +611,35 @@ enum mana_command_code {
MANA_DEREGISTER_HW_PORT = 0x28004,
};
+/* Query Link Configuration */
+struct mana_query_link_config_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t vport;
+}; /* HW DATA */
+
+struct mana_query_link_config_resp {
+ struct gdma_resp_hdr hdr;
+ u32 qos_speed_mbps;
+ u8 qos_unconfigured;
+ u8 reserved1[3];
+ u32 link_speed_mbps;
+ u8 reserved2[4];
+}; /* HW DATA */
+
+/* Set Bandwidth Clamp */
+struct mana_set_bw_clamp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t vport;
+ enum TRI_STATE enable_clamping;
+ u32 link_speed_mbps;
+}; /* HW DATA */
+
+struct mana_set_bw_clamp_resp {
+ struct gdma_resp_hdr hdr;
+ u8 qos_unconfigured;
+ u8 reserved[7];
+}; /* HW DATA */
+
/* Query Device Configuration */
struct mana_query_device_cfg_req {
struct gdma_req_hdr hdr;
@@ -689,6 +794,74 @@ struct mana_query_gf_stat_resp {
u64 tx_err_gdma;
}; /* HW DATA */
+/* Query phy stats */
+struct mana_query_phy_stat_req {
+ struct gdma_req_hdr hdr;
+ u64 req_stats;
+}; /* HW DATA */
+
+struct mana_query_phy_stat_resp {
+ struct gdma_resp_hdr hdr;
+ u64 reported_stats;
+
+ /* Aggregate Drop Counters */
+ u64 rx_pkt_drop_phy;
+ u64 tx_pkt_drop_phy;
+
+	/* Per TC (Traffic Class) traffic Counters */
+ u64 rx_pkt_tc0_phy;
+ u64 tx_pkt_tc0_phy;
+ u64 rx_pkt_tc1_phy;
+ u64 tx_pkt_tc1_phy;
+ u64 rx_pkt_tc2_phy;
+ u64 tx_pkt_tc2_phy;
+ u64 rx_pkt_tc3_phy;
+ u64 tx_pkt_tc3_phy;
+ u64 rx_pkt_tc4_phy;
+ u64 tx_pkt_tc4_phy;
+ u64 rx_pkt_tc5_phy;
+ u64 tx_pkt_tc5_phy;
+ u64 rx_pkt_tc6_phy;
+ u64 tx_pkt_tc6_phy;
+ u64 rx_pkt_tc7_phy;
+ u64 tx_pkt_tc7_phy;
+
+ u64 rx_byte_tc0_phy;
+ u64 tx_byte_tc0_phy;
+ u64 rx_byte_tc1_phy;
+ u64 tx_byte_tc1_phy;
+ u64 rx_byte_tc2_phy;
+ u64 tx_byte_tc2_phy;
+ u64 rx_byte_tc3_phy;
+ u64 tx_byte_tc3_phy;
+ u64 rx_byte_tc4_phy;
+ u64 tx_byte_tc4_phy;
+ u64 rx_byte_tc5_phy;
+ u64 tx_byte_tc5_phy;
+ u64 rx_byte_tc6_phy;
+ u64 tx_byte_tc6_phy;
+ u64 rx_byte_tc7_phy;
+ u64 tx_byte_tc7_phy;
+
+	/* Per TC (Traffic Class) pause Counters */
+ u64 rx_pause_tc0_phy;
+ u64 tx_pause_tc0_phy;
+ u64 rx_pause_tc1_phy;
+ u64 tx_pause_tc1_phy;
+ u64 rx_pause_tc2_phy;
+ u64 tx_pause_tc2_phy;
+ u64 rx_pause_tc3_phy;
+ u64 tx_pause_tc3_phy;
+ u64 rx_pause_tc4_phy;
+ u64 tx_pause_tc4_phy;
+ u64 rx_pause_tc5_phy;
+ u64 tx_pause_tc5_phy;
+ u64 rx_pause_tc6_phy;
+ u64 tx_pause_tc6_phy;
+ u64 rx_pause_tc7_phy;
+ u64 tx_pause_tc7_phy;
+}; /* HW DATA */
+
/* Configure vPort Rx Steering */
struct mana_cfg_rx_steer_req_v2 {
struct gdma_req_hdr hdr;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 9a832cab5b1d..c7ce5ec7be23 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -182,6 +182,7 @@ struct pneigh_entry {
netdevice_tracker dev_tracker;
u32 flags;
u8 protocol;
+ bool permanent;
u32 key[];
};
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index ba2eaf39089b..6e835972abd1 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -294,6 +294,15 @@ netdev_txq_completed_mb(struct netdev_queue *dev_queue,
netif_txq_try_stop(_txq, get_desc, start_thrs); \
})
+static inline void netif_subqueue_sent(const struct net_device *dev,
+ unsigned int idx, unsigned int bytes)
+{
+ struct netdev_queue *txq;
+
+ txq = netdev_get_tx_queue(dev, idx);
+ netdev_tx_sent_queue(txq, bytes);
+}
+
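+/* A sketch of pairing this helper with netif_subqueue_maybe_stop() below in
+ * a driver Tx path (drv_free_descs() and the thresholds are hypothetical):
+ *
+ *	netif_subqueue_sent(dev, qid, skb->len);
+ *	netif_subqueue_maybe_stop(dev, qid, drv_free_descs(txq),
+ *				  DRV_STOP_THRS, DRV_START_THRS);
+ */
+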
#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
({ \
struct netdev_queue *_txq; \
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 386164fb9c18..7a1dafa3f080 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -89,8 +89,7 @@ static inline unsigned int net_iov_idx(const struct net_iov *niov)
* typedef netmem_ref - a nonexistent type marking a reference to generic
* network memory.
*
- * A netmem_ref currently is always a reference to a struct page. This
- * abstraction is introduced so support for new memory types can be added.
+ * A netmem_ref can be a struct page * or a struct net_iov * underneath.
*
* Use the supplied helpers to obtain the underlying memory pointer and fields.
*/
@@ -117,9 +116,6 @@ static inline struct page *__netmem_to_page(netmem_ref netmem)
return (__force struct page *)netmem;
}
-/* This conversion fails (returns NULL) if the netmem_ref is not struct page
- * backed.
- */
static inline struct page *netmem_to_page(netmem_ref netmem)
{
if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
@@ -143,7 +139,7 @@ static inline netmem_ref net_iov_to_netmem(struct net_iov *niov)
return (__force netmem_ref)((unsigned long)niov | NET_IOV);
}
-static inline netmem_ref page_to_netmem(struct page *page)
+static inline netmem_ref page_to_netmem(const struct page *page)
{
return (__force netmem_ref)page;
}
@@ -178,6 +174,21 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem)
return page_to_pfn(netmem_to_page(netmem));
}
+/* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to
+ * common fields.
+ * @netmem: netmem reference to extract as net_iov.
+ *
+ * All the sub types of netmem_ref (page, net_iov) have the same pp, pp_magic,
+ * dma_addr, and pp_ref_count fields at the same offsets. Thus, we can access
+ * these fields without a type check to make sure that the underlying mem is
+ * net_iov or page.
+ *
+ * The resulting value of this function can only be used to access the fields
+ * that are NET_IOV_ASSERT_OFFSET'd. Accessing any other fields will result in
+ * undefined behavior.
+ *
+ * Return: the netmem_ref cast to net_iov* regardless of its underlying type.
+ */
static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
{
return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 93f2c31baf9b..773fc65780b5 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -153,6 +153,13 @@ static inline netmem_ref page_pool_dev_alloc_netmem(struct page_pool *pool,
return page_pool_alloc_netmem(pool, offset, size, gfp);
}
+static inline netmem_ref page_pool_dev_alloc_netmems(struct page_pool *pool)
+{
+ gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+
+ return page_pool_alloc_netmems(pool, gfp);
+}
+
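+/* A usage sketch (rxq->page_pool is a hypothetical driver field); the GFP
+ * flags match the other _dev_ helpers, so this is callable from NAPI
+ * context:
+ *
+ *	netmem_ref nmem = page_pool_dev_alloc_netmems(rxq->page_pool);
+ */
+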
static inline struct page *page_pool_alloc(struct page_pool *pool,
unsigned int *offset,
unsigned int *size, gfp_t gfp)
diff --git a/include/net/pfcp.h b/include/net/pfcp.h
index af14f970b80e..639553797d3e 100644
--- a/include/net/pfcp.h
+++ b/include/net/pfcp.h
@@ -45,7 +45,7 @@ struct pfcphdr_session {
reserved:4;
#elif defined(__BIG_ENDIAN_BITFIELD)
u8 reserved:4,
- message_priprity:4;
+ message_priority:4;
#else
#error "Please fix <asm/byteorder>"
#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 4c37015b7cf7..ca532227cbfd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2677,6 +2677,10 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
+bool skb_has_tx_timestamp(struct sk_buff *skb, const struct sock *sk);
+int skb_get_tx_timestamp(struct sk_buff *skb, struct sock *sk,
+ struct timespec64 *ts);
+
static inline void
sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
{
@@ -2982,7 +2986,6 @@ void sock_set_timestamp(struct sock *sk, int optname, bool valbool);
int sock_set_timestamping(struct sock *sk, int optname,
struct so_timestamping timestamping);
-void sock_enable_timestamps(struct sock *sk);
#if defined(CONFIG_CGROUP_BPF)
void bpf_skops_tx_timestamping(struct sock *sk, struct sk_buff *skb, int op);
#else
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5078ad868fee..761c4a0ad386 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -321,7 +321,7 @@ extern struct proto tcp_prot;
#define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
#define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
-void tcp_tasklet_init(void);
+void tcp_tsq_work_init(void);
int tcp_v4_err(struct sk_buff *skb, u32);
@@ -1811,14 +1811,8 @@ static inline void tcp_mib_init(struct net *net)
}
/* from STCP */
-static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp)
-{
- tp->lost_skb_hint = NULL;
-}
-
static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
{
- tcp_clear_retrans_hints_partial(tp);
tp->retransmit_skb_hint = NULL;
}
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 2df3b8344eb5..cbd3a43074bd 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -130,35 +130,20 @@ void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type);
void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type);
-static inline void udp_tunnel_get_rx_info(struct net_device *dev)
-{
- ASSERT_RTNL();
- if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
- return;
- call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev);
-}
-
-static inline void udp_tunnel_drop_rx_info(struct net_device *dev)
-{
- ASSERT_RTNL();
- if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
- return;
- call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev);
-}
-
/* Transmit the skb using UDP encapsulation. */
void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl,
__be16 df, __be16 src_port, __be16 dst_port,
- bool xnet, bool nocheck);
+ bool xnet, bool nocheck, u16 ipcb_flags);
-int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb,
- struct net_device *dev,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- __u8 prio, __u8 ttl, __be32 label,
- __be16 src_port, __be16 dst_port, bool nocheck);
+void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb,
+ struct net_device *dev,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u8 prio, __u8 ttl, __be32 label,
+ __be16 src_port, __be16 dst_port, bool nocheck,
+ u16 ip6cb_flags);
void udp_tunnel_sock_release(struct socket *sock);
@@ -221,19 +206,17 @@ static inline void udp_tunnel_encap_enable(struct sock *sk)
#define UDP_TUNNEL_NIC_MAX_TABLES 4
enum udp_tunnel_nic_info_flags {
- /* Device callbacks may sleep */
- UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0),
/* Device only supports offloads when it's open, all ports
* will be removed before close and re-added after open.
*/
- UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1),
+ UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(0),
/* Device supports only IPv4 tunnels */
- UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2),
+ UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(1),
/* Device has hard-coded the IANA VXLAN port (4789) as VXLAN.
* This port must not be counted towards n_entries of any table.
* Driver will not receive any callback associated with port 4789.
*/
- UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3),
+ UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(2),
};
struct udp_tunnel_nic;
@@ -324,6 +307,9 @@ struct udp_tunnel_nic_ops {
size_t (*dump_size)(struct net_device *dev, unsigned int table);
int (*dump_write)(struct net_device *dev, unsigned int table,
struct sk_buff *skb);
+ void (*assert_locked)(struct net_device *dev);
+ void (*lock)(struct net_device *dev);
+ void (*unlock)(struct net_device *dev);
};
#ifdef CONFIG_INET
@@ -352,8 +338,29 @@ static inline void
udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table,
unsigned int idx, u8 priv)
{
- if (udp_tunnel_nic_ops)
+ if (udp_tunnel_nic_ops) {
+ udp_tunnel_nic_ops->lock(dev);
udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv);
+ udp_tunnel_nic_ops->unlock(dev);
+ }
+}
+
+static inline void udp_tunnel_nic_assert_locked(struct net_device *dev)
+{
+ if (udp_tunnel_nic_ops)
+ udp_tunnel_nic_ops->assert_locked(dev);
+}
+
+static inline void udp_tunnel_nic_lock(struct net_device *dev)
+{
+ if (udp_tunnel_nic_ops)
+ udp_tunnel_nic_ops->lock(dev);
+}
+
+static inline void udp_tunnel_nic_unlock(struct net_device *dev)
+{
+ if (udp_tunnel_nic_ops)
+ udp_tunnel_nic_ops->unlock(dev);
}
static inline void
@@ -395,17 +402,50 @@ static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev)
static inline size_t
udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table)
{
+ size_t ret;
+
if (!udp_tunnel_nic_ops)
return 0;
- return udp_tunnel_nic_ops->dump_size(dev, table);
+
+ udp_tunnel_nic_ops->lock(dev);
+ ret = udp_tunnel_nic_ops->dump_size(dev, table);
+ udp_tunnel_nic_ops->unlock(dev);
+
+ return ret;
}
static inline int
udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table,
struct sk_buff *skb)
{
+ int ret;
+
if (!udp_tunnel_nic_ops)
return 0;
- return udp_tunnel_nic_ops->dump_write(dev, table, skb);
+
+ udp_tunnel_nic_ops->lock(dev);
+ ret = udp_tunnel_nic_ops->dump_write(dev, table, skb);
+ udp_tunnel_nic_ops->unlock(dev);
+
+ return ret;
+}
+
+static inline void udp_tunnel_get_rx_info(struct net_device *dev)
+{
+ ASSERT_RTNL();
+ if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ return;
+ udp_tunnel_nic_assert_locked(dev);
+ call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev);
}
+
+static inline void udp_tunnel_drop_rx_info(struct net_device *dev)
+{
+ ASSERT_RTNL();
+ if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ return;
+ udp_tunnel_nic_assert_locked(dev);
+ call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev);
+}
+
#endif
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index e2f7ca045d3e..0ee50785f4f1 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -332,6 +332,7 @@ struct vxlan_dev {
#define VXLAN_F_VNIFILTER 0x20000
#define VXLAN_F_MDB 0x40000
#define VXLAN_F_LOCALBYPASS 0x80000
+#define VXLAN_F_MC_ROUTE 0x100000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
@@ -353,7 +354,9 @@ struct vxlan_dev {
VXLAN_F_UDP_ZERO_CSUM6_RX | \
VXLAN_F_COLLECT_METADATA | \
VXLAN_F_VNIFILTER | \
- VXLAN_F_LOCALBYPASS)
+ VXLAN_F_LOCALBYPASS | \
+ VXLAN_F_MC_ROUTE | \
+ 0)
struct net_device *vxlan_dev_create(struct net *net, const char *name,
u8 name_assign_type, struct vxlan_config *conf);
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 95f59c1a6f57..54e60c6009e3 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -692,6 +692,7 @@ DEFINE_EVENT(tcp_ao_event, tcp_ao_handshake_failure,
TP_ARGS(sk, skb, keyid, rnext, maclen)
);
+#ifdef CONFIG_TCP_AO
DEFINE_EVENT(tcp_ao_event, tcp_ao_wrong_maclen,
TP_PROTO(const struct sock *sk, const struct sk_buff *skb,
const __u8 keyid, const __u8 rnext, const __u8 maclen),
@@ -830,6 +831,7 @@ DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update,
TP_PROTO(const struct sock *sk, __u32 new_sne),
TP_ARGS(sk, new_sne)
);
+#endif /* CONFIG_TCP_AO */
#endif /* _TRACE_TCP_H */
diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h
index bf97d4b6d51f..349e1b3ca1ae 100644
--- a/include/uapi/linux/dpll.h
+++ b/include/uapi/linux/dpll.h
@@ -192,6 +192,17 @@ enum dpll_pin_capabilities {
#define DPLL_PHASE_OFFSET_DIVIDER 1000
+/**
+ * enum dpll_feature_state - Allow control (enable/disable) and status checking
+ * over features.
+ * @DPLL_FEATURE_STATE_DISABLE: feature shall be disabled
+ * @DPLL_FEATURE_STATE_ENABLE: feature shall be enabled
+ */
+enum dpll_feature_state {
+ DPLL_FEATURE_STATE_DISABLE,
+ DPLL_FEATURE_STATE_ENABLE,
+};
+
enum dpll_a {
DPLL_A_ID = 1,
DPLL_A_MODULE_NAME,
@@ -204,6 +215,7 @@ enum dpll_a {
DPLL_A_TYPE,
DPLL_A_LOCK_STATUS_ERROR,
DPLL_A_CLOCK_QUALITY_LEVEL,
+ DPLL_A_PHASE_OFFSET_MONITOR,
__DPLL_A_MAX,
DPLL_A_MAX = (__DPLL_A_MAX - 1)
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 9ff72cfb2e98..09a75bdb6560 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -208,10 +208,6 @@ enum {
ETHTOOL_A_STATS_PHY_MAX = (__ETHTOOL_A_STATS_PHY_CNT - 1)
};
-/* generic netlink info */
-#define ETHTOOL_GENL_NAME "ethtool"
-#define ETHTOOL_GENL_VERSION 1
-
#define ETHTOOL_MCGRP_MONITOR_NAME "monitor"
#endif /* _UAPI_LINUX_ETHTOOL_NETLINK_H_ */
diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
index 9a02f579de22..4944badf9fba 100644
--- a/include/uapi/linux/ethtool_netlink_generated.h
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -6,8 +6,8 @@
#ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
#define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
-#define ETHTOOL_FAMILY_NAME "ethtool"
-#define ETHTOOL_FAMILY_VERSION 1
+#define ETHTOOL_GENL_NAME "ethtool"
+#define ETHTOOL_GENL_VERSION 1
enum {
ETHTOOL_UDP_TUNNEL_TYPE_VXLAN,
@@ -49,6 +49,34 @@ enum hwtstamp_source {
HWTSTAMP_SOURCE_PHYLIB,
};
+/**
+ * enum ethtool_pse_event - PSE event list for the PSE controller
+ * @ETHTOOL_PSE_EVENT_OVER_CURRENT: PSE output current is too high
+ * @ETHTOOL_PSE_EVENT_OVER_TEMP: PSE is in an over-temperature state
+ * @ETHTOOL_C33_PSE_EVENT_DETECTION: detection process occurred on the PSE.
+ * IEEE 802.3-2022 33.2.5 and 145.2.6 PSE detection of PDs. IEEE 802.3-2022
+ * 30.9.1.1.5 aPSEPowerDetectionStatus.
+ * @ETHTOOL_C33_PSE_EVENT_CLASSIFICATION: classification process occurred on
+ * the PSE. IEEE 802.3-2022 33.2.6 and 145.2.8 classification of PDs mutual
+ * identification. IEEE 802.3-2022 30.9.1.1.8 aPSEPowerClassification.
+ * @ETHTOOL_C33_PSE_EVENT_DISCONNECTION: PD has been disconnected on the PSE.
+ * IEEE 802.3-2022 33.3.8 and 145.3.9 PD Maintain Power Signature. IEEE
+ * 802.3-2022 33.5.1.2.9 MPS Absent. IEEE 802.3-2022 30.9.1.1.20
+ * aPSEMPSAbsentCounter.
+ * @ETHTOOL_PSE_EVENT_OVER_BUDGET: PSE turned off due to an over-budget
+ * situation
+ * @ETHTOOL_PSE_EVENT_SW_PW_CONTROL_ERROR: PSE encountered an error while
+ * managing the power control from software
+ */
+enum ethtool_pse_event {
+ ETHTOOL_PSE_EVENT_OVER_CURRENT = 1,
+ ETHTOOL_PSE_EVENT_OVER_TEMP = 2,
+ ETHTOOL_C33_PSE_EVENT_DETECTION = 4,
+ ETHTOOL_C33_PSE_EVENT_CLASSIFICATION = 8,
+ ETHTOOL_C33_PSE_EVENT_DISCONNECTION = 16,
+ ETHTOOL_PSE_EVENT_OVER_BUDGET = 32,
+ ETHTOOL_PSE_EVENT_SW_PW_CONTROL_ERROR = 64,
+};
+
enum {
ETHTOOL_A_HEADER_UNSPEC,
ETHTOOL_A_HEADER_DEV_INDEX,
@@ -642,6 +670,9 @@ enum {
ETHTOOL_A_C33_PSE_EXT_SUBSTATE,
ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT,
ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES,
+ ETHTOOL_A_PSE_PW_D_ID,
+ ETHTOOL_A_PSE_PRIO_MAX,
+ ETHTOOL_A_PSE_PRIO,
__ETHTOOL_A_PSE_CNT,
ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1)
@@ -719,6 +750,14 @@ enum {
};
enum {
+ ETHTOOL_A_PSE_NTF_HEADER = 1,
+ ETHTOOL_A_PSE_NTF_EVENTS,
+
+ __ETHTOOL_A_PSE_NTF_CNT,
+ ETHTOOL_A_PSE_NTF_MAX = (__ETHTOOL_A_PSE_NTF_CNT - 1)
+};
+
+enum {
ETHTOOL_MSG_USER_NONE = 0,
ETHTOOL_MSG_STRSET_GET = 1,
ETHTOOL_MSG_LINKINFO_GET,
@@ -822,6 +861,7 @@ enum {
ETHTOOL_MSG_PHY_NTF,
ETHTOOL_MSG_TSCONFIG_GET_REPLY,
ETHTOOL_MSG_TSCONFIG_SET_REPLY,
+ ETHTOOL_MSG_PSE_NTF,
__ETHTOOL_MSG_KERNEL_CNT,
ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 3ad2d5d98034..873c285996fe 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1398,6 +1398,7 @@ enum {
IFLA_VXLAN_LOCALBYPASS,
IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
IFLA_VXLAN_RESERVED_BITS,
+ IFLA_VXLAN_MC_ROUTE,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index ff8d21f9e95b..5a47339ef7d7 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -152,7 +152,6 @@ struct in6_flowlabel_req {
/*
* IPV6 socket options
*/
-#if __UAPI_DEF_IPV6_OPTIONS
#define IPV6_ADDRFORM 1
#define IPV6_2292PKTINFO 2
#define IPV6_2292HOPOPTS 3
@@ -169,8 +168,10 @@ struct in6_flowlabel_req {
#define IPV6_MULTICAST_IF 17
#define IPV6_MULTICAST_HOPS 18
#define IPV6_MULTICAST_LOOP 19
+#if __UAPI_DEF_IPV6_OPTIONS
#define IPV6_ADD_MEMBERSHIP 20
#define IPV6_DROP_MEMBERSHIP 21
+#endif
#define IPV6_ROUTER_ALERT 22
#define IPV6_MTU_DISCOVER 23
#define IPV6_MTU 24
@@ -203,7 +204,6 @@ struct in6_flowlabel_req {
#define IPV6_IPSEC_POLICY 34
#define IPV6_XFRM_POLICY 35
#define IPV6_HDRINCL 36
-#endif
/*
* Multicast: