Diffstat (limited to 'include/net')
-rw-r--r--  include/net/af_vsock.h | 9
-rw-r--r--  include/net/bluetooth/hci.h | 16
-rw-r--r--  include/net/bonding.h | 13
-rw-r--r--  include/net/cfg80211.h | 328
-rw-r--r--  include/net/codel_impl.h | 45
-rw-r--r--  include/net/devlink.h | 20
-rw-r--r--  include/net/dropreason-core.h | 66
-rw-r--r--  include/net/dropreason-qdisc.h | 114
-rw-r--r--  include/net/dropreason.h | 6
-rw-r--r--  include/net/dsa.h | 16
-rw-r--r--  include/net/hotdata.h | 5
-rw-r--r--  include/net/inet6_connection_sock.h | 4
-rw-r--r--  include/net/inet6_hashtables.h | 2
-rw-r--r--  include/net/inet_common.h | 3
-rw-r--r--  include/net/inet_connection_sock.h | 3
-rw-r--r--  include/net/inet_hashtables.h | 1
-rw-r--r--  include/net/ip.h | 10
-rw-r--r--  include/net/ip6_checksum.h | 2
-rw-r--r--  include/net/ip6_fib.h | 35
-rw-r--r--  include/net/ip6_route.h | 41
-rw-r--r--  include/net/ip6_tunnel.h | 2
-rw-r--r--  include/net/ip_vs.h | 396
-rw-r--r--  include/net/ipv6.h | 20
-rw-r--r--  include/net/ipv6_stubs.h | 102
-rw-r--r--  include/net/mac80211.h | 176
-rw-r--r--  include/net/mana/gdma.h | 20
-rw-r--r--  include/net/mana/mana.h | 18
-rw-r--r--  include/net/mctp.h | 1
-rw-r--r--  include/net/ndisc.h | 31
-rw-r--r--  include/net/netdev_queues.h | 23
-rw-r--r--  include/net/netdev_rx_queue.h | 27
-rw-r--r--  include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 3
-rw-r--r--  include/net/netfilter/nf_conntrack_l4proto.h | 7
-rw-r--r--  include/net/netfilter/nf_tables.h | 36
-rw-r--r--  include/net/netfilter/nf_tables_ipv4.h | 17
-rw-r--r--  include/net/netfilter/nf_tables_ipv6.h | 16
-rw-r--r--  include/net/netfilter/nf_tables_offload.h | 10
-rw-r--r--  include/net/netfilter/nft_fib.h | 2
-rw-r--r--  include/net/netfilter/nft_meta.h | 3
-rw-r--r--  include/net/netlink.h | 19
-rw-r--r--  include/net/netmem.h | 38
-rw-r--r--  include/net/netns/ipv4.h | 9
-rw-r--r--  include/net/netns/ipv6.h | 2
-rw-r--r--  include/net/netns/mib.h | 5
-rw-r--r--  include/net/netns/vsock.h | 2
-rw-r--r--  include/net/page_pool/memory_provider.h | 8
-rw-r--r--  include/net/page_pool/types.h | 11
-rw-r--r--  include/net/ping.h | 5
-rw-r--r--  include/net/rps-types.h | 24
-rw-r--r--  include/net/rps.h | 49
-rw-r--r--  include/net/sch_generic.h | 61
-rw-r--r--  include/net/sock.h | 34
-rw-r--r--  include/net/switchdev.h | 1
-rw-r--r--  include/net/tc_wrapper.h | 47
-rw-r--r--  include/net/tcp.h | 120
-rw-r--r--  include/net/transp_v6.h | 3
-rw-r--r--  include/net/tso.h | 100
-rw-r--r--  include/net/udp.h | 89
-rw-r--r--  include/net/udp_tunnel.h | 3
-rw-r--r--  include/net/udplite.h | 88
-rw-r--r--  include/net/xsk_buff_pool.h | 7
61 files changed, 1701 insertions(+), 673 deletions(-)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 533d8e75f7bb..4e40063adab4 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -179,6 +179,15 @@ struct vsock_transport {
/* Addressing. */
u32 (*get_local_cid)(void);
+ /* Check if this transport serves a specific remote CID.
+ * For H2G transports: return true if the CID belongs to a registered
+ * guest. If not implemented, all CIDs > VMADDR_CID_HOST go to H2G.
+ * For G2H transports: return true if the transport can reach arbitrary
+ * CIDs via the hypervisor (i.e. supports the fallback overlay). VMCI
+ * does not implement this as it only serves CIDs 0 and 2.
+ */
+ bool (*has_remote_cid)(struct vsock_sock *vsk, u32 remote_cid);
+
/* Read a single skb */
int (*read_skb)(struct vsock_sock *, skb_read_actor_t);
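
[Editor's note: a minimal sketch of how an H2G transport could implement the new callback, assuming a made-up vhost_vsock_cid_exists() lookup helper; the real driver wiring is outside this hunk.]

/* Hypothetical H2G implementation: only claim CIDs of registered guests. */
static bool example_transport_has_remote_cid(struct vsock_sock *vsk,
					     u32 remote_cid)
{
	/* vhost_vsock_cid_exists() is an assumed helper, for illustration only. */
	return vhost_vsock_cid_exists(remote_cid);
}
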
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 89ad9470fa71..572b1c620c5d 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1468,8 +1468,12 @@ struct hci_rp_read_data_block_size {
} __packed;
#define HCI_OP_READ_LOCAL_CODECS 0x100b
-struct hci_std_codecs {
+struct hci_std_codecs_hdr {
__u8 num;
+} __packed;
+
+struct hci_std_codecs {
+ struct hci_std_codecs_hdr;
__u8 codec[];
} __packed;
@@ -1487,7 +1491,7 @@ struct hci_vnd_codecs {
struct hci_rp_read_local_supported_codecs {
__u8 status;
- struct hci_std_codecs std_codecs;
+ struct hci_std_codecs_hdr std_codecs;
struct hci_vnd_codecs vnd_codecs;
} __packed;
@@ -1504,8 +1508,12 @@ struct hci_std_codec_v2 {
__u8 transport;
} __packed;
-struct hci_std_codecs_v2 {
+struct hci_std_codecs_v2_hdr {
__u8 num;
+} __packed;
+
+struct hci_std_codecs_v2 {
+ struct hci_std_codecs_v2_hdr;
struct hci_std_codec_v2 codec[];
} __packed;
@@ -1522,7 +1530,7 @@ struct hci_vnd_codecs_v2 {
struct hci_rp_read_local_supported_codecs_v2 {
__u8 status;
- struct hci_std_codecs_v2 std_codecs;
+ struct hci_std_codecs_v2_hdr std_codecs;
struct hci_vnd_codecs_v2 vendor_codecs;
} __packed;
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 395c6e281c5f..edd1942dcd73 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -69,9 +69,6 @@
#define bond_first_slave_rcu(bond) \
netdev_lower_get_first_private_rcu(bond->dev)
-#define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond))
-#define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond))
-
/**
* bond_for_each_slave - iterate over all slaves
* @bond: the bond holding this list
@@ -91,22 +88,22 @@
NETIF_F_GSO_ESP)
#ifdef CONFIG_NET_POLL_CONTROLLER
-extern atomic_t netpoll_block_tx;
+DECLARE_STATIC_KEY_FALSE(netpoll_block_tx);
static inline void block_netpoll_tx(void)
{
- atomic_inc(&netpoll_block_tx);
+ static_branch_inc(&netpoll_block_tx);
}
static inline void unblock_netpoll_tx(void)
{
- atomic_dec(&netpoll_block_tx);
+ static_branch_dec(&netpoll_block_tx);
}
static inline int is_netpoll_tx_blocked(struct net_device *dev)
{
- if (unlikely(netpoll_tx_running(dev)))
- return atomic_read(&netpoll_block_tx);
+ if (static_branch_unlikely(&netpoll_block_tx))
+ return netpoll_tx_running(dev);
return 0;
}
#else
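
[Editor's note: for reference, a self-contained sketch of the static-branch pattern this hunk converts to (generic names, not the bonding symbols); the key defaults to false, so the TX fast path costs a patched-out branch until a netpoll user increments it.]

#include <linux/jump_label.h>

DEFINE_STATIC_KEY_FALSE(example_block_tx);	/* pairs with DECLARE_STATIC_KEY_FALSE() in a header */

static void example_block(void)
{
	static_branch_inc(&example_block_tx);	/* patch the branch in */
}

static void example_unblock(void)
{
	static_branch_dec(&example_block_tx);	/* patch it back out when the last user leaves */
}

static bool example_is_blocked(void)
{
	return static_branch_unlikely(&example_block_tx);
}
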
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index fc01de19c798..9d3639ff9c28 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -190,6 +190,8 @@ enum ieee80211_channel_flags {
* on this channel.
* @dfs_state_entered: timestamp (jiffies) when the dfs state was entered.
* @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels.
+ * @cac_start_time: timestamp (CLOCK_BOOTTIME, nanoseconds) when CAC was
+ * started on this channel. Zero when CAC is not in progress.
* @psd: power spectral density (in dBm)
*/
struct ieee80211_channel {
@@ -207,6 +209,7 @@ struct ieee80211_channel {
enum nl80211_dfs_state dfs_state;
unsigned long dfs_state_entered;
unsigned int dfs_cac_ms;
+ u64 cac_start_time;
s8 psd;
};
@@ -1828,6 +1831,7 @@ struct cfg80211_ttlm_params {
* @eml_cap: EML capabilities of this station
* @link_sta_params: link related params.
* @epp_peer: EPP peer indication
+ * @nmi_mac: MAC address of the NMI station of the NAN peer
*/
struct station_parameters {
struct net_device *vlan;
@@ -1855,6 +1859,7 @@ struct station_parameters {
u16 eml_cap;
struct link_station_parameters link_sta_params;
bool epp_peer;
+ const u8 *nmi_mac;
};
/**
@@ -1894,6 +1899,8 @@ struct station_del_parameters {
* entry that is operating, has been marked authorized by userspace)
* @CFG80211_STA_MESH_PEER_KERNEL: peer on mesh interface (kernel managed)
* @CFG80211_STA_MESH_PEER_USER: peer on mesh interface (user managed)
+ * @CFG80211_STA_NAN_MGMT: NAN management interface station
+ * @CFG80211_STA_NAN_DATA: NAN data path station
*/
enum cfg80211_station_type {
CFG80211_STA_AP_CLIENT,
@@ -1905,6 +1912,8 @@ enum cfg80211_station_type {
CFG80211_STA_TDLS_PEER_ACTIVE,
CFG80211_STA_MESH_PEER_KERNEL,
CFG80211_STA_MESH_PEER_USER,
+ CFG80211_STA_NAN_MGMT,
+ CFG80211_STA_NAN_DATA,
};
/**
@@ -3978,6 +3987,77 @@ struct cfg80211_qos_map {
};
/**
+ * DOC: Neighbor Awareness Networking (NAN)
+ *
+ * NAN uses two interface types:
+ *
+ * - %NL80211_IFTYPE_NAN: a non-netdev interface. This has two roles: (1) holds
+ * the configuration of all NAN activities (DE parameters, synchronisation
+ * parameters, local schedule, etc.), and (2) serves as the NAN Management
+ * Interface (NMI), used for NAN management communication.
+ *
+ * - %NL80211_IFTYPE_NAN_DATA: The NAN Data Interface (NDI), used for data
+ * communication with NAN peers.
+ *
+ * An NDI interface can only be started (IFF_UP) if the NMI interface is
+ * running and NAN has been started. Before NAN is stopped, all associated
+ * NDI interfaces must be stopped first.
+ *
+ * The local schedule specifies which channels the device is available on and
+ * when. It must be cancelled before NAN is stopped.
+ *
+ * NAN Stations
+ * ~~~~~~~~~~~~
+ *
+ * There are two types of stations corresponding to the two interface types:
+ *
+ * - NMI station: Represents the NAN peer. Peer-specific data such as the peer's
+ * schedule and the HT, VHT and HE capabilities belongs to the NMI station.
+ * Also used for Tx/Rx of NAN management frames to/from the peer.
+ * Added on the %NL80211_IFTYPE_NAN interface.
+ *
+ * - NDI station: Used for Tx/Rx of data frames (and non-NAN management frames)
+ * for a specific NDP established with the NAN peer. Added on the
+ * %NL80211_IFTYPE_NAN_DATA interface.
+ *
+ * A peer may reuse its NMI address as the NDI address. In that case, two
+ * separate stations should be added even though they share the same MAC
+ * address.
+ *
+ * HT, VHT and HE capabilities should not change after they have been set. It is the
+ * driver's responsibility to check that.
+ *
+ * An NDI station can only be added if the corresponding NMI station has already
+ * been configured with HT (and possibly VHT and HE) capabilities. It is the
+ * driver's responsibility to check that.
+ *
+ * All NDI stations must be removed before the corresponding NMI station is
+ * removed. Therefore, removing an NMI station implies that the associated
+ * NDI station(s) (if any) will be removed first.
+ *
+ * NAN Dependencies
+ * ~~~~~~~~~~~~~~~~
+ *
+ * The following diagram shows the dependencies between NAN components.
+ * An arrow from A to B means A must be started/added before B, and B must be
+ * stopped/removed before A:
+ *
+ * +-------------+
+ * | NMI iface |---(local schedule)
+ * +------+------+
+ * / \
+ * v v
+ * +-----------+ +-------------+
+ * | NDI iface | | NMI sta |---(peer schedule)
+ * +-----+-----+ +------+------+
+ * \ /
+ * v v
+ * +----------+
+ * | NDI sta |
+ * +----------+
+ */
+
+/**
* struct cfg80211_nan_band_config - NAN band specific configuration
*
* @chan: Pointer to the IEEE 802.11 channel structure. The channel to be used
@@ -4020,7 +4100,6 @@ struct cfg80211_nan_band_config {
* (i.e. BIT(NL80211_BAND_2GHZ)).
* @cluster_id: cluster ID used for NAN synchronization. This is a MAC address
* that can take a value from 50-6F-9A-01-00-00 to 50-6F-9A-01-FF-FF.
- * If NULL, the device will pick a random Cluster ID.
* @scan_period: period (in seconds) between NAN scans.
* @scan_dwell_time: dwell time (in milliseconds) for NAN scans.
* @discovery_beacon_interval: interval (in TUs) for discovery beacons.
@@ -4036,7 +4115,7 @@ struct cfg80211_nan_band_config {
struct cfg80211_nan_conf {
u8 master_pref;
u8 bands;
- const u8 *cluster_id;
+ u8 cluster_id[ETH_ALEN] __aligned(2);
u16 scan_period;
u16 scan_dwell_time;
u8 discovery_beacon_interval;
@@ -4048,6 +4127,102 @@ struct cfg80211_nan_conf {
u16 vendor_elems_len;
};
+#define CFG80211_NAN_SCHED_NUM_TIME_SLOTS 32
+
+/**
+ * struct cfg80211_nan_channel - NAN channel configuration
+ *
+ * This struct defines a NAN channel configuration
+ *
+ * @chandef: the channel definition
+ * @channel_entry: pointer to the Channel Entry blob as defined in Wi-Fi Aware
+ * (TM) 4.0 specification Table 100 (Channel Entry format for the NAN
+ * Availability attribute).
+ * @rx_nss: number of spatial streams supported on this channel
+ */
+struct cfg80211_nan_channel {
+ struct cfg80211_chan_def chandef;
+ const u8 *channel_entry;
+ u8 rx_nss;
+};
+
+/**
+ * struct cfg80211_nan_local_sched - NAN local schedule
+ *
+ * This struct defines NAN local schedule parameters
+ *
+ * @schedule: a mapping of time slots to chandef indexes in %nan_channels.
+ * An unscheduled slot will be set to %NL80211_NAN_SCHED_NOT_AVAIL_SLOT.
+ * @n_channels: number of channel definitions in %nan_channels.
+ * @nan_avail_blob: pointer to NAN Availability attribute blob.
+ * See %NL80211_ATTR_NAN_AVAIL_BLOB for more details.
+ * @nan_avail_blob_len: length of the @nan_avail_blob in bytes.
+ * @deferred: if true, the command containing this schedule configuration is a
+ * request from the device to perform an announced schedule update. This
+ * means that it needs to send the updated NAN availability to the peers,
+ * and do the actual switch on the right time (i.e. at the end of the slot
+ * after the slot in which the updated NAN Availability was sent).
+ * See %NL80211_ATTR_NAN_SCHED_DEFERRED for more details.
+ * If false, the schedule is applied immediately.
+ * @nan_channels: array of NAN channel definitions that can be scheduled.
+ */
+struct cfg80211_nan_local_sched {
+ u8 schedule[CFG80211_NAN_SCHED_NUM_TIME_SLOTS];
+ u8 n_channels;
+ const u8 *nan_avail_blob;
+ u16 nan_avail_blob_len;
+ bool deferred;
+ struct cfg80211_nan_channel nan_channels[] __counted_by(n_channels);
+};
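
[Editor's note: as an illustration only (slot and channel indexes invented), a driver or test could populate the new structure roughly like this, marking idle slots with %NL80211_NAN_SCHED_NOT_AVAIL_SLOT.]

	struct cfg80211_nan_local_sched *sched;
	int i;

	sched = kzalloc(struct_size(sched, nan_channels, 2), GFP_KERNEL);
	if (!sched)
		return -ENOMEM;

	sched->n_channels = 2;
	for (i = 0; i < CFG80211_NAN_SCHED_NUM_TIME_SLOTS; i++)
		sched->schedule[i] = NL80211_NAN_SCHED_NOT_AVAIL_SLOT;
	sched->schedule[0] = 0;		/* slot 0 on nan_channels[0] */
	sched->schedule[16] = 1;	/* slot 16 on nan_channels[1] */
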
+
+/**
+ * struct cfg80211_nan_peer_map - NAN peer schedule map
+ *
+ * This struct defines a single NAN peer schedule map
+ *
+ * @map_id: map ID of this schedule map
+ * @schedule: a mapping of time slots to chandef indexes in the schedule's
+ * @nan_channels. Each slot lasts 16TUs. An unscheduled slot will be
+ * set to %NL80211_NAN_SCHED_NOT_AVAIL_SLOT.
+ */
+struct cfg80211_nan_peer_map {
+ u8 map_id;
+ u8 schedule[CFG80211_NAN_SCHED_NUM_TIME_SLOTS];
+};
+
+#define CFG80211_NAN_MAX_PEER_MAPS 2
+#define CFG80211_NAN_INVALID_MAP_ID 0xff
+
+/**
+ * struct cfg80211_nan_peer_sched - NAN peer schedule
+ *
+ * This struct defines NAN peer schedule parameters for a peer.
+ *
+ * @peer_addr: MAC address of the peer (NMI address)
+ * @seq_id: sequence ID of the peer schedule.
+ * @committed_dw: committed DW as published by the peer.
+ * See %NL80211_ATTR_NAN_COMMITTED_DW
+ * @max_chan_switch: maximum channel switch time in microseconds as published
+ * by the peer. See %NL80211_ATTR_NAN_MAX_CHAN_SWITCH_TIME.
+ * @init_ulw: initial ULWs as published by the peer.
+ * @ulw_size: number of bytes in @init_ulw.
+ * @n_channels: number of channel definitions in @nan_channels.
+ * @nan_channels: array of NAN channel definitions for this schedule.
+ * @maps: array of peer schedule maps. Unused entries have
+ * map_id = %CFG80211_NAN_INVALID_MAP_ID.
+ */
+struct cfg80211_nan_peer_sched {
+ const u8 *peer_addr;
+ u8 seq_id;
+ u16 committed_dw;
+ u16 max_chan_switch;
+ const u8 *init_ulw;
+ u16 ulw_size;
+ u8 n_channels;
+ struct cfg80211_nan_channel *nan_channels;
+ struct cfg80211_nan_peer_map maps[CFG80211_NAN_MAX_PEER_MAPS];
+};
+
/**
* enum cfg80211_nan_conf_changes - indicates changed fields in NAN
* configuration
@@ -4828,6 +5003,19 @@ struct mgmt_frame_regs {
* @nan_change_conf: changes NAN configuration. The changed parameters must
* be specified in @changes (using &enum cfg80211_nan_conf_changes);
* All other parameters must be ignored.
+ * @nan_set_local_sched: configure the local schedule for NAN. The schedule
+ * consists of an array of %cfg80211_nan_channel and the schedule itself,
+ * in which each entry maps a time slot to the channel on which the
+ * radio should operate. If the chandef of a NAN channel is not
+ * changed, the channel entry must also remain unchanged. It is the
+ * driver's responsibility to verify this.
+ * @nan_set_peer_sched: configure the peer schedule for NAN. The schedule
+ * consists of an array of %cfg80211_nan_channel and the schedule itself,
+ * in which each entry maps a time slot to a channel on which the
+ * radio should operate. In addition, it contains further peer schedule
+ * information such as the committed DW. When updating an existing peer
+ * schedule, the full new schedule is provided - partial updates are not
+ * supported, and the new schedule completely replaces the previous one.
*
* @set_multicast_to_unicast: configure multicast to unicast conversion for BSS
*
@@ -4922,24 +5110,24 @@ struct cfg80211_ops {
struct wireless_dev *wdev,
unsigned int link_id);
- int (*add_key)(struct wiphy *wiphy, struct net_device *netdev,
+ int (*add_key)(struct wiphy *wiphy, struct wireless_dev *wdev,
int link_id, u8 key_index, bool pairwise,
const u8 *mac_addr, struct key_params *params);
- int (*get_key)(struct wiphy *wiphy, struct net_device *netdev,
+ int (*get_key)(struct wiphy *wiphy, struct wireless_dev *wdev,
int link_id, u8 key_index, bool pairwise,
const u8 *mac_addr, void *cookie,
void (*callback)(void *cookie, struct key_params*));
- int (*del_key)(struct wiphy *wiphy, struct net_device *netdev,
+ int (*del_key)(struct wiphy *wiphy, struct wireless_dev *wdev,
int link_id, u8 key_index, bool pairwise,
const u8 *mac_addr);
int (*set_default_key)(struct wiphy *wiphy,
struct net_device *netdev, int link_id,
u8 key_index, bool unicast, bool multicast);
int (*set_default_mgmt_key)(struct wiphy *wiphy,
- struct net_device *netdev, int link_id,
+ struct wireless_dev *wdev, int link_id,
u8 key_index);
int (*set_default_beacon_key)(struct wiphy *wiphy,
- struct net_device *netdev,
+ struct wireless_dev *wdev,
int link_id,
u8 key_index);
@@ -4951,17 +5139,17 @@ struct cfg80211_ops {
unsigned int link_id);
- int (*add_station)(struct wiphy *wiphy, struct net_device *dev,
+ int (*add_station)(struct wiphy *wiphy, struct wireless_dev *wdev,
const u8 *mac,
struct station_parameters *params);
- int (*del_station)(struct wiphy *wiphy, struct net_device *dev,
+ int (*del_station)(struct wiphy *wiphy, struct wireless_dev *wdev,
struct station_del_parameters *params);
- int (*change_station)(struct wiphy *wiphy, struct net_device *dev,
+ int (*change_station)(struct wiphy *wiphy, struct wireless_dev *wdev,
const u8 *mac,
struct station_parameters *params);
- int (*get_station)(struct wiphy *wiphy, struct net_device *dev,
+ int (*get_station)(struct wiphy *wiphy, struct wireless_dev *wdev,
const u8 *mac, struct station_info *sinfo);
- int (*dump_station)(struct wiphy *wiphy, struct net_device *dev,
+ int (*dump_station)(struct wiphy *wiphy, struct wireless_dev *wdev,
int idx, u8 *mac, struct station_info *sinfo);
int (*add_mpath)(struct wiphy *wiphy, struct net_device *dev,
@@ -5205,7 +5393,12 @@ struct cfg80211_ops {
struct wireless_dev *wdev,
struct cfg80211_nan_conf *conf,
u32 changes);
-
+ int (*nan_set_local_sched)(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ struct cfg80211_nan_local_sched *sched);
+ int (*nan_set_peer_sched)(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ struct cfg80211_nan_peer_sched *sched);
int (*set_multicast_to_unicast)(struct wiphy *wiphy,
struct net_device *dev,
const bool enabled);
@@ -5834,6 +6027,12 @@ enum wiphy_nan_flags {
* @max_channel_switch_time: maximum channel switch time in milliseconds.
* @dev_capabilities: NAN device capabilities as defined in Wi-Fi Aware (TM)
* specification Table 79 (Capabilities field).
+ * @phy: Band-agnostic capabilities for NAN data interfaces. Since NAN
+ * operates on multiple channels simultaneously, these capabilities apply
+ * across all bands. Valid only if NL80211_IFTYPE_NAN_DATA is supported.
+ * @phy.ht: HT capabilities (mandatory for NAN data)
+ * @phy.vht: VHT capabilities (optional)
+ * @phy.he: HE capabilities (optional)
*/
struct wiphy_nan_capa {
u32 flags;
@@ -5841,6 +6040,11 @@ struct wiphy_nan_capa {
u8 n_antennas;
u16 max_channel_switch_time;
u8 dev_capabilities;
+ struct {
+ struct ieee80211_sta_ht_cap ht;
+ struct ieee80211_sta_vht_cap vht;
+ struct ieee80211_sta_he_cap he;
+ } phy;
};
#define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff
@@ -6734,8 +6938,8 @@ enum ieee80211_ap_reg_power {
* the P2P Device.
* @ps: powersave mode is enabled
* @ps_timeout: dynamic powersave timeout
- * @ap_unexpected_nlportid: (private) netlink port ID of application
- * registered for unexpected class 3 frames (AP mode)
+ * @unexpected_nlportid: (private) netlink port ID of application
+ * registered for unexpected frames (AP mode or NAN_DATA mode)
* @conn: (private) cfg80211 software SME connection state machine data
* @connect_keys: (private) keys to set after connection is established
* @conn_bss_type: connecting/connected BSS type
@@ -6797,7 +7001,7 @@ struct wireless_dev {
bool ps;
int ps_timeout;
- u32 ap_unexpected_nlportid;
+ u32 unexpected_nlportid;
u32 owner_nlportid;
bool nl_owner_dead;
@@ -6857,6 +7061,9 @@ struct wireless_dev {
} ocb;
struct {
u8 cluster_id[ETH_ALEN] __aligned(2);
+ u8 n_channels;
+ struct cfg80211_chan_def *chandefs;
+ bool sched_update_pending;
} nan;
} u;
@@ -8962,35 +9169,35 @@ static inline void cfg80211_sinfo_release_content(struct station_info *sinfo)
/**
* cfg80211_new_sta - notify userspace about station
*
- * @dev: the netdev
+ * @wdev: the wireless device
* @mac_addr: the station's address
* @sinfo: the station information
* @gfp: allocation flags
*/
-void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
+void cfg80211_new_sta(struct wireless_dev *wdev, const u8 *mac_addr,
struct station_info *sinfo, gfp_t gfp);
/**
* cfg80211_del_sta_sinfo - notify userspace about deletion of a station
- * @dev: the netdev
+ * @wdev: the wireless device
* @mac_addr: the station's address. For MLD station, MLD address is used.
* @sinfo: the station information/statistics
* @gfp: allocation flags
*/
-void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr,
+void cfg80211_del_sta_sinfo(struct wireless_dev *wdev, const u8 *mac_addr,
struct station_info *sinfo, gfp_t gfp);
/**
* cfg80211_del_sta - notify userspace about deletion of a station
*
- * @dev: the netdev
+ * @wdev: the wireless device
* @mac_addr: the station's address. For MLD station, MLD address is used.
* @gfp: allocation flags
*/
-static inline void cfg80211_del_sta(struct net_device *dev,
+static inline void cfg80211_del_sta(struct wireless_dev *wdev,
const u8 *mac_addr, gfp_t gfp)
{
- cfg80211_del_sta_sinfo(dev, mac_addr, NULL, gfp);
+ cfg80211_del_sta_sinfo(wdev, mac_addr, NULL, gfp);
}
/**
@@ -9365,9 +9572,10 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
* @addr: the transmitter address
* @gfp: context flags
*
- * This function is used in AP mode (only!) to inform userspace that
- * a spurious class 3 frame was received, to be able to deauth the
- * sender.
+ * This function is used in AP mode to inform userspace that a spurious
+ * class 3 frame was received, to be able to deauth the sender.
+ * It is also used in NAN_DATA mode to report frames from unknown peers
+ * (A2 not assigned to any active NDP), per Wi-Fi Aware (TM) 4.0
+ * specification, section 6.2.5.
* Return: %true if the frame was passed to userspace (or this failed
* for a reason other than not having a subscription.)
*/
@@ -10014,6 +10222,18 @@ void cfg80211_nan_func_terminated(struct wireless_dev *wdev,
enum nl80211_nan_func_term_reason reason,
u64 cookie, gfp_t gfp);
+/**
+ * cfg80211_nan_sched_update_done - notify deferred schedule update completion
+ * @wdev: the wireless device reporting the event
+ * @success: whether or not the schedule update was successful
+ * @gfp: allocation flags
+ *
+ * This function notifies user space that a deferred local NAN schedule update
+ * (requested with %NL80211_ATTR_NAN_SCHED_DEFERRED) has been completed.
+ */
+void cfg80211_nan_sched_update_done(struct wireless_dev *wdev, bool success,
+ gfp_t gfp);
+
/* ethtool helper */
void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info);
@@ -10354,6 +10574,39 @@ void cfg80211_nan_cluster_joined(struct wireless_dev *wdev,
const u8 *cluster_id, bool new_cluster,
gfp_t gfp);
+/**
+ * cfg80211_nan_ulw_update - Notify user space about ULW update
+ * @wdev: Pointer to the wireless device structure
+ * @ulw: Pointer to the ULW blob data
+ * @ulw_len: Length of the ULW blob in bytes
+ * @gfp: Memory allocation flags
+ *
+ * This function is used by drivers to notify user space when the device's
+ * ULW (Unaligned Schedule) blob has been updated. User space can attach this
+ * blob to frames sent to peers.
+ */
+void cfg80211_nan_ulw_update(struct wireless_dev *wdev,
+ const u8 *ulw, size_t ulw_len, gfp_t gfp);
+
+/**
+ * cfg80211_nan_channel_evac - Notify user space about NAN channel evacuation
+ * @wdev: Pointer to the wireless device structure
+ * @chandef: Pointer to the channel definition of the NAN channel that was
+ * evacuated
+ * @gfp: Memory allocation flags
+ *
+ * This function is used by drivers to notify user space when a NAN
+ * channel has been evacuated (i.e. ULWed) due to channel resource conflicts
+ * with other interfaces.
+ * This can happen when another interface sharing the channel resource with NAN
+ * needs to move to a different channel (e.g. due to channel switch or link
+ * switch). User space may reconfigure the local schedule to exclude the
+ * evacuated channel.
+ */
+void cfg80211_nan_channel_evac(struct wireless_dev *wdev,
+ const struct cfg80211_chan_def *chandef,
+ gfp_t gfp);
+
#ifdef CONFIG_CFG80211_DEBUGFS
/**
* wiphy_locked_debugfs_read - do a locked read in debugfs
@@ -10472,4 +10725,27 @@ cfg80211_s1g_get_primary_sibling(struct wiphy *wiphy,
return ieee80211_get_channel_khz(wiphy, sibling_1mhz_khz);
}
+
+/**
+ * cfg80211_incumbent_signal_notify - Notify userspace of incumbent signal detection
+ * @wiphy: the wiphy to use
+ * @chandef: channel definition in which the interference was detected
+ * @signal_interference_bitmap: bitmap indicating interference across 20 MHz segments
+ * @gfp: allocation context for message creation and multicast; pass GFP_ATOMIC
+ * if called from atomic context (e.g. firmware event handler), otherwise
+ * GFP_KERNEL
+ *
+ * Use this function to notify userspace when an incumbent signal is detected on
+ * the operating channel in the 6 GHz band. The notification includes the
+ * current channel definition and a bitmap representing interference across
+ * the operating bandwidth. Each bit in the bitmap corresponds to a 20 MHz
+ * segment, with the lowest bit representing the lowest frequency segment.
+ * Punctured sub-channels are included in the bitmap structure but are always
+ * set to zero since interference detection is not performed on them.
+ */
+void cfg80211_incumbent_signal_notify(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef,
+ u32 signal_interference_bitmap,
+ gfp_t gfp);
+
#endif /* __NET_CFG80211_H */
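
[Editor's note: a worked example of the bitmap convention documented above, assuming an 80 MHz chandef (four 20 MHz segments) with interference detected only on the second-lowest segment; the wiphy and chandef variables are assumed to be in scope.]

	u32 bitmap = BIT(1);	/* bit 0 = lowest 20 MHz segment, so this flags segment 1 */

	cfg80211_incumbent_signal_notify(wiphy, &chandef, bitmap, GFP_ATOMIC);
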
diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h
index b2c359c6dd1b..2c1f0ec309e9 100644
--- a/include/net/codel_impl.h
+++ b/include/net/codel_impl.h
@@ -120,10 +120,10 @@ static bool codel_should_drop(const struct sk_buff *skb,
}
skb_len = skb_len_func(skb);
- vars->ldelay = now - skb_time_func(skb);
+ WRITE_ONCE(vars->ldelay, now - skb_time_func(skb));
if (unlikely(skb_len > stats->maxpacket))
- stats->maxpacket = skb_len;
+ WRITE_ONCE(stats->maxpacket, skb_len);
if (codel_time_before(vars->ldelay, params->target) ||
*backlog <= params->mtu) {
@@ -159,7 +159,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (!skb) {
vars->first_above_time = 0;
- vars->dropping = false;
+ WRITE_ONCE(vars->dropping, false);
return skb;
}
now = codel_get_time();
@@ -168,7 +168,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (vars->dropping) {
if (!drop) {
/* sojourn time below target - leave dropping state */
- vars->dropping = false;
+ WRITE_ONCE(vars->dropping, false);
} else if (codel_time_after_eq(now, vars->drop_next)) {
/* It's time for the next drop. Drop the current
* packet and dequeue the next. The dequeue might
@@ -180,16 +180,18 @@ static struct sk_buff *codel_dequeue(void *ctx,
*/
while (vars->dropping &&
codel_time_after_eq(now, vars->drop_next)) {
- vars->count++; /* dont care of possible wrap
- * since there is no more divide
- */
+ /* dont care of possible wrap
+ * since there is no more divide.
+ */
+ WRITE_ONCE(vars->count, vars->count + 1);
codel_Newton_step(vars);
if (params->ecn && INET_ECN_set_ce(skb)) {
- stats->ecn_mark++;
- vars->drop_next =
+ WRITE_ONCE(stats->ecn_mark,
+ stats->ecn_mark + 1);
+ WRITE_ONCE(vars->drop_next,
codel_control_law(vars->drop_next,
params->interval,
- vars->rec_inv_sqrt);
+ vars->rec_inv_sqrt));
goto end;
}
stats->drop_len += skb_len_func(skb);
@@ -202,13 +204,13 @@ static struct sk_buff *codel_dequeue(void *ctx,
skb_time_func,
backlog, now)) {
/* leave dropping state */
- vars->dropping = false;
+ WRITE_ONCE(vars->dropping, false);
} else {
/* and schedule the next drop */
- vars->drop_next =
+ WRITE_ONCE(vars->drop_next,
codel_control_law(vars->drop_next,
params->interval,
- vars->rec_inv_sqrt);
+ vars->rec_inv_sqrt));
}
}
}
@@ -216,7 +218,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
u32 delta;
if (params->ecn && INET_ECN_set_ce(skb)) {
- stats->ecn_mark++;
+ WRITE_ONCE(stats->ecn_mark, stats->ecn_mark + 1);
} else {
stats->drop_len += skb_len_func(skb);
drop_func(skb, ctx);
@@ -227,7 +229,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
stats, skb_len_func,
skb_time_func, backlog, now);
}
- vars->dropping = true;
+ WRITE_ONCE(vars->dropping, true);
/* if min went above target close to when we last went below it
* assume that the drop rate that controlled the queue on the
* last cycle is a good starting point to control it now.
@@ -236,19 +238,20 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (delta > 1 &&
codel_time_before(now - vars->drop_next,
16 * params->interval)) {
- vars->count = delta;
+ WRITE_ONCE(vars->count, delta);
/* we dont care if rec_inv_sqrt approximation
* is not very precise :
* Next Newton steps will correct it quadratically.
*/
codel_Newton_step(vars);
} else {
- vars->count = 1;
+ WRITE_ONCE(vars->count, 1);
vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
}
- vars->lastcount = vars->count;
- vars->drop_next = codel_control_law(now, params->interval,
- vars->rec_inv_sqrt);
+ WRITE_ONCE(vars->lastcount, vars->count);
+ WRITE_ONCE(vars->drop_next,
+ codel_control_law(now, params->interval,
+ vars->rec_inv_sqrt));
}
end:
if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) {
@@ -262,7 +265,7 @@ end:
params->ce_threshold_selector));
}
if (set_ce && INET_ECN_set_ce(skb))
- stats->ce_mark++;
+ WRITE_ONCE(stats->ce_mark, stats->ce_mark + 1);
}
return skb;
}
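
[Editor's note: the WRITE_ONCE() annotations above are only meaningful when lockless readers use the matching READ_ONCE(); a sketch of what such a reader (e.g. a stats dump that runs without the qdisc lock) might look like.]

	/* Lockless reader side, pairing with the WRITE_ONCE() in codel_dequeue(). */
	u32 maxpacket = READ_ONCE(stats->maxpacket);
	u32 ecn_mark = READ_ONCE(stats->ecn_mark);
	bool dropping = READ_ONCE(vars->dropping);
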
diff --git a/include/net/devlink.h b/include/net/devlink.h
index cb839e0435a1..bcd31de1f890 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -129,6 +129,7 @@ struct devlink_rate {
struct devlink_port {
struct list_head list;
struct list_head region_list;
+ struct list_head resource_list;
struct devlink *devlink;
const struct devlink_port_ops *ops;
unsigned int index;
@@ -1611,6 +1612,9 @@ struct devlink_ops {
void *devlink_priv(struct devlink *devlink);
struct devlink *priv_to_devlink(void *priv);
struct device *devlink_to_dev(const struct devlink *devlink);
+const char *devlink_bus_name(const struct devlink *devlink);
+const char *devlink_dev_name(const struct devlink *devlink);
+const char *devlink_dev_driver_name(const struct devlink *devlink);
/* Devlink instance explicit locking */
void devl_lock(struct devlink *devlink);
@@ -1644,6 +1648,13 @@ void devlink_register(struct devlink *devlink);
void devlink_unregister(struct devlink *devlink);
void devlink_free(struct devlink *devlink);
+struct devlink *devlink_shd_get(const char *id,
+ const struct devlink_ops *ops,
+ size_t priv_size,
+ const struct device_driver *driver);
+void devlink_shd_put(struct devlink *devlink);
+void *devlink_shd_get_priv(struct devlink *devlink);
+
/**
* struct devlink_port_ops - Port operations
* @port_split: Callback used to split the port into multiple ones.
@@ -1875,12 +1886,19 @@ int devl_resource_register(struct devlink *devlink,
u64 resource_size,
u64 resource_id,
u64 parent_resource_id,
- const struct devlink_resource_size_params *size_params);
+ const struct devlink_resource_size_params *params);
void devl_resources_unregister(struct devlink *devlink);
void devlink_resources_unregister(struct devlink *devlink);
int devl_resource_size_get(struct devlink *devlink,
u64 resource_id,
u64 *p_resource_size);
+int
+devl_port_resource_register(struct devlink_port *devlink_port,
+ const char *resource_name,
+ u64 resource_size, u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *params);
+void devl_port_resources_unregister(struct devlink_port *devlink_port);
int devl_dpipe_table_resource_set(struct devlink *devlink,
const char *table_name, u64 resource_id,
u64 resource_units);
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index a7b7abd66e21..e0ca3904ff8e 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -68,18 +68,15 @@
FN(SECURITY_HOOK) \
FN(QDISC_DROP) \
FN(QDISC_BURST_DROP) \
- FN(QDISC_OVERLIMIT) \
- FN(QDISC_CONGESTED) \
- FN(CAKE_FLOOD) \
- FN(FQ_BAND_LIMIT) \
- FN(FQ_HORIZON_LIMIT) \
- FN(FQ_FLOW_LIMIT) \
FN(CPU_BACKLOG) \
+ FN(MACVLAN_BROADCAST_BACKLOG) \
+ FN(IPVLAN_MULTICAST_BACKLOG) \
FN(XDP) \
FN(TC_INGRESS) \
FN(UNHANDLED_PROTO) \
FN(SKB_CSUM) \
FN(SKB_GSO_SEG) \
+ FN(SKB_BAD_GSO) \
FN(SKB_UCOPY_FAULT) \
FN(DEV_HDR) \
FN(DEV_READY) \
@@ -127,9 +124,9 @@
FN(CANFD_RX_INVALID_FRAME) \
FN(CANXL_RX_INVALID_FRAME) \
FN(PFMEMALLOC) \
- FN(DUALPI2_STEP_DROP) \
FN(PSP_INPUT) \
FN(PSP_OUTPUT) \
+ FN(RECURSION_LIMIT) \
FNe(MAX)
/**
@@ -371,8 +368,10 @@ enum skb_drop_reason {
/** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */
SKB_DROP_REASON_SECURITY_HOOK,
/**
- * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting (
- * failed to enqueue to current qdisc)
+ * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc during enqueue or
+ * dequeue. More specific drop reasons are available via the
+ * qdisc:qdisc_drop tracepoint, which also provides qdisc handle
+ * and name for identifying the source.
*/
SKB_DROP_REASON_QDISC_DROP,
/**
@@ -381,41 +380,21 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_QDISC_BURST_DROP,
/**
- * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc
- * instance exceeds its total buffer size limit.
- */
- SKB_DROP_REASON_QDISC_OVERLIMIT,
- /**
- * @SKB_DROP_REASON_QDISC_CONGESTED: dropped by a qdisc AQM algorithm
- * due to congestion.
- */
- SKB_DROP_REASON_QDISC_CONGESTED,
- /**
- * @SKB_DROP_REASON_CAKE_FLOOD: dropped by the flood protection part of
- * CAKE qdisc AQM algorithm (BLUE).
- */
- SKB_DROP_REASON_CAKE_FLOOD,
- /**
- * @SKB_DROP_REASON_FQ_BAND_LIMIT: dropped by fq qdisc when per band
- * limit is reached.
- */
- SKB_DROP_REASON_FQ_BAND_LIMIT,
- /**
- * @SKB_DROP_REASON_FQ_HORIZON_LIMIT: dropped by fq qdisc when packet
- * timestamp is too far in the future.
- */
- SKB_DROP_REASON_FQ_HORIZON_LIMIT,
- /**
- * @SKB_DROP_REASON_FQ_FLOW_LIMIT: dropped by fq qdisc when a flow
- * exceeds its limits.
- */
- SKB_DROP_REASON_FQ_FLOW_LIMIT,
- /**
* @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU
* backlog queue. This can be caused by backlog queue full (see
* netdev_max_backlog in net.rst) or RPS flow limit
*/
SKB_DROP_REASON_CPU_BACKLOG,
+ /**
+ * @SKB_DROP_REASON_MACVLAN_BROADCAST_BACKLOG: failed to enqueue the skb
+ * to macvlan broadcast queue.
+ */
+ SKB_DROP_REASON_MACVLAN_BROADCAST_BACKLOG,
+ /**
+ * @SKB_DROP_REASON_IPVLAN_MULTICAST_BACKLOG: failed to enqueue the skb
+ * to ipvlan multicast queue.
+ */
+ SKB_DROP_REASON_IPVLAN_MULTICAST_BACKLOG,
/** @SKB_DROP_REASON_XDP: dropped by XDP in input path */
SKB_DROP_REASON_XDP,
/** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */
@@ -426,6 +405,8 @@ enum skb_drop_reason {
SKB_DROP_REASON_SKB_CSUM,
/** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */
SKB_DROP_REASON_SKB_GSO_SEG,
+ /** @SKB_DROP_REASON_SKB_BAD_GSO: malicious gso packet. */
+ SKB_DROP_REASON_SKB_BAD_GSO,
/**
* @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space,
* e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx()
@@ -613,15 +594,12 @@ enum skb_drop_reason {
* reached a path or socket not eligible for use of memory reserves
*/
SKB_DROP_REASON_PFMEMALLOC,
- /**
- * @SKB_DROP_REASON_DUALPI2_STEP_DROP: dropped by the step drop
- * threshold of DualPI2 qdisc.
- */
- SKB_DROP_REASON_DUALPI2_STEP_DROP,
/** @SKB_DROP_REASON_PSP_INPUT: PSP input checks failed */
SKB_DROP_REASON_PSP_INPUT,
/** @SKB_DROP_REASON_PSP_OUTPUT: PSP output checks failed */
SKB_DROP_REASON_PSP_OUTPUT,
+ /** @SKB_DROP_REASON_RECURSION_LIMIT: Dead loop on virtual device. */
+ SKB_DROP_REASON_RECURSION_LIMIT,
/**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
diff --git a/include/net/dropreason-qdisc.h b/include/net/dropreason-qdisc.h
new file mode 100644
index 000000000000..fb151cd31751
--- /dev/null
+++ b/include/net/dropreason-qdisc.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _LINUX_DROPREASON_QDISC_H
+#define _LINUX_DROPREASON_QDISC_H
+#include <net/dropreason.h>
+
+#define DEFINE_QDISC_DROP_REASON(FN, FNe) \
+ FN(UNSPEC) \
+ FN(GENERIC) \
+ FN(OVERLIMIT) \
+ FN(CONGESTED) \
+ FN(MAXFLOWS) \
+ FN(FLOOD_PROTECTION) \
+ FN(BAND_LIMIT) \
+ FN(HORIZON_LIMIT) \
+ FN(FLOW_LIMIT) \
+ FN(L4S_STEP_NON_ECN) \
+ FNe(MAX)
+
+#undef FN
+#undef FNe
+#define FN(reason) QDISC_DROP_##reason,
+#define FNe(reason) QDISC_DROP_##reason
+
+/**
+ * enum qdisc_drop_reason - reason why a qdisc dropped a packet
+ *
+ * Qdisc-specific drop reasons for packet drops that occur within the
+ * traffic control (TC) queueing discipline layer. These reasons provide
+ * detailed diagnostics about why packets were dropped by various qdisc
+ * algorithms, enabling fine-grained monitoring and troubleshooting of
+ * queue behavior.
+ */
+enum qdisc_drop_reason {
+ /**
+ * @QDISC_DROP_UNSPEC: unspecified/invalid qdisc drop reason.
+ * Value 0 is analogous to SKB_NOT_DROPPED_YET in enum skb_drop_reason;
+ * it is used to catch zero-initialized drop_reason fields.
+ */
+ QDISC_DROP_UNSPEC = 0,
+ /**
+ * @__QDISC_DROP_REASON: subsystem base value for qdisc drop reasons
+ */
+ __QDISC_DROP_REASON = SKB_DROP_REASON_SUBSYS_QDISC <<
+ SKB_DROP_REASON_SUBSYS_SHIFT,
+ /**
+ * @QDISC_DROP_GENERIC: generic/default qdisc drop, used when no
+ * more specific reason applies
+ */
+ QDISC_DROP_GENERIC,
+ /**
+ * @QDISC_DROP_OVERLIMIT: packet dropped because the qdisc queue
+ * length exceeded its configured limit (sch->limit). This typically
+ * indicates the queue is full and cannot accept more packets.
+ */
+ QDISC_DROP_OVERLIMIT,
+ /**
+ * @QDISC_DROP_CONGESTED: packet dropped due to active congestion
+ * control algorithms (e.g., CoDel, PIE, RED) detecting network
+ * congestion. The qdisc proactively dropped the packet to signal
+ * congestion to the sender and prevent bufferbloat.
+ */
+ QDISC_DROP_CONGESTED,
+ /**
+ * @QDISC_DROP_MAXFLOWS: packet dropped because the qdisc's flow
+ * tracking table is full and no free slots are available to allocate
+ * for a new flow. This indicates flow table exhaustion in flow-based
+ * qdiscs that maintain per-flow state (e.g., SFQ).
+ */
+ QDISC_DROP_MAXFLOWS,
+ /**
+ * @QDISC_DROP_FLOOD_PROTECTION: packet dropped by flood protection
+ * mechanism detecting unresponsive flows (potential DoS/flood).
+ * Used by qdiscs implementing probabilistic drop algorithms like
+ * BLUE (e.g., CAKE's Cobalt AQM).
+ */
+ QDISC_DROP_FLOOD_PROTECTION,
+ /**
+ * @QDISC_DROP_BAND_LIMIT: packet dropped because the priority band's
+ * limit was reached. Used by qdiscs with priority bands that have
+ * per-band packet limits (e.g., FQ).
+ */
+ QDISC_DROP_BAND_LIMIT,
+ /**
+ * @QDISC_DROP_HORIZON_LIMIT: packet dropped because its timestamp
+ * is too far in the future (beyond the configured horizon).
+ * Used by qdiscs with time-based scheduling (e.g., FQ).
+ */
+ QDISC_DROP_HORIZON_LIMIT,
+ /**
+ * @QDISC_DROP_FLOW_LIMIT: packet dropped because an individual flow
+ * exceeded its per-flow packet/depth limit. Used by FQ and SFQ qdiscs
+ * to enforce per-flow fairness and prevent a single flow from
+ * monopolizing queue resources.
+ */
+ QDISC_DROP_FLOW_LIMIT,
+ /**
+ * @QDISC_DROP_L4S_STEP_NON_ECN: DualPI2 qdisc dropped a non-ECN-capable
+ * packet because the L4S queue delay exceeded the step threshold.
+ * Since the packet cannot be ECN-marked, it must be dropped to signal
+ * congestion. See RFC 9332 for the DualQ Coupled AQM step mechanism.
+ */
+ QDISC_DROP_L4S_STEP_NON_ECN,
+ /**
+ * @QDISC_DROP_MAX: the maximum of qdisc drop reasons, which
+ * shouldn't be used as a real 'reason' - only for tracing code gen
+ */
+ QDISC_DROP_MAX,
+};
+
+#undef FN
+#undef FNe
+
+#endif
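
[Editor's note: the FN/FNe list above is the usual X-macro trick: a consumer can re-expand DEFINE_QDISC_DROP_REASON() to build a string table that stays in sync with the enum. A hedged sketch follows (the consumer-side macros are illustrative, not part of this patch); the subsystem bits are masked off so the strings index from zero.]

#define FN(reason)	[QDISC_DROP_##reason & ~SKB_DROP_REASON_SUBSYS_MASK] = #reason,
#define FNe(reason)	[QDISC_DROP_##reason & ~SKB_DROP_REASON_SUBSYS_MASK] = #reason

static const char * const qdisc_drop_reasons[] = {
	DEFINE_QDISC_DROP_REASON(FN, FNe)
};
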
diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index 7d3b1a2a6fec..1df60645fb27 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -23,6 +23,12 @@ enum skb_drop_reason_subsys {
*/
SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
+ /**
+ * @SKB_DROP_REASON_SUBSYS_QDISC: TC qdisc drop reasons,
+ * see include/net/dropreason-qdisc.h
+ */
+ SKB_DROP_REASON_SUBSYS_QDISC,
+
/** @SKB_DROP_REASON_SUBSYS_NUM: number of subsystems defined */
SKB_DROP_REASON_SUBSYS_NUM
};
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6c17446f3dcc..8b6d34e8a6f0 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -831,6 +831,22 @@ dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst,
return false;
}
+#define dsa_switch_for_each_bridge_member(_dp, _ds, _bdev) \
+ dsa_switch_for_each_user_port(_dp, _ds) \
+ if (dsa_port_offloads_bridge_dev(_dp, _bdev))
+
+static inline u32
+dsa_bridge_ports(struct dsa_switch *ds, const struct net_device *bdev)
+{
+ struct dsa_port *dp;
+ u32 mask = 0;
+
+ dsa_switch_for_each_bridge_member(dp, ds, bdev)
+ mask |= BIT(dp->index);
+
+ return mask;
+}
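
[Editor's note: a hedged usage sketch of the new helper: a switch driver that programs a per-bridge member mask could derive it directly from dsa_bridge_ports(); the function name and register accessor below are invented for illustration.]

static int example_bridge_join(struct dsa_switch *ds, int port,
			       const struct net_device *bridge_dev)
{
	u32 members = dsa_bridge_ports(ds, bridge_dev) | BIT(port);

	/* example_write_port_mask() is a made-up register accessor. */
	return example_write_port_mask(ds, port, members);
}
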
+
static inline bool dsa_port_tree_same(const struct dsa_port *a,
const struct dsa_port *b)
{
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 6632b1aa7584..62534d1f3c70 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -6,6 +6,9 @@
#include <linux/types.h>
#include <linux/netdevice.h>
#include <net/protocol.h>
+#ifdef CONFIG_RPS
+#include <net/rps-types.h>
+#endif
struct skb_defer_node {
struct llist_head defer_list;
@@ -33,7 +36,7 @@ struct net_hotdata {
struct kmem_cache *skbuff_fclone_cache;
struct kmem_cache *skb_small_head_cache;
#ifdef CONFIG_RPS
- struct rps_sock_flow_table __rcu *rps_sock_flow_table;
+ rps_tag_ptr rps_sock_flow_table;
u32 rps_cpu_mask;
#endif
struct skb_defer_node __percpu *skb_defer_nodes;
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index ece8dabd209a..b814e1acc512 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -18,6 +18,9 @@ struct sk_buff;
struct sock;
struct sockaddr;
+struct dst_entry *inet6_csk_route_socket(struct sock *sk,
+ struct flowi6 *fl6);
+
struct dst_entry *inet6_csk_route_req(const struct sock *sk,
struct dst_entry *dst,
struct flowi6 *fl6,
@@ -25,5 +28,4 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
-struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu);
#endif /* _INET6_CONNECTION_SOCK_H */
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index c16de5b7963f..2cc5d416bbb5 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -24,6 +24,8 @@
struct inet_hashinfo;
+void inet6_init_ehash_secret(void);
+
static inline unsigned int __inet6_ehashfn(const u32 lhash,
const u16 lport,
const u32 fhash,
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 5dd2bf24449e..3d747896be30 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -59,8 +59,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int inet_ctl_sock_create(struct sock **sk, unsigned short family,
unsigned short type, unsigned char protocol,
struct net *net);
-int inet_recv_error(struct sock *sk, struct msghdr *msg, int len,
- int *addr_len);
+int inet_recv_error(struct sock *sk, struct msghdr *msg, int len);
struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb);
int inet_gro_complete(struct sk_buff *skb, int nhoff);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 5cb3056d6ddc..433c2df23076 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -34,7 +34,7 @@ struct tcp_congestion_ops;
*/
struct inet_connection_sock_af_ops {
int (*queue_xmit)(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
- void (*send_check)(struct sock *sk, struct sk_buff *skb);
+ u16 net_header_len;
int (*rebuild_header)(struct sock *sk);
void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb);
int (*conn_request)(struct sock *sk, struct sk_buff *skb);
@@ -45,7 +45,6 @@ struct inet_connection_sock_af_ops {
bool *own_req,
void (*opt_child_init)(struct sock *newsk,
const struct sock *sk));
- u16 net_header_len;
int (*setsockopt)(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int (*getsockopt)(struct sock *sk, int level, int optname,
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 6d936e9f2fd3..6e2fe186d0dc 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -300,7 +300,6 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
unsigned long numentries, int scale,
unsigned long low_limit,
unsigned long high_limit);
-int inet_hashinfo2_init_mod(struct inet_hashinfo *h);
bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk,
diff --git a/include/net/ip.h b/include/net/ip.h
index 7f9abd457e01..7f2fe1a8401b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -692,14 +692,6 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
#endif
-#if IS_MODULE(CONFIG_IPV6)
-#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X)
-#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X)
-#else
-#define EXPORT_IPV6_MOD(X)
-#define EXPORT_IPV6_MOD_GPL(X)
-#endif
-
static inline unsigned int ipv4_addr_hash(__be32 ip)
{
return (__force unsigned int) ip;
@@ -812,7 +804,7 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
int ip_ra_control(struct sock *sk, unsigned char on,
void (*destructor)(struct sock *));
-int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len);
+int ip_recv_error(struct sock *sk, struct msghdr *msg, int len);
void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port,
u32 info, u8 *payload);
void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h
index c8a96b888277..6677b3cc3972 100644
--- a/include/net/ip6_checksum.h
+++ b/include/net/ip6_checksum.h
@@ -82,6 +82,4 @@ static inline __sum16 udp_v6_check(int len,
void udp6_set_csum(bool nocheck, struct sk_buff *skb,
const struct in6_addr *saddr,
const struct in6_addr *daddr, int len);
-
-int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto);
#endif
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 9f8b6814a96a..9cd27e1b9b69 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -486,11 +486,30 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
rcu_read_unlock();
}
+#if IS_ENABLED(CONFIG_IPV6)
int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
void fib6_nh_release(struct fib6_nh *fib6_nh);
void fib6_nh_release_dsts(struct fib6_nh *fib6_nh);
+#else
+static inline int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
+ return -EAFNOSUPPORT;
+}
+
+static inline void fib6_nh_release(struct fib6_nh *fib6_nh)
+{
+}
+
+static inline void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
+{
+}
+#endif
+
int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type,
@@ -502,8 +521,15 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
unsigned int nsiblings,
struct netlink_ext_ack *extack);
int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt);
+#if IS_ENABLED(CONFIG_IPV6)
void fib6_rt_update(struct net *net, struct fib6_info *rt,
struct nl_info *info);
+#else
+static inline void fib6_rt_update(struct net *net, struct fib6_info *rt,
+ struct nl_info *info)
+{
+}
+#endif
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags);
@@ -588,8 +614,13 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack);
void fib6_update_sernum(struct net *net, struct fib6_info *rt);
+#if IS_ENABLED(CONFIG_IPV6)
void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt);
-void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i);
+#else
+static inline void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
+{
+}
+#endif
void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val);
static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
@@ -599,7 +630,7 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
bool offload, bool trap, bool offload_failed);
-#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
+#if IS_ENABLED(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
struct bpf_iter__ipv6_route {
__bpf_md_ptr(struct bpf_iter_meta *, meta);
__bpf_md_ptr(struct fib6_info *, rt);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index a55f9bf95fe3..09ffe0f13ce7 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -77,7 +77,14 @@ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
f6i->fib6_nh->fib_nh_gw_family;
}
+#if IS_ENABLED(CONFIG_IPV6)
void ip6_route_input(struct sk_buff *skb);
+#else
+static inline void ip6_route_input(struct sk_buff *skb)
+{
+}
+#endif
+
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
struct flowi6 *fl6,
@@ -119,7 +126,15 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd,
int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
int ip6_ins_rt(struct net *net, struct fib6_info *f6i);
+#if IS_ENABLED(CONFIG_IPV6)
int ip6_del_rt(struct net *net, struct fib6_info *f6i, bool skip_notify);
+#else
+static inline int ip6_del_rt(struct net *net, struct fib6_info *f6i,
+ bool skip_notify)
+{
+ return -EAFNOSUPPORT;
+}
+#endif
void rt6_flush_exceptions(struct fib6_info *f6i);
void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args,
@@ -252,19 +267,37 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb)
return rt->rt6i_flags & RTF_LOCAL;
}
+static inline bool __ipv6_anycast_destination(const struct rt6key *rt6i_dst,
+ u32 rt6i_flags,
+ const struct in6_addr *daddr)
+{
+ return rt6i_flags & RTF_ANYCAST ||
+ (rt6i_dst->plen < 127 &&
+ !(rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) &&
+ ipv6_addr_equal(&rt6i_dst->addr, daddr));
+}
+
static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
const struct in6_addr *daddr)
{
const struct rt6_info *rt = dst_rt6_info(dst);
- return rt->rt6i_flags & RTF_ANYCAST ||
- (rt->rt6i_dst.plen < 127 &&
- !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) &&
- ipv6_addr_equal(&rt->rt6i_dst.addr, daddr));
+ return __ipv6_anycast_destination(&rt->rt6i_dst, rt->rt6i_flags, daddr);
}
+#if IS_ENABLED(CONFIG_IPV6)
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
+#else
+static inline int ip6_fragment(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ int (*output)(struct net *, struct sock *,
+ struct sk_buff *))
+{
+ kfree_skb(skb);
+ return -EAFNOSUPPORT;
+}
+#endif
/* Variant of dst_mtu() for IPv6 users */
static inline u32 dst6_mtu(const struct dst_entry *dst)
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 359b595f1df9..b99805ee2fd1 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -162,7 +162,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
dev->name);
DEV_STATS_INC(dev, tx_errors);
}
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_RECURSION_LIMIT);
return;
}
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 29a36709e7f3..72d325c81313 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -11,6 +11,7 @@
#include <asm/types.h> /* for __uXX types */
#include <linux/list.h> /* for struct list_head */
+#include <linux/rculist_bl.h> /* for struct hlist_bl_head */
#include <linux/spinlock.h> /* for struct rwlock_t */
#include <linux/atomic.h> /* for struct atomic_t */
#include <linux/refcount.h> /* for struct refcount_t */
@@ -30,10 +31,23 @@
#endif
#include <net/net_namespace.h> /* Netw namespace */
#include <linux/sched/isolation.h>
+#include <linux/siphash.h>
#define IP_VS_HDR_INVERSE 1
#define IP_VS_HDR_ICMP 2
+/* conn_tab limits (as per Kconfig) */
+#define IP_VS_CONN_TAB_MIN_BITS 8
+#if BITS_PER_LONG > 32
+#define IP_VS_CONN_TAB_MAX_BITS 27
+#else
+#define IP_VS_CONN_TAB_MAX_BITS 20
+#endif
+
+/* svc_table limits */
+#define IP_VS_SVC_TAB_MIN_BITS 4
+#define IP_VS_SVC_TAB_MAX_BITS 20
+
/* Generic access of ipvs struct */
static inline struct netns_ipvs *net_ipvs(struct net* net)
{
@@ -43,8 +57,6 @@ static inline struct netns_ipvs *net_ipvs(struct net* net)
/* Connections' size value needed by ip_vs_ctl.c */
extern int ip_vs_conn_tab_size;
-extern struct mutex __ip_vs_mutex;
-
struct ip_vs_iphdr {
int hdr_flags; /* ipvs flags */
__u32 off; /* Where IP or IPv4 header starts */
@@ -265,6 +277,29 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
pr_err(msg, ##__VA_ARGS__); \
} while (0)
+struct ip_vs_aligned_lock {
+ spinlock_t l; /* Protect buckets */
+} ____cacheline_aligned_in_smp;
+
+/* For arrays per family */
+enum {
+ IP_VS_AF_INET,
+ IP_VS_AF_INET6,
+ IP_VS_AF_MAX
+};
+
+static inline int ip_vs_af_index(int af)
+{
+ return af == AF_INET6 ? IP_VS_AF_INET6 : IP_VS_AF_INET;
+}
+
+/* work_flags */
+enum {
+ IP_VS_WORK_SVC_RESIZE, /* Schedule svc_resize_work */
+ IP_VS_WORK_SVC_NORESIZE, /* Stopping svc_resize_work */
+ IP_VS_WORK_CONN_RESIZE, /* Schedule conn_resize_work */
+};
+
/* The port number of FTP service (in network order). */
#define FTPPORT cpu_to_be16(21)
#define FTPDATA cpu_to_be16(20)
@@ -466,6 +501,198 @@ struct ip_vs_est_kt_data {
int est_row; /* estimated row */
};
+/* IPVS resizable hash tables */
+struct ip_vs_rht {
+ struct hlist_bl_head *buckets;
+ struct ip_vs_rht __rcu *new_tbl; /* New/Same table */
+ seqcount_t *seqc; /* Protects moves */
+ struct ip_vs_aligned_lock *lock; /* Protect seqc */
+ int mask; /* Buckets mask */
+ int size; /* Buckets */
+ int seqc_mask; /* seqc mask */
+ int lock_mask; /* lock mask */
+ u32 table_id;
+ int u_thresh; /* upper threshold */
+ int l_thresh; /* lower threshold */
+ int lfactor; /* Load factor (shift) */
+ int bits; /* size = 1 << bits */
+ siphash_key_t hash_key;
+ struct rcu_head rcu_head;
+};
+
+/**
+ * ip_vs_rht_for_each_table() - Walk the hash tables
+ * @table: struct ip_vs_rht __rcu *table
+ * @t: current table, used as cursor, struct ip_vs_rht *var
+ * @p: previous table, temp struct ip_vs_rht *var
+ *
+ * Walk tables assuming others can not change the installed tables
+ */
+#define ip_vs_rht_for_each_table(table, t, p) \
+ for (p = NULL, t = rcu_dereference_protected(table, 1); \
+ t != p; \
+ p = t, t = rcu_dereference_protected(t->new_tbl, 1))
+
+/**
+ * ip_vs_rht_for_each_table_rcu() - Walk the hash tables under RCU reader lock
+ * @table: struct ip_vs_rht __rcu *table
+ * @t: current table, used as cursor, struct ip_vs_rht *var
+ * @p: previous table, temp struct ip_vs_rht *var
+ *
+ * We usually search in one table, and also in a second table while resizing
+ */
+#define ip_vs_rht_for_each_table_rcu(table, t, p) \
+ for (p = NULL, t = rcu_dereference(table); \
+ t != p; \
+ p = t, t = rcu_dereference(t->new_tbl))
+
+/**
+ * ip_vs_rht_for_each_bucket() - Walk all table buckets
+ * @t: current table, used as cursor, struct ip_vs_rht *var
+ * @bucket: bucket index, used as cursor, u32 var
+ * @head: bucket address, used as cursor, struct hlist_bl_head *var
+ */
+#define ip_vs_rht_for_each_bucket(t, bucket, head) \
+ for (bucket = 0, head = (t)->buckets; \
+ bucket < t->size; bucket++, head++)
+
+/**
+ * ip_vs_rht_for_bucket_retry() - Retry bucket if entries are moved
+ * @t: current table, used as cursor, struct ip_vs_rht *var
+ * @bucket: index of current bucket or hash key
+ * @sc: temp seqcount_t *var
+ * @seq: temp unsigned int var for sequence count
+ * @retry: temp int var
+ */
+#define ip_vs_rht_for_bucket_retry(t, bucket, sc, seq, retry) \
+ for (retry = 1, sc = &(t)->seqc[(bucket) & (t)->seqc_mask]; \
+ retry && ({ seq = read_seqcount_begin(sc); 1; }); \
+ retry = read_seqcount_retry(sc, seq))
+
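/*
 * Editor's sketch (not part of the patch): a simplified userspace model of
 * the read_seqcount_begin()/read_seqcount_retry() pattern used by the retry
 * loop above.  A writer bumps the sequence to an odd value before moving
 * bucket entries and back to even afterwards; a reader rescans the bucket
 * whenever the sequence changed under it.  All toy_* names are invented and
 * the kernel primitives add barriers and lockdep annotations not shown here.
 */
#include <stdatomic.h>
#include <stdbool.h>

struct toy_seqcount {
	atomic_uint seq;
};

static unsigned int toy_read_begin(struct toy_seqcount *sc)
{
	unsigned int s;

	/* Wait until no writer is active (sequence is even). */
	do {
		s = atomic_load_explicit(&sc->seq, memory_order_acquire);
	} while (s & 1);
	return s;
}

static bool toy_read_retry(struct toy_seqcount *sc, unsigned int start)
{
	/* A changed sequence means entries may have moved: rescan the bucket. */
	return atomic_load_explicit(&sc->seq, memory_order_acquire) != start;
}

static void toy_write_begin(struct toy_seqcount *sc)
{
	atomic_fetch_add_explicit(&sc->seq, 1, memory_order_release); /* -> odd */
}

static void toy_write_end(struct toy_seqcount *sc)
{
	atomic_fetch_add_explicit(&sc->seq, 1, memory_order_release); /* -> even */
}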
+/**
+ * DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU() - Declare variables
+ *
+ * Variables for ip_vs_rht_walk_buckets_rcu
+ */
+#define DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU() \
+ struct ip_vs_rht *_t, *_p; \
+ unsigned int _seq; \
+ seqcount_t *_sc; \
+ u32 _bucket; \
+ int _retry
+/**
+ * ip_vs_rht_walk_buckets_rcu() - Walk all buckets under RCU read lock
+ * @table: struct ip_vs_rht __rcu *table
+ * @head: bucket address, used as cursor, struct hlist_bl_head *var
+ *
+ * Can be used while others add/delete/move entries
+ * Not suitable if visiting duplicate entries is not acceptable
+ * Possible cases for a reader that uses cond_resched_rcu() in the loop:
+ * - a new table cannot be installed: no need to repeat
+ * - a new table can be installed => check and repeat if a new table is
+ * installed, needed for !PREEMPT_RCU
+ */
+#define ip_vs_rht_walk_buckets_rcu(table, head) \
+ ip_vs_rht_for_each_table_rcu(table, _t, _p) \
+ ip_vs_rht_for_each_bucket(_t, _bucket, head) \
+ ip_vs_rht_for_bucket_retry(_t, _bucket, _sc, \
+ _seq, _retry)
+
+/**
+ * DECLARE_IP_VS_RHT_WALK_BUCKET_RCU() - Declare variables
+ *
+ * Variables for ip_vs_rht_walk_bucket_rcu
+ */
+#define DECLARE_IP_VS_RHT_WALK_BUCKET_RCU() \
+ unsigned int _seq; \
+ seqcount_t *_sc; \
+ int _retry
+/**
+ * ip_vs_rht_walk_bucket_rcu() - Walk bucket under RCU read lock
+ * @t: current table, struct ip_vs_rht *var
+ * @bucket: index of current bucket or hash key
+ * @head: bucket address, used as cursor, struct hlist_bl_head *var
+ *
+ * Can be used while others add/delete/move entries
+ * Not suitable if visiting duplicate entries is not acceptable
+ * Possible cases for a reader that uses cond_resched_rcu() in the loop:
+ * - a new table cannot be installed: no need to repeat
+ * - a new table can be installed => check and repeat if a new table is
+ * installed, needed for !PREEMPT_RCU
+ */
+#define ip_vs_rht_walk_bucket_rcu(t, bucket, head) \
+ if (({ head = (t)->buckets + ((bucket) & (t)->mask); 0; })) \
+ {} \
+ else \
+ ip_vs_rht_for_bucket_retry(t, (bucket), _sc, _seq, _retry)
+
+/**
+ * DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU() - Declare variables
+ *
+ * Variables for ip_vs_rht_walk_buckets_safe_rcu
+ */
+#define DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU() \
+ struct ip_vs_rht *_t, *_p; \
+ u32 _bucket
+/**
+ * ip_vs_rht_walk_buckets_safe_rcu() - Walk all buckets under RCU read lock
+ * @table: struct ip_vs_rht __rcu *table
+ * @head: bucket address, used as cursor, struct hlist_bl_head *var
+ *
+ * Can be used while others add/delete entries but moving is disabled
+ * Using cond_resched_rcu() should be safe if tables do not change
+ */
+#define ip_vs_rht_walk_buckets_safe_rcu(table, head) \
+ ip_vs_rht_for_each_table_rcu(table, _t, _p) \
+ ip_vs_rht_for_each_bucket(_t, _bucket, head)
+
+/**
+ * DECLARE_IP_VS_RHT_WALK_BUCKETS() - Declare variables
+ *
+ * Variables for ip_vs_rht_walk_buckets
+ */
+#define DECLARE_IP_VS_RHT_WALK_BUCKETS() \
+ struct ip_vs_rht *_t, *_p; \
+ u32 _bucket
+
+/**
+ * ip_vs_rht_walk_buckets() - Walk all buckets
+ * @table: struct ip_vs_rht __rcu *table
+ * @head: bucket address, used as cursor, struct hlist_bl_head *var
+ *
+ * Use if others cannot add/delete/move entries
+ */
+#define ip_vs_rht_walk_buckets(table, head) \
+ ip_vs_rht_for_each_table(table, _t, _p) \
+ ip_vs_rht_for_each_bucket(_t, _bucket, head)
+
+/* Entries can be in one of two tables, so we flip a bit when a new table is
+ * created and store it as the highest bit in hash keys
+ */
+#define IP_VS_RHT_TABLE_ID_MASK BIT(31)
+
+/* Check if hash key is from this table */
+static inline bool ip_vs_rht_same_table(struct ip_vs_rht *t, u32 hash_key)
+{
+ return !((t->table_id ^ hash_key) & IP_VS_RHT_TABLE_ID_MASK);
+}
+
+/* Build per-table hash key from hash value */
+static inline u32 ip_vs_rht_build_hash_key(struct ip_vs_rht *t, u32 hash)
+{
+ return t->table_id | (hash & ~IP_VS_RHT_TABLE_ID_MASK);
+}
+
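/*
 * Editor's sketch (not part of the patch): userspace illustration of how
 * bit 31 of a stored hash key identifies which of the two possible tables
 * the key was computed for.  The two helpers mirror the inlines above but
 * use plain types; the toy_* names and the values in main() are made up
 * for the example.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define TOY_TABLE_ID_MASK	(1U << 31)

static bool toy_same_table(uint32_t table_id, uint32_t hash_key)
{
	return !((table_id ^ hash_key) & TOY_TABLE_ID_MASK);
}

static uint32_t toy_build_hash_key(uint32_t table_id, uint32_t hash)
{
	return table_id | (hash & ~TOY_TABLE_ID_MASK);
}

int main(void)
{
	uint32_t old_id = 0, new_id = TOY_TABLE_ID_MASK;	/* flipped on resize */
	uint32_t key = toy_build_hash_key(old_id, 0x1234abcd);

	assert(toy_same_table(old_id, key));	/* key belongs to the old table */
	assert(!toy_same_table(new_id, key));	/* stale once a new table exists */
	return 0;
}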
+void ip_vs_rht_free(struct ip_vs_rht *t);
+void ip_vs_rht_rcu_free(struct rcu_head *head);
+struct ip_vs_rht *ip_vs_rht_alloc(int buckets, int scounts, int locks);
+int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n,
+ int lfactor, int min_bits, int max_bits);
+void ip_vs_rht_set_thresholds(struct ip_vs_rht *t, int size, int lfactor,
+ int min_bits, int max_bits);
+u32 ip_vs_rht_hash_linfo(struct ip_vs_rht *t, int af,
+ const union nf_inet_addr *addr, u32 v1, u32 v2);
+
struct dst_entry;
struct iphdr;
struct ip_vs_conn;
@@ -559,50 +786,48 @@ struct ip_vs_conn_param {
__u8 pe_data_len;
};
+/* Hash node in conn_tab */
+struct ip_vs_conn_hnode {
+ struct hlist_bl_node node; /* node in conn_tab */
+ u32 hash_key; /* Key for the hash table */
+ u8 dir; /* 0=out->in, 1=in->out */
+} __packed;
+
/* IP_VS structure allocated for each dynamically scheduled connection */
struct ip_vs_conn {
- struct hlist_node c_list; /* hashed list heads */
- /* Protocol, addresses and port numbers */
+ /* Cacheline for hash table nodes - rarely modified */
+
+ struct ip_vs_conn_hnode hn0; /* Original direction */
+ u8 af; /* address family */
__be16 cport;
+ struct ip_vs_conn_hnode hn1; /* Reply direction */
+ u8 daf; /* Address family of the dest */
__be16 dport;
- __be16 vport;
- u16 af; /* address family */
- union nf_inet_addr caddr; /* client address */
- union nf_inet_addr vaddr; /* virtual address */
- union nf_inet_addr daddr; /* destination address */
+ struct ip_vs_dest *dest; /* real server */
+ atomic_t n_control; /* Number of controlled ones */
volatile __u32 flags; /* status flags */
- __u16 protocol; /* Which protocol (TCP/UDP) */
- __u16 daf; /* Address family of the dest */
- struct netns_ipvs *ipvs;
-
- /* counter and timer */
- refcount_t refcnt; /* reference count */
- struct timer_list timer; /* Expiration timer */
- volatile unsigned long timeout; /* timeout */
+ /* 44/64 */
- /* Flags and state transition */
- spinlock_t lock; /* lock for state transition */
+ struct ip_vs_conn *control; /* Master control connection */
+ const struct ip_vs_pe *pe;
+ char *pe_data;
+ __u8 pe_data_len;
volatile __u16 state; /* state info */
volatile __u16 old_state; /* old state, to be used for
* state transition triggered
* synchronization
*/
- __u32 fwmark; /* Fire wall mark from skb */
- unsigned long sync_endtime; /* jiffies + sent_retries */
+ /* 2-byte hole */
+ /* 64/96 */
- /* Control members */
- struct ip_vs_conn *control; /* Master control connection */
- atomic_t n_control; /* Number of controlled ones */
- struct ip_vs_dest *dest; /* real server */
- atomic_t in_pkts; /* incoming packet counter */
+ union nf_inet_addr caddr; /* client address */
+ union nf_inet_addr vaddr; /* virtual address */
+ /* 96/128 */
- /* Packet transmitter for different forwarding methods. If it
- * mangles the packet, it must return NF_DROP or better NF_STOLEN,
- * otherwise this must be changed to a sk_buff **.
- * NF_ACCEPT can be returned when destination is local.
- */
- int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
+ union nf_inet_addr daddr; /* destination address */
+ __u32 fwmark; /* Firewall mark from skb */
+ __be16 vport;
+ __u16 protocol; /* Which protocol (TCP/UDP) */
/* Note: we can group the following members into a structure,
* in order to save more space, and the following members are
@@ -610,14 +835,31 @@ struct ip_vs_conn {
*/
struct ip_vs_app *app; /* bound ip_vs_app object */
void *app_data; /* Application private data */
+ /* 128/168 */
struct_group(sync_conn_opt,
struct ip_vs_seq in_seq; /* incoming seq. struct */
struct ip_vs_seq out_seq; /* outgoing seq. struct */
);
+ /* 152/192 */
- const struct ip_vs_pe *pe;
- char *pe_data;
- __u8 pe_data_len;
+ struct timer_list timer; /* Expiration timer */
+ volatile unsigned long timeout; /* timeout */
+ spinlock_t lock; /* lock for state transition */
+ refcount_t refcnt; /* reference count */
+ atomic_t in_pkts; /* incoming packet counter */
+ /* 64-bit: 4-byte gap */
+
+ /* 188/256 */
+ unsigned long sync_endtime; /* jiffies + sent_retries */
+ struct netns_ipvs *ipvs;
+
+ /* Packet transmitter for different forwarding methods. If it
+ * mangles the packet, it must return NF_DROP or better NF_STOLEN,
+ * otherwise this must be changed to a sk_buff **.
+ * NF_ACCEPT can be returned when destination is local.
+ */
+ int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
struct rcu_head rcu_head;
};
@@ -673,15 +915,15 @@ struct ip_vs_dest_user_kern {
* forwarding entries.
*/
struct ip_vs_service {
- struct hlist_node s_list; /* for normal service table */
- struct hlist_node f_list; /* for fwmark-based service table */
- atomic_t refcnt; /* reference counter */
-
+ struct hlist_bl_node s_list; /* node in service table */
+ u32 hash_key; /* Key for the hash table */
u16 af; /* address family */
__u16 protocol; /* which protocol (TCP/UDP) */
+
union nf_inet_addr addr; /* IP address for virtual service */
- __be16 port; /* port number for the service */
__u32 fwmark; /* firewall mark of the service */
+ atomic_t refcnt; /* reference counter */
+ __be16 port; /* port number for the service */
unsigned int flags; /* service status flags */
unsigned int timeout; /* persistent timeout in ticks */
__be32 netmask; /* grouping granularity, mask/plen */
@@ -791,8 +1033,8 @@ struct ip_vs_pe {
int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb);
bool (*ct_match)(const struct ip_vs_conn_param *p,
struct ip_vs_conn *ct);
- u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval,
- bool inverse);
+ u32 (*hashkey_raw)(const struct ip_vs_conn_param *p,
+ struct ip_vs_rht *t, bool inverse);
int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf);
/* create connections for real-server outgoing packets */
struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc,
@@ -931,21 +1173,27 @@ struct netns_ipvs {
#endif
/* ip_vs_conn */
atomic_t conn_count; /* connection counter */
+ atomic_t no_cport_conns[IP_VS_AF_MAX];
+ struct delayed_work conn_resize_work;/* resize conn_tab */
/* ip_vs_ctl */
struct ip_vs_stats_rcu *tot_stats; /* Statistics & est. */
- int num_services; /* no of virtual services */
- int num_services6; /* IPv6 virtual services */
-
/* Trash for destinations */
struct list_head dest_trash;
spinlock_t dest_trash_lock;
struct timer_list dest_trash_timer; /* expiration timer */
+ struct mutex service_mutex; /* service reconfig */
+ struct rw_semaphore svc_resize_sem; /* svc_table resizing */
+ struct delayed_work svc_resize_work; /* resize svc_table */
+ atomic_t svc_table_changes;/* ++ on new table */
/* Service counters */
- atomic_t ftpsvc_counter;
- atomic_t nullsvc_counter;
- atomic_t conn_out_counter;
+ atomic_t num_services[IP_VS_AF_MAX]; /* Services */
+ atomic_t fwm_services[IP_VS_AF_MAX]; /* Services */
+ atomic_t nonfwm_services[IP_VS_AF_MAX];/* Services */
+ atomic_t ftpsvc_counter[IP_VS_AF_MAX]; /* FTPPORT */
+ atomic_t nullsvc_counter[IP_VS_AF_MAX];/* Zero port */
+ atomic_t conn_out_counter[IP_VS_AF_MAX];/* out conn */
#ifdef CONFIG_SYSCTL
/* delayed work for expiring no dest connections */
@@ -956,6 +1204,7 @@ struct netns_ipvs {
int drop_counter;
int old_secure_tcp;
atomic_t dropentry;
+ s8 dropentry_counters[8];
/* locks in ctl.c */
spinlock_t dropentry_lock; /* drop entry handling */
spinlock_t droppacket_lock; /* drop packet handling */
@@ -1002,6 +1251,8 @@ struct netns_ipvs {
int sysctl_est_nice; /* kthread nice */
int est_stopped; /* stop tasks */
#endif
+ int sysctl_conn_lfactor;
+ int sysctl_svc_lfactor;
/* ip_vs_lblc */
int sysctl_lblc_expiration;
@@ -1011,6 +1262,7 @@ struct netns_ipvs {
int sysctl_lblcr_expiration;
struct ctl_table_header *lblcr_ctl_header;
struct ctl_table *lblcr_ctl_table;
+ unsigned long work_flags; /* IP_VS_WORK_* flags */
/* ip_vs_est */
struct delayed_work est_reload_work;/* Reload kthread tasks */
struct mutex est_mutex; /* protect kthread tasks */
@@ -1041,6 +1293,10 @@ struct netns_ipvs {
*/
unsigned int mixed_address_family_dests;
unsigned int hooks_afmask; /* &1=AF_INET, &2=AF_INET6 */
+
+ struct ip_vs_rht __rcu *svc_table; /* Services */
+ struct ip_vs_rht __rcu *conn_tab; /* Connections */
+ atomic_t conn_tab_changes;/* ++ on new table */
};
#define DEFAULT_SYNC_THRESHOLD 3
@@ -1290,6 +1546,24 @@ static inline int sysctl_est_nice(struct netns_ipvs *ipvs)
#endif
+/* Get load factor to map conn_count/u_thresh to t->size */
+static inline int sysctl_conn_lfactor(struct netns_ipvs *ipvs)
+{
+ return READ_ONCE(ipvs->sysctl_conn_lfactor);
+}
+
+/* Get load factor to map num_services/u_thresh to t->size
+ * A smaller value decreases u_thresh to reduce collisions but increases
+ * the table size
+ * Returns a factor where:
+ * - <0: u_thresh = size >> -factor, e.g. lfactor -2 = 25% load
+ * - >=0: u_thresh = size << factor, e.g. lfactor 1 = 200% load
+ */
+static inline int sysctl_svc_lfactor(struct netns_ipvs *ipvs)
+{
+ return READ_ONCE(ipvs->sysctl_svc_lfactor);
+}
+
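/*
 * Editor's sketch (not part of the patch): the lfactor-to-threshold mapping
 * described in the comment above, written out as plain C.  toy_u_thresh()
 * is an invented helper; in the kernel the thresholds are presumably set
 * via ip_vs_rht_set_thresholds() declared earlier in this header.
 */
#include <stdio.h>

static int toy_u_thresh(int size, int lfactor)
{
	return lfactor < 0 ? size >> -lfactor : size << lfactor;
}

int main(void)
{
	/* 256-bucket table: lfactor -2 -> 64 (25% load), 1 -> 512 (200% load). */
	printf("%d %d %d\n",
	       toy_u_thresh(256, -2), toy_u_thresh(256, 0), toy_u_thresh(256, 1));
	return 0;
}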
/* IPVS core functions
* (from ip_vs_core.c)
*/
@@ -1363,6 +1637,23 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
}
void ip_vs_conn_put(struct ip_vs_conn *cp);
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
+int ip_vs_conn_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t,
+ int lfactor);
+struct ip_vs_rht *ip_vs_conn_tab_alloc(struct netns_ipvs *ipvs, int buckets,
+ int lfactor);
+
+static inline struct ip_vs_conn *
+ip_vs_hn0_to_conn(struct ip_vs_conn_hnode *hn)
+{
+ return container_of(hn, struct ip_vs_conn, hn0);
+}
+
+static inline struct ip_vs_conn *
+ip_vs_hn_to_conn(struct ip_vs_conn_hnode *hn)
+{
+ return hn->dir ? container_of(hn, struct ip_vs_conn, hn1) :
+ container_of(hn, struct ip_vs_conn, hn0);
+}
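/*
 * Editor's sketch (not part of the patch): userspace model of how the two
 * embedded hash nodes map back to their connection, as in the helpers
 * above.  hn0 sits at offset 0 and hn1 later in the struct, so the node's
 * dir flag selects which offset to subtract.  toy_* names and the local
 * container_of are for illustration only (the kernel macro also
 * type-checks the member).
 */
#include <assert.h>
#include <stddef.h>

#define toy_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct toy_hnode {
	unsigned char dir;	/* 0 = node embedded as hn0, 1 = node embedded as hn1 */
};

struct toy_conn {
	struct toy_hnode hn0;
	int other_state;
	struct toy_hnode hn1;
};

static struct toy_conn *toy_hn_to_conn(struct toy_hnode *hn)
{
	return hn->dir ? toy_container_of(hn, struct toy_conn, hn1) :
			 toy_container_of(hn, struct toy_conn, hn0);
}

int main(void)
{
	struct toy_conn c = { .hn0 = { .dir = 0 }, .hn1 = { .dir = 1 } };

	assert(toy_hn_to_conn(&c.hn0) == &c);
	assert(toy_hn_to_conn(&c.hn1) == &c);
	return 0;
}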
struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
const union nf_inet_addr *daddr,
@@ -1716,6 +2007,13 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
return fwd;
}
+/* Check if connection uses double hashing */
+static inline bool ip_vs_conn_use_hash2(struct ip_vs_conn *cp)
+{
+ return IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ &&
+ !(cp->flags & IP_VS_CONN_F_TEMPLATE);
+}
+
void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int dir);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 53c5056508be..d042afe7a245 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1044,8 +1044,18 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
+#if IS_ENABLED(CONFIG_IPV6)
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst);
+#else
+static inline struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6,
+ const struct in6_addr *final_dst)
+{
+ return ERR_PTR(-EAFNOSUPPORT);
+}
+#endif
+
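/*
 * Editor's sketch (not part of the patch): the general compile-time stub
 * pattern used above, shown with an invented feature macro.  When the
 * feature is compiled out, callers only ever see the inline stub returning
 * an error pointer, so call sites need no #ifdefs.  TOY_CONFIG_FEATURE,
 * toy_lookup() and TOY_ERR_PTR() are all hypothetical.
 */
#include <errno.h>

#define TOY_ERR_PTR(err)	((void *)(long)(err))

#ifdef TOY_CONFIG_FEATURE
void *toy_lookup(int key);			/* real symbol in feature.o */
#else
static inline void *toy_lookup(int key)
{
	(void)key;
	return TOY_ERR_PTR(-EAFNOSUPPORT);	/* feature compiled out */
}
#endif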
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst,
bool connected);
@@ -1129,10 +1139,8 @@ int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr_unsized *addr,
int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr);
void ip6_datagram_release_cb(struct sock *sk);
-int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
- int *addr_len);
-int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len,
- int *addr_len);
+int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len);
+int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len);
void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port,
u32 info, u8 *payload);
void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
@@ -1141,6 +1149,8 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
void inet6_cleanup_sock(struct sock *sk);
void inet6_sock_destruct(struct sock *sk);
int inet6_release(struct socket *sock);
+int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len,
+ u32 flags);
int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len);
int inet6_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
@@ -1181,8 +1191,6 @@ int tcp6_proc_init(struct net *net);
void tcp6_proc_exit(struct net *net);
int udp6_proc_init(struct net *net);
void udp6_proc_exit(struct net *net);
-int udplite6_proc_init(void);
-void udplite6_proc_exit(void);
int ipv6_misc_proc_init(void);
void ipv6_misc_proc_exit(void);
int snmp6_register_dev(struct inet6_dev *idev);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
deleted file mode 100644
index d3013e721b14..000000000000
--- a/include/net/ipv6_stubs.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _IPV6_STUBS_H
-#define _IPV6_STUBS_H
-
-#include <linux/in6.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/neighbour.h>
-#include <net/sock.h>
-#include <net/ipv6.h>
-
-/* structs from net/ip6_fib.h */
-struct fib6_info;
-struct fib6_nh;
-struct fib6_config;
-struct fib6_result;
-
-/* This is ugly, ideally these symbols should be built
- * into the core kernel.
- */
-struct ipv6_stub {
- int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex,
- const struct in6_addr *addr);
- int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex,
- const struct in6_addr *addr);
- struct dst_entry *(*ipv6_dst_lookup_flow)(struct net *net,
- const struct sock *sk,
- struct flowi6 *fl6,
- const struct in6_addr *final_dst);
- int (*ipv6_route_input)(struct sk_buff *skb);
-
- struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
- int (*fib6_lookup)(struct net *net, int oif, struct flowi6 *fl6,
- struct fib6_result *res, int flags);
- int (*fib6_table_lookup)(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6,
- struct fib6_result *res, int flags);
- void (*fib6_select_path)(const struct net *net, struct fib6_result *res,
- struct flowi6 *fl6, int oif, bool oif_match,
- const struct sk_buff *skb, int strict);
- u32 (*ip6_mtu_from_fib6)(const struct fib6_result *res,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr);
-
- int (*fib6_nh_init)(struct net *net, struct fib6_nh *fib6_nh,
- struct fib6_config *cfg, gfp_t gfp_flags,
- struct netlink_ext_ack *extack);
- void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
- void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh);
- void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt);
- int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify);
- void (*fib6_rt_update)(struct net *net, struct fib6_info *rt,
- struct nl_info *info);
-
- void (*udpv6_encap_enable)(void);
- void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
- const struct in6_addr *solicited_addr,
- bool router, bool solicited, bool override, bool inc_opt);
-#if IS_ENABLED(CONFIG_XFRM)
- void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
- int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
- struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
- struct list_head *head,
- struct sk_buff *skb);
- int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
- int encap_type);
-#endif
- struct neigh_table *nd_tbl;
-
- int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
- int (*output)(struct net *, struct sock *, struct sk_buff *));
- struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr,
- struct net_device *dev);
- int (*ip6_xmit)(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
- __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority);
-};
-extern const struct ipv6_stub *ipv6_stub __read_mostly;
-
-/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
-struct ipv6_bpf_stub {
- int (*inet6_bind)(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len,
- u32 flags);
- struct sock *(*udp6_lib_lookup)(const struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, __be16 dport,
- int dif, int sdif, struct udp_table *tbl,
- struct sk_buff *skb);
- int (*ipv6_setsockopt)(struct sock *sk, int level, int optname,
- sockptr_t optval, unsigned int optlen);
- int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
- sockptr_t optval, sockptr_t optlen);
- int (*ipv6_dev_get_saddr)(struct net *net,
- const struct net_device *dst_dev,
- const struct in6_addr *daddr,
- unsigned int prefs,
- struct in6_addr *saddr);
-};
-extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
-
-#endif
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index adce2144a678..40cb20d9309c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -365,6 +365,7 @@ struct ieee80211_vif_chanctx_switch {
* @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed.
* @BSS_CHANGED_MLD_TTLM: negotiated TID to link mapping was changed
* @BSS_CHANGED_TPE: transmit power envelope changed
+ * @BSS_CHANGED_NAN_LOCAL_SCHED: NAN local schedule changed (NAN mode only)
*/
enum ieee80211_bss_change {
BSS_CHANGED_ASSOC = 1<<0,
@@ -402,6 +403,7 @@ enum ieee80211_bss_change {
BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33),
BSS_CHANGED_MLD_TTLM = BIT_ULL(34),
BSS_CHANGED_TPE = BIT_ULL(35),
+ BSS_CHANGED_NAN_LOCAL_SCHED = BIT_ULL(36),
/* when adding here, make sure to change ieee80211_reconfig */
};
@@ -866,6 +868,74 @@ struct ieee80211_bss_conf {
u8 s1g_long_beacon_period;
};
+#define IEEE80211_NAN_MAX_CHANNELS 3
+
+/**
+ * struct ieee80211_nan_channel - NAN channel information
+ *
+ * @chanreq: channel request for this NAN channel. Even though chanreq::ap
+ * is irrelevant for NAN, it is still stored for convenience - some
+ * functions require it as an argument.
+ * @needed_rx_chains: number of RX chains needed for this NAN channel
+ * @chanctx_conf: chanctx_conf assigned to this NAN channel.
+ * If a local channel is being ULWed (because we needed this chanctx for
+ * something else), the local NAN channel that used this chanctx
+ * will have this pointer set to %NULL.
+ * A peer NAN channel should never have this pointer set to %NULL.
+ * @channel_entry: the Channel Entry blob as defined in Wi-Fi Aware
+ * (TM) 4.0 specification Table 100 (Channel Entry format for the NAN
+ * Availability attribute).
+ */
+struct ieee80211_nan_channel {
+ struct ieee80211_chan_req chanreq;
+ u8 needed_rx_chains;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ u8 channel_entry[6];
+};
+
+/**
+ * struct ieee80211_nan_peer_map - NAN peer schedule map
+ *
+ * This stores a single map from a peer's schedule. Each peer can have
+ * multiple maps.
+ *
+ * @map_id: the map ID from the peer schedule, %CFG80211_NAN_INVALID_MAP_ID
+ * if unused
+ * @slots: mapping of time slots to channel configurations in the schedule's
+ * channels array
+ */
+struct ieee80211_nan_peer_map {
+ u8 map_id;
+ struct ieee80211_nan_channel *slots[CFG80211_NAN_SCHED_NUM_TIME_SLOTS];
+};
+
+/**
+ * struct ieee80211_nan_peer_sched - NAN peer schedule
+ *
+ * This stores the complete schedule from a peer. Contains peer-level
+ * parameters and an array of schedule maps.
+ *
+ * @seq_id: the sequence ID from the peer schedule
+ * @committed_dw: committed DW as published by the peer
+ * @max_chan_switch: maximum channel switch time in microseconds
+ * @init_ulw: initial ULWs as published by the peer (copied)
+ * @ulw_size: number of bytes in @init_ulw
+ * @maps: array of peer schedule maps. Invalid slots have map_id set to
+ * %CFG80211_NAN_INVALID_MAP_ID.
+ * @n_channels: number of valid channel entries in @channels
+ * @channels: flexible array of negotiated peer channels for this schedule
+ */
+struct ieee80211_nan_peer_sched {
+ u8 seq_id;
+ u16 committed_dw;
+ u16 max_chan_switch;
+ const u8 *init_ulw;
+ u16 ulw_size;
+ struct ieee80211_nan_peer_map maps[CFG80211_NAN_MAX_PEER_MAPS];
+ u8 n_channels;
+ struct ieee80211_nan_channel channels[] __counted_by(n_channels);
+};
+
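/*
 * Editor's sketch (not part of the patch): how a structure ending in a
 * flexible array, such as the peer schedule above, is typically sized.
 * This is a plain-malloc userspace analogue with invented toy_* names; in
 * the kernel the allocation would normally use struct_size(), and the
 * __counted_by() annotation lets the compiler bounds-check channels[].
 */
#include <stdlib.h>

struct toy_sched {
	unsigned char n_channels;
	int channels[];			/* flexible array member */
};

static struct toy_sched *toy_sched_alloc(unsigned char n)
{
	struct toy_sched *s;

	s = calloc(1, sizeof(*s) + n * sizeof(s->channels[0]));
	if (s)
		s->n_channels = n;	/* set the count the annotation tracks */
	return s;
}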
/**
* enum mac80211_tx_info_flags - flags to describe transmission information/status
*
@@ -1917,6 +1987,8 @@ enum ieee80211_offload_flags {
IEEE80211_OFFLOAD_DECAP_ENABLED = BIT(2),
};
+#define IEEE80211_NAN_AVAIL_BLOB_MAX_LEN 54
+
/**
* struct ieee80211_eml_params - EHT Operating mode notification parameters
*
@@ -1943,6 +2015,32 @@ struct ieee80211_eml_params {
};
/**
+ * struct ieee80211_nan_sched_cfg - NAN schedule configuration
+ * @channels: array of NAN channels. A channel entry is in use if
+ * channels[i].chanreq.oper.chan is not NULL.
+ * @schedule: NAN local schedule - mapping of each 16TU time slot to
+ * the NAN channel on which the radio will operate. NULL if unscheduled.
+ * @avail_blob: NAN Availability attribute blob.
+ * @avail_blob_len: length of the @avail_blob in bytes.
+ * @deferred: indicates that the driver should notify peers before applying the
+ * new NAN schedule, and apply the new schedule at the second NAN Slot
+ * boundary after it has notified the peers, as defined in Wi-Fi Aware (TM) 4.0
+ * specification, section 5.2.2.
+ * The driver must call ieee80211_nan_sched_update_done() after the
+ * schedule has been applied.
+ * If a HW restart happened while a deferred schedule update was pending,
+ * mac80211 will reconfigure the deferred schedule (and wait for the driver
+ * to notify that the schedule has been applied).
+ */
+struct ieee80211_nan_sched_cfg {
+ struct ieee80211_nan_channel channels[IEEE80211_NAN_MAX_CHANNELS];
+ struct ieee80211_nan_channel *schedule[CFG80211_NAN_SCHED_NUM_TIME_SLOTS];
+ u8 avail_blob[IEEE80211_NAN_AVAIL_BLOB_MAX_LEN];
+ u16 avail_blob_len;
+ bool deferred;
+};
+
+/**
* struct ieee80211_vif_cfg - interface configuration
* @assoc: association status
* @ibss_joined: indicates whether this station is part of an IBSS or not
@@ -1970,6 +2068,7 @@ struct ieee80211_eml_params {
* your driver/device needs to do.
* @ap_addr: AP MLD address, or BSSID for non-MLO connections
* (station mode only)
+ * @nan_sched: NAN schedule parameters. &struct ieee80211_nan_sched_cfg
*/
struct ieee80211_vif_cfg {
/* association related data */
@@ -1988,6 +2087,8 @@ struct ieee80211_vif_cfg {
bool s1g;
bool idle;
u8 ap_addr[ETH_ALEN] __aligned(2);
+ /* Protected by the wiphy mutex */
+ struct ieee80211_nan_sched_cfg nan_sched;
};
#define IEEE80211_TTLM_NUM_TIDS 8
@@ -2074,6 +2175,7 @@ enum ieee80211_neg_ttlm_res {
* @drv_priv: data area for driver use, will always be aligned to
* sizeof(void \*).
* @txq: the multicast data TX queue
+ * @txq_mgmt: the mgmt frame TX queue, currently only exists for NAN devices
 * @offload_flags: 802.3 -> 802.11 encapsulation offload flags, see
* &enum ieee80211_offload_flags.
*/
@@ -2092,6 +2194,7 @@ struct ieee80211_vif {
u8 hw_queue[IEEE80211_NUM_ACS];
struct ieee80211_txq *txq;
+ struct ieee80211_txq *txq_mgmt;
netdev_features_t netdev_features;
u32 driver_flags;
@@ -2477,11 +2580,15 @@ struct ieee80211_sta_aggregates {
* @uhr_cap: UHR capabilities of this STA
* @s1g_cap: S1G capabilities of this STA
* @agg: per-link data for multi-link aggregation
- * @bandwidth: current bandwidth the station can receive with
+ * @bandwidth: current bandwidth the station can receive with.
+ * This is the minimum of the peer's capabilities and our own
+ * operating channel width. Invalid for NAN, which operates on
+ * multiple channels.
* @rx_nss: in HT/VHT, the maximum number of spatial streams the
* station can receive at the moment, changed by operating mode
* notifications and capabilities. The value is only valid after
- * the station moves to associated state.
+ * the station moves to associated state. Invalid for NAN since it
+ * operates on multiple configurations of rx_nss.
* @txpwr: the station tx power configuration
*
*/
@@ -2563,6 +2670,8 @@ struct ieee80211_link_sta {
* @valid_links: bitmap of valid links, or 0 for non-MLO
* @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not.
* @epp_peer: indicates that the peer is an EPP peer.
+ * @nmi: For NDI stations, pointer to the NMI station of the peer.
+ * @nan_sched: NAN peer schedule for this station. Valid only for NMI stations.
*/
struct ieee80211_sta {
u8 addr[ETH_ALEN] __aligned(2);
@@ -2591,6 +2700,11 @@ struct ieee80211_sta {
struct ieee80211_link_sta deflink;
struct ieee80211_link_sta __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS];
+ struct ieee80211_sta __rcu *nmi;
+
+ /* should only be accessed with the wiphy mutex held */
+ struct ieee80211_nan_peer_sched *nan_sched;
+
/* must be last */
u8 drv_priv[] __aligned(sizeof(void *));
};
@@ -2824,6 +2938,8 @@ struct ieee80211_txq {
* station has a unique address, i.e. each station entry can be identified
* by just its MAC address; this prevents, for example, the same station
* from connecting to two virtual AP interfaces at the same time.
+ * Note that this doesn't apply for NAN, in which the peer's NMI address
+ * can be equal to its NDI address.
*
* @IEEE80211_HW_SUPPORTS_REORDERING_BUFFER: Hardware (or driver) manages the
* reordering buffer internally, guaranteeing mac80211 receives frames in
@@ -2913,6 +3029,9 @@ struct ieee80211_txq {
* HW flag so drivers can opt in according to their own control, e.g. in
* testing.
*
+ * @IEEE80211_HW_SUPPORTS_NDP_BLOCKACK: HW can transmit/receive S1G NDP
+ * BlockAck frames.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2973,6 +3092,7 @@ enum ieee80211_hw_flags {
IEEE80211_HW_DISALLOW_PUNCTURING,
IEEE80211_HW_HANDLES_QUIET_CSA,
IEEE80211_HW_STRICT,
+ IEEE80211_HW_SUPPORTS_NDP_BLOCKACK,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -4486,6 +4606,12 @@ struct ieee80211_prep_tx_info {
* @del_nan_func: Remove a NAN function. The driver must call
* ieee80211_nan_func_terminated() with
* NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST reason code upon removal.
+ * @nan_peer_sched_changed: Notifies the driver that the peer NAN schedule
+ * has changed. The new schedule is available via sta->nan_sched.
+ * Note that the channel_entry blob might not match the actual chandef
+ * since the bandwidth of the chandef is the minimum of the local and peer
+ * bandwidth. It is the driver's responsibility to remove the peer schedule
+ * when the NMI station is removed.
* @can_aggregate_in_amsdu: Called in order to determine if HW supports
* aggregating two specific frames in the same A-MSDU. The relation
* between the skbs should be symmetric and transitive. Note that while
@@ -4891,6 +5017,8 @@ struct ieee80211_ops {
void (*del_nan_func)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
u8 instance_id);
+ int (*nan_peer_sched_changed)(struct ieee80211_hw *hw,
+ struct ieee80211_sta *sta);
bool (*can_aggregate_in_amsdu)(struct ieee80211_hw *hw,
struct sk_buff *head,
struct sk_buff *skb);
@@ -7388,6 +7516,24 @@ void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif);
int ieee80211_ave_rssi(struct ieee80211_vif *vif, int link_id);
/**
+ * ieee80211_calculate_rx_timestamp - calculate timestamp in frame
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @status: RX status
+ * @mpdu_len: total MPDU length (including FCS)
+ * @mpdu_offset: offset into MPDU to calculate timestamp at
+ *
+ * This function calculates the RX timestamp at the given MPDU offset, taking
+ * into account what the RX timestamp was. An offset of 0 will just normalize
+ * the timestamp to TSF at beginning of MPDU reception.
+ *
+ * Returns: the calculated timestamp
+ */
+u64 ieee80211_calculate_rx_timestamp(struct ieee80211_hw *hw,
+ struct ieee80211_rx_status *status,
+ unsigned int mpdu_len,
+ unsigned int mpdu_offset);
+
+/**
* ieee80211_report_wowlan_wakeup - report WoWLAN wakeup
* @vif: virtual interface
* @wakeup: wakeup reason(s)
@@ -7733,6 +7879,17 @@ void ieee80211_nan_func_match(struct ieee80211_vif *vif,
gfp_t gfp);
/**
+ * ieee80211_nan_sched_update_done - notify that NAN schedule update is done
+ *
+ * This function is called by the driver to notify mac80211 that the NAN
+ * schedule update has been applied.
+ * Must be called with wiphy mutex held. May sleep.
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ */
+void ieee80211_nan_sched_update_done(struct ieee80211_vif *vif);
+
+/**
* ieee80211_calc_rx_airtime - calculate estimated transmission airtime for RX.
*
* This function calculates the estimated airtime usage of a frame based on the
@@ -7768,19 +7925,22 @@ u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw,
* ieee80211_get_fils_discovery_tmpl - Get FILS discovery template.
* @hw: pointer obtained from ieee80211_alloc_hw().
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO.
*
* The driver is responsible for freeing the returned skb.
*
* Return: FILS discovery template. %NULL on error.
*/
struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ unsigned int link_id);
/**
* ieee80211_get_unsol_bcast_probe_resp_tmpl - Get unsolicited broadcast
* probe response template.
* @hw: pointer obtained from ieee80211_alloc_hw().
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO.
*
* The driver is responsible for freeing the returned skb.
*
@@ -7788,7 +7948,8 @@ struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw,
*/
struct sk_buff *
ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ unsigned int link_id);
/**
* ieee80211_obss_color_collision_notify - notify userland about a BSS color
@@ -7964,4 +8125,11 @@ int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw,
* Return: %true iff the vif is a NAN interface and NAN is started
*/
bool ieee80211_vif_nan_started(struct ieee80211_vif *vif);
+
+/**
+ * ieee80211_encrypt_tx_skb - Encrypt the transmit skb
+ * @skb: the skb
+ * Return: 0 if success and non-zero on error
+ */
+int ieee80211_encrypt_tx_skb(struct sk_buff *skb);
#endif /* MAC80211_H */
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 766f4fb25e26..7fe3a1b61b2d 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -215,6 +215,12 @@ enum gdma_page_type {
#define GDMA_INVALID_DMA_REGION 0
+struct mana_serv_work {
+ struct work_struct serv_work;
+ struct pci_dev *pdev;
+ enum gdma_eqe_type type;
+};
+
struct gdma_mem_info {
struct device *dev;
@@ -386,6 +392,7 @@ struct gdma_irq_context {
enum gdma_context_flags {
GC_PROBE_SUCCEEDED = 0,
+ GC_IN_SERVICE = 1,
};
struct gdma_context {
@@ -411,14 +418,15 @@ struct gdma_context {
u32 test_event_eq_id;
bool is_pf;
- bool in_service;
phys_addr_t bar0_pa;
void __iomem *bar0_va;
+ resource_size_t bar0_size;
void __iomem *shm_base;
void __iomem *db_page_base;
phys_addr_t phys_db_page_base;
- u32 db_page_size;
+ u64 db_page_off;
+ u64 db_page_size;
int numa_node;
 /* Shared memory channel (used to bootstrap HWC) */
@@ -473,6 +481,8 @@ int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe);
void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit);
+int mana_schedule_serv_work(struct gdma_context *gc, enum gdma_eqe_type type);
+
struct gdma_wqe {
u32 reserved :24;
u32 last_vbytes :8;
@@ -615,6 +625,9 @@ enum {
/* Driver can handle hardware recovery events during probe */
#define GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY BIT(22)
+/* Driver supports self recovery on Hardware Channel timeouts */
+#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECOVERY BIT(25)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -628,7 +641,8 @@ enum {
GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \
GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE | \
GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY | \
- GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY)
+ GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY | \
+ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECOVERY)
#define GDMA_DRV_CAP_FLAGS2 0
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index a078af283bdd..96d21cbbdee2 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -61,8 +61,11 @@ enum TRI_STATE {
#define MAX_PORTS_IN_MANA_DEV 256
+/* Maximum number of packets per coalesced CQE */
+#define MANA_RXCOMP_OOB_NUM_PPI 4
+
/* Update this count whenever the respective structures are changed */
-#define MANA_STATS_RX_COUNT 5
+#define MANA_STATS_RX_COUNT (6 + MANA_RXCOMP_OOB_NUM_PPI - 1)
#define MANA_STATS_TX_COUNT 11
#define MANA_RX_FRAG_ALIGNMENT 64
@@ -73,6 +76,8 @@ struct mana_stats_rx {
u64 xdp_drop;
u64 xdp_tx;
u64 xdp_redirect;
+ u64 pkt_len0_err;
+ u64 coalesced_cqe[MANA_RXCOMP_OOB_NUM_PPI - 1];
struct u64_stats_sync syncp;
};
@@ -227,8 +232,6 @@ struct mana_rxcomp_perpkt_info {
u32 pkt_hash;
}; /* HW DATA */
-#define MANA_RXCOMP_OOB_NUM_PPI 4
-
/* Receive completion OOB */
struct mana_rxcomp_oob {
struct mana_cqe_header cqe_hdr;
@@ -378,7 +381,6 @@ struct mana_ethtool_stats {
u64 tx_cqe_err;
u64 tx_cqe_unknown_type;
u64 tx_linear_pkt_cnt;
- u64 rx_coalesced_err;
u64 rx_cqe_unknown_type;
};
@@ -557,6 +559,9 @@ struct mana_port_context {
bool port_is_up;
bool port_st_save; /* Saved port state */
+ u8 cqe_coalescing_enable;
+ u32 cqe_coalescing_timeout_ns;
+
struct mana_ethtool_stats eth_stats;
struct mana_ethtool_phy_stats phy_stats;
@@ -902,6 +907,10 @@ struct mana_cfg_rx_steer_req_v2 {
struct mana_cfg_rx_steer_resp {
struct gdma_resp_hdr hdr;
+
+ /* V2 */
+ u32 cqe_coalescing_timeout_ns;
+ u32 reserved1;
}; /* HW DATA */
/* Register HW vPort */
@@ -998,6 +1007,7 @@ struct mana_deregister_filter_resp {
#define STATISTICS_FLAGS_TX_ERRORS_GDMA_ERROR 0x0000000004000000
#define MANA_MAX_NUM_QUEUES 64
+#define MANA_DEF_NUM_QUEUES 16
#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1)
diff --git a/include/net/mctp.h b/include/net/mctp.h
index c3207ce98f07..e1e0a69afdce 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -270,6 +270,7 @@ struct mctp_dst {
struct mctp_dev *dev;
unsigned int mtu;
mctp_eid_t nexthop;
+ mctp_eid_t saddr;
/* set for direct addressing */
unsigned char halen;
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index d38783a2ce57..3da1a6f8d3f9 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -2,8 +2,6 @@
#ifndef _NDISC_H
#define _NDISC_H
-#include <net/ipv6_stubs.h>
-
/*
* ICMP codes for neighbour discovery messages
*/
@@ -359,14 +357,6 @@ static inline struct neighbour *__ipv6_neigh_lookup_noref(struct net_device *dev
return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev);
}
-static inline
-struct neighbour *__ipv6_neigh_lookup_noref_stub(struct net_device *dev,
- const void *pkey)
-{
- return ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
- ndisc_hashfn, pkey, dev);
-}
-
static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey)
{
struct neighbour *n;
@@ -391,28 +381,20 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev,
rcu_read_unlock();
}
-static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
- const void *pkey)
-{
- struct neighbour *n;
-
- rcu_read_lock();
- n = __ipv6_neigh_lookup_noref_stub(dev, pkey);
- neigh_confirm(n);
- rcu_read_unlock();
-}
-
-/* uses ipv6_stub and is meant for use outside of IPv6 core */
static inline struct neighbour *ip_neigh_gw6(struct net_device *dev,
const void *addr)
{
+#if IS_ENABLED(CONFIG_IPV6)
struct neighbour *neigh;
- neigh = __ipv6_neigh_lookup_noref_stub(dev, addr);
+ neigh = __ipv6_neigh_lookup_noref(dev, addr);
if (unlikely(!neigh))
- neigh = __neigh_create(ipv6_stub->nd_tbl, addr, dev, false);
+ neigh = __neigh_create(&nd_tbl, addr, dev, false);
return neigh;
+#else
+ return ERR_PTR(-EAFNOSUPPORT);
+#endif
}
int ndisc_init(void);
@@ -434,6 +416,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
void ndisc_send_rs(struct net_device *dev,
const struct in6_addr *saddr, const struct in6_addr *daddr);
+
void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
bool router, bool solicited, bool override, bool inc_opt);
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 95ed28212f4e..70c9fe9e83cc 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -150,6 +150,11 @@ enum {
* When NIC-wide config is changed the callback will
* be invoked for all queues.
*
+ * @ndo_queue_create: Create a new RX queue on a virtual device that will
+ * be paired with a physical device's queue via leasing.
+ * Return the new queue id on success, negative error
+ * on failure.
+ *
* @supported_params: Bitmask of supported parameters, see QCFG_*.
*
* Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
@@ -178,6 +183,8 @@ struct netdev_queue_mgmt_ops {
struct netlink_ext_ack *extack);
struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev,
int idx);
+ int (*ndo_queue_create)(struct net_device *dev,
+ struct netlink_ext_ack *extack);
unsigned int supported_params;
};
@@ -185,7 +192,7 @@ struct netdev_queue_mgmt_ops {
void netdev_queue_config(struct net_device *dev, int rxq,
struct netdev_queue_config *qcfg);
-bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx);
+bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx);
/**
* DOC: Lockless queue stopping / waking helpers.
@@ -373,6 +380,14 @@ static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq)
get_desc, start_thrs); \
})
-struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx);
-
-#endif
+struct device *netdev_queue_get_dma_dev(struct net_device *dev,
+ unsigned int idx,
+ enum netdev_queue_type type);
+bool netdev_can_create_queue(const struct net_device *dev,
+ struct netlink_ext_ack *extack);
+bool netdev_can_lease_queue(const struct net_device *dev,
+ struct netlink_ext_ack *extack);
+bool netdev_queue_busy(struct net_device *dev, unsigned int idx,
+ enum netdev_queue_type type,
+ struct netlink_ext_ack *extack);
+#endif /* _LINUX_NET_QUEUES_H */
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index cfa72c485387..9415a94d333d 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -8,13 +8,14 @@
#include <net/xdp.h>
#include <net/page_pool/types.h>
#include <net/netdev_queues.h>
+#include <net/rps-types.h>
/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
struct xdp_rxq_info xdp_rxq;
#ifdef CONFIG_RPS
struct rps_map __rcu *rps_map;
- struct rps_dev_flow_table __rcu *rps_flow_table;
+ rps_tag_ptr rps_flow_table;
#endif
struct kobject kobj;
const struct attribute_group **groups;
@@ -30,6 +31,14 @@ struct netdev_rx_queue {
struct napi_struct *napi;
struct netdev_queue_config qcfg;
struct pp_memory_provider_params mp_params;
+
+ /* If a queue is leased, then the lease pointer is always
+ * valid. From the physical device it points to the virtual
+ * queue, and from the virtual device it points to the
+ * physical queue.
+ */
+ struct netdev_rx_queue *lease;
+ netdevice_tracker lease_tracker;
} ____cacheline_aligned_in_smp;
/*
@@ -58,6 +67,18 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
return index;
}
-int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
+enum netif_lease_dir {
+ NETIF_VIRT_TO_PHYS,
+ NETIF_PHYS_TO_VIRT,
+};
-#endif
+struct netdev_rx_queue *
+__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq,
+ enum netif_lease_dir dir);
+
+int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
+void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst,
+ struct netdev_rx_queue *rxq_src);
+void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
+ struct netdev_rx_queue *rxq_src);
+#endif /* _LINUX_NETDEV_RX_QUEUE_H */
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 8d65ffbf57de..b39417ad955e 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -16,9 +16,6 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
#ifdef CONFIG_NF_CT_PROTO_SCTP
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp;
#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite;
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre;
#endif
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index cd5020835a6d..fde2427ceb8f 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -107,11 +107,6 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state);
-int nf_conntrack_udplite_packet(struct nf_conn *ct,
- struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state);
int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct sk_buff *skb,
unsigned int dataoff,
@@ -139,8 +134,6 @@ void nf_conntrack_icmpv6_init_net(struct net *net);
/* Existing built-in generic protocol */
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
-#define MAX_NF_CT_PROTO IPPROTO_UDPLITE
-
const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto);
/* Generic netlink helpers */
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index ec8a8ec9c0aa..2c0173d9309c 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -31,7 +31,9 @@ struct nft_pktinfo {
const struct nf_hook_state *state;
u8 flags;
u8 tprot;
+ __be16 ethertype;
u16 fragoff;
+ u16 nhoff;
u16 thoff;
u16 inneroff;
};
@@ -83,6 +85,8 @@ static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt)
{
pkt->flags = 0;
pkt->tprot = 0;
+ pkt->ethertype = pkt->skb->protocol;
+ pkt->nhoff = 0;
pkt->thoff = 0;
pkt->fragoff = 0;
}
@@ -122,17 +126,6 @@ struct nft_regs {
};
};
-struct nft_regs_track {
- struct {
- const struct nft_expr *selector;
- const struct nft_expr *bitwise;
- u8 num_reg;
- } regs[NFT_REG32_NUM];
-
- const struct nft_expr *cur;
- const struct nft_expr *last;
-};
-
/* Store/load an u8, u16 or u64 integer to/from the u32 data register.
*
* Note, when using concatenations, register allocation happens at 32-bit
@@ -425,8 +418,6 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp);
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
const struct nft_expr *expr, bool reset);
-bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
- const struct nft_expr *expr);
struct nft_set_ext;
@@ -941,7 +932,6 @@ struct nft_offload_ctx;
* @destroy_clone: destruction clone function
* @dump: function to dump parameters
* @validate: validate expression, called during loop detection
- * @reduce: reduce expression
* @gc: garbage collection expression
* @offload: hardware offload expression
* @offload_action: function to report true/false to allocate one slot or not in the flow
@@ -975,8 +965,6 @@ struct nft_expr_ops {
bool reset);
int (*validate)(const struct nft_ctx *ctx,
const struct nft_expr *expr);
- bool (*reduce)(struct nft_regs_track *track,
- const struct nft_expr *expr);
bool (*gc)(struct net *net,
const struct nft_expr *expr);
int (*offload)(struct nft_offload_ctx *ctx,
@@ -1959,20 +1947,4 @@ static inline u64 nft_net_tstamp(const struct net *net)
return nft_pernet(net)->tstamp;
}
-#define __NFT_REDUCE_READONLY 1UL
-#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY
-
-void nft_reg_track_update(struct nft_regs_track *track,
- const struct nft_expr *expr, u8 dreg, u8 len);
-void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len);
-void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg);
-
-static inline bool nft_reg_track_cmp(struct nft_regs_track *track,
- const struct nft_expr *expr, u8 dreg)
-{
- return track->regs[dreg].selector &&
- track->regs[dreg].selector->ops == expr->ops &&
- track->regs[dreg].num_reg == 0;
-}
-
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index fcf967286e37..e715405a73cb 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -12,16 +12,19 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt)
ip = ip_hdr(pkt->skb);
pkt->flags = NFT_PKTINFO_L4PROTO;
pkt->tprot = ip->protocol;
+ pkt->ethertype = pkt->skb->protocol;
+ pkt->nhoff = 0;
pkt->thoff = ip_hdrlen(pkt->skb);
pkt->fragoff = ntohs(ip->frag_off) & IP_OFFSET;
}
-static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
+static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+ int nhoff)
{
struct iphdr *iph, _iph;
u32 len, thoff, skb_len;
- iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb),
+ iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb) + nhoff,
sizeof(*iph), &_iph);
if (!iph)
return -1;
@@ -31,7 +34,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
len = iph_totlen(pkt->skb, iph);
thoff = iph->ihl * 4;
- skb_len = pkt->skb->len - skb_network_offset(pkt->skb);
+ skb_len = pkt->skb->len - skb_network_offset(pkt->skb) - nhoff;
if (skb_len < len)
return -1;
@@ -42,7 +45,9 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
pkt->flags = NFT_PKTINFO_L4PROTO;
pkt->tprot = iph->protocol;
- pkt->thoff = skb_network_offset(pkt->skb) + thoff;
+ pkt->ethertype = pkt->skb->protocol;
+ pkt->nhoff = nhoff;
+ pkt->thoff = skb_network_offset(pkt->skb) + nhoff + thoff;
pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET;
return 0;
@@ -50,7 +55,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
{
- if (__nft_set_pktinfo_ipv4_validate(pkt) < 0)
+ if (__nft_set_pktinfo_ipv4_validate(pkt, 0) < 0)
nft_set_pktinfo_unspec(pkt);
}
@@ -78,6 +83,8 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt)
}
pkt->flags = NFT_PKTINFO_L4PROTO;
+ pkt->ethertype = pkt->skb->protocol;
+ pkt->nhoff = 0;
pkt->tprot = iph->protocol;
pkt->thoff = thoff;
pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET;
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index c53ac00bb974..d7b8c559b795 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -20,21 +20,23 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt)
pkt->flags = NFT_PKTINFO_L4PROTO;
pkt->tprot = protohdr;
+ pkt->ethertype = pkt->skb->protocol;
+ pkt->nhoff = 0;
pkt->thoff = thoff;
pkt->fragoff = frag_off;
}
-static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
+static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, int nhoff)
{
#if IS_ENABLED(CONFIG_IPV6)
unsigned int flags = IP6_FH_F_AUTH;
struct ipv6hdr *ip6h, _ip6h;
- unsigned int thoff = 0;
+ unsigned int thoff = nhoff;
unsigned short frag_off;
u32 pkt_len, skb_len;
int protohdr;
- ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb),
+ ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb) + nhoff,
sizeof(*ip6h), &_ip6h);
if (!ip6h)
return -1;
@@ -43,7 +45,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
return -1;
pkt_len = ipv6_payload_len(pkt->skb, ip6h);
- skb_len = pkt->skb->len - skb_network_offset(pkt->skb);
+ skb_len = pkt->skb->len - skb_network_offset(pkt->skb) - nhoff;
if (pkt_len + sizeof(*ip6h) > skb_len)
return -1;
@@ -53,6 +55,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
pkt->flags = NFT_PKTINFO_L4PROTO;
pkt->tprot = protohdr;
+ pkt->ethertype = pkt->skb->protocol;
+ pkt->nhoff = nhoff;
pkt->thoff = thoff;
pkt->fragoff = frag_off;
@@ -64,7 +68,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
{
- if (__nft_set_pktinfo_ipv6_validate(pkt) < 0)
+ if (__nft_set_pktinfo_ipv6_validate(pkt, 0) < 0)
nft_set_pktinfo_unspec(pkt);
}
@@ -99,6 +103,8 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt)
pkt->flags = NFT_PKTINFO_L4PROTO;
pkt->tprot = protohdr;
+ pkt->ethertype = pkt->skb->protocol;
+ pkt->nhoff = 0;
pkt->thoff = thoff;
pkt->fragoff = frag_off;
diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 3568b6a2f5f0..14c427891ee6 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -67,6 +67,16 @@ struct nft_flow_rule {
struct flow_rule *rule;
};
+static inline struct flow_action_entry *
+nft_flow_action_entry_next(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow)
+{
+ if (unlikely(ctx->num_actions >= flow->rule->action.num_entries))
+ return NULL;
+
+ return &flow->rule->action.entries[ctx->num_actions++];
+}
+
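/*
 * Editor's sketch (not part of the patch): the bounds-checked cursor
 * pattern used by the helper above, reduced to plain arrays with invented
 * toy_* names.  Returning NULL once the cursor reaches the capacity lets
 * callers bail out instead of writing past the preallocated entries.
 */
#include <stddef.h>

struct toy_ctx {
	size_t num_actions;	/* cursor: next free entry */
};

struct toy_rule {
	size_t num_entries;	/* capacity */
	int entries[8];
};

static int *toy_next_entry(struct toy_ctx *ctx, struct toy_rule *rule)
{
	if (ctx->num_actions >= rule->num_entries)
		return NULL;	/* no free slot left */

	return &rule->entries[ctx->num_actions++];
}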
void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
enum flow_dissector_key_id addr_type);
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index 7370fba844ef..e0422456f27b 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -66,6 +66,4 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
void nft_fib_store_result(void *reg, const struct nft_fib *priv,
const struct net_device *dev);
-bool nft_fib_reduce(struct nft_regs_track *track,
- const struct nft_expr *expr);
#endif
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
index d602263590fe..f74e63290603 100644
--- a/include/net/netfilter/nft_meta.h
+++ b/include/net/netfilter/nft_meta.h
@@ -43,9 +43,6 @@ void nft_meta_set_destroy(const struct nft_ctx *ctx,
int nft_meta_set_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr);
-bool nft_meta_get_reduce(struct nft_regs_track *track,
- const struct nft_expr *expr);
-
struct nft_inner_tun_ctx;
void nft_meta_inner_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt,
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 1a8356ca4b78..546d10586576 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -2265,6 +2265,25 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
}
/**
+ * nla_nest_end_safe - Validate and finalize nesting of attributes
+ * @skb: socket buffer the attributes are stored in
+ * @start: container attribute
+ *
+ * Corrects the container attribute header to include all appended
+ * attributes.
+ *
+ * Returns: the total data length of the skb, or -EMSGSIZE if the
+ * nested attribute length exceeds U16_MAX.
+ */
+static inline int nla_nest_end_safe(struct sk_buff *skb, struct nlattr *start)
+{
+ if (skb_tail_pointer(skb) - (unsigned char *)start > U16_MAX)
+ return -EMSGSIZE;
+
+ return nla_nest_end(skb, start);
+}
+
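/*
 * Editor's sketch (not part of the patch): why the U16_MAX check above is
 * needed.  A netlink attribute header stores its length in a 16-bit field,
 * so the span from the container attribute to the current tail must fit in
 * that field before it is written back.  Types and the toy_* names here
 * are simplified for illustration.
 */
#include <stddef.h>
#include <stdint.h>

struct toy_nlattr {
	uint16_t nla_len;	/* 16-bit length: the limiting factor */
	uint16_t nla_type;
};

/* Returns 0 on success, -1 if the nested payload is too large to encode. */
static int toy_nest_end(unsigned char *tail, struct toy_nlattr *start)
{
	ptrdiff_t len = tail - (unsigned char *)start;

	if (len > UINT16_MAX)
		return -1;		/* would overflow nla_len */

	start->nla_len = (uint16_t)len;
	return 0;
}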
+/**
* nla_nest_cancel - Cancel nesting of attributes
* @skb: socket buffer the message is stored in
* @start: container attribute
diff --git a/include/net/netmem.h b/include/net/netmem.h
index a96b3e5e5574..a6d65ced5231 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -93,23 +93,7 @@ enum net_iov_type {
* supported.
*/
struct net_iov {
- union {
- struct netmem_desc desc;
-
- /* XXX: The following part should be removed once all
- * the references to them are converted so as to be
- * accessed via netmem_desc e.g. niov->desc.pp instead
- * of niov->pp.
- */
- struct {
- unsigned long _flags;
- unsigned long pp_magic;
- struct page_pool *pp;
- unsigned long _pp_mapping_pad;
- unsigned long dma_addr;
- atomic_long_t pp_ref_count;
- };
- };
+ struct netmem_desc desc;
struct net_iov_area *owner;
enum net_iov_type type;
};
@@ -123,26 +107,6 @@ struct net_iov_area {
unsigned long base_virtual;
};
-/* net_iov is union'ed with struct netmem_desc mirroring struct page, so
- * the page_pool can access these fields without worrying whether the
- * underlying fields are accessed via netmem_desc or directly via
- * net_iov, until all the references to them are converted so as to be
- * accessed via netmem_desc e.g. niov->desc.pp instead of niov->pp.
- *
- * The non-net stack fields of struct page are private to the mm stack
- * and must never be mirrored to net_iov.
- */
-#define NET_IOV_ASSERT_OFFSET(desc, iov) \
- static_assert(offsetof(struct netmem_desc, desc) == \
- offsetof(struct net_iov, iov))
-NET_IOV_ASSERT_OFFSET(_flags, _flags);
-NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
-NET_IOV_ASSERT_OFFSET(pp, pp);
-NET_IOV_ASSERT_OFFSET(_pp_mapping_pad, _pp_mapping_pad);
-NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
-NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
-#undef NET_IOV_ASSERT_OFFSET
-
static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov)
{
return niov->owner;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 8e971c7bf164..80ccd4dda8e0 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -74,6 +74,7 @@ struct netns_ipv4 {
/* TXRX readonly hotpath cache lines */
__cacheline_group_begin(netns_ipv4_read_txrx);
+ u8 sysctl_tcp_shrink_window;
__cacheline_group_end(netns_ipv4_read_txrx);
/* RX readonly hotpath cache line */
@@ -122,7 +123,6 @@ struct netns_ipv4 {
#endif
bool fib_has_custom_local_routes;
bool fib_offload_disabled;
- u8 sysctl_tcp_shrink_window;
#ifdef CONFIG_IP_ROUTE_CLASSID
atomic_t fib_num_tclassid_users;
#endif
@@ -166,6 +166,7 @@ struct netns_ipv4 {
u8 sysctl_ip_autobind_reuse;
/* Shall we try to damage output packets if routing dev changes? */
u8 sysctl_ip_dynaddr;
+ u32 sysctl_ip_local_port_step_width;
#ifdef CONFIG_NET_L3_MASTER_DEV
u8 sysctl_raw_l3mdev_accept;
#endif
@@ -279,6 +280,9 @@ struct netns_ipv4 {
struct list_head mr_tables;
struct fib_rules_ops *mr_rules_ops;
#endif
+ struct fib_notifier_ops *ipmr_notifier_ops;
+ atomic_t ipmr_seq;
+ struct mutex mfc_mutex;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed;
@@ -290,9 +294,6 @@ struct netns_ipv4 {
struct fib_notifier_ops *notifier_ops;
unsigned int fib_seq; /* writes protected by rtnl_mutex */
- struct fib_notifier_ops *ipmr_notifier_ops;
- unsigned int ipmr_seq; /* protected by rtnl_mutex */
-
atomic_t rt_genid;
siphash_key_t ip_id_key;
struct hlist_head *inet_addr_lst;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 34bdb1308e8f..499e4288170f 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -118,7 +118,7 @@ struct netns_ipv6 {
struct seg6_pernet_data *seg6_data;
struct fib_notifier_ops *notifier_ops;
struct fib_notifier_ops *ip6mr_notifier_ops;
- unsigned int ipmr_seq; /* protected by rtnl_mutex */
+ atomic_t ipmr_seq;
struct {
struct hlist_head head;
spinlock_t lock;
diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 7e373664b1e7..dce05f8e6a33 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -28,11 +28,6 @@ struct netns_mib {
DEFINE_SNMP_STAT(struct mptcp_mib, mptcp_statistics);
#endif
- DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics);
-#if IS_ENABLED(CONFIG_IPV6)
- DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
-#endif
-
DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib, icmpmsg_statistics);
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/include/net/netns/vsock.h b/include/net/netns/vsock.h
index dc8cbe45f406..7f84aad92f57 100644
--- a/include/net/netns/vsock.h
+++ b/include/net/netns/vsock.h
@@ -20,5 +20,7 @@ struct netns_vsock {
/* 0 = unlocked, 1 = locked to global, 2 = locked to local */
int child_ns_mode_locked;
+
+ int g2h_fallback;
};
#endif /* __NET_NET_NAMESPACE_VSOCK_H */
diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h
index ada4f968960a..255ce4cfd975 100644
--- a/include/net/page_pool/memory_provider.h
+++ b/include/net/page_pool/memory_provider.h
@@ -23,14 +23,10 @@ bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr);
void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov);
void net_mp_niov_clear_page_pool(struct net_iov *niov);
-int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
- struct pp_memory_provider_params *p);
-int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
const struct pp_memory_provider_params *p,
struct netlink_ext_ack *extack);
-void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
- struct pp_memory_provider_params *old_p);
-void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
+void netif_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
const struct pp_memory_provider_params *old_p);
/**
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index cdd95477af7a..03da138722f5 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -44,6 +44,8 @@
* use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
* ring is usually refilled and the max consumed elements will be 64,
* thus a natural max size of objects needed in the cache.
+ * The refill watermark is 64 for 4KB pages and scales down on larger
+ * page sizes so the refill batch covers a roughly constant number of bytes.
*
* Keeping room for more objects, is due to XDP_DROP use-case. As
* XDP_DROP allows the opportunity to recycle objects directly into
@@ -51,8 +53,15 @@
* cache is already full (or partly full) then the XDP_DROP recycles
* would have to take a slower code path.
*/
-#define PP_ALLOC_CACHE_SIZE 128
+#if PAGE_SIZE >= SZ_64K
+#define PP_ALLOC_CACHE_REFILL 4
+#elif PAGE_SIZE >= SZ_16K
+#define PP_ALLOC_CACHE_REFILL 16
+#else
#define PP_ALLOC_CACHE_REFILL 64
+#endif
+
+#define PP_ALLOC_CACHE_SIZE (PP_ALLOC_CACHE_REFILL * 2)
struct pp_alloc_cache {
u32 count;
netmem_ref cache[PP_ALLOC_CACHE_SIZE];
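
For reference, at the three threshold page sizes the refill batch covers the same number of bytes: 64 x 4KB = 256KB, 16 x 16KB = 256KB and 4 x 64KB = 256KB, with PP_ALLOC_CACHE_SIZE holding twice the refill count in every case.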
diff --git a/include/net/ping.h b/include/net/ping.h
index 05bfd594a64c..bcbdb5a136e3 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -20,8 +20,7 @@
/* Compatibility glue so we can support IPv6 when it's compiled as a module */
struct pingv6_ops {
- int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len,
- int *addr_len);
+ int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len);
void (*ip6_datagram_recv_common_ctl)(struct sock *sk,
struct msghdr *msg,
struct sk_buff *skb);
@@ -64,7 +63,7 @@ int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd,
struct sk_buff *);
int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int flags, int *addr_len);
+ int flags);
int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
void *user_icmph, size_t icmph_len);
int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/rps-types.h b/include/net/rps-types.h
new file mode 100644
index 000000000000..6b90a66866c1
--- /dev/null
+++ b/include/net/rps-types.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_RPS_TYPES_H
+#define _NET_RPS_TYPES_H
+
+/* An rps_tag_ptr packs an RPS table pointer together with its size:
+ * the low-order 5 bits store ilog2(size) of the table, the remaining
+ * bits hold the (at least 32-byte aligned) table pointer.
+ */
+typedef unsigned long rps_tag_ptr;
+
+static inline u8 rps_tag_to_log(rps_tag_ptr tag_ptr)
+{
+ return tag_ptr & 31U;
+}
+
+static inline u32 rps_tag_to_mask(rps_tag_ptr tag_ptr)
+{
+ return (1U << rps_tag_to_log(tag_ptr)) - 1;
+}
+
+static inline void *rps_tag_to_table(rps_tag_ptr tag_ptr)
+{
+ return (void *)(tag_ptr & ~31UL);
+}
+#endif /* _NET_RPS_TYPES_H */
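
As an editorial illustration, a hypothetical helper (not in this diff) showing how such a tag would be assembled; it assumes the table allocation is at least 32-byte aligned so the low 5 bits are free for the size.

#include <net/rps-types.h>

static inline rps_tag_ptr rps_make_tag(void *table, unsigned int log_size)
{
	/* pointer in the high bits, ilog2(size) in the low 5 bits */
	return (rps_tag_ptr)table | (log_size & 31U);
}

/* round trip: rps_tag_to_table() recovers table, rps_tag_to_log() recovers
 * log_size, and rps_tag_to_mask() yields (1U << log_size) - 1.
 */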
diff --git a/include/net/rps.h b/include/net/rps.h
index f1794cd2e7fb..e33c6a2fa8bb 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -8,6 +8,7 @@
#include <net/hotdata.h>
#ifdef CONFIG_RPS
+#include <net/rps-types.h>
extern struct static_key_false rps_needed;
extern struct static_key_false rfs_needed;
@@ -39,17 +40,6 @@ struct rps_dev_flow {
#define RPS_NO_FILTER 0xffff
/*
- * The rps_dev_flow_table structure contains a table of flow mappings.
- */
-struct rps_dev_flow_table {
- u8 log;
- struct rcu_head rcu;
- struct rps_dev_flow flows[];
-};
-#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
- ((_num) * sizeof(struct rps_dev_flow)))
-
-/*
* The rps_sock_flow_table contains mappings of flows to the last CPU
* on which they were processed by the application (set in recvmsg).
* Each entry is a 32bit value. Upper part is the high-order bits
@@ -60,41 +50,38 @@ struct rps_dev_flow_table {
* meaning we use 32-6=26 bits for the hash.
*/
struct rps_sock_flow_table {
- struct rcu_head rcu;
- u32 mask;
-
- u32 ents[] ____cacheline_aligned_in_smp;
+ u32 ent;
};
-#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
#define RPS_NO_CPU 0xffff
-static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
- u32 hash)
+static inline void rps_record_sock_flow(rps_tag_ptr tag_ptr, u32 hash)
{
- unsigned int index = hash & table->mask;
+ unsigned int index = hash & rps_tag_to_mask(tag_ptr);
u32 val = hash & ~net_hotdata.rps_cpu_mask;
+ struct rps_sock_flow_table *table;
/* We only give a hint, preemption can change CPU under us */
val |= raw_smp_processor_id();
+ table = rps_tag_to_table(tag_ptr);
/* The following WRITE_ONCE() is paired with the READ_ONCE()
* here, and another one in get_rps_cpu().
*/
- if (READ_ONCE(table->ents[index]) != val)
- WRITE_ONCE(table->ents[index], val);
+ if (READ_ONCE(table[index].ent) != val)
+ WRITE_ONCE(table[index].ent, val);
}
static inline void _sock_rps_record_flow_hash(__u32 hash)
{
- struct rps_sock_flow_table *sock_flow_table;
+ rps_tag_ptr tag_ptr;
if (!hash)
return;
rcu_read_lock();
- sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
- if (sock_flow_table)
- rps_record_sock_flow(sock_flow_table, hash);
+ tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table);
+ if (tag_ptr)
+ rps_record_sock_flow(tag_ptr, hash);
rcu_read_unlock();
}
@@ -121,6 +108,7 @@ static inline void _sock_rps_record_flow(const struct sock *sk)
static inline void _sock_rps_delete_flow(const struct sock *sk)
{
struct rps_sock_flow_table *table;
+ rps_tag_ptr tag_ptr;
u32 hash, index;
hash = READ_ONCE(sk->sk_rxhash);
@@ -128,11 +116,12 @@ static inline void _sock_rps_delete_flow(const struct sock *sk)
return;
rcu_read_lock();
- table = rcu_dereference(net_hotdata.rps_sock_flow_table);
- if (table) {
- index = hash & table->mask;
- if (READ_ONCE(table->ents[index]) != RPS_NO_CPU)
- WRITE_ONCE(table->ents[index], RPS_NO_CPU);
+ tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table);
+ if (tag_ptr) {
+ index = hash & rps_tag_to_mask(tag_ptr);
+ table = rps_tag_to_table(tag_ptr);
+ if (READ_ONCE(table[index].ent) != RPS_NO_CPU)
+ WRITE_ONCE(table[index].ent, RPS_NO_CPU);
}
rcu_read_unlock();
}
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c3d657359a3d..11159a50d6a1 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -20,12 +20,15 @@
#include <net/rtnetlink.h>
#include <net/flow_offload.h>
#include <linux/xarray.h>
+#include <net/dropreason-qdisc.h>
struct Qdisc_ops;
struct qdisc_walker;
struct tcf_walker;
struct module;
struct bpf_flow_keys;
+struct Qdisc;
+struct netdev_queue;
struct qdisc_rate_table {
struct tc_ratespec rate;
@@ -707,8 +710,8 @@ void dev_qdisc_change_real_num_tx(struct net_device *dev,
void dev_init_scheduler(struct net_device *dev);
void dev_shutdown(struct net_device *dev);
void dev_activate(struct net_device *dev);
-void dev_deactivate(struct net_device *dev);
-void dev_deactivate_many(struct list_head *head);
+void dev_deactivate(struct net_device *dev, bool reset_needed);
+void dev_deactivate_many(struct list_head *head, bool reset_needed);
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
struct Qdisc *qdisc);
void qdisc_reset(struct Qdisc *qdisc);
@@ -1144,38 +1147,62 @@ static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
return cb;
}
+/* TC classifier accessors - use enum skb_drop_reason */
static inline enum skb_drop_reason
tcf_get_drop_reason(const struct sk_buff *skb)
{
- return tc_skb_cb(skb)->drop_reason;
+ return (enum skb_drop_reason)tc_skb_cb(skb)->drop_reason;
}
static inline void tcf_set_drop_reason(const struct sk_buff *skb,
enum skb_drop_reason reason)
{
- tc_skb_cb(skb)->drop_reason = reason;
+ tc_skb_cb(skb)->drop_reason = (enum qdisc_drop_reason)reason;
}
-static inline void tcf_kfree_skb_list(struct sk_buff *skb)
+/* Qdisc accessors - use enum qdisc_drop_reason */
+static inline enum qdisc_drop_reason
+tcf_get_qdisc_drop_reason(const struct sk_buff *skb)
{
- while (unlikely(skb)) {
- struct sk_buff *next = skb->next;
+ return tc_skb_cb(skb)->drop_reason;
+}
- prefetch(next);
- kfree_skb_reason(skb, tcf_get_drop_reason(skb));
- skb = next;
- }
+static inline void tcf_set_qdisc_drop_reason(const struct sk_buff *skb,
+ enum qdisc_drop_reason reason)
+{
+ tc_skb_cb(skb)->drop_reason = reason;
+}
+
+void __tcf_kfree_skb_list(struct sk_buff *skb, struct Qdisc *q,
+ struct netdev_queue *txq, struct net_device *dev);
+
+static inline void tcf_kfree_skb_list(struct sk_buff *skb, struct Qdisc *q,
+ struct netdev_queue *txq,
+ struct net_device *dev)
+{
+ if (unlikely(skb))
+ __tcf_kfree_skb_list(skb, q, txq, dev);
}
static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb,
- enum skb_drop_reason reason)
+ enum qdisc_drop_reason reason)
{
+ struct Qdisc *root;
+
DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS));
DEBUG_NET_WARN_ON_ONCE(q->flags & TCQ_F_NOLOCK);
- tcf_set_drop_reason(skb, reason);
- skb->next = q->to_free;
- q->to_free = skb;
+ rcu_read_lock();
+ root = qdisc_root_sleeping(q);
+
+ if (root->flags & TCQ_F_DEQUEUE_DROPS) {
+ tcf_set_qdisc_drop_reason(skb, reason);
+ skb->next = root->to_free;
+ root->to_free = skb;
+ } else {
+ kfree_skb_reason(skb, (enum skb_drop_reason)reason);
+ }
+ rcu_read_unlock();
}
/* Instead of calling kfree_skb() while root qdisc lock is held,
@@ -1350,9 +1377,9 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
static inline int qdisc_drop_reason(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free,
- enum skb_drop_reason reason)
+ enum qdisc_drop_reason reason)
{
- tcf_set_drop_reason(skb, reason);
+ tcf_set_qdisc_drop_reason(skb, reason);
return qdisc_drop(skb, sch, to_free);
}
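
A sketch (editorial, not part of this diff) of an enqueue path using the qdisc-specific reason type; QDISC_DROP_REASON_FOO stands in for whatever constants dropreason-qdisc.h actually defines.

#include <net/sch_generic.h>

static int foo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	if (unlikely(sch->q.qlen >= READ_ONCE(sch->limit)))
		/* placeholder reason constant */
		return qdisc_drop_reason(skb, sch, to_free,
					 QDISC_DROP_REASON_FOO);

	return qdisc_enqueue_tail(skb, sch);
}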
diff --git a/include/net/sock.h b/include/net/sock.h
index cfae4fefb8f5..dccd3738c368 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -81,8 +81,13 @@
* mini-semaphore synchronizes multiple users amongst themselves.
*/
typedef struct {
- spinlock_t slock;
- int owned;
+ union {
+ struct {
+ int owned;
+ spinlock_t slock;
+ };
+ long combined;
+ };
wait_queue_head_t wq;
/*
* We express the mutex-alike socket_lock semantics
@@ -121,14 +126,14 @@ typedef __u64 __bitwise __addrpair;
* @skc_bypass_prot_mem: bypass the per-protocol memory accounting for skb
* @skc_bound_dev_if: bound device index if != 0
* @skc_bind_node: bind hash linkage for various protocol lookup tables
- * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
+ * @skc_portaddr_node: second hash linkage for UDP
* @skc_prot: protocol handlers inside a network family
* @skc_net: reference to the network namespace of this socket
* @skc_v6_daddr: IPV6 destination address
* @skc_v6_rcv_saddr: IPV6 source address
* @skc_cookie: socket's cookie value
* @skc_node: main hash linkage for various protocol lookup tables
- * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
+ * @skc_nulls_node: main hash linkage for TCP
* @skc_tx_queue_mapping: tx queue number for this connection
* @skc_rx_queue_mapping: rx queue number for this connection
* @skc_flags: place holder for sk_flags
@@ -1316,7 +1321,7 @@ struct proto {
int (*sendmsg)(struct sock *sk, struct msghdr *msg,
size_t len);
int (*recvmsg)(struct sock *sk, struct msghdr *msg,
- size_t len, int flags, int *addr_len);
+ size_t len, int flags);
void (*splice_eof)(struct socket *sock);
int (*bind)(struct sock *sk,
struct sockaddr_unsized *addr, int addr_len);
@@ -1387,7 +1392,6 @@ struct proto {
union {
struct inet_hashinfo *hashinfo;
- struct udp_table *udp_table;
struct raw_hashinfo *raw_hash;
struct smc_hashinfo *smc_hash;
} h;
@@ -1709,7 +1713,6 @@ static inline void lock_sock(struct sock *sk)
lock_sock_nested(sk, 0);
}
-void __lock_sock(struct sock *sk);
void __release_sock(struct sock *sk);
void release_sock(struct sock *sk);
@@ -2499,12 +2502,23 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
struct sk_buff *skb));
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
- enum skb_drop_reason *reason);
+enum skb_drop_reason
+sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb);
static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
- return sock_queue_rcv_skb_reason(sk, skb, NULL);
+ enum skb_drop_reason drop_reason = sock_queue_rcv_skb_reason(sk, skb);
+
+ switch (drop_reason) {
+ case SKB_DROP_REASON_SOCKET_RCVBUFF:
+ return -ENOMEM;
+ case SKB_DROP_REASON_PROTO_MEM:
+ return -ENOBUFS;
+ case 0:
+ return 0;
+ default:
+ return -EPERM;
+ }
}
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
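
A sketch (editorial) of a protocol receive path consuming the reason now returned directly; whether the callee frees the skb on failure is not visible in this diff, so the sketch assumes the caller frees it, mirroring existing __sock_queue_rcv_skb() users.

#include <net/sock.h>

static int foo_rcv(struct sock *sk, struct sk_buff *skb)
{
	enum skb_drop_reason reason;

	reason = sock_queue_rcv_skb_reason(sk, skb);
	if (reason) {
		kfree_skb_reason(skb, reason);
		return NET_RX_DROP;
	}
	return NET_RX_SUCCESS;
}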
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 8346b0d29542..ee500706496b 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -15,6 +15,7 @@
#define SWITCHDEV_F_NO_RECURSE BIT(0)
#define SWITCHDEV_F_SKIP_EOPNOTSUPP BIT(1)
#define SWITCHDEV_F_DEFER BIT(2)
+#define SWITCHDEV_F_NO_FOREIGN BIT(3)
enum switchdev_attr_id {
SWITCHDEV_ATTR_ID_UNDEFINED,
diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index ffe58a02537c..4ebb053bb0dd 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -12,7 +12,8 @@
#define TC_INDIRECT_SCOPE
-extern struct static_key_false tc_skip_wrapper;
+extern struct static_key_false tc_skip_wrapper_act;
+extern struct static_key_false tc_skip_wrapper_cls;
/* TC Actions */
#ifdef CONFIG_NET_CLS_ACT
@@ -46,7 +47,7 @@ TC_INDIRECT_ACTION_DECLARE(tunnel_key_act);
static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
- if (static_branch_likely(&tc_skip_wrapper))
+ if (static_branch_likely(&tc_skip_wrapper_act))
goto skip;
#if IS_BUILTIN(CONFIG_NET_ACT_GACT)
@@ -153,7 +154,7 @@ TC_INDIRECT_FILTER_DECLARE(u32_classify);
static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- if (static_branch_likely(&tc_skip_wrapper))
+ if (static_branch_likely(&tc_skip_wrapper_cls))
goto skip;
#if IS_BUILTIN(CONFIG_NET_CLS_BPF)
@@ -202,8 +203,44 @@ skip:
static inline void tc_wrapper_init(void)
{
#ifdef CONFIG_X86
- if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE))
- static_branch_enable(&tc_skip_wrapper);
+ int cnt_cls = IS_BUILTIN(CONFIG_NET_CLS_BPF) +
+ IS_BUILTIN(CONFIG_NET_CLS_U32) +
+ IS_BUILTIN(CONFIG_NET_CLS_FLOWER) +
+ IS_BUILTIN(CONFIG_NET_CLS_FW) +
+ IS_BUILTIN(CONFIG_NET_CLS_MATCHALL) +
+ IS_BUILTIN(CONFIG_NET_CLS_BASIC) +
+ IS_BUILTIN(CONFIG_NET_CLS_CGROUP) +
+ IS_BUILTIN(CONFIG_NET_CLS_FLOW) +
+ IS_BUILTIN(CONFIG_NET_CLS_ROUTE4);
+
+ int cnt_act = IS_BUILTIN(CONFIG_NET_ACT_GACT) +
+ IS_BUILTIN(CONFIG_NET_ACT_MIRRED) +
+ IS_BUILTIN(CONFIG_NET_ACT_PEDIT) +
+ IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT) +
+ IS_BUILTIN(CONFIG_NET_ACT_SKBMOD) +
+ IS_BUILTIN(CONFIG_NET_ACT_POLICE) +
+ IS_BUILTIN(CONFIG_NET_ACT_BPF) +
+ IS_BUILTIN(CONFIG_NET_ACT_CONNMARK) +
+ IS_BUILTIN(CONFIG_NET_ACT_CSUM) +
+ IS_BUILTIN(CONFIG_NET_ACT_CT) +
+ IS_BUILTIN(CONFIG_NET_ACT_CTINFO) +
+ IS_BUILTIN(CONFIG_NET_ACT_GATE) +
+ IS_BUILTIN(CONFIG_NET_ACT_MPLS) +
+ IS_BUILTIN(CONFIG_NET_ACT_NAT) +
+ IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY) +
+ IS_BUILTIN(CONFIG_NET_ACT_VLAN) +
+ IS_BUILTIN(CONFIG_NET_ACT_IFE) +
+ IS_BUILTIN(CONFIG_NET_ACT_SIMP) +
+ IS_BUILTIN(CONFIG_NET_ACT_SAMPLE);
+
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
+ return;
+
+ if (cnt_cls > 1)
+ static_branch_enable(&tc_skip_wrapper_cls);
+
+ if (cnt_act > 1)
+ static_branch_enable(&tc_skip_wrapper_act);
#endif
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 978eea2d5df0..dfa52ceefd23 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -363,7 +363,6 @@ int tcp_v4_err(struct sk_buff *skb, u32);
void tcp_shutdown(struct sock *sk, int how);
-int tcp_v4_early_demux(struct sk_buff *skb);
int tcp_v4_rcv(struct sk_buff *skb);
void tcp_remove_empty_skb(struct sock *sk);
@@ -376,7 +375,21 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
int tcp_wmem_schedule(struct sock *sk, int copy);
void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
int size_goal);
+
void tcp_release_cb(struct sock *sk);
+
+static inline bool tcp_release_cb_cond(struct sock *sk)
+{
+#ifdef CONFIG_INET
+ if (likely(sk->sk_prot->release_cb == tcp_release_cb)) {
+ if (unlikely(smp_load_acquire(&sk->sk_tsq_flags) & TCP_DEFERRED_ALL))
+ tcp_release_cb(sk);
+ return true;
+ }
+#endif
+ return false;
+}
+
void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
void tcp_delack_timer_handler(struct sock *sk);
@@ -501,11 +514,19 @@ void tcp_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int flags, int *addr_len);
+ int flags);
int tcp_set_rcvlowat(struct sock *sk, int val);
+void tcp_set_rcvbuf(struct sock *sk, int val);
int tcp_set_window_clamp(struct sock *sk, int val);
-void tcp_update_recv_tstamps(struct sk_buff *skb,
- struct scm_timestamping_internal *tss);
+
+static inline void
+tcp_update_recv_tstamps(struct sk_buff *skb,
+ struct scm_timestamping_internal *tss)
+{
+ tss->ts[0] = skb->tstamp;
+ tss->ts[2] = skb_hwtstamps(skb)->hwtstamp;
+}
+
void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
struct scm_timestamping_internal *tss);
void tcp_data_ready(struct sock *sk);
@@ -532,7 +553,6 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
* TCP v4 functions exported for the inet6 API
*/
-void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
void tcp_v4_mtu_reduced(struct sock *sk);
void tcp_req_err(struct sock *sk, u32 seq, bool abort);
void tcp_ld_RTO_revert(struct sock *sk, u32 seq);
@@ -915,6 +935,28 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp)
return (u32) win;
}
+/* Compute the maximum receive window we ever advertised.
+ * rcv_nxt can be beyond the advertised edge if our peer pushed more
+ * data than the offered window.
+ */
+static inline u32 tcp_max_receive_window(const struct tcp_sock *tp)
+{
+ s32 win = tp->rcv_mwnd_seq - tp->rcv_nxt;
+
+ if (win < 0)
+ win = 0;
+ return (u32) win;
+}
+
+/* Check if we need to update the maximum receive window sequence number */
+static inline void tcp_update_max_rcv_wnd_seq(struct tcp_sock *tp)
+{
+ u32 wre = tp->rcv_wup + tp->rcv_wnd;
+
+ if (after(wre, tp->rcv_mwnd_seq))
+ tp->rcv_mwnd_seq = wre;
+}
+
/* Choose a new window, without checks for shrinking, and without
* scaling applied to the result. The caller does these things
* if necessary. This is a "raw" window selection.
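
Worked example for the two helpers above: with rcv_wup = 1000 and rcv_wnd = 64000, tcp_update_max_rcv_wnd_seq() advances rcv_mwnd_seq to 65000. While rcv_nxt = 30000 the maximum receive window is 35000; if the peer pushes past the advertised edge and rcv_nxt reaches 65500, tcp_max_receive_window() clamps the negative difference and returns 0 instead of a huge unsigned value.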
@@ -1135,9 +1177,7 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb)
extern const struct inet_connection_sock_af_ops ipv6_specific;
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
-void tcp_v6_early_demux(struct sk_buff *skb);
#endif
@@ -1302,6 +1342,9 @@ struct tcp_congestion_ops {
/* call when cwnd event occurs (optional) */
void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
+ /* call when CA_EVENT_TX_START cwnd event occurs (optional) */
+ void (*cwnd_event_tx_start)(struct sock *sk);
+
/* call when ack arrives (optional) */
void (*in_ack_event)(struct sock *sk, u32 flags);
@@ -1401,6 +1444,11 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
+ if (event == CA_EVENT_TX_START) {
+ if (icsk->icsk_ca_ops->cwnd_event_tx_start)
+ icsk->icsk_ca_ops->cwnd_event_tx_start(sk);
+ return;
+ }
if (icsk->icsk_ca_ops->cwnd_event)
icsk->icsk_ca_ops->cwnd_event(sk, event);
}
@@ -1633,15 +1681,14 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
__skb_checksum_complete(skb);
}
-bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
- enum skb_drop_reason *reason);
+enum skb_drop_reason tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
-static inline int tcp_filter(struct sock *sk, struct sk_buff *skb,
- enum skb_drop_reason *reason)
+static inline enum skb_drop_reason
+tcp_filter(struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = (const struct tcphdr *)skb->data;
- return sk_filter_trim_cap(sk, skb, __tcp_hdrlen(th), reason);
+ return sk_filter_trim_cap(sk, skb, __tcp_hdrlen(th));
}
void tcp_set_state(struct sock *sk, int state);
@@ -2156,7 +2203,30 @@ enum tcp_chrono {
__TCP_CHRONO_MAX,
};
-void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type);
+static inline void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
+{
+ const u32 now = tcp_jiffies32;
+ enum tcp_chrono old = tp->chrono_type;
+
+ if (old > TCP_CHRONO_UNSPEC)
+ tp->chrono_stat[old - 1] += now - tp->chrono_start;
+ tp->chrono_start = now;
+ tp->chrono_type = new;
+}
+
+static inline void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ /* If there are multiple conditions worthy of tracking in a
+ * chronograph then the highest priority enum takes precedence
+ * over the other conditions. So that if something "more interesting"
+ * starts happening, stop the previous chrono and start a new one.
+ */
+ if (type > tp->chrono_type)
+ tcp_chrono_set(tp, type);
+}
+
void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type);
/* This helper is needed, because skb->tcp_tsorted_anchor uses
@@ -2385,7 +2455,15 @@ void tcp_gro_complete(struct sk_buff *skb);
static inline void tcp_gro_complete(struct sk_buff *skb) { }
#endif
-void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
+static inline void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr,
+ __be32 daddr)
+{
+ struct tcphdr *th = tcp_hdr(skb);
+
+ th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+}
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
@@ -2999,4 +3077,18 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk,
const void *saddr, const void *daddr,
int family, int dif, int sdif);
+static inline int tcp_recv_should_stop(struct sock *sk)
+{
+ return sk->sk_err ||
+ sk->sk_state == TCP_CLOSE ||
+ (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ signal_pending(current);
+}
+
+INDIRECT_CALLABLE_DECLARE(union tcp_seq_and_ts_off
+ tcp_v4_init_seq_and_ts_off(const struct net *net,
+ const struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(union tcp_seq_and_ts_off
+ tcp_v6_init_seq_and_ts_off(const struct net *net,
+ const struct sk_buff *skb));
#endif /* _TCP_H */
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index 1a97e3f32029..c0a421fe0c2a 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -8,7 +8,6 @@
/* IPv6 transport protocols */
extern struct proto rawv6_prot;
extern struct proto udpv6_prot;
-extern struct proto udplitev6_prot;
extern struct proto tcpv6_prot;
extern struct proto pingv6_prot;
@@ -28,8 +27,6 @@ int rawv6_init(void);
void rawv6_exit(void);
int udpv6_init(void);
void udpv6_exit(void);
-int udplitev6_init(void);
-void udplitev6_exit(void);
int tcpv6_init(void);
void tcpv6_exit(void);
diff --git a/include/net/tso.h b/include/net/tso.h
index e7e157ae0526..da82aabd1d48 100644
--- a/include/net/tso.h
+++ b/include/net/tso.h
@@ -3,6 +3,7 @@
#define _TSO_H
#include <linux/skbuff.h>
+#include <linux/dma-mapping.h>
#include <net/ip.h>
#define TSO_HEADER_SIZE 256
@@ -28,4 +29,103 @@ void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso,
void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size);
int tso_start(struct sk_buff *skb, struct tso_t *tso);
+/**
+ * struct tso_dma_map - DMA mapping state for GSO payload
+ * @dev: device used for DMA mapping
+ * @skb: the GSO skb being mapped
+ * @hdr_len: per-segment header length
+ * @iova_state: DMA IOVA state (when IOMMU available)
+ * @iova_offset: global byte offset into IOVA range (IOVA path only)
+ * @total_len: total payload length
+ * @frag_idx: current region (-1 = linear, 0..nr_frags-1 = frag)
+ * @offset: byte offset within current region
+ * @linear_dma: DMA address of the linear payload
+ * @linear_len: length of the linear payload
+ * @nr_frags: number of frags successfully DMA-mapped
+ * @frags: per-frag DMA address and length
+ *
+ * DMA-maps the payload regions of a GSO skb (linear data + frags).
+ * Prefers the DMA IOVA API for a single contiguous mapping with one
+ * IOTLB sync; falls back to per-region dma_map_phys() otherwise.
+ */
+struct tso_dma_map {
+ struct device *dev;
+ const struct sk_buff *skb;
+ unsigned int hdr_len;
+ /* IOVA path */
+ struct dma_iova_state iova_state;
+ size_t iova_offset;
+ size_t total_len;
+ /* Fallback path if IOVA path fails */
+ int frag_idx;
+ unsigned int offset;
+ dma_addr_t linear_dma;
+ unsigned int linear_len;
+ unsigned int nr_frags;
+ struct {
+ dma_addr_t dma;
+ unsigned int len;
+ } frags[MAX_SKB_FRAGS];
+};
+
+/**
+ * struct tso_dma_map_completion_state - Completion-time cleanup state
+ * @iova_state: DMA IOVA state (when IOMMU available)
+ * @total_len: total payload length of the IOVA mapping
+ *
+ * Drivers store this on their SW ring at xmit time via
+ * tso_dma_map_completion_save(), then call tso_dma_map_complete() at
+ * completion time.
+ */
+struct tso_dma_map_completion_state {
+ struct dma_iova_state iova_state;
+ size_t total_len;
+};
+
+int tso_dma_map_init(struct tso_dma_map *map, struct device *dev,
+ const struct sk_buff *skb, unsigned int hdr_len);
+void tso_dma_map_cleanup(struct tso_dma_map *map);
+unsigned int tso_dma_map_count(struct tso_dma_map *map, unsigned int len);
+bool tso_dma_map_next(struct tso_dma_map *map, dma_addr_t *addr,
+ unsigned int *chunk_len, unsigned int *mapping_len,
+ unsigned int seg_remaining);
+
+/**
+ * tso_dma_map_completion_save - save state needed for completion-time cleanup
+ * @map: the xmit-time DMA map
+ * @cstate: driver-owned storage that persists until completion
+ *
+ * Call this at xmit time to snapshot the mapping state into @cstate;
+ * @cstate is later handed to tso_dma_map_complete().
+ */
+static inline void
+tso_dma_map_completion_save(const struct tso_dma_map *map,
+ struct tso_dma_map_completion_state *cstate)
+{
+ cstate->iova_state = map->iova_state;
+ cstate->total_len = map->total_len;
+}
+
+/**
+ * tso_dma_map_complete - tear down mapping at completion time
+ * @dev: the device that owns the mapping
+ * @cstate: state saved by tso_dma_map_completion_save()
+ *
+ * Return: true if the IOVA path was used and the mapping has been
+ * destroyed; false if the fallback per-region path was used and the
+ * driver must unmap via its normal completion path.
+ */
+static inline bool
+tso_dma_map_complete(struct device *dev,
+ struct tso_dma_map_completion_state *cstate)
+{
+ if (dma_use_iova(&cstate->iova_state)) {
+ dma_iova_destroy(dev, &cstate->iova_state, cstate->total_len,
+ DMA_TO_DEVICE, 0);
+ return true;
+ }
+
+ return false;
+}
+
#endif /* _TSO_H */
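
To show how the pieces fit together, an editorial sketch of a driver xmit path; foo_ring, foo_post_desc() and the fallback unmap are hypothetical, and the way tso_dma_map_next() chunks the payload is inferred from its prototype rather than taken from this diff.

#include <net/tso.h>

static int foo_xmit_tso(struct foo_ring *ring, struct device *dma_dev,
			struct sk_buff *skb)
{
	unsigned int mss = skb_shinfo(skb)->gso_size;
	struct tso_dma_map map;
	struct tso_t tso;
	int hdr_len, payload, err;

	hdr_len = tso_start(skb, &tso);
	payload = skb->len - hdr_len;

	err = tso_dma_map_init(&map, dma_dev, skb, hdr_len);
	if (err)
		return err;

	while (payload > 0) {
		unsigned int seg = min_t(unsigned int, mss, payload);
		unsigned int left = seg;
		char hdr[TSO_HEADER_SIZE];

		tso_build_hdr(skb, hdr, &tso, seg, payload == seg);
		/* post the copied header descriptor here */

		while (left) {
			unsigned int chunk, mapping;	/* mapping unused here */
			dma_addr_t addr;

			if (!tso_dma_map_next(&map, &addr, &chunk,
					      &mapping, left))
				goto err_cleanup;

			foo_post_desc(ring, addr, chunk);	/* hypothetical */
			tso_build_data(skb, &tso, chunk);
			left -= chunk;
		}
		payload -= seg;
	}

	/* stash what the completion path needs to tear the mapping down */
	tso_dma_map_completion_save(&map, &ring->tx_cstate[ring->tail]);
	return 0;

err_cleanup:
	tso_dma_map_cleanup(&map);
	return -ENOMEM;
}

/* completion time:
 *	if (!tso_dma_map_complete(dma_dev, &ring->tx_cstate[idx]))
 *		foo_unmap_fallback(ring, idx);	// per-region unmap path
 */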
diff --git a/include/net/udp.h b/include/net/udp.h
index da68702ddf6e..8262e2b215b4 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -29,13 +29,12 @@
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/indirect_call_wrapper.h>
+#include <linux/math.h>
/**
- * struct udp_skb_cb - UDP(-Lite) private variables
+ * struct udp_skb_cb - UDP private variables
*
* @header: private variables used by IPv4/IPv6
- * @cscov: checksum coverage length (UDP-Lite only)
- * @partial_cov: if set indicates partial csum coverage
*/
struct udp_skb_cb {
union {
@@ -44,8 +43,6 @@ struct udp_skb_cb {
struct inet6_skb_parm h6;
#endif
} header;
- __u16 cscov;
- __u8 partial_cov;
};
#define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb))
@@ -104,7 +101,7 @@ struct udp_table {
unsigned int log;
};
extern struct udp_table udp_table;
-void udp_table_init(struct udp_table *, const char *);
+
static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
const struct net *net,
unsigned int num)
@@ -215,13 +212,11 @@ extern int sysctl_udp_wmem_min;
struct sk_buff;
/*
- * Generic checksumming routines for UDP(-Lite) v4 and v6
+ * Generic checksumming routines for UDP v4 and v6
*/
static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb)
{
- return (UDP_SKB_CB(skb)->cscov == skb->len ?
- __skb_checksum_complete(skb) :
- __skb_checksum_complete_head(skb, UDP_SKB_CB(skb)->cscov));
+ return __skb_checksum_complete(skb);
}
static inline int udp_lib_checksum_complete(struct sk_buff *skb)
@@ -272,7 +267,6 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
skb->csum = csum_partial(skb->data, sizeof(struct udphdr),
skb->csum);
skb_pull_rcsum(skb, sizeof(struct udphdr));
- UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr);
}
typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
@@ -281,6 +275,10 @@ typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
void udp_v6_early_demux(struct sk_buff *skb);
INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
+int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
+INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int flags));
+
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
netdev_features_t features, bool is_ipv6);
@@ -307,7 +305,7 @@ static inline void udp_drops_inc(struct sock *sk)
numa_drop_add(&udp_sk(sk)->drop_counters, 1);
}
-/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
+/* hash routines shared by UDPv4 and UDPv6 */
static inline int udp_lib_hash(struct sock *sk)
{
BUG();
@@ -376,7 +374,7 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
*/
hash ^= hash << 16;
- return htons((((u64) hash * (max - min)) >> 32) + min);
+ return htons(reciprocal_scale(hash, max - min + 1) + min);
}
static inline int udp_rqueue_get(struct sock *sk)
@@ -415,6 +413,8 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
int udp_err(struct sk_buff *, u32);
int udp_abort(struct sock *sk, int err);
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
+INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int flags));
void udp_splice_eof(struct socket *sock);
int udp_push_pending_frames(struct sock *sk);
void udp_flush_pending_frames(struct sock *sk);
@@ -422,7 +422,6 @@ int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
int udp_rcv(struct sk_buff *skb);
int udp_ioctl(struct sock *sk, int cmd, int *karg);
-int udp_init_sock(struct sock *sk);
int udp_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
int __udp_disconnect(struct sock *sk, int flags);
int udp_disconnect(struct sock *sk, int flags);
@@ -438,9 +437,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif);
struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
- __be16 sport,
- __be32 daddr, __be16 dport, int dif, int sdif,
- struct udp_table *tbl, struct sk_buff *skb);
+ __be16 sport, __be32 daddr, __be16 dport,
+ int dif, int sdif, struct sk_buff *skb);
struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport);
struct sock *udp6_lib_lookup(const struct net *net,
@@ -450,8 +448,7 @@ struct sock *udp6_lib_lookup(const struct net *net,
struct sock *__udp6_lib_lookup(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
- int dif, int sdif, struct udp_table *tbl,
- struct sk_buff *skb);
+ int dif, int sdif, struct sk_buff *skb);
struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport);
int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
@@ -523,38 +520,28 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
}
/*
- * SNMP statistics for UDP and UDP-Lite
+ * SNMP statistics for UDP
*/
-#define UDP_INC_STATS(net, field, is_udplite) do { \
- if (unlikely(is_udplite)) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \
- else SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0)
-#define __UDP_INC_STATS(net, field, is_udplite) do { \
- if (unlikely(is_udplite)) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \
- else __SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0)
-
-#define __UDP6_INC_STATS(net, field, is_udplite) do { \
- if (unlikely(is_udplite)) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \
- else __SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \
-} while(0)
-#define UDP6_INC_STATS(net, field, __lite) do { \
- if (unlikely(__lite)) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \
- else SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \
-} while(0)
+#define __UDP_INC_STATS(net, field) \
+ __SNMP_INC_STATS((net)->mib.udp_statistics, field)
+#define UDP_INC_STATS(net, field) \
+ SNMP_INC_STATS((net)->mib.udp_statistics, field)
+#define __UDP6_INC_STATS(net, field) \
+ __SNMP_INC_STATS((net)->mib.udp_stats_in6, field)
+#define UDP6_INC_STATS(net, field) \
+ SNMP_INC_STATS((net)->mib.udp_stats_in6, field)
#if IS_ENABLED(CONFIG_IPV6)
-#define __UDPX_MIB(sk, ipv4) \
-({ \
- ipv4 ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \
- sock_net(sk)->mib.udp_statistics) : \
- (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_stats_in6 : \
- sock_net(sk)->mib.udp_stats_in6); \
-})
+#define __UDPX_MIB(sk, ipv4) \
+ ({ \
+ ipv4 ? sock_net(sk)->mib.udp_statistics : \
+ sock_net(sk)->mib.udp_stats_in6; \
+ })
#else
-#define __UDPX_MIB(sk, ipv4) \
-({ \
- IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \
- sock_net(sk)->mib.udp_statistics; \
-})
+#define __UDPX_MIB(sk, ipv4) \
+ ({ \
+ sock_net(sk)->mib.udp_statistics; \
+ })
#endif
#define __UDPX_INC_STATS(sk, field) \
@@ -563,7 +550,6 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
#ifdef CONFIG_PROC_FS
struct udp_seq_afinfo {
sa_family_t family;
- struct udp_table *udp_table;
};
struct udp_iter_state {
@@ -575,9 +561,6 @@ void *udp_seq_start(struct seq_file *seq, loff_t *pos);
void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos);
void udp_seq_stop(struct seq_file *seq, void *v);
-extern const struct seq_operations udp_seq_ops;
-extern const struct seq_operations udp6_seq_ops;
-
int udp4_proc_init(void);
void udp4_proc_exit(void);
#endif /* CONFIG_PROC_FS */
@@ -648,9 +631,6 @@ drop:
static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
{
- /* UDP-lite can't land here - no GRO */
- WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
-
/* UDP packets generated with UDP_SEGMENT and traversing:
*
* UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx)
@@ -664,7 +644,6 @@ static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
* a valid csum after the segmentation.
* Additionally fixup the UDP CB.
*/
- UDP_SKB_CB(skb)->cscov = skb->len;
if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid)
skb->csum_valid = 1;
}
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index fc1fc43345b5..47c23d4a1740 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -7,7 +7,6 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
-#include <net/ipv6_stubs.h>
#endif
#define UDP_TUNNEL_PARTIAL_FEATURES NETIF_F_GSO_ENCAP_ALL
@@ -230,7 +229,7 @@ static inline void udp_tunnel_encap_enable(struct sock *sk)
#if IS_ENABLED(CONFIG_IPV6)
if (READ_ONCE(sk->sk_family) == PF_INET6)
- ipv6_stub->udpv6_encap_enable();
+ udpv6_encap_enable();
#endif
udp_encap_enable();
}
diff --git a/include/net/udplite.h b/include/net/udplite.h
deleted file mode 100644
index 786919d29f8d..000000000000
--- a/include/net/udplite.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Definitions for the UDP-Lite (RFC 3828) code.
- */
-#ifndef _UDPLITE_H
-#define _UDPLITE_H
-
-#include <net/ip6_checksum.h>
-#include <net/udp.h>
-
-/* UDP-Lite socket options */
-#define UDPLITE_SEND_CSCOV 10 /* sender partial coverage (as sent) */
-#define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */
-
-extern struct proto udplite_prot;
-extern struct udp_table udplite_table;
-
-/*
- * Checksum computation is all in software, hence simpler getfrag.
- */
-static __inline__ int udplite_getfrag(void *from, char *to, int offset,
- int len, int odd, struct sk_buff *skb)
-{
- struct msghdr *msg = from;
- return copy_from_iter_full(to, len, &msg->msg_iter) ? 0 : -EFAULT;
-}
-
-/*
- * Checksumming routines
- */
-static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
-{
- u16 cscov;
-
- /* In UDPv4 a zero checksum means that the transmitter generated no
- * checksum. UDP-Lite (like IPv6) mandates checksums, hence packets
- * with a zero checksum field are illegal. */
- if (uh->check == 0) {
- net_dbg_ratelimited("UDPLite: zeroed checksum field\n");
- return 1;
- }
-
- cscov = ntohs(uh->len);
-
- if (cscov == 0) /* Indicates that full coverage is required. */
- ;
- else if (cscov < 8 || cscov > skb->len) {
- /*
- * Coverage length violates RFC 3828: log and discard silently.
- */
- net_dbg_ratelimited("UDPLite: bad csum coverage %d/%d\n",
- cscov, skb->len);
- return 1;
-
- } else if (cscov < skb->len) {
- UDP_SKB_CB(skb)->partial_cov = 1;
- UDP_SKB_CB(skb)->cscov = cscov;
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = CHECKSUM_NONE;
- skb->csum_valid = 0;
- }
-
- return 0;
-}
-
-/* Fast-path computation of checksum. Socket may not be locked. */
-static inline __wsum udplite_csum(struct sk_buff *skb)
-{
- const int off = skb_transport_offset(skb);
- const struct sock *sk = skb->sk;
- int len = skb->len - off;
-
- if (udp_test_bit(UDPLITE_SEND_CC, sk)) {
- u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen);
-
- if (pcslen < len) {
- if (pcslen > 0)
- len = pcslen;
- udp_hdr(skb)->len = htons(pcslen);
- }
- }
- skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */
-
- return skb_checksum(skb, off, len, 0);
-}
-
-void udplite4_register(void);
-#endif /* _UDPLITE_H */
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 0b1abdb99c9e..ccb3b350001f 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -174,13 +174,6 @@ static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool,
dma_sync_single_for_device(pool->dev, dma, size, DMA_BIDIRECTIONAL);
}
-/* Masks for xdp_umem_page flags.
- * The low 12-bits of the addr will be 0 since this is the page address, so we
- * can use them for flags.
- */
-#define XSK_NEXT_PG_CONTIG_SHIFT 0
-#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT)
-
static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
u64 addr, u32 len)
{