diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2021-02-22 21:35:15 -0800 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2021-02-22 21:35:15 -0800 |
| commit | cbecf716ca618fd44feda6bd9a64a8179d031fc5 (patch) | |
| tree | 186c9f69f0d11f773253c440dac85087f67288b7 /include/net | |
| parent | 6524d8eac258452e547f8a49c8a965ac6dd8a161 (diff) | |
| parent | 4c47097f8514e4b35a31e04e33172d0193cb38ed (diff) | |
Merge branch 'next' into for-linus
Prepare input updates for 5.12 merge window.
Diffstat (limited to 'include/net')
77 files changed, 1800 insertions, 1193 deletions
diff --git a/include/net/9p/client.h b/include/net/9p/client.h index dd5b5bd781a4..e1c308d8d288 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -140,10 +140,16 @@ struct p9_client { * * TODO: This needs lots of explanation. */ +enum fid_source { + FID_FROM_OTHER, + FID_FROM_INODE, + FID_FROM_DENTRY, +}; struct p9_fid { struct p9_client *clnt; u32 fid; + refcount_t count; int mode; struct p9_qid qid; u32 iounit; @@ -152,6 +158,7 @@ struct p9_fid { void *rdir; struct hlist_node dlist; /* list of all fids attached to a dentry */ + struct hlist_node ilist; }; /** diff --git a/include/net/act_api.h b/include/net/act_api.h index 87214927314a..55dab604861f 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -239,6 +239,12 @@ int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, struct netlink_ext_ack *newchain); struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, struct tcf_chain *newchain); + +#ifdef CONFIG_INET +DECLARE_STATIC_KEY_FALSE(tcf_frag_xmit_count); +#endif + +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index c8e67042a3b1..c1504aa3d9cf 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1797,6 +1797,13 @@ struct hci_cp_le_set_adv_set_rand_addr { bdaddr_t bdaddr; } __packed; +#define HCI_OP_LE_READ_TRANSMIT_POWER 0x204b +struct hci_rp_le_read_transmit_power { + __u8 status; + __s8 min_le_tx_power; + __s8 max_le_tx_power; +} __packed; + #define HCI_OP_LE_READ_BUFFER_SIZE_V2 0x2060 struct hci_rp_le_read_buffer_size_v2 { __u8 status; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 8caac20556b4..677a8c50b2ad 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -230,6 +230,8 @@ struct adv_info { __u16 scan_rsp_len; __u8 scan_rsp_data[HCI_MAX_AD_LENGTH]; __s8 tx_power; + __u32 min_interval; + __u32 max_interval; bdaddr_t random_addr; bool rpa_expired; struct delayed_work rpa_expired_cb; @@ -238,6 +240,8 @@ struct adv_info { #define HCI_MAX_ADV_INSTANCES 5 #define HCI_DEFAULT_ADV_DURATION 2 +#define HCI_ADV_TX_POWER_NO_PREFERENCE 0x7F + struct adv_pattern { struct list_head list; __u8 ad_type; @@ -361,6 +365,9 @@ struct hci_dev { __u8 ssp_debug_mode; __u8 hw_error_code; __u32 clock; + __u16 advmon_allowlist_duration; + __u16 advmon_no_filter_duration; + __u8 enable_advmon_interleave_scan; __u16 devid_source; __u16 devid_vendor; @@ -377,6 +384,8 @@ struct hci_dev { __u16 def_page_timeout; __u16 def_multi_adv_rotation_duration; __u16 def_le_autoconnect_timeout; + __s8 min_le_tx_power; + __s8 max_le_tx_power; __u16 pkt_type; __u16 esco_type; @@ -484,6 +493,9 @@ struct hci_dev { enum suspended_state suspend_state; bool scanning_paused; bool suspended; + u8 wake_reason; + bdaddr_t wake_addr; + u8 wake_addr_type; wait_queue_head_t suspend_wait_q; DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS); @@ -539,6 +551,14 @@ struct hci_dev { struct delayed_work rpa_expired; bdaddr_t rpa; + enum { + INTERLEAVE_SCAN_NONE, + INTERLEAVE_SCAN_NO_FILTER, + INTERLEAVE_SCAN_ALLOWLIST + } interleave_scan_state; + + struct delayed_work interleave_scan; + #if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; #endif @@ -1287,7 +1307,11 @@ struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data, - u16 timeout, u16 duration); + u16 timeout, u16 duration, s8 tx_power, + u32 min_interval, u32 max_interval); +int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, + u16 adv_data_len, u8 *adv_data, + u16 scan_rsp_len, u8 *scan_rsp_data); int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired); @@ -1750,6 +1774,9 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, s8 rssi, u8 *name, u8 name_len); void mgmt_discovering(struct hci_dev *hdev, u8 discovering); +void mgmt_suspending(struct hci_dev *hdev, u8 state); +void mgmt_resuming(struct hci_dev *hdev, u8 reason, bdaddr_t *bdaddr, + u8 addr_type); bool mgmt_powering_down(struct hci_dev *hdev); void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent); void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent); diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 8f1e6a7a2df8..1d1232917de7 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -665,6 +665,8 @@ struct l2cap_ops { struct sk_buff *(*alloc_skb) (struct l2cap_chan *chan, unsigned long hdr_len, unsigned long len, int nb); + int (*filter) (struct l2cap_chan * chan, + struct sk_buff *skb); }; struct l2cap_conn { diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index beae5c3980f0..f9a6638e20b3 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -572,6 +572,12 @@ struct mgmt_rp_add_advertising { #define MGMT_ADV_FLAG_SEC_1M BIT(7) #define MGMT_ADV_FLAG_SEC_2M BIT(8) #define MGMT_ADV_FLAG_SEC_CODED BIT(9) +#define MGMT_ADV_FLAG_CAN_SET_TX_POWER BIT(10) +#define MGMT_ADV_FLAG_HW_OFFLOAD BIT(11) +#define MGMT_ADV_PARAM_DURATION BIT(12) +#define MGMT_ADV_PARAM_TIMEOUT BIT(13) +#define MGMT_ADV_PARAM_INTERVALS BIT(14) +#define MGMT_ADV_PARAM_TX_POWER BIT(15) #define MGMT_ADV_FLAG_SEC_MASK (MGMT_ADV_FLAG_SEC_1M | MGMT_ADV_FLAG_SEC_2M | \ MGMT_ADV_FLAG_SEC_CODED) @@ -619,7 +625,7 @@ struct mgmt_cp_set_appearance { #define MGMT_SET_APPEARANCE_SIZE 2 #define MGMT_OP_GET_PHY_CONFIGURATION 0x0044 -struct mgmt_rp_get_phy_confguration { +struct mgmt_rp_get_phy_configuration { __le32 supported_phys; __le32 configurable_phys; __le32 selected_phys; @@ -656,7 +662,7 @@ struct mgmt_rp_get_phy_confguration { MGMT_PHY_LE_CODED_RX) #define MGMT_OP_SET_PHY_CONFIGURATION 0x0045 -struct mgmt_cp_set_phy_confguration { +struct mgmt_cp_set_phy_configuration { __le32 selected_phys; } __packed; #define MGMT_SET_PHY_CONFIGURATION_SIZE 4 @@ -680,11 +686,16 @@ struct mgmt_cp_set_blocked_keys { #define MGMT_OP_SET_WIDEBAND_SPEECH 0x0047 -#define MGMT_OP_READ_SECURITY_INFO 0x0048 -#define MGMT_READ_SECURITY_INFO_SIZE 0 -struct mgmt_rp_read_security_info { - __le16 sec_len; - __u8 sec[]; +#define MGMT_CAP_SEC_FLAGS 0x01 +#define MGMT_CAP_MAX_ENC_KEY_SIZE 0x02 +#define MGMT_CAP_SMP_MAX_ENC_KEY_SIZE 0x03 +#define MGMT_CAP_LE_TX_PWR 0x04 + +#define MGMT_OP_READ_CONTROLLER_CAP 0x0048 +#define MGMT_READ_CONTROLLER_CAP_SIZE 0 +struct mgmt_rp_read_controller_cap { + __le16 cap_len; + __u8 cap[0]; } __packed; #define MGMT_OP_READ_EXP_FEATURES_INFO 0x0049 @@ -780,6 +791,36 @@ struct mgmt_rp_remove_adv_monitor { __le16 monitor_handle; } __packed; +#define MGMT_OP_ADD_EXT_ADV_PARAMS 0x0054 +struct mgmt_cp_add_ext_adv_params { + __u8 instance; + __le32 flags; + __le16 duration; + __le16 timeout; + __le32 min_interval; + __le32 max_interval; + __s8 tx_power; +} __packed; +#define MGMT_ADD_EXT_ADV_PARAMS_MIN_SIZE 18 +struct mgmt_rp_add_ext_adv_params { + __u8 instance; + __s8 tx_power; + __u8 max_adv_data_len; + __u8 max_scan_rsp_len; +} __packed; + +#define MGMT_OP_ADD_EXT_ADV_DATA 0x0055 +struct mgmt_cp_add_ext_adv_data { + __u8 instance; + __u8 adv_data_len; + __u8 scan_rsp_len; + __u8 data[]; +} __packed; +#define MGMT_ADD_EXT_ADV_DATA_SIZE 3 +struct mgmt_rp_add_ext_adv_data { + __u8 instance; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; @@ -840,6 +881,7 @@ struct mgmt_ev_device_connected { #define MGMT_DEV_DISCONN_LOCAL_HOST 0x02 #define MGMT_DEV_DISCONN_REMOTE 0x03 #define MGMT_DEV_DISCONN_AUTH_FAILURE 0x04 +#define MGMT_DEV_DISCONN_LOCAL_HOST_SUSPEND 0x05 #define MGMT_EV_DEVICE_DISCONNECTED 0x000C struct mgmt_ev_device_disconnected { @@ -1028,3 +1070,18 @@ struct mgmt_ev_adv_monitor_added { struct mgmt_ev_adv_monitor_removed { __le16 monitor_handle; } __packed; + +#define MGMT_EV_CONTROLLER_SUSPEND 0x002d +struct mgmt_ev_controller_suspend { + __u8 suspend_state; +} __packed; + +#define MGMT_EV_CONTROLLER_RESUME 0x002e +struct mgmt_ev_controller_resume { + __u8 wake_reason; + struct mgmt_addr_info addr; +} __packed; + +#define MGMT_WAKE_REASON_NON_BT_WAKE 0x0 +#define MGMT_WAKE_REASON_UNEXPECTED 0x1 +#define MGMT_WAKE_REASON_REMOTE_WAKE 0x2 diff --git a/include/net/bonding.h b/include/net/bonding.h index 7d132cc1e584..adc3da776970 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -86,10 +86,8 @@ #define bond_for_each_slave_rcu(bond, pos, iter) \ netdev_for_each_lower_private_rcu((bond)->dev, pos, iter) -#ifdef CONFIG_XFRM_OFFLOAD #define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) -#endif /* CONFIG_XFRM_OFFLOAD */ #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; @@ -185,6 +183,11 @@ struct slave { struct rtnl_link_stats64 slave_stats; }; +static inline struct slave *to_slave(struct kobject *kobj) +{ + return container_of(kobj, struct slave, kobj); +} + struct bond_up_slave { unsigned int count; struct rcu_head rcu; @@ -750,6 +753,9 @@ extern struct bond_parm_tbl ad_select_tbl[]; /* exported from bond_netlink.c */ extern struct rtnl_link_ops bond_link_ops; +/* exported from bond_sysfs_slave.c */ +extern const struct sysfs_ops slave_sysfs_ops; + static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { atomic_long_inc(&dev->tx_dropped); diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h index 5036c94c0503..0e85713f56df 100644 --- a/include/net/bpf_sk_storage.h +++ b/include/net/bpf_sk_storage.h @@ -3,13 +3,27 @@ #ifndef _BPF_SK_STORAGE_H #define _BPF_SK_STORAGE_H +#include <linux/rculist.h> +#include <linux/list.h> +#include <linux/hash.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/bpf.h> +#include <net/sock.h> +#include <uapi/linux/sock_diag.h> +#include <uapi/linux/btf.h> +#include <linux/bpf_local_storage.h> + struct sock; void bpf_sk_storage_free(struct sock *sk); extern const struct bpf_func_proto bpf_sk_storage_get_proto; extern const struct bpf_func_proto bpf_sk_storage_delete_proto; +extern const struct bpf_func_proto bpf_sk_storage_get_tracing_proto; +extern const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto; +struct bpf_local_storage_elem; struct bpf_sk_storage_diag; struct sk_buff; struct nlattr; diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index b001fa91c14e..73af4a64a599 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -23,6 +23,8 @@ */ #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) +#define BUSY_POLL_BUDGET 8 + #ifdef CONFIG_NET_RX_BUSY_POLL struct napi_struct; @@ -43,7 +45,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time); void napi_busy_loop(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), - void *loop_end_arg); + void *loop_end_arg, bool prefer_busy_poll, u16 budget); #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) @@ -105,7 +107,9 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock) unsigned int napi_id = READ_ONCE(sk->sk_napi_id); if (napi_id >= MIN_NAPI_ID) - napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk); + napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk, + READ_ONCE(sk->sk_prefer_busy_poll), + READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET); #endif } @@ -131,13 +135,28 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) sk_rx_queue_set(sk, skb); } +static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + if (!READ_ONCE(sk->sk_napi_id)) + WRITE_ONCE(sk->sk_napi_id, napi_id); +#endif +} + /* variant used for unconnected sockets */ static inline void sk_mark_napi_id_once(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL - if (!READ_ONCE(sk->sk_napi_id)) - WRITE_ONCE(sk->sk_napi_id, skb->napi_id); + __sk_mark_napi_id_once(sk, skb->napi_id); +#endif +} + +static inline void sk_mark_napi_id_once_xdp(struct sock *sk, + const struct xdp_buff *xdp) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + __sk_mark_napi_id_once(sk, xdp->rxq->napi_id); #endif } diff --git a/include/net/caif/caif_spi.h b/include/net/caif/caif_spi.h deleted file mode 100644 index a0bf4cbce71b..000000000000 --- a/include/net/caif/caif_spi.h +++ /dev/null @@ -1,155 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Daniel Martensson / Daniel.Martensson@stericsson.com - */ - -#ifndef CAIF_SPI_H_ -#define CAIF_SPI_H_ - -#include <net/caif/caif_device.h> - -#define SPI_CMD_WR 0x00 -#define SPI_CMD_RD 0x01 -#define SPI_CMD_EOT 0x02 -#define SPI_CMD_IND 0x04 - -#define SPI_DMA_BUF_LEN 8192 - -#define WL_SZ 2 /* 16 bits. */ -#define SPI_CMD_SZ 4 /* 32 bits. */ -#define SPI_IND_SZ 4 /* 32 bits. */ - -#define SPI_XFER 0 -#define SPI_SS_ON 1 -#define SPI_SS_OFF 2 -#define SPI_TERMINATE 3 - -/* Minimum time between different levels is 50 microseconds. */ -#define MIN_TRANSITION_TIME_USEC 50 - -/* Defines for calculating duration of SPI transfers for a particular - * number of bytes. - */ -#define SPI_MASTER_CLK_MHZ 13 -#define SPI_XFER_TIME_USEC(bytes, clk) (((bytes) * 8) / clk) - -/* Normally this should be aligned on the modem in order to benefit from full - * duplex transfers. However a size of 8188 provokes errors when running with - * the modem. These errors occur when packet sizes approaches 4 kB of data. - */ -#define CAIF_MAX_SPI_FRAME 4092 - -/* Maximum number of uplink CAIF frames that can reside in the same SPI frame. - * This number should correspond with the modem setting. The application side - * CAIF accepts any number of embedded downlink CAIF frames. - */ -#define CAIF_MAX_SPI_PKTS 9 - -/* Decides if SPI buffers should be prefilled with 0xFF pattern for easier - * debugging. Both TX and RX buffers will be filled before the transfer. - */ -#define CFSPI_DBG_PREFILL 0 - -/* Structure describing a SPI transfer. */ -struct cfspi_xfer { - u16 tx_dma_len; - u16 rx_dma_len; - void *va_tx[2]; - dma_addr_t pa_tx[2]; - void *va_rx; - dma_addr_t pa_rx; -}; - -/* Structure implemented by the SPI interface. */ -struct cfspi_ifc { - void (*ss_cb) (bool assert, struct cfspi_ifc *ifc); - void (*xfer_done_cb) (struct cfspi_ifc *ifc); - void *priv; -}; - -/* Structure implemented by SPI clients. */ -struct cfspi_dev { - int (*init_xfer) (struct cfspi_xfer *xfer, struct cfspi_dev *dev); - void (*sig_xfer) (bool xfer, struct cfspi_dev *dev); - struct cfspi_ifc *ifc; - char *name; - u32 clk_mhz; - void *priv; -}; - -/* Enumeration describing the CAIF SPI state. */ -enum cfspi_state { - CFSPI_STATE_WAITING = 0, - CFSPI_STATE_AWAKE, - CFSPI_STATE_FETCH_PKT, - CFSPI_STATE_GET_NEXT, - CFSPI_STATE_INIT_XFER, - CFSPI_STATE_WAIT_ACTIVE, - CFSPI_STATE_SIG_ACTIVE, - CFSPI_STATE_WAIT_XFER_DONE, - CFSPI_STATE_XFER_DONE, - CFSPI_STATE_WAIT_INACTIVE, - CFSPI_STATE_SIG_INACTIVE, - CFSPI_STATE_DELIVER_PKT, - CFSPI_STATE_MAX, -}; - -/* Structure implemented by SPI physical interfaces. */ -struct cfspi { - struct caif_dev_common cfdev; - struct net_device *ndev; - struct platform_device *pdev; - struct sk_buff_head qhead; - struct sk_buff_head chead; - u16 cmd; - u16 tx_cpck_len; - u16 tx_npck_len; - u16 rx_cpck_len; - u16 rx_npck_len; - struct cfspi_ifc ifc; - struct cfspi_xfer xfer; - struct cfspi_dev *dev; - unsigned long state; - struct work_struct work; - struct workqueue_struct *wq; - struct list_head list; - int flow_off_sent; - u32 qd_low_mark; - u32 qd_high_mark; - struct completion comp; - wait_queue_head_t wait; - spinlock_t lock; - bool flow_stop; - bool slave; - bool slave_talked; -#ifdef CONFIG_DEBUG_FS - enum cfspi_state dbg_state; - u16 pcmd; - u16 tx_ppck_len; - u16 rx_ppck_len; - struct dentry *dbgfs_dir; - struct dentry *dbgfs_state; - struct dentry *dbgfs_frame; -#endif /* CONFIG_DEBUG_FS */ -}; - -extern int spi_frm_align; -extern int spi_up_head_align; -extern int spi_up_tail_align; -extern int spi_down_head_align; -extern int spi_down_tail_align; -extern struct platform_driver cfspi_spi_driver; - -void cfspi_dbg_state(struct cfspi *cfspi, int state); -int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len); -int cfspi_xmitlen(struct cfspi *cfspi); -int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len); -int cfspi_spi_remove(struct platform_device *pdev); -int cfspi_spi_probe(struct platform_device *pdev); -int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len); -int cfspi_xmitlen(struct cfspi *cfspi); -int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len); -void cfspi_xfer(struct work_struct *work); - -#endif /* CAIF_SPI_H_ */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d9e6b9fbd95b..0d6f7ec86061 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -10,6 +10,7 @@ * Copyright (C) 2018-2020 Intel Corporation */ +#include <linux/ethtool.h> #include <linux/netdevice.h> #include <linux/debugfs.h> #include <linux/list.h> @@ -96,6 +97,16 @@ struct wiphy; * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted * on this channel. * @IEEE80211_CHAN_NO_HE: HE operation is not permitted on this channel. + * @IEEE80211_CHAN_1MHZ: 1 MHz bandwidth is permitted + * on this channel. + * @IEEE80211_CHAN_2MHZ: 2 MHz bandwidth is permitted + * on this channel. + * @IEEE80211_CHAN_4MHZ: 4 MHz bandwidth is permitted + * on this channel. + * @IEEE80211_CHAN_8MHZ: 8 MHz bandwidth is permitted + * on this channel. + * @IEEE80211_CHAN_16MHZ: 16 MHz bandwidth is permitted + * on this channel. * */ enum ieee80211_channel_flags { @@ -113,6 +124,11 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_20MHZ = 1<<11, IEEE80211_CHAN_NO_10MHZ = 1<<12, IEEE80211_CHAN_NO_HE = 1<<13, + IEEE80211_CHAN_1MHZ = 1<<14, + IEEE80211_CHAN_2MHZ = 1<<15, + IEEE80211_CHAN_4MHZ = 1<<16, + IEEE80211_CHAN_8MHZ = 1<<17, + IEEE80211_CHAN_16MHZ = 1<<18, }; #define IEEE80211_CHAN_NO_HT40 \ @@ -254,13 +270,23 @@ struct ieee80211_rate { * struct ieee80211_he_obss_pd - AP settings for spatial reuse * * @enable: is the feature enabled. + * @sr_ctrl: The SR Control field of SRP element. + * @non_srg_max_offset: non-SRG maximum tx power offset * @min_offset: minimal tx power offset an associated station shall use * @max_offset: maximum tx power offset an associated station shall use + * @bss_color_bitmap: bitmap that indicates the BSS color values used by + * members of the SRG + * @partial_bssid_bitmap: bitmap that indicates the partial BSSID values + * used by members of the SRG */ struct ieee80211_he_obss_pd { bool enable; + u8 sr_ctrl; + u8 non_srg_max_offset; u8 min_offset; u8 max_offset; + u8 bss_color_bitmap[8]; + u8 partial_bssid_bitmap[8]; }; /** @@ -277,19 +303,6 @@ struct cfg80211_he_bss_color { }; /** - * struct ieee80211_he_bss_color - AP settings for BSS coloring - * - * @color: the current color. - * @disabled: is the feature disabled. - * @partial: define the AID equation. - */ -struct ieee80211_he_bss_color { - u8 color; - bool disabled; - bool partial; -}; - -/** * struct ieee80211_sta_ht_cap - STA's HT capabilities * * This structure describes most essential parameters needed @@ -449,7 +462,9 @@ struct ieee80211_sta_s1g_cap { * @n_bitrates: Number of bitrates in @bitrates * @ht_cap: HT capabilities in this band * @vht_cap: VHT capabilities in this band + * @s1g_cap: S1G capabilities in this band * @edmg_cap: EDMG capabilities in this band + * @s1g_cap: S1G capabilities in this band (S1B band only, of course) * @n_iftype_data: number of iftype data entries * @iftype_data: interface type data entries. Note that the bits in * @types_mask inside this structure cannot overlap (i.e. only @@ -678,7 +693,10 @@ struct cfg80211_bitrate_mask { u32 legacy; u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; u16 vht_mcs[NL80211_VHT_NSS_MAX]; + u16 he_mcs[NL80211_HE_NSS_MAX]; enum nl80211_txrate_gi gi; + enum nl80211_he_gi he_gi; + enum nl80211_he_ltf he_ltf; } control[NUM_NL80211_BANDS]; }; @@ -978,6 +996,21 @@ struct survey_info { * @sae_pwd: password for SAE authentication (for devices supporting SAE * offload) * @sae_pwd_len: length of SAE password (for devices supporting SAE offload) + * @sae_pwe: The mechanisms allowed for SAE PWE derivation: + * + * NL80211_SAE_PWE_UNSPECIFIED + * Not-specified, used to indicate userspace did not specify any + * preference. The driver should follow its internal policy in + * such a scenario. + * + * NL80211_SAE_PWE_HUNT_AND_PECK + * Allow hunting-and-pecking loop only + * + * NL80211_SAE_PWE_HASH_TO_ELEMENT + * Allow hash-to-element only + * + * NL80211_SAE_PWE_BOTH + * Allow either hunting-and-pecking loop or hash-to-element */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -996,6 +1029,7 @@ struct cfg80211_crypto_settings { const u8 *psk; const u8 *sae_pwd; u8 sae_pwd_len; + enum nl80211_sae_pwe_mechanism sae_pwe; }; /** @@ -1065,6 +1099,39 @@ struct cfg80211_acl_data { }; /** + * struct cfg80211_fils_discovery - FILS discovery parameters from + * IEEE Std 802.11ai-2016, Annex C.3 MIB detail. + * + * @min_interval: Minimum packet interval in TUs (0 - 10000) + * @max_interval: Maximum packet interval in TUs (0 - 10000) + * @tmpl_len: Template length + * @tmpl: Template data for FILS discovery frame including the action + * frame headers. + */ +struct cfg80211_fils_discovery { + u32 min_interval; + u32 max_interval; + size_t tmpl_len; + const u8 *tmpl; +}; + +/** + * struct cfg80211_unsol_bcast_probe_resp - Unsolicited broadcast probe + * response parameters in 6GHz. + * + * @interval: Packet interval in TUs. Maximum allowed is 20 TU, as mentioned + * in IEEE P802.11ax/D6.0 26.17.2.3.2 - AP behavior for fast passive + * scanning + * @tmpl_len: Template length + * @tmpl: Template data for probe response + */ +struct cfg80211_unsol_bcast_probe_resp { + u32 interval; + size_t tmpl_len; + const u8 *tmpl; +}; + +/** * enum cfg80211_ap_settings_flags - AP settings flags * * Used by cfg80211_ap_settings @@ -1107,10 +1174,13 @@ enum cfg80211_ap_settings_flags { * @vht_required: stations must support VHT * @twt_responder: Enable Target Wait Time * @he_required: stations must support HE + * @sae_h2e_required: stations must support direct H2E technique in SAE * @flags: flags, as defined in enum cfg80211_ap_settings_flags * @he_obss_pd: OBSS Packet Detection settings * @he_bss_color: BSS Color settings * @he_oper: HE operation IE (or %NULL if HE isn't enabled) + * @fils_discovery: FILS discovery transmission parameters + * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1136,11 +1206,13 @@ struct cfg80211_ap_settings { const struct ieee80211_vht_cap *vht_cap; const struct ieee80211_he_cap_elem *he_cap; const struct ieee80211_he_operation *he_oper; - bool ht_required, vht_required, he_required; + bool ht_required, vht_required, he_required, sae_h2e_required; bool twt_responder; u32 flags; struct ieee80211_he_obss_pd he_obss_pd; struct cfg80211_he_bss_color he_bss_color; + struct cfg80211_fils_discovery fils_discovery; + struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; }; /** @@ -1377,7 +1449,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy, enum cfg80211_station_type statype); /** - * enum station_info_rate_flags - bitrate info flags + * enum rate_info_flags - bitrate info flags * * Used by the driver to indicate the specific rate transmission * type for 802.11n transmissions. @@ -1450,7 +1522,7 @@ struct rate_info { }; /** - * enum station_info_rate_flags - bitrate info flags + * enum bss_param_flags - bitrate info flags * * Used by the driver to indicate the specific rate transmission * type for 802.11n transmissions. @@ -1660,6 +1732,54 @@ struct station_info { u8 connected_to_as; }; +/** + * struct cfg80211_sar_sub_specs - sub specs limit + * @power: power limitation in 0.25dbm + * @freq_range_index: index the power limitation applies to + */ +struct cfg80211_sar_sub_specs { + s32 power; + u32 freq_range_index; +}; + +/** + * struct cfg80211_sar_specs - sar limit specs + * @type: it's set with power in 0.25dbm or other types + * @num_sub_specs: number of sar sub specs + * @sub_specs: memory to hold the sar sub specs + */ +struct cfg80211_sar_specs { + enum nl80211_sar_type type; + u32 num_sub_specs; + struct cfg80211_sar_sub_specs sub_specs[]; +}; + + +/** + * struct cfg80211_sar_freq_ranges - sar frequency ranges + * @start_freq: start range edge frequency + * @end_freq: end range edge frequency + */ +struct cfg80211_sar_freq_ranges { + u32 start_freq; + u32 end_freq; +}; + +/** + * struct cfg80211_sar_capa - sar limit capability + * @type: it's set via power in 0.25dbm or other types + * @num_freq_ranges: number of frequency ranges + * @freq_ranges: memory to hold the freq ranges. + * + * Note: WLAN driver may append new ranges or split an existing + * range to small ones and then append them. + */ +struct cfg80211_sar_capa { + enum nl80211_sar_type type; + u32 num_freq_ranges; + const struct cfg80211_sar_freq_ranges *freq_ranges; +}; + #if IS_ENABLED(CONFIG_CFG80211) /** * cfg80211_get_station - retrieve information about a given station @@ -1784,6 +1904,7 @@ struct mpath_info { * (or NULL for no change) * @basic_rates_len: number of basic rates * @ap_isolate: do not forward packets between connected stations + * (0 = no, 1 = yes, -1 = do not change) * @ht_opmode: HT Operation mode * (u16 = opmode, -1 = do not change) * @p2p_ctwindow: P2P CT Window (-1 = no change) @@ -2039,6 +2160,27 @@ struct cfg80211_scan_info { }; /** + * struct cfg80211_scan_6ghz_params - relevant for 6 GHz only + * + * @short_bssid: short ssid to scan for + * @bssid: bssid to scan for + * @channel_idx: idx of the channel in the channel array in the scan request + * which the above info relvant to + * @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU + * @short_ssid_valid: short_ssid is valid and can be used + * @psc_no_listen: when set, and the channel is a PSC channel, no need to wait + * 20 TUs before starting to send probe requests. + */ +struct cfg80211_scan_6ghz_params { + u32 short_ssid; + u32 channel_idx; + u8 bssid[ETH_ALEN]; + bool unsolicited_probe; + bool short_ssid_valid; + bool psc_no_listen; +}; + +/** * struct cfg80211_scan_request - scan request description * * @ssids: SSIDs to scan for (active scan only) @@ -2065,6 +2207,10 @@ struct cfg80211_scan_info { * @mac_addr_mask: MAC address mask used with randomisation, bits that * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr + * @scan_6ghz: relevant for split scan request only, + * true if this is the second scan request + * @n_6ghz_params: number of 6 GHz params + * @scan_6ghz_params: 6 GHz params * @bssid: BSSID to scan for (most commonly, the wildcard BSSID) */ struct cfg80211_scan_request { @@ -2092,6 +2238,9 @@ struct cfg80211_scan_request { struct cfg80211_scan_info info; bool notified; bool no_cck; + bool scan_6ghz; + u32 n_6ghz_params; + struct cfg80211_scan_6ghz_params *scan_6ghz_params; /* keep last */ struct ieee80211_channel *channels[]; @@ -2471,6 +2620,8 @@ enum cfg80211_assoc_req_flags { * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association * Request/Response frame or %NULL if FILS is not used. This field starts * with 16 octets of STA Nonce followed by 16 octets of AP Nonce. + * @s1g_capa: S1G capability override + * @s1g_capa_mask: S1G capability override mask */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; @@ -2485,6 +2636,7 @@ struct cfg80211_assoc_request { const u8 *fils_kek; size_t fils_kek_len; const u8 *fils_nonces; + struct ieee80211_s1g_cap s1g_capa, s1g_capa_mask; }; /** @@ -3637,8 +3789,6 @@ struct mgmt_frame_regs { * @get_tx_power: store the current TX power into the dbm variable; * return 0 if successful * - * @set_wds_peer: set the WDS peer for a WDS interface - * * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting * functions to adjust rfkill hw state * @@ -3822,6 +3972,8 @@ struct mgmt_frame_regs { * This callback may sleep. * @reset_tid_config: Reset TID specific configuration for the peer, for the * given TIDs. This callback may sleep. + * + * @set_sar_specs: Update the SAR (TX power) settings. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3959,9 +4111,6 @@ struct cfg80211_ops { int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, int *dbm); - int (*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev, - const u8 *addr); - void (*rfkill_poll)(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE @@ -4150,6 +4299,8 @@ struct cfg80211_ops { struct cfg80211_tid_config *tid_conf); int (*reset_tid_config)(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u8 tids); + int (*set_sar_specs)(struct wiphy *wiphy, + struct cfg80211_sar_specs *sar); }; /* @@ -4160,6 +4311,8 @@ struct cfg80211_ops { /** * enum wiphy_flags - wiphy capability flags * + * @WIPHY_FLAG_SPLIT_SCAN_6GHZ: if set to true, the scan request will be split + * into two, first for legacy bands and second for UHB. * @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this * wiphy at all * @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled @@ -4203,7 +4356,7 @@ struct cfg80211_ops { enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), /* use hole at 1 */ - /* use hole at 2 */ + WIPHY_FLAG_SPLIT_SCAN_6GHZ = BIT(2), WIPHY_FLAG_NETNS_OK = BIT(3), WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), WIPHY_FLAG_4ADDR_AP = BIT(5), @@ -4778,6 +4931,7 @@ struct wiphy_iftype_akm_suites { * @max_data_retry_count: maximum supported per TID retry count for * configuration through the %NL80211_TID_CONFIG_ATTR_RETRY_SHORT and * %NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes + * @sar_capa: SAR control capabilities */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -4916,6 +5070,8 @@ struct wiphy { u8 max_data_retry_count; + const struct cfg80211_sar_capa *sar_capa; + char priv[] __aligned(NETDEV_ALIGN); }; @@ -5276,6 +5432,16 @@ ieee80211_channel_to_khz(const struct ieee80211_channel *chan) } /** + * ieee80211_s1g_channel_width - get allowed channel width from @chan + * + * Only allowed for band NL80211_BAND_S1GHZ + * @chan: channel + * Return: The allowed channel width for this center_freq + */ +enum nl80211_chan_width +ieee80211_s1g_channel_width(const struct ieee80211_channel *chan); + +/** * ieee80211_channel_to_freq_khz - convert channel number to frequency * @chan: channel number * @band: band, necessary due to channel number overlap @@ -6295,13 +6461,15 @@ void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss); * @dev: network device * @buf: 802.11 frame (header + body) * @len: length of the frame data + * @reconnect: immediate reconnect is desired (include the nl80211 attribute) * * This function is called whenever deauthentication has been processed in * station mode. This includes both received deauthentication frames and * locally generated ones. This function may sleep. The caller must hold the * corresponding wdev's mutex. */ -void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len); +void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len, + bool reconnect); /** * cfg80211_rx_unprot_mlme_mgmt - notification of unprotected mlme mgmt frame @@ -6356,7 +6524,8 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel, gfp_t gfp); /** - * cfg80211_notify_new_candidate - notify cfg80211 of a new mesh peer candidate + * cfg80211_notify_new_peer_candidate - notify cfg80211 of a new mesh peer + * candidate * * @dev: network device * @macaddr: the MAC address of the new candidate @@ -7407,6 +7576,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, * @dev: the device on which the channel switch started * @chandef: the future channel definition * @count: the number of TBTTs until the channel switch happens + * @quiet: whether or not immediate quiet was requested by the AP * * Inform the userspace about the channel switch that has just * started, so that it can take appropriate actions (eg. starting @@ -7414,7 +7584,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, */ void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, - u8 count); + u8 count, bool quiet); /** * ieee80211_operating_class_to_band - convert operating class to band @@ -7495,7 +7665,7 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate); void cfg80211_unregister_wdev(struct wireless_dev *wdev); /** - * struct cfg80211_ft_event - FT Information Elements + * struct cfg80211_ft_event_params - FT Information Elements * @ies: FT IEs * @ies_len: length of the FT IE in bytes * @target_ap: target AP's MAC address diff --git a/include/net/checksum.h b/include/net/checksum.h index 46754ba9d7b7..0d05b9e8690b 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -24,26 +24,32 @@ #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER static inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, - int len, __wsum sum, int *err_ptr) + int len) { if (copy_from_user(dst, src, len)) - *err_ptr = -EFAULT; - return csum_partial(dst, len, sum); + return 0; + return csum_partial(dst, len, ~0U); } #endif #ifndef HAVE_CSUM_COPY_USER static __inline__ __wsum csum_and_copy_to_user -(const void *src, void __user *dst, int len, __wsum sum, int *err_ptr) +(const void *src, void __user *dst, int len) { - sum = csum_partial(src, len, sum); + __wsum sum = csum_partial(src, len, ~0U); if (copy_to_user(dst, src, len) == 0) return sum; - if (len) - *err_ptr = -EFAULT; + return 0; +} +#endif - return (__force __wsum)-1; /* invalid checksum */ +#ifndef _HAVE_ARCH_CSUM_AND_COPY +static inline __wsum +csum_partial_copy_nocheck(const void *src, void *dst, int len) +{ + memcpy(dst, src, len); + return csum_partial(dst, len, 0); } #endif diff --git a/include/net/compat.h b/include/net/compat.h index 745db0d605b6..84805bdc4435 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -5,8 +5,6 @@ struct sock; -#if defined(CONFIG_COMPAT) - #include <linux/compat.h> struct compat_msghdr { @@ -48,14 +46,6 @@ struct compat_rtentry { unsigned short rt_irtt; /* Initial RTT */ }; -#else /* defined(CONFIG_COMPAT) */ -/* - * To avoid compiler warnings: - */ -#define compat_msghdr msghdr -#define compat_mmsghdr mmsghdr -#endif /* defined(CONFIG_COMPAT) */ - int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg, struct sockaddr __user **save_addr, compat_uptr_t *ptr, compat_size_t *len); diff --git a/include/net/devlink.h b/include/net/devlink.h index 8f3c8a443238..f466819cc477 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -19,6 +19,15 @@ #include <net/flow_offload.h> #include <uapi/linux/devlink.h> #include <linux/xarray.h> +#include <linux/firmware.h> + +#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ + (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX) + +struct devlink_dev_stats { + u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; + u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; +}; struct devlink_ops; @@ -38,6 +47,7 @@ struct devlink { struct list_head trap_policer_list; const struct devlink_ops *ops; struct xarray snapshot_ids; + struct devlink_dev_stats stats; struct device *dev; possible_net_t _net; struct mutex lock; /* Serializes access to devlink instance specific objects such as @@ -57,13 +67,30 @@ struct devlink_port_phys_attrs { u32 split_subport_number; /* If the port is split, this is the number of subport. */ }; +/** + * struct devlink_port_pci_pf_attrs - devlink port's PCI PF attributes + * @controller: Associated controller number + * @pf: Associated PCI PF number for this port. + * @external: when set, indicates if a port is for an external controller + */ struct devlink_port_pci_pf_attrs { - u16 pf; /* Associated PCI PF for this port. */ + u32 controller; + u16 pf; + u8 external:1; }; +/** + * struct devlink_port_pci_vf_attrs - devlink port's PCI VF attributes + * @controller: Associated controller number + * @pf: Associated PCI PF number for this port. + * @vf: Associated PCI VF for of the PCI PF for this port. + * @external: when set, indicates if a port is for an external controller + */ struct devlink_port_pci_vf_attrs { - u16 pf; /* Associated PCI PF for this port. */ - u16 vf; /* Associated PCI VF for of the PCI PF for this port. */ + u32 controller; + u16 pf; + u16 vf; + u8 external:1; }; /** @@ -73,6 +100,9 @@ struct devlink_port_pci_vf_attrs { * @splittable: indicates if the port can be split. * @lanes: maximum number of lanes the port supports. 0 value is not passed to netlink. * @switch_id: if the port is part of switch, this is buffer with ID, otherwise this is NULL + * @phys: physical port attributes + * @pci_pf: PCI PF port attributes + * @pci_vf: PCI VF port attributes */ struct devlink_port_attrs { u8 split:1, @@ -90,6 +120,7 @@ struct devlink_port_attrs { struct devlink_port { struct list_head list; struct list_head param_list; + struct list_head region_list; struct devlink *devlink; unsigned int index; bool registered; @@ -372,6 +403,25 @@ struct devlink_param_gset_ctx { }; /** + * struct devlink_flash_notify - devlink dev flash notify data + * @status_msg: current status string + * @component: firmware component being updated + * @done: amount of work completed of total amount + * @total: amount of work expected to be done + * @timeout: expected max timeout in seconds + * + * These are values to be given to userland to be displayed in order + * to show current activity in a firmware update process. + */ +struct devlink_flash_notify { + const char *status_msg; + const char *component; + unsigned long done; + unsigned long total; + unsigned long timeout; +}; + +/** * struct devlink_param - devlink configuration parameter data * @name: name of the parameter * @generic: indicates if the parameter is generic or driver specific @@ -420,6 +470,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE, DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -457,6 +508,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME "enable_roce" #define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME "enable_remote_dev_reset" +#define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@ -511,6 +565,24 @@ enum devlink_param_generic_id { /* Firmware bundle identifier */ #define DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID "fw.bundle_id" +/** + * struct devlink_flash_update_params - Flash Update parameters + * @fw: pointer to the firmware data to update from + * @component: the flash component to update + * + * With the exception of fw, drivers must opt-in to parameters by + * setting the appropriate bit in the supported_flash_update_params field in + * their devlink_ops structure. + */ +struct devlink_flash_update_params { + const struct firmware *fw; + const char *component; + u32 overwrite_mask; +}; + +#define DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT BIT(0) +#define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK BIT(1) + struct devlink_region; struct devlink_info_req; @@ -522,12 +594,36 @@ struct devlink_info_req; * the data variable must be updated to point to the snapshot data. * The function will be called while the devlink instance lock is * held. + * @priv: Pointer to driver private data for the region operation */ struct devlink_region_ops { const char *name; void (*destructor)(const void *data); - int (*snapshot)(struct devlink *devlink, struct netlink_ext_ack *extack, + int (*snapshot)(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u8 **data); + void *priv; +}; + +/** + * struct devlink_port_region_ops - Region operations for a port + * @name: region name + * @destructor: callback used to free snapshot memory when deleting + * @snapshot: callback to request an immediate snapshot. On success, + * the data variable must be updated to point to the snapshot data. + * The function will be called while the devlink instance lock is + * held. + * @priv: Pointer to driver private data for the region operation + */ +struct devlink_port_region_ops { + const char *name; + void (*destructor)(const void *data); + int (*snapshot)(struct devlink_port *port, + const struct devlink_port_region_ops *ops, + struct netlink_ext_ack *extack, u8 **data); + void *priv; }; struct devlink_fmsg; @@ -546,6 +642,7 @@ enum devlink_health_reporter_state { * @dump: callback to dump an object * if priv_ctx is NULL, run a full dump * @diagnose: callback to diagnose the current status + * @test: callback to trigger a test event */ struct devlink_health_reporter_ops { @@ -558,6 +655,24 @@ struct devlink_health_reporter_ops { int (*diagnose)(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, struct netlink_ext_ack *extack); + int (*test)(struct devlink_health_reporter *reporter, + struct netlink_ext_ack *extack); +}; + +/** + * struct devlink_trap_metadata - Packet trap metadata. + * @trap_name: Trap name. + * @trap_group_name: Trap group name. + * @input_dev: Input netdevice. + * @fa_cookie: Flow action user cookie. + * @trap_type: Trap type. + */ +struct devlink_trap_metadata { + const char *trap_name; + const char *trap_group_name; + struct net_device *input_dev; + const struct flow_action_cookie *fa_cookie; + enum devlink_trap_type trap_type; }; /** @@ -704,6 +819,23 @@ enum devlink_trap_generic_id { DEVLINK_TRAP_GENERIC_ID_FLOW_ACTION_SAMPLE, DEVLINK_TRAP_GENERIC_ID_FLOW_ACTION_TRAP, DEVLINK_TRAP_GENERIC_ID_EARLY_DROP, + DEVLINK_TRAP_GENERIC_ID_VXLAN_PARSING, + DEVLINK_TRAP_GENERIC_ID_LLC_SNAP_PARSING, + DEVLINK_TRAP_GENERIC_ID_VLAN_PARSING, + DEVLINK_TRAP_GENERIC_ID_PPPOE_PPP_PARSING, + DEVLINK_TRAP_GENERIC_ID_MPLS_PARSING, + DEVLINK_TRAP_GENERIC_ID_ARP_PARSING, + DEVLINK_TRAP_GENERIC_ID_IP_1_PARSING, + DEVLINK_TRAP_GENERIC_ID_IP_N_PARSING, + DEVLINK_TRAP_GENERIC_ID_GRE_PARSING, + DEVLINK_TRAP_GENERIC_ID_UDP_PARSING, + DEVLINK_TRAP_GENERIC_ID_TCP_PARSING, + DEVLINK_TRAP_GENERIC_ID_IPSEC_PARSING, + DEVLINK_TRAP_GENERIC_ID_SCTP_PARSING, + DEVLINK_TRAP_GENERIC_ID_DCCP_PARSING, + DEVLINK_TRAP_GENERIC_ID_GTP_PARSING, + DEVLINK_TRAP_GENERIC_ID_ESP_PARSING, + DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_NEXTHOP, /* Add new generic trap IDs above */ __DEVLINK_TRAP_GENERIC_ID_MAX, @@ -739,6 +871,7 @@ enum devlink_trap_group_generic_id { DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_GENERAL, DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_SAMPLE, DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_TRAP, + DEVLINK_TRAP_GROUP_GENERIC_ID_PARSER_ERROR_DROPS, /* Add new generic trap group IDs above */ __DEVLINK_TRAP_GROUP_GENERIC_ID_MAX, @@ -894,6 +1027,40 @@ enum devlink_trap_group_generic_id { "flow_action_trap" #define DEVLINK_TRAP_GENERIC_NAME_EARLY_DROP \ "early_drop" +#define DEVLINK_TRAP_GENERIC_NAME_VXLAN_PARSING \ + "vxlan_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_LLC_SNAP_PARSING \ + "llc_snap_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_VLAN_PARSING \ + "vlan_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_PPPOE_PPP_PARSING \ + "pppoe_ppp_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_MPLS_PARSING \ + "mpls_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_ARP_PARSING \ + "arp_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_IP_1_PARSING \ + "ip_1_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_IP_N_PARSING \ + "ip_n_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_GRE_PARSING \ + "gre_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_UDP_PARSING \ + "udp_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_TCP_PARSING \ + "tcp_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_IPSEC_PARSING \ + "ipsec_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_SCTP_PARSING \ + "sctp_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_DCCP_PARSING \ + "dccp_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_GTP_PARSING \ + "gtp_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_ESP_PARSING \ + "esp_parsing" +#define DEVLINK_TRAP_GENERIC_NAME_BLACKHOLE_NEXTHOP \ + "blackhole_nexthop" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \ "l2_drops" @@ -945,6 +1112,8 @@ enum devlink_trap_group_generic_id { "acl_sample" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_ACL_TRAP \ "acl_trap" +#define DEVLINK_TRAP_GROUP_GENERIC_NAME_PARSER_ERROR_DROPS \ + "parser_error_drops" #define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group_id, \ _metadata_cap) \ @@ -991,9 +1160,20 @@ enum devlink_trap_group_generic_id { } struct devlink_ops { + /** + * @supported_flash_update_params: + * mask of parameters supported by the driver's .flash_update + * implemementation. + */ + u32 supported_flash_update_params; + unsigned long reload_actions; + unsigned long reload_limits; int (*reload_down)(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, struct netlink_ext_ack *extack); - int (*reload_up)(struct devlink *devlink, + int (*reload_up)(struct devlink *devlink, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); @@ -1051,8 +1231,15 @@ struct devlink_ops { struct netlink_ext_ack *extack); int (*info_get)(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack); - int (*flash_update)(struct devlink *devlink, const char *file_name, - const char *component, + /** + * @flash_update: Device flash update function + * + * Used to perform a flash update for the device. The set of + * parameters supported by the driver should be set in + * supported_flash_update_params. + */ + int (*flash_update)(struct devlink *devlink, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack); /** * @trap_init: Trap initialization function. @@ -1098,6 +1285,16 @@ struct devlink_ops { const struct devlink_trap_policer *policer, struct netlink_ext_ack *extack); /** + * @trap_group_action_set: Trap group action set function. + * + * If this callback is populated, it will take precedence over looping + * over all traps in a group and calling .trap_action_set(). + */ + int (*trap_group_action_set)(struct devlink *devlink, + const struct devlink_trap_group *group, + enum devlink_trap_action action, + struct netlink_ext_ack *extack); + /** * @trap_policer_init: Trap policer initialization function. * * Should be used by device drivers to initialize the trap policer in @@ -1203,9 +1400,10 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port, void devlink_port_type_clear(struct devlink_port *devlink_port); void devlink_port_attrs_set(struct devlink_port *devlink_port, struct devlink_port_attrs *devlink_port_attrs); -void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf); -void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, - u16 pf, u16 vf); +void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller, + u16 pf, bool external); +void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller, + u16 pf, u16 vf, bool external); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, @@ -1289,7 +1487,13 @@ struct devlink_region * devlink_region_create(struct devlink *devlink, const struct devlink_region_ops *ops, u32 region_max_snapshots, u64 region_size); +struct devlink_region * +devlink_port_region_create(struct devlink_port *port, + const struct devlink_port_region_ops *ops, + u32 region_max_snapshots, u64 region_size); void devlink_region_destroy(struct devlink_region *region); +void devlink_port_region_destroy(struct devlink_region *region); + int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id); void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id); int devlink_region_snapshot_create(struct devlink_region *region, @@ -1371,14 +1575,19 @@ void devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter); bool devlink_is_reload_failed(const struct devlink *devlink); +void devlink_remote_reload_actions_performed(struct devlink *devlink, + enum devlink_reload_limit limit, + u32 actions_performed); -void devlink_flash_update_begin_notify(struct devlink *devlink); -void devlink_flash_update_end_notify(struct devlink *devlink); void devlink_flash_update_status_notify(struct devlink *devlink, const char *status_msg, const char *component, unsigned long done, unsigned long total); +void devlink_flash_update_timeout_notify(struct devlink *devlink, + const char *status_msg, + const char *component, + unsigned long timeout); int devlink_traps_register(struct devlink *devlink, const struct devlink_trap *traps, diff --git a/include/net/drop_monitor.h b/include/net/drop_monitor.h deleted file mode 100644 index 3f5b6ddb3179..000000000000 --- a/include/net/drop_monitor.h +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ - -#ifndef _NET_DROP_MONITOR_H_ -#define _NET_DROP_MONITOR_H_ - -#include <linux/ktime.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/flow_offload.h> - -/** - * struct net_dm_hw_metadata - Hardware-supplied packet metadata. - * @trap_group_name: Hardware trap group name. - * @trap_name: Hardware trap name. - * @input_dev: Input netdevice. - * @fa_cookie: Flow action user cookie. - */ -struct net_dm_hw_metadata { - const char *trap_group_name; - const char *trap_name; - struct net_device *input_dev; - const struct flow_action_cookie *fa_cookie; -}; - -#if IS_REACHABLE(CONFIG_NET_DROP_MONITOR) -void net_dm_hw_report(struct sk_buff *skb, - const struct net_dm_hw_metadata *hw_metadata); -#else -static inline void -net_dm_hw_report(struct sk_buff *skb, - const struct net_dm_hw_metadata *hw_metadata) -{ -} -#endif - -#endif /* _NET_DROP_MONITOR_H_ */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 75c8fac82017..4e60d2610f20 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -45,6 +45,7 @@ struct phylink_link_state; #define DSA_TAG_PROTO_OCELOT_VALUE 15 #define DSA_TAG_PROTO_AR9331_VALUE 16 #define DSA_TAG_PROTO_RTL4_A_VALUE 17 +#define DSA_TAG_PROTO_HELLCREEK_VALUE 18 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -65,6 +66,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_OCELOT = DSA_TAG_PROTO_OCELOT_VALUE, DSA_TAG_PROTO_AR9331 = DSA_TAG_PROTO_AR9331_VALUE, DSA_TAG_PROTO_RTL4_A = DSA_TAG_PROTO_RTL4_A_VALUE, + DSA_TAG_PROTO_HELLCREEK = DSA_TAG_PROTO_HELLCREEK_VALUE, }; struct packet_type; @@ -74,8 +76,8 @@ struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); - int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, - int *offset); + void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, + int *offset); /* Used to determine which traffic should match the DSA filter in * eth_type_trans, and which, if any, should bypass it and be processed * as regular on the master net device. @@ -84,6 +86,13 @@ struct dsa_device_ops { unsigned int overhead; const char *name; enum dsa_tag_protocol proto; + /* Some tagging protocols either mangle or shift the destination MAC + * address, in which case the DSA master would drop packets on ingress + * if what it understands out of the destination MAC address is not in + * its RX filter. + */ + bool promisc_on_master; + bool tail_tag; }; /* This structure defines the control interfaces that are overlayed by the @@ -208,6 +217,7 @@ struct dsa_port { u8 stp_state; struct net_device *bridge_dev; struct devlink_port devlink_port; + bool devlink_port_setup; struct phylink *pl; struct phylink_config pl_config; @@ -301,6 +311,14 @@ struct dsa_switch { */ bool configure_vlan_while_not_filtering; + /* If the switch driver always programs the CPU port as egress tagged + * despite the VLAN configuration indicating otherwise, then setting + * @untag_bridge_pvid will force the DSA receive path to pop the bridge's + * default_pvid VLAN tagged frames to offer a consistent behavior + * between a vlan_filtering=0 and vlan_filtering=1 bridge device. + */ + bool untag_bridge_pvid; + /* In case vlan_filtering_is_global is set, the VLAN awareness state * should be retrieved from here and not from the per-port settings. */ @@ -519,6 +537,12 @@ struct dsa_switch_ops { struct ethtool_regs *regs, void *p); /* + * Upper device tracking. + */ + int (*port_prechangeupper)(struct dsa_switch *ds, int port, + struct netdev_notifier_changeupper_info *info); + + /* * Bridge integration */ int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); @@ -536,7 +560,8 @@ struct dsa_switch_ops { * VLAN support */ int (*port_vlan_filtering)(struct dsa_switch *ds, int port, - bool vlan_filtering); + bool vlan_filtering, + struct switchdev_trans *trans); int (*port_vlan_prepare)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); void (*port_vlan_add)(struct dsa_switch *ds, int port, @@ -612,11 +637,14 @@ struct dsa_switch_ops { bool (*port_rxtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb, unsigned int type); - /* Devlink parameters */ + /* Devlink parameters, etc */ int (*devlink_param_get)(struct dsa_switch *ds, u32 id, struct devlink_param_gset_ctx *ctx); int (*devlink_param_set)(struct dsa_switch *ds, u32 id, struct devlink_param_gset_ctx *ctx); + int (*devlink_info_get)(struct dsa_switch *ds, + struct devlink_info_req *req, + struct netlink_ext_ack *extack); /* * MTU change functionality. Switches can also adjust their MRU through @@ -658,12 +686,44 @@ void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds, void *occ_get_priv); void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds, u64 resource_id); +struct devlink_region * +dsa_devlink_region_create(struct dsa_switch *ds, + const struct devlink_region_ops *ops, + u32 region_max_snapshots, u64 region_size); +struct devlink_region * +dsa_devlink_port_region_create(struct dsa_switch *ds, + int port, + const struct devlink_port_region_ops *ops, + u32 region_max_snapshots, u64 region_size); +void dsa_devlink_region_destroy(struct devlink_region *region); + struct dsa_port *dsa_port_from_netdev(struct net_device *netdev); struct dsa_devlink_priv { struct dsa_switch *ds; }; +static inline struct dsa_switch *dsa_devlink_to_ds(struct devlink *dl) +{ + struct dsa_devlink_priv *dl_priv = devlink_priv(dl); + + return dl_priv->ds; +} + +static inline +struct dsa_switch *dsa_devlink_port_to_ds(struct devlink_port *port) +{ + struct devlink *dl = port->devlink; + struct dsa_devlink_priv *dl_priv = devlink_priv(dl); + + return dl_priv->ds; +} + +static inline int dsa_devlink_port_to_port(struct devlink_port *port) +{ + return port->index; +} + struct dsa_switch_driver { struct list_head list; const struct dsa_switch_ops *ops; @@ -689,6 +749,32 @@ static inline bool dsa_can_decode(const struct sk_buff *skb, return false; } +/* All DSA tags that push the EtherType to the right (basically all except tail + * tags, which don't break dissection) can be treated the same from the + * perspective of the flow dissector. + * + * We need to return: + * - offset: the (B - A) difference between: + * A. the position of the real EtherType and + * B. the current skb->data (aka ETH_HLEN bytes into the frame, aka 2 bytes + * after the normal EtherType was supposed to be) + * The offset in bytes is exactly equal to the tagger overhead (and half of + * that, in __be16 shorts). + * + * - proto: the value of the real EtherType. + */ +static inline void dsa_tag_generic_flow_dissect(const struct sk_buff *skb, + __be16 *proto, int *offset) +{ +#if IS_ENABLED(CONFIG_NET_DSA) + const struct dsa_device_ops *ops = skb->dev->dsa_ptr->tag_ops; + int tag_len = ops->overhead; + + *offset = tag_len; + *proto = ((__be16 *)skb->data)[(tag_len / 2) - 1]; +#endif +} + #if IS_ENABLED(CONFIG_NET_DSA) static inline int __dsa_netdevice_ops_check(struct net_device *dev) { diff --git a/include/net/dst.h b/include/net/dst.h index 6ae2e625050d..10f0a8399867 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -214,7 +214,7 @@ dst_allfrag(const struct dst_entry *dst) static inline int dst_metric_locked(const struct dst_entry *dst, int metric) { - return dst_metric(dst, RTAX_LOCK) & (1<<metric); + return dst_metric(dst, RTAX_LOCK) & (1 << metric); } static inline void dst_hold(struct dst_entry *dst) @@ -400,14 +400,12 @@ static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, co static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, struct sk_buff *skb) { - struct neighbour *n = NULL; + struct neighbour *n; - /* The packets from tunnel devices (eg bareudp) may have only - * metadata in the dst pointer of skb. Hence a pointer check of - * neigh_lookup is needed. - */ - if (dst->ops->neigh_lookup) - n = dst->ops->neigh_lookup(dst, skb, NULL); + if (WARN_ON_ONCE(!dst->ops->neigh_lookup)) + return NULL; + + n = dst->ops->neigh_lookup(dst, skb, NULL); return IS_ERR(n) ? NULL : n; } diff --git a/include/net/flow.h b/include/net/flow.h index b2531df3f65f..39d0cedcddee 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -195,11 +195,21 @@ static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) return container_of(fl4, struct flowi, u.ip4); } +static inline struct flowi_common *flowi4_to_flowi_common(struct flowi4 *fl4) +{ + return &(flowi4_to_flowi(fl4)->u.__fl_common); +} + static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6) { return container_of(fl6, struct flowi, u.ip6); } +static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6) +{ + return &(flowi6_to_flowi(fl6)->u.__fl_common); +} + static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) { return container_of(fldn, struct flowi, u.dn); diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 8899d7429ccb..e55ec1597ce7 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -41,6 +41,8 @@ struct genl_info; * (private) * @ops: the operations supported by this family * @n_ops: number of operations supported by this family + * @small_ops: the small-struct operations supported by this family + * @n_small_ops: number of small-struct operations supported by this family */ struct genl_family { int id; /* private */ @@ -48,8 +50,12 @@ struct genl_family { char name[GENL_NAMSIZ]; unsigned int version; unsigned int maxattr; - bool netnsok; - bool parallel_ops; + unsigned int mcgrp_offset; /* private */ + u8 netnsok:1; + u8 parallel_ops:1; + u8 n_ops; + u8 n_small_ops; + u8 n_mcgrps; const struct nla_policy *policy; int (*pre_doit)(const struct genl_ops *ops, struct sk_buff *skb, @@ -58,10 +64,8 @@ struct genl_family { struct sk_buff *skb, struct genl_info *info); const struct genl_ops * ops; + const struct genl_small_ops *small_ops; const struct genl_multicast_group *mcgrps; - unsigned int n_ops; - unsigned int n_mcgrps; - unsigned int mcgrp_offset; /* private */ struct module *module; }; @@ -101,14 +105,6 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) -static inline int genl_err_attr(struct genl_info *info, int err, - const struct nlattr *attr) -{ - info->extack->bad_attr = attr; - - return err; -} - enum genl_validate_flags { GENL_DONT_VALIDATE_STRICT = BIT(0), GENL_DONT_VALIDATE_DUMP = BIT(1), @@ -116,28 +112,33 @@ enum genl_validate_flags { }; /** - * struct genl_info - info that is available during dumpit op call - * @family: generic netlink family - for internal genl code usage - * @ops: generic netlink ops - for internal genl code usage - * @attrs: netlink attributes + * struct genl_small_ops - generic netlink operations (small version) + * @cmd: command identifier + * @internal_flags: flags used by the family + * @flags: flags + * @validate: validation flags from enum genl_validate_flags + * @doit: standard command callback + * @dumpit: callback for dumpers + * + * This is a cut-down version of struct genl_ops for users who don't need + * most of the ancillary infra and want to save space. */ -struct genl_dumpit_info { - const struct genl_family *family; - const struct genl_ops *ops; - struct nlattr **attrs; +struct genl_small_ops { + int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); + u8 cmd; + u8 internal_flags; + u8 flags; + u8 validate; }; -static inline const struct genl_dumpit_info * -genl_dumpit_info(struct netlink_callback *cb) -{ - return cb->data; -} - /** * struct genl_ops - generic netlink operations * @cmd: command identifier * @internal_flags: flags used by the family * @flags: flags + * @maxattr: maximum number of attributes supported + * @policy: netlink policy (takes precedence over family policy) * @validate: validation flags from enum genl_validate_flags * @doit: standard command callback * @start: start callback for dumps @@ -151,12 +152,32 @@ struct genl_ops { int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); + const struct nla_policy *policy; + unsigned int maxattr; u8 cmd; u8 internal_flags; u8 flags; u8 validate; }; +/** + * struct genl_info - info that is available during dumpit op call + * @family: generic netlink family - for internal genl code usage + * @ops: generic netlink ops - for internal genl code usage + * @attrs: netlink attributes + */ +struct genl_dumpit_info { + const struct genl_family *family; + struct genl_ops op; + struct nlattr **attrs; +}; + +static inline const struct genl_dumpit_info * +genl_dumpit_info(struct netlink_callback *cb) +{ + return cb->data; +} + int genl_register_family(struct genl_family *family); int genl_unregister_family(const struct genl_family *family); void genl_notify(const struct genl_family *family, struct sk_buff *skb, diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 19c00d100096..c0854933e24f 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -118,6 +118,7 @@ enum ieee80211_radiotap_tx_flags { IEEE80211_RADIOTAP_F_TX_RTS = 0x0004, IEEE80211_RADIOTAP_F_TX_NOACK = 0x0008, IEEE80211_RADIOTAP_F_TX_NOSEQNO = 0x0010, + IEEE80211_RADIOTAP_F_TX_ORDER = 0x0020, }; /* for IEEE80211_RADIOTAP_MCS "have" flags */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index aa8893c68c50..111d7771b208 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -76,6 +76,8 @@ struct inet_connection_sock_af_ops { * @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options) * @icsk_ack: Delayed ACK control data * @icsk_mtup; MTU probing control data + * @icsk_probes_tstamp: Probe timestamp (cleared by non-zero window ack) + * @icsk_user_timeout: TCP_USER_TIMEOUT value */ struct inet_connection_sock { /* inet_sock has to be the first member! */ @@ -86,6 +88,8 @@ struct inet_connection_sock { struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; + __u32 icsk_rto_min; + __u32 icsk_delack_max; __u32 icsk_pmtu_cookie; const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; @@ -94,7 +98,8 @@ struct inet_connection_sock { void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq); struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); - __u8 icsk_ca_state:6, + __u8 icsk_ca_state:5, + icsk_ca_initialized:1, icsk_ca_setsockopt:1, icsk_ca_dst_locked:1; __u8 icsk_retransmits; @@ -107,7 +112,7 @@ struct inet_connection_sock { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ __u8 pingpong; /* The session is interactive */ - __u8 blocked; /* Delayed ACK was blocked by socket lock */ + __u8 retry; /* Number of attempts */ __u32 ato; /* Predicted tick of soft clock */ unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ @@ -126,6 +131,7 @@ struct inet_connection_sock { u32 probe_timestamp; } icsk_mtup; + u32 icsk_probes_tstamp; u32 icsk_user_timeout; u64 icsk_ca_priv[104 / sizeof(u64)]; @@ -195,7 +201,8 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) sk_stop_timer(sk, &icsk->icsk_retransmit_timer); #endif } else if (what == ICSK_TIME_DACK) { - icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; + icsk->icsk_ack.pending = 0; + icsk->icsk_ack.retry = 0; #ifdef INET_CSK_CLEAR_TIMERS sk_stop_timer(sk, &icsk->icsk_delack_timer); #endif diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index e1eaf1780288..ba77f47ef61e 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -8,6 +8,7 @@ #include <net/inet_sock.h> #include <net/dsfield.h> +#include <net/checksum.h> enum { INET_ECN_NOT_ECT = 0, @@ -75,8 +76,8 @@ static inline void INET_ECN_dontxmit(struct sock *sk) static inline int IP_ECN_set_ce(struct iphdr *iph) { - u32 check = (__force u32)iph->check; u32 ecn = (iph->tos + 1) & INET_ECN_MASK; + __be16 check_add; /* * After the last operation we have (in binary): @@ -93,23 +94,20 @@ static inline int IP_ECN_set_ce(struct iphdr *iph) * INET_ECN_ECT_1 => check += htons(0xFFFD) * INET_ECN_ECT_0 => check += htons(0xFFFE) */ - check += (__force u16)htons(0xFFFB) + (__force u16)htons(ecn); + check_add = (__force __be16)((__force u16)htons(0xFFFB) + + (__force u16)htons(ecn)); - iph->check = (__force __sum16)(check + (check>=0xFFFF)); + iph->check = csum16_add(iph->check, check_add); iph->tos |= INET_ECN_CE; return 1; } static inline int IP_ECN_set_ect1(struct iphdr *iph) { - u32 check = (__force u32)iph->check; - if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; - check += (__force u16)htons(0x100); - - iph->check = (__force __sum16)(check + (check>=0xFFFF)); + iph->check = csum16_add(iph->check, htons(0x1)); iph->tos ^= INET_ECN_MASK; return 1; } diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index bac79e817776..48cc5795ceda 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -21,6 +21,7 @@ struct fqdir { /* Keep atomic mem on separate cachelines in structs that include it */ atomic_long_t mem ____cacheline_aligned_in_smp; struct work_struct destroy_work; + struct llist_node free_list; }; /** diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 92560974ea67..ca6a3ea9057e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -247,8 +247,9 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long high_limit); int inet_hashinfo2_init_mod(struct inet_hashinfo *h); -bool inet_ehash_insert(struct sock *sk, struct sock *osk); -bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); +bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk); +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, + bool *found_dup_sk); int __inet_hash(struct sock *sk, struct sock *osk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a3702d1d4875..89163ef8cf4b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -296,13 +296,6 @@ static inline void __inet_sk_copy_descendant(struct sock *sk_to, memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, sk_from->sk_prot->obj_size - ancestor_size); } -#if !(IS_ENABLED(CONFIG_IPV6)) -static inline void inet_sk_copy_descendant(struct sock *sk_to, - const struct sock *sk_from) -{ - __inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock)); -} -#endif int inet_sk_rebuild_header(struct sock *sk); diff --git a/include/net/ip.h b/include/net/ip.h index 2a52787db64a..e20874059f82 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -99,7 +99,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb)) /* return enslaved device index if relevant */ -static inline int inet_sdif(struct sk_buff *skb) +static inline int inet_sdif(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) @@ -151,7 +151,7 @@ int igmp_mc_init(void); int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, __be32 saddr, __be32 daddr, - struct ip_options_rcu *opt); + struct ip_options_rcu *opt, u8 tos); int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); void ip_list_rcv(struct list_head *head, struct packet_type *pt, diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 02ccd32542d0..548b65bd3973 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -274,8 +274,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); -void ip_tunnel_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot); struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, int link, __be16 flags, __be32 remote, __be32 local, @@ -478,9 +476,11 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, __be16 flags) { - memcpy(ip_tunnel_info_opts(info), from, len); info->options_len = len; - info->key.tun_flags |= flags; + if (len > 0) { + memcpy(ip_tunnel_info_opts(info), from, len); + info->key.tun_flags |= flags; + } } static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) @@ -526,7 +526,6 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, __be16 flags) { info->options_len = 0; - info->key.tun_flags |= flags; } #endif /* CONFIG_INET */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9a59a33787cb..d609e957a3ec 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,9 +25,6 @@ #include <linux/ip.h> #include <linux/ipv6.h> /* for struct ipv6hdr */ #include <net/ipv6.h> -#if IS_ENABLED(CONFIG_IP_VS_IPV6) -#include <linux/netfilter_ipv6/ip6_tables.h> -#endif #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #endif diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index a21e8b1381a1..851029ecff13 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -108,5 +108,35 @@ out_rcu_unlock: rcu_read_unlock(); inet_frag_put(&fq->q); } + +/* Check if the upper layer header is truncated in the first fragment. */ +static inline bool +ipv6frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp) +{ + u8 nexthdr = *nexthdrp; + __be16 frag_off; + int offset; + + offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off); + if (offset < 0 || (frag_off & htons(IP6_OFFSET))) + return false; + switch (nexthdr) { + case NEXTHDR_TCP: + offset += sizeof(struct tcphdr); + break; + case NEXTHDR_UDP: + offset += sizeof(struct udphdr); + break; + case NEXTHDR_ICMP: + offset += sizeof(struct icmp6hdr); + break; + default: + offset += 1; + } + if (offset > skb->len) + return true; + return false; +} + #endif #endif diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index d7a7f7c81e7b..8fce558b5fea 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -63,6 +63,9 @@ struct ipv6_stub { int encap_type); #endif struct neigh_table *nd_tbl; + + int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)); }; extern const struct ipv6_stub *ipv6_stub __read_mostly; diff --git a/include/net/lapb.h b/include/net/lapb.h index ccc3d1f020b0..eee73442a1ba 100644 --- a/include/net/lapb.h +++ b/include/net/lapb.h @@ -92,6 +92,7 @@ struct lapb_cb { unsigned short n2, n2count; unsigned short t1, t2; struct timer_list t1timer, t2timer; + bool t1timer_stop, t2timer_stop; /* Internal control information */ struct sk_buff_head write_queue; @@ -103,6 +104,7 @@ struct lapb_cb { struct lapb_frame frmr_data; unsigned char frmr_type; + spinlock_t lock; refcount_t refcnt; }; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 66e2bfd165e8..2bdbf62f4ecd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -317,6 +317,9 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_TWT: TWT status changed * @BSS_CHANGED_HE_OBSS_PD: OBSS Packet Detection status changed. * @BSS_CHANGED_HE_BSS_COLOR: BSS Color has changed + * @BSS_CHANGED_FILS_DISCOVERY: FILS discovery status changed. + * @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response + * status changed. * */ enum ieee80211_bss_change { @@ -350,6 +353,8 @@ enum ieee80211_bss_change { BSS_CHANGED_TWT = 1<<27, BSS_CHANGED_HE_OBSS_PD = 1<<28, BSS_CHANGED_HE_BSS_COLOR = 1<<29, + BSS_CHANGED_FILS_DISCOVERY = 1<<30, + BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -491,6 +496,18 @@ struct ieee80211_ftm_responder_params { }; /** + * struct ieee80211_fils_discovery - FILS discovery parameters from + * IEEE Std 802.11ai-2016, Annex C.3 MIB detail. + * + * @min_interval: Minimum packet interval in TUs (0 - 10000) + * @max_interval: Maximum packet interval in TUs (0 - 10000) + */ +struct ieee80211_fils_discovery { + u32 min_interval; + u32 max_interval; +}; + +/** * struct ieee80211_bss_conf - holds the BSS's changing parameters * * This structure keeps information about a BSS (and an association @@ -604,16 +621,21 @@ struct ieee80211_ftm_responder_params { * nontransmitted BSSIDs * @profile_periodicity: the least number of beacon frames need to be received * in order to discover all the nontransmitted BSSIDs in the set. - * @he_oper: HE operation information of the AP we are connected to + * @he_oper: HE operation information of the BSS (AP/Mesh) or of the AP we are + * connected to (STA) * @he_obss_pd: OBSS Packet Detection parameters. * @he_bss_color: BSS coloring settings, if BSS supports HE + * @fils_discovery: FILS discovery configuration + * @unsol_bcast_probe_resp_interval: Unsolicited broadcast probe response + * interval. + * @s1g: BSS is S1G BSS (affects Association Request format). + * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed + * to driver when rate control is offloaded to firmware. */ struct ieee80211_bss_conf { const u8 *bssid; u8 htc_trig_based_pkt_ext; - bool multi_sta_back_32bit; bool uora_exists; - bool ack_enabled; u8 uora_ocw_range; u16 frame_time_rts_th; bool he_support; @@ -674,6 +696,10 @@ struct ieee80211_bss_conf { } he_oper; struct ieee80211_he_obss_pd he_obss_pd; struct cfg80211_he_bss_color he_bss_color; + struct ieee80211_fils_discovery fils_discovery; + u32 unsol_bcast_probe_resp_interval; + bool s1g; + struct cfg80211_bitrate_mask beacon_tx_rate; }; /** @@ -720,9 +746,8 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_INTFL_OFFCHAN_TX_OK: Internal to mac80211. Used to indicate * that a frame can be transmitted while the queues are stopped for * off-channel operation. - * @IEEE80211_TX_INTFL_NEED_TXPROCESSING: completely internal to mac80211, - * used to indicate that a pending frame requires TX processing before - * it can be sent out. + * @IEEE80211_TX_CTL_HW_80211_ENCAP: This frame uses hardware encapsulation + * (header conversion) * @IEEE80211_TX_INTFL_RETRIED: completely internal to mac80211, * used to indicate that a frame was already retried due to PS * @IEEE80211_TX_INTFL_DONT_ENCRYPT: completely internal to mac80211, @@ -791,7 +816,7 @@ enum mac80211_tx_info_flags { IEEE80211_TX_STAT_AMPDU_NO_BACK = BIT(11), IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(12), IEEE80211_TX_INTFL_OFFCHAN_TX_OK = BIT(13), - IEEE80211_TX_INTFL_NEED_TXPROCESSING = BIT(14), + IEEE80211_TX_CTL_HW_80211_ENCAP = BIT(14), IEEE80211_TX_INTFL_RETRIED = BIT(15), IEEE80211_TX_INTFL_DONT_ENCRYPT = BIT(16), IEEE80211_TX_CTL_NO_PS_BUFFER = BIT(17), @@ -812,6 +837,8 @@ enum mac80211_tx_info_flags { #define IEEE80211_TX_CTL_STBC_SHIFT 23 +#define IEEE80211_TX_RC_S1G_MCS IEEE80211_TX_RC_VHT_MCS + /** * enum mac80211_tx_control_flags - flags to describe transmit control * @@ -823,10 +850,14 @@ enum mac80211_tx_info_flags { * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame * @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path * @IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP: This frame skips mesh path lookup - * @IEEE80211_TX_CTRL_HW_80211_ENCAP: This frame uses hardware encapsulation - * (header conversion) + * @IEEE80211_TX_INTCFL_NEED_TXPROCESSING: completely internal to mac80211, + * used to indicate that a pending frame requires TX processing before + * it can be sent out. * @IEEE80211_TX_CTRL_NO_SEQNO: Do not overwrite the sequence number that * has already been assigned to this frame. + * @IEEE80211_TX_CTRL_DONT_REORDER: This frame should not be reordered + * relative to other frames that have this flag set, independent + * of their QoS TID or other priority field values. * * These flags are used in tx_info->control.flags. */ @@ -837,8 +868,9 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_AMSDU = BIT(3), IEEE80211_TX_CTRL_FAST_XMIT = BIT(4), IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP = BIT(5), - IEEE80211_TX_CTRL_HW_80211_ENCAP = BIT(6), + IEEE80211_TX_INTCFL_NEED_TXPROCESSING = BIT(6), IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), + IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), }; /* @@ -1002,7 +1034,8 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) * @status.ampdu_ack_len: AMPDU ack length * @status.ampdu_len: AMPDU length * @status.antenna: (legacy, kept only for iwlegacy) - * @status.tx_time: airtime consumed for transmission + * @status.tx_time: airtime consumed for transmission; note this is only + * used for WMM AC, not for airtime fairness * @status.is_valid_ack_signal: ACK signal is valid * @status.status_driver_data: driver use area * @ack: union part for pure ACK data @@ -1095,12 +1128,14 @@ ieee80211_info_get_tx_time_est(struct ieee80211_tx_info *info) * @info: Basic tx status information * @skb: Packet skb (can be NULL if not provided by the driver) * @rate: The TX rate that was used when sending the packet + * @free_list: list where processed skbs are stored to be free'd by the driver */ struct ieee80211_tx_status { struct ieee80211_sta *sta; struct ieee80211_tx_info *info; struct sk_buff *skb; struct rate_info *rate; + struct list_head *free_list; }; /** @@ -1606,6 +1641,21 @@ enum ieee80211_vif_flags { IEEE80211_VIF_GET_NOA_UPDATE = BIT(3), }; + +/** + * enum ieee80211_offload_flags - virtual interface offload flags + * + * @IEEE80211_OFFLOAD_ENCAP_ENABLED: tx encapsulation offload is enabled + * The driver supports sending frames passed as 802.3 frames by mac80211. + * It must also support sending 802.11 packets for the same interface. + * @IEEE80211_OFFLOAD_ENCAP_4ADDR: support 4-address mode encapsulation offload + */ + +enum ieee80211_offload_flags { + IEEE80211_OFFLOAD_ENCAP_ENABLED = BIT(0), + IEEE80211_OFFLOAD_ENCAP_4ADDR = BIT(1), +}; + /** * struct ieee80211_vif - per-interface data * @@ -1626,6 +1676,11 @@ enum ieee80211_vif_flags { * these need to be set (or cleared) when the interface is added * or, if supported by the driver, the interface type is changed * at runtime, mac80211 will never touch this field + * @offloaad_flags: hardware offload capabilities/flags for this interface. + * These are initialized by mac80211 before calling .add_interface, + * .change_interface or .update_vif_offload and updated by the driver + * within these ops, based on supported features or runtime change + * restrictions. * @hw_queue: hardware queue for each AC * @cab_queue: content-after-beacon (DTIM beacon really) queue, AP mode only * @chanctx_conf: The channel context this interface is assigned to, or %NULL @@ -1645,6 +1700,8 @@ enum ieee80211_vif_flags { * @txq: the multicast data TX queue (if driver uses the TXQ abstraction) * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped, * protected by fq->lock. + * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see + * &enum ieee80211_offload_flags. */ struct ieee80211_vif { enum nl80211_iftype type; @@ -1662,6 +1719,7 @@ struct ieee80211_vif { struct ieee80211_chanctx_conf __rcu *chanctx_conf; u32 driver_flags; + u32 offload_flags; #ifdef CONFIG_MAC80211_DEBUGFS struct dentry *debugfs_dir; @@ -2328,6 +2386,9 @@ struct ieee80211_txq { * aggregating MPDUs with the same keyid, allowing mac80211 to keep Tx * A-MPDU sessions active while rekeying with Extended Key ID. * + * @IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD: Hardware supports tx encapsulation + * offload + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2380,6 +2441,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_MULTI_BSSID, IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID, IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT, + IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -3252,7 +3314,7 @@ enum ieee80211_roc_type { }; /** - * enum ieee80211_reconfig_complete_type - reconfig type + * enum ieee80211_reconfig_type - reconfig type * * This enum is used by the reconfig_complete() callback to indicate what * reconfiguration type was completed. @@ -3736,7 +3798,7 @@ enum ieee80211_reconfig_type { * decremented, and when they reach 1 the driver must call * ieee80211_csa_finish(). Drivers which use ieee80211_beacon_get() * get the csa counter decremented by mac80211, but must check if it is - * 1 using ieee80211_csa_is_complete() after the beacon has been + * 1 using ieee80211_beacon_counter_is_complete() after the beacon has been * transmitted and then call ieee80211_csa_finish(). * If the CSA count starts as zero or 1, this function will not be called, * since there won't be any time to beacon before the switch anyway. @@ -3814,6 +3876,11 @@ enum ieee80211_reconfig_type { * @set_tid_config: Apply TID specific configurations. This callback may sleep. * @reset_tid_config: Reset TID specific configuration for the peer. * This callback may sleep. + * @update_vif_offload: Update virtual interface offload flags + * This callback may sleep. + * @sta_set_4addr: Called to notify the driver when a station starts/stops using + * 4-address mode + * @set_sar_specs: Update the SAR (TX power) settings. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4125,6 +4192,12 @@ struct ieee80211_ops { int (*reset_tid_config)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, u8 tids); + void (*update_vif_offload)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); + void (*sta_set_4addr)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_sta *sta, bool enabled); + int (*set_sar_specs)(struct ieee80211_hw *hw, + const struct cfg80211_sar_specs *sar); }; /** @@ -4763,21 +4836,21 @@ void ieee80211_tx_status_8023(struct ieee80211_hw *hw, */ void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets); -#define IEEE80211_MAX_CSA_COUNTERS_NUM 2 +#define IEEE80211_MAX_CNTDWN_COUNTERS_NUM 2 /** * struct ieee80211_mutable_offsets - mutable beacon offsets * @tim_offset: position of TIM element * @tim_length: size of TIM element - * @csa_counter_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets - * to CSA counters. This array can contain zero values which + * @cntdwn_counter_offs: array of IEEE80211_MAX_CNTDWN_COUNTERS_NUM offsets + * to countdown counters. This array can contain zero values which * should be ignored. */ struct ieee80211_mutable_offsets { u16 tim_offset; u16 tim_length; - u16 csa_counter_offs[IEEE80211_MAX_CSA_COUNTERS_NUM]; + u16 cntdwn_counter_offs[IEEE80211_MAX_CNTDWN_COUNTERS_NUM]; }; /** @@ -4846,31 +4919,31 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, } /** - * ieee80211_csa_update_counter - request mac80211 to decrement the csa counter + * ieee80211_beacon_update_cntdwn - request mac80211 to decrement the beacon countdown * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * The csa counter should be updated after each beacon transmission. + * The beacon counter should be updated after each beacon transmission. * This function is called implicitly when * ieee80211_beacon_get/ieee80211_beacon_get_tim are called, however if the * beacon frames are generated by the device, the driver should call this - * function after each beacon transmission to sync mac80211's csa counters. + * function after each beacon transmission to sync mac80211's beacon countdown. * - * Return: new csa counter value + * Return: new countdown value */ -u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif); +u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif); /** - * ieee80211_csa_set_counter - request mac80211 to set csa counter + * ieee80211_beacon_set_cntdwn - request mac80211 to set beacon countdown * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @counter: the new value for the counter * - * The csa counter can be changed by the device, this API should be + * The beacon countdown can be changed by the device, this API should be * used by the device driver to update csa counter in mac80211. * - * It should never be used together with ieee80211_csa_update_counter(), + * It should never be used together with ieee80211_beacon_update_cntdwn(), * as it will cause a race condition around the counter value. */ -void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter); +void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter); /** * ieee80211_csa_finish - notify mac80211 about channel switch @@ -4883,13 +4956,12 @@ void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter); void ieee80211_csa_finish(struct ieee80211_vif *vif); /** - * ieee80211_csa_is_complete - find out if counters reached 1 + * ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1 * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * This function returns whether the channel switch counters reached zero. + * This function returns whether the countdown reached zero. */ -bool ieee80211_csa_is_complete(struct ieee80211_vif *vif); - +bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif); /** * ieee80211_proberesp_get - retrieve a Probe Response template @@ -5253,6 +5325,26 @@ void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp); /** + * ieee80211_key_mic_failure - increment MIC failure counter for the key + * + * Note: this is really only safe if no other RX function is called + * at the same time. + * + * @keyconf: the key in question + */ +void ieee80211_key_mic_failure(struct ieee80211_key_conf *keyconf); + +/** + * ieee80211_key_replay - increment replay counter for the key + * + * Note: this is really only safe if no other RX function is called + * at the same time. + * + * @keyconf: the key in question + */ +void ieee80211_key_replay(struct ieee80211_key_conf *keyconf); + +/** * ieee80211_wake_queue - wake specific queue * @hw: pointer as obtained from ieee80211_alloc_hw(). * @queue: queue number (counted from zero). @@ -5344,11 +5436,15 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw); * @IEEE80211_IFACE_ITER_RESUME_ALL: During resume, iterate over all * interfaces, even if they haven't been re-added to the driver yet. * @IEEE80211_IFACE_ITER_ACTIVE: Iterate only active interfaces (netdev is up). + * @IEEE80211_IFACE_SKIP_SDATA_NOT_IN_DRIVER: Skip any interfaces where SDATA + * is not in the driver. This may fix crashes during firmware recovery + * for instance. */ enum ieee80211_interface_iteration_flags { IEEE80211_IFACE_ITER_NORMAL = 0, IEEE80211_IFACE_ITER_RESUME_ALL = BIT(0), IEEE80211_IFACE_ITER_ACTIVE = BIT(1), + IEEE80211_IFACE_SKIP_SDATA_NOT_IN_DRIVER = BIT(2), }; /** @@ -5648,7 +5744,7 @@ void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid); /** * ieee80211_sta_register_airtime - register airtime usage for a sta/tid * - * Register airtime usage for a given sta on a given tid. The driver can call + * Register airtime usage for a given sta on a given tid. The driver must call * this function to notify mac80211 that a station used a certain amount of * airtime. This information will be used by the TXQ scheduler to schedule * stations in a way that ensures airtime fairness. @@ -5807,6 +5903,17 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif); void ieee80211_connection_loss(struct ieee80211_vif *vif); /** + * ieee80211_disconnect - request disconnection + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @reconnect: immediate reconnect is desired + * + * Request disconnection from the current network and, if enabled, send a + * hint to the higher layers that immediate reconnect is desired. + */ +void ieee80211_disconnect(struct ieee80211_vif *vif, bool reconnect); + +/** * ieee80211_resume_disconnect - disconnect from AP after resume * * @vif: &struct ieee80211_vif pointer from the add_interface callback. @@ -6264,7 +6371,8 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, int band, struct ieee80211_sta **sta); /** - * Sanity-check and parse the radiotap header of injected frames + * ieee80211_parse_tx_radiotap - Sanity-check and parse the radiotap header + * of injected frames * @skb: packet injected by userspace * @dev: the &struct device of this 802.11 device */ @@ -6319,7 +6427,7 @@ int ieee80211_parse_p2p_noa(const struct ieee80211_p2p_noa_attr *attr, void ieee80211_update_p2p_noa(struct ieee80211_noa_data *data, u32 tsf); /** - * ieee80211_tdls_oper - request userspace to perform a TDLS operation + * ieee80211_tdls_oper_request - request userspace to perform a TDLS operation * @vif: virtual interface * @peer: the peer's destination address * @oper: the requested TDLS operation @@ -6594,4 +6702,29 @@ u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw, */ bool ieee80211_set_hw_80211_encap(struct ieee80211_vif *vif, bool enable); +/** + * ieee80211_get_fils_discovery_tmpl - Get FILS discovery template. + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * The driver is responsible for freeing the returned skb. + * + * Return: FILS discovery template. %NULL on error. + */ +struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); + +/** + * ieee80211_get_unsol_bcast_probe_resp_tmpl - Get unsolicited broadcast + * probe response template. + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * The driver is responsible for freeing the returned skb. + * + * Return: Unsolicited broadcast probe response template. %NULL on error. + */ +struct sk_buff * +ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); #endif /* MAC80211_H */ diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 3525d2822abe..5694370be3d4 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -29,7 +29,8 @@ struct mptcp_ext { use_ack:1, ack64:1, mpc_map:1, - __unused:2; + frozen:1, + __unused:1; /* one byte hole */ }; @@ -45,6 +46,7 @@ struct mptcp_out_options { #endif }; u8 addr_id; + u16 port; u64 ahmac; u8 rm_id; u8 join_id; @@ -85,10 +87,10 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts); -void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, - struct tcp_options_received *opt_rx); +void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); -void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts); +void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, + struct mptcp_out_options *opts); /* move the skb extension owership, with the assumption that 'to' is * newly allocated @@ -107,6 +109,19 @@ static inline void mptcp_skb_ext_move(struct sk_buff *to, from->active_extensions = 0; } +static inline void mptcp_skb_ext_copy(struct sk_buff *to, + struct sk_buff *from) +{ + struct mptcp_ext *from_ext; + + from_ext = skb_ext_find(from, SKB_EXT_MPTCP); + if (!from_ext) + return; + + from_ext->frozen = 1; + skb_ext_copy(to, from); +} + static inline bool mptcp_ext_matches(const struct mptcp_ext *to_ext, const struct mptcp_ext *from_ext) { @@ -185,8 +200,7 @@ static inline bool mptcp_established_options(struct sock *sk, } static inline void mptcp_incoming_options(struct sock *sk, - struct sk_buff *skb, - struct tcp_options_received *opt_rx) + struct sk_buff *skb) { } @@ -195,6 +209,11 @@ static inline void mptcp_skb_ext_move(struct sk_buff *to, { } +static inline void mptcp_skb_ext_copy(struct sk_buff *to, + struct sk_buff *from) +{ +} + static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, const struct sk_buff *from) { diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 81ee17594c32..22ced1381ede 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -204,6 +204,7 @@ struct neigh_table { int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); void (*proxy_redo)(struct sk_buff *skb); + int (*is_multicast)(const void *pkey); bool (*allow_add)(const struct net_device *dev, struct netlink_ext_ack *extack); char *id; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2ee5901bec7a..29567875f428 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -60,9 +60,6 @@ struct net { refcount_t passive; /* To decide when the network * namespace should be freed. */ - refcount_t count; /* To decided when the network - * namespace should be shut down. - */ spinlock_t rules_mod_lock; unsigned int dev_unreg_count; @@ -151,9 +148,6 @@ struct net { #endif struct sock *nfnl; struct sock *nfnl_stash; -#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT) - struct list_head nfnl_acct_list; -#endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) struct list_head nfct_timeout_list; #endif @@ -230,7 +224,7 @@ extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); struct net *get_net_ns_by_fd(int fd); -u64 net_gen_cookie(struct net *net); +u64 __net_gen_cookie(struct net *net); #ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); @@ -245,7 +239,7 @@ void __put_net(struct net *net); static inline struct net *get_net(struct net *net) { - refcount_inc(&net->count); + refcount_inc(&net->ns.count); return net; } @@ -256,14 +250,14 @@ static inline struct net *maybe_get_net(struct net *net) * exists. If the reference count is zero this * function fails and returns NULL. */ - if (!refcount_inc_not_zero(&net->count)) + if (!refcount_inc_not_zero(&net->ns.count)) net = NULL; return net; } static inline void put_net(struct net *net) { - if (refcount_dec_and_test(&net->count)) + if (refcount_dec_and_test(&net->ns.count)) __put_net(net); } @@ -275,7 +269,7 @@ int net_eq(const struct net *net1, const struct net *net2) static inline int check_net(const struct net *net) { - return refcount_read(&net->count) != 0; + return refcount_read(&net->ns.count) != 0; } void net_drop_ns(void *); diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index 40e0e0623f46..c653fcb88354 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -8,8 +8,8 @@ #include <net/netfilter/nf_reject.h> void nf_send_unreach(struct sk_buff *skb_in, int code, int hook); -void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook); - +void nf_send_reset(struct net *net, struct sock *, struct sk_buff *oldskb, + int hook); const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, struct tcphdr *_oth, int hook); struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, @@ -18,4 +18,14 @@ struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, const struct tcphdr *oth); +struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code); +struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook); + + #endif /* _IPV4_NF_REJECT_H */ diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h index 4a3ef9ebdf6f..d729344ba644 100644 --- a/include/net/netfilter/ipv6/nf_reject.h +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -7,9 +7,8 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code, unsigned int hooknum); - -void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook); - +void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, + int hook); const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, struct tcphdr *otcph, unsigned int *otcplen, int hook); @@ -20,4 +19,13 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, const struct tcphdr *oth, unsigned int otcplen); +struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook); +struct sk_buff *nf_reject_skb_v6_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code); + #endif /* _IPV6_NF_REJECT_H */ diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 88186b95b3c2..96f9cf81f46b 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -32,7 +32,7 @@ struct nf_conntrack_l4proto { /* convert protoinfo to nfnetink attributes */ int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, - struct nf_conn *ct); + struct nf_conn *ct, bool destroy); /* convert nfnetlink attributes to protoinfo */ int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct); @@ -203,6 +203,20 @@ static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net) { return &net->ct.nf_ct_proto.icmpv6; } + +/* Caller must check nf_ct_protonum(ct) is IPPROTO_TCP before calling. */ +static inline void nf_ct_set_tcp_be_liberal(struct nf_conn *ct) +{ + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; +} + +/* Caller must check nf_ct_protonum(ct) is IPPROTO_TCP before calling. */ +static inline bool nf_conntrack_tcp_established(const struct nf_conn *ct) +{ + return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED && + test_bit(IPS_ASSURED_BIT, &ct->status); +} #endif #ifdef CONFIG_NF_CT_PROTO_DCCP diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 0d3920896d50..716db4a0fed8 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -108,6 +108,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, unsigned int logflags); void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m, struct sock *sk); +void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb); void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 224d194ad29d..4b6ecf532623 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -14,6 +14,8 @@ #include <net/netlink.h> #include <net/flow_offload.h> +#define NFT_MAX_HOOKS (NF_INET_INGRESS + 1) + struct module; #define NFT_JUMP_STACK_SIZE 16 @@ -148,13 +150,6 @@ static inline void nft_data_copy(u32 *dst, const struct nft_data *src, memcpy(dst, src, len); } -static inline void nft_data_debug(const struct nft_data *data) -{ - pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n", - data->data[0], data->data[1], - data->data[2], data->data[3]); -} - /** * struct nft_ctx - nf_tables rule/set context * @@ -310,8 +305,33 @@ struct nft_set_estimate { enum nft_set_class space; }; +#define NFT_EXPR_MAXATTR 16 +#define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \ + ALIGN(size, __alignof__(struct nft_expr))) + +/** + * struct nft_expr - nf_tables expression + * + * @ops: expression ops + * @data: expression private data + */ +struct nft_expr { + const struct nft_expr_ops *ops; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); +}; + +static inline void *nft_expr_priv(const struct nft_expr *expr) +{ + return (void *)expr->data; +} + +int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); +void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); +int nft_expr_dump(struct sk_buff *skb, unsigned int attr, + const struct nft_expr *expr); + struct nft_set_ext; -struct nft_expr; /** * struct nft_set_ops - nf_tables set operations @@ -401,6 +421,22 @@ struct nft_set_type { }; #define to_set_type(o) container_of(o, struct nft_set_type, ops) +struct nft_set_elem_expr { + u8 size; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +#define nft_setelem_expr_at(__elem_expr, __offset) \ + ((struct nft_expr *)&__elem_expr->data[__offset]) + +#define nft_setelem_expr_foreach(__expr, __elem_expr, __size) \ + for (__expr = nft_setelem_expr_at(__elem_expr, 0), __size = 0; \ + __size < (__elem_expr)->size; \ + __size += (__expr)->ops->size, __expr = ((void *)(__expr)) + (__expr)->ops->size) + +#define NFT_SET_EXPR_MAX 2 + /** * struct nft_set - nf_tables set instance * @@ -453,13 +489,14 @@ struct nft_set { u16 policy; u16 udlen; unsigned char *udata; - struct nft_expr *expr; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; u16 flags:14, genmask:2; u8 klen; u8 dlen; + u8 num_exprs; + struct nft_expr *exprs[NFT_SET_EXPR_MAX]; unsigned char data[] __attribute__((aligned(__alignof__(u64)))); }; @@ -524,7 +561,7 @@ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); * @NFT_SET_EXT_TIMEOUT: element timeout * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_USERDATA: user data associated with the element - * @NFT_SET_EXT_EXPR: expression assiociated with the element + * @NFT_SET_EXT_EXPRESSIONS: expressions assiciated with the element * @NFT_SET_EXT_OBJREF: stateful object reference associated with element * @NFT_SET_EXT_NUM: number of extension types */ @@ -536,7 +573,7 @@ enum nft_set_extensions { NFT_SET_EXT_TIMEOUT, NFT_SET_EXT_EXPIRATION, NFT_SET_EXT_USERDATA, - NFT_SET_EXT_EXPR, + NFT_SET_EXT_EXPRESSIONS, NFT_SET_EXT_OBJREF, NFT_SET_EXT_NUM }; @@ -654,9 +691,9 @@ static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext return nft_set_ext(ext, NFT_SET_EXT_USERDATA); } -static inline struct nft_expr *nft_set_ext_expr(const struct nft_set_ext *ext) +static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ext *ext) { - return nft_set_ext(ext, NFT_SET_EXT_EXPR); + return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS); } static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) @@ -684,6 +721,8 @@ void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *key_end, const u32 *data, u64 timeout, u64 expiration, gfp_t gfp); +int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_expr *expr_array[]); void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr); @@ -799,7 +838,6 @@ struct nft_offload_ctx; * @validate: validate expression, called during loop detection * @data: extra data to attach to this expression operation */ -struct nft_expr; struct nft_expr_ops { void (*eval)(const struct nft_expr *expr, struct nft_regs *regs, @@ -835,32 +873,6 @@ struct nft_expr_ops { void *data; }; -#define NFT_EXPR_MAXATTR 16 -#define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \ - ALIGN(size, __alignof__(struct nft_expr))) - -/** - * struct nft_expr - nf_tables expression - * - * @ops: expression ops - * @data: expression private data - */ -struct nft_expr { - const struct nft_expr_ops *ops; - unsigned char data[] - __attribute__((aligned(__alignof__(u64)))); -}; - -static inline void *nft_expr_priv(const struct nft_expr *expr) -{ - return (void *)expr->data; -} - -int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); -void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); -int nft_expr_dump(struct sk_buff *skb, unsigned int attr, - const struct nft_expr *expr); - /** * struct nft_rule - nf_tables rule * @@ -896,6 +908,12 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) return (struct nft_expr *)&rule->data[rule->dlen]; } +static inline bool nft_expr_more(const struct nft_rule *rule, + const struct nft_expr *expr) +{ + return expr != nft_expr_last(rule) && expr->ops; +} + static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) { return (void *)&rule->data[rule->dlen]; @@ -907,11 +925,17 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, struct nft_regs *regs, const struct nft_pktinfo *pkt) { + struct nft_set_elem_expr *elem_expr; struct nft_expr *expr; - - if (__nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) { - expr = nft_set_ext_expr(ext); - expr->ops->eval(expr, regs, pkt); + u32 size; + + if (__nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) { + elem_expr = nft_set_ext_expr(ext); + nft_setelem_expr_foreach(expr, elem_expr, size) { + expr->ops->eval(expr, regs, pkt); + if (regs->verdict.code == NFT_BREAK) + return; + } } } @@ -952,6 +976,8 @@ struct nft_chain { bound:1, genmask:2; char *name; + u16 udlen; + u8 *udata; /* Only used during control plane commit phase: */ struct nft_rule **rules_next; @@ -984,7 +1010,7 @@ struct nft_chain_type { int family; struct module *owner; unsigned int hook_mask; - nf_hookfn *hooks[NF_MAX_HOOKS]; + nf_hookfn *hooks[NFT_MAX_HOOKS]; int (*ops_register)(struct net *net, const struct nf_hook_ops *ops); void (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops); }; @@ -1082,8 +1108,16 @@ struct nft_table { flags:8, genmask:2; char *name; + u16 udlen; + u8 *udata; }; +static inline bool nft_base_chain_netdev(int family, u32 hooknum) +{ + return family == NFPROTO_NETDEV || + (family == NFPROTO_INET && hooknum == NF_INET_INGRESS); +} + void nft_register_chain_type(const struct nft_chain_type *); void nft_unregister_chain_type(const struct nft_chain_type *); @@ -1123,6 +1157,8 @@ struct nft_object { u32 genmask:2, use:30; u64 handle; + u16 udlen; + u8 *udata; /* runtime data below here */ const struct nft_object_ops *ops ____cacheline_aligned; unsigned char data[] @@ -1511,4 +1547,8 @@ void __init nft_chain_route_init(void); void nft_chain_route_fini(void); void nf_tables_trans_destroy_flush_work(void); + +int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); +__be64 nf_jiffies64_to_msecs(u64 input); + #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 78516de14d31..8657e6815b07 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -23,10 +23,19 @@ extern struct nft_object_type nft_secmark_obj_type; int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); +struct nft_bitwise_fast_expr { + u32 mask; + u32 xor; + enum nft_registers sreg:8; + enum nft_registers dreg:8; +}; + struct nft_cmp_fast_expr { u32 data; + u32 mask; enum nft_registers sreg:8; u8 len; + bool inv; }; struct nft_immediate_expr { @@ -66,6 +75,8 @@ struct nft_payload_set { extern const struct nft_expr_ops nft_payload_fast_ops; +extern const struct nft_expr_ops nft_bitwise_fast_ops; + extern struct static_key_false nft_counters_enabled; extern struct static_key_false nft_trace_enabled; diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index ed7b511f0a59..1f7bea39ad1b 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -53,4 +53,37 @@ static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, nft_set_pktinfo_unspec(pkt, skb); } +static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt, + struct sk_buff *skb) +{ + struct iphdr *iph; + u32 len, thoff; + + if (!pskb_may_pull(skb, sizeof(*iph))) + return -1; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + goto inhdr_error; + + len = ntohs(iph->tot_len); + thoff = iph->ihl * 4; + if (skb->len < len) { + __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INTRUNCATEDPKTS); + return -1; + } else if (len < thoff) { + goto inhdr_error; + } + + pkt->tprot_set = true; + pkt->tprot = iph->protocol; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; + + return 0; + +inhdr_error: + __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INHDRERRORS); + return -1; +} #endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index d0f1c537b017..867de29f3f7a 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -70,4 +70,50 @@ static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, nft_set_pktinfo_unspec(pkt, skb); } +static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt, + struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + unsigned int flags = IP6_FH_F_AUTH; + unsigned short frag_off; + unsigned int thoff = 0; + struct inet6_dev *idev; + struct ipv6hdr *ip6h; + int protohdr; + u32 pkt_len; + + if (!pskb_may_pull(skb, sizeof(*ip6h))) + return -1; + + ip6h = ipv6_hdr(skb); + if (ip6h->version != 6) + goto inhdr_error; + + pkt_len = ntohs(ip6h->payload_len); + if (pkt_len + sizeof(*ip6h) > skb->len) { + idev = __in6_dev_get(nft_in(pkt)); + __IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INTRUNCATEDPKTS); + return -1; + } + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); + if (protohdr < 0) + goto inhdr_error; + + pkt->tprot_set = true; + pkt->tprot = protohdr; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = frag_off; + + return 0; + +inhdr_error: + idev = __in6_dev_get(nft_in(pkt)); + __IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INHDRERRORS); + return -1; +#else + return -1; +#endif +} + #endif diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index ea7d1d78b92d..1d34fe154fe0 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -37,6 +37,7 @@ void nft_offload_update_dependency(struct nft_offload_ctx *ctx, struct nft_flow_key { struct flow_dissector_key_basic basic; + struct flow_dissector_key_control control; union { struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; @@ -62,6 +63,9 @@ struct nft_flow_rule { #define NFT_OFFLOAD_F_ACTION (1 << 0) +void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, + enum flow_dissector_key_id addr_type); + struct nft_rule; struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule); void nft_flow_rule_destroy(struct nft_flow_rule *flow); @@ -74,6 +78,9 @@ int nft_flow_rule_offload_commit(struct net *net); offsetof(struct nft_flow_key, __base.__field); \ (__reg)->len = __len; \ (__reg)->key = __key; \ + +#define NFT_OFFLOAD_MATCH_EXACT(__key, __base, __field, __len, __reg) \ + NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ memset(&(__reg)->mask, 0xff, (__reg)->len); int nft_chain_offload_priority(struct nft_base_chain *basechain); diff --git a/include/net/netlink.h b/include/net/netlink.h index 271620f6bc7f..1ceec518ab49 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -142,7 +142,7 @@ * Attribute Misc: * nla_memcpy(dest, nla, count) copy attribute into memory * nla_memcmp(nla, data, size) compare attribute with memory area - * nla_strlcpy(dst, nla, size) copy attribute to a sized string + * nla_strscpy(dst, nla, size) copy attribute to a sized string * nla_strcmp(nla, str) compare attribute with string * * Attribute Parsing: @@ -181,8 +181,6 @@ enum { NLA_S64, NLA_BITFIELD32, NLA_REJECT, - NLA_EXACT_LEN, - NLA_MIN_LEN, __NLA_TYPE_MAX, }; @@ -199,11 +197,12 @@ struct netlink_range_validation_signed { enum nla_policy_validation { NLA_VALIDATE_NONE, NLA_VALIDATE_RANGE, + NLA_VALIDATE_RANGE_WARN_TOO_LONG, NLA_VALIDATE_MIN, NLA_VALIDATE_MAX, + NLA_VALIDATE_MASK, NLA_VALIDATE_RANGE_PTR, NLA_VALIDATE_FUNCTION, - NLA_VALIDATE_WARN_TOO_LONG, }; /** @@ -222,7 +221,7 @@ enum nla_policy_validation { * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload - * NLA_MIN_LEN Minimum length of attribute payload + * (but see also below with the validation type) * NLA_NESTED, * NLA_NESTED_ARRAY Length verification is done by checking len of * nested header (or empty); len field is used if @@ -237,11 +236,6 @@ enum nla_policy_validation { * just like "All other" * NLA_BITFIELD32 Unused * NLA_REJECT Unused - * NLA_EXACT_LEN Attribute should have exactly this length, otherwise - * it is rejected or warned about, the latter happening - * if and only if the `validation_type' is set to - * NLA_VALIDATE_WARN_TOO_LONG. - * NLA_MIN_LEN Minimum length of attribute payload * All other Minimum length of attribute payload * * Meaning of validation union: @@ -296,6 +290,11 @@ enum nla_policy_validation { * pointer to a struct netlink_range_validation_signed * that indicates the min/max values. * Use NLA_POLICY_FULL_RANGE_SIGNED(). + * + * NLA_BINARY If the validation type is like the ones for integers + * above, then the min/max length (not value like for + * integers) of the attribute is enforced. + * * All other Unused - but note that it's a union * * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN: @@ -309,7 +308,7 @@ enum nla_policy_validation { * static const struct nla_policy my_policy[ATTR_MAX+1] = { * [ATTR_FOO] = { .type = NLA_U16 }, * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, - * [ATTR_BAZ] = { .type = NLA_EXACT_LEN, .len = sizeof(struct mystruct) }, + * [ATTR_BAZ] = NLA_POLICY_EXACT_LEN(sizeof(struct mystruct)), * [ATTR_GOO] = NLA_POLICY_BITFIELD32(myvalidflags), * }; */ @@ -319,6 +318,7 @@ struct nla_policy { u16 len; union { const u32 bitfield32_valid; + const u32 mask; const char *reject_message; const struct nla_policy *nested_policy; struct netlink_range_validation *range; @@ -335,9 +335,10 @@ struct nla_policy { * nesting validation starts here. * * Additionally, it means that NLA_UNSPEC is actually NLA_REJECT - * for any types >= this, so need to use NLA_MIN_LEN to get the - * previous pure { .len = xyz } behaviour. The advantage of this - * is that types not specified in the policy will be rejected. + * for any types >= this, so need to use NLA_POLICY_MIN_LEN() to + * get the previous pure { .len = xyz } behaviour. The advantage + * of this is that types not specified in the policy will be + * rejected. * * For completely new families it should be set to 1 so that the * validation is enforced for all attributes. For existing ones @@ -349,12 +350,6 @@ struct nla_policy { }; }; -#define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len } -#define NLA_POLICY_EXACT_LEN_WARN(_len) \ - { .type = NLA_EXACT_LEN, .len = _len, \ - .validation_type = NLA_VALIDATE_WARN_TOO_LONG, } -#define NLA_POLICY_MIN_LEN(_len) { .type = NLA_MIN_LEN, .len = _len } - #define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN) #define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN) @@ -369,20 +364,25 @@ struct nla_policy { #define NLA_POLICY_BITFIELD32(valid) \ { .type = NLA_BITFIELD32, .bitfield32_valid = valid } +#define __NLA_IS_UINT_TYPE(tp) \ + (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || tp == NLA_U64) +#define __NLA_IS_SINT_TYPE(tp) \ + (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64) + #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) #define NLA_ENSURE_UINT_TYPE(tp) \ - (__NLA_ENSURE(tp == NLA_U8 || tp == NLA_U16 || \ - tp == NLA_U32 || tp == NLA_U64 || \ - tp == NLA_MSECS) + tp) + (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp)) + tp) +#define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp) \ + (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \ + tp == NLA_MSECS || \ + tp == NLA_BINARY) + tp) #define NLA_ENSURE_SINT_TYPE(tp) \ - (__NLA_ENSURE(tp == NLA_S8 || tp == NLA_S16 || \ - tp == NLA_S32 || tp == NLA_S64) + tp) -#define NLA_ENSURE_INT_TYPE(tp) \ - (__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 || \ - tp == NLA_S16 || tp == NLA_U16 || \ - tp == NLA_S32 || tp == NLA_U32 || \ - tp == NLA_S64 || tp == NLA_U64 || \ - tp == NLA_MSECS) + tp) + (__NLA_ENSURE(__NLA_IS_SINT_TYPE(tp)) + tp) +#define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \ + (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \ + __NLA_IS_SINT_TYPE(tp) || \ + tp == NLA_MSECS || \ + tp == NLA_BINARY) + tp) #define NLA_ENSURE_NO_VALIDATION_PTR(tp) \ (__NLA_ENSURE(tp != NLA_BITFIELD32 && \ tp != NLA_REJECT && \ @@ -390,14 +390,14 @@ struct nla_policy { tp != NLA_NESTED_ARRAY) + tp) #define NLA_POLICY_RANGE(tp, _min, _max) { \ - .type = NLA_ENSURE_INT_TYPE(tp), \ + .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_RANGE, \ .min = _min, \ .max = _max \ } #define NLA_POLICY_FULL_RANGE(tp, _range) { \ - .type = NLA_ENSURE_UINT_TYPE(tp), \ + .type = NLA_ENSURE_UINT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_RANGE_PTR, \ .range = _range, \ } @@ -409,17 +409,23 @@ struct nla_policy { } #define NLA_POLICY_MIN(tp, _min) { \ - .type = NLA_ENSURE_INT_TYPE(tp), \ + .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_MIN, \ .min = _min, \ } #define NLA_POLICY_MAX(tp, _max) { \ - .type = NLA_ENSURE_INT_TYPE(tp), \ + .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_MAX, \ .max = _max, \ } +#define NLA_POLICY_MASK(tp, _mask) { \ + .type = NLA_ENSURE_UINT_TYPE(tp), \ + .validation_type = NLA_VALIDATE_MASK, \ + .mask = _mask, \ +} + #define NLA_POLICY_VALIDATE_FN(tp, fn, ...) { \ .type = NLA_ENSURE_NO_VALIDATION_PTR(tp), \ .validation_type = NLA_VALIDATE_FUNCTION, \ @@ -427,6 +433,15 @@ struct nla_policy { .len = __VA_ARGS__ + 0, \ } +#define NLA_POLICY_EXACT_LEN(_len) NLA_POLICY_RANGE(NLA_BINARY, _len, _len) +#define NLA_POLICY_EXACT_LEN_WARN(_len) { \ + .type = NLA_BINARY, \ + .validation_type = NLA_VALIDATE_RANGE_WARN_TOO_LONG, \ + .min = _len, \ + .max = _len \ +} +#define NLA_POLICY_MIN_LEN(_len) NLA_POLICY_MIN(NLA_BINARY, _len) + /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request @@ -491,7 +506,7 @@ int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, struct netlink_ext_ack *extack); int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); -size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); +ssize_t nla_strscpy(char *dst, const struct nlattr *nla, size_t dstsize); char *nla_strdup(const struct nlattr *nla, gfp_t flags); int nla_memcpy(void *dest, const struct nlattr *src, int count); int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); @@ -1931,11 +1946,21 @@ void nla_get_range_unsigned(const struct nla_policy *pt, void nla_get_range_signed(const struct nla_policy *pt, struct netlink_range_validation_signed *range); -int netlink_policy_dump_start(const struct nla_policy *policy, - unsigned int maxtype, - unsigned long *state); -bool netlink_policy_dump_loop(unsigned long state); -int netlink_policy_dump_write(struct sk_buff *skb, unsigned long state); -void netlink_policy_dump_free(unsigned long state); +struct netlink_policy_dump_state; + +int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate, + const struct nla_policy *policy, + unsigned int maxtype); +int netlink_policy_dump_get_policy_idx(struct netlink_policy_dump_state *state, + const struct nla_policy *policy, + unsigned int maxtype); +bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state); +int netlink_policy_dump_write(struct sk_buff *skb, + struct netlink_policy_dump_state *state); +int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt); +int netlink_policy_dump_write_attr(struct sk_buff *skb, + const struct nla_policy *pt, + int nestattr); +void netlink_policy_dump_free(struct netlink_policy_dump_state *state); #endif diff --git a/include/net/netns/can.h b/include/net/netns/can.h index b6ab7d1530d7..52fbd8291a96 100644 --- a/include/net/netns/can.h +++ b/include/net/netns/can.h @@ -15,7 +15,6 @@ struct can_rcv_lists_stats; struct netns_can { #if IS_ENABLED(CONFIG_PROC_FS) struct proc_dir_entry *proc_dir; - struct proc_dir_entry *pde_version; struct proc_dir_entry *pde_stats; struct proc_dir_entry *pde_reset_stats; struct proc_dir_entry *pde_rcvlist_all; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9e36738c1fe1..8e4fcac4df72 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -183,6 +183,7 @@ struct netns_ipv4 { unsigned int sysctl_tcp_fastopen_blackhole_timeout; atomic_t tfo_active_disable_times; unsigned long tfo_active_disable_stamp; + int sysctl_tcp_reflect_tos; int sysctl_udp_wmem_min; int sysctl_udp_rmem_min; diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h index 1937476c94a0..1849e77eb68a 100644 --- a/include/net/netns/nexthop.h +++ b/include/net/netns/nexthop.h @@ -14,6 +14,6 @@ struct netns_nexthop { unsigned int seq; /* protected by rtnl_mutex */ u32 last_id_allocated; - struct atomic_notifier_head notifier_chain; + struct blocking_notifier_head notifier_chain; }; #endif diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index d8d02e4188d1..a0f315effa94 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -22,6 +22,14 @@ struct netns_sctp { */ struct sock *ctl_sock; + /* UDP tunneling listening sock. */ + struct sock *udp4_sock; + struct sock *udp6_sock; + /* UDP tunneling listening port. */ + int udp_port; + /* UDP tunneling remote encap port. */ + int encap_port; + /* This is the global local address list. * We actively maintain this complete list of addresses on * the system by catching address add/delete events. diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 3a4f9e3b91a5..226930d66b63 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -105,15 +105,49 @@ struct nexthop { }; enum nexthop_event_type { - NEXTHOP_EVENT_ADD, - NEXTHOP_EVENT_DEL + NEXTHOP_EVENT_DEL, + NEXTHOP_EVENT_REPLACE, }; -int call_nexthop_notifier(struct notifier_block *nb, struct net *net, - enum nexthop_event_type event_type, - struct nexthop *nh); -int register_nexthop_notifier(struct net *net, struct notifier_block *nb); +struct nh_notifier_single_info { + struct net_device *dev; + u8 gw_family; + union { + __be32 ipv4; + struct in6_addr ipv6; + }; + u8 is_reject:1, + is_fdb:1, + has_encap:1; +}; + +struct nh_notifier_grp_entry_info { + u8 weight; + u32 id; + struct nh_notifier_single_info nh; +}; + +struct nh_notifier_grp_info { + u16 num_nh; + bool is_fdb; + struct nh_notifier_grp_entry_info nh_entries[]; +}; + +struct nh_notifier_info { + struct net *net; + struct netlink_ext_ack *extack; + u32 id; + bool is_grp; + union { + struct nh_notifier_single_info *nh; + struct nh_notifier_grp_info *nh_grp; + }; +}; + +int register_nexthop_notifier(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); +void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index 0550e0380b8d..e82f55f543bb 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -25,6 +25,8 @@ #define NCI_MAX_PARAM_LEN 251 #define NCI_MAX_PAYLOAD_SIZE 255 #define NCI_MAX_PACKET_SIZE 258 +#define NCI_MAX_LARGE_PARAMS_NCI_v2 15 +#define NCI_VER_2_MASK 0x20 /* NCI Status Codes */ #define NCI_STATUS_OK 0x00 @@ -131,6 +133,9 @@ #define NCI_LF_CON_BITR_F_212 0x02 #define NCI_LF_CON_BITR_F_424 0x04 +/* NCI 2.x Feature Enable Bit */ +#define NCI_FEATURE_DISABLE 0x00 + /* NCI Reset types */ #define NCI_RESET_TYPE_KEEP_CONFIG 0x00 #define NCI_RESET_TYPE_RESET_CONFIG 0x01 @@ -220,6 +225,11 @@ struct nci_core_reset_cmd { } __packed; #define NCI_OP_CORE_INIT_CMD nci_opcode_pack(NCI_GID_CORE, 0x01) +/* To support NCI 2.x */ +struct nci_core_init_v2_cmd { + u8 feature1; + u8 feature2; +}; #define NCI_OP_CORE_SET_CONFIG_CMD nci_opcode_pack(NCI_GID_CORE, 0x02) struct set_config_param { @@ -334,6 +344,20 @@ struct nci_core_init_rsp_2 { __le32 manufact_specific_info; } __packed; +/* To support NCI ver 2.x */ +struct nci_core_init_rsp_nci_ver2 { + u8 status; + __le32 nfcc_features; + u8 max_logical_connections; + __le16 max_routing_table_size; + u8 max_ctrl_pkt_payload_len; + u8 max_data_pkt_hci_payload_len; + u8 number_of_hci_credit; + __le16 max_nfc_v_frame_size; + u8 num_supported_rf_interfaces; + u8 supported_rf_interfaces[]; +} __packed; + #define NCI_OP_CORE_SET_CONFIG_RSP nci_opcode_pack(NCI_GID_CORE, 0x02) struct nci_core_set_config_rsp { __u8 status; @@ -372,6 +396,16 @@ struct nci_nfcee_discover_rsp { /* --------------------------- */ /* ---- NCI Notifications ---- */ /* --------------------------- */ +#define NCI_OP_CORE_RESET_NTF nci_opcode_pack(NCI_GID_CORE, 0x00) +struct nci_core_reset_ntf { + u8 reset_trigger; + u8 config_status; + u8 nci_ver; + u8 manufact_id; + u8 manufacturer_specific_len; + __le32 manufact_specific_info; +} __packed; + #define NCI_OP_CORE_CONN_CREDITS_NTF nci_opcode_pack(NCI_GID_CORE, 0x06) struct conn_credit_entry { __u8 conn_id; diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 81d7773f96cd..b5b195305346 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -152,6 +152,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); void page_pool_release_page(struct page_pool *pool, struct page *page); +void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count); #else static inline void page_pool_destroy(struct page_pool *pool) { @@ -165,6 +167,11 @@ static inline void page_pool_release_page(struct page_pool *pool, struct page *page) { } + +static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count) +{ +} #endif void page_pool_put_page(struct page_pool *pool, struct page *page, @@ -215,4 +222,23 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) if (unlikely(pool->p.nid != new_nid)) page_pool_update_nid(pool, new_nid); } + +static inline void page_pool_ring_lock(struct page_pool *pool) + __acquires(&pool->ring.producer_lock) +{ + if (in_serving_softirq()) + spin_lock(&pool->ring.producer_lock); + else + spin_lock_bh(&pool->ring.producer_lock); +} + +static inline void page_pool_ring_unlock(struct page_pool *pool) + __releases(&pool->ring.producer_lock) +{ + if (in_serving_softirq()) + spin_unlock(&pool->ring.producer_lock); + else + spin_unlock_bh(&pool->ring.producer_lock); +} + #endif /* _NET_PAGE_POOL_H */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index d4d461236351..0f2a9c44171c 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -48,7 +48,7 @@ void tcf_chain_put_by_act(struct tcf_chain *chain); struct tcf_chain *tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain); struct tcf_proto *tcf_get_next_proto(struct tcf_chain *chain, - struct tcf_proto *tp, bool rtnl_held); + struct tcf_proto *tp); void tcf_block_netif_keep_dst(struct tcf_block *block); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, @@ -512,7 +512,7 @@ tcf_change_indev(struct net *net, struct nlattr *indev_tlv, char indev[IFNAMSIZ]; struct net_device *dev; - if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) { + if (nla_strscpy(indev, indev_tlv, IFNAMSIZ) < 0) { NL_SET_ERR_MSG_ATTR(extack, indev_tlv, "Interface name too long"); return -EINVAL; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index ac8c890a2657..15b1b30f454e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -19,12 +19,14 @@ struct qdisc_walker { int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *); }; -#define QDISC_ALIGNTO 64 -#define QDISC_ALIGN(len) (((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1)) - static inline void *qdisc_priv(struct Qdisc *q) { - return (char *) q + QDISC_ALIGN(sizeof(struct Qdisc)); + return &q->privdata; +} + +static inline struct Qdisc *qdisc_from_priv(void *priv) +{ + return container_of(priv, struct Qdisc, privdata); } /* diff --git a/include/net/red.h b/include/net/red.h index fc455445f4b2..932f0d79d60c 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -168,12 +168,14 @@ static inline void red_set_vars(struct red_vars *v) v->qcount = -1; } -static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog) +static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog, u8 Scell_log) { if (fls(qth_min) + Wlog > 32) return false; if (fls(qth_max) + Wlog > 32) return false; + if (Scell_log >= 32) + return false; if (qth_max < qth_min) return false; return true; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index b2eb8b4ba697..29e41ff3ec93 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -41,6 +41,13 @@ struct request_sock_ops { int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req); +struct saved_syn { + u32 mac_hdrlen; + u32 network_hdrlen; + u32 tcp_hdrlen; + u8 data[]; +}; + /* struct request_sock - mini sock to represent a connection request */ struct request_sock { @@ -60,7 +67,7 @@ struct request_sock { struct timer_list rsk_timer; const struct request_sock_ops *rsk_ops; struct sock *sk; - u32 *saved_syn; + struct saved_syn *saved_syn; u32 secid; u32 peer_secid; }; diff --git a/include/net/route.h b/include/net/route.h index ff021cab657e..2e6c0e153e3a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -165,7 +165,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } @@ -322,7 +322,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, ip_rt_put(rt); flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr); } - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } @@ -338,7 +338,7 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable flowi4_update_output(fl4, sk->sk_bound_dev_if, RT_CONN_FLAGS(sk), fl4->daddr, fl4->saddr); - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(sock_net(sk), fl4, sk); } return rt; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d60e7c39d60c..5b490b5591df 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -91,7 +91,7 @@ struct Qdisc { struct net_rate_estimator __rcu *rate_est; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; - int padded; + int pad; refcount_t refcnt; /* @@ -112,6 +112,9 @@ struct Qdisc { /* for NOLOCK qdisc, true if there are no enqueued skbs */ bool empty; struct rcu_head rcu; + + /* private data */ + long privdata[] ____cacheline_aligned; }; static inline void qdisc_refcount_inc(struct Qdisc *qdisc) @@ -432,7 +435,6 @@ struct tcf_block { struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */ }; -#ifdef CONFIG_PROVE_LOCKING static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain) { return lockdep_is_held(&chain->filter_chain_lock); @@ -442,17 +444,6 @@ static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) { return lockdep_is_held(&tp->lock); } -#else -static inline bool lockdep_tcf_chain_is_locked(struct tcf_block *chain) -{ - return true; -} - -static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) -{ - return true; -} -#endif /* #ifdef CONFIG_PROVE_LOCKING */ #define tcf_chain_dereference(p, chain) \ rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain)) @@ -1047,12 +1038,6 @@ static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, return 0; } -static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch, - struct sk_buff **to_free) -{ - return __qdisc_queue_drop_head(sch, &sch->q, to_free); -} - static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) { const struct qdisc_skb_head *qh = &sch->q; @@ -1158,7 +1143,7 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, old = *pold; *pold = new; if (old != NULL) - qdisc_tree_flush_backlog(old); + qdisc_purge_queue(old); sch_tree_unlock(sch); return old; @@ -1284,9 +1269,6 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, struct tcf_block *block); -static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res) -{ - return res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb); -} +int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); #endif diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 122d9e2d8dfd..14a0d22c9113 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -286,6 +286,8 @@ enum { SCTP_MAX_GABS = 16 }; * functions simpler to write. */ +#define SCTP_DEFAULT_UDP_PORT 9899 /* default UDP tunneling port */ + /* These are the values for pf exposure, UNUSED is to keep compatible with old * applications by default. */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 4fc747b778eb..86f74f2fe6de 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -84,6 +84,8 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *addr, struct sctp_pf *sctp_get_pf_specific(sa_family_t family); int sctp_register_pf(struct sctp_pf *, sa_family_t); void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); +int sctp_udp_sock_start(struct net *net); +void sctp_udp_sock_stop(struct net *net); /* * sctp/socket.c @@ -576,10 +578,13 @@ static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp, { __u32 overhead = sizeof(struct sctphdr) + extra; - if (sp) + if (sp) { overhead += sp->pf->af->net_header_len; - else + if (sp->udp_port) + overhead += sizeof(struct udphdr); + } else { overhead += sizeof(struct ipv6hdr); + } if (WARN_ON_ONCE(mtu && mtu <= overhead)) mtu = overhead; diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 5c491a3bc27e..fd223c94589a 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -221,6 +221,9 @@ struct sctp_chunk *sctp_make_violation_paramlen( struct sctp_chunk *sctp_make_violation_max_retrans( const struct sctp_association *asoc, const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_new_encap_port( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport); struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc, @@ -380,6 +383,7 @@ sctp_vtag_verify(const struct sctp_chunk *chunk, if (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag) return 1; + chunk->transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port; return 0; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0bdff38eb4bb..1aa585216f34 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -178,6 +178,9 @@ struct sctp_sock { */ __u32 hbinterval; + __be16 udp_port; + __be16 encap_port; + /* This is the max_retrans value for new associations. */ __u16 pathmaxrxt; @@ -877,6 +880,8 @@ struct sctp_transport { */ unsigned long last_time_ecne_reduced; + __be16 encap_port; + /* This is the max_retrans value for the transport and will * be initialized from the assocs value. This can be changed * using the SCTP_SET_PEER_ADDR_PARAMS socket option. @@ -1117,13 +1122,14 @@ static inline void sctp_outq_cork(struct sctp_outq *q) */ struct sctp_input_cb { union { - struct inet_skb_parm h4; + struct inet_skb_parm h4; #if IS_ENABLED(CONFIG_IPV6) - struct inet6_skb_parm h6; + struct inet6_skb_parm h6; #endif } header; struct sctp_chunk *chunk; struct sctp_af *af; + __be16 encap_port; }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) @@ -1790,6 +1796,8 @@ struct sctp_association { */ unsigned long hbinterval; + __be16 encap_port; + /* This is the max_retrans value for new transports in the * association. */ diff --git a/include/net/smc.h b/include/net/smc.h index 646feb4bc75f..e441aa97ad61 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -37,6 +37,8 @@ struct smcd_dmb { #define ISM_EVENT_GID 1 #define ISM_EVENT_SWR 2 +#define ISM_RESERVED_VLANID 0x1FFF + #define ISM_ERROR 0xFFFF struct smcd_event { @@ -63,6 +65,8 @@ struct smcd_ops { int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); + void (*get_system_eid)(struct smcd_dev *dev, u8 **eid); + u16 (*get_chid)(struct smcd_dev *dev); }; struct smcd_dev { diff --git a/include/net/sock.h b/include/net/sock.h index 064637d1ddf6..129d200bccb4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -60,7 +60,7 @@ #include <linux/rculist_nulls.h> #include <linux/poll.h> #include <linux/sockptr.h> - +#include <linux/indirect_call_wrapper.h> #include <linux/atomic.h> #include <linux/refcount.h> #include <net/dst.h> @@ -246,7 +246,7 @@ struct sock_common { /* public: */ }; -struct bpf_sk_storage; +struct bpf_local_storage; /** * struct sock - network layer representation of sockets @@ -301,6 +301,8 @@ struct bpf_sk_storage; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_uid: user id of owner + * @sk_prefer_busy_poll: prefer busypolling over softirq processing + * @sk_busy_poll_budget: napi processing budget when busypolling * @sk_priority: %SO_PRIORITY setting * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family @@ -479,6 +481,10 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; +#ifdef CONFIG_NET_RX_BUSY_POLL + u8 sk_prefer_busy_poll; + u16 sk_busy_poll_budget; +#endif struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; @@ -517,7 +523,7 @@ struct sock { void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; #ifdef CONFIG_BPF_SYSCALL - struct bpf_sk_storage __rcu *sk_bpf_storage; + struct bpf_local_storage __rcu *sk_bpf_storage; #endif struct rcu_head sk_rcu; }; @@ -845,7 +851,6 @@ enum sock_flags { SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */ SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */ SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ - SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ SOCK_MEMALLOC, /* VM depends on this socket for swapping */ SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */ SOCK_FASYNC, /* fasync() active */ @@ -1265,13 +1270,22 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ +INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake)); + static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) return false; +#ifdef CONFIG_INET + return sk->sk_prot->stream_memory_free ? + INDIRECT_CALL_1(sk->sk_prot->stream_memory_free, + tcp_stream_memory_free, + sk, wake) : true; +#else return sk->sk_prot->stream_memory_free ? sk->sk_prot->stream_memory_free(sk, wake) : true; +#endif } static inline bool sk_stream_memory_free(const struct sock *sk) @@ -1478,7 +1492,7 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) { if (!sk_has_account(sk)) return true; - return size<= sk->sk_forward_alloc || + return size <= sk->sk_forward_alloc || __sk_mem_schedule(sk, size, SK_MEM_RECV) || skb_pfmemalloc(skb); } @@ -1526,7 +1540,6 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { - sock_set_flag(sk, SOCK_QUEUE_SHRUNK); sk_wmem_queued_add(sk, -skb->truesize); sk_mem_uncharge(sk, skb->truesize); if (static_branch_unlikely(&tcp_tx_skb_cache_key) && @@ -1568,13 +1581,11 @@ do { \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) -#ifdef CONFIG_LOCKDEP static inline bool lockdep_sock_is_held(const struct sock *sk) { return lockdep_is_held(&sk->sk_lock) || lockdep_is_held(&sk->sk_lock.slock); } -#endif void lock_sock_nested(struct sock *sk, int subclass); @@ -1583,6 +1594,7 @@ static inline void lock_sock(struct sock *sk) lock_sock_nested(sk, 0); } +void __lock_sock(struct sock *sk); void __release_sock(struct sock *sk); void release_sock(struct sock *sk); @@ -1593,7 +1605,8 @@ void release_sock(struct sock *sk); SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -bool lock_sock_fast(struct sock *sk); +bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); + /** * unlock_sock_fast - complement of lock_sock_fast * @sk: socket @@ -1603,11 +1616,14 @@ bool lock_sock_fast(struct sock *sk); * If slow mode is on, we call regular release_sock() */ static inline void unlock_sock_fast(struct sock *sk, bool slow) + __releases(&sk->sk_lock.slock) { - if (slow) + if (slow) { release_sock(sk); - else + __release(&sk->sk_lock.slock); + } else { spin_unlock_bh(&sk->sk_lock.slock); + } } /* Used by processes to "lock" a socket state, so that @@ -1905,10 +1921,13 @@ static inline void sk_set_txhash(struct sock *sk) sk->sk_txhash = net_tx_rndhash(); } -static inline void sk_rethink_txhash(struct sock *sk) +static inline bool sk_rethink_txhash(struct sock *sk) { - if (sk->sk_txhash) + if (sk->sk_txhash) { sk_set_txhash(sk); + return true; + } + return false; } static inline struct dst_entry * @@ -1931,12 +1950,10 @@ sk_dst_get(struct sock *sk) return dst; } -static inline void dst_negative_advice(struct sock *sk) +static inline void __dst_negative_advice(struct sock *sk) { struct dst_entry *ndst, *dst = __sk_dst_get(sk); - sk_rethink_txhash(sk); - if (dst && dst->ops->negative_advice) { ndst = dst->ops->negative_advice(dst); @@ -1948,6 +1965,12 @@ static inline void dst_negative_advice(struct sock *sk) } } +static inline void dst_negative_advice(struct sock *sk) +{ + sk_rethink_txhash(sk); + __dst_negative_advice(sk); +} + static inline void __sk_dst_set(struct sock *sk, struct dst_entry *dst) { @@ -2197,6 +2220,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); +void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer); + int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, struct sk_buff *skb, unsigned int flags, void (*destructor)(struct sock *sk, diff --git a/include/net/switchdev.h b/include/net/switchdev.h index ff2246914301..afdf8bd1b4fe 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -38,10 +38,10 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_PORT_MROUTER, SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, + SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL, SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, #if IS_ENABLED(CONFIG_BRIDGE_MRP) - SWITCHDEV_ATTR_ID_MRP_PORT_STATE, SWITCHDEV_ATTR_ID_MRP_PORT_ROLE, #endif }; @@ -58,9 +58,9 @@ struct switchdev_attr { bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ + u16 vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */ bool mc_disabled; /* MC_DISABLED */ #if IS_ENABLED(CONFIG_BRIDGE_MRP) - u8 mrp_port_state; /* MRP_PORT_STATE */ u8 mrp_port_role; /* MRP_PORT_ROLE */ #endif } u; @@ -203,6 +203,7 @@ enum switchdev_notifier_type { SWITCHDEV_FDB_ADD_TO_DEVICE, SWITCHDEV_FDB_DEL_TO_DEVICE, SWITCHDEV_FDB_OFFLOADED, + SWITCHDEV_FDB_FLUSH_TO_BRIDGE, SWITCHDEV_PORT_OBJ_ADD, /* Blocking. */ SWITCHDEV_PORT_OBJ_DEL, /* Blocking. */ diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index e1057b255f69..879fe8cff581 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -56,7 +56,10 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT struct tcf_tunnel_key *t = to_tunnel_key(a); - struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + struct tcf_tunnel_key_params *params; + + params = rcu_dereference_protected(t->params, + lockdep_is_held(&a->tcfa_lock)); return ¶ms->tcft_enc_metadata->u.tun_info; #else diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 4e2502408c31..f051046ba034 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -11,6 +11,8 @@ struct tcf_vlan_params { int tcfv_action; + unsigned char tcfv_push_dst[ETH_ALEN]; + unsigned char tcfv_push_src[ETH_ALEN]; u16 tcfv_push_vid; __be16 tcfv_push_proto; u8 tcfv_push_prio; diff --git a/include/net/tcp.h b/include/net/tcp.h index eab6c7510b5b..25bbada379c4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -322,6 +322,7 @@ void tcp_shutdown(struct sock *sk, int how); int tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); +void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); @@ -329,6 +330,8 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); +struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, + struct page *page, int offset, size_t *size); ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tcp_send_mss(struct sock *sk, int *size_goal, int flags); @@ -386,15 +389,16 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); -void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag); +void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag); void tcp_clear_retrans(struct tcp_sock *tp); void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); +void __tcp_close(struct sock *sk, long timeout); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); -void tcp_init_transfer(struct sock *sk, int bpf_op); +void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb); __poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, @@ -406,6 +410,7 @@ void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); int tcp_set_rcvlowat(struct sock *sk, int val); +int tcp_set_window_clamp(struct sock *sk, int val); void tcp_data_ready(struct sock *sk); #ifdef CONFIG_MMU int tcp_mmap(struct file *file, struct socket *sock, @@ -455,7 +460,8 @@ enum tcp_synack_type { struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc, - enum tcp_synack_type synack_type); + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb); int tcp_disconnect(struct sock *sk, int flags); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); @@ -605,7 +611,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, /* tcp_input.c */ void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); -void tcp_reset(struct sock *sk); +void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); void tcp_fin(struct sock *sk); @@ -624,6 +630,7 @@ static inline void tcp_clear_xmit_timers(struct sock *sk) unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); unsigned int tcp_current_mss(struct sock *sk); +u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when); /* Bound MSS / TSO packet size with the half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) @@ -699,7 +706,7 @@ static inline void tcp_fast_path_check(struct sock *sk) static inline u32 tcp_rto_min(struct sock *sk) { const struct dst_entry *dst = __sk_dst_get(sk); - u32 rto_min = TCP_RTO_MIN; + u32 rto_min = inet_csk(sk)->icsk_rto_min; if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); @@ -941,16 +948,6 @@ INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb)); #endif -static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) -{ -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - if (!net->ipv4.sysctl_tcp_l3mdev_accept && - skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) - return true; -#endif - return false; -} - /* TCP_SKB_CB reference means this can not be used from early demux */ static inline int tcp_v4_sdif(struct sk_buff *skb) { @@ -1113,7 +1110,7 @@ void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, - bool reinit, bool cap_net_admin); + bool cap_net_admin); u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); @@ -1330,7 +1327,9 @@ static inline unsigned long tcp_probe0_base(const struct sock *sk) static inline unsigned long tcp_probe0_when(const struct sock *sk, unsigned long max_when) { - u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff; + u8 backoff = min_t(u8, ilog2(TCP_RTO_MAX / TCP_RTO_MIN) + 1, + inet_csk(sk)->icsk_backoff); + u64 when = (u64)tcp_probe0_base(sk) << backoff; return (unsigned long)min_t(u64, when, max_when); } @@ -1423,6 +1422,8 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } +void tcp_cleanup_rbuf(struct sock *sk, int copied); + /* We provision sk_rcvbuf around 200% of sk_rcvlowat. * If 87.5 % (7/8) of the space has been consumed, we want to override * SO_RCVLOWAT constraint, since we are receiving skbs with too small @@ -1972,18 +1973,7 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; } -/* @wake is one when sk_stream_write_space() calls us. - * This sends EPOLLOUT only if notsent_bytes is half the limit. - * This mimics the strategy used in sock_def_write_space(). - */ -static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) -{ - const struct tcp_sock *tp = tcp_sk(sk); - u32 notsent_bytes = READ_ONCE(tp->write_seq) - - READ_ONCE(tp->snd_nxt); - - return (notsent_bytes << wake) < tcp_notsent_lowat(tp); -} +bool tcp_stream_memory_free(const struct sock *sk, int wake); #ifdef CONFIG_PROC_FS int tcp4_proc_init(void); @@ -2021,21 +2011,21 @@ struct tcp_request_sock_ops { const struct sock *sk, const struct sk_buff *skb); #endif - void (*init_req)(struct request_sock *req, - const struct sock *sk_listener, - struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES __u32 (*cookie_init_seq)(const struct sk_buff *skb, __u16 *mss); #endif - struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, - const struct request_sock *req); + struct dst_entry *(*route_req)(const struct sock *sk, + struct sk_buff *skb, + struct flowi *fl, + struct request_sock *req); u32 (*init_seq)(const struct sk_buff *skb); u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - enum tcp_synack_type synack_type); + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb); }; extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops; @@ -2071,7 +2061,7 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb); void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced); extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd); -extern void tcp_rack_mark_lost(struct sock *sk); +extern bool tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); @@ -2233,6 +2223,22 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); #endif /* CONFIG_NET_SOCK_MSG */ +#ifdef CONFIG_CGROUP_BPF +static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops, + struct sk_buff *skb, + unsigned int end_offset) +{ + skops->skb = skb; + skops->skb_data_end = skb->data + end_offset; +} +#else +static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops, + struct sk_buff *skb, + unsigned int end_offset) +{ +} +#endif + /* Call BPF_SOCK_OPS program that returns an int. If the return value * is < 0, then the BPF op failed (for example if the loaded BPF * program does not support the chosen operation or there is no BPF diff --git a/include/net/tls.h b/include/net/tls.h index e5dac7e74e79..3eccb525e8f7 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -199,6 +199,12 @@ enum tls_context_flags { * to be atomic. */ TLS_TX_SYNC_SCHED = 1, + /* tls_dev_del was called for the RX side, device state was released, + * but tls_ctx->netdev might still be kept, because TX-side driver + * resources might not be released yet. Used to prevent the second + * tls_dev_del call in tls_device_down if it happens simultaneously. + */ + TLS_RX_DEV_CLOSED = 2, }; struct cipher_context { @@ -211,6 +217,7 @@ union tls_crypto_context { union { struct tls12_crypto_info_aes_gcm_128 aes_gcm_128; struct tls12_crypto_info_aes_gcm_256 aes_gcm_256; + struct tls12_crypto_info_chacha20_poly1305 chacha20_poly1305; }; }; @@ -300,7 +307,8 @@ enum tls_offload_sync_type { #define TLS_DEVICE_RESYNC_ASYNC_LOGMAX 13 struct tls_offload_resync_async { atomic64_t req; - u32 loglen; + u16 loglen; + u16 rcd_delta; u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX]; }; @@ -471,6 +479,18 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len) return (i == -1); } +static inline void tls_bigint_subtract(unsigned char *seq, int n) +{ + u64 rcd_sn; + __be64 *p; + + BUILD_BUG_ON(TLS_MAX_REC_SEQ_SIZE != 8); + + p = (__be64 *)seq; + rcd_sn = be64_to_cpu(*p); + *p = cpu_to_be64(rcd_sn - n); +} + static inline struct tls_context *tls_get_ctx(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -488,32 +508,33 @@ static inline void tls_advance_record_sn(struct sock *sk, if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) tls_err_abort(sk, EBADMSG); - if (prot->version != TLS_1_3_VERSION) - tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + if (prot->version != TLS_1_3_VERSION && + prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305) + tls_bigint_increment(ctx->iv + prot->salt_size, prot->iv_size); } static inline void tls_fill_prepend(struct tls_context *ctx, char *buf, size_t plaintext_len, - unsigned char record_type, - int version) + unsigned char record_type) { struct tls_prot_info *prot = &ctx->prot_info; size_t pkt_len, iv_size = prot->iv_size; pkt_len = plaintext_len + prot->tag_size; - if (version != TLS_1_3_VERSION) { + if (prot->version != TLS_1_3_VERSION && + prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305) { pkt_len += iv_size; memcpy(buf + TLS_NONCE_OFFSET, - ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size); + ctx->tx.iv + prot->salt_size, iv_size); } /* we cover nonce explicit here as well, so buf should be of * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE */ - buf[0] = version == TLS_1_3_VERSION ? + buf[0] = prot->version == TLS_1_3_VERSION ? TLS_RECORD_TYPE_DATA : record_type; /* Note that VERSION must be TLS_1_2 for both TLS1.2 and TLS1.3 */ buf[1] = TLS_1_2_VERSION_MINOR; @@ -526,18 +547,17 @@ static inline void tls_fill_prepend(struct tls_context *ctx, static inline void tls_make_aad(char *buf, size_t size, char *record_sequence, - int record_sequence_size, unsigned char record_type, - int version) + struct tls_prot_info *prot) { - if (version != TLS_1_3_VERSION) { - memcpy(buf, record_sequence, record_sequence_size); + if (prot->version != TLS_1_3_VERSION) { + memcpy(buf, record_sequence, prot->rec_seq_size); buf += 8; } else { - size += TLS_CIPHER_AES_GCM_128_TAG_SIZE; + size += prot->tag_size; } - buf[0] = version == TLS_1_3_VERSION ? + buf[0] = prot->version == TLS_1_3_VERSION ? TLS_RECORD_TYPE_DATA : record_type; buf[1] = TLS_1_2_VERSION_MAJOR; buf[2] = TLS_1_2_VERSION_MINOR; @@ -545,11 +565,12 @@ static inline void tls_make_aad(char *buf, buf[4] = size & 0xFF; } -static inline void xor_iv_with_seq(int version, char *iv, char *seq) +static inline void xor_iv_with_seq(struct tls_prot_info *prot, char *iv, char *seq) { int i; - if (version == TLS_1_3_VERSION) { + if (prot->version == TLS_1_3_VERSION || + prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) { for (i = 0; i < 8; i++) iv[i + 4] ^= seq[i]; } @@ -639,6 +660,7 @@ tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) | ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); rx_ctx->resync_async->loglen = 0; + rx_ctx->resync_async->rcd_delta = 0; } static inline void @@ -679,10 +701,6 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout); struct sk_buff *tls_encrypt_skb(struct sk_buff *skb); -struct sk_buff *tls_validate_xmit_skb(struct sock *sk, - struct net_device *dev, - struct sk_buff *skb); - int tls_sw_fallback_init(struct sock *sk, struct tls_offload_context_tx *offload_ctx, struct tls_crypto_info *crypto_info); diff --git a/include/net/udp.h b/include/net/udp.h index 295d52a73598..01351ba25b87 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -164,7 +164,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); } -typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport, +typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, - netdev_features_t features); + netdev_features_t features, bool is_ipv6); static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) { @@ -313,7 +313,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); -struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, +struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -324,7 +324,7 @@ struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); -struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, +struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); /* UDP uses skb->dev_scratch to cache as much information as possible and avoid diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 94bb7a882250..2ea453dac876 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -200,11 +200,27 @@ enum udp_tunnel_nic_info_flags { UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3), }; +struct udp_tunnel_nic; + +#define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES (U16_MAX / 2) + +struct udp_tunnel_nic_shared { + struct udp_tunnel_nic *udp_tunnel_nic_info; + + struct list_head devices; +}; + +struct udp_tunnel_nic_shared_node { + struct net_device *dev; + struct list_head list; +}; + /** * struct udp_tunnel_nic_info - driver UDP tunnel offload information * @set_port: callback for adding a new port * @unset_port: callback for removing a port * @sync_table: callback for syncing the entire port table at once + * @shared: reference to device global state (optional) * @flags: device flags from enum udp_tunnel_nic_info_flags * @tables: UDP port tables this device has * @tables.n_entries: number of entries in this table @@ -213,6 +229,12 @@ enum udp_tunnel_nic_info_flags { * Drivers are expected to provide either @set_port and @unset_port callbacks * or the @sync_table callback. Callbacks are invoked with rtnl lock held. * + * Devices which (misguidedly) share the UDP tunnel port table across multiple + * netdevs should allocate an instance of struct udp_tunnel_nic_shared and + * point @shared at it. + * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices + * sharing a table. + * * Known limitations: * - UDP tunnel port notifications are fundamentally best-effort - * it is likely the driver will both see skbs which use a UDP tunnel port, @@ -234,6 +256,8 @@ struct udp_tunnel_nic_info { /* all at once */ int (*sync_table)(struct net_device *dev, unsigned int table); + struct udp_tunnel_nic_shared *shared; + unsigned int flags; struct udp_tunnel_nic_table_info { diff --git a/include/net/wimax.h b/include/net/wimax.h deleted file mode 100644 index f6e31d2f47aa..000000000000 --- a/include/net/wimax.h +++ /dev/null @@ -1,503 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Linux WiMAX - * Kernel space API for accessing WiMAX devices - * - * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com> - * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> - * - * The WiMAX stack provides an API for controlling and managing the - * system's WiMAX devices. This API affects the control plane; the - * data plane is accessed via the network stack (netdev). - * - * Parts of the WiMAX stack API and notifications are exported to - * user space via Generic Netlink. In user space, libwimax (part of - * the wimax-tools package) provides a shim layer for accessing those - * calls. - * - * The API is standarized for all WiMAX devices and different drivers - * implement the backend support for it. However, device-specific - * messaging pipes are provided that can be used to issue commands and - * receive notifications in free form. - * - * Currently the messaging pipes are the only means of control as it - * is not known (due to the lack of more devices in the market) what - * will be a good abstraction layer. Expect this to change as more - * devices show in the market. This API is designed to be growable in - * order to address this problem. - * - * USAGE - * - * Embed a `struct wimax_dev` at the beginning of the device's - * private structure, initialize and register it. For details, see - * `struct wimax_dev`s documentation. - * - * Once this is done, wimax-tools's libwimaxll can be used to - * communicate with the driver from user space. You user space - * application does not have to forcibily use libwimaxll and can talk - * the generic netlink protocol directly if desired. - * - * Remember this is a very low level API that will to provide all of - * WiMAX features. Other daemons and services running in user space - * are the expected clients of it. They offer a higher level API that - * applications should use (an example of this is the Intel's WiMAX - * Network Service for the i2400m). - * - * DESIGN - * - * Although not set on final stone, this very basic interface is - * mostly completed. Remember this is meant to grow as new common - * operations are decided upon. New operations will be added to the - * interface, intent being on keeping backwards compatibility as much - * as possible. - * - * This layer implements a set of calls to control a WiMAX device, - * exposing a frontend to the rest of the kernel and user space (via - * generic netlink) and a backend implementation in the driver through - * function pointers. - * - * WiMAX devices have a state, and a kernel-only API allows the - * drivers to manipulate that state. State transitions are atomic, and - * only some of them are allowed (see `enum wimax_st`). - * - * Most API calls will set the state automatically; in most cases - * drivers have to only report state changes due to external - * conditions. - * - * All API operations are 'atomic', serialized through a mutex in the - * `struct wimax_dev`. - * - * EXPORTING TO USER SPACE THROUGH GENERIC NETLINK - * - * The API is exported to user space using generic netlink (other - * methods can be added as needed). - * - * There is a Generic Netlink Family named "WiMAX", where interfaces - * supporting the WiMAX interface receive commands and broadcast their - * signals over a multicast group named "msg". - * - * Mapping to the source/destination interface is done by an interface - * index attribute. - * - * For user-to-kernel traffic (commands) we use a function call - * marshalling mechanism, where a message X with attributes A, B, C - * sent from user space to kernel space means executing the WiMAX API - * call wimax_X(A, B, C), sending the results back as a message. - * - * Kernel-to-user (notifications or signals) communication is sent - * over multicast groups. This allows to have multiple applications - * monitoring them. - * - * Each command/signal gets assigned it's own attribute policy. This - * way the validator will verify that all the attributes in there are - * only the ones that should be for each command/signal. Thing of an - * attribute mapping to a type+argumentname for each command/signal. - * - * If we had a single policy for *all* commands/signals, after running - * the validator we'd have to check "does this attribute belong in - * here"? for each one. It can be done manually, but it's just easier - * to have the validator do that job with multiple policies. As well, - * it makes it easier to later expand each command/signal signature - * without affecting others and keeping the namespace more or less - * sane. Not that it is too complicated, but it makes it even easier. - * - * No state information is maintained in the kernel for each user - * space connection (the connection is stateless). - * - * TESTING FOR THE INTERFACE AND VERSIONING - * - * If network interface X is a WiMAX device, there will be a Generic - * Netlink family named "WiMAX X" and the device will present a - * "wimax" directory in it's network sysfs directory - * (/sys/class/net/DEVICE/wimax) [used by HAL]. - * - * The inexistence of any of these means the device does not support - * this WiMAX API. - * - * By querying the generic netlink controller, versioning information - * and the multicast groups available can be found. Applications using - * the interface can either rely on that or use the generic netlink - * controller to figure out which generic netlink commands/signals are - * supported. - * - * NOTE: this versioning is a last resort to avoid hard - * incompatibilities. It is the intention of the design of this - * stack not to introduce backward incompatible changes. - * - * The version code has to fit in one byte (restrictions imposed by - * generic netlink); we use `version / 10` for the major version and - * `version % 10` for the minor. This gives 9 minors for each major - * and 25 majors. - * - * The version change protocol is as follow: - * - * - Major versions: needs to be increased if an existing message/API - * call is changed or removed. Doesn't need to be changed if a new - * message is added. - * - * - Minor version: needs to be increased if new messages/API calls are - * being added or some other consideration that doesn't impact the - * user-kernel interface too much (like some kind of bug fix) and - * that is kind of left up in the air to common sense. - * - * User space code should not try to work if the major version it was - * compiled for differs from what the kernel offers. As well, if the - * minor version of the kernel interface is lower than the one user - * space is expecting (the one it was compiled for), the kernel - * might be missing API calls; user space shall be ready to handle - * said condition. Use the generic netlink controller operations to - * find which ones are supported and which not. - * - * libwimaxll:wimaxll_open() takes care of checking versions. - * - * THE OPERATIONS: - * - * Each operation is defined in its on file (drivers/net/wimax/op-*.c) - * for clarity. The parts needed for an operation are: - * - * - a function pointer in `struct wimax_dev`: optional, as the - * operation might be implemented by the stack and not by the - * driver. - * - * All function pointers are named wimax_dev->op_*(), and drivers - * must implement them except where noted otherwise. - * - * - When exported to user space, a `struct nla_policy` to define the - * attributes of the generic netlink command and a `struct genl_ops` - * to define the operation. - * - * All the declarations for the operation codes (WIMAX_GNL_OP_<NAME>) - * and generic netlink attributes (WIMAX_GNL_<NAME>_*) are declared in - * include/linux/wimax.h; this file is intended to be cloned by user - * space to gain access to those declarations. - * - * A few caveats to remember: - * - * - Need to define attribute numbers starting in 1; otherwise it - * fails. - * - * - the `struct genl_family` requires a maximum attribute id; when - * defining the `struct nla_policy` for each message, it has to have - * an array size of WIMAX_GNL_ATTR_MAX+1. - * - * The op_*() function pointers will not be called if the wimax_dev is - * in a state <= %WIMAX_ST_UNINITIALIZED. The exception is: - * - * - op_reset: can be called at any time after wimax_dev_add() has - * been called. - * - * THE PIPE INTERFACE: - * - * This interface is kept intentionally simple. The driver can send - * and receive free-form messages to/from user space through a - * pipe. See drivers/net/wimax/op-msg.c for details. - * - * The kernel-to-user messages are sent with - * wimax_msg(). user-to-kernel messages are delivered via - * wimax_dev->op_msg_from_user(). - * - * RFKILL: - * - * RFKILL support is built into the wimax_dev layer; the driver just - * needs to call wimax_report_rfkill_{hw,sw}() to inform of changes in - * the hardware or software RF kill switches. When the stack wants to - * turn the radio off, it will call wimax_dev->op_rfkill_sw_toggle(), - * which the driver implements. - * - * User space can set the software RF Kill switch by calling - * wimax_rfkill(). - * - * The code for now only supports devices that don't require polling; - * If the device needs to be polled, create a self-rearming delayed - * work struct for polling or look into adding polled support to the - * WiMAX stack. - * - * When initializing the hardware (_probe), after calling - * wimax_dev_add(), query the device for it's RF Kill switches status - * and feed it back to the WiMAX stack using - * wimax_report_rfkill_{hw,sw}(). If any switch is missing, always - * report it as ON. - * - * NOTE: the wimax stack uses an inverted terminology to that of the - * RFKILL subsystem: - * - * - ON: radio is ON, RFKILL is DISABLED or OFF. - * - OFF: radio is OFF, RFKILL is ENABLED or ON. - * - * MISCELLANEOUS OPS: - * - * wimax_reset() can be used to reset the device to power on state; by - * default it issues a warm reset that maintains the same device - * node. If that is not possible, it falls back to a cold reset - * (device reconnect). The driver implements the backend to this - * through wimax_dev->op_reset(). - */ - -#ifndef __NET__WIMAX_H__ -#define __NET__WIMAX_H__ - -#include <linux/wimax.h> -#include <net/genetlink.h> -#include <linux/netdevice.h> - -struct net_device; -struct genl_info; -struct wimax_dev; - -/** - * struct wimax_dev - Generic WiMAX device - * - * @net_dev: [fill] Pointer to the &struct net_device this WiMAX - * device implements. - * - * @op_msg_from_user: [fill] Driver-specific operation to - * handle a raw message from user space to the driver. The - * driver can send messages to user space using with - * wimax_msg_to_user(). - * - * @op_rfkill_sw_toggle: [fill] Driver-specific operation to act on - * userspace (or any other agent) requesting the WiMAX device to - * change the RF Kill software switch (WIMAX_RF_ON or - * WIMAX_RF_OFF). - * If such hardware support is not present, it is assumed the - * radio cannot be switched off and it is always on (and the stack - * will error out when trying to switch it off). In such case, - * this function pointer can be left as NULL. - * - * @op_reset: [fill] Driver specific operation to reset the - * device. - * This operation should always attempt first a warm reset that - * does not disconnect the device from the bus and return 0. - * If that fails, it should resort to some sort of cold or bus - * reset (even if it implies a bus disconnection and device - * disappearance). In that case, -ENODEV should be returned to - * indicate the device is gone. - * This operation has to be synchronous, and return only when the - * reset is complete. In case of having had to resort to bus/cold - * reset implying a device disconnection, the call is allowed to - * return immediately. - * NOTE: wimax_dev->mutex is NOT locked when this op is being - * called; however, wimax_dev->mutex_reset IS locked to ensure - * serialization of calls to wimax_reset(). - * See wimax_reset()'s documentation. - * - * @name: [fill] A way to identify this device. We need to register a - * name with many subsystems (rfkill, workqueue creation, etc). - * We can't use the network device name as that - * might change and in some instances we don't know it yet (until - * we don't call register_netdev()). So we generate an unique one - * using the driver name and device bus id, place it here and use - * it across the board. Recommended naming: - * DRIVERNAME-BUSNAME:BUSID (dev->bus->name, dev->bus_id). - * - * @id_table_node: [private] link to the list of wimax devices kept by - * id-table.c. Protected by it's own spinlock. - * - * @mutex: [private] Serializes all concurrent access and execution of - * operations. - * - * @mutex_reset: [private] Serializes reset operations. Needs to be a - * different mutex because as part of the reset operation, the - * driver has to call back into the stack to do things such as - * state change, that require wimax_dev->mutex. - * - * @state: [private] Current state of the WiMAX device. - * - * @rfkill: [private] integration into the RF-Kill infrastructure. - * - * @rf_sw: [private] State of the software radio switch (OFF/ON) - * - * @rf_hw: [private] State of the hardware radio switch (OFF/ON) - * - * @debugfs_dentry: [private] Used to hook up a debugfs entry. This - * shows up in the debugfs root as wimax\:DEVICENAME. - * - * Description: - * This structure defines a common interface to access all WiMAX - * devices from different vendors and provides a common API as well as - * a free-form device-specific messaging channel. - * - * Usage: - * 1. Embed a &struct wimax_dev at *the beginning* the network - * device structure so that netdev_priv() points to it. - * - * 2. memset() it to zero - * - * 3. Initialize with wimax_dev_init(). This will leave the WiMAX - * device in the %__WIMAX_ST_NULL state. - * - * 4. Fill all the fields marked with [fill]; once called - * wimax_dev_add(), those fields CANNOT be modified. - * - * 5. Call wimax_dev_add() *after* registering the network - * device. This will leave the WiMAX device in the %WIMAX_ST_DOWN - * state. - * Protect the driver's net_device->open() against succeeding if - * the wimax device state is lower than %WIMAX_ST_DOWN. - * - * 6. Select when the device is going to be turned on/initialized; - * for example, it could be initialized on 'ifconfig up' (when the - * netdev op 'open()' is called on the driver). - * - * When the device is initialized (at `ifconfig up` time, or right - * after calling wimax_dev_add() from _probe(), make sure the - * following steps are taken - * - * a. Move the device to %WIMAX_ST_UNINITIALIZED. This is needed so - * some API calls that shouldn't work until the device is ready - * can be blocked. - * - * b. Initialize the device. Make sure to turn the SW radio switch - * off and move the device to state %WIMAX_ST_RADIO_OFF when - * done. When just initialized, a device should be left in RADIO - * OFF state until user space devices to turn it on. - * - * c. Query the device for the state of the hardware rfkill switch - * and call wimax_rfkill_report_hw() and wimax_rfkill_report_sw() - * as needed. See below. - * - * wimax_dev_rm() undoes before unregistering the network device. Once - * wimax_dev_add() is called, the driver can get called on the - * wimax_dev->op_* function pointers - * - * CONCURRENCY: - * - * The stack provides a mutex for each device that will disallow API - * calls happening concurrently; thus, op calls into the driver - * through the wimax_dev->op*() function pointers will always be - * serialized and *never* concurrent. - * - * For locking, take wimax_dev->mutex is taken; (most) operations in - * the API have to check for wimax_dev_is_ready() to return 0 before - * continuing (this is done internally). - * - * REFERENCE COUNTING: - * - * The WiMAX device is reference counted by the associated network - * device. The only operation that can be used to reference the device - * is wimax_dev_get_by_genl_info(), and the reference it acquires has - * to be released with dev_put(wimax_dev->net_dev). - * - * RFKILL: - * - * At startup, both HW and SW radio switchess are assumed to be off. - * - * At initialization time [after calling wimax_dev_add()], have the - * driver query the device for the status of the software and hardware - * RF kill switches and call wimax_report_rfkill_hw() and - * wimax_rfkill_report_sw() to indicate their state. If any is - * missing, just call it to indicate it is ON (radio always on). - * - * Whenever the driver detects a change in the state of the RF kill - * switches, it should call wimax_report_rfkill_hw() or - * wimax_report_rfkill_sw() to report it to the stack. - */ -struct wimax_dev { - struct net_device *net_dev; - struct list_head id_table_node; - struct mutex mutex; /* Protects all members and API calls */ - struct mutex mutex_reset; - enum wimax_st state; - - int (*op_msg_from_user)(struct wimax_dev *wimax_dev, - const char *, - const void *, size_t, - const struct genl_info *info); - int (*op_rfkill_sw_toggle)(struct wimax_dev *wimax_dev, - enum wimax_rf_state); - int (*op_reset)(struct wimax_dev *wimax_dev); - - struct rfkill *rfkill; - unsigned int rf_hw; - unsigned int rf_sw; - char name[32]; - - struct dentry *debugfs_dentry; -}; - - - -/* - * WiMAX stack public API for device drivers - * ----------------------------------------- - * - * These functions are not exported to user space. - */ -void wimax_dev_init(struct wimax_dev *); -int wimax_dev_add(struct wimax_dev *, struct net_device *); -void wimax_dev_rm(struct wimax_dev *); - -static inline -struct wimax_dev *net_dev_to_wimax(struct net_device *net_dev) -{ - return netdev_priv(net_dev); -} - -static inline -struct device *wimax_dev_to_dev(struct wimax_dev *wimax_dev) -{ - return wimax_dev->net_dev->dev.parent; -} - -void wimax_state_change(struct wimax_dev *, enum wimax_st); -enum wimax_st wimax_state_get(struct wimax_dev *); - -/* - * Radio Switch state reporting. - * - * enum wimax_rf_state is declared in linux/wimax.h so the exports - * to user space can use it. - */ -void wimax_report_rfkill_hw(struct wimax_dev *, enum wimax_rf_state); -void wimax_report_rfkill_sw(struct wimax_dev *, enum wimax_rf_state); - - -/* - * Free-form messaging to/from user space - * - * Sending a message: - * - * wimax_msg(wimax_dev, pipe_name, buf, buf_size, GFP_KERNEL); - * - * Broken up: - * - * skb = wimax_msg_alloc(wimax_dev, pipe_name, buf_size, GFP_KERNEL); - * ...fill up skb... - * wimax_msg_send(wimax_dev, pipe_name, skb); - * - * Be sure not to modify skb->data in the middle (ie: don't use - * skb_push()/skb_pull()/skb_reserve() on the skb). - * - * "pipe_name" is any string, that can be interpreted as the name of - * the pipe or recipient; the interpretation of it is driver - * specific, so the recipient can multiplex it as wished. It can be - * NULL, it won't be used - an example is using a "diagnostics" tag to - * send diagnostics information that a device-specific diagnostics - * tool would be interested in. - */ -struct sk_buff *wimax_msg_alloc(struct wimax_dev *, const char *, const void *, - size_t, gfp_t); -int wimax_msg_send(struct wimax_dev *, struct sk_buff *); -int wimax_msg(struct wimax_dev *, const char *, const void *, size_t, gfp_t); - -const void *wimax_msg_data_len(struct sk_buff *, size_t *); -const void *wimax_msg_data(struct sk_buff *); -ssize_t wimax_msg_len(struct sk_buff *); - - -/* - * WiMAX stack user space API - * -------------------------- - * - * This API is what gets exported to user space for general - * operations. As well, they can be called from within the kernel, - * (with a properly referenced `struct wimax_dev`). - * - * Properly referenced means: the 'struct net_device' that embeds the - * device's control structure and (as such) the 'struct wimax_dev' is - * referenced by the caller. - */ -int wimax_rfkill(struct wimax_dev *, enum wimax_rf_state); -int wimax_reset(struct wimax_dev *); - -#endif /* #ifndef __NET__WIMAX_H__ */ diff --git a/include/net/xdp.h b/include/net/xdp.h index 3814fb631d52..600acb307db6 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -59,6 +59,7 @@ struct xdp_rxq_info { u32 queue_index; u32 reg_state; struct xdp_mem_info mem; + unsigned int napi_id; } ____cacheline_aligned; /* perf critical, avoid false-sharing */ struct xdp_txq_info { @@ -104,6 +105,18 @@ struct xdp_frame { struct net_device *dev_rx; /* used by cpumap */ }; +#define XDP_BULK_QUEUE_SIZE 16 +struct xdp_frame_bulk { + int count; + void *xa; + void *q[XDP_BULK_QUEUE_SIZE]; +}; + +static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq) +{ + /* bq->count will be zero'ed when bq->xa gets updated */ + bq->xa = NULL; +} static inline struct skb_shared_info * xdp_get_shared_info_from_frame(struct xdp_frame *frame) @@ -194,6 +207,9 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); +void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); +void xdp_return_frame_bulk(struct xdp_frame *xdpf, + struct xdp_frame_bulk *bq); /* When sending xdp_frame into the network stack, then there is no * return point callback, which is needed to release e.g. DMA-mapping @@ -211,7 +227,7 @@ static inline void xdp_release_frame(struct xdp_frame *xdpf) } int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, - struct net_device *dev, u32 queue_index); + struct net_device *dev, u32 queue_index, unsigned int napi_id); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); @@ -240,11 +256,9 @@ struct xdp_attachment_info { }; struct netdev_bpf; -bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, - struct netdev_bpf *bpf); void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf); -#define DEV_MAP_BULK_SIZE 16 +#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE #endif /* __LINUX_NET_XDP_H__ */ diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index c9d87cc40c11..cc17bc957548 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -18,25 +18,20 @@ struct xsk_queue; struct xdp_buff; struct xdp_umem { - struct xsk_queue *fq; - struct xsk_queue *cq; - struct xsk_buff_pool *pool; + void *addrs; u64 size; u32 headroom; u32 chunk_size; + u32 chunks; + u32 npgs; struct user_struct *user; refcount_t users; - struct work_struct work; - struct page **pgs; - u32 npgs; - u16 queue_id; - u8 need_wakeup; u8 flags; - int id; - struct net_device *dev; bool zc; - spinlock_t xsk_tx_list_lock; - struct list_head xsk_tx_list; + struct page **pgs; + int id; + struct list_head xsk_dma_list; + struct work_struct work; }; struct xsk_map { @@ -48,10 +43,11 @@ struct xsk_map { struct xdp_sock { /* struct sock must be the first member of struct xdp_sock */ struct sock sk; - struct xsk_queue *rx; + struct xsk_queue *rx ____cacheline_aligned_in_smp; struct net_device *dev; struct xdp_umem *umem; struct list_head flush_node; + struct xsk_buff_pool *pool; u16 queue_id; bool zc; enum { @@ -59,14 +55,9 @@ struct xdp_sock { XSK_BOUND, XSK_UNBOUND, } state; - /* Protects multiple processes in the control path */ - struct mutex mutex; + struct xsk_queue *tx ____cacheline_aligned_in_smp; - struct list_head list; - /* Mutual exclusion of NAPI TX thread and sendmsg error paths - * in the SKB destructor callback. - */ - spinlock_t tx_completion_lock; + struct list_head tx_list; /* Protects generic receive. */ spinlock_t rx_lock; @@ -77,6 +68,10 @@ struct xdp_sock { struct list_head map_list; /* Protects map_list */ spinlock_t map_list_lock; + /* Protects multiple processes in the control path */ + struct mutex mutex; + struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */ + struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */ }; #ifdef CONFIG_XDP_SOCKETS diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index ccf848f7efa4..4e295541e396 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -11,47 +11,51 @@ #ifdef CONFIG_XDP_SOCKETS -void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); -bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc); -void xsk_umem_consume_tx_done(struct xdp_umem *umem); -struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id); -void xsk_set_rx_need_wakeup(struct xdp_umem *umem); -void xsk_set_tx_need_wakeup(struct xdp_umem *umem); -void xsk_clear_rx_need_wakeup(struct xdp_umem *umem); -void xsk_clear_tx_need_wakeup(struct xdp_umem *umem); -bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem); +void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); +bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); +u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, u32 max); +void xsk_tx_release(struct xsk_buff_pool *pool); +struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, + u16 queue_id); +void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool); +void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool); +void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool); +void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool); +bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool); -static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem) +static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool) { - return XDP_PACKET_HEADROOM + umem->headroom; + return XDP_PACKET_HEADROOM + pool->headroom; } -static inline u32 xsk_umem_get_chunk_size(struct xdp_umem *umem) +static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool) { - return umem->chunk_size; + return pool->chunk_size; } -static inline u32 xsk_umem_get_rx_frame_size(struct xdp_umem *umem) +static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) { - return xsk_umem_get_chunk_size(umem) - xsk_umem_get_headroom(umem); + return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool); } -static inline void xsk_buff_set_rxq_info(struct xdp_umem *umem, +static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { - xp_set_rxq_info(umem->pool, rxq); + xp_set_rxq_info(pool, rxq); } -static inline void xsk_buff_dma_unmap(struct xdp_umem *umem, +static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { - xp_dma_unmap(umem->pool, attrs); + xp_dma_unmap(pool, attrs); } -static inline int xsk_buff_dma_map(struct xdp_umem *umem, struct device *dev, - unsigned long attrs) +static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool, + struct device *dev, unsigned long attrs) { - return xp_dma_map(umem->pool, dev, attrs, umem->pgs, umem->npgs); + struct xdp_umem *umem = pool->umem; + + return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs); } static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp) @@ -68,14 +72,14 @@ static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp) return xp_get_frame_dma(xskb); } -static inline struct xdp_buff *xsk_buff_alloc(struct xdp_umem *umem) +static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool) { - return xp_alloc(umem->pool); + return xp_alloc(pool); } -static inline bool xsk_buff_can_alloc(struct xdp_umem *umem, u32 count) +static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count) { - return xp_can_alloc(umem->pool, count); + return xp_can_alloc(pool, count); } static inline void xsk_buff_free(struct xdp_buff *xdp) @@ -85,100 +89,110 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) xp_free(xskb); } -static inline dma_addr_t xsk_buff_raw_get_dma(struct xdp_umem *umem, u64 addr) +static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool, + u64 addr) { - return xp_raw_get_dma(umem->pool, addr); + return xp_raw_get_dma(pool, addr); } -static inline void *xsk_buff_raw_get_data(struct xdp_umem *umem, u64 addr) +static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) { - return xp_raw_get_data(umem->pool, addr); + return xp_raw_get_data(pool, addr); } -static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp) +static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); + if (!pool->dma_need_sync) + return; + xp_dma_sync_for_cpu(xskb); } -static inline void xsk_buff_raw_dma_sync_for_device(struct xdp_umem *umem, +static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma, size_t size) { - xp_dma_sync_for_device(umem->pool, dma, size); + xp_dma_sync_for_device(pool, dma, size); } #else -static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries) +static inline void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries) { } -static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, - struct xdp_desc *desc) +static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, + struct xdp_desc *desc) { return false; } -static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem) +static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, + u32 max) +{ + return 0; +} + +static inline void xsk_tx_release(struct xsk_buff_pool *pool) { } -static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, - u16 queue_id) +static inline struct xsk_buff_pool * +xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id) { return NULL; } -static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem) +static inline void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) { } -static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem) +static inline void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool) { } -static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem) +static inline void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool) { } -static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem) +static inline void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool) { } -static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem) +static inline bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool) { return false; } -static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem) +static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool) { return 0; } -static inline u32 xsk_umem_get_chunk_size(struct xdp_umem *umem) +static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool) { return 0; } -static inline u32 xsk_umem_get_rx_frame_size(struct xdp_umem *umem) +static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) { return 0; } -static inline void xsk_buff_set_rxq_info(struct xdp_umem *umem, +static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { } -static inline void xsk_buff_dma_unmap(struct xdp_umem *umem, +static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { } -static inline int xsk_buff_dma_map(struct xdp_umem *umem, struct device *dev, - unsigned long attrs) +static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool, + struct device *dev, unsigned long attrs) { return 0; } @@ -193,12 +207,12 @@ static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp) return 0; } -static inline struct xdp_buff *xsk_buff_alloc(struct xdp_umem *umem) +static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool) { return NULL; } -static inline bool xsk_buff_can_alloc(struct xdp_umem *umem, u32 count) +static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count) { return false; } @@ -207,21 +221,22 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) { } -static inline dma_addr_t xsk_buff_raw_get_dma(struct xdp_umem *umem, u64 addr) +static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool, + u64 addr) { return 0; } -static inline void *xsk_buff_raw_get_data(struct xdp_umem *umem, u64 addr) +static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) { return NULL; } -static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp) +static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool) { } -static inline void xsk_buff_raw_dma_sync_for_device(struct xdp_umem *umem, +static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma, size_t size) { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9e806c781025..b2a06f10b62c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1996,6 +1996,39 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x, return 0; } +extern const int xfrm_msg_min[XFRM_NR_MSGTYPES]; +extern const struct nla_policy xfrma_policy[XFRMA_MAX+1]; + +struct xfrm_translator { + /* Allocate frag_list and put compat translation there */ + int (*alloc_compat)(struct sk_buff *skb, const struct nlmsghdr *src); + + /* Allocate nlmsg with 64-bit translaton of received 32-bit message */ + struct nlmsghdr *(*rcv_msg_compat)(const struct nlmsghdr *nlh, + int maxtype, const struct nla_policy *policy, + struct netlink_ext_ack *extack); + + /* Translate 32-bit user_policy from sockptr */ + int (*xlate_user_policy_sockptr)(u8 **pdata32, int optlen); + + struct module *owner; +}; + +#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT) +extern int xfrm_register_translator(struct xfrm_translator *xtr); +extern int xfrm_unregister_translator(struct xfrm_translator *xtr); +extern struct xfrm_translator *xfrm_get_translator(void); +extern void xfrm_put_translator(struct xfrm_translator *xtr); +#else +static inline struct xfrm_translator *xfrm_get_translator(void) +{ + return NULL; +} +static inline void xfrm_put_translator(struct xfrm_translator *xtr) +{ +} +#endif + #if IS_ENABLED(CONFIG_IPV6) static inline bool xfrm6_local_dontfrag(const struct sock *sk) { diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 6842990e2712..eaa8386dbc63 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -13,6 +13,8 @@ struct xsk_buff_pool; struct xdp_rxq_info; struct xsk_queue; struct xdp_desc; +struct xdp_umem; +struct xdp_sock; struct device; struct page; @@ -26,34 +28,73 @@ struct xdp_buff_xsk { struct list_head free_list_node; }; +struct xsk_dma_map { + dma_addr_t *dma_pages; + struct device *dev; + struct net_device *netdev; + refcount_t users; + struct list_head list; /* Protected by the RTNL_LOCK */ + u32 dma_pages_cnt; + bool dma_need_sync; +}; + struct xsk_buff_pool { - struct xsk_queue *fq; + /* Members only used in the control path first. */ + struct device *dev; + struct net_device *netdev; + struct list_head xsk_tx_list; + /* Protects modifications to the xsk_tx_list */ + spinlock_t xsk_tx_list_lock; + refcount_t users; + struct xdp_umem *umem; + struct work_struct work; struct list_head free_list; + u32 heads_cnt; + u16 queue_id; + + /* Data path members as close to free_heads at the end as possible. */ + struct xsk_queue *fq ____cacheline_aligned_in_smp; + struct xsk_queue *cq; + /* For performance reasons, each buff pool has its own array of dma_pages + * even when they are identical. + */ dma_addr_t *dma_pages; struct xdp_buff_xsk *heads; u64 chunk_mask; u64 addrs_cnt; u32 free_list_cnt; u32 dma_pages_cnt; - u32 heads_cnt; u32 free_heads_cnt; u32 headroom; u32 chunk_size; u32 frame_len; + u8 cached_need_wakeup; + bool uses_need_wakeup; bool dma_need_sync; bool unaligned; void *addrs; - struct device *dev; + /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect: + * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when + * sockets share a single cq when the same netdev and queue id is shared. + */ + spinlock_t cq_lock; struct xdp_buff_xsk *free_heads[]; }; /* AF_XDP core. */ -struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks, - u32 chunk_size, u32 headroom, u64 size, - bool unaligned); -void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq); +struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, + struct xdp_umem *umem); +int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, + u16 queue_id, u16 flags); +int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, + struct net_device *dev, u16 queue_id); void xp_destroy(struct xsk_buff_pool *pool); void xp_release(struct xdp_buff_xsk *xskb); +void xp_get_pool(struct xsk_buff_pool *pool); +bool xp_put_pool(struct xsk_buff_pool *pool); +void xp_clear_dev(struct xsk_buff_pool *pool); +void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs); +void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs); /* AF_XDP, and XDP core. */ void xp_free(struct xdp_buff_xsk *xskb); @@ -80,9 +121,6 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb) void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb); static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb) { - if (!xskb->pool->dma_need_sync) - return; - xp_dma_sync_for_cpu_slow(xskb); } |
