diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2022-08-02 10:06:12 -0700 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2022-08-02 10:06:12 -0700 |
| commit | 8bb5e7f4dcd9b9ef22a3ea25c9066a8a968f12dd (patch) | |
| tree | 0f1383880607a227142f9388a066959926233ff1 /include/net | |
| parent | 2a96271fb66c499e4a89d76a89d3d01170c10bef (diff) | |
| parent | 7c744d00990ea999d27f306f6db5ccb61b1304b2 (diff) | |
Merge branch 'next' into for-linus
Prepare input updates for 5.20 (or 6.0) merge window.
Diffstat (limited to 'include/net')
98 files changed, 2039 insertions, 718 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index 3049cb69c025..9cf6870b526e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -134,7 +134,8 @@ struct tc_action_ops { (*get_psample_group)(const struct tc_action *a, tc_action_priv_destructor *destructor); int (*offload_act_setup)(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind); + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack); }; struct tc_action_net { diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 59940e230b78..f7506f08e505 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -64,6 +64,8 @@ struct ifa6_config { const struct in6_addr *pfx; unsigned int plen; + u8 ifa_proto; + const struct in6_addr *peer_pfx; u32 rt_priority; diff --git a/include/net/amt.h b/include/net/amt.h index 7a4db8b903ee..0e40c3d64fcf 100644 --- a/include/net/amt.h +++ b/include/net/amt.h @@ -15,7 +15,7 @@ enum amt_msg_type { AMT_MSG_MEMBERSHIP_QUERY, AMT_MSG_MEMBERSHIP_UPDATE, AMT_MSG_MULTICAST_DATA, - AMT_MSG_TEARDOWM, + AMT_MSG_TEARDOWN, __AMT_MSG_MAX, }; diff --git a/include/net/arp.h b/include/net/arp.h index 031374ac2f22..d7ef4ec71dfe 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -65,6 +65,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, const unsigned char *src_hw, const unsigned char *th); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); void arp_ifdown(struct net_device *dev); +int arp_invalidate(struct net_device *dev, __be32 ip, bool force); struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, diff --git a/include/net/ax25.h b/include/net/ax25.h index 8221af1811df..a427a05672e2 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -187,18 +187,12 @@ typedef struct { typedef struct ax25_route { struct ax25_route *next; - refcount_t refcount; ax25_address callsign; struct net_device *dev; ax25_digi *digipeat; char ip_mode; } ax25_route; -static inline void ax25_hold_route(ax25_route *ax25_rt) -{ - refcount_inc(&ax25_rt->refcount); -} - void __ax25_put_route(ax25_route *ax25_rt); extern rwlock_t ax25_route_lock; @@ -213,12 +207,6 @@ static inline void ax25_route_lock_unuse(void) read_unlock(&ax25_route_lock); } -static inline void ax25_put_route(ax25_route *ax25_rt) -{ - if (refcount_dec_and_test(&ax25_rt->refcount)) - __ax25_put_route(ax25_rt); -} - typedef struct { char slave; /* slave_mode? */ struct timer_list slave_timer; /* timeout timer */ @@ -240,6 +228,7 @@ typedef struct ax25_dev { ax25_dama_info dama; #endif refcount_t refcount; + bool device_up; } ax25_dev; typedef struct ax25_cb { diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index a647e5fabdbd..6b48d9e2aab9 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -204,19 +204,21 @@ void bt_err_ratelimited(const char *fmt, ...); #define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__) #endif +#define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null") + #define bt_dev_info(hdev, fmt, ...) \ - BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn(hdev, fmt, ...) \ - BT_WARN("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err(hdev, fmt, ...) \ - BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_dbg(hdev, fmt, ...) \ - BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn_ratelimited(hdev, fmt, ...) \ - bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err_ratelimited(hdev, fmt, ...) \ - bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) /* Connection and socket states */ enum { @@ -343,7 +345,7 @@ int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, __poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); -int bt_sock_wait_ready(struct sock *sk, unsigned long flags); +int bt_sock_wait_ready(struct sock *sk, unsigned int msg_flags); void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh); void bt_accept_unlink(struct sock *sk); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 35c073d44ec5..fe7935be7dc4 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -255,6 +255,25 @@ enum { * during the hdev->setup vendor callback. */ HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, + + /* When this quirk is set, HCI_OP_SET_EVENT_FLT requests with + * HCI_FLT_CLEAR_ALL are ignored and event filtering is + * completely avoided. A subset of the CSR controller + * clones struggle with this and instantly lock up. + * + * Note that devices using this must (separately) disable + * runtime suspend, because event filtering takes place there. + */ + HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, + + /* + * When this quirk is set, disables the use of + * HCI_OP_ENHANCED_SETUP_SYNC_CONN command to setup SCO connections. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, }; /* HCI device flags */ @@ -568,6 +587,7 @@ enum { #define HCI_ERROR_CONNECTION_TIMEOUT 0x08 #define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d #define HCI_ERROR_REJ_BAD_ADDR 0x0f +#define HCI_ERROR_INVALID_PARAMETERS 0x12 #define HCI_ERROR_REMOTE_USER_TERM 0x13 #define HCI_ERROR_REMOTE_LOW_RESOURCES 0x14 #define HCI_ERROR_REMOTE_POWER_OFF 0x15 @@ -605,6 +625,7 @@ enum { #define EIR_SSP_RAND_R192 0x0F /* Simple Pairing Randomizer R-192 */ #define EIR_DEVICE_ID 0x10 /* device ID */ #define EIR_APPEARANCE 0x19 /* Device appearance */ +#define EIR_SERVICE_DATA 0x16 /* Service Data */ #define EIR_LE_BDADDR 0x1B /* LE Bluetooth device address */ #define EIR_LE_ROLE 0x1C /* LE role */ #define EIR_SSP_HASH_C256 0x1D /* Simple Pairing Hash C-256 */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e336e9c1dda4..c0ea2a4892b1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -36,6 +36,9 @@ /* HCI priority */ #define HCI_PRIO_MAX 7 +/* HCI maximum id value */ +#define HCI_MAX_ID 10000 + /* HCI Core structures */ struct inquiry_data { bdaddr_t bdaddr; @@ -152,21 +155,18 @@ struct bdaddr_list_with_irk { u8 local_irk[16]; }; +/* Bitmask of connection flags */ enum hci_conn_flags { - HCI_CONN_FLAG_REMOTE_WAKEUP, - HCI_CONN_FLAG_DEVICE_PRIVACY, - - __HCI_CONN_NUM_FLAGS, + HCI_CONN_FLAG_REMOTE_WAKEUP = 1, + HCI_CONN_FLAG_DEVICE_PRIVACY = 2, }; - -/* Make sure number of flags doesn't exceed sizeof(current_flags) */ -static_assert(__HCI_CONN_NUM_FLAGS < 32); +typedef u8 hci_conn_flags_t; struct bdaddr_list_with_flags { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; - DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS); + hci_conn_flags_t flags; }; struct bt_uuid { @@ -258,6 +258,15 @@ struct adv_info { #define HCI_ADV_TX_POWER_NO_PREFERENCE 0x7F +struct monitored_device { + struct list_head list; + + bdaddr_t bdaddr; + __u8 addr_type; + __u16 handle; + bool notified; +}; + struct adv_pattern { struct list_head list; __u8 ad_type; @@ -294,6 +303,9 @@ struct adv_monitor { #define HCI_MAX_SHORT_NAME_LENGTH 10 +#define HCI_CONN_HANDLE_UNSET 0xffff +#define HCI_CONN_HANDLE_MAX 0x0eff + /* Min encryption key size to match with SMP */ #define HCI_MIN_ENC_KEY_SIZE 7 @@ -561,7 +573,7 @@ struct hci_dev { struct rfkill *rfkill; DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS); - DECLARE_BITMAP(conn_flags, __HCI_CONN_NUM_FLAGS); + hci_conn_flags_t conn_flags; __s8 adv_tx_power; __u8 adv_data[HCI_MAX_EXT_AD_LENGTH]; @@ -591,6 +603,9 @@ struct hci_dev { struct delayed_work interleave_scan; + struct list_head monitored_devices; + bool advmon_pend_notify; + #if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; #endif @@ -757,7 +772,7 @@ struct hci_conn_params { struct hci_conn *conn; bool explicit_connect; - DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS); + hci_conn_flags_t flags; u8 privacy_mode; }; @@ -1141,7 +1156,7 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); -void hci_le_conn_failed(struct hci_conn *conn, u8 status); +void hci_conn_failed(struct hci_conn *conn, u8 status); /* * hci_conn_get() and hci_conn_put() are used to control the life-time of an @@ -1477,8 +1492,12 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define privacy_mode_capable(dev) (use_ll_privacy(dev) && \ (hdev->commands[39] & 0x04)) -/* Use enhanced synchronous connection if command is supported */ -#define enhanced_sco_capable(dev) ((dev)->commands[29] & 0x08) +/* Use enhanced synchronous connection if command is supported and its quirk + * has not been set. + */ +#define enhanced_sync_conn_capable(dev) \ + (((dev)->commands[29] & 0x08) && \ + !test_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &(dev)->quirks)) /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ @@ -1855,6 +1874,8 @@ void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status); int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status); +void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, + bdaddr_t *bdaddr, u8 addr_type); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 107b25deae68..7c1ad0f6fcec 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -696,7 +696,7 @@ struct mgmt_cp_set_blocked_keys { #define MGMT_READ_CONTROLLER_CAP_SIZE 0 struct mgmt_rp_read_controller_cap { __le16 cap_len; - __u8 cap[0]; + __u8 cap[]; } __packed; #define MGMT_OP_READ_EXP_FEATURES_INFO 0x0049 @@ -1104,3 +1104,19 @@ struct mgmt_ev_controller_resume { #define MGMT_WAKE_REASON_NON_BT_WAKE 0x0 #define MGMT_WAKE_REASON_UNEXPECTED 0x1 #define MGMT_WAKE_REASON_REMOTE_WAKE 0x2 + +#define MGMT_EV_ADV_MONITOR_DEVICE_FOUND 0x002f +struct mgmt_ev_adv_monitor_device_found { + __le16 monitor_handle; + struct mgmt_addr_info addr; + __s8 rssi; + __le32 flags; + __le16 eir_len; + __u8 eir[]; +} __packed; + +#define MGMT_EV_ADV_MONITOR_DEVICE_LOST 0x0030 +struct mgmt_ev_adv_monitor_device_lost { + __le16 monitor_handle; + struct mgmt_addr_info addr; +} __packed; diff --git a/include/net/bond_options.h b/include/net/bond_options.h index dd75c071f67e..61b49063791c 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -66,19 +66,24 @@ enum { BOND_OPT_PEER_NOTIF_DELAY, BOND_OPT_LACP_ACTIVE, BOND_OPT_MISSED_MAX, + BOND_OPT_NS_TARGETS, BOND_OPT_LAST }; /* This structure is used for storing option values and for passing option * values when changing an option. The logic when used as an arg is as follows: - * - if string != NULL -> parse it, if the opt is RAW type then return it, else - * return the parse result - * - if string == NULL -> parse value + * - if value != ULLONG_MAX -> parse value + * - if string != NULL -> parse string + * - if the opt is RAW data and length less than maxlen, + * copy the data to extra storage */ + +#define BOND_OPT_EXTRA_MAXLEN 16 struct bond_opt_value { char *string; u64 value; u32 flags; + char extra[BOND_OPT_EXTRA_MAXLEN]; }; struct bonding; @@ -118,18 +123,26 @@ const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val); * When value is ULLONG_MAX then string will be used. */ static inline void __bond_opt_init(struct bond_opt_value *optval, - char *string, u64 value) + char *string, u64 value, + void *extra, size_t extra_len) { memset(optval, 0, sizeof(*optval)); optval->value = ULLONG_MAX; - if (value == ULLONG_MAX) - optval->string = string; - else + if (value != ULLONG_MAX) optval->value = value; + else if (string) + optval->string = string; + else if (extra_len <= BOND_OPT_EXTRA_MAXLEN) + memcpy(optval->extra, extra, extra_len); } -#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value) -#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX) +#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value, NULL, 0) +#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX, NULL, 0) +#define bond_opt_initextra(optval, extra, extra_len) \ + __bond_opt_init(optval, NULL, ULLONG_MAX, extra, extra_len) void bond_option_arp_ip_targets_clear(struct bonding *bond); +#if IS_ENABLED(CONFIG_IPV6) +void bond_option_ns_ip6_targets_clear(struct bonding *bond); +#endif #endif /* _NET_BOND_OPTIONS_H */ diff --git a/include/net/bonding.h b/include/net/bonding.h index 83cfd2d70247..cb904d356e31 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -29,8 +29,11 @@ #include <net/bond_3ad.h> #include <net/bond_alb.h> #include <net/bond_options.h> +#include <net/ipv6.h> +#include <net/addrconf.h> #define BOND_MAX_ARP_TARGETS 16 +#define BOND_MAX_NS_TARGETS BOND_MAX_ARP_TARGETS #define BOND_DEFAULT_MIIMON 100 @@ -146,6 +149,9 @@ struct bond_params { struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; u16 ad_user_port_key; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; +#endif /* 2 bytes of padding : see ether_addr_equal_64bits() */ u8 ad_actor_system[ETH_ALEN + 2]; @@ -499,6 +505,15 @@ static inline int bond_is_ip_target_ok(__be32 addr) return !ipv4_is_lbcast(addr) && !ipv4_is_zeronet(addr); } +#if IS_ENABLED(CONFIG_IPV6) +static inline int bond_is_ip6_target_ok(struct in6_addr *addr) +{ + return !ipv6_addr_any(addr) && + !ipv6_addr_loopback(addr) && + !ipv6_addr_is_multicast(addr); +} +#endif + /* Get the oldest arp which we've received on this slave for bond's * arp_targets. */ @@ -628,7 +643,7 @@ struct bond_net { struct class_attribute class_attr_bonding_masters; }; -int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); +int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); int bond_create(struct net *net, const char *name); int bond_create_sysfs(struct bond_net *net); @@ -699,20 +714,6 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond, } /* Caller must hold rcu_read_lock() for read */ -static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond, - const u8 *mac) -{ - struct list_head *iter; - struct slave *tmp; - - bond_for_each_slave_rcu(bond, tmp, iter) - if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) - return tmp; - - return NULL; -} - -/* Caller must hold rcu_read_lock() for read */ static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac) { struct list_head *iter; @@ -749,6 +750,21 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) return -1; } +#if IS_ENABLED(CONFIG_IPV6) +static inline int bond_get_targets_ip6(struct in6_addr *targets, struct in6_addr *ip) +{ + int i; + + for (i = 0; i < BOND_MAX_NS_TARGETS; i++) + if (ipv6_addr_equal(&targets[i], ip)) + return i; + else if (ipv6_addr_any(&targets[i])) + break; + + return -1; +} +#endif + /* exported from bond_main.c */ extern unsigned int bond_net_id; @@ -760,7 +776,7 @@ extern const struct sysfs_ops slave_sysfs_ops; static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); return NET_XMIT_DROP; } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d19e48f9fdc6..6d02e12e4702 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -109,7 +109,12 @@ struct wiphy; * on this channel. * @IEEE80211_CHAN_16MHZ: 16 MHz bandwidth is permitted * on this channel. - * + * @IEEE80211_CHAN_NO_320MHZ: If the driver supports 320 MHz on the band, + * this flag indicates that a 320 MHz channel cannot use this + * channel as the control or any of the secondary channels. + * This may be due to the driver or due to regulatory bandwidth + * restrictions. + * @IEEE80211_CHAN_NO_EHT: EHT operation is not permitted on this channel. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, @@ -131,6 +136,8 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_4MHZ = 1<<16, IEEE80211_CHAN_8MHZ = 1<<17, IEEE80211_CHAN_16MHZ = 1<<18, + IEEE80211_CHAN_NO_320MHZ = 1<<19, + IEEE80211_CHAN_NO_EHT = 1<<20, }; #define IEEE80211_CHAN_NO_HT40 \ @@ -361,6 +368,48 @@ struct ieee80211_sta_he_cap { }; /** + * struct ieee80211_eht_mcs_nss_supp - EHT max supported NSS per MCS + * + * See P802.11be_D1.3 Table 9-401k - "Subfields of the Supported EHT-MCS + * and NSS Set field" + * + * @only_20mhz: MCS/NSS support for 20 MHz-only STA. + * @bw._80: MCS/NSS support for BW <= 80 MHz + * @bw._160: MCS/NSS support for BW = 160 MHz + * @bw._320: MCS/NSS support for BW = 320 MHz + */ +struct ieee80211_eht_mcs_nss_supp { + union { + struct ieee80211_eht_mcs_nss_supp_20mhz_only only_20mhz; + struct { + struct ieee80211_eht_mcs_nss_supp_bw _80; + struct ieee80211_eht_mcs_nss_supp_bw _160; + struct ieee80211_eht_mcs_nss_supp_bw _320; + } __packed bw; + } __packed; +} __packed; + +#define IEEE80211_EHT_PPE_THRES_MAX_LEN 32 + +/** + * struct ieee80211_sta_eht_cap - STA's EHT capabilities + * + * This structure describes most essential parameters needed + * to describe 802.11be EHT capabilities for a STA. + * + * @has_eht: true iff EHT data is valid. + * @eht_cap_elem: Fixed portion of the eht capabilities element. + * @eht_mcs_nss_supp: The supported NSS/MCS combinations. + * @eht_ppe_thres: Holds the PPE Thresholds data. + */ +struct ieee80211_sta_eht_cap { + bool has_eht; + struct ieee80211_eht_cap_elem_fixed eht_cap_elem; + struct ieee80211_eht_mcs_nss_supp eht_mcs_nss_supp; + u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN]; +}; + +/** * struct ieee80211_sband_iftype_data - sband data per interface type * * This structure encapsulates sband data that is relevant for the @@ -379,6 +428,7 @@ struct ieee80211_sband_iftype_data { u16 types_mask; struct ieee80211_sta_he_cap he_cap; struct ieee80211_he_6ghz_capa he_6ghz_capa; + struct ieee80211_sta_eht_cap eht_cap; struct { const u8 *data; unsigned int len; @@ -562,6 +612,26 @@ ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband, } /** + * ieee80211_get_eht_iftype_cap - return ETH capabilities for an sband's iftype + * @sband: the sband to search for the iftype on + * @iftype: enum nl80211_iftype + * + * Return: pointer to the struct ieee80211_sta_eht_cap, or NULL is none found + */ +static inline const struct ieee80211_sta_eht_cap * +ieee80211_get_eht_iftype_cap(const struct ieee80211_supported_band *sband, + enum nl80211_iftype iftype) +{ + const struct ieee80211_sband_iftype_data *data = + ieee80211_get_sband_iftype_data(sband, iftype); + + if (data && data->eht_cap.has_eht) + return &data->eht_cap; + + return NULL; +} + +/** * wiphy_read_of_freq_limits - read frequency limits from device tree * * @wiphy: the wireless device to get extra limits for @@ -1113,6 +1183,9 @@ struct cfg80211_mbssid_elems { * Token (measurement type 11) * @lci_len: LCI data length * @civicloc_len: Civic location data length + * @he_bss_color: BSS Color settings + * @he_bss_color_valid: indicates whether bss color + * attribute is present in beacon data or not. */ struct cfg80211_beacon_data { const u8 *head, *tail; @@ -1132,6 +1205,8 @@ struct cfg80211_beacon_data { size_t probe_resp_len; size_t lci_len; size_t civicloc_len; + struct cfg80211_he_bss_color he_bss_color; + bool he_bss_color_valid; }; struct mac_address { @@ -1222,7 +1297,6 @@ struct cfg80211_unsol_bcast_probe_resp { * @sae_h2e_required: stations must support direct H2E technique in SAE * @flags: flags, as defined in enum cfg80211_ap_settings_flags * @he_obss_pd: OBSS Packet Detection settings - * @he_bss_color: BSS Color settings * @he_oper: HE operation IE (or %NULL if HE isn't enabled) * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters @@ -1256,7 +1330,6 @@ struct cfg80211_ap_settings { bool twt_responder; u32 flags; struct ieee80211_he_obss_pd he_obss_pd; - struct cfg80211_he_bss_color he_bss_color; struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; struct cfg80211_mbssid_config mbssid_config; @@ -1417,6 +1490,8 @@ struct sta_txpwr { * @airtime_weight: airtime scheduler weight for this station * @txpwr: transmit power for an associated station * @he_6ghz_capa: HE 6 GHz Band capabilities of station + * @eht_capa: EHT capabilities of station + * @eht_capa_len: the length of the EHT capabilities */ struct station_parameters { const u8 *supported_rates; @@ -1450,6 +1525,8 @@ struct station_parameters { u16 airtime_weight; struct sta_txpwr txpwr; const struct ieee80211_he_6ghz_capa *he_6ghz_capa; + const struct ieee80211_eht_cap_elem *eht_capa; + u8 eht_capa_len; }; /** @@ -1527,6 +1604,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @RATE_INFO_FLAGS_HE_MCS: HE MCS information * @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode * @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS + * @RATE_INFO_FLAGS_EHT_MCS: EHT MCS information */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), @@ -1536,6 +1614,7 @@ enum rate_info_flags { RATE_INFO_FLAGS_HE_MCS = BIT(4), RATE_INFO_FLAGS_EDMG = BIT(5), RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6), + RATE_INFO_FLAGS_EHT_MCS = BIT(7), }; /** @@ -1550,6 +1629,8 @@ enum rate_info_flags { * @RATE_INFO_BW_80: 80 MHz bandwidth * @RATE_INFO_BW_160: 160 MHz bandwidth * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation + * @RATE_INFO_BW_320: 320 MHz bandwidth + * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT RU allocation */ enum rate_info_bw { RATE_INFO_BW_20 = 0, @@ -1559,6 +1640,8 @@ enum rate_info_bw { RATE_INFO_BW_80, RATE_INFO_BW_160, RATE_INFO_BW_HE_RU, + RATE_INFO_BW_320, + RATE_INFO_BW_EHT_RU, }; /** @@ -1576,6 +1659,9 @@ enum rate_info_bw { * @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc, * only valid if bw is %RATE_INFO_BW_HE_RU) * @n_bonded_ch: In case of EDMG the number of bonded channels (1-4) + * @eht_gi: EHT guard interval (from &enum nl80211_eht_gi) + * @eht_ru_alloc: EHT RU allocation (from &enum nl80211_eht_ru_alloc, + * only valid if bw is %RATE_INFO_BW_EHT_RU) */ struct rate_info { u8 flags; @@ -1587,6 +1673,8 @@ struct rate_info { u8 he_dcm; u8 he_ru_alloc; u8 n_bonded_ch; + u8 eht_gi; + u8 eht_ru_alloc; }; /** @@ -2604,7 +2692,7 @@ const struct element *ieee80211_bss_get_elem(struct cfg80211_bss *bss, u8 id); */ static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id) { - return (void *)ieee80211_bss_get_elem(bss, id); + return (const void *)ieee80211_bss_get_elem(bss, id); } @@ -2650,6 +2738,7 @@ struct cfg80211_auth_request { * userspace if this flag is set. Only applicable for cfg80211_connect() * request (connect callback). * @ASSOC_REQ_DISABLE_HE: Disable HE + * @ASSOC_REQ_DISABLE_EHT: Disable EHT */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), @@ -2657,6 +2746,7 @@ enum cfg80211_assoc_req_flags { ASSOC_REQ_USE_RRM = BIT(2), CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3), ASSOC_REQ_DISABLE_HE = BIT(4), + ASSOC_REQ_DISABLE_EHT = BIT(5), }; /** @@ -5464,8 +5554,6 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * @conn_owner_nlportid: (private) connection owner socket port ID * @disconnect_wk: (private) auto-disconnect work * @disconnect_bssid: (private) the BSSID to use for auto-disconnect - * @ibss_fixed: (private) IBSS is using fixed BSSID - * @ibss_dfs_possible: (private) IBSS may change to a DFS channel * @event_list: (private) list for internal event processing * @event_lock: (private) lock for event list * @owner_nlportid: (private) owner socket port ID @@ -5514,9 +5602,6 @@ struct wireless_dev { struct cfg80211_chan_def preset_chandef; struct cfg80211_chan_def chandef; - bool ibss_fixed; - bool ibss_dfs_possible; - bool ps; int ps_timeout; @@ -5970,9 +6055,9 @@ cfg80211_find_ie_match(u8 eid, const u8 *ies, unsigned int len, (!match_len && match_offset))) return NULL; - return (void *)cfg80211_find_elem_match(eid, ies, len, - match, match_len, - match_offset ? + return (const void *)cfg80211_find_elem_match(eid, ies, len, + match, match_len, + match_offset ? match_offset - 2 : 0); } @@ -6099,7 +6184,7 @@ static inline const u8 * cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, unsigned int len) { - return (void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len); + return (const void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len); } /** @@ -7921,7 +8006,9 @@ int cfg80211_register_netdevice(struct net_device *dev); */ static inline void cfg80211_unregister_netdevice(struct net_device *dev) { +#if IS_ENABLED(CONFIG_CFG80211) cfg80211_unregister_wdev(dev->ieee80211_ptr); +#endif } /** diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 6ed07844eb24..d8d8719315fd 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -203,8 +203,8 @@ struct wpan_phy { /* PHY depended MAC PIB values */ - /* 802.15.4 acronym: Tdsym in usec */ - u8 symbol_duration; + /* 802.15.4 acronym: Tdsym in nsec */ + u32 symbol_duration; /* lifs and sifs periods timing */ u16 lifs_period; u16 sifs_period; @@ -227,6 +227,16 @@ static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net) write_pnet(&wpan_phy->_net, net); } +/** + * struct ieee802154_addr - IEEE802.15.4 device address + * @mode: Address mode from frame header. Can be one of: + * - @IEEE802154_ADDR_NONE + * - @IEEE802154_ADDR_SHORT + * - @IEEE802154_ADDR_LONG + * @pan_id: The PAN ID this address belongs to + * @short_addr: address if @mode is @IEEE802154_ADDR_SHORT + * @extended_addr: address if @mode is @IEEE802154_ADDR_LONG + */ struct ieee802154_addr { u8 mode; __le16 pan_id; @@ -363,6 +373,7 @@ struct wpan_dev { #define to_phy(_dev) container_of(_dev, struct wpan_phy, dev) +#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN) static inline int wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, const struct ieee802154_addr *daddr, @@ -373,6 +384,7 @@ wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, return wpan_dev->header_ops->create(skb, dev, daddr, saddr, len); } +#endif struct wpan_phy * wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size); @@ -405,4 +417,6 @@ static inline const char *wpan_phy_name(struct wpan_phy *phy) return dev_name(&phy->dev); } +void ieee802154_configure_durations(struct wpan_phy *phy); + #endif /* __NET_CFG802154_H */ diff --git a/include/net/checksum.h b/include/net/checksum.h index 79c67f14c448..6bc783b7a06c 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -80,6 +80,7 @@ static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend) return csum16_add(csum, ~addend); } +#ifndef HAVE_ARCH_CSUM_SHIFT static __always_inline __wsum csum_shift(__wsum sum, int offset) { /* rotate sum to align it with a 16b boundary */ @@ -87,6 +88,7 @@ static __always_inline __wsum csum_shift(__wsum sum, int offset) return (__force __wsum)ror32((__force u32)sum, 8); return sum; } +#endif static __always_inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset) diff --git a/include/net/devlink.h b/include/net/devlink.h index 8d5349d2fb68..2a2a2a0c93f7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -22,6 +22,7 @@ #include <linux/firmware.h> struct devlink; +struct devlink_linecard; struct devlink_port_phys_attrs { u32 port_number; /* Same value as "split group". @@ -135,6 +136,7 @@ struct devlink_port { struct mutex reporters_lock; /* Protects reporter_list */ struct devlink_rate *devlink_rate; + struct devlink_linecard *linecard; }; struct devlink_port_new_attrs { @@ -148,6 +150,40 @@ struct devlink_port_new_attrs { sfnum_valid:1; }; +/** + * struct devlink_linecard_ops - Linecard operations + * @provision: callback to provision the linecard slot with certain + * type of linecard. As a result of this operation, + * driver is expected to eventually (could be after + * the function call returns) call one of: + * devlink_linecard_provision_set() + * devlink_linecard_provision_fail() + * @unprovision: callback to unprovision the linecard slot. As a result + * of this operation, driver is expected to eventually + * (could be after the function call returns) call + * devlink_linecard_provision_clear() + * devlink_linecard_provision_fail() + * @same_provision: callback to ask the driver if linecard is already + * provisioned in the same way user asks this linecard to be + * provisioned. + * @types_count: callback to get number of supported types + * @types_get: callback to get next type in list + */ +struct devlink_linecard_ops { + int (*provision)(struct devlink_linecard *linecard, void *priv, + const char *type, const void *type_priv, + struct netlink_ext_ack *extack); + int (*unprovision)(struct devlink_linecard *linecard, void *priv, + struct netlink_ext_ack *extack); + bool (*same_provision)(struct devlink_linecard *linecard, void *priv, + const char *type, const void *type_priv); + unsigned int (*types_count)(struct devlink_linecard *linecard, + void *priv); + void (*types_get)(struct devlink_linecard *linecard, + void *priv, unsigned int index, const char **type, + const void **type_priv); +}; + struct devlink_sb_pool_info { enum devlink_sb_pool_type pool_type; u32 size; @@ -1197,9 +1233,9 @@ struct devlink_ops { struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); - int (*port_split)(struct devlink *devlink, unsigned int port_index, + int (*port_split)(struct devlink *devlink, struct devlink_port *port, unsigned int count, struct netlink_ext_ack *extack); - int (*port_unsplit)(struct devlink *devlink, unsigned int port_index, + int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port, struct netlink_ext_ack *extack); int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index, u16 pool_index, @@ -1479,6 +1515,21 @@ void *devlink_priv(struct devlink *devlink); struct devlink *priv_to_devlink(void *priv); struct device *devlink_to_dev(const struct devlink *devlink); +/* Devlink instance explicit locking */ +void devl_lock(struct devlink *devlink); +void devl_unlock(struct devlink *devlink); +void devl_assert_locked(struct devlink *devlink); +bool devl_lock_is_held(struct devlink *devlink); + +int devl_port_register(struct devlink *devlink, + struct devlink_port *devlink_port, + unsigned int port_index); +void devl_port_unregister(struct devlink_port *devlink_port); + +int devl_rate_leaf_create(struct devlink_port *port, void *priv); +void devl_rate_leaf_destroy(struct devlink_port *devlink_port); +void devl_rate_nodes_destroy(struct devlink *devlink); + struct ib_device; struct net *devlink_net(const struct devlink *devlink); @@ -1521,6 +1572,18 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, int devlink_rate_leaf_create(struct devlink_port *port, void *priv); void devlink_rate_leaf_destroy(struct devlink_port *devlink_port); void devlink_rate_nodes_destroy(struct devlink *devlink); +void devlink_port_linecard_set(struct devlink_port *devlink_port, + struct devlink_linecard *linecard); +struct devlink_linecard * +devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, + const struct devlink_linecard_ops *ops, void *priv); +void devlink_linecard_destroy(struct devlink_linecard *linecard); +void devlink_linecard_provision_set(struct devlink_linecard *linecard, + const char *type); +void devlink_linecard_provision_clear(struct devlink_linecard *linecard); +void devlink_linecard_provision_fail(struct devlink_linecard *linecard); +void devlink_linecard_activate(struct devlink_linecard *linecard); +void devlink_linecard_deactivate(struct devlink_linecard *linecard); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/net/dsa.h b/include/net/dsa.h index 85a5ba3772f5..14f07275852b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -52,6 +52,7 @@ struct phylink_link_state; #define DSA_TAG_PROTO_BRCM_LEGACY_VALUE 22 #define DSA_TAG_PROTO_SJA1110_VALUE 23 #define DSA_TAG_PROTO_RTL8_4_VALUE 24 +#define DSA_TAG_PROTO_RTL8_4T_VALUE 25 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -79,6 +80,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_SEVILLE = DSA_TAG_PROTO_SEVILLE_VALUE, DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE, DSA_TAG_PROTO_RTL8_4 = DSA_TAG_PROTO_RTL8_4_VALUE, + DSA_TAG_PROTO_RTL8_4T = DSA_TAG_PROTO_RTL8_4T_VALUE, }; struct dsa_switch; @@ -116,6 +118,14 @@ struct dsa_netdevice_ops { #define MODULE_ALIAS_DSA_TAG_DRIVER(__proto) \ MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE)) +struct dsa_lag { + struct net_device *dev; + unsigned int id; + struct mutex fdb_lock; + struct list_head fdbs; + refcount_t refcount; +}; + struct dsa_switch_tree { struct list_head list; @@ -134,7 +144,7 @@ struct dsa_switch_tree { /* Maps offloaded LAG netdevs to a zero-based linear ID for * drivers that need it. */ - struct net_device **lags; + struct dsa_lag **lags; /* Tagging protocol operations */ const struct dsa_device_ops *tag_ops; @@ -163,32 +173,36 @@ struct dsa_switch_tree { unsigned int last_switch; }; +/* LAG IDs are one-based, the dst->lags array is zero-based */ #define dsa_lags_foreach_id(_id, _dst) \ - for ((_id) = 0; (_id) < (_dst)->lags_len; (_id)++) \ - if ((_dst)->lags[(_id)]) + for ((_id) = 1; (_id) <= (_dst)->lags_len; (_id)++) \ + if ((_dst)->lags[(_id) - 1]) #define dsa_lag_foreach_port(_dp, _dst, _lag) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ - if ((_dp)->lag_dev == (_lag)) + if (dsa_port_offloads_lag((_dp), (_lag))) #define dsa_hsr_foreach_port(_dp, _ds, _hsr) \ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds) && (_dp)->hsr_dev == (_hsr)) -static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst, - unsigned int id) +static inline struct dsa_lag *dsa_lag_by_id(struct dsa_switch_tree *dst, + unsigned int id) { - return dst->lags[id]; + /* DSA LAG IDs are one-based, dst->lags is zero-based */ + return dst->lags[id - 1]; } static inline int dsa_lag_id(struct dsa_switch_tree *dst, - struct net_device *lag) + struct net_device *lag_dev) { unsigned int id; dsa_lags_foreach_id(id, dst) { - if (dsa_lag_dev(dst, id) == lag) - return id; + struct dsa_lag *lag = dsa_lag_by_id(dst, id); + + if (lag->dev == lag_dev) + return lag->id; } return -ENODEV; @@ -278,6 +292,10 @@ struct dsa_port { u8 devlink_port_setup:1; + /* Master state bits, valid only on CPU ports */ + u8 master_admin_up:1; + u8 master_oper_up:1; + u8 setup:1; struct device_node *dn; @@ -287,7 +305,7 @@ struct dsa_port { struct devlink_port devlink_port; struct phylink *pl; struct phylink_config pl_config; - struct net_device *lag_dev; + struct dsa_lag *lag; struct net_device *hsr_dev; struct list_head list; @@ -308,6 +326,10 @@ struct dsa_port { struct mutex addr_lists_lock; struct list_head fdbs; struct list_head mdbs; + + /* List of VLANs that CPU and DSA ports are members of. */ + struct mutex vlans_lock; + struct list_head vlans; }; /* TODO: ideally DSA ports would have a single dp->link_dp member, @@ -321,11 +343,34 @@ struct dsa_link { struct list_head list; }; +enum dsa_db_type { + DSA_DB_PORT, + DSA_DB_LAG, + DSA_DB_BRIDGE, +}; + +struct dsa_db { + enum dsa_db_type type; + + union { + const struct dsa_port *dp; + struct dsa_lag lag; + struct dsa_bridge bridge; + }; +}; + struct dsa_mac_addr { unsigned char addr[ETH_ALEN]; u16 vid; refcount_t refcount; struct list_head list; + struct dsa_db db; +}; + +struct dsa_vlan { + u16 vid; + refcount_t refcount; + struct list_head list; }; struct dsa_switch { @@ -377,17 +422,19 @@ struct dsa_switch { */ u32 vlan_filtering:1; - /* MAC PCS does not provide link state change interrupt, and requires - * polling. Flag passed on to PHYLINK. - */ - u32 pcs_poll:1; - /* For switches that only have the MRU configurable. To ensure the * configured MTU is not exceeded, normalization of MRU on all bridged * interfaces is needed. */ u32 mtu_enforcement_ingress:1; + /* Drivers that isolate the FDBs of multiple bridges must set this + * to true to receive the bridge as an argument in .port_fdb_{add,del} + * and .port_mdb_{add,del}. Otherwise, the bridge.num will always be + * passed as zero. + */ + u32 fdb_isolation:1; + /* Listener for switch fabric events */ struct notifier_block nb; @@ -478,6 +525,12 @@ static inline bool dsa_port_is_unused(struct dsa_port *dp) return dp->type == DSA_PORT_TYPE_UNUSED; } +static inline bool dsa_port_master_is_operational(struct dsa_port *dp) +{ + return dsa_port_is_cpu(dp) && dp->master_admin_up && + dp->master_oper_up; +} + static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; @@ -526,6 +579,10 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) dsa_switch_for_each_port((_dp), (_ds)) \ if (dsa_port_is_cpu((_dp))) +#define dsa_switch_for_each_cpu_port_continue_reverse(_dp, _ds) \ + dsa_switch_for_each_port_continue_reverse((_dp), (_ds)) \ + if (dsa_port_is_cpu((_dp))) + static inline u32 dsa_user_ports(struct dsa_switch *ds) { struct dsa_port *dp; @@ -537,6 +594,17 @@ static inline u32 dsa_user_ports(struct dsa_switch *ds) return mask; } +static inline u32 dsa_cpu_ports(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp; + u32 mask = 0; + + dsa_switch_for_each_cpu_port(cpu_dp, ds) + mask |= BIT(cpu_dp->index); + + return mask; +} + /* Return the local port used to reach an arbitrary switch device */ static inline unsigned int dsa_routing_port(struct dsa_switch *ds, int device) { @@ -581,6 +649,24 @@ static inline bool dsa_is_upstream_port(struct dsa_switch *ds, int port) return port == dsa_upstream_port(ds, port); } +/* Return true if this is a DSA port leading away from the CPU */ +static inline bool dsa_is_downstream_port(struct dsa_switch *ds, int port) +{ + return dsa_is_dsa_port(ds, port) && !dsa_is_upstream_port(ds, port); +} + +/* Return the local port used to reach the CPU port */ +static inline unsigned int dsa_switch_upstream_port(struct dsa_switch *ds) +{ + struct dsa_port *dp; + + dsa_switch_for_each_available_port(dp, ds) { + return dsa_upstream_port(ds, dp->index); + } + + return ds->num_ports; +} + /* Return true if @upstream_ds is an upstream switch of @downstream_ds, meaning * that the routing port from @downstream_ds to @upstream_ds is also the port * which @downstream_ds uses to reach its dedicated CPU. @@ -608,14 +694,30 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) return dp->vlan_filtering; } +static inline unsigned int dsa_port_lag_id_get(struct dsa_port *dp) +{ + return dp->lag ? dp->lag->id : 0; +} + +static inline struct net_device *dsa_port_lag_dev_get(struct dsa_port *dp) +{ + return dp->lag ? dp->lag->dev : NULL; +} + +static inline bool dsa_port_offloads_lag(struct dsa_port *dp, + const struct dsa_lag *lag) +{ + return dsa_port_lag_dev_get(dp) == lag->dev; +} + static inline struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) { if (!dp->bridge) return NULL; - if (dp->lag_dev) - return dp->lag_dev; + if (dp->lag) + return dp->lag->dev; else if (dp->hsr_dev) return dp->hsr_dev; @@ -705,7 +807,7 @@ struct dsa_switch_ops { enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); - int (*change_tag_protocol)(struct dsa_switch *ds, int port, + int (*change_tag_protocol)(struct dsa_switch *ds, enum dsa_tag_protocol proto); /* * Method for switch drivers to connect to the tagging protocol driver @@ -750,6 +852,9 @@ struct dsa_switch_ops { void (*phylink_validate)(struct dsa_switch *ds, int port, unsigned long *supported, struct phylink_link_state *state); + struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, + int port, + phy_interface_t iface); int (*phylink_mac_link_state)(struct dsa_switch *ds, int port, struct phylink_link_state *state); void (*phylink_mac_config)(struct dsa_switch *ds, int port, @@ -803,6 +908,18 @@ struct dsa_switch_ops { struct ethtool_ts_info *ts); /* + * DCB ops + */ + int (*port_get_default_prio)(struct dsa_switch *ds, int port); + int (*port_set_default_prio)(struct dsa_switch *ds, int port, + u8 prio); + int (*port_get_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp); + int (*port_add_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, + u8 prio); + int (*port_del_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, + u8 prio); + + /* * Suspend and resume */ int (*suspend)(struct dsa_switch *ds); @@ -849,18 +966,24 @@ struct dsa_switch_ops { int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload); + bool *tx_fwd_offload, + struct netlink_ext_ack *extack); void (*port_bridge_leave)(struct dsa_switch *ds, int port, struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); + int (*port_mst_state_set)(struct dsa_switch *ds, int port, + const struct switchdev_mst_state *state); void (*port_fast_age)(struct dsa_switch *ds, int port); + int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid); int (*port_pre_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); int (*port_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); + void (*port_set_host_flood)(struct dsa_switch *ds, int port, + bool uc, bool mc); /* * VLAN support @@ -873,23 +996,36 @@ struct dsa_switch_ops { struct netlink_ext_ack *extack); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); + int (*vlan_msti_set)(struct dsa_switch *ds, struct dsa_bridge bridge, + const struct switchdev_vlan_msti *msti); + /* * Forwarding database */ int (*port_fdb_add)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + struct dsa_db db); int (*port_fdb_del)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + struct dsa_db db); int (*port_fdb_dump)(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); + int (*lag_fdb_add)(struct dsa_switch *ds, struct dsa_lag lag, + const unsigned char *addr, u16 vid, + struct dsa_db db); + int (*lag_fdb_del)(struct dsa_switch *ds, struct dsa_lag lag, + const unsigned char *addr, u16 vid, + struct dsa_db db); /* * Multicast database */ int (*port_mdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); int (*port_mdb_del)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); /* * RXNFC */ @@ -909,7 +1045,7 @@ struct dsa_switch_ops { struct flow_cls_offload *cls, bool ingress); int (*port_mirror_add)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, - bool ingress); + bool ingress, struct netlink_ext_ack *extack); void (*port_mirror_del)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); int (*port_policer_add)(struct dsa_switch *ds, int port, @@ -923,17 +1059,18 @@ struct dsa_switch_ops { */ int (*crosschip_bridge_join)(struct dsa_switch *ds, int tree_index, int sw_index, int port, - struct dsa_bridge bridge); + struct dsa_bridge bridge, + struct netlink_ext_ack *extack); void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, struct dsa_bridge bridge); int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, int port); int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag, + int port, struct dsa_lag lag, struct netdev_lag_upper_info *info); int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag); + int port, struct dsa_lag lag); /* * PTP functionality @@ -1005,10 +1142,10 @@ struct dsa_switch_ops { */ int (*port_lag_change)(struct dsa_switch *ds, int port); int (*port_lag_join)(struct dsa_switch *ds, int port, - struct net_device *lag, + struct dsa_lag lag, struct netdev_lag_upper_info *info); int (*port_lag_leave)(struct dsa_switch *ds, int port, - struct net_device *lag); + struct dsa_lag lag); /* * HSR integration @@ -1036,6 +1173,13 @@ struct dsa_switch_ops { int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags); int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); + + /* + * DSA master tracking operations + */ + void (*master_state_change)(struct dsa_switch *ds, + const struct net_device *master, + bool operational); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ @@ -1112,6 +1256,13 @@ struct dsa_switch_driver { struct net_device *dsa_dev_to_net_device(struct device *dev); +bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db); +bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); + /* Keep inline for faster access in hot path */ static inline bool netdev_uses_dsa(const struct net_device *dev) { @@ -1212,9 +1363,6 @@ static inline bool dsa_slave_dev_check(const struct net_device *dev) #endif netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); -int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data); -int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data); -int dsa_port_get_phy_sset_count(struct dsa_port *dp); void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up); struct dsa_tag_driver { @@ -1247,7 +1395,7 @@ module_exit(dsa_tag_driver_module_exit) /** * module_dsa_tag_drivers() - Helper macro for registering DSA tag * drivers - * @__ops_array: Array of tag driver strucutres + * @__ops_array: Array of tag driver structures * * Helper macro for DSA tag drivers which do not do anything special * in module init/exit. Each module may only use this macro once, and diff --git a/include/net/esp.h b/include/net/esp.h index 90cd02ff77ef..9c5637d41d95 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -4,8 +4,6 @@ #include <linux/skbuff.h> -#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER) - struct ip_esp_hdr; static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) diff --git a/include/net/flow.h b/include/net/flow.h index 58beb16a49b8..987bd511d652 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -29,6 +29,7 @@ struct flowi_tunnel { struct flowi_common { int flowic_oif; int flowic_iif; + int flowic_l3mdev; __u32 flowic_mark; __u8 flowic_tos; __u8 flowic_scope; @@ -36,7 +37,6 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 -#define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; kuid_t flowic_uid; struct flowi_tunnel flowic_tun_key; @@ -70,6 +70,7 @@ struct flowi4 { struct flowi_common __fl_common; #define flowi4_oif __fl_common.flowic_oif #define flowi4_iif __fl_common.flowic_iif +#define flowi4_l3mdev __fl_common.flowic_l3mdev #define flowi4_mark __fl_common.flowic_mark #define flowi4_tos __fl_common.flowic_tos #define flowi4_scope __fl_common.flowic_scope @@ -102,6 +103,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; + fl4->flowi4_l3mdev = 0; fl4->flowi4_mark = mark; fl4->flowi4_tos = tos; fl4->flowi4_scope = scope; @@ -132,6 +134,7 @@ struct flowi6 { struct flowi_common __fl_common; #define flowi6_oif __fl_common.flowic_oif #define flowi6_iif __fl_common.flowic_iif +#define flowi6_l3mdev __fl_common.flowic_l3mdev #define flowi6_mark __fl_common.flowic_mark #define flowi6_scope __fl_common.flowic_scope #define flowi6_proto __fl_common.flowic_proto @@ -177,6 +180,7 @@ struct flowi { } u; #define flowi_oif u.__fl_common.flowic_oif #define flowi_iif u.__fl_common.flowic_iif +#define flowi_l3mdev u.__fl_common.flowic_l3mdev #define flowi_mark u.__fl_common.flowic_mark #define flowi_tos u.__fl_common.flowic_tos #define flowi_scope u.__fl_common.flowic_scope diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index aa33e1092e2c..a4c6057c7097 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -59,6 +59,8 @@ struct flow_dissector_key_vlan { __be16 vlan_tci; }; __be16 vlan_tpid; + __be16 vlan_eth_type; + u16 padding; }; struct flow_dissector_mpls_lse { @@ -251,6 +253,14 @@ struct flow_dissector_key_hash { u32 hash; }; +/** + * struct flow_dissector_key_num_of_vlans: + * @num_of_vlans: num_of_vlans value + */ +struct flow_dissector_key_num_of_vlans { + u8 num_of_vlans; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -280,6 +290,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */ FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ + FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 5b8c54eb7a6b..6484095a8c01 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -148,6 +148,10 @@ enum flow_action_id { FLOW_ACTION_MPLS_MANGLE, FLOW_ACTION_GATE, FLOW_ACTION_PPPOE_PUSH, + FLOW_ACTION_JUMP, + FLOW_ACTION_PIPE, + FLOW_ACTION_VLAN_PUSH_ETH, + FLOW_ACTION_VLAN_POP_ETH, NUM_FLOW_ACTIONS, }; @@ -209,6 +213,10 @@ struct flow_action_entry { __be16 proto; u8 prio; } vlan; + struct { /* FLOW_ACTION_VLAN_PUSH_ETH */ + unsigned char dst[ETH_ALEN]; + unsigned char src[ETH_ALEN]; + } vlan_push_eth; struct { /* FLOW_ACTION_MANGLE */ /* FLOW_ACTION_ADD */ enum flow_action_mangle_base htype; @@ -235,9 +243,16 @@ struct flow_action_entry { struct { /* FLOW_ACTION_POLICE */ u32 burst; u64 rate_bytes_ps; + u64 peakrate_bytes_ps; + u32 avrate; + u16 overhead; u64 burst_pkt; u64 rate_pkt_ps; u32 mtu; + struct { + enum flow_action_id act_id; + u32 extval; + } exceed, notexceed; } police; struct { /* FLOW_ACTION_CT */ int action; @@ -302,6 +317,12 @@ static inline bool flow_offload_has_one_action(const struct flow_action *action) return action->num_entries == 1; } +static inline bool flow_action_is_last_entry(const struct flow_action *action, + const struct flow_action_entry *entry) +{ + return entry == &action->entries[action->num_entries - 1]; +} + #define flow_action_for_each(__i, __act, __actions) \ for (__i = 0, __act = &(__actions)->entries[0]; \ __i < (__actions)->num_entries; \ @@ -591,5 +612,6 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)); +bool flow_indr_dev_exists(void); #endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/gro.h b/include/net/gro.h index 8f75802d50fd..867656b0739c 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -29,46 +29,51 @@ struct napi_gro_cb { /* Number of segments aggregated. */ u16 count; - /* Start offset for remote checksum offload */ - u16 gro_remcsum_start; + /* Used in ipv6_gro_receive() and foo-over-udp */ + u16 proto; /* jiffies when first packet was created/queued */ unsigned long age; - /* Used in ipv6_gro_receive() and foo-over-udp */ - u16 proto; +/* Used in napi_gro_cb::free */ +#define NAPI_GRO_FREE 1 +#define NAPI_GRO_FREE_STOLEN_HEAD 2 + /* portion of the cb set to zero at every gro iteration */ + struct_group(zeroed, - /* This is non-zero if the packet may be of the same flow. */ - u8 same_flow:1; + /* Start offset for remote checksum offload */ + u16 gro_remcsum_start; - /* Used in tunnel GRO receive */ - u8 encap_mark:1; + /* This is non-zero if the packet may be of the same flow. */ + u8 same_flow:1; - /* GRO checksum is valid */ - u8 csum_valid:1; + /* Used in tunnel GRO receive */ + u8 encap_mark:1; - /* Number of checksums via CHECKSUM_UNNECESSARY */ - u8 csum_cnt:3; + /* GRO checksum is valid */ + u8 csum_valid:1; - /* Free the skb? */ - u8 free:2; -#define NAPI_GRO_FREE 1 -#define NAPI_GRO_FREE_STOLEN_HEAD 2 + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; + + /* Free the skb? */ + u8 free:2; - /* Used in foo-over-udp, set in udp[46]_gro_receive */ - u8 is_ipv6:1; + /* Used in foo-over-udp, set in udp[46]_gro_receive */ + u8 is_ipv6:1; - /* Used in GRE, set in fou/gue_gro_receive */ - u8 is_fou:1; + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; - /* Used to determine if flush_id can be ignored */ - u8 is_atomic:1; + /* Used to determine if flush_id can be ignored */ + u8 is_atomic:1; - /* Number of gro_receive callbacks this packet already went through */ - u8 recursion_counter:4; + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; - /* GRO is done by frag_list pointer chaining. */ - u8 is_flist:1; + /* GRO is done by frag_list pointer chaining. */ + u8 is_flist:1; + ); /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; diff --git a/include/net/gtp.h b/include/net/gtp.h index 0e16ebb2a82d..c1d6169df331 100644 --- a/include/net/gtp.h +++ b/include/net/gtp.h @@ -7,8 +7,13 @@ #define GTP0_PORT 3386 #define GTP1U_PORT 2152 +/* GTP messages types */ +#define GTP_ECHO_REQ 1 /* Echo Request */ +#define GTP_ECHO_RSP 2 /* Echo Response */ #define GTP_TPDU 255 +#define GTPIE_RECOVERY 14 + struct gtp0_header { /* According to GSM TS 09.60. */ __u8 flags; __u8 type; @@ -27,6 +32,43 @@ struct gtp1_header { /* According to 3GPP TS 29.060. */ __be32 tid; } __attribute__ ((packed)); +struct gtp1_header_long { /* According to 3GPP TS 29.060. */ + __u8 flags; + __u8 type; + __be16 length; + __be32 tid; + __be16 seq; + __u8 npdu; + __u8 next; +} __packed; + +/* GTP Information Element */ +struct gtp_ie { + __u8 tag; + __u8 val; +} __packed; + +struct gtp0_packet { + struct gtp0_header gtp0_h; + struct gtp_ie ie; +} __packed; + +struct gtp1u_packet { + struct gtp1_header_long gtp1u_h; + struct gtp_ie ie; +} __packed; + +struct gtp_pdu_session_info { /* According to 3GPP TS 38.415. */ + u8 pdu_type; + u8 qfi; +}; + +static inline bool netif_is_gtp(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "gtp"); +} + #define GTP1_F_NPDU 0x01 #define GTP1_F_SEQ 0x02 #define GTP1_F_EXTHDR 0x04 diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 11630351c978..598f53d2a3a0 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2017 Intel Deutschland GmbH - * Copyright (c) 2018-2019 Intel Corporation + * Copyright (c) 2018-2019, 2021 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -365,7 +365,7 @@ enum ieee80211_radiotap_zero_len_psdu_type { */ static inline u16 ieee80211_get_radiotap_len(const char *data) { - struct ieee80211_radiotap_header *hdr = (void *)data; + const struct ieee80211_radiotap_header *hdr = (const void *)data; return get_unaligned_le16(&hdr->it_len); } diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f026cf08a8e8..c8490729b4ae 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -64,6 +64,14 @@ struct inet6_ifaddr { struct hlist_node addr_lst; struct list_head if_list; + /* + * Used to safely traverse idev->addr_list in process context + * if the idev->lock needed to protect idev->addr_list cannot be held. + * In that case, add the items to this list temporarily and iterate + * without holding idev->lock. + * See addrconf_ifdown and dev_forward_change. + */ + struct list_head if_list_aux; struct list_head tmp_list; struct inet6_ifaddr *ifpub; @@ -71,6 +79,8 @@ struct inet6_ifaddr { bool tokenized; + u8 ifa_proto; + struct rcu_head rcu; struct in6_addr peer_addr; }; diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 81b965953036..f259e1ae14ba 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -103,15 +103,25 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, const int dif); int inet6_hash(struct sock *sk); -#endif /* IS_ENABLED(CONFIG_IPV6) */ -#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ - ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ - (((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ - net_eq(sock_net(__sk), (__net))) +static inline bool inet6_match(struct net *net, const struct sock *sk, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + const __portpair ports, + const int dif, const int sdif) +{ + int bound_dev_if; + + if (!net_eq(sock_net(sk), net) || + sk->sk_family != AF_INET6 || + sk->sk_portpair != ports || + !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || + !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return false; + + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + return bound_dev_if == dif || bound_dev_if == sdif; +} +#endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* _INET6_HASHTABLES_H */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 4ad47d9f9d27..85cd695e7fd1 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -66,7 +66,6 @@ struct inet_connection_sock_af_ops { * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data * @icsk_clean_acked Clean acked data hook - * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -96,7 +95,6 @@ struct inet_connection_sock { const struct tcp_ulp_ops *icsk_ulp_ops; void __rcu *icsk_ulp_data; void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq); - struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:5, icsk_ca_initialized:1, @@ -285,6 +283,14 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); +static inline unsigned long +reqsk_timeout(struct request_sock *req, unsigned long max_timeout) +{ + u64 timeout = (u64)req->timeout << req->num_timeout; + + return (unsigned long)min_t(u64, timeout, max_timeout); +} + static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) { /* The below has to be done to allow calling inet_csk_destroy_sock */ diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h new file mode 100644 index 000000000000..72f250dffada --- /dev/null +++ b/include/net/inet_dscp.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * inet_dscp.h: helpers for handling differentiated services codepoints (DSCP) + * + * DSCP is defined in RFC 2474: + * + * 0 1 2 3 4 5 6 7 + * +---+---+---+---+---+---+---+---+ + * | DSCP | CU | + * +---+---+---+---+---+---+---+---+ + * + * DSCP: differentiated services codepoint + * CU: currently unused + * + * The whole DSCP + CU bits form the DS field. + * The DS field is also commonly called TOS or Traffic Class (for IPv6). + * + * Note: the CU bits are now used for Explicit Congestion Notification + * (RFC 3168). + */ + +#ifndef _INET_DSCP_H +#define _INET_DSCP_H + +#include <linux/types.h> + +/* Special type for storing DSCP values. + * + * A dscp_t variable stores a DS field with the CU (ECN) bits cleared. + * Using dscp_t allows to strictly separate DSCP and ECN bits, thus avoiding + * bugs where ECN bits are erroneously taken into account during FIB lookups + * or policy routing. + * + * Note: to get the real DSCP value contained in a dscp_t variable one would + * have to do a bit shift after calling inet_dscp_to_dsfield(). We could have + * a helper for that, but there's currently no users. + */ +typedef u8 __bitwise dscp_t; + +#define INET_DSCP_MASK 0xfc + +static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield) +{ + return (__force dscp_t)(dsfield & INET_DSCP_MASK); +} + +static inline __u8 inet_dscp_to_dsfield(dscp_t dscp) +{ + return (__force __u8)dscp; +} + +static inline bool inet_validate_dscp(__u8 val) +{ + return !(val & ~INET_DSCP_MASK); +} + +#endif /* _INET_DSCP_H */ diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 63540be0fc34..911ad930867d 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -70,6 +70,7 @@ struct frag_v6_compare_key { * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far + * @mono_delivery_time: stamp has a mono delivery time (EDT) * @flags: fragment queue flags * @max_size: maximum received fragment size * @fqdir: pointer to struct fqdir @@ -90,6 +91,7 @@ struct inet_frag_queue { ktime_t stamp; int len; int meat; + u8 mono_delivery_time; __u8 flags; u16 max_size; struct fqdir *fqdir; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f72ec113ae56..ebfa3df6f8dc 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -111,11 +111,7 @@ struct inet_bind_hashbucket { #define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - unsigned int count; - union { - struct hlist_head head; - struct hlist_nulls_head nulls_head; - }; + struct hlist_nulls_head nulls_head; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ @@ -143,32 +139,8 @@ struct inet_hashinfo { /* The 2nd listener table hashed by local port and address */ unsigned int lhash2_mask; struct inet_listen_hashbucket *lhash2; - - /* All the above members are written once at bootup and - * never written again _or_ are predominantly read-access. - * - * Now align to a new cache line as all the following members - * might be often dirty. - */ - /* All sockets in TCP_LISTEN state will be in listening_hash. - * This is the only table where wildcard'd TCP sockets can - * exist. listening_hash is only hashed by local port number. - * If lhash2 is initialized, the same socket will also be hashed - * to lhash2 by port and address. - */ - struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] - ____cacheline_aligned_in_smp; }; -#define inet_lhash2_for_each_icsk_continue(__icsk) \ - hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node) - -#define inet_lhash2_for_each_icsk(__icsk, list) \ - hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node) - -#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ - hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) - static inline struct inet_listen_hashbucket * inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash) { @@ -230,23 +202,11 @@ static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum); -/* These can have wildcards, don't try too hard. */ -static inline u32 inet_lhashfn(const struct net *net, const unsigned short num) -{ - return (num + net_hash_mix(net)) & (INET_LHTABLE_SIZE - 1); -} - -static inline int inet_sk_listen_hashfn(const struct sock *sk) -{ - return inet_lhashfn(sock_net(sk), inet_sk(sk)->inet_num); -} - /* Caller must disable local BH processing. */ int __inet_inherit_port(const struct sock *sk, struct sock *child); void inet_put_port(struct sock *sk); -void inet_hashinfo_init(struct inet_hashinfo *h); void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, @@ -295,7 +255,6 @@ static inline struct sock *inet_lookup_listener(struct net *net, ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport))) #endif -#if (BITS_PER_LONG == 64) #ifdef __BIG_ENDIAN #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __addrpair __name = (__force __addrpair) ( \ @@ -307,24 +266,22 @@ static inline struct sock *inet_lookup_listener(struct net *net, (((__force __u64)(__be32)(__daddr)) << 32) | \ ((__force __u64)(__be32)(__saddr))) #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_addrpair == (__cookie)) && \ - (((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ - net_eq(sock_net(__sk), (__net))) -#else /* 32-bit arch */ -#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ - const int __name __deprecated __attribute__((unused)) -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_daddr == (__saddr)) && \ - ((__sk)->sk_rcv_saddr == (__daddr)) && \ - (((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ - net_eq(sock_net(__sk), (__net))) -#endif /* 64-bit arch */ +static inline bool inet_match(struct net *net, const struct sock *sk, + const __addrpair cookie, const __portpair ports, + int dif, int sdif) +{ + int bound_dev_if; + + if (!net_eq(sock_net(sk), net) || + sk->sk_portpair != ports || + sk->sk_addrpair != cookie) + return false; + + /* Paired with WRITE_ONCE() from sock_bindtoindex_locked() */ + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + return bound_dev_if == dif || bound_dev_if == sdif; +} /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. -DaveM @@ -425,7 +382,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) } int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 234d70ae5f4c..daead5fb389a 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -116,14 +116,15 @@ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) static inline int inet_request_bound_dev_if(const struct sock *sk, struct sk_buff *skb) { + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) + if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) return l3mdev_master_ifindex_by_index(net, skb->skb_iif); #endif - return sk->sk_bound_dev_if; + return bound_dev_if; } static inline int inet_sk_bound_l3mdev(const struct sock *sk) @@ -252,6 +253,11 @@ struct inet_sock { #define IP_CMSG_CHECKSUM BIT(7) #define IP_CMSG_RECVFRAGSIZE BIT(8) +static inline bool sk_is_inet(struct sock *sk) +{ + return sk->sk_family == AF_INET || sk->sk_family == AF_INET6; +} + /** * sk_to_full_sk - Access to a full socket * @sk: pointer to a socket diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index dfd919b3119e..5b47545f22d3 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -65,10 +65,9 @@ struct inet_timewait_sock { /* these three are in inet_sock */ __be16 tw_sport; /* And these are ours. */ - unsigned int tw_kill : 1, - tw_transparent : 1, + unsigned int tw_transparent : 1, tw_flowlabel : 20, - tw_pad : 2, /* 2 bits hole */ + tw_pad : 3, /* 3 bits hole */ tw_tos : 8; u32 tw_txhash; u32 tw_priority; diff --git a/include/net/ioam6.h b/include/net/ioam6.h index 3f45ba37a2c6..781d2d8b2f29 100644 --- a/include/net/ioam6.h +++ b/include/net/ioam6.h @@ -35,7 +35,7 @@ struct ioam6_schema { int len; __be32 hdr; - u8 data[0]; + u8 data[]; }; struct ioam6_pernet_data { diff --git a/include/net/ip.h b/include/net/ip.h index b51bae43b0dd..26fffda78cca 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -56,6 +56,7 @@ struct inet_skb_parm { #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) #define IPSKB_L3SLAVE BIT(7) +#define IPSKB_NOPOLICY BIT(8) u16 frag_max_size; }; @@ -93,7 +94,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm->sockc.mark = inet->sk.sk_mark; ipcm->sockc.tsflags = inet->sk.sk_tsflags; - ipcm->oif = inet->sk.sk_bound_dev_if; + ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; } @@ -517,7 +518,6 @@ void ip_dst_metrics_put(struct dst_entry *dst) kfree(p); } -u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, @@ -712,7 +712,7 @@ int ip_forward(struct sk_buff *skb); */ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, - __be32 daddr, struct rtable *rt, int is_frag); + __be32 daddr, struct rtable *rt); int __ip_options_echo(struct net *net, struct ip_options *dopt, struct sk_buff *skb, const struct ip_options *sopt); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 2048bc8748cb..6268963d9599 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -369,9 +369,8 @@ struct rt6_statistics { __u32 fib_rt_cache; /* cached rt entries in exception table */ __u32 fib_discarded_routes; /* total number of routes delete */ - /* The following stats are not protected by any lock */ + /* The following stat is not protected by any lock */ atomic_t fib_rt_alloc; /* total number of routes alloced */ - atomic_t fib_rt_uncache; /* rt entries in uncached list */ }; #define RTN_TL_ROOT 0x0001 diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index a38c4f1e4e5c..74b369bddf49 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -58,7 +58,7 @@ struct ip6_tnl { /* These fields used only by GRE */ __u32 i_seqno; /* The last seen seqno */ - __u32 o_seqno; /* The last output seqno */ + atomic_t o_seqno; /* The last output seqno */ int hlen; /* tun_hlen + encap_hlen */ int tun_hlen; /* Precalculated header length */ int encap_hlen; /* Encap header length (FOU,GUE) */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c4297704bbcb..a378eff827c7 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -17,6 +17,7 @@ #include <linux/rcupdate.h> #include <net/fib_notifier.h> #include <net/fib_rules.h> +#include <net/inet_dscp.h> #include <net/inetpeer.h> #include <linux/percpu.h> #include <linux/notifier.h> @@ -24,7 +25,7 @@ struct fib_config { u8 fc_dst_len; - u8 fc_tos; + dscp_t fc_dscp; u8 fc_protocol; u8 fc_scope; u8 fc_type; @@ -211,7 +212,7 @@ struct fib_rt_info { u32 tb_id; __be32 dst; int dst_len; - u8 tos; + dscp_t dscp; u8 type; u8 offload:1, trap:1, @@ -224,7 +225,7 @@ struct fib_entry_notifier_info { u32 dst; int dst_len; struct fib_info *fi; - u8 tos; + dscp_t dscp; u8 type; u32 tb_id; }; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 0219fe907b26..c24fa934221d 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -116,7 +116,7 @@ struct ip_tunnel { /* These four fields used only by GRE */ u32 i_seqno; /* The last seen seqno */ - u32 o_seqno; /* The last output seqno */ + atomic_t o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ /* These four fields used only by ERSPAN */ @@ -243,11 +243,18 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id) static inline void ip_tunnel_init_flow(struct flowi4 *fl4, int proto, __be32 daddr, __be32 saddr, - __be32 key, __u8 tos, int oif, + __be32 key, __u8 tos, + struct net *net, int oif, __u32 mark, __u32 tun_inner_hash) { memset(fl4, 0, sizeof(*fl4)); - fl4->flowi4_oif = oif; + + if (oif) { + fl4->flowi4_l3mdev = l3mdev_master_upper_ifindex_by_index_rcu(net, oif); + /* Legacy VRF/l3mdev use case */ + fl4->flowi4_oif = fl4->flowi4_l3mdev ? 0 : oif; + } + fl4->daddr = daddr; fl4->saddr = saddr; fl4->flowi4_tos = tos; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 92eec13d1693..de9dcc5652c4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -15,9 +15,9 @@ #include <linux/refcount.h> #include <linux/jump_label_ratelimit.h> #include <net/if_inet6.h> -#include <net/ndisc.h> #include <net/flow.h> #include <net/flow_dissector.h> +#include <net/inet_dscp.h> #include <net/snmp.h> #include <net/netns/hash.h> @@ -151,6 +151,17 @@ struct frag_hdr { __be32 identification; }; +/* + * Jumbo payload option, as described in RFC 2675 2. + */ +struct hop_jumbo_hdr { + u8 nexthdr; + u8 hdrlen; + u8 tlv_type; /* IPV6_TLV_JUMBO, 0xC2 */ + u8 tlv_len; /* 4 */ + __be32 jumbo_payload_len; +}; + #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 @@ -440,14 +451,55 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, int newtype, struct ipv6_opt_hdr *newopt); -struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, - struct ipv6_txoptions *opt); +struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space, + struct ipv6_txoptions *opt); + +static inline struct ipv6_txoptions * +ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt) +{ + if (!opt) + return NULL; + return __ipv6_fixup_options(opt_space, opt); +} bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct inet6_skb_parm *opt); struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt); +/* This helper is specialized for BIG TCP needs. + * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. + * It assumes headers are already in skb->head. + * Returns 0, or IPPROTO_TCP if a BIG TCP packet is there. + */ +static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) +{ + const struct hop_jumbo_hdr *jhdr; + const struct ipv6hdr *nhdr; + + if (likely(skb->len <= GRO_LEGACY_MAX_SIZE)) + return 0; + + if (skb->protocol != htons(ETH_P_IPV6)) + return 0; + + if (skb_network_offset(skb) + + sizeof(struct ipv6hdr) + + sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) + return 0; + + nhdr = ipv6_hdr(skb); + + if (nhdr->nexthdr != NEXTHDR_HOP) + return 0; + + jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1); + if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 || + jhdr->nexthdr != IPPROTO_TCP) + return 0; + return jhdr->nexthdr; +} + static inline bool ipv6_accept_ra(struct inet6_dev *idev) { /* If forwarding is enabled, RA are not accepted unless the special @@ -970,6 +1022,11 @@ static inline u8 ip6_tclass(__be32 flowinfo) return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT; } +static inline dscp_t ip6_dscp(__be32 flowinfo) +{ + return inet_dsfield_to_dscp(ip6_tclass(flowinfo)); +} + static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel) { return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; @@ -1006,7 +1063,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr); int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, + void *from, size_t length, int transhdrlen, struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags); @@ -1022,8 +1079,8 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - struct ipcm6_cookie *ipc6, struct flowi6 *fl6, + void *from, size_t length, int transhdrlen, + struct ipcm6_cookie *ipc6, struct rt6_info *rt, unsigned int flags, struct inet_cork_full *cork); diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 0a4779175a52..5052c66e22d2 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _IPV6_FRAG_H #define _IPV6_FRAG_H +#include <linux/icmpv6.h> #include <linux/kernel.h> #include <net/addrconf.h> #include <net/ipv6.h> diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c50221d7e82c..ebadb2103968 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7,7 +7,7 @@ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation */ #ifndef MAC80211_H @@ -514,7 +514,6 @@ struct ieee80211_fils_discovery { * to that BSS) that can change during the lifetime of the BSS. * * @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE - * @multi_sta_back_32bit: supports BA bitmap of 32-bits in Multi-STA BACK * @uora_exists: is the UORA element advertised by AP * @ack_enabled: indicates support to receive a multi-TID that solicits either * ACK, BACK or both @@ -636,6 +635,7 @@ struct ieee80211_fils_discovery { * @tx_pwr_env: transmit power envelope array of BSS. * @tx_pwr_env_num: number of @tx_pwr_env. * @pwr_reduction: power constraint of BSS. + * @eht_support: does this BSS support EHT */ struct ieee80211_bss_conf { const u8 *bssid; @@ -710,6 +710,7 @@ struct ieee80211_bss_conf { struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; u8 tx_pwr_env_num; u8 pwr_reduction; + bool eht_support; }; /** @@ -883,6 +884,17 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), }; +/** + * enum mac80211_tx_status_flags - flags to describe transmit status + * + * @IEEE80211_TX_STATUS_ACK_SIGNAL_VALID: ACK signal is valid + * + * These flags are used in tx_info->status.flags. + */ +enum mac80211_tx_status_flags { + IEEE80211_TX_STATUS_ACK_SIGNAL_VALID = BIT(0), +}; + /* * This definition is used as a mask to clear all temporary flags, which are * set by the tx handlers for each transmission attempt by the mac80211 stack. @@ -1046,7 +1058,7 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) * @status.antenna: (legacy, kept only for iwlegacy) * @status.tx_time: airtime consumed for transmission; note this is only * used for WMM AC, not for airtime fairness - * @status.is_valid_ack_signal: ACK signal is valid + * @status.flags: status flags, see &enum mac80211_tx_status_flags * @status.status_driver_data: driver use area * @ack: union part for pure ACK data * @ack.cookie: cookie for the ACK @@ -1099,8 +1111,8 @@ struct ieee80211_tx_info { u8 ampdu_len; u8 antenna; u16 tx_time; - bool is_valid_ack_signal; - void *status_driver_data[19 / sizeof(void *)]; + u8 flags; + void *status_driver_data[18 / sizeof(void *)]; } status; struct { struct ieee80211_tx_rate driver_rates[ @@ -1131,20 +1143,41 @@ ieee80211_info_get_tx_time_est(struct ieee80211_tx_info *info) return info->tx_time_est << 2; } +/*** + * struct ieee80211_rate_status - mrr stage for status path + * + * This struct is used in struct ieee80211_tx_status to provide drivers a + * dynamic way to report about used rates and power levels per packet. + * + * @rate_idx The actual used rate. + * @try_count How often the rate was tried. + * @tx_power_idx An idx into the ieee80211_hw->tx_power_levels list of the + * corresponding wifi hardware. The idx shall point to the power level + * that was used when sending the packet. + */ +struct ieee80211_rate_status { + struct rate_info rate_idx; + u8 try_count; + u8 tx_power_idx; +}; + /** * struct ieee80211_tx_status - extended tx status info for rate control * * @sta: Station that the packet was transmitted for * @info: Basic tx status information * @skb: Packet skb (can be NULL if not provided by the driver) - * @rate: The TX rate that was used when sending the packet + * @rates: Mrr stages that were used when sending the packet + * @n_rates: Number of mrr stages (count of instances for @rates) * @free_list: list where processed skbs are stored to be free'd by the driver */ struct ieee80211_tx_status { struct ieee80211_sta *sta; struct ieee80211_tx_info *info; struct sk_buff *skb; - struct rate_info *rate; + struct ieee80211_rate_status *rates; + u8 n_rates; + struct list_head *free_list; }; @@ -1188,9 +1221,9 @@ static inline struct ieee80211_rx_status *IEEE80211_SKB_RXCB(struct sk_buff *skb * in the TX status but the rate control information (it does clear * the count since you need to fill that in anyway). * - * NOTE: You can only use this function if you do NOT use - * info->driver_data! Use info->rate_driver_data - * instead if you need only the less space that allows. + * NOTE: While the rates array is kept intact, this will wipe all of the + * driver_data fields in info, so it's up to the driver to restore + * any fields it needs after calling this helper. */ static inline void ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) @@ -1688,7 +1721,7 @@ enum ieee80211_offload_flags { * these need to be set (or cleared) when the interface is added * or, if supported by the driver, the interface type is changed * at runtime, mac80211 will never touch this field - * @offloaad_flags: hardware offload capabilities/flags for this interface. + * @offload_flags: hardware offload capabilities/flags for this interface. * These are initialized by mac80211 before calling .add_interface, * .change_interface or .update_vif_offload and updated by the driver * within these ops, based on supported features or runtime change @@ -1994,6 +2027,7 @@ enum ieee80211_sta_state { * @IEEE80211_STA_RX_BW_80: station can receive up to 80 MHz * @IEEE80211_STA_RX_BW_160: station can receive up to 160 MHz * (including 80+80 MHz) + * @IEEE80211_STA_RX_BW_320: station can receive up to 320 MHz * * Implementation note: 20 must be zero to be initialized * correctly, the values must be sorted. @@ -2003,6 +2037,7 @@ enum ieee80211_sta_rx_bandwidth { IEEE80211_STA_RX_BW_40, IEEE80211_STA_RX_BW_80, IEEE80211_STA_RX_BW_160, + IEEE80211_STA_RX_BW_320, }; /** @@ -2041,6 +2076,45 @@ struct ieee80211_sta_txpwr { enum nl80211_tx_power_setting type; }; +#define MAX_STA_LINKS 15 + +/** + * struct ieee80211_link_sta - station Link specific info + * All link specific info for a STA link for a non MLD STA(single) + * or a MLD STA(multiple entries) are stored here. + * + * @addr: MAC address of the Link STA. For non-MLO STA this is same as the addr + * in ieee80211_sta. For MLO Link STA this addr can be same or different + * from addr in ieee80211_sta (representing MLD STA addr) + * @supp_rates: Bitmap of supported rates + * @ht_cap: HT capabilities of this STA; restricted to our own capabilities + * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities + * @he_cap: HE capabilities of this STA + * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities + * @eht_cap: EHT capabilities of this STA + * @bandwidth: current bandwidth the station can receive with + * @rx_nss: in HT/VHT, the maximum number of spatial streams the + * station can receive at the moment, changed by operating mode + * notifications and capabilities. The value is only valid after + * the station moves to associated state. + * @txpwr: the station tx power configuration + * + */ +struct ieee80211_link_sta { + u8 addr[ETH_ALEN]; + + u32 supp_rates[NUM_NL80211_BANDS]; + struct ieee80211_sta_ht_cap ht_cap; + struct ieee80211_sta_vht_cap vht_cap; + struct ieee80211_sta_he_cap he_cap; + struct ieee80211_he_6ghz_capa he_6ghz_capa; + struct ieee80211_sta_eht_cap eht_cap; + + u8 rx_nss; + enum ieee80211_sta_rx_bandwidth bandwidth; + struct ieee80211_sta_txpwr txpwr; +}; + /** * struct ieee80211_sta - station table entry * @@ -2050,14 +2124,11 @@ struct ieee80211_sta_txpwr { * either be protected by rcu_read_lock() explicitly or implicitly, * or you must take good care to not use such a pointer after a * call to your sta_remove callback that removed it. + * This also represents the MLD STA in case of MLO association + * and holds pointers to various link STA's * * @addr: MAC address * @aid: AID we assigned to the station if we're an AP - * @supp_rates: Bitmap of supported rates (per band) - * @ht_cap: HT capabilities of this STA; restricted to our own capabilities - * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities - * @he_cap: HE capabilities of this STA - * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU * that this station is allowed to transmit to us. * Can be modified by driver. @@ -2069,11 +2140,6 @@ struct ieee80211_sta_txpwr { * if wme is supported. The bits order is like in * IEEE80211_WMM_IE_STA_QOSINFO_AC_*. * @max_sp: max Service Period. Only valid if wme is supported. - * @bandwidth: current bandwidth the station can receive with - * @rx_nss: in HT/VHT, the maximum number of spatial streams the - * station can receive at the moment, changed by operating mode - * notifications and capabilities. The value is only valid after - * the station moves to associated state. * @smps_mode: current SMPS mode (off, static or dynamic) * @rates: rate control selection table * @tdls: indicates whether the STA is a TDLS peer @@ -2086,24 +2152,28 @@ struct ieee80211_sta_txpwr { * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID - * @txpwr: the station tx power configuration * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that * the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames + * @multi_link_sta: Identifies if this sta is a MLD STA + * @deflink: This holds the default link STA information, for non MLO STA all link + * specific STA information is accessed through @deflink or through + * link[0] which points to address of @deflink. For MLO Link STA + * the first added link STA will point to deflink. + * @link: reference to Link Sta entries. For Non MLO STA, except 1st link, + * i.e link[0] all links would be assigned to NULL by default and + * would access link information via @deflink or link[0]. For MLO + * STA, first link STA being added will point its link pointer to + * @deflink address and remaining would be allocated and the address + * would be assigned to link[link_id] where link_id is the id assigned + * by the AP. */ struct ieee80211_sta { - u32 supp_rates[NUM_NL80211_BANDS]; u8 addr[ETH_ALEN]; u16 aid; - struct ieee80211_sta_ht_cap ht_cap; - struct ieee80211_sta_vht_cap vht_cap; - struct ieee80211_sta_he_cap he_cap; - struct ieee80211_he_6ghz_capa he_6ghz_capa; u16 max_rx_aggregation_subframes; bool wme; u8 uapsd_queues; u8 max_sp; - u8 rx_nss; - enum ieee80211_sta_rx_bandwidth bandwidth; enum ieee80211_smps_mode smps_mode; struct ieee80211_sta_rates __rcu *rates; bool tdls; @@ -2130,10 +2200,13 @@ struct ieee80211_sta { bool support_p2p_ps; u16 max_rc_amsdu_len; u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS]; - struct ieee80211_sta_txpwr txpwr; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1]; + bool multi_link_sta; + struct ieee80211_link_sta deflink; + struct ieee80211_link_sta *link[MAX_STA_LINKS]; + /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; @@ -2417,6 +2490,9 @@ struct ieee80211_txq { * usage and 802.11 frames with %RX_FLAG_ONLY_MONITOR set for monitor to * the stack. * + * @IEEE80211_HW_DETECTS_COLOR_COLLISION: HW/driver has support for BSS color + * collision detection and doesn't need it in software. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2472,6 +2548,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD, IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD, IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP, + IEEE80211_HW_DETECTS_COLOR_COLLISION, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2601,6 +2678,12 @@ enum ieee80211_hw_flags { * refilling deficit of each TXQ. * * @max_mtu: the max mtu could be set. + * + * @tx_power_levels: a list of power levels supported by the wifi hardware. + * The power levels can be specified either as integer or fractions. + * The power level at idx 0 shall be the maximum positive power level. + * + * @max_txpwr_levels_idx: the maximum valid idx of 'tx_power_levels' list. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2639,6 +2722,8 @@ struct ieee80211_hw { u8 tx_sk_pacing_shift; u8 weight_multiplier; u32 max_mtu; + const s8 *tx_power_levels; + u8 max_txpwr_levels_idx; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, @@ -4931,12 +5016,14 @@ void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets); * @cntdwn_counter_offs: array of IEEE80211_MAX_CNTDWN_COUNTERS_NUM offsets * to countdown counters. This array can contain zero values which * should be ignored. + * @mbssid_off: position of the multiple bssid element */ struct ieee80211_mutable_offsets { u16 tim_offset; u16 tim_length; u16 cntdwn_counter_offs[IEEE80211_MAX_CNTDWN_COUNTERS_NUM]; + u16 mbssid_off; }; /** @@ -6054,6 +6141,16 @@ void ieee80211_disconnect(struct ieee80211_vif *vif, bool reconnect); void ieee80211_resume_disconnect(struct ieee80211_vif *vif); /** + * ieee80211_hw_restart_disconnect - disconnect from AP after + * hardware restart + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * Instructs mac80211 to disconnect from the AP after + * hardware restart. + */ +void ieee80211_hw_restart_disconnect(struct ieee80211_vif *vif); + +/** * ieee80211_cqm_rssi_notify - inform a configured connection quality monitoring * rssi threshold triggered * @@ -6338,7 +6435,7 @@ static inline int rate_supported(struct ieee80211_sta *sta, enum nl80211_band band, int index) { - return (sta == NULL || sta->supp_rates[band] & BIT(index)); + return (sta == NULL || sta->deflink.supp_rates[band] & BIT(index)); } static inline s8 diff --git a/include/net/mac802154.h b/include/net/mac802154.h index d524ffb9eb25..bdac0ddbdcdb 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -464,6 +464,12 @@ void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, * ieee802154_wake_queue - wake ieee802154 queue * @hw: pointer as obtained from ieee802154_alloc_hw(). * + * Tranceivers usually have either one transmit framebuffer or one framebuffer + * for both transmitting and receiving. Hence, the core currently only handles + * one frame at a time for each phy, which means we had to stop the queue to + * avoid new skb to come during the transmission. The queue then needs to be + * woken up after the operation. + * * Drivers should use this function instead of netif_wake_queue. */ void ieee802154_wake_queue(struct ieee802154_hw *hw); @@ -472,6 +478,12 @@ void ieee802154_wake_queue(struct ieee802154_hw *hw); * ieee802154_stop_queue - stop ieee802154 queue * @hw: pointer as obtained from ieee802154_alloc_hw(). * + * Tranceivers usually have either one transmit framebuffer or one framebuffer + * for both transmitting and receiving. Hence, the core currently only handles + * one frame at a time for each phy, which means we need to tell upper layers to + * stop giving us new skbs while we are busy with the transmitted one. The queue + * must then be stopped before transmitting. + * * Drivers should use this function instead of netif_stop_queue. */ void ieee802154_stop_queue(struct ieee802154_hw *hw); @@ -486,4 +498,23 @@ void ieee802154_stop_queue(struct ieee802154_hw *hw); void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, bool ifs_handling); +/** + * ieee802154_xmit_error - offloaded frame transmission failed + * + * @hw: pointer as obtained from ieee802154_alloc_hw(). + * @skb: buffer for transmission + * @reason: error code + */ +void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb, + int reason); + +/** + * ieee802154_xmit_hw_error - frame could not be offloaded to the transmitter + * because of a hardware error (bus error, timeout, etc) + * + * @hw: pointer as obtained from ieee802154_alloc_hw(). + * @skb: buffer for transmission + */ +void ieee802154_xmit_hw_error(struct ieee802154_hw *hw, struct sk_buff *skb); + #endif /* NET_MAC802154_H */ diff --git a/include/net/mctp.h b/include/net/mctp.h index 7e35ec79b909..82800d521c3d 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -36,15 +36,28 @@ struct mctp_hdr { #define MCTP_HDR_TAG_SHIFT 0 #define MCTP_HDR_TAG_MASK GENMASK(2, 0) -#define MCTP_HEADER_MAXLEN 4 - #define MCTP_INITIAL_DEFAULT_NET 1 -static inline bool mctp_address_ok(mctp_eid_t eid) +static inline bool mctp_address_unicast(mctp_eid_t eid) { return eid >= 8 && eid < 255; } +static inline bool mctp_address_broadcast(mctp_eid_t eid) +{ + return eid == 255; +} + +static inline bool mctp_address_null(mctp_eid_t eid) +{ + return eid == 0; +} + +static inline bool mctp_address_matches(mctp_eid_t match, mctp_eid_t eid) +{ + return match == eid || match == MCTP_ADDR_ANY; +} + static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb) { return (struct mctp_hdr *)skb_network_header(skb); @@ -121,7 +134,7 @@ struct mctp_sock { */ struct mctp_sk_key { mctp_eid_t peer_addr; - mctp_eid_t local_addr; + mctp_eid_t local_addr; /* MCTP_ADDR_ANY for local owned tags */ __u8 tag; /* incoming tag match; invert TO for local */ /* we hold a ref to sk when set */ @@ -158,6 +171,12 @@ struct mctp_sk_key { */ unsigned long dev_flow_state; struct mctp_dev *dev; + + /* a tag allocated with SIOCMCTPALLOCTAG ioctl will not expire + * automatically on timeout or response, instead SIOCMCTPDROPTAG + * is used. + */ + bool manual_alloc; }; struct mctp_skb_cb { @@ -234,6 +253,9 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); void mctp_key_unref(struct mctp_sk_key *key); +struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, + mctp_eid_t daddr, mctp_eid_t saddr, + bool manual, u8 *tagp); /* routing <--> device interface */ unsigned int mctp_default_net(struct net *net); diff --git a/include/net/mptcp.h b/include/net/mptcp.h index a925349b4b89..4d761ad530c9 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -35,7 +35,8 @@ struct mptcp_ext { frozen:1, reset_transient:1; u8 reset_reason:4, - csum_reqd:1; + csum_reqd:1, + infinite_map:1; }; #define MPTCP_RM_IDS_MAX 8 @@ -124,7 +125,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, struct mptcp_out_options *opts); bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); -void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, +void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp, struct mptcp_out_options *opts); void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info); @@ -217,12 +218,6 @@ static inline bool rsk_drop_req(const struct request_sock *req) return false; } -static inline void mptcp_parse_option(const struct sk_buff *skb, - const unsigned char *ptr, int opsize, - struct tcp_options_received *opt_rx) -{ -} - static inline bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, unsigned int *size, struct mptcp_out_options *opts) @@ -289,4 +284,10 @@ static inline int mptcpv6_init(void) { return 0; } static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { } #endif +#if defined(CONFIG_MPTCP) && defined(CONFIG_BPF_SYSCALL) +struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk); +#else +static inline struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { return NULL; } +#endif + #endif /* __NET_MPTCP_H */ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 47ffb360ddfa..da7eec8669ec 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -447,10 +447,15 @@ void ndisc_cleanup(void); int ndisc_rcv(struct sk_buff *skb); +struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit, + const struct in6_addr *saddr, u64 nonce); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr, u64 nonce); +void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, + const struct in6_addr *saddr); + void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, diff --git a/include/net/net_debug.h b/include/net/net_debug.h new file mode 100644 index 000000000000..1e74684cbbdb --- /dev/null +++ b/include/net/net_debug.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_NET_DEBUG_H +#define _LINUX_NET_DEBUG_H + +#include <linux/bug.h> +#include <linux/kern_levels.h> + +struct net_device; + +__printf(3, 4) __cold +void netdev_printk(const char *level, const struct net_device *dev, + const char *format, ...); +__printf(2, 3) __cold +void netdev_emerg(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_alert(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_crit(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_err(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_warn(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_notice(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_info(const struct net_device *dev, const char *format, ...); + +#define netdev_level_once(level, dev, fmt, ...) \ +do { \ + static bool __section(".data.once") __print_once; \ + \ + if (!__print_once) { \ + __print_once = true; \ + netdev_printk(level, dev, fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#define netdev_emerg_once(dev, fmt, ...) \ + netdev_level_once(KERN_EMERG, dev, fmt, ##__VA_ARGS__) +#define netdev_alert_once(dev, fmt, ...) \ + netdev_level_once(KERN_ALERT, dev, fmt, ##__VA_ARGS__) +#define netdev_crit_once(dev, fmt, ...) \ + netdev_level_once(KERN_CRIT, dev, fmt, ##__VA_ARGS__) +#define netdev_err_once(dev, fmt, ...) \ + netdev_level_once(KERN_ERR, dev, fmt, ##__VA_ARGS__) +#define netdev_warn_once(dev, fmt, ...) \ + netdev_level_once(KERN_WARNING, dev, fmt, ##__VA_ARGS__) +#define netdev_notice_once(dev, fmt, ...) \ + netdev_level_once(KERN_NOTICE, dev, fmt, ##__VA_ARGS__) +#define netdev_info_once(dev, fmt, ...) \ + netdev_level_once(KERN_INFO, dev, fmt, ##__VA_ARGS__) + +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +#define netdev_dbg(__dev, format, args...) \ +do { \ + dynamic_netdev_dbg(__dev, format, ##args); \ +} while (0) +#elif defined(DEBUG) +#define netdev_dbg(__dev, format, args...) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args) +#else +#define netdev_dbg(__dev, format, args...) \ +({ \ + if (0) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args); \ +}) +#endif + +#if defined(VERBOSE_DEBUG) +#define netdev_vdbg netdev_dbg +#else + +#define netdev_vdbg(dev, format, args...) \ +({ \ + if (0) \ + netdev_printk(KERN_DEBUG, dev, format, ##args); \ + 0; \ +}) +#endif + +/* netif printk helpers, similar to netdev_printk */ + +#define netif_printk(priv, type, level, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_printk(level, (dev), fmt, ##args); \ +} while (0) + +#define netif_level(level, priv, type, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_##level(dev, fmt, ##args); \ +} while (0) + +#define netif_emerg(priv, type, dev, fmt, args...) \ + netif_level(emerg, priv, type, dev, fmt, ##args) +#define netif_alert(priv, type, dev, fmt, args...) \ + netif_level(alert, priv, type, dev, fmt, ##args) +#define netif_crit(priv, type, dev, fmt, args...) \ + netif_level(crit, priv, type, dev, fmt, ##args) +#define netif_err(priv, type, dev, fmt, args...) \ + netif_level(err, priv, type, dev, fmt, ##args) +#define netif_warn(priv, type, dev, fmt, args...) \ + netif_level(warn, priv, type, dev, fmt, ##args) +#define netif_notice(priv, type, dev, fmt, args...) \ + netif_level(notice, priv, type, dev, fmt, ##args) +#define netif_info(priv, type, dev, fmt, args...) \ + netif_level(info, priv, type, dev, fmt, ##args) + +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +#define netif_dbg(priv, type, netdev, format, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + dynamic_netdev_dbg(netdev, format, ##args); \ +} while (0) +#elif defined(DEBUG) +#define netif_dbg(priv, type, dev, format, args...) \ + netif_printk(priv, type, KERN_DEBUG, dev, format, ##args) +#else +#define netif_dbg(priv, type, dev, format, args...) \ +({ \ + if (0) \ + netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ + 0; \ +}) +#endif + +/* if @cond then downgrade to debug, else print at @level */ +#define netif_cond_dbg(priv, type, netdev, cond, level, fmt, args...) \ + do { \ + if (cond) \ + netif_dbg(priv, type, netdev, fmt, ##args); \ + else \ + netif_ ## level(priv, type, netdev, fmt, ##args); \ + } while (0) + +#if defined(VERBOSE_DEBUG) +#define netif_vdbg netif_dbg +#else +#define netif_vdbg(priv, type, dev, format, args...) \ +({ \ + if (0) \ + netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ + 0; \ +}) +#endif + + +#if defined(CONFIG_DEBUG_NET) +#define DEBUG_NET_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) +#else +#define DEBUG_NET_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#endif + +#endif /* _LINUX_NET_DEBUG_H */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5b61c462e534..c4f5601f6e32 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -63,7 +63,7 @@ struct net { */ spinlock_t rules_mod_lock; - unsigned int dev_unreg_count; + atomic_t dev_unreg_count; unsigned int dev_base_seq; /* protected by rtnl_mutex */ int ifindex; @@ -513,4 +513,10 @@ static inline void fnhe_genid_bump(struct net *net) atomic_inc(&net->fnhe_genid); } +#ifdef CONFIG_NET +void net_ns_init(void); +#else +static inline void net_ns_init(void) {} +#endif + #endif /* __NET_NET_NAMESPACE_H */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b08b70989d2c..a32be8aa7ed2 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -43,6 +43,12 @@ union nf_conntrack_expect_proto { /* insert expect proto private data here */ }; +struct nf_conntrack_net_ecache { + struct delayed_work dwork; + spinlock_t dying_lock; + struct hlist_nulls_head dying_list; +}; + struct nf_conntrack_net { /* only used when new connection is allocated: */ atomic_t count; @@ -58,8 +64,7 @@ struct nf_conntrack_net { struct ctl_table_header *sysctl_header; #endif #ifdef CONFIG_NF_CONNTRACK_EVENTS - struct delayed_work ecache_dwork; - struct netns_ct *ct_net; + struct nf_conntrack_net_ecache ecache; #endif }; @@ -96,7 +101,6 @@ struct nf_conn { /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; - u16 cpu; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) @@ -232,13 +236,16 @@ static inline bool nf_ct_kill(struct nf_conn *ct) return nf_ct_delete(ct, 0, 0); } -/* Set all unconfirmed conntrack as dying */ -void nf_ct_unconfirmed_destroy(struct net *); +struct nf_ct_iter_data { + struct net *net; + void *data; + u32 portid; + int report; +}; /* Iterate over all conntracks: if iter returns true, it's deleted. */ -void nf_ct_iterate_cleanup_net(struct net *net, - int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report); +void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data), + const struct nf_ct_iter_data *iter_data); /* also set unconfirmed conntracks as dying. Only use in module exit path. */ void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 7f44a771530e..4b2b7f8914ea 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -78,7 +78,6 @@ static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir, void nf_conntrack_acct_pernet_init(struct net *net); -int nf_conntrack_acct_init(void); void nf_conntrack_acct_fini(void); #endif /* _NF_CONNTRACK_ACCT_H */ diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h new file mode 100644 index 000000000000..a473b56842c5 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_BPF_H +#define _NF_CONNTRACK_BPF_H + +#include <linux/btf.h> +#include <linux/kconfig.h> + +#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern int register_nf_conntrack_bpf(void); + +#else + +static inline int register_nf_conntrack_bpf(void) +{ + return 0; +} + +#endif + +#endif /* _NF_CONNTRACK_BPF_H */ diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 13807ea94cd2..37866c8386e2 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -58,9 +58,14 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) int ret = NF_ACCEPT; if (ct) { - if (!nf_ct_is_confirmed(ct)) + if (!nf_ct_is_confirmed(ct)) { ret = __nf_conntrack_confirm(skb); - if (likely(ret == NF_ACCEPT)) + + if (ret == NF_ACCEPT) + ct = (struct nf_conn *)skb_nfct(skb); + } + + if (ret == NF_ACCEPT && nf_ct_ecache_exist(ct)) nf_ct_deliver_cached_events(ct); } return ret; diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index 9645b47fa7e4..e227d997fc71 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -10,6 +10,7 @@ struct nf_conncount_data; struct nf_conncount_list { spinlock_t list_lock; + u32 last_gc; /* jiffies at most recent gc */ struct list_head head; /* connections with the same filtering key */ unsigned int count; /* length of list */ }; diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index d932e22edcb4..0c1dac318e02 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -14,17 +14,15 @@ #include <net/netfilter/nf_conntrack_extend.h> enum nf_ct_ecache_state { - NFCT_ECACHE_UNKNOWN, /* destroy event not sent */ NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */ }; struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ - u16 missed; /* missed events */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ - enum nf_ct_ecache_state state:8;/* ecache state */ + u32 missed; /* missed events */ u32 portid; /* netlink portid of destroyer */ }; @@ -38,28 +36,12 @@ nf_ct_ecache_find(const struct nf_conn *ct) #endif } -static inline struct nf_conntrack_ecache * -nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) +static inline bool nf_ct_ecache_exist(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - struct net *net = nf_ct_net(ct); - struct nf_conntrack_ecache *e; - - if (!ctmask && !expmask && net->ct.sysctl_events) { - ctmask = ~0; - expmask = ~0; - } - if (!ctmask && !expmask) - return NULL; - - e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); - if (e) { - e->ctmask = ctmask; - e->expmask = expmask; - } - return e; + return nf_ct_ext_exist(ct, NF_CT_EXT_ECACHE); #else - return NULL; + return false; #endif } @@ -91,6 +73,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct); int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, u32 portid, int report); +bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp); #else static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) @@ -105,6 +88,10 @@ static inline int nf_conntrack_eventmask_report(unsigned int eventmask, return 0; } +static inline bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) +{ + return false; +} #endif static inline void @@ -130,30 +117,20 @@ nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, u32 portid, int report) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - const struct net *net = nf_ct_net(ct); - - if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) - return 0; - - return nf_conntrack_eventmask_report(1 << event, ct, portid, report); -#else - return 0; + if (nf_ct_ecache_exist(ct)) + return nf_conntrack_eventmask_report(1 << event, ct, portid, report); #endif + return 0; } static inline int nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - const struct net *net = nf_ct_net(ct); - - if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) - return 0; - - return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); -#else - return 0; + if (nf_ct_ecache_exist(ct)) + return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); #endif + return 0; } #ifdef CONFIG_NF_CONNTRACK_EVENTS @@ -166,8 +143,7 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state); void nf_conntrack_ecache_pernet_init(struct net *net); void nf_conntrack_ecache_pernet_fini(struct net *net); -int nf_conntrack_ecache_init(void); -void nf_conntrack_ecache_fini(void); +struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net); static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { @@ -194,16 +170,6 @@ static inline void nf_conntrack_ecache_pernet_init(struct net *net) static inline void nf_conntrack_ecache_pernet_fini(struct net *net) { } - -static inline int nf_conntrack_ecache_init(void) -{ - return 0; -} - -static inline void nf_conntrack_ecache_fini(void) -{ -} - static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return false; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ #endif /*_NF_CONNTRACK_ECACHE_H*/ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index c7515d82ab06..0b247248b032 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -34,22 +34,12 @@ enum nf_ct_ext_id { NF_CT_EXT_NUM, }; -#define NF_CT_EXT_HELPER_TYPE struct nf_conn_help -#define NF_CT_EXT_NAT_TYPE struct nf_conn_nat -#define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj -#define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct -#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache -#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp -#define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout -#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels -#define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy -#define NF_CT_EXT_ACT_CT_TYPE struct nf_conn_act_ct_ext - /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { u8 offset[NF_CT_EXT_NUM]; u8 len; - char data[]; + unsigned int gen_id; + char data[] __aligned(8); }; static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id) @@ -62,33 +52,28 @@ static inline bool nf_ct_ext_exist(const struct nf_conn *ct, u8 id) return (ct->ext && __nf_ct_ext_exist(ct->ext, id)); } -static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) +void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id); + +static inline void *nf_ct_ext_find(const struct nf_conn *ct, u8 id) { - if (!nf_ct_ext_exist(ct, id)) + struct nf_ct_ext *ext = ct->ext; + + if (!ext || !__nf_ct_ext_exist(ext, id)) return NULL; + if (unlikely(ext->gen_id)) + return __nf_ct_ext_find(ext, id); + return (void *)ct->ext + ct->ext->offset[id]; } -#define nf_ct_ext_find(ext, id) \ - ((id##_TYPE *)__nf_ct_ext_find((ext), (id))) - -/* Destroy all relationships */ -void nf_ct_ext_destroy(struct nf_conn *ct); /* Add this type, returns pointer to data or NULL. */ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); -struct nf_ct_ext_type { - /* Destroys relationships (can be NULL). */ - void (*destroy)(struct nf_conn *ct); - - enum nf_ct_ext_id id; - - /* Length and min alignment. */ - u8 len; - u8 align; -}; +/* ext genid. if ext->id != ext_genid, extensions cannot be used + * anymore unless conntrack has CONFIRMED bit set. + */ +extern atomic_t nf_conntrack_ext_genid; +void nf_ct_ext_bump_genid(void); -int nf_ct_extend_register(const struct nf_ct_ext_type *type); -void nf_ct_extend_unregister(const struct nf_ct_ext_type *type); #endif /* _NF_CONNTRACK_EXTEND_H */ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 37f0fbefb060..9939c366f720 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -177,4 +177,5 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat); int nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum); void nf_nat_helper_put(struct nf_conntrack_helper *helper); +void nf_ct_set_auto_assign_helper_warned(struct net *net); #endif /*_NF_CONNTRACK_HELPER_H*/ diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index ba916411c4e1..66bab6c60d12 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -17,10 +17,18 @@ struct nf_conn_labels { unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)]; }; +/* Can't use nf_ct_ext_find(), flow dissector cannot use symbols + * exported by nf_conntrack module. + */ static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_LABELS - return nf_ct_ext_find(ct, NF_CT_EXT_LABELS); + struct nf_ct_ext *ext = ct->ext; + + if (!ext || !__nf_ct_ext_exist(ext, NF_CT_EXT_LABELS)) + return NULL; + + return (void *)ct->ext + ct->ext->offset[NF_CT_EXT_LABELS]; #else return NULL; #endif @@ -45,12 +53,9 @@ int nf_connlabels_replace(struct nf_conn *ct, #ifdef CONFIG_NF_CONNTRACK_LABELS int nf_conntrack_labels_init(void); -void nf_conntrack_labels_fini(void); int nf_connlabels_get(struct net *net, unsigned int bit); void nf_connlabels_put(struct net *net); #else -static inline int nf_conntrack_labels_init(void) { return 0; } -static inline void nf_conntrack_labels_fini(void) {} static inline int nf_connlabels_get(struct net *net, unsigned int bit) { return 0; } static inline void nf_connlabels_put(struct net *net) {} #endif diff --git a/include/net/netfilter/nf_conntrack_seqadj.h b/include/net/netfilter/nf_conntrack_seqadj.h index 0a10b50537ae..883c414b768e 100644 --- a/include/net/netfilter/nf_conntrack_seqadj.h +++ b/include/net/netfilter/nf_conntrack_seqadj.h @@ -42,7 +42,4 @@ int nf_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff); s32 nf_ct_seq_offset(const struct nf_conn *ct, enum ip_conntrack_dir, u32 seq); -int nf_conntrack_seqadj_init(void); -void nf_conntrack_seqadj_fini(void); - #endif /* _NF_CONNTRACK_SEQADJ_H */ diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 659b0ea25b4d..fea258983d23 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -17,14 +17,6 @@ struct nf_ct_timeout { char data[]; }; -struct ctnl_timeout { - struct list_head head; - struct rcu_head rcu_head; - refcount_t refcnt; - char name[CTNL_TIMEOUT_NAME_MAX]; - struct nf_ct_timeout timeout; -}; - struct nf_conn_timeout { struct nf_ct_timeout __rcu *timeout; }; @@ -89,23 +81,11 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -int nf_conntrack_timeout_init(void); -void nf_conntrack_timeout_fini(void); void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout); int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, const char *timeout_name); void nf_ct_destroy_timeout(struct nf_conn *ct); #else -static inline int nf_conntrack_timeout_init(void) -{ - return 0; -} - -static inline void nf_conntrack_timeout_fini(void) -{ - return; -} - static inline int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, const char *timeout_name) @@ -120,8 +100,12 @@ static inline void nf_ct_destroy_timeout(struct nf_conn *ct) #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -extern struct nf_ct_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name); -extern void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout); +struct nf_ct_timeout_hooks { + struct nf_ct_timeout *(*timeout_find_get)(struct net *net, const char *name); + void (*timeout_put)(struct nf_ct_timeout *timeout); +}; + +extern const struct nf_ct_timeout_hooks *nf_ct_timeout_hook; #endif #endif /* _NF_CONNTRACK_TIMEOUT_H */ diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h index 820ea34b6029..57138d974a9f 100644 --- a/include/net/netfilter/nf_conntrack_timestamp.h +++ b/include/net/netfilter/nf_conntrack_timestamp.h @@ -40,21 +40,8 @@ struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp) #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP void nf_conntrack_tstamp_pernet_init(struct net *net); - -int nf_conntrack_tstamp_init(void); -void nf_conntrack_tstamp_fini(void); #else static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {} - -static inline int nf_conntrack_tstamp_init(void) -{ - return 0; -} - -static inline void nf_conntrack_tstamp_fini(void) -{ - return; -} #endif /* CONFIG_NF_CONNTRACK_TIMESTAMP */ #endif /* _NF_CONNTRACK_TSTAMP_H */ diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index bd59e950f4d6..64daafd1fc41 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -10,6 +10,8 @@ #include <linux/netfilter/nf_conntrack_tuple_common.h> #include <net/flow_offload.h> #include <net/dst.h> +#include <linux/if_pppox.h> +#include <linux/ppp_defs.h> struct nf_flowtable; struct nf_flow_rule; @@ -317,4 +319,20 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, int nf_flow_table_offload_init(void); void nf_flow_table_offload_exit(void); +static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) +{ + __be16 proto; + + proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + + sizeof(struct pppoe_hdr))); + switch (proto) { + case htons(PPP_IP): + return htons(ETH_P_IP); + case htons(PPP_IPV6): + return htons(ETH_P_IPV6); + } + + return 0; +} + #endif /* _NF_FLOW_TABLE_H */ diff --git a/include/net/netfilter/nf_reject.h b/include/net/netfilter/nf_reject.h index 9051c3a0c8e7..7c669792fb9c 100644 --- a/include/net/netfilter/nf_reject.h +++ b/include/net/netfilter/nf_reject.h @@ -5,12 +5,28 @@ #include <linux/types.h> #include <uapi/linux/in.h> -static inline bool nf_reject_verify_csum(__u8 proto) +static inline bool nf_reject_verify_csum(struct sk_buff *skb, int dataoff, + __u8 proto) { /* Skip protocols that don't use 16-bit one's complement checksum * of the entire payload. */ switch (proto) { + /* Protocols with optional checksums. */ + case IPPROTO_UDP: { + const struct udphdr *udp_hdr; + struct udphdr _udp_hdr; + + udp_hdr = skb_header_pointer(skb, dataoff, + sizeof(_udp_hdr), + &_udp_hdr); + if (!udp_hdr || udp_hdr->check) + return true; + + return false; + } + case IPPROTO_GRE: + /* Protocols with other integrity checks. */ case IPPROTO_AH: case IPPROTO_ESP: @@ -19,9 +35,6 @@ static inline bool nf_reject_verify_csum(__u8 proto) /* Protocols with partial checksums. */ case IPPROTO_UDPLITE: case IPPROTO_DCCP: - - /* Protocols with optional checksums. */ - case IPPROTO_GRE: return false; } return true; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c4c0861deac1..5c4e5a96a984 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -126,6 +126,7 @@ struct nft_regs_track { struct { const struct nft_expr *selector; const struct nft_expr *bitwise; + u8 num_reg; } regs[NFT_REG32_NUM]; const struct nft_expr *cur; @@ -1089,7 +1090,6 @@ struct nft_stats { struct nft_hook { struct list_head list; - bool inactive; struct nf_hook_ops ops; struct rcu_head rcu; }; @@ -1338,24 +1338,28 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type); /** * struct nft_traceinfo - nft tracing information and state * + * @trace: other struct members are initialised + * @nf_trace: copy of skb->nf_trace before rule evaluation + * @type: event type (enum nft_trace_types) + * @skbid: hash of skb to be used as trace id + * @packet_dumped: packet headers sent in a previous traceinfo message * @pkt: pktinfo currently processed * @basechain: base chain currently processed * @chain: chain currently processed * @rule: rule that was evaluated * @verdict: verdict given by rule - * @type: event type (enum nft_trace_types) - * @packet_dumped: packet headers sent in a previous traceinfo message - * @trace: other struct members are initialised */ struct nft_traceinfo { + bool trace; + bool nf_trace; + bool packet_dumped; + enum nft_trace_types type:8; + u32 skbid; const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; const struct nft_rule_dp *rule; const struct nft_verdict *verdict; - enum nft_trace_types type; - bool packet_dumped; - bool trace; }; void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, @@ -1633,4 +1637,25 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net) return net_generic(net, nf_tables_net_id); } +#define __NFT_REDUCE_READONLY 1UL +#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY + +static inline bool nft_reduce_is_readonly(const struct nft_expr *expr) +{ + return expr->ops->reduce == NFT_REDUCE_READONLY; +} + +void nft_reg_track_update(struct nft_regs_track *track, + const struct nft_expr *expr, u8 dreg, u8 len); +void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len); +void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg); + +static inline bool nft_reg_track_cmp(struct nft_regs_track *track, + const struct nft_expr *expr, u8 dreg) +{ + return track->regs[dreg].selector && + track->regs[dreg].selector->ops == expr->ops && + track->regs[dreg].num_reg == 0; +} + #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index b6fb1fdff9b2..0ea7c55cea4d 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -42,6 +42,14 @@ struct nft_cmp_fast_expr { bool inv; }; +struct nft_cmp16_fast_expr { + struct nft_data data; + struct nft_data mask; + u8 sreg; + u8 len; + bool inv; +}; + struct nft_immediate_expr { struct nft_data data; u8 dreg; @@ -59,6 +67,7 @@ static inline u32 nft_cmp_fast_mask(unsigned int len) } extern const struct nft_expr_ops nft_cmp_fast_ops; +extern const struct nft_expr_ops nft_cmp16_fast_ops; struct nft_payload { enum nft_payload_bases base:8; diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index 797147843958..3568b6a2f5f0 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -92,7 +92,7 @@ int nft_flow_rule_offload_commit(struct net *net); NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ memset(&(__reg)->mask, 0xff, (__reg)->len); -int nft_chain_offload_priority(struct nft_base_chain *basechain); +bool nft_chain_offload_support(const struct nft_base_chain *basechain); int nft_offload_init(void); void nft_offload_exit(void); diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 237f3757637e..eed099eae672 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -37,4 +37,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct net_device *dev); + +bool nft_fib_reduce(struct nft_regs_track *track, + const struct nft_expr *expr); #endif diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 2dce55c736f4..9b51cc67de54 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -6,6 +6,7 @@ struct nft_meta { enum nft_meta_keys key:8; + u8 len; union { u8 dreg; u8 sreg; @@ -43,4 +44,6 @@ int nft_meta_set_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); +bool nft_meta_get_reduce(struct nft_regs_track *track, + const struct nft_expr *expr); #endif diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0294f3d473af..0677cd3de034 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -93,14 +93,9 @@ struct nf_ip_net { #endif }; -struct ct_pcpu { - spinlock_t lock; - struct hlist_nulls_head unconfirmed; - struct hlist_nulls_head dying; -}; - struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS + bool ctnetlink_has_listener; bool ecache_dwork_pending; #endif u8 sysctl_log_invalid; /* Log invalid packets */ @@ -110,7 +105,6 @@ struct netns_ct { u8 sysctl_tstamp; u8 sysctl_checksum; - struct ct_pcpu __percpu *pcpu_lists; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_ip_net nf_ct_proto; diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 552bc25b1933..388244e315e7 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -10,6 +10,7 @@ struct netns_core { struct ctl_table_header *sysctl_hdr; int sysctl_somaxconn; + u8 sysctl_txrehash; #ifdef CONFIG_PROC_FS struct prot_inuse __percpu *prot_inuse; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 78557643526e..ce0cc4e8d8c7 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -31,18 +31,16 @@ struct ping_group_range { struct inet_hashinfo; struct inet_timewait_death_row { - atomic_t tw_count; - char tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)]; + refcount_t tw_refcount; - struct inet_hashinfo *hashinfo; + struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; int sysctl_max_tw_buckets; }; struct tcp_fastopen_context; struct netns_ipv4 { - /* Please keep tcp_death_row at first field in netns_ipv4 */ - struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp; + struct inet_timewait_death_row *tcp_death_row; #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -70,11 +68,9 @@ struct netns_ipv4 { struct hlist_head *fib_table_hash; struct sock *fibnl; - struct sock * __percpu *icmp_sk; struct sock *mc_autojoin_sk; struct inet_peer_base *peers; - struct sock * __percpu *tcp_sk; struct fqdir *fqdir; u8 sysctl_icmp_echo_ignore_all; @@ -87,6 +83,7 @@ struct netns_ipv4 { u32 ip_rt_min_pmtu; int ip_rt_mtu_expires; + int ip_rt_min_advmss; struct local_ports ip_local_ports; @@ -130,6 +127,7 @@ struct netns_ipv4 { u8 sysctl_tcp_synack_retries; u8 sysctl_tcp_syncookies; u8 sysctl_tcp_migrate_req; + u8 sysctl_tcp_comp_sack_nr; int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; @@ -163,9 +161,9 @@ struct netns_ipv4 { int sysctl_tcp_challenge_ack_limit; int sysctl_tcp_min_rtt_wlen; u8 sysctl_tcp_min_tso_segs; + u8 sysctl_tcp_tso_rtt_log; u8 sysctl_tcp_autocorking; u8 sysctl_tcp_reflect_tos; - u8 sysctl_tcp_comp_sack_nr; int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 6bd7e5a85ce7..b4af4837d80b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -75,8 +75,8 @@ struct netns_ipv6 { struct list_head fib6_walkers; rwlock_t fib6_walker_lock; spinlock_t fib6_gc_lock; - unsigned int ip6_rt_gc_expire; - unsigned long ip6_rt_last_gc; + atomic_t ip6_rt_gc_expire; + unsigned long ip6_rt_last_gc; unsigned char flowlabel_has_excl; #ifdef CONFIG_IPV6_MULTIPLE_TABLES bool fib6_has_custom_rules; @@ -89,11 +89,15 @@ struct netns_ipv6 { struct fib6_table *fib6_local_tbl; struct fib_rules_ops *fib6_rules_ops; #endif - struct sock * __percpu *icmp_sk; struct sock *ndisc_sk; struct sock *tcp_sk; struct sock *igmp_sk; struct sock *mc_autojoin_sk; + + struct hlist_head *inet6_addr_lst; + spinlock_t addrconf_hash_lock; + struct delayed_work addr_chk_work; + #ifdef CONFIG_IPV6_MROUTE #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES struct mr_table *mrt6; diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index ea8a9cf2619b..e5389eeaf8bd 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -12,5 +12,11 @@ struct netns_smc { /* protect fback_rsn */ struct mutex mutex_fback_rsn; struct smc_stats_rsn *fback_rsn; + + bool limit_smc_hs; /* constraint on handshake */ +#ifdef CONFIG_SYSCTL + struct ctl_table_header *smc_hdr; +#endif + unsigned int sysctl_autocorking_size; }; #endif diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 947733a639a6..bd7c3be4af5d 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -66,11 +66,7 @@ struct netns_xfrm { int sysctl_larval_drop; u32 sysctl_acq_expires; - u8 policy_default; -#define XFRM_POL_DEFAULT_IN 1 -#define XFRM_POL_DEFAULT_OUT 2 -#define XFRM_POL_DEFAULT_FWD 4 -#define XFRM_POL_DEFAULT_MASK 7 + u8 policy_default[XFRM_POLICY_MAX]; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_hdr; diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 79a805542d0f..813c93499f20 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -84,6 +84,69 @@ struct page_pool_params { void *init_arg; }; +#ifdef CONFIG_PAGE_POOL_STATS +struct page_pool_alloc_stats { + u64 fast; /* fast path allocations */ + u64 slow; /* slow-path order 0 allocations */ + u64 slow_high_order; /* slow-path high order allocations */ + u64 empty; /* failed refills due to empty ptr ring, forcing + * slow path allocation + */ + u64 refill; /* allocations via successful refill */ + u64 waive; /* failed refills due to numa zone mismatch */ +}; + +struct page_pool_recycle_stats { + u64 cached; /* recycling placed page in the cache. */ + u64 cache_full; /* cache was full */ + u64 ring; /* recycling placed page back into ptr ring */ + u64 ring_full; /* page was released from page-pool because + * PTR ring was full. + */ + u64 released_refcnt; /* page released because of elevated + * refcnt + */ +}; + +/* This struct wraps the above stats structs so users of the + * page_pool_get_stats API can pass a single argument when requesting the + * stats for the page pool. + */ +struct page_pool_stats { + struct page_pool_alloc_stats alloc_stats; + struct page_pool_recycle_stats recycle_stats; +}; + +int page_pool_ethtool_stats_get_count(void); +u8 *page_pool_ethtool_stats_get_strings(u8 *data); +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); + +/* + * Drivers that wish to harvest page pool stats and report them to users + * (perhaps via ethtool, debugfs, or another mechanism) can allocate a + * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. + */ +bool page_pool_get_stats(struct page_pool *pool, + struct page_pool_stats *stats); +#else + +static inline int page_pool_ethtool_stats_get_count(void) +{ + return 0; +} + +static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data) +{ + return data; +} + +static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +{ + return data; +} + +#endif + struct page_pool { struct page_pool_params p; @@ -96,6 +159,11 @@ struct page_pool { unsigned int frag_offset; struct page *frag_page; long frag_users; + +#ifdef CONFIG_PAGE_POOL_STATS + /* these stats are incremented while in softirq context */ + struct page_pool_alloc_stats alloc_stats; +#endif u32 xdp_mem_id; /* @@ -126,6 +194,10 @@ struct page_pool { */ struct ptr_ring ring; +#ifdef CONFIG_PAGE_POOL_STATS + /* recycle stats are per-cpu to avoid locking */ + struct page_pool_recycle_stats __percpu *recycle_stats; +#endif atomic_t pages_state_release_cnt; /* A page_pool is strictly tied to a single RX-queue being @@ -201,21 +273,67 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, } #endif -void page_pool_put_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, bool allow_direct); +void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, + bool allow_direct); -/* Same as above but will try to sync the entire area pool->max_len */ -static inline void page_pool_put_full_page(struct page_pool *pool, - struct page *page, bool allow_direct) +static inline void page_pool_fragment_page(struct page *page, long nr) +{ + atomic_long_set(&page->pp_frag_count, nr); +} + +static inline long page_pool_defrag_page(struct page *page, long nr) +{ + long ret; + + /* If nr == pp_frag_count then we have cleared all remaining + * references to the page. No need to actually overwrite it, instead + * we can leave this to be overwritten by the calling function. + * + * The main advantage to doing this is that an atomic_read is + * generally a much cheaper operation than an atomic update, + * especially when dealing with a page that may be partitioned + * into only 2 or 3 pieces. + */ + if (atomic_long_read(&page->pp_frag_count) == nr) + return 0; + + ret = atomic_long_sub_return(nr, &page->pp_frag_count); + WARN_ON(ret < 0); + return ret; +} + +static inline bool page_pool_is_last_frag(struct page_pool *pool, + struct page *page) +{ + /* If fragments aren't enabled or count is 0 we were the last user */ + return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || + (page_pool_defrag_page(page, 1) == 0); +} + +static inline void page_pool_put_page(struct page_pool *pool, + struct page *page, + unsigned int dma_sync_size, + bool allow_direct) { /* When page_pool isn't compiled-in, net/core/xdp.c doesn't * allow registering MEM_TYPE_PAGE_POOL, but shield linker. */ #ifdef CONFIG_PAGE_POOL - page_pool_put_page(pool, page, -1, allow_direct); + if (!page_pool_is_last_frag(pool, page)) + return; + + page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct); #endif } +/* Same as above but will try to sync the entire area pool->max_len */ +static inline void page_pool_put_full_page(struct page_pool *pool, + struct page *page, bool allow_direct) +{ + page_pool_put_page(pool, page, -1, allow_direct); +} + /* Same as above but the caller must guarantee safe context. e.g NAPI */ static inline void page_pool_recycle_direct(struct page_pool *pool, struct page *page) @@ -243,30 +361,6 @@ static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) page->dma_addr_upper = upper_32_bits(addr); } -static inline void page_pool_set_frag_count(struct page *page, long nr) -{ - atomic_long_set(&page->pp_frag_count, nr); -} - -static inline long page_pool_atomic_sub_frag_count_return(struct page *page, - long nr) -{ - long ret; - - /* As suggested by Alexander, atomic_long_read() may cover up the - * reference count errors, so avoid calling atomic_long_read() in - * the cases of freeing or draining the page_frags, where we would - * not expect it to match or that are slowpath anyway. - */ - if (__builtin_constant_p(nr) && - atomic_long_read(&page->pp_frag_count) == nr) - return 0; - - ret = atomic_long_sub_return(nr, &page->pp_frag_count); - WARN_ON(ret < 0); - return ret; -} - static inline bool is_page_pool_compiled_in(void) { #ifdef CONFIG_PAGE_POOL diff --git a/include/net/ping.h b/include/net/ping.h index 2fe78874318c..e4ff3911cbf5 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -71,12 +71,12 @@ void ping_err(struct sk_buff *skb, int offset, u32 info); int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *); -int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -bool ping_rcv(struct sk_buff *skb); +enum skb_drop_reason ping_rcv(struct sk_buff *skb); #ifdef CONFIG_PROC_FS void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 676cb8ea9e15..8cf001aed858 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -547,10 +547,12 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } int tc_setup_offload_action(struct flow_action *flow_action, - const struct tcf_exts *exts); + const struct tcf_exts *exts, + struct netlink_ext_ack *extack); void tc_cleanup_offload_action(struct flow_action *flow_action); int tc_setup_action(struct flow_action *flow_action, - struct tc_action *actions[]); + struct tc_action *actions[], + struct netlink_ext_ack *extack); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); @@ -1028,4 +1030,15 @@ struct tc_fifo_qopt_offload { }; }; +#ifdef CONFIG_NET_CLS_ACT +DECLARE_STATIC_KEY_FALSE(tc_skb_ext_tc); +void tc_skb_ext_tc_enable(void); +void tc_skb_ext_tc_disable(void); +#define tc_skb_ext_tc_enabled() static_branch_unlikely(&tc_skb_ext_tc) +#else /* CONFIG_NET_CLS_ACT */ +static inline void tc_skb_ext_tc_enable(void) { } +static inline void tc_skb_ext_tc_disable(void) { } +#define tc_skb_ext_tc_enabled() false +#endif + #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 9e7b21c0b3a6..44a35531952e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -63,12 +63,6 @@ static inline psched_time_t psched_get_time(void) return PSCHED_NS2TICKS(ktime_get_ns()); } -static inline psched_tdiff_t -psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) -{ - return min(tv1 - tv2, bound); -} - struct qdisc_watchdog { u64 last_expires; struct hrtimer timer; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 29e41ff3ec93..144c39db9898 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -70,6 +70,7 @@ struct request_sock { struct saved_syn *saved_syn; u32 secid; u32 peer_secid; + u32 timeout; }; static inline struct request_sock *inet_reqsk(const struct sock *sk) @@ -104,6 +105,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, sk_node_init(&req_to_sk(req)->sk_node); sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; + req->timeout = 0; req->num_timeout = 0; req->num_retrans = 0; req->sk = NULL; diff --git a/include/net/route.h b/include/net/route.h index 25404fc2b483..991a3985712d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -43,6 +43,19 @@ #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) +static inline __u8 ip_sock_rt_scope(const struct sock *sk) +{ + if (sock_flag(sk, SOCK_LOCALROUTE)) + return RT_SCOPE_LINK; + + return RT_SCOPE_UNIVERSE; +} + +static inline __u8 ip_sock_rt_tos(const struct sock *sk) +{ + return RT_TOS(inet_sk(sk)->tos); +} + struct ip_tunnel_info; struct fib_nh; struct fib_info; @@ -289,39 +302,38 @@ static inline char rt_tos2priority(u8 tos) * ip_route_newports() calls. */ -static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src, - u32 tos, int oif, u8 protocol, +static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, + __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, - struct sock *sk) + const struct sock *sk) { __u8 flow_flags = 0; if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport, - sk->sk_uid); + flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk), + ip_sock_rt_scope(sk), protocol, flow_flags, dst, + src, dport, sport, sk->sk_uid); } -static inline struct rtable *ip_route_connect(struct flowi4 *fl4, - __be32 dst, __be32 src, u32 tos, - int oif, u8 protocol, +static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst, + __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, struct sock *sk) { struct net *net = sock_net(sk); struct rtable *rt; - ip_route_connect_init(fl4, dst, src, tos, oif, protocol, - sport, dport, sk); + ip_route_connect_init(fl4, dst, src, oif, protocol, sport, dport, sk); if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) return rt; ip_rt_put(rt); - flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr); + flowi4_update_output(fl4, oif, fl4->flowi4_tos, fl4->daddr, + fl4->saddr); } security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 9f48733bfd21..bf8bb3357825 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -10,9 +10,23 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { - RTNL_FLAG_DOIT_UNLOCKED = 1, + RTNL_FLAG_DOIT_UNLOCKED = BIT(0), + RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1), }; +enum rtnl_kinds { + RTNL_KIND_NEW, + RTNL_KIND_DEL, + RTNL_KIND_GET, + RTNL_KIND_SET +}; +#define RTNL_KIND_MASK 0x3 + +static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype) +{ + return msgtype & RTNL_KIND_MASK; +} + void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_register_module(struct module *owner, int protocol, int msgtype, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 472843eedbae..d6cf5116b5f9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -187,37 +187,17 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) if (spin_trylock(&qdisc->seqlock)) return true; - /* Paired with smp_mb__after_atomic() to make sure - * STATE_MISSED checking is synchronized with clearing - * in pfifo_fast_dequeue(). + /* No need to insist if the MISSED flag was already set. + * Note that test_and_set_bit() also gives us memory ordering + * guarantees wrt potential earlier enqueue() and below + * spin_trylock(), both of which are necessary to prevent races */ - smp_mb__before_atomic(); - - /* If the MISSED flag is set, it means other thread has - * set the MISSED flag before second spin_trylock(), so - * we can return false here to avoid multi cpus doing - * the set_bit() and second spin_trylock() concurrently. - */ - if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) + if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state)) return false; - /* Set the MISSED flag before the second spin_trylock(), - * if the second spin_trylock() return false, it means - * other cpu holding the lock will do dequeuing for us - * or it will see the MISSED flag set after releasing - * lock and reschedule the net_tx_action() to do the - * dequeuing. - */ - set_bit(__QDISC_STATE_MISSED, &qdisc->state); - - /* spin_trylock() only has load-acquire semantic, so use - * smp_mb__after_atomic() to ensure STATE_MISSED is set - * before doing the second spin_trylock(). - */ - smp_mb__after_atomic(); - - /* Retry again in case other CPU may not see the new flag - * after it releases the lock at the end of qdisc_run_end(). + /* Try to take the lock again to make sure that we will either + * grab it or the CPU that still has it will see MISSED set + * when testing it in qdisc_run_end() */ return spin_trylock(&qdisc->seqlock); } @@ -229,6 +209,12 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); + /* spin_unlock() only has store-release semantic. The unlock + * and test_bit() ordering is a store-load ordering, so a full + * memory barrier is needed here. + */ + smp_mb(); + if (unlikely(test_bit(__QDISC_STATE_MISSED, &qdisc->state))) __netif_schedule(qdisc); @@ -518,11 +504,6 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) BUILD_BUG_ON(sizeof(qcb->data) < sz); } -static inline int qdisc_qlen_cpu(const struct Qdisc *q) -{ - return this_cpu_ptr(q->cpu_qstats)->qlen; -} - static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index bf3716fe83e0..a04999ee99b0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,7 +103,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc); extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); -struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int *); typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index d7d2495f83c2..dac91aa38c5a 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -4,8 +4,8 @@ #include <linux/types.h> -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); diff --git a/include/net/sock.h b/include/net/sock.h index 50aecd28b355..72ca97ccb460 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -161,9 +161,6 @@ typedef __u64 __bitwise __addrpair; * for struct sock and struct inet_timewait_sock. */ struct sock_common { - /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned - * address on 64bit arches : cf INET_MATCH() - */ union { __addrpair skc_addrpair; struct { @@ -292,7 +289,6 @@ struct sk_filter; * @sk_pacing_shift: scaling factor for TCP Small Queues * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held - * @defer_list: head of llist storing skbs to be freed * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, @@ -316,6 +312,7 @@ struct sk_filter; * @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting * @sk_txhash: computed flow hash for use on transmit + * @sk_txrehash: enable TX hash rethink * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received @@ -416,7 +413,6 @@ struct sock { struct sk_buff *head; struct sk_buff *tail; } sk_backlog; - struct llist_head defer_list; #define sk_rmem_alloc sk_backlog.rmem_alloc @@ -491,6 +487,7 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; + u8 sk_txrehash; #ifdef CONFIG_NET_RX_BUSY_POLL u8 sk_prefer_busy_poll; u16 sk_busy_poll_budget; @@ -587,6 +584,18 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk) __tmp | SK_USER_DATA_NOCOPY); \ }) +static inline +struct net *sock_net(const struct sock *sk) +{ + return read_pnet(&sk->sk_net); +} + +static inline +void sock_net_set(struct sock *sk, struct net *net) +{ + write_pnet(&sk->sk_net, net); +} + /* * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK * or not whether his port will be reused by someone else. SK_FORCE_REUSE @@ -881,6 +890,7 @@ enum sock_flags { SOCK_TXTIME, SOCK_XDP, /* XDP is attached */ SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ + SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -1188,8 +1198,7 @@ struct proto { int (*sendmsg)(struct sock *sk, struct msghdr *msg, size_t len); int (*recvmsg)(struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, - int *addr_len); + size_t len, int flags, int *addr_len); int (*sendpage)(struct sock *sk, struct page *page, int offset, size_t size, int flags); int (*bind)(struct sock *sk, @@ -1811,11 +1820,17 @@ int sock_getsockopt(struct socket *sock, int level, int op, char __user *optval, int __user *optlen); int sock_gettstamp(struct socket *sock, void __user *userstamp, bool timeval, bool time32); -struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, - int noblock, int *errcode); struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, unsigned long data_len, int noblock, int *errcode, int max_page_order); + +static inline struct sk_buff *sock_alloc_send_skb(struct sock *sk, + unsigned long size, + int noblock, int *errcode) +{ + return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0); +} + void *sock_kmalloc(struct sock *sk, int size, gfp_t priority); void sock_kfree_s(struct sock *sk, void *mem, int size); void sock_kzfree_s(struct sock *sk, void *mem, int size); @@ -2054,7 +2069,7 @@ static inline void sk_set_txhash(struct sock *sk) static inline bool sk_rethink_txhash(struct sock *sk) { - if (sk->sk_txhash) { + if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) { sk_set_txhash(sk); return true; } @@ -2378,7 +2393,14 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, void (*destructor)(struct sock *sk, struct sk_buff *skb)); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason); + +static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + return sock_queue_rcv_skb_reason(sk, skb, NULL); +} int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); struct sk_buff *sock_dequeue_err_skb(struct sock *sk); @@ -2629,20 +2651,21 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) __sock_recv_wifi_status(msg, sk, skb); } -void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb); +void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb); #define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC) -static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb) +static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) { -#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \ - (1UL << SOCK_RCVTSTAMP)) +#define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \ + (1UL << SOCK_RCVTSTAMP) | \ + (1UL << SOCK_RCVMARK)) #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) - if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) - __sock_recv_ts_and_drops(msg, sk, skb); + if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY) + __sock_recv_cmsgs(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sock_write_timestamp(sk, skb->tstamp); else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) @@ -2704,18 +2727,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } -static inline -struct net *sock_net(const struct sock *sk) -{ - return read_pnet(&sk->sk_net); -} - -static inline -void sock_net_set(struct sock *sk, struct net *net) -{ - write_pnet(&sk->sk_net, net); -} - static inline bool skb_sk_is_prefetched(struct sk_buff *skb) { @@ -2864,13 +2875,14 @@ static inline void sk_pacing_shift_update(struct sock *sk, int val) */ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) { + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); int mdif; - if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif) + if (!bound_dev_if || bound_dev_if == dif) return true; mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif); - if (mdif && mdif == sk->sk_bound_dev_if) + if (mdif && mdif == bound_dev_if) return true; return false; diff --git a/include/net/strparser.h b/include/net/strparser.h index 732b7097d78e..a191486eb1e4 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -70,6 +70,10 @@ struct sk_skb_cb { * when dst_reg == src_reg. */ u64 temp_reg; + struct tls_msg { + u8 control; + u8 decrypted; + } tls; }; static inline struct strp_msg *strp_msg(struct sk_buff *skb) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d353793dfeb5..aa0171d5786d 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -19,6 +19,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_UNDEFINED, SWITCHDEV_ATTR_ID_PORT_STP_STATE, + SWITCHDEV_ATTR_ID_PORT_MST_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_MROUTER, @@ -27,7 +28,14 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL, SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, + SWITCHDEV_ATTR_ID_BRIDGE_MST, SWITCHDEV_ATTR_ID_MRP_PORT_ROLE, + SWITCHDEV_ATTR_ID_VLAN_MSTI, +}; + +struct switchdev_mst_state { + u16 msti; + u8 state; }; struct switchdev_brport_flags { @@ -35,6 +43,11 @@ struct switchdev_brport_flags { unsigned long mask; }; +struct switchdev_vlan_msti { + u16 vid; + u16 msti; +}; + struct switchdev_attr { struct net_device *orig_dev; enum switchdev_attr_id id; @@ -43,13 +56,16 @@ struct switchdev_attr { void (*complete)(struct net_device *dev, int err, void *priv); union { u8 stp_state; /* PORT_STP_STATE */ + struct switchdev_mst_state mst_state; /* PORT_MST_STATE */ struct switchdev_brport_flags brport_flags; /* PORT_BRIDGE_FLAGS */ bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ u16 vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */ + bool mst; /* BRIDGE_MST */ bool mc_disabled; /* MC_DISABLED */ u8 mrp_port_role; /* MRP_PORT_ROLE */ + struct switchdev_vlan_msti vlan_msti; /* VLAN_MSTI */ } u; }; @@ -81,6 +97,13 @@ struct switchdev_obj_port_vlan { struct switchdev_obj obj; u16 flags; u16 vid; + /* If set, the notifier signifies a change of one of the following + * flags for a VLAN that already exists: + * - BRIDGE_VLAN_INFO_PVID + * - BRIDGE_VLAN_INFO_UNTAGGED + * Entries with BRIDGE_VLAN_INFO_BRENTRY unset are not notified at all. + */ + bool changed; }; #define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \ @@ -306,10 +329,7 @@ int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long e const struct net_device *foreign_dev), int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)); + const struct switchdev_notifier_fdb_info *fdb_info)); int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, @@ -317,11 +337,26 @@ int switchdev_handle_port_obj_add(struct net_device *dev, int (*add_cb)(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj, struct netlink_ext_ack *extack)); +int switchdev_handle_port_obj_add_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack)); int switchdev_handle_port_obj_del(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), int (*del_cb)(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj)); +int switchdev_handle_port_obj_del_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj)); int switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, @@ -421,10 +456,7 @@ switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event const struct net_device *foreign_dev), int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)) + const struct switchdev_notifier_fdb_info *fdb_info)) { return 0; } @@ -440,6 +472,18 @@ switchdev_handle_port_obj_add(struct net_device *dev, return 0; } +static inline int switchdev_handle_port_obj_add_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack)) +{ + return 0; +} + static inline int switchdev_handle_port_obj_del(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, @@ -451,6 +495,18 @@ switchdev_handle_port_obj_del(struct net_device *dev, } static inline int +switchdev_handle_port_obj_del_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj)) +{ + return 0; +} + +static inline int switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, bool (*check_cb)(const struct net_device *dev), diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index eb8f01c819e6..832efd40e023 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -59,4 +59,19 @@ static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a) return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK; } +static inline bool is_tcf_gact_continue(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_UNSPEC, false); +} + +static inline bool is_tcf_gact_reclassify(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_RECLASSIFY, false); +} + +static inline bool is_tcf_gact_pipe(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_PIPE, false); +} + #endif /* __NET_TC_GACT_H */ diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 748cf87a4d7e..3e02709a1df6 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -14,6 +14,7 @@ struct tcf_pedit { struct tc_action common; unsigned char tcfp_nkeys; unsigned char tcfp_flags; + u32 tcfp_off_max_hint; struct tc_pedit_key *tcfp_keys; struct tcf_pedit_key_ex *tcfp_keys_ex; }; diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h index 72649512dcdd..283bde711a42 100644 --- a/include/net/tc_act/tc_police.h +++ b/include/net/tc_act/tc_police.h @@ -159,4 +159,34 @@ static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act) return params->tcfp_mtu; } +static inline u64 tcf_police_peakrate_bytes_ps(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->peak.rate_bytes_ps; +} + +static inline u32 tcf_police_tcfp_ewma_rate(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->tcfp_ewma_rate; +} + +static inline u16 tcf_police_rate_overhead(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->rate.overhead; +} + #endif /* __NET_TC_POLICE_H */ diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 00bfee70609e..dc1079f28e13 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -17,6 +17,7 @@ struct tcf_skbedit_params { u32 mark; u32 mask; u16 queue_mapping; + u16 mapping_mod; u16 ptype; struct rcu_head rcu; }; @@ -94,4 +95,16 @@ static inline u32 tcf_skbedit_priority(const struct tc_action *a) return priority; } +/* Return true iff action is queue_mapping */ +static inline bool is_tcf_skbedit_queue_mapping(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_QUEUE_MAPPING); +} + +/* Return true iff action is inheritdsfield */ +static inline bool is_tcf_skbedit_inheritdsfield(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_INHERITDSFIELD); +} + #endif /* __NET_TC_SKBEDIT_H */ diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index f94b8bc26f9e..904eddfc1826 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -78,4 +78,14 @@ static inline u8 tcf_vlan_push_prio(const struct tc_action *a) return tcfv_push_prio; } + +static inline void tcf_vlan_push_eth(unsigned char *src, unsigned char *dest, + const struct tc_action *a) +{ + rcu_read_lock(); + memcpy(dest, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_dst, ETH_ALEN); + memcpy(src, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_src, ETH_ALEN); + rcu_read_unlock(); +} + #endif /* __NET_TC_VLAN_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b9fc978fb2ca..1e99f5c61f84 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -407,7 +407,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); -int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); int tcp_set_rcvlowat(struct sock *sk, int val); int tcp_set_window_clamp(struct sock *sk, int val); @@ -480,6 +480,7 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, + const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES @@ -620,6 +621,7 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); void tcp_fin(struct sock *sk); +void tcp_check_space(struct sock *sk); /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); @@ -1042,6 +1044,7 @@ struct rate_sample { int losses; /* number of packets marked lost upon ACK */ u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ u32 prior_in_flight; /* in flight before this ACK */ + u32 last_end_seq; /* end_seq of most recently ACKed packet */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ @@ -1139,15 +1142,6 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk) return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; } -static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (icsk->icsk_ca_ops->set_state) - icsk->icsk_ca_ops->set_state(sk, ca_state); - icsk->icsk_ca_state = ca_state; -} - static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1156,6 +1150,9 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) icsk->icsk_ca_ops->cwnd_event(sk, event); } +/* From tcp_cong.c */ +void tcp_set_ca_state(struct sock *sk, const u8 ca_state); + /* From tcp_rate.c */ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, @@ -1164,6 +1161,11 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, bool is_sack_reneg, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); +static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) +{ + return t1 > t2 || (t1 == t2 && after(seq1, seq2)); +} + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. @@ -1207,9 +1209,20 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) #define TCP_INFINITE_SSTHRESH 0x7fffffff +static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp) +{ + return tp->snd_cwnd; +} + +static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) +{ + WARN_ON_ONCE((int)val <= 0); + tp->snd_cwnd = val; +} + static inline bool tcp_in_slow_start(const struct tcp_sock *tp) { - return tp->snd_cwnd < tp->snd_ssthresh; + return tcp_snd_cwnd(tp) < tp->snd_ssthresh; } static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) @@ -1235,8 +1248,8 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, - ((tp->snd_cwnd >> 1) + - (tp->snd_cwnd >> 2))); + ((tcp_snd_cwnd(tp) >> 1) + + (tcp_snd_cwnd(tp) >> 2))); } /* Use define here intentionally to get WARN_ON location shown at the caller */ @@ -1278,7 +1291,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) /* If in slow start, ensure cwnd grows to twice what was ACKed. */ if (tcp_in_slow_start(tp)) - return tp->snd_cwnd < 2 * tp->max_packets_out; + return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out; return tp->is_cwnd_limited; } @@ -1367,20 +1380,9 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) __skb_checksum_complete(skb); } -bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason); -#ifdef CONFIG_INET -void __sk_defer_free_flush(struct sock *sk); - -static inline void sk_defer_free_flush(struct sock *sk) -{ - if (llist_empty(&sk->defer_list)) - return; - __sk_defer_free_flush(sk); -} -#else -static inline void sk_defer_free_flush(struct sock *sk) {} -#endif int tcp_filter(struct sock *sk, struct sk_buff *skb); void tcp_set_state(struct sock *sk, int state); @@ -1674,6 +1676,12 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, return __tcp_md5_do_lookup(sk, l3index, addr, family); } +enum skb_drop_reason +tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif); + + #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else static inline struct tcp_md5sig_key * @@ -1682,6 +1690,14 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, { return NULL; } + +static inline enum skb_drop_reason +tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + return SKB_NOT_DROPPED_YET; +} #define tcp_twsk_md5_key(twsk) NULL #endif @@ -1817,11 +1833,6 @@ static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk) return skb_rb_last(&sk->tcp_rtx_queue); } -static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) -{ - return skb_peek(&sk->sk_write_queue); -} - static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk) { return skb_peek_tail(&sk->sk_write_queue); @@ -2358,7 +2369,7 @@ static inline u32 tcp_timeout_init(struct sock *sk) if (timeout <= 0) timeout = TCP_TIMEOUT_INIT; - return timeout; + return min_t(int, timeout, TCP_RTO_MAX); } static inline u32 tcp_rwnd_init_bpf(struct sock *sk) diff --git a/include/net/tls.h b/include/net/tls.h index 526cb2c3b724..8017f1703447 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -64,6 +64,7 @@ #define TLS_AAD_SPACE_SIZE 13 #define MAX_IV_SIZE 16 +#define TLS_TAG_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 /* For CCM mode, the full 16-bytes of IV is made of '4' fields of given sizes. @@ -117,11 +118,6 @@ struct tls_rec { u8 aead_req_ctx[]; }; -struct tls_msg { - struct strp_msg rxm; - u8 control; -}; - struct tx_work { struct delayed_work work; struct sock *sk; @@ -152,13 +148,10 @@ struct tls_sw_context_rx { void (*saved_data_ready)(struct sock *sk); struct sk_buff *recv_pkt; - u8 control; u8 async_capable:1; - u8 decrypted:1; atomic_t decrypt_pending; /* protect crypto_wait with decrypt_pending*/ spinlock_t decrypt_compl_lock; - bool async_notify; }; struct tls_record_info { @@ -245,6 +238,7 @@ struct tls_context { u8 tx_conf:3; u8 rx_conf:3; + u8 zerocopy_sendfile:1; int (*push_pending_record)(struct sock *sk, int flags); void (*sk_write_space)(struct sock *sk); @@ -378,7 +372,7 @@ void tls_sw_free_resources_rx(struct sock *sk); void tls_sw_release_resources_rx(struct sock *sk); void tls_sw_free_ctx_rx(struct tls_context *tls_ctx); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len); + int flags, int *addr_len); bool tls_sw_sock_is_readable(struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, @@ -411,7 +405,9 @@ void tls_free_partial_record(struct sock *sk, struct tls_context *ctx); static inline struct tls_msg *tls_msg(struct sk_buff *skb) { - return (struct tls_msg *)strp_msg(skb); + struct sk_skb_cb *scb = (struct sk_skb_cb *)skb->cb; + + return &scb->tls; } static inline bool tls_is_partially_sent_record(struct tls_context *ctx) @@ -626,7 +622,6 @@ tls_offload_ctx_rx(const struct tls_context *tls_ctx) return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx; } -#if IS_ENABLED(CONFIG_TLS_DEVICE) static inline void *__tls_driver_ctx(struct tls_context *tls_ctx, enum tls_offload_ctx_dir direction) { @@ -641,7 +636,6 @@ tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir direction) { return __tls_driver_ctx(tls_get_ctx(sk), direction); } -#endif #define RESYNC_REQ BIT(0) #define RESYNC_REQ_ASYNC BIT(1) diff --git a/include/net/udp.h b/include/net/udp.h index f1c2a88c9005..b83a00330566 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -250,14 +250,14 @@ void udp_destruct_sock(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); -struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *off, int *err); +struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, + int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *err) + int *err) { int off = 0; - return __skb_recv_udp(sk, flags, noblock, &off, err); + return __skb_recv_udp(sk, flags, &off, err); } int udp_v4_early_demux(struct sk_buff *skb); diff --git a/include/net/udplite.h b/include/net/udplite.h index 9185e45b997f..a3c53110d30b 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -70,49 +70,6 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) return 0; } -/* Slow-path computation of checksum. Socket is locked. */ -static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) -{ - const struct udp_sock *up = udp_sk(skb->sk); - int cscov = up->len; - __wsum csum = 0; - - if (up->pcflag & UDPLITE_SEND_CC) { - /* - * Sender has set `partial coverage' option on UDP-Lite socket. - * The special case "up->pcslen == 0" signifies full coverage. - */ - if (up->pcslen < up->len) { - if (0 < up->pcslen) - cscov = up->pcslen; - udp_hdr(skb)->len = htons(up->pcslen); - } - /* - * NOTE: Causes for the error case `up->pcslen > up->len': - * (i) Application error (will not be penalized). - * (ii) Payload too big for send buffer: data is split - * into several packets, each with its own header. - * In this case (e.g. last segment), coverage may - * exceed packet length. - * Since packets with coverage length > packet length are - * illegal, we fall back to the defaults here. - */ - } - - skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ - - skb_queue_walk(&sk->sk_write_queue, skb) { - const int off = skb_transport_offset(skb); - const int len = skb->len - off; - - csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum); - - if ((cscov -= len) <= 0) - break; - } - return csum; -} - /* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 5a934bebe630..bca5b01af247 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -227,11 +227,56 @@ struct vxlan_config { enum ifla_vxlan_df df; }; +enum { + VXLAN_VNI_STATS_RX, + VXLAN_VNI_STATS_RX_DROPS, + VXLAN_VNI_STATS_RX_ERRORS, + VXLAN_VNI_STATS_TX, + VXLAN_VNI_STATS_TX_DROPS, + VXLAN_VNI_STATS_TX_ERRORS, +}; + +struct vxlan_vni_stats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_drops; + u64 rx_errors; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + u64 tx_errors; +}; + +struct vxlan_vni_stats_pcpu { + struct vxlan_vni_stats stats; + struct u64_stats_sync syncp; +}; + struct vxlan_dev_node { struct hlist_node hlist; struct vxlan_dev *vxlan; }; +struct vxlan_vni_node { + struct rhash_head vnode; + struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ +#endif + struct list_head vlist; + __be32 vni; + union vxlan_addr remote_ip; /* default remote ip for this vni */ + struct vxlan_vni_stats_pcpu __percpu *stats; + + struct rcu_head rcu; +}; + +struct vxlan_vni_group { + struct rhashtable vni_hash; + struct list_head vni_list; + u32 num_vnis; +}; + /* Pseudo network device */ struct vxlan_dev { struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ @@ -254,6 +299,8 @@ struct vxlan_dev { struct vxlan_config cfg; + struct vxlan_vni_group __rcu *vnigrp; + struct hlist_head fdb_head[FDB_HASH_SIZE]; }; @@ -274,6 +321,7 @@ struct vxlan_dev { #define VXLAN_F_GPE 0x4000 #define VXLAN_F_IPV6_LINKLOCAL 0x8000 #define VXLAN_F_TTL_INHERIT 0x10000 +#define VXLAN_F_VNIFILTER 0x20000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable @@ -283,7 +331,8 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ VXLAN_F_REMCSUM_NOPARTIAL | \ - VXLAN_F_COLLECT_METADATA) + VXLAN_F_COLLECT_METADATA | \ + VXLAN_F_VNIFILTER) /* Flags that can be set together with VXLAN_F_GPE. */ #define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ @@ -292,7 +341,8 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM_TX | \ VXLAN_F_UDP_ZERO_CSUM6_TX | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ - VXLAN_F_COLLECT_METADATA) + VXLAN_F_COLLECT_METADATA | \ + VXLAN_F_VNIFILTER) struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); diff --git a/include/net/xdp.h b/include/net/xdp.h index 8f0812e4996d..04c852c7a77f 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -60,12 +60,20 @@ struct xdp_rxq_info { u32 reg_state; struct xdp_mem_info mem; unsigned int napi_id; + u32 frag_size; } ____cacheline_aligned; /* perf critical, avoid false-sharing */ struct xdp_txq_info { struct net_device *dev; }; +enum xdp_buff_flags { + XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */ + XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under + * pressure + */ +}; + struct xdp_buff { void *data; void *data_end; @@ -74,13 +82,40 @@ struct xdp_buff { struct xdp_rxq_info *rxq; struct xdp_txq_info *txq; u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ + u32 flags; /* supported values defined in xdp_buff_flags */ }; +static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp) +{ + return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS); +} + +static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp) +{ + xdp->flags |= XDP_FLAGS_HAS_FRAGS; +} + +static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp) +{ + xdp->flags &= ~XDP_FLAGS_HAS_FRAGS; +} + +static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp) +{ + return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); +} + +static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) +{ + xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; +} + static __always_inline void xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) { xdp->frame_sz = frame_sz; xdp->rxq = rxq; + xdp->flags = 0; } static __always_inline void @@ -111,6 +146,20 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp) return (struct skb_shared_info *)xdp_data_hard_end(xdp); } +static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp) +{ + unsigned int len = xdp->data_end - xdp->data; + struct skb_shared_info *sinfo; + + if (likely(!xdp_buff_has_frags(xdp))) + goto out; + + sinfo = xdp_get_shared_info_from_buff(xdp); + len += sinfo->xdp_frags_size; +out: + return len; +} + struct xdp_frame { void *data; u16 len; @@ -122,8 +171,19 @@ struct xdp_frame { */ struct xdp_mem_info mem; struct net_device *dev_rx; /* used by cpumap */ + u32 flags; /* supported values defined in xdp_buff_flags */ }; +static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame) +{ + return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); +} + +static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame) +{ + return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); +} + #define XDP_BULK_QUEUE_SIZE 16 struct xdp_frame_bulk { int count; @@ -159,6 +219,19 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame) frame->dev_rx = NULL; } +static inline void +xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, + unsigned int size, unsigned int truesize, + bool pfmemalloc) +{ + skb_shinfo(skb)->nr_frags = nr_frags; + + skb->len += size; + skb->data_len += size; + skb->truesize += truesize; + skb->pfmemalloc |= pfmemalloc; +} + /* Avoids inlining WARN macro in fast-path */ void xdp_warn(const char *msg, const char *func, const int line); #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__) @@ -180,6 +253,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp) xdp->data_end = frame->data + frame->len; xdp->data_meta = frame->data - frame->metasize; xdp->frame_sz = frame->frame_sz; + xdp->flags = frame->flags; } static inline @@ -206,6 +280,7 @@ int xdp_update_frame_from_buff(struct xdp_buff *xdp, xdp_frame->headroom = headroom - sizeof(*xdp_frame); xdp_frame->metasize = metasize; xdp_frame->frame_sz = xdp->frame_sz; + xdp_frame->flags = xdp->flags; return 0; } @@ -230,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) return xdp_frame; } +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp); void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); @@ -246,14 +323,51 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem); static inline void xdp_release_frame(struct xdp_frame *xdpf) { struct xdp_mem_info *mem = &xdpf->mem; + struct skb_shared_info *sinfo; + int i; /* Curr only page_pool needs this */ - if (mem->type == MEM_TYPE_PAGE_POOL) - __xdp_release_frame(xdpf->data, mem); + if (mem->type != MEM_TYPE_PAGE_POOL) + return; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (i = 0; i < sinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&sinfo->frags[i]); + + __xdp_release_frame(page_address(page), mem); + } +out: + __xdp_release_frame(xdpf->data, mem); +} + +static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf) +{ + struct skb_shared_info *sinfo; + unsigned int len = xdpf->len; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + len += sinfo->xdp_frags_size; +out: + return len; +} + +int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id, u32 frag_size); +static inline int +xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id) +{ + return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0); } -int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, - struct net_device *dev, u32 queue_index, unsigned int napi_id); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 443d45951564..4aa031849668 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -13,7 +13,7 @@ void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); -u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, u32 max); +u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); void xsk_tx_release(struct xsk_buff_pool *pool); struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id); @@ -142,8 +142,7 @@ static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, return false; } -static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, - u32 max) +static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max) { return 0; } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 76aa6f11a540..c39d910d4b45 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -126,13 +126,17 @@ struct xfrm_state_walk { struct xfrm_address_filter *filter; }; -struct xfrm_state_offload { +enum { + XFRM_DEV_OFFLOAD_IN = 1, + XFRM_DEV_OFFLOAD_OUT, +}; + +struct xfrm_dev_offload { struct net_device *dev; netdevice_tracker dev_tracker; struct net_device *real_dev; unsigned long offload_handle; - unsigned int num_exthdrs; - u8 flags; + u8 dir : 2; }; struct xfrm_mode { @@ -247,7 +251,7 @@ struct xfrm_state { struct xfrm_lifetime_cur curlft; struct hrtimer mtimer; - struct xfrm_state_offload xso; + struct xfrm_dev_offload xso; /* used to fix curlft->add_time when changing date */ long saved_tmo; @@ -1006,7 +1010,7 @@ struct xfrm_offload { #define CRYPTO_FALLBACK 8 #define XFRM_GSO_SEGMENT 16 #define XFRM_GRO 32 -#define XFRM_ESP_NO_TRAILER 64 +/* 64 is free */ #define XFRM_DEV_RESUME 128 #define XFRM_XMIT 256 @@ -1081,24 +1085,29 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un } #ifdef CONFIG_XFRM -static inline bool -xfrm_default_allow(struct net *net, int dir) -{ - u8 def = net->xfrm.policy_default; - - switch (dir) { - case XFRM_POLICY_IN: - return def & XFRM_POL_DEFAULT_IN ? false : true; - case XFRM_POLICY_OUT: - return def & XFRM_POL_DEFAULT_OUT ? false : true; - case XFRM_POLICY_FWD: - return def & XFRM_POL_DEFAULT_FWD ? false : true; - } +int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, + unsigned short family); + +static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb, + int dir) +{ + if (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) + return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT; + return false; } -int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, - unsigned short family); +static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb, + int dir, unsigned short family) +{ + if (dir != XFRM_POLICY_OUT && family == AF_INET) { + /* same dst may be used for traffic originating from + * devices with different policy settings. + */ + return IPCB(skb)->flags & IPSKB_NOPOLICY; + } + return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY); +} static inline int __xfrm_policy_check2(struct sock *sk, int dir, struct sk_buff *skb, @@ -1110,13 +1119,9 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - if (xfrm_default_allow(net, dir)) - return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || - (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || - __xfrm_policy_check(sk, ndir, skb, family); - else - return (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || - __xfrm_policy_check(sk, ndir, skb, family); + return __xfrm_check_nopolicy(net, skb, dir) || + __xfrm_check_dev_nopolicy(skb, dir, family) || + __xfrm_policy_check(sk, ndir, skb, family); } static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) @@ -1168,13 +1173,12 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); - if (xfrm_default_allow(net, XFRM_POLICY_OUT)) - return !net->xfrm.policy_count[XFRM_POLICY_OUT] || - (skb_dst(skb)->flags & DST_NOXFRM) || - __xfrm_route_forward(skb, family); - else - return (skb_dst(skb)->flags & DST_NOXFRM) || - __xfrm_route_forward(skb, family); + if (!net->xfrm.policy_count[XFRM_POLICY_OUT] && + net->xfrm.policy_default[XFRM_POLICY_OUT] == XFRM_USERPOLICY_ACCEPT) + return true; + + return (skb_dst(skb)->flags & DST_NOXFRM) || + __xfrm_route_forward(skb, family); } static inline int xfrm4_route_forward(struct sk_buff *skb) @@ -1878,7 +1882,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) { - struct xfrm_state_offload *xso = &x->xso; + struct xfrm_dev_offload *xso = &x->xso; if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn) xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); @@ -1904,7 +1908,7 @@ static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) static inline void xfrm_dev_state_delete(struct xfrm_state *x) { - struct xfrm_state_offload *xso = &x->xso; + struct xfrm_dev_offload *xso = &x->xso; if (xso->dev) xso->dev->xfrmdev_ops->xdo_dev_state_delete(x); @@ -1912,7 +1916,7 @@ static inline void xfrm_dev_state_delete(struct xfrm_state *x) static inline void xfrm_dev_state_free(struct xfrm_state *x) { - struct xfrm_state_offload *xso = &x->xso; + struct xfrm_dev_offload *xso = &x->xso; struct net_device *dev = xso->dev; if (dev && dev->xfrmdev_ops) { diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index ddeefc4a1040..647722e847b4 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -60,6 +60,7 @@ struct xsk_buff_pool { */ dma_addr_t *dma_pages; struct xdp_buff_xsk *heads; + struct xdp_desc *tx_descs; u64 chunk_mask; u64 addrs_cnt; u32 free_list_cnt; @@ -96,6 +97,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, u16 queue_id, u16 flags); int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, struct net_device *dev, u16 queue_id); +int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs); void xp_destroy(struct xsk_buff_pool *pool); void xp_get_pool(struct xsk_buff_pool *pool); bool xp_put_pool(struct xsk_buff_pool *pool); |
