diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2018-04-04 16:11:49 -0700 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2018-04-04 16:11:49 -0700 |
| commit | 664b0bae0b87f69bc9deb098f5e0158b9cf18e04 (patch) | |
| tree | d5841492b396ff483723b9339c7c11dc33b67688 /include/net | |
| parent | 567b9b549cfa1cbc202762ae97b5385c29ade1e3 (diff) | |
| parent | 04bb1719c4de94700056241d4c0fe3c1413f5aff (diff) | |
Merge branch 'next' into for-linus
Prepare input updates for 4.17 merge window.
Diffstat (limited to 'include/net')
98 files changed, 2760 insertions, 791 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index 1e6df0eb058f..6ed9692f20bd 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -14,7 +14,6 @@ struct tcf_idrinfo { spinlock_t lock; struct idr action_idr; - struct net *net; }; struct tc_action_ops; @@ -87,7 +86,7 @@ struct tc_action_ops { int (*act)(struct sk_buff *, const struct tc_action *, struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); - void (*cleanup)(struct tc_action *, int bind); + void (*cleanup)(struct tc_action *); int (*lookup)(struct net *, struct tc_action **, u32); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, @@ -95,8 +94,7 @@ struct tc_action_ops { int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, const struct tc_action_ops *); void (*stats_update)(struct tc_action *, u64, u32, u64); - int (*get_dev)(const struct tc_action *a, struct net *net, - struct net_device **mirred_dev); + struct net_device *(*get_dev)(const struct tc_action *a); }; struct tc_action_net { @@ -106,7 +104,7 @@ struct tc_action_net { static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops, struct net *net) + const struct tc_action_ops *ops) { int err = 0; @@ -114,7 +112,6 @@ int tc_action_net_init(struct tc_action_net *tn, if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - tn->idrinfo->net = net; spin_lock_init(&tn->idrinfo->lock); idr_init(&tn->idrinfo->action_idr); return err; @@ -123,12 +120,19 @@ int tc_action_net_init(struct tc_action_net *tn, void tcf_idrinfo_destroy(const struct tc_action_ops *ops, struct tcf_idrinfo *idrinfo); -static inline void tc_action_net_exit(struct tc_action_net *tn) +static inline void tc_action_net_exit(struct list_head *net_list, + unsigned int id) { + struct net *net; + rtnl_lock(); - tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + list_for_each_entry(net, net_list, exit_list) { + struct tc_action_net *tn = net_generic(net, id); + + tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + kfree(tn->idrinfo); + } rtnl_unlock(); - kfree(tn->idrinfo); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, @@ -180,4 +184,38 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, #endif } +typedef int tc_setup_cb_t(enum tc_setup_type type, + void *type_data, void *cb_priv); + +#ifdef CONFIG_NET_CLS_ACT +int tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv); +void tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv); +int tc_setup_cb_egdev_call(const struct net_device *dev, + enum tc_setup_type type, void *type_data, + bool err_stop); +#else +static inline +int tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + return 0; +} + +static inline +void tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ +} + +static inline +int tc_setup_cb_egdev_call(const struct net_device *dev, + enum tc_setup_type type, void *type_data, + bool err_stop) +{ + return 0; +} +#endif + #endif diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 35f5aabd432f..c4185a7b0e90 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -56,11 +56,9 @@ struct prefix_info { struct in6_validator_info { struct in6_addr i6vi_addr; struct inet6_dev *i6vi_dev; + struct netlink_ext_ack *extack; }; -#define IN6_ADDR_HSIZE_SHIFT 4 -#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) - int addrconf_init(void); void addrconf_cleanup(void); @@ -95,8 +93,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); -int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard); +bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); @@ -182,7 +180,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout) */ int ipv6_addr_label_init(void); void ipv6_addr_label_cleanup(void); -void ipv6_addr_label_rtnl_register(void); +int ipv6_addr_label_rtnl_register(void); u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex); @@ -208,7 +206,7 @@ void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed); -void addrconf_dad_failure(struct inet6_ifaddr *ifp); +void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, const struct in6_addr *src_addr); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 3ac79150291f..2b3a6eec4570 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -49,17 +49,19 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, unsigned long, s64, gfp_t, - rxrpc_notify_rx_t); + rxrpc_notify_rx_t, + bool); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t, rxrpc_notify_end_tx_t); int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, - void *, size_t, size_t *, bool, u32 *); + void *, size_t, size_t *, bool, u32 *, u16 *); bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32, int, const char *); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); +u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); @@ -67,5 +69,6 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *, struct key *); int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, enum rxrpc_call_completion *, u32 *); +u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *); #endif /* _NET_RXRPC_H */ diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f9fb566e75cf..9324ac2d9ff2 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -22,11 +22,13 @@ #include "vsock_addr.h" -/* vsock-specific sock->sk_state constants */ -#define VSOCK_SS_LISTEN 255 - #define LAST_RESERVED_PORT 1023 +#define VSOCK_HASH_SIZE 251 +extern struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +extern struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +extern spinlock_t vsock_table_lock; + #define vsock_sk(__sk) ((struct vsock_sock *)__sk) #define sk_vsock(__vsk) (&(__vsk)->sk) @@ -175,6 +177,18 @@ const struct vsock_transport *vsock_core_get_transport(void); /**** UTILS ****/ +/* vsock_table_lock must be held */ +static inline bool __vsock_in_bound_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->bound_table); +} + +/* vsock_table_lock must be held */ +static inline bool __vsock_in_connected_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->connected_table); +} + void vsock_release_pending(struct sock *pending); void vsock_add_pending(struct sock *listener, struct sock *pending); void vsock_remove_pending(struct sock *listener, struct sock *pending); diff --git a/include/net/arp.h b/include/net/arp.h index dc8cd47f883b..977aabfcdc03 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -20,6 +20,9 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32 static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + key = INADDR_ANY; + return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); } diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 020142bb9735..ec9d6bc65855 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -147,6 +147,9 @@ void bt_err_ratelimited(const char *fmt, ...); #define bt_dev_dbg(hdev, fmt, ...) \ BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__) +#define bt_dev_err_ratelimited(hdev, fmt, ...) \ + BT_ERR_RATELIMITED("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + /* Connection and socket states */ enum { BT_CONNECTED = 1, /* Equal to TCP_ESTABLISHED to make net code happy */ @@ -268,7 +271,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -uint bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); +__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); int bt_sock_wait_ready(struct sock *sk, unsigned long flags); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fe98f0a5bef0..1668211297a9 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -273,7 +273,7 @@ enum { #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ #define HCI_POWER_OFF_TIMEOUT msecs_to_jiffies(5000) /* 5 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ -#define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ +#define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ /* HCI data types */ #define HCI_COMMAND_PKT 0x01 diff --git a/include/net/bonding.h b/include/net/bonding.h index b2e68657a216..f801fc940b29 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -330,7 +330,6 @@ static inline void bond_set_active_slave(struct slave *slave) slave->backup = 0; bond_queue_slave_event(slave); bond_lower_state_changed(slave); - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -340,7 +339,6 @@ static inline void bond_set_backup_slave(struct slave *slave) slave->backup = 1; bond_queue_slave_event(slave); bond_lower_state_changed(slave); - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -353,7 +351,6 @@ static inline void bond_set_slave_state(struct slave *slave, slave->backup = slave_state; if (notify) { bond_lower_state_changed(slave); - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); bond_queue_slave_event(slave); slave->should_notify = 0; } else { @@ -385,7 +382,6 @@ static inline void bond_slave_state_notify(struct bonding *bond) bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify) { bond_lower_state_changed(tmp); - rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_ATOMIC); tmp->should_notify = 0; } } @@ -596,7 +592,8 @@ void bond_destroy_sysfs(struct bond_net *net); void bond_prepare_sysfs_group(struct bonding *bond); int bond_sysfs_slave_add(struct slave *slave); void bond_sysfs_slave_del(struct slave *slave); -int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev); +int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, + struct netlink_ext_ack *extack); int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb); int bond_set_carrier(struct bonding *bond); diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h index fe328c52c46b..801489bb14c3 100644 --- a/include/net/caif/cfpkt.h +++ b/include/net/caif/cfpkt.h @@ -32,6 +32,33 @@ void cfpkt_destroy(struct cfpkt *pkt); */ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len); +static inline u8 cfpkt_extr_head_u8(struct cfpkt *pkt) +{ + u8 tmp; + + cfpkt_extr_head(pkt, &tmp, 1); + + return tmp; +} + +static inline u16 cfpkt_extr_head_u16(struct cfpkt *pkt) +{ + __le16 tmp; + + cfpkt_extr_head(pkt, &tmp, 2); + + return le16_to_cpu(tmp); +} + +static inline u32 cfpkt_extr_head_u32(struct cfpkt *pkt) +{ + __le32 tmp; + + cfpkt_extr_head(pkt, &tmp, 4); + + return le32_to_cpu(tmp); +} + /* * Peek header from packet. * Reads data from packet without changing packet. diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f12fa5245a45..81174f9b8d14 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -815,6 +815,8 @@ struct cfg80211_csa_settings { u8 count; }; +#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 + /** * struct iface_combination_params - input parameters for interface combinations * @@ -1773,6 +1775,8 @@ enum cfg80211_signal_type { * by %parent_bssid. * @parent_bssid: the BSS according to which %parent_tsf is set. This is set to * the BSS that requested the scan in which the beacon/probe was received. + * @chains: bitmask for filled values in @chain_signal. + * @chain_signal: per-chain signal strength of last received BSS in dBm. */ struct cfg80211_inform_bss { struct ieee80211_channel *chan; @@ -1781,6 +1785,8 @@ struct cfg80211_inform_bss { u64 boottime_ns; u64 parent_tsf; u8 parent_bssid[ETH_ALEN] __aligned(2); + u8 chains; + s8 chain_signal[IEEE80211_MAX_CHAINS]; }; /** @@ -1824,6 +1830,8 @@ struct cfg80211_bss_ies { * that holds the beacon data. @beacon_ies is still valid, of course, and * points to the same data as hidden_beacon_bss->beacon_ies in that case. * @signal: signal strength value (type depends on the wiphy's signal_type) + * @chains: bitmask for filled values in @chain_signal. + * @chain_signal: per-chain signal strength of last received BSS in dBm. * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes */ struct cfg80211_bss { @@ -1842,6 +1850,8 @@ struct cfg80211_bss { u16 capability; u8 bssid[ETH_ALEN]; + u8 chains; + s8 chain_signal[IEEE80211_MAX_CHAINS]; u8 priv[0] __aligned(sizeof(void *)); }; @@ -2021,6 +2031,9 @@ struct cfg80211_disassoc_request { * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. + * @wep_keys: static WEP keys, if not NULL points to an array of + * CFG80211_MAX_WEP_KEYS WEP keys + * @wep_tx_key: key index (0..3) of the default TX static WEP key */ struct cfg80211_ibss_params { const u8 *ssid; @@ -2037,6 +2050,8 @@ struct cfg80211_ibss_params { int mcast_rate[NUM_NL80211_BANDS]; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; + struct key_params *wep_keys; + int wep_tx_key; }; /** @@ -3226,7 +3241,6 @@ struct cfg80211_ops { * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. - * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans. * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the * firmware. * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP. @@ -4347,19 +4361,6 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, } /** - * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11 - * @skb: the 802.3 frame - * @addr: the device MAC address - * @iftype: the virtual interface type - * @bssid: the network bssid (used only for iftype STATION and ADHOC) - * @qos: build 802.11 QoS data frame - * Return: 0 on success, or a negative error code. - */ -int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, const u8 *bssid, - bool qos); - -/** * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame * * Decode an IEEE 802.11 A-MSDU and convert it to a list of 802.3 frames. @@ -5441,9 +5442,6 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length - * @authorized: true if the 802.1X authentication was done by the driver or is - * not needed (e.g., when Fast Transition protocol was used), false - * otherwise. Ignored for networks that don't use 802.1X authentication. */ struct cfg80211_roam_info { struct ieee80211_channel *channel; @@ -5453,7 +5451,6 @@ struct cfg80211_roam_info { size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; - bool authorized; }; /** @@ -5478,6 +5475,23 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, gfp_t gfp); /** + * cfg80211_port_authorized - notify cfg80211 of successful security association + * + * @dev: network device + * @bssid: the BSSID of the AP + * @gfp: allocation flags + * + * This function should be called by a driver that supports 4 way handshake + * offload after a security association was successfully established (i.e., + * the 4 way handshake was completed successfully). The call to this function + * should be preceded with a call to cfg80211_connect_result(), + * cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to + * indicate the 802.11 association. + */ +void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, + gfp_t gfp); + +/** * cfg80211_disconnected - notify cfg80211 that connection was dropped * * @dev: network device @@ -5576,7 +5590,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, * cfg80211_rx_mgmt - notification of received, unprocessed management frame * @wdev: wireless device receiving the frame * @freq: Frequency on which the frame was received in MHz - * @sig_dbm: signal strength in mBm, or 0 if unknown + * @sig_dbm: signal strength in dBm, or 0 if unknown * @buf: Management frame (header + body) * @len: length of the frame data * @flags: flags, as defined in enum nl80211_rxmgmt_flags @@ -5755,7 +5769,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, * @frame: the frame * @len: length of the frame * @freq: frequency the frame was received on - * @sig_dbm: signal strength in mBm, or 0 if unknown + * @sig_dbm: signal strength in dBm, or 0 if unknown * * Use this function to report to userspace when a beacon was * received. It is not useful to call this when there is no @@ -5934,7 +5948,8 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, * @ies: the IE buffer * @ielen: the length of the IE buffer * @ids: an array with element IDs that are allowed before - * the split + * the split. A WLAN_EID_EXTENSION value means that the next + * EID in the list is a sub-element of the EXTENSION IE. * @n_ids: the size of the element ID array * @after_ric: array IE types that come after the RIC element * @n_after_ric: size of the @after_ric array @@ -5965,7 +5980,8 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, * @ies: the IE buffer * @ielen: the length of the IE buffer * @ids: an array with element IDs that are allowed before - * the split + * the split. A WLAN_EID_EXTENSION value means that the next + * EID in the list is a sub-element of the EXTENSION IE. * @n_ids: the size of the element ID array * @offset: offset where to start splitting in the buffer * diff --git a/include/net/devlink.h b/include/net/devlink.h index b9654e133599..6545b03e97f7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -26,10 +26,12 @@ struct devlink { struct list_head port_list; struct list_head sb_list; struct list_head dpipe_table_list; + struct list_head resource_list; struct devlink_dpipe_headers *dpipe_headers; const struct devlink_ops *ops; struct device *dev; possible_net_t _net; + struct mutex lock; char priv[0] __aligned(NETDEV_ALIGN); }; @@ -181,6 +183,9 @@ struct devlink_dpipe_table_ops; * @counters_enabled: indicates if counters are active * @counter_control_extern: indicates if counter control is in dpipe or * external tool + * @resource_valid: Indicate that the resource id is valid + * @resource_id: relative resource this table is related to + * @resource_units: number of resource's unit consumed per table's entry * @table_ops: table operations * @rcu: rcu */ @@ -190,6 +195,9 @@ struct devlink_dpipe_table { const char *name; bool counters_enabled; bool counter_control_extern; + bool resource_valid; + u64 resource_id; + u64 resource_units; struct devlink_dpipe_table_ops *table_ops; struct rcu_head rcu; }; @@ -223,7 +231,63 @@ struct devlink_dpipe_headers { unsigned int headers_count; }; +/** + * struct devlink_resource_ops - resource ops + * @occ_get: get the occupied size + * @size_validate: validate the size of the resource before update, reload + * is needed for changes to take place + */ +struct devlink_resource_ops { + u64 (*occ_get)(struct devlink *devlink); + int (*size_validate)(struct devlink *devlink, u64 size, + struct netlink_ext_ack *extack); +}; + +/** + * struct devlink_resource_size_params - resource's size parameters + * @size_min: minimum size which can be set + * @size_max: maximum size which can be set + * @size_granularity: size granularity + * @size_unit: resource's basic unit + */ +struct devlink_resource_size_params { + u64 size_min; + u64 size_max; + u64 size_granularity; + enum devlink_resource_unit unit; +}; + +/** + * struct devlink_resource - devlink resource + * @name: name of the resource + * @id: id, per devlink instance + * @size: size of the resource + * @size_new: updated size of the resource, reload is needed + * @size_valid: valid in case the total size of the resource is valid + * including its children + * @parent: parent resource + * @size_params: size parameters + * @list: parent list + * @resource_list: list of child resources + * @resource_ops: resource ops + */ +struct devlink_resource { + const char *name; + u64 id; + u64 size; + u64 size_new; + bool size_valid; + struct devlink_resource *parent; + struct devlink_resource_size_params *size_params; + struct list_head list; + struct list_head resource_list; + const struct devlink_resource_ops *resource_ops; +}; + +#define DEVLINK_RESOURCE_ID_PARENT_TOP 0 + struct devlink_ops { + int (*reload)(struct devlink *devlink); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); int (*port_split)(struct devlink *devlink, unsigned int port_index, @@ -332,6 +396,23 @@ extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; extern struct devlink_dpipe_header devlink_dpipe_header_ipv6; +int devlink_resource_register(struct devlink *devlink, + const char *resource_name, + bool top_hierarchy, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + struct devlink_resource_size_params *size_params, + const struct devlink_resource_ops *resource_ops); +void devlink_resources_unregister(struct devlink *devlink, + struct devlink_resource *resource); +int devlink_resource_size_get(struct devlink *devlink, + u64 resource_id, + u64 *p_resource_size); +int devlink_dpipe_table_resource_set(struct devlink *devlink, + const char *table_name, u64 resource_id, + u64 resource_units); + #else static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, @@ -468,6 +549,40 @@ devlink_dpipe_match_put(struct sk_buff *skb, return 0; } +static inline int +devlink_resource_register(struct devlink *devlink, + const char *resource_name, + bool top_hierarchy, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + struct devlink_resource_size_params *size_params, + const struct devlink_resource_ops *resource_ops) +{ + return 0; +} + +static inline void +devlink_resources_unregister(struct devlink *devlink, + struct devlink_resource *resource) +{ +} + +static inline int +devlink_resource_size_get(struct devlink *devlink, u64 resource_id, + u64 *p_resource_size) +{ + return -EOPNOTSUPP; +} + +static inline int +devlink_dpipe_table_resource_set(struct devlink *devlink, + const char *table_name, u64 resource_id, + u64 resource_units) +{ + return -EOPNOTSUPP; +} + #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dn.h b/include/net/dn.h index fc0036228d20..56ab0726c641 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -123,13 +123,6 @@ struct dn_scp /* Session Control Port */ unsigned long keepalive; void (*keepalive_fxn)(struct sock *sk); - /* - * This stuff is for the fast timer for delayed acks - */ - struct timer_list delack_timer; - int delack_pending; - void (*delack_fxn)(struct sock *sk); - }; static inline struct dn_scp *DN_SK(struct sock *sk) diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h index 3a3e33d18456..413a15e5339c 100644 --- a/include/net/dn_nsp.h +++ b/include/net/dn_nsp.h @@ -17,7 +17,6 @@ void dn_nsp_send_data_ack(struct sock *sk); void dn_nsp_send_oth_ack(struct sock *sk); -void dn_nsp_delayed_ack(struct sock *sk); void dn_send_conn_ack(struct sock *sk); void dn_send_conn_conf(struct sock *sk, gfp_t gfp); void dn_nsp_send_disc(struct sock *sk, unsigned char type, diff --git a/include/net/dn_route.h b/include/net/dn_route.h index 55df9939bca2..342d2503cba5 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -69,6 +69,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, */ struct dn_route { struct dst_entry dst; + struct dn_route __rcu *dn_next; struct neighbour *n; diff --git a/include/net/dsa.h b/include/net/dsa.h index dd44d6ce1097..6cb602dd970c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -29,6 +29,7 @@ struct fixed_phy_status; enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = 0, DSA_TAG_PROTO_BRCM, + DSA_TAG_PROTO_BRCM_PREPEND, DSA_TAG_PROTO_DSA, DSA_TAG_PROTO_EDSA, DSA_TAG_PROTO_KSZ, @@ -116,13 +117,13 @@ struct dsa_switch_tree { struct raw_notifier_head nh; /* Tree identifier */ - u32 tree; + unsigned int index; /* Number of switches attached to this tree */ struct kref refcount; /* Has this tree been applied to the hardware? */ - bool applied; + bool setup; /* * Configuration data for the platform device that owns @@ -130,11 +131,6 @@ struct dsa_switch_tree { */ struct dsa_platform_data *pd; - /* Copy of tag_ops->rcv for faster access in hot path */ - struct sk_buff * (*rcv)(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt); - /* * The switch port to which the CPU is attached. */ @@ -144,12 +140,6 @@ struct dsa_switch_tree { * Data for the individual switch chips. */ struct dsa_switch *ds[DSA_MAX_SWITCHES]; - - /* - * Tagging protocol operations for adding and removing an - * encapsulation tag. - */ - const struct dsa_device_ops *tag_ops; }; /* TC matchall action types, only mirroring for now */ @@ -175,11 +165,33 @@ struct dsa_mall_tc_entry { struct dsa_port { + /* A CPU port is physically connected to a master device. + * A user port exposed to userspace has a slave device. + */ + union { + struct net_device *master; + struct net_device *slave; + }; + + /* CPU port tagging operations used by master or slave devices */ + const struct dsa_device_ops *tag_ops; + + /* Copies for faster access in master receive hot path */ + struct dsa_switch_tree *dst; + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt); + + enum { + DSA_PORT_TYPE_UNUSED = 0, + DSA_PORT_TYPE_CPU, + DSA_PORT_TYPE_DSA, + DSA_PORT_TYPE_USER, + } type; + struct dsa_switch *ds; unsigned int index; const char *name; - struct dsa_port *cpu_dp; - struct net_device *netdev; + const struct dsa_port *cpu_dp; struct device_node *dn; unsigned int ageing_time; u8 stp_state; @@ -188,7 +200,6 @@ struct dsa_port { /* * Original copy of the master netdev ethtool_ops */ - struct ethtool_ops ethtool_ops; const struct ethtool_ops *orig_ethtool_ops; }; @@ -199,7 +210,7 @@ struct dsa_switch { * Parent switch tree, and switch index. */ struct dsa_switch_tree *dst; - int index; + unsigned int index; /* Listener for switch fabric events */ struct notifier_block nb; @@ -230,9 +241,6 @@ struct dsa_switch { /* * Slave mii_bus and devices for the individual ports. */ - u32 dsa_port_mask; - u32 cpu_port_mask; - u32 enabled_port_mask; u32 phys_mii_mask; struct mii_bus *slave_mii_bus; @@ -251,51 +259,81 @@ struct dsa_switch { struct dsa_port ports[]; }; +static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) +{ + return &ds->ports[p]; +} + +static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) +{ + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; +} + static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { - return !!(ds->cpu_port_mask & (1 << p)); + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_CPU; } static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) { - return !!((ds->dsa_port_mask) & (1 << p)); + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_DSA; } -static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) +static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) { - return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_USER; } -static inline u8 dsa_upstream_port(struct dsa_switch *ds) +static inline u32 dsa_user_ports(struct dsa_switch *ds) { - struct dsa_switch_tree *dst = ds->dst; + u32 mask = 0; + int p; - /* - * If this is the root switch (i.e. the switch that connects - * to the CPU), return the cpu port number on this switch. - * Else return the (DSA) port number that connects to the - * switch that is one hop closer to the cpu. - */ - if (dst->cpu_dp->ds == ds) - return dst->cpu_dp->index; + for (p = 0; p < ds->num_ports; p++) + if (dsa_is_user_port(ds, p)) + mask |= BIT(p); + + return mask; +} + +/* Return the local port used to reach an arbitrary switch port */ +static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device, + int port) +{ + if (device == ds->index) + return port; else - return ds->rtable[dst->cpu_dp->ds->index]; + return ds->rtable[device]; +} + +/* Return the local port used to reach the dedicated CPU port */ +static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port) +{ + const struct dsa_port *dp = dsa_to_port(ds, port); + const struct dsa_port *cpu_dp = dp->cpu_dp; + + if (!cpu_dp) + return port; + + return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index); } typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { +#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) /* * Legacy probing. */ const char *(*probe)(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **priv); +#endif - enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds); + enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, + int port); int (*setup)(struct dsa_switch *ds); - int (*set_addr)(struct dsa_switch *ds, u8 *addr); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); /* @@ -382,12 +420,10 @@ struct dsa_switch_ops { */ int (*port_vlan_filtering)(struct dsa_switch *ds, int port, bool vlan_filtering); - int (*port_vlan_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); - void (*port_vlan_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); + int (*port_vlan_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan); + void (*port_vlan_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); /* @@ -403,12 +439,10 @@ struct dsa_switch_ops { /* * Multicast database */ - int (*port_mdb_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans); - void (*port_mdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans); + int (*port_mdb_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); + void (*port_mdb_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); int (*port_mdb_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); /* @@ -442,11 +476,20 @@ struct dsa_switch_driver { const struct dsa_switch_ops *ops; }; +#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) /* Legacy driver registration */ void register_switch_driver(struct dsa_switch_driver *type); void unregister_switch_driver(struct dsa_switch_driver *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); +#else +static inline void register_switch_driver(struct dsa_switch_driver *type) { } +static inline void unregister_switch_driver(struct dsa_switch_driver *type) { } +static inline struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) +{ + return NULL; +} +#endif struct net_device *dsa_dev_to_net_device(struct device *dev); /* Keep inline for faster access in hot path */ @@ -475,4 +518,54 @@ static inline int dsa_switch_resume(struct dsa_switch *ds) } #endif /* CONFIG_PM_SLEEP */ +enum dsa_notifier_type { + DSA_PORT_REGISTER, + DSA_PORT_UNREGISTER, +}; + +struct dsa_notifier_info { + struct net_device *dev; +}; + +struct dsa_notifier_register_info { + struct dsa_notifier_info info; /* must be first */ + struct net_device *master; + unsigned int port_number; + unsigned int switch_number; +}; + +static inline struct net_device * +dsa_notifier_info_to_dev(const struct dsa_notifier_info *info) +{ + return info->dev; +} + +#if IS_ENABLED(CONFIG_NET_DSA) +int register_dsa_notifier(struct notifier_block *nb); +int unregister_dsa_notifier(struct notifier_block *nb); +int call_dsa_notifiers(unsigned long val, struct net_device *dev, + struct dsa_notifier_info *info); +#else +static inline int register_dsa_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int unregister_dsa_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev, + struct dsa_notifier_info *info) +{ + return NOTIFY_DONE; +} +#endif + +/* Broadcom tag specific helpers to insert and extract queue/port number */ +#define BRCM_TAG_SET_PORT_QUEUE(p, q) ((p) << 8 | q) +#define BRCM_TAG_GET_PORT(v) ((v) >> 8) +#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff) + #endif diff --git a/include/net/dst.h b/include/net/dst.h index 694c2e6ae618..c63d2c37f6e9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -34,13 +34,9 @@ struct sk_buff; struct dst_entry { struct net_device *dev; - struct rcu_head rcu_head; - struct dst_entry *child; struct dst_ops *ops; unsigned long _metrics; unsigned long expires; - struct dst_entry *path; - struct dst_entry *from; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; #else @@ -59,8 +55,6 @@ struct dst_entry { #define DST_XFRM_QUEUE 0x0040 #define DST_METADATA 0x0080 - short error; - /* A non-zero value of dst->obsolete forces by-hand validation * of the route entry. Positive values are set by the generic * dst layer to indicate that the entry has been forcefully @@ -76,35 +70,24 @@ struct dst_entry { #define DST_OBSOLETE_KILL -2 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ - unsigned short __pad3; - -#ifdef CONFIG_IP_ROUTE_CLASSID - __u32 tclassid; -#else - __u32 __pad2; -#endif -#ifdef CONFIG_64BIT - /* - * Align __refcnt to a 64 bytes alignment - * (L1_CACHE_SIZE would be too much) - */ - long __pad_to_align_refcnt[2]; -#endif /* * __refcnt wants to be on a different cache line from * input/output/ops or performance tanks badly */ - atomic_t __refcnt; /* client references */ +#ifdef CONFIG_64BIT + atomic_t __refcnt; /* 64-bit offset 64 */ +#endif int __use; unsigned long lastuse; struct lwtunnel_state *lwtstate; - union { - struct dst_entry *next; - struct rtable __rcu *rt_next; - struct rt6_info *rt6_next; - struct dn_route __rcu *dn_next; - }; + struct rcu_head rcu_head; + short error; + short __pad; + __u32 tclassid; +#ifndef CONFIG_64BIT + atomic_t __refcnt; /* 32-bit offset 64 */ +#endif }; struct dst_metrics { @@ -250,23 +233,24 @@ static inline void dst_hold(struct dst_entry *dst) { /* * If your kernel compilation stops here, please check - * __pad_to_align_refcnt declaration in struct dst_entry + * the placement of __refcnt in struct dst_entry */ BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); } -static inline void dst_use(struct dst_entry *dst, unsigned long time) +static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) { - dst_hold(dst); - dst->__use++; - dst->lastuse = time; + if (unlikely(time != dst->lastuse)) { + dst->__use++; + dst->lastuse = time; + } } -static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) +static inline void dst_hold_and_use(struct dst_entry *dst, unsigned long time) { - dst->__use++; - dst->lastuse = time; + dst_hold(dst); + dst_use_noref(dst, time); } static inline struct dst_entry *dst_clone(struct dst_entry *dst) @@ -520,4 +504,12 @@ static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) } #endif +static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) +{ + struct dst_entry *dst = skb_dst(skb); + + if (dst && dst->ops->update_pmtu) + dst->ops->update_pmtu(dst, NULL, skb, mtu); +} + #endif /* _NET_DST_H */ diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 91bc7bdf6bf5..56cb3c38569a 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -25,7 +25,7 @@ struct metadata_dst { } u; }; -static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) +static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb) { struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); @@ -35,7 +35,8 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) return NULL; } -static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) +static inline struct ip_tunnel_info * +skb_tunnel_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; @@ -87,6 +88,7 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, void metadata_dst_free(struct metadata_dst *); struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags); +void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst); struct metadata_dst __percpu * metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); diff --git a/include/net/erspan.h b/include/net/erspan.h index ca94fc86865e..d044aa60cc76 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -15,7 +15,7 @@ * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The * other fields are set to zero, so only a sequence number follows. * - * ERSPAN Type II header (8 octets [42:49]) + * ERSPAN Version 1 (Type II) header (8 octets [42:49]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -24,11 +24,31 @@ * | Reserved | Index | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * + * + * ERSPAN Version 2 (Type III) header (12 octets [42:49]) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Ver | VLAN | COS |BSO|T| Session ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Timestamp | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SGT |P| FT | Hw ID |D|Gra|O| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Platform Specific SubHeader (8 octets, optional) + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Platf ID | Platform Specific Info | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Platform Specific Info | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB */ -#define ERSPAN_VERSION 0x1 +#include <uapi/linux/erspan.h> +#define ERSPAN_VERSION 0x1 /* ERSPAN type II */ #define VER_MASK 0xf000 #define VLAN_MASK 0x0fff #define COS_MASK 0xe000 @@ -37,6 +57,19 @@ #define ID_MASK 0x03ff #define INDEX_MASK 0xfffff +#define ERSPAN_VERSION2 0x2 /* ERSPAN type III*/ +#define BSO_MASK EN_MASK +#define SGT_MASK 0xffff0000 +#define P_MASK 0x8000 +#define FT_MASK 0x7c00 +#define HWID_MASK 0x03f0 +#define DIR_MASK 0x0008 +#define GRA_MASK 0x0006 +#define O_MASK 0x0001 + +#define HWID_OFFSET 4 +#define DIR_OFFSET 3 + enum erspan_encap_type { ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ @@ -44,18 +77,199 @@ enum erspan_encap_type { ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */ }; -struct erspan_metadata { - __be32 index; /* type II */ -}; +#define ERSPAN_V1_MDSIZE 4 +#define ERSPAN_V2_MDSIZE 8 -struct erspanhdr { - __be16 ver_vlan; -#define VER_OFFSET 12 - __be16 session_id; -#define COS_OFFSET 13 -#define EN_OFFSET 11 -#define T_OFFSET 10 - struct erspan_metadata md; +struct erspan_base_hdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 vlan_upper:4, + ver:4; + __u8 vlan:8; + __u8 session_id_upper:2, + t:1, + en:2, + cos:3; + __u8 session_id:8; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 ver: 4, + vlan_upper:4; + __u8 vlan:8; + __u8 cos:3, + en:2, + t:1, + session_id_upper:2; + __u8 session_id:8; +#else +#error "Please fix <asm/byteorder.h>" +#endif }; +static inline void set_session_id(struct erspan_base_hdr *ershdr, u16 id) +{ + ershdr->session_id = id & 0xff; + ershdr->session_id_upper = (id >> 8) & 0x3; +} + +static inline u16 get_session_id(const struct erspan_base_hdr *ershdr) +{ + return (ershdr->session_id_upper << 8) + ershdr->session_id; +} + +static inline void set_vlan(struct erspan_base_hdr *ershdr, u16 vlan) +{ + ershdr->vlan = vlan & 0xff; + ershdr->vlan_upper = (vlan >> 8) & 0xf; +} + +static inline u16 get_vlan(const struct erspan_base_hdr *ershdr) +{ + return (ershdr->vlan_upper << 8) + ershdr->vlan; +} + +static inline void set_hwid(struct erspan_md2 *md2, u8 hwid) +{ + md2->hwid = hwid & 0xf; + md2->hwid_upper = (hwid >> 4) & 0x3; +} + +static inline u8 get_hwid(const struct erspan_md2 *md2) +{ + return (md2->hwid_upper << 4) + md2->hwid; +} + +static inline int erspan_hdr_len(int version) +{ + return sizeof(struct erspan_base_hdr) + + (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE); +} + +static inline u8 tos_to_cos(u8 tos) +{ + u8 dscp, cos; + + dscp = tos >> 2; + cos = dscp >> 3; + return cos; +} + +static inline void erspan_build_header(struct sk_buff *skb, + u32 id, u32 index, + bool truncate, bool is_ipv4) +{ + struct ethhdr *eth = (struct ethhdr *)skb->data; + enum erspan_encap_type enc_type; + struct erspan_base_hdr *ershdr; + struct qtag_prefix { + __be16 eth_type; + __be16 tci; + } *qp; + u16 vlan_tci = 0; + u8 tos; + __be32 *idx; + + tos = is_ipv4 ? ip_hdr(skb)->tos : + (ipv6_hdr(skb)->priority << 4) + + (ipv6_hdr(skb)->flow_lbl[0] >> 4); + + enc_type = ERSPAN_ENCAP_NOVLAN; + + /* If mirrored packet has vlan tag, extract tci and + * perserve vlan header in the mirrored frame. + */ + if (eth->h_proto == htons(ETH_P_8021Q)) { + qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); + vlan_tci = ntohs(qp->tci); + enc_type = ERSPAN_ENCAP_INFRAME; + } + + skb_push(skb, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); + ershdr = (struct erspan_base_hdr *)skb->data; + memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); + + /* Build base header */ + ershdr->ver = ERSPAN_VERSION; + ershdr->cos = tos_to_cos(tos); + ershdr->en = enc_type; + ershdr->t = truncate; + set_vlan(ershdr, vlan_tci); + set_session_id(ershdr, id); + + /* Build metadata */ + idx = (__be32 *)(ershdr + 1); + *idx = htonl(index & INDEX_MASK); +} + +/* ERSPAN GRA: timestamp granularity + * 00b --> granularity = 100 microseconds + * 01b --> granularity = 100 nanoseconds + * 10b --> granularity = IEEE 1588 + * Here we only support 100 microseconds. + */ +static inline __be32 erspan_get_timestamp(void) +{ + u64 h_usecs; + ktime_t kt; + + kt = ktime_get_real(); + h_usecs = ktime_divns(kt, 100 * NSEC_PER_USEC); + + /* ERSPAN base header only has 32-bit, + * so it wraps around 4 days. + */ + return htonl((u32)h_usecs); +} + +static inline void erspan_build_header_v2(struct sk_buff *skb, + u32 id, u8 direction, u16 hwid, + bool truncate, bool is_ipv4) +{ + struct ethhdr *eth = (struct ethhdr *)skb->data; + struct erspan_base_hdr *ershdr; + struct erspan_md2 *md2; + struct qtag_prefix { + __be16 eth_type; + __be16 tci; + } *qp; + u16 vlan_tci = 0; + u8 gra = 0; /* 100 usec */ + u8 bso = 0; /* Bad/Short/Oversized */ + u8 sgt = 0; + u8 tos; + + tos = is_ipv4 ? ip_hdr(skb)->tos : + (ipv6_hdr(skb)->priority << 4) + + (ipv6_hdr(skb)->flow_lbl[0] >> 4); + + /* Unlike v1, v2 does not have En field, + * so only extract vlan tci field. + */ + if (eth->h_proto == htons(ETH_P_8021Q)) { + qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); + vlan_tci = ntohs(qp->tci); + } + + skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); + ershdr = (struct erspan_base_hdr *)skb->data; + memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); + + /* Build base header */ + ershdr->ver = ERSPAN_VERSION2; + ershdr->cos = tos_to_cos(tos); + ershdr->en = bso; + ershdr->t = truncate; + set_vlan(ershdr, vlan_tci); + set_session_id(ershdr, id); + + /* Build metadata */ + md2 = (struct erspan_md2 *)(ershdr + 1); + md2->timestamp = erspan_get_timestamp(); + md2->sgt = htons(sgt); + md2->p = 1; + md2->ft = 0; + md2->dir = direction; + md2->gra = gra; + md2->o = 0; + set_hwid(md2, hwid); +} + #endif diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index 669b9716dc7a..c91ec732afd6 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -9,6 +9,7 @@ struct fib_notifier_info { struct net *net; int family; + struct netlink_ext_ack *extack; }; enum fib_event_type { @@ -20,6 +21,8 @@ enum fib_event_type { FIB_EVENT_RULE_DEL, FIB_EVENT_NH_ADD, FIB_EVENT_NH_DEL, + FIB_EVENT_VIF_ADD, + FIB_EVENT_VIF_DEL, }; struct fib_notifier_ops { diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 22aba321282d..9a074776f70b 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -84,11 +84,11 @@ struct flow_dissector_key_ipv6_addrs { }; /** - * struct flow_dissector_key_tipc_addrs: - * @srcnode: source node address + * struct flow_dissector_key_tipc: + * @key: source node address combined with selector */ -struct flow_dissector_key_tipc_addrs { - __be32 srcnode; +struct flow_dissector_key_tipc { + __be32 key; }; /** @@ -100,7 +100,7 @@ struct flow_dissector_key_addrs { union { struct flow_dissector_key_ipv4_addrs v4addrs; struct flow_dissector_key_ipv6_addrs v6addrs; - struct flow_dissector_key_tipc_addrs tipcaddrs; + struct flow_dissector_key_tipc tipckey; }; }; @@ -192,7 +192,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ - FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ + FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */ FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ diff --git a/include/net/fq.h b/include/net/fq.h index 6d8521a30c5c..ac944a686840 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -90,6 +90,13 @@ typedef void fq_skb_free_t(struct fq *, struct fq_flow *, struct sk_buff *); +/* Return %true to filter (drop) the frame. */ +typedef bool fq_skb_filter_t(struct fq *, + struct fq_tin *, + struct fq_flow *, + struct sk_buff *, + void *); + typedef struct fq_flow *fq_flow_get_default_t(struct fq *, struct fq_tin *, int idx, diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index ac1a2317941e..be7c0fab3478 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -12,24 +12,22 @@ /* functions that are embedded into includer */ -static struct sk_buff *fq_flow_dequeue(struct fq *fq, - struct fq_flow *flow) +static void fq_adjust_removal(struct fq *fq, + struct fq_flow *flow, + struct sk_buff *skb) { struct fq_tin *tin = flow->tin; - struct fq_flow *i; - struct sk_buff *skb; - - lockdep_assert_held(&fq->lock); - - skb = __skb_dequeue(&flow->queue); - if (!skb) - return NULL; tin->backlog_bytes -= skb->len; tin->backlog_packets--; flow->backlog -= skb->len; fq->backlog--; fq->memory_usage -= skb->truesize; +} + +static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow) +{ + struct fq_flow *i; if (flow->backlog == 0) { list_del_init(&flow->backlogchain); @@ -43,6 +41,21 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq, list_move_tail(&flow->backlogchain, &i->backlogchain); } +} + +static struct sk_buff *fq_flow_dequeue(struct fq *fq, + struct fq_flow *flow) +{ + struct sk_buff *skb; + + lockdep_assert_held(&fq->lock); + + skb = __skb_dequeue(&flow->queue); + if (!skb) + return NULL; + + fq_adjust_removal(fq, flow, skb); + fq_rejigger_backlog(fq, flow); return skb; } @@ -191,6 +204,45 @@ static void fq_tin_enqueue(struct fq *fq, } } +static void fq_flow_filter(struct fq *fq, + struct fq_flow *flow, + fq_skb_filter_t filter_func, + void *filter_data, + fq_skb_free_t free_func) +{ + struct fq_tin *tin = flow->tin; + struct sk_buff *skb, *tmp; + + lockdep_assert_held(&fq->lock); + + skb_queue_walk_safe(&flow->queue, skb, tmp) { + if (!filter_func(fq, tin, flow, skb, filter_data)) + continue; + + __skb_unlink(skb, &flow->queue); + fq_adjust_removal(fq, flow, skb); + free_func(fq, tin, flow, skb); + } + + fq_rejigger_backlog(fq, flow); +} + +static void fq_tin_filter(struct fq *fq, + struct fq_tin *tin, + fq_skb_filter_t filter_func, + void *filter_data, + fq_skb_free_t free_func) +{ + struct fq_flow *flow; + + lockdep_assert_held(&fq->lock); + + list_for_each_entry(flow, &tin->new_flows, flowchain) + fq_flow_filter(fq, flow, filter_func, filter_data, free_func); + list_for_each_entry(flow, &tin->old_flows, flowchain) + fq_flow_filter(fq, flow, filter_func, filter_data, free_func); +} + static void fq_flow_reset(struct fq *fq, struct fq_flow *flow, fq_skb_free_t free_func) diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 304f7aa9cc01..0304ba2ae353 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -49,6 +49,9 @@ int gnet_stats_copy_rate_est(struct gnet_dump *d, int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue __percpu *cpu_q, struct gnet_stats_queue *q, __u32 qlen); +void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *cpu_q, + const struct gnet_stats_queue *q, __u32 qlen); int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); int gnet_stats_finish_copy(struct gnet_dump *d); diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 5ac169a735f4..decf6012a401 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -154,15 +154,12 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, /** * genlmsg_nlhdr - Obtain netlink header from user specified header * @user_hdr: user header as returned from genlmsg_put() - * @family: generic netlink family * * Returns pointer to netlink header. */ -static inline struct nlmsghdr * -genlmsg_nlhdr(void *user_hdr, const struct genl_family *family) +static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr) { return (struct nlmsghdr *)((char *)user_hdr - - family->hdrsize - GENL_HDRLEN - NLMSG_HDRLEN); } @@ -190,16 +187,14 @@ static inline int genlmsg_parse(const struct nlmsghdr *nlh, * genl_dump_check_consistent - check if sequence is consistent and advertise if not * @cb: netlink callback structure that stores the sequence number * @user_hdr: user header as returned from genlmsg_put() - * @family: generic netlink family * * Cf. nl_dump_check_consistent(), this just provides a wrapper to make it * simpler to use with generic netlink. */ static inline void genl_dump_check_consistent(struct netlink_callback *cb, - void *user_hdr, - const struct genl_family *family) + void *user_hdr) { - nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr, family)); + nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr)); } /** diff --git a/include/net/gue.h b/include/net/gue.h index 2fdb29ca74c2..fdad41469b65 100644 --- a/include/net/gue.h +++ b/include/net/gue.h @@ -44,10 +44,10 @@ struct guehdr { #else #error "Please fix <asm/byteorder.h>" #endif - __u8 proto_ctype; - __u16 flags; + __u8 proto_ctype; + __be16 flags; }; - __u32 word; + __be32 word; }; }; @@ -84,11 +84,10 @@ static inline size_t guehdr_priv_flags_len(__be32 flags) * if there is an unknown standard or private flags, or the options length for * the flags exceeds the options length specific in hlen of the GUE header. */ -static inline int validate_gue_flags(struct guehdr *guehdr, - size_t optlen) +static inline int validate_gue_flags(struct guehdr *guehdr, size_t optlen) { + __be16 flags = guehdr->flags; size_t len; - __be32 flags = guehdr->flags; if (flags & ~GUE_FLAGS_ALL) return 1; @@ -101,12 +100,13 @@ static inline int validate_gue_flags(struct guehdr *guehdr, /* Private flags are last four bytes accounted in * guehdr_flags_len */ - flags = *(__be32 *)((void *)&guehdr[1] + len - GUE_LEN_PRIV); + __be32 pflags = *(__be32 *)((void *)&guehdr[1] + + len - GUE_LEN_PRIV); - if (flags & ~GUE_PFLAGS_ALL) + if (pflags & ~GUE_PFLAGS_ALL) return 1; - len += guehdr_priv_flags_len(flags); + len += guehdr_priv_flags_len(pflags); if (len > optlen) return 1; } diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 13e4c89a8231..c1a93ce35e62 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops { * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data + * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -101,6 +102,7 @@ struct inet_connection_sock { const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; void *icsk_ulp_data; + struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, icsk_ca_setsockopt:1, @@ -169,9 +171,9 @@ enum inet_csk_ack_state_t { }; void inet_csk_init_xmit_timers(struct sock *sk, - void (*retransmit_handler)(unsigned long), - void (*delack_handler)(unsigned long), - void (*keepalive_handler)(unsigned long)); + void (*retransmit_handler)(struct timer_list *), + void (*delack_handler)(struct timer_list *), + void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) @@ -305,10 +307,10 @@ void inet_csk_prepare_forced_close(struct sock *sk); /* * LISTEN is a special case for poll.. */ -static inline unsigned int inet_csk_listen_poll(const struct sock *sk) +static inline __poll_t inet_csk_listen_poll(const struct sock *sk) { return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? - (POLLIN | POLLRDNORM) : 0; + (EPOLLIN | EPOLLRDNORM) : 0; } int inet_csk_listen_start(struct sock *sk, int backlog); diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index d30e4c869438..482a1b705362 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -134,11 +134,6 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) return 1; } -static inline void IP6_ECN_clear(struct ipv6hdr *iph) -{ - *(__be32*)iph &= ~htonl(INET_ECN_MASK << 20); -} - static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) { dscp &= ~INET_ECN_MASK; diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index a6e4edd8d4a2..351f0c3cdcd9 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -96,7 +96,7 @@ struct inet_frags { void (*constructor)(struct inet_frag_queue *q, const void *arg); void (*destructor)(struct inet_frag_queue *); - void (*frag_expire)(unsigned long data); + void (*frag_expire)(struct timer_list *t); struct kmem_cache *frags_cachep; const char *frags_cache_name; }; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 2dbbbff5e1e3..9141e95529e7 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -111,6 +111,7 @@ struct inet_bind_hashbucket { */ struct inet_listen_hashbucket { spinlock_t lock; + unsigned int count; struct hlist_head head; }; @@ -132,12 +133,13 @@ struct inet_hashinfo { /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ + struct kmem_cache *bind_bucket_cachep; struct inet_bind_hashbucket *bhash; - unsigned int bhash_size; - /* 4 bytes hole on 64 bit */ - struct kmem_cache *bind_bucket_cachep; + /* The 2nd listener table hashed by local port and address */ + unsigned int lhash2_mask; + struct inet_listen_hashbucket *lhash2; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. @@ -145,14 +147,25 @@ struct inet_hashinfo { * Now align to a new cache line as all the following members * might be often dirty. */ - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. + /* All sockets in TCP_LISTEN state will be in listening_hash. + * This is the only table where wildcard'd TCP sockets can + * exist. listening_hash is only hashed by local port number. + * If lhash2 is initialized, the same socket will also be hashed + * to lhash2 by port and address. */ struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] ____cacheline_aligned_in_smp; }; +#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ + hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) + +static inline struct inet_listen_hashbucket * +inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash) +{ + return &h->lhash2[hash & h->lhash2_mask]; +} + static inline struct inet_ehash_bucket *inet_ehash_bucket( struct inet_hashinfo *hashinfo, unsigned int hash) @@ -208,6 +221,10 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child); void inet_put_port(struct sock *sk); void inet_hashinfo_init(struct inet_hashinfo *h); +void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, + unsigned long numentries, int scale, + unsigned long low_limit, + unsigned long high_limit); bool inet_ehash_insert(struct sock *sk, struct sock *osk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index db8162dd8c0b..0a671c32d6b9 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -17,7 +17,6 @@ #define _INET_SOCK_H #include <linux/bitops.h> -#include <linux/kmemcheck.h> #include <linux/string.h> #include <linux/types.h> #include <linux/jhash.h> @@ -84,7 +83,6 @@ struct inet_request_sock { #define ireq_state req.__req_common.skc_state #define ireq_family req.__req_common.skc_family - kmemcheck_bitfield_begin(flags); u16 snd_wscale : 4, rcv_wscale : 4, tstamp_ok : 1, @@ -92,8 +90,8 @@ struct inet_request_sock { wscale_ok : 1, ecn_ok : 1, acked : 1, - no_srccheck: 1; - kmemcheck_bitfield_end(flags); + no_srccheck: 1, + smc_ok : 1; u32 ir_mark; union { struct ip_options_rcu __rcu *ireq_opt; @@ -293,6 +291,31 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, int inet_sk_rebuild_header(struct sock *sk); +/** + * inet_sk_state_load - read sk->sk_state for lockless contexts + * @sk: socket pointer + * + * Paired with inet_sk_state_store(). Used in places we don't hold socket lock: + * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... + */ +static inline int inet_sk_state_load(const struct sock *sk) +{ + /* state change might impact lockless readers. */ + return smp_load_acquire(&sk->sk_state); +} + +/** + * inet_sk_state_store - update sk->sk_state + * @sk: socket pointer + * @newstate: new state + * + * Paired with inet_sk_state_load(). Should be used in contexts where + * state change might impact lockless readers. + */ +void inet_sk_state_store(struct sock *sk, int newstate); + +void inet_sk_set_state(struct sock *sk, int state); + static inline unsigned int __inet_ehashfn(const __be32 laddr, const __u16 lport, const __be32 faddr, diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 6a75d67a30fd..899495589a7e 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -15,8 +15,6 @@ #ifndef _INET_TIMEWAIT_SOCK_ #define _INET_TIMEWAIT_SOCK_ - -#include <linux/kmemcheck.h> #include <linux/list.h> #include <linux/timer.h> #include <linux/types.h> @@ -69,14 +67,12 @@ struct inet_timewait_sock { /* Socket demultiplex comparisons on incoming packets. */ /* these three are in inet_sock */ __be16 tw_sport; - kmemcheck_bitfield_begin(flags); /* And these are ours. */ unsigned int tw_kill : 1, tw_transparent : 1, tw_flowlabel : 20, tw_pad : 2, /* 2 bits hole */ tw_tos : 8; - kmemcheck_bitfield_end(flags); struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; }; @@ -97,8 +93,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, struct inet_timewait_death_row *dr, const int state); -void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, - struct inet_hashinfo *hashinfo); +void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, + struct inet_hashinfo *hashinfo); void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); diff --git a/include/net/ip.h b/include/net/ip.h index 9896f46cbbf1..746abff9ce51 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -26,14 +26,17 @@ #include <linux/ip.h> #include <linux/in.h> #include <linux/skbuff.h> +#include <linux/jhash.h> #include <net/inet_sock.h> #include <net/route.h> #include <net/snmp.h> #include <net/flow.h> #include <net/flow_dissector.h> +#include <net/netns/hash.h> #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ +#define IPV4_MIN_MTU 68 /* RFC 791 */ struct sock; @@ -521,6 +524,13 @@ static inline unsigned int ipv4_addr_hash(__be32 ip) return (__force unsigned int) ip; } +static inline u32 ipv4_portaddr_hash(const struct net *net, + __be32 saddr, + unsigned int port) +{ + return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; +} + bool ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index d060d711a624..34ec321d6a03 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -29,6 +29,14 @@ #define FIB6_TABLE_HASHSZ 1 #endif +#define RT6_DEBUG 2 + +#if RT6_DEBUG >= 3 +#define RT6_TRACE(x...) pr_debug(x) +#else +#define RT6_TRACE(x...) do { ; } while (0) +#endif + struct rt6_info; struct fib6_config { @@ -60,25 +68,30 @@ struct fib6_config { }; struct fib6_node { - struct fib6_node *parent; - struct fib6_node *left; - struct fib6_node *right; + struct fib6_node __rcu *parent; + struct fib6_node __rcu *left; + struct fib6_node __rcu *right; #ifdef CONFIG_IPV6_SUBTREES - struct fib6_node *subtree; + struct fib6_node __rcu *subtree; #endif - struct rt6_info *leaf; + struct rt6_info __rcu *leaf; __u16 fn_bit; /* bit key */ __u16 fn_flags; int fn_sernum; - struct rt6_info *rr_ptr; + struct rt6_info __rcu *rr_ptr; struct rcu_head rcu; }; +struct fib6_gc_args { + int timeout; + int more; +}; + #ifndef CONFIG_IPV6_SUBTREES #define FIB6_SUBTREE(fn) NULL #else -#define FIB6_SUBTREE(fn) ((fn)->subtree) +#define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif struct mx6_config { @@ -98,8 +111,26 @@ struct rt6key { struct fib6_table; +struct rt6_exception_bucket { + struct hlist_head chain; + int depth; +}; + +struct rt6_exception { + struct hlist_node hlist; + struct rt6_info *rt6i; + unsigned long stamp; + struct rcu_head rcu; +}; + +#define FIB6_EXCEPTION_BUCKET_SIZE_SHIFT 10 +#define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) +#define FIB6_MAX_DEPTH 5 + struct rt6_info { struct dst_entry dst; + struct rt6_info __rcu *rt6_next; + struct rt6_info *from; /* * Tail elements of dst_entry (__refcnt etc.) @@ -118,6 +149,7 @@ struct rt6_info { */ struct list_head rt6i_siblings; unsigned int rt6i_nsiblings; + atomic_t rt6i_nh_upper_bound; atomic_t rt6i_ref; @@ -134,14 +166,27 @@ struct rt6_info { struct inet6_dev *rt6i_idev; struct rt6_info * __percpu *rt6i_pcpu; + struct rt6_exception_bucket __rcu *rt6i_exception_bucket; u32 rt6i_metric; u32 rt6i_pmtu; /* more non-fragment space at head required */ + int rt6i_nh_weight; unsigned short rt6i_nfheader_len; u8 rt6i_protocol; + u8 exception_bucket_flushed:1, + should_flush:1, + unused:6; }; +#define for_each_fib6_node_rt_rcu(fn) \ + for (rt = rcu_dereference((fn)->leaf); rt; \ + rt = rcu_dereference(rt->rt6_next)) + +#define for_each_fib6_walker_rt(w) \ + for (rt = (w)->leaf; rt; \ + rt = rcu_dereference_protected(rt->rt6_next, 1)) + static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; @@ -163,11 +208,9 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) { struct rt6_info *rt; - for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); - rt = (struct rt6_info *)rt->dst.from); + for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from); if (rt && rt != rt0) rt0->dst.expires = rt->dst.expires; - dst_set_expires(&rt0->dst, timeout); rt0->rt6i_flags |= RTF_EXPIRES; } @@ -188,6 +231,8 @@ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, if (fn) { *cookie = fn->fn_sernum; + /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */ + smp_rmb(); status = true; } @@ -200,8 +245,8 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt) u32 cookie = 0; if (rt->rt6i_flags & RTF_PCPU || - (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) - rt = (struct rt6_info *)(rt->dst.from); + (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) + rt = rt->from; rt6_get_cookie_safe(rt, &cookie); @@ -248,7 +293,6 @@ struct fib6_walker { struct fib6_node *root, *node; struct rt6_info *leaf; enum fib6_walk_state state; - bool prune; unsigned int skip; unsigned int count; int (*func)(struct fib6_walker *); @@ -256,12 +300,15 @@ struct fib6_walker { }; struct rt6_statistics { - __u32 fib_nodes; - __u32 fib_route_nodes; - __u32 fib_rt_alloc; /* permanent routes */ - __u32 fib_rt_entries; /* rt entries in table */ - __u32 fib_rt_cache; /* cache routes */ - __u32 fib_discarded_routes; + __u32 fib_nodes; /* all fib6 nodes */ + __u32 fib_route_nodes; /* intermediate nodes */ + __u32 fib_rt_entries; /* rt entries in fib table */ + __u32 fib_rt_cache; /* cached rt entries in exception table */ + __u32 fib_discarded_routes; /* total number of routes delete */ + + /* The following stats are not protected by any lock */ + atomic_t fib_rt_alloc; /* total number of routes alloced */ + atomic_t fib_rt_uncache; /* rt entries in uncached list */ }; #define RTN_TL_ROOT 0x0001 @@ -277,7 +324,7 @@ struct rt6_statistics { struct fib6_table { struct hlist_node tb6_hlist; u32 tb6_id; - rwlock_t tb6_lock; + spinlock_t tb6_lock; struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; @@ -325,7 +372,8 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, - const struct in6_addr *saddr, int src_len); + const struct in6_addr *saddr, int src_len, + bool exact_match); void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), void *arg); @@ -358,6 +406,9 @@ void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb); +void fib6_update_sernum(struct rt6_info *rt); +void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index bee528135cf1..27d23a65f3cd 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } +static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) +{ + return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + RTF_GATEWAY; +} + void ip6_route_input(struct sk_buff *skb); struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, @@ -96,6 +102,11 @@ int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack); int ip6_ins_rt(struct rt6_info *); int ip6_del_rt(struct rt6_info *); +void rt6_flush_exceptions(struct rt6_info *rt); +int rt6_remove_exception_rt(struct rt6_info *rt); +void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now); + static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, const struct in6_addr *daddr, unsigned int prefs, @@ -160,10 +171,13 @@ struct rt6_rtnl_dump_arg { }; int rt6_dump_route(struct rt6_info *rt, void *p_arg); -void rt6_ifdown(struct net *net, struct net_device *dev); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); +void rt6_sync_up(struct net_device *dev, unsigned int nh_flags); +void rt6_disable_ip(struct net_device *dev, unsigned long event); +void rt6_sync_down_dev(struct net_device *dev, unsigned long event); +void rt6_multipath_rebalance(struct rt6_info *rt); static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) { diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index d66f70f63734..236e40ba06bf 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -36,6 +36,10 @@ struct __ip6_tnl_parm { __be32 o_key; __u32 fwmark; + __u32 index; /* ERSPAN type II index */ + __u8 erspan_ver; /* ERSPAN version */ + __u8 dir; /* direction */ + __u16 hwid; /* hwid */ }; /* IPv6 tunnel */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1a7f7e424320..f80524396c06 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -122,9 +122,6 @@ struct fib_info { #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - int fib_weight; -#endif struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index eb2321a13506..1f16773cfd76 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -116,8 +116,11 @@ struct ip_tunnel { u32 o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ - /* This field used only by ERSPAN */ + /* These four fields used only by ERSPAN */ u32 index; /* ERSPAN type II index */ + u8 erspan_ver; /* ERSPAN version */ + u8 dir; /* ERSPAN direction */ + u16 hwid; /* ERSPAN hardware ID */ struct dst_cache dst_cache; @@ -259,7 +262,8 @@ int ip_tunnel_get_iflink(const struct net_device *dev); int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); -void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); +void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, + struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 5d08c1950e7d..eb0bec043c96 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -69,8 +69,7 @@ struct ip_vs_iphdr { }; static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, - int len, void *buffer, - const struct ip_vs_iphdr *ipvsh) + int len, void *buffer) { return skb_header_pointer(skb, offset, len, buffer); } @@ -984,12 +983,12 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) static inline int sysctl_sync_period(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]); + return READ_ONCE(ipvs->sysctl_sync_threshold[1]); } static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period); + return READ_ONCE(ipvs->sysctl_sync_refresh_period); } static inline int sysctl_sync_retries(struct netns_ipvs *ipvs) @@ -1014,7 +1013,7 @@ static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs) static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_ports); + return READ_ONCE(ipvs->sysctl_sync_ports); } static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 6eac5cf8f1e6..8606c9113d3f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -22,6 +22,7 @@ #include <net/flow.h> #include <net/flow_dissector.h> #include <net/snmp.h> +#include <net/netns/hash.h> #define SIN6_LEN_RFC2133 24 @@ -51,6 +52,46 @@ #define IPV6_DEFAULT_HOPLIMIT 64 #define IPV6_DEFAULT_MCASTHOPS 1 +/* Limits on Hop-by-Hop and Destination options. + * + * Per RFC8200 there is no limit on the maximum number or lengths of options in + * Hop-by-Hop or Destination options other then the packet must fit in an MTU. + * We allow configurable limits in order to mitigate potential denial of + * service attacks. + * + * There are three limits that may be set: + * - Limit the number of options in a Hop-by-Hop or Destination options + * extension header + * - Limit the byte length of a Hop-by-Hop or Destination options extension + * header + * - Disallow unknown options + * + * The limits are expressed in corresponding sysctls: + * + * ipv6.sysctl.max_dst_opts_cnt + * ipv6.sysctl.max_hbh_opts_cnt + * ipv6.sysctl.max_dst_opts_len + * ipv6.sysctl.max_hbh_opts_len + * + * max_*_opts_cnt is the number of TLVs that are allowed for Destination + * options or Hop-by-Hop options. If the number is less than zero then unknown + * TLVs are disallowed and the number of known options that are allowed is the + * absolute value. Setting the value to INT_MAX indicates no limit. + * + * max_*_opts_len is the length limit in bytes of a Destination or + * Hop-by-Hop options extension header. Setting the value to INT_MAX + * indicates no length limit. + * + * If a limit is exceeded when processing an extension header the packet is + * silently discarded. + */ + +/* Default limits for Hop-by-Hop and Destination options */ +#define IP6_DEFAULT_MAX_DST_OPTS_CNT 8 +#define IP6_DEFAULT_MAX_HBH_OPTS_CNT 8 +#define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */ +#define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */ + /* * Addr type * @@ -291,6 +332,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { @@ -300,8 +342,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl) void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); -int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, - struct icmp6hdr *thdr, int len); +void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len); int ip6_ra_control(struct sock *sk, int sel); @@ -633,6 +675,22 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) cpu_to_be32(0x0000ffff))) == 0UL; } +static inline u32 ipv6_portaddr_hash(const struct net *net, + const struct in6_addr *addr6, + unsigned int port) +{ + unsigned int hash, mix = net_hash_mix(net); + + if (ipv6_addr_any(addr6)) + hash = jhash_1word(0, mix); + else if (ipv6_addr_v4mapped(addr6)) + hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); + else + hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); + + return hash ^ port; +} + /* * Check for a RFC 4843 ORCHID address * (Overlay Routable Cryptographic Hash Identifiers) @@ -727,7 +785,7 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr); -void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); +__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); @@ -912,6 +970,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk) &inet6_sk(sk)->cork); } +unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst); + int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index 070e93a17c59..f4c21b5a1242 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -153,7 +153,7 @@ struct iucv_sock_list { atomic_t autobind_name; }; -unsigned int iucv_sock_poll(struct file *file, struct socket *sock, +__poll_t iucv_sock_poll(struct file *file, struct socket *sock, poll_table *wait); void iucv_sock_link(struct iucv_sock_list *l, struct sock *s); void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s); diff --git a/include/net/llc_c_ac.h b/include/net/llc_c_ac.h index f3be818e73c1..e766300b3e99 100644 --- a/include/net/llc_c_ac.h +++ b/include/net/llc_c_ac.h @@ -171,10 +171,10 @@ int llc_conn_ac_rst_sendack_flag(struct sock *sk, struct sk_buff *skb); int llc_conn_ac_send_i_rsp_as_ack(struct sock *sk, struct sk_buff *skb); int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb); -void llc_conn_busy_tmr_cb(unsigned long timeout_data); -void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data); -void llc_conn_ack_tmr_cb(unsigned long timeout_data); -void llc_conn_rej_tmr_cb(unsigned long timeout_data); +void llc_conn_busy_tmr_cb(struct timer_list *t); +void llc_conn_pf_cycle_tmr_cb(struct timer_list *t); +void llc_conn_ack_tmr_cb(struct timer_list *t); +void llc_conn_rej_tmr_cb(struct timer_list *t); void llc_conn_set_p_flag(struct sock *sk, u8 value); #endif /* LLC_C_AC_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 885690fa39c8..c96511fa9198 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1552,6 +1552,9 @@ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); * @IEEE80211_KEY_FLAG_RESERVE_TAILROOM: This flag should be set by the * driver for a key to indicate that sufficient tailroom must always * be reserved for ICV or MIC, even when HW encryption is enabled. + * @IEEE80211_KEY_FLAG_PUT_MIC_SPACE: This flag should be set by the driver for + * a TKIP key if it only requires MIC space. Do not set together with + * @IEEE80211_KEY_FLAG_GENERATE_MMIC on the same key. */ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0), @@ -1562,6 +1565,7 @@ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_PUT_IV_SPACE = BIT(5), IEEE80211_KEY_FLAG_RX_MGMT = BIT(6), IEEE80211_KEY_FLAG_RESERVE_TAILROOM = BIT(7), + IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8), }; /** @@ -1593,8 +1597,8 @@ struct ieee80211_key_conf { u8 icv_len; u8 iv_len; u8 hw_key_idx; - u8 flags; s8 keyidx; + u16 flags; u8 keylen; u8 key[0]; }; @@ -2056,6 +2060,9 @@ struct ieee80211_txq { * The stack will not do fragmentation. * The callback for @set_frag_threshold should be set as well. * + * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on + * TDLS links. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2098,6 +2105,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TX_FRAG_LIST, IEEE80211_HW_REPORTS_LOW_ACK, IEEE80211_HW_SUPPORTS_TX_FRAG, + IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -4141,7 +4149,7 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid); * The TX headroom reserved by mac80211 for its own tx_status functions. * This is enough for the radiotap header. */ -#define IEEE80211_TX_STATUS_HEADROOM 14 +#define IEEE80211_TX_STATUS_HEADROOM ALIGN(14, 4) /** * ieee80211_sta_set_buffered - inform mac80211 about driver-buffered frames @@ -4470,18 +4478,24 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, * ieee80211_nullfunc_get - retrieve a nullfunc template * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @qos_ok: QoS NDP is acceptable to the caller, this should be set + * if at all possible * * Creates a Nullfunc template which can, for example, uploaded to * hardware. The template must be updated after association so that correct * BSSID and address is used. * + * If @qos_ndp is set and the association is to an AP with QoS/WMM, the + * returned packet will be QoS NDP. + * * Note: Caller (or hardware) is responsible for setting the * &IEEE80211_FCTL_PM bit as well as Duration and Sequence Control fields. * * Return: The nullfunc template. %NULL on error. */ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + bool qos_ok); /** * ieee80211_probereq_get - retrieve a Probe Request template @@ -5441,8 +5455,14 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, */ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); +/** + * ieee80211_manage_rx_ba_offl - helper to queue an RX BA work + * @vif: &struct ieee80211_vif pointer from the add_interface callback + * @addr: station mac address + * @tid: the rx tid + */ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr, - unsigned int bit); + unsigned int tid); /** * ieee80211_start_rx_ba_session_offl - start a Rx BA session diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a964366a7ef5..e421f86af043 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -191,8 +191,8 @@ struct neigh_hash_table { struct neigh_table { int family; - int entry_size; - int key_len; + unsigned int entry_size; + unsigned int key_len; __be16 protocol; __u32 (*hash)(const void *pkey, const struct net_device *dev, diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 10f99dafd5ac..f306b2aa15a4 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -51,7 +51,7 @@ struct net { refcount_t passive; /* To decided when the network * namespace should be freed. */ - atomic_t count; /* To decided when the network + refcount_t count; /* To decided when the network * namespace should be shut down. */ spinlock_t rules_mod_lock; @@ -195,7 +195,7 @@ void __put_net(struct net *net); static inline struct net *get_net(struct net *net) { - atomic_inc(&net->count); + refcount_inc(&net->count); return net; } @@ -206,14 +206,14 @@ static inline struct net *maybe_get_net(struct net *net) * exists. If the reference count is zero this * function fails and returns NULL. */ - if (!atomic_inc_not_zero(&net->count)) + if (!refcount_inc_not_zero(&net->count)) net = NULL; return net; } static inline void put_net(struct net *net) { - if (atomic_dec_and_test(&net->count)) + if (refcount_dec_and_test(&net->count)) __put_net(net); } @@ -223,6 +223,11 @@ int net_eq(const struct net *net1, const struct net *net2) return net1 == net2; } +static inline int check_net(const struct net *net) +{ + return refcount_read(&net->count) != 0; +} + void net_drop_ns(void *); #else @@ -247,6 +252,11 @@ int net_eq(const struct net *net1, const struct net *net2) return 1; } +static inline int check_net(const struct net *net) +{ + return 1; +} + #define net_drop_ns NULL #endif diff --git a/include/net/netevent.h b/include/net/netevent.h index f728d9cad170..40e7bab68490 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -26,6 +26,7 @@ enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ + NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 2cc728ef8cd0..73f825732326 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -11,19 +11,19 @@ #define _NF_CONNTRACK_IPV4_H -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; +const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_DCCP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; #endif #ifdef CONFIG_NF_CT_PROTO_SCTP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; #endif #ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; #endif int nf_conntrack_ipv4_compat_init(void); diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index 79a335c0d8b8..effa8dfba68c 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -2,19 +2,19 @@ #ifndef _NF_CONNTRACK_IPV6_H #define _NF_CONNTRACK_IPV6_H -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; +extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; #ifdef CONFIG_NF_CT_PROTO_DCCP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; #endif #ifdef CONFIG_NF_CT_PROTO_SCTP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; #endif #ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; #endif #include <linux/sysctl.h> diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 792c3f6d30ce..062dc19b5840 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -213,11 +213,6 @@ static inline bool nf_ct_kill(struct nf_conn *ct) return nf_ct_delete(ct, 0, 0); } -/* These are for NAT. Icky. */ -extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, - enum ip_conntrack_dir dir, - u32 seq); - /* Set all unconfirmed conntrack as dying */ void nf_ct_unconfirmed_destroy(struct net *); @@ -285,7 +280,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) struct kernel_param; -int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); +int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp); int nf_conntrack_hash_resize(unsigned int hashsize); extern struct hlist_nulls_head *nf_conntrack_hash; diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h new file mode 100644 index 000000000000..adf8db44cf86 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_count.h @@ -0,0 +1,17 @@ +#ifndef _NF_CONNTRACK_COUNT_H +#define _NF_CONNTRACK_COUNT_H + +struct nf_conncount_data; + +struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, + unsigned int keylen); +void nf_conncount_destroy(struct net *net, unsigned int family, + struct nf_conncount_data *data); + +unsigned int nf_conncount_count(struct net *net, + struct nf_conncount_data *data, + const u32 *key, + unsigned int family, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); +#endif diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 510192eb7e9d..a7220eef9aee 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -27,6 +27,9 @@ struct nf_conntrack_l4proto { /* Resolve clashes on insertion races. */ bool allow_clash; + /* protoinfo nlattr size, closes a hole */ + u16 nlattr_size; + /* Try to fill in the third arg: dataoff is offset past network protocol hdr. Return true if possible. */ bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, @@ -43,7 +46,6 @@ struct nf_conntrack_l4proto { const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts); /* Called when a new connection for this protocol found; @@ -67,8 +69,6 @@ struct nf_conntrack_l4proto { /* convert protoinfo to nfnetink attributes */ int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, struct nf_conn *ct); - /* Calculate protoinfo nlattr size */ - int (*nlattr_size)(void); /* convert nfnetlink attributes to protoinfo */ int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct); @@ -76,13 +76,11 @@ struct nf_conntrack_l4proto { int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); /* Calculate tuple nlattr size */ - int (*nlattr_tuple_size)(void); + unsigned int (*nlattr_tuple_size)(void); int (*nlattr_to_tuple)(struct nlattr *tb[], struct nf_conntrack_tuple *t); const struct nla_policy *nla_policy; - size_t nla_size; - #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) struct { int (*nlattr_to_obj)(struct nlattr *tb[], @@ -110,7 +108,7 @@ struct nf_conntrack_l4proto { }; /* Existing built-in generic protocol */ -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO 256 @@ -127,18 +125,18 @@ int nf_ct_l4proto_pernet_register_one(struct net *net, void nf_ct_l4proto_pernet_unregister_one(struct net *net, const struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *const proto[], + const struct nf_conntrack_l4proto *const proto[], unsigned int num_proto); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *const proto[], + const struct nf_conntrack_l4proto *const proto[], unsigned int num_proto); /* Protocol global registration. */ -int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto); +int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[], +int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[], unsigned int num_proto); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[], +void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[], unsigned int num_proto); /* Generic netlink helpers */ @@ -146,15 +144,27 @@ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple); int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t); -int nf_ct_port_nlattr_tuple_size(void); +unsigned int nf_ct_port_nlattr_tuple_size(void); extern const struct nla_policy nf_ct_port_nla_policy[]; #ifdef CONFIG_SYSCTL -#define LOG_INVALID(net, proto) \ - ((net)->ct.sysctl_log_invalid == (proto) || \ - (net)->ct.sysctl_log_invalid == IPPROTO_RAW) +__printf(3, 4) __cold +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const char *fmt, ...); +__printf(5, 6) __cold +void nf_l4proto_log_invalid(const struct sk_buff *skb, + struct net *net, + u16 pf, u8 protonum, + const char *fmt, ...); #else -static inline int LOG_INVALID(struct net *net, int proto) { return 0; } +static inline __printf(5, 6) __cold +void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, + u16 pf, u8 protonum, const char *fmt, ...) {} +static inline __printf(3, 4) __cold +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const char *fmt, ...) { } #endif /* CONFIG_SYSCTL */ #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h new file mode 100644 index 000000000000..833752dd0c58 --- /dev/null +++ b/include/net/netfilter/nf_flow_table.h @@ -0,0 +1,126 @@ +#ifndef _NF_FLOW_TABLE_H +#define _NF_FLOW_TABLE_H + +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/rhashtable.h> +#include <linux/rcupdate.h> +#include <net/dst.h> + +struct nf_flowtable; + +struct nf_flowtable_type { + struct list_head list; + int family; + void (*gc)(struct work_struct *work); + void (*free)(struct nf_flowtable *ft); + const struct rhashtable_params *params; + nf_hookfn *hook; + struct module *owner; +}; + +struct nf_flowtable { + struct rhashtable rhashtable; + const struct nf_flowtable_type *type; + struct delayed_work gc_work; +}; + +enum flow_offload_tuple_dir { + FLOW_OFFLOAD_DIR_ORIGINAL, + FLOW_OFFLOAD_DIR_REPLY, + __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY, +}; +#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1) + +struct flow_offload_tuple { + union { + struct in_addr src_v4; + struct in6_addr src_v6; + }; + union { + struct in_addr dst_v4; + struct in6_addr dst_v6; + }; + struct { + __be16 src_port; + __be16 dst_port; + }; + + int iifidx; + + u8 l3proto; + u8 l4proto; + u8 dir; + + int oifidx; + + struct dst_entry *dst_cache; +}; + +struct flow_offload_tuple_rhash { + struct rhash_head node; + struct flow_offload_tuple tuple; +}; + +#define FLOW_OFFLOAD_SNAT 0x1 +#define FLOW_OFFLOAD_DNAT 0x2 +#define FLOW_OFFLOAD_DYING 0x4 + +struct flow_offload { + struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; + u32 flags; + union { + /* Your private driver data here. */ + u32 timeout; + }; +}; + +#define NF_FLOW_TIMEOUT (30 * HZ) + +struct nf_flow_route { + struct { + struct dst_entry *dst; + int ifindex; + } tuple[FLOW_OFFLOAD_DIR_MAX]; +}; + +struct flow_offload *flow_offload_alloc(struct nf_conn *ct, + struct nf_flow_route *route); +void flow_offload_free(struct flow_offload *flow); + +int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); +struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, + struct flow_offload_tuple *tuple); +int nf_flow_table_iterate(struct nf_flowtable *flow_table, + void (*iter)(struct flow_offload *flow, void *data), + void *data); + +void nf_flow_table_cleanup(struct net *net, struct net_device *dev); + +void nf_flow_table_free(struct nf_flowtable *flow_table); +void nf_flow_offload_work_gc(struct work_struct *work); +extern const struct rhashtable_params nf_flow_offload_rhash_params; + +void flow_offload_dead(struct flow_offload *flow); + +int nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); +int nf_flow_dnat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); + +struct flow_ports { + __be16 source, dest; +}; + +unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); +unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); + +#define MODULE_ALIAS_NF_FLOWTABLE(family) \ + MODULE_ALIAS("nf-flowtable-" __stringify(family)) + +#endif /* _FLOW_OFFLOAD_H */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 814058d0f167..a50a69f5334c 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -25,7 +25,7 @@ struct nf_queue_entry { struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); - unsigned int (*nf_hook_drop)(struct net *net); + void (*nf_hook_drop)(struct net *net); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 079c69cae2f6..663b015dace5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -9,6 +9,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/nf_tables.h> #include <linux/u64_stats_sync.h> +#include <net/netfilter/nf_flow_table.h> #include <net/netlink.h> #define NFT_JUMP_STACK_SIZE 16 @@ -54,8 +55,8 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, pkt->xt.state = state; } -static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, - struct sk_buff *skb) +static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, + struct sk_buff *skb) { pkt->tprot_set = false; pkt->tprot = 0; @@ -63,14 +64,6 @@ static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, pkt->xt.fragoff = 0; } -static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - nft_set_pktinfo(pkt, skb, state); - nft_set_pktinfo_proto_unspec(pkt, skb); -} - /** * struct nft_verdict - nf_tables verdict * @@ -150,22 +143,22 @@ static inline void nft_data_debug(const struct nft_data *data) * struct nft_ctx - nf_tables rule/set context * * @net: net namespace - * @afi: address family info * @table: the table the chain is contained in * @chain: the chain the rule is contained in * @nla: netlink attributes * @portid: netlink portID of the original message * @seq: netlink sequence number + * @family: protocol family * @report: notify via unicast netlink message */ struct nft_ctx { struct net *net; - struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; const struct nlattr * const *nla; u32 portid; u32 seq; + u8 family; bool report; }; @@ -312,6 +305,7 @@ struct nft_expr; * @flush: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts + * @get: get set elements * @privsize: function to return size of set private data * @init: initialize private data of new set instance * @destroy: destroy private data of set instance @@ -351,6 +345,10 @@ struct nft_set_ops { void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter); + void * (*get)(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem, + unsigned int flags); unsigned int (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); @@ -376,6 +374,7 @@ void nft_unregister_set(struct nft_set_type *type); * @list: table set list node * @bindings: list of set bindings * @name: name of the set + * @handle: unique handle of the set * @ktype: key type (numeric type defined by userspace, not used in the kernel) * @dtype: data type (verdict or numeric type defined by userspace) * @objtype: object type (see NFT_OBJECT_* definitions) @@ -398,6 +397,7 @@ struct nft_set { struct list_head list; struct list_head bindings; char *name; + u64 handle; u32 ktype; u32 dtype; u32 objtype; @@ -419,6 +419,11 @@ struct nft_set { __attribute__((aligned(__alignof__(u64)))); }; +static inline bool nft_set_is_anonymous(const struct nft_set *set) +{ + return set->flags & NFT_SET_ANONYMOUS; +} + static inline void *nft_set_priv(const struct nft_set *set) { return (void *)set->data; @@ -878,7 +883,7 @@ enum nft_chain_type { * @family: address family * @owner: module owner * @hook_mask: mask of valid hooks - * @hooks: hookfn overrides + * @hooks: array of hook functions */ struct nf_chain_type { const char *name; @@ -900,8 +905,6 @@ struct nft_stats { struct u64_stats_sync syncp; }; -#define NFT_HOOK_OPS_MAX 2 - /** * struct nft_base_chain - nf_tables base chain * @@ -913,7 +916,7 @@ struct nft_stats { * @dev_name: device name that this base chain is attached to (if any) */ struct nft_base_chain { - struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; + struct nf_hook_ops ops; const struct nf_chain_type *type; u8 policy; u8 flags; @@ -943,10 +946,13 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @chains: chains in the table * @sets: sets in the table * @objects: stateful objects in the table + * @flowtables: flow tables in the table * @hgenerator: handle generator state + * @handle: table handle * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) * @genmask: generation mask + * @afinfo: address family info * @name: name of the table */ struct nft_table { @@ -954,46 +960,16 @@ struct nft_table { struct list_head chains; struct list_head sets; struct list_head objects; + struct list_head flowtables; u64 hgenerator; + u64 handle; u32 use; - u16 flags:14, + u16 family:6, + flags:8, genmask:2; char *name; }; -enum nft_af_flags { - NFT_AF_NEEDS_DEV = (1 << 0), -}; - -/** - * struct nft_af_info - nf_tables address family info - * - * @list: used internally - * @family: address family - * @nhooks: number of hooks in this family - * @owner: module owner - * @tables: used internally - * @flags: family flags - * @nops: number of hook ops in this family - * @hook_ops_init: initialization function for chain hook ops - * @hooks: hookfn overrides for packet validation - */ -struct nft_af_info { - struct list_head list; - int family; - unsigned int nhooks; - struct module *owner; - struct list_head tables; - u32 flags; - unsigned int nops; - void (*hook_ops_init)(struct nf_hook_ops *, - unsigned int); - nf_hookfn *hooks[NF_MAX_HOOKS]; -}; - -int nft_register_afinfo(struct net *, struct nft_af_info *); -void nft_unregister_afinfo(struct net *, struct nft_af_info *); - int nft_register_chain_type(const struct nf_chain_type *); void nft_unregister_chain_type(const struct nf_chain_type *); @@ -1011,9 +987,9 @@ int nft_verdict_dump(struct sk_buff *skb, int type, * @name: name of this stateful object * @genmask: generation mask * @use: number of references to this stateful object - * @data: object data, layout depends on type + * @handle: unique object handle * @ops: object operations - * @data: pointer to object data + * @data: object data, layout depends on type */ struct nft_object { struct list_head list; @@ -1021,6 +997,7 @@ struct nft_object { struct nft_table *table; u32 genmask:2, use:30; + u64 handle; /* runtime data below here */ const struct nft_object_ops *ops ____cacheline_aligned; unsigned char data[] @@ -1092,6 +1069,46 @@ int nft_register_obj(struct nft_object_type *obj_type); void nft_unregister_obj(struct nft_object_type *obj_type); /** + * struct nft_flowtable - nf_tables flow table + * + * @list: flow table list node in table list + * @table: the table the flow table is contained in + * @name: name of this flow table + * @hooknum: hook number + * @priority: hook priority + * @ops_len: number of hooks in array + * @genmask: generation mask + * @use: number of references to this flow table + * @handle: unique object handle + * @data: rhashtable and garbage collector + * @ops: array of hooks + */ +struct nft_flowtable { + struct list_head list; + struct nft_table *table; + char *name; + int hooknum; + int priority; + int ops_len; + u32 genmask:2, + use:30; + u64 handle; + /* runtime data below here */ + struct nf_hook_ops *ops ____cacheline_aligned; + struct nf_flowtable data; +}; + +struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, + const struct nlattr *nla, + u8 genmask); +void nft_flow_table_iterate(struct net *net, + void (*iter)(struct nf_flowtable *flowtable, void *data), + void *data); + +void nft_register_flowtable_type(struct nf_flowtable_type *type); +void nft_unregister_flowtable_type(struct nf_flowtable_type *type); + +/** * struct nft_traceinfo - nft tracing information and state * * @pkt: pktinfo currently processed @@ -1120,12 +1137,6 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, void nft_trace_notify(struct nft_traceinfo *info); -#define nft_dereference(p) \ - nfnl_dereference(p, NFNL_SUBSYS_NFTABLES) - -#define MODULE_ALIAS_NFT_FAMILY(family) \ - MODULE_ALIAS("nft-afinfo-" __stringify(family)) - #define MODULE_ALIAS_NFT_CHAIN(family, name) \ MODULE_ALIAS("nft-chain-" __stringify(family) "-" name) @@ -1165,8 +1176,8 @@ static inline u8 nft_genmask_next(const struct net *net) static inline u8 nft_genmask_cur(const struct net *net) { - /* Use ACCESS_ONCE() to prevent refetching the value for atomicity */ - return 1 << ACCESS_ONCE(net->nft.gencursor); + /* Use READ_ONCE() to prevent refetching the value for atomicity */ + return 1 << READ_ONCE(net->nft.gencursor); } #define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) @@ -1327,4 +1338,11 @@ struct nft_trans_obj { #define nft_trans_obj(trans) \ (((struct nft_trans_obj *)trans->data)->obj) +struct nft_trans_flowtable { + struct nft_flowtable *flowtable; +}; + +#define nft_trans_flowtable(trans) \ + (((struct nft_trans_flowtable *)trans->data)->flowtable) + #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index f0896ba456c4..ed7b511f0a59 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -5,15 +5,11 @@ #include <net/netfilter/nf_tables.h> #include <net/ip.h> -static inline void -nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, + struct sk_buff *skb) { struct iphdr *ip; - nft_set_pktinfo(pkt, skb, state); - ip = ip_hdr(pkt->skb); pkt->tprot_set = true; pkt->tprot = ip->protocol; @@ -21,10 +17,8 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; } -static inline int -__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { struct iphdr *iph, _iph; u32 len, thoff; @@ -52,16 +46,11 @@ __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, return 0; } -static inline void -nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { - nft_set_pktinfo(pkt, skb, state); - if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0) - nft_set_pktinfo_proto_unspec(pkt, skb); + if (__nft_set_pktinfo_ipv4_validate(pkt, skb) < 0) + nft_set_pktinfo_unspec(pkt, skb); } -extern struct nft_af_info nft_af_ipv4; - #endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index b8065b72f56e..dabe6fdb553a 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -5,20 +5,16 @@ #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/ipv6.h> -static inline void -nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, + struct sk_buff *skb) { unsigned int flags = IP6_FH_F_AUTH; int protohdr, thoff = 0; unsigned short frag_off; - nft_set_pktinfo(pkt, skb, state); - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) { - nft_set_pktinfo_proto_unspec(pkt, skb); + nft_set_pktinfo_unspec(pkt, skb); return; } @@ -28,10 +24,8 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, pkt->xt.fragoff = frag_off; } -static inline int -__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) unsigned int flags = IP6_FH_F_AUTH; @@ -68,16 +62,11 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, #endif } -static inline void -nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { - nft_set_pktinfo(pkt, skb, state); - if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0) - nft_set_pktinfo_proto_unspec(pkt, skb); + if (__nft_set_pktinfo_ipv6_validate(pkt, skb) < 0) + nft_set_pktinfo_unspec(pkt, skb); } -extern struct nft_af_info nft_af_ipv6; - #endif diff --git a/include/net/netns/can.h b/include/net/netns/can.h index ecf238b8862c..ca9bd9fba5b5 100644 --- a/include/net/netns/can.h +++ b/include/net/netns/can.h @@ -8,7 +8,7 @@ #include <linux/spinlock.h> -struct dev_rcv_lists; +struct can_dev_rcv_lists; struct s_stats; struct s_pstats; @@ -28,7 +28,7 @@ struct netns_can { #endif /* receive filters subscribed for 'all' CAN devices */ - struct dev_rcv_lists *can_rx_alldev_list; + struct can_dev_rcv_lists *can_rx_alldev_list; spinlock_t can_rcvlists_lock; struct timer_list can_stattimer;/* timer for statistics update */ struct s_stats *can_stats; /* packet statistics */ diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 0ad4d0c71228..36c2d998a43c 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -11,7 +11,10 @@ struct netns_core { int sysctl_somaxconn; - struct prot_inuse __percpu *inuse; +#ifdef CONFIG_PROC_FS + int __percpu *sock_inuse; + struct prot_inuse __percpu *prot_inuse; +#endif }; #endif diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8fcff2837484..44668c29701a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -37,6 +37,8 @@ struct inet_timewait_death_row { int sysctl_max_tw_buckets; }; +struct tcp_fastopen_context; + struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -53,6 +55,7 @@ struct netns_ipv4 { struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif + bool fib_has_custom_local_routes; #ifdef CONFIG_IP_ROUTE_CLASSID int fib_num_tclassid_users; #endif @@ -126,8 +129,43 @@ struct netns_ipv4 { int sysctl_tcp_sack; int sysctl_tcp_window_scaling; int sysctl_tcp_timestamps; + int sysctl_tcp_early_retrans; + int sysctl_tcp_recovery; + int sysctl_tcp_thin_linear_timeouts; + int sysctl_tcp_slow_start_after_idle; + int sysctl_tcp_retrans_collapse; + int sysctl_tcp_stdurg; + int sysctl_tcp_rfc1337; + int sysctl_tcp_abort_on_overflow; + int sysctl_tcp_fack; + int sysctl_tcp_max_reordering; + int sysctl_tcp_dsack; + int sysctl_tcp_app_win; + int sysctl_tcp_adv_win_scale; + int sysctl_tcp_frto; + int sysctl_tcp_nometrics_save; + int sysctl_tcp_moderate_rcvbuf; + int sysctl_tcp_tso_win_divisor; + int sysctl_tcp_workaround_signed_windows; + int sysctl_tcp_limit_output_bytes; + int sysctl_tcp_challenge_ack_limit; + int sysctl_tcp_min_tso_segs; + int sysctl_tcp_min_rtt_wlen; + int sysctl_tcp_autocorking; + int sysctl_tcp_invalid_ratelimit; + int sysctl_tcp_pacing_ss_ratio; + int sysctl_tcp_pacing_ca_ratio; + int sysctl_tcp_wmem[3]; + int sysctl_tcp_rmem[3]; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; + int sysctl_tcp_fastopen; + const struct tcp_congestion_ops __rcu *tcp_congestion_control; + struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; + spinlock_t tcp_fastopen_ctx_lock; + unsigned int sysctl_tcp_fastopen_blackhole_timeout; + atomic_t tfo_active_disable_times; + unsigned long tfo_active_disable_stamp; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; @@ -163,6 +201,9 @@ struct netns_ipv4 { struct fib_notifier_ops *notifier_ops; unsigned int fib_seq; /* protected by rtnl_mutex */ + struct fib_notifier_ops *ipmr_notifier_ops; + unsigned int ipmr_seq; /* protected by rtnl_mutex */ + atomic_t rt_genid; }; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index dc825a5ddd7f..987cc4569cb8 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -38,6 +38,10 @@ struct netns_sysctl_ipv6 { int idgen_delay; int flowlabel_state_ranges; int flowlabel_reflect; + int max_dst_opts_cnt; + int max_hbh_opts_cnt; + int max_dst_opts_len; + int max_hbh_opts_len; }; struct netns_ipv6 { @@ -90,6 +94,11 @@ struct netns_ipv6 { atomic_t fib6_sernum; struct seg6_pernet_data *seg6_data; struct fib_notifier_ops *notifier_ops; + struct { + struct hlist_head head; + spinlock_t lock; + u32 seq; + } ip6addrlbl_table; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cc00af2ac2d7..ca043342c0eb 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -17,7 +17,17 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS]; + struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS]; +#ifdef CONFIG_NETFILTER_FAMILY_ARP + struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS]; +#endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS]; +#endif +#if IS_ENABLED(CONFIG_DECNET) + struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS]; +#endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) bool defrag_ipv4; #endif diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index 4109b5f3010f..48134353411d 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -7,14 +7,8 @@ struct nft_af_info; struct netns_nftables { - struct list_head af_info; + struct list_head tables; struct list_head commit_list; - struct nft_af_info *ipv4; - struct nft_af_info *ipv6; - struct nft_af_info *inet; - struct nft_af_info *arp; - struct nft_af_info *bridge; - struct nft_af_info *netdev; unsigned int base_seq; u8 gencursor; }; diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index ebc813277662..0db7fb3e4e15 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -122,9 +122,12 @@ struct netns_sctp { /* Flag to indicate if PR-CONFIG is enabled. */ int reconf_enable; - /* Flag to idicate if SCTP-AUTH is enabled */ + /* Flag to indicate if SCTP-AUTH is enabled */ int auth_enable; + /* Flag to indicate if stream interleave is enabled */ + int intl_enable; + /* * Policy to control SCTP IPv4 address scoping * 0 - Disable IPv4 address scoping diff --git a/include/net/nsh.h b/include/net/nsh.h index a1eaea20be96..350b1ad11c7f 100644 --- a/include/net/nsh.h +++ b/include/net/nsh.h @@ -304,4 +304,7 @@ static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags, NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK); } +int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh); +int nsh_pop(struct sk_buff *skb); + #endif /* __NET_NSH_H */ diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 039cc29cb4a8..51e1a2a45d02 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -108,8 +108,10 @@ struct phonet_protocol { int sock_type; }; -int phonet_proto_register(unsigned int protocol, struct phonet_protocol *pp); -void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp); +int phonet_proto_register(unsigned int protocol, + const struct phonet_protocol *pp); +void phonet_proto_unregister(unsigned int protocol, + const struct phonet_protocol *pp); int phonet_sysctl_init(void); void phonet_sysctl_exit(void); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 70ca2437740e..87406252f0a3 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -19,22 +19,84 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +enum tcf_block_binder_type { + TCF_BLOCK_BINDER_TYPE_UNSPEC, + TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS, + TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS, +}; + +struct tcf_block_ext_info { + enum tcf_block_binder_type binder_type; + tcf_chain_head_change_t *chain_head_change; + void *chain_head_change_priv; + u32 block_index; +}; + +struct tcf_block_cb; bool tcf_queue_work(struct work_struct *work); #ifdef CONFIG_NET_CLS struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); void tcf_chain_put(struct tcf_chain *chain); +void tcf_block_netif_keep_dst(struct tcf_block *block); int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain); + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct netlink_ext_ack *extack); +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, + struct tcf_block_ext_info *ei, + struct netlink_ext_ack *extack); void tcf_block_put(struct tcf_block *block); +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei); + +static inline bool tcf_block_shared(struct tcf_block *block) +{ + return block->index; +} + +static inline struct Qdisc *tcf_block_q(struct tcf_block *block) +{ + WARN_ON(tcf_block_shared(block)); + return block->q; +} + +static inline struct net_device *tcf_block_dev(struct tcf_block *block) +{ + return tcf_block_q(block)->dev_queue->dev; +} + +void *tcf_block_cb_priv(struct tcf_block_cb *block_cb); +struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident); +void tcf_block_cb_incref(struct tcf_block_cb *block_cb); +unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb); +struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv); +int tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv); +void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb); +void tcf_block_cb_unregister(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident); + int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); #else static inline int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain) + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static inline +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, + struct tcf_block_ext_info *ei, + struct netlink_ext_ack *extack) { return 0; } @@ -43,6 +105,86 @@ static inline void tcf_block_put(struct tcf_block *block) { } +static inline +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ +} + +static inline struct Qdisc *tcf_block_q(struct tcf_block *block) +{ + return NULL; +} + +static inline struct net_device *tcf_block_dev(struct tcf_block *block) +{ + return NULL; +} + +static inline +int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb, + void *cb_priv) +{ + return 0; +} + +static inline +void tc_setup_cb_block_unregister(struct tcf_block *block, tc_setup_cb_t *cb, + void *cb_priv) +{ +} + +static inline +void *tcf_block_cb_priv(struct tcf_block_cb *block_cb) +{ + return NULL; +} + +static inline +struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ + return NULL; +} + +static inline +void tcf_block_cb_incref(struct tcf_block_cb *block_cb) +{ +} + +static inline +unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb) +{ + return 0; +} + +static inline +struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + return NULL; +} + +static inline +int tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + return 0; +} + +static inline +void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb) +{ +} + +static inline +void tcf_block_cb_unregister(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ +} + static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { @@ -57,36 +199,43 @@ __cls_set_class(unsigned long *clp, unsigned long cl) } static inline unsigned long -cls_set_class(struct tcf_proto *tp, unsigned long *clp, - unsigned long cl) +cls_set_class(struct Qdisc *q, unsigned long *clp, unsigned long cl) { unsigned long old_cl; - - tcf_tree_lock(tp); + + sch_tree_lock(q); old_cl = __cls_set_class(clp, cl); - tcf_tree_unlock(tp); - + sch_tree_unlock(q); return old_cl; } static inline void tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base) { + struct Qdisc *q = tp->chain->block->q; unsigned long cl; - cl = tp->q->ops->cl_ops->bind_tcf(tp->q, base, r->classid); - cl = cls_set_class(tp, &r->class, cl); + /* Check q as it is not set for shared blocks. In that case, + * setting class is not supported. + */ + if (!q) + return; + cl = q->ops->cl_ops->bind_tcf(q, base, r->classid); + cl = cls_set_class(q, &r->class, cl); if (cl) - tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); + q->ops->cl_ops->unbind_tcf(q, cl); } static inline void tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) { + struct Qdisc *q = tp->chain->block->q; unsigned long cl; + if (!q) + return; if ((cl = __cls_set_class(&r->class, 0)) != 0) - tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); + q->ops->cl_ops->unbind_tcf(q, cl); } struct tcf_exts { @@ -94,6 +243,7 @@ struct tcf_exts { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ int nr_actions; struct tc_action **actions; + struct net *net; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -107,6 +257,7 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; + exts->net = NULL; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); if (!exts->actions) @@ -117,6 +268,28 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) return 0; } +/* Return false if the netns is being destroyed in cleanup_net(). Callers + * need to do cleanup synchronously in this case, otherwise may race with + * tc_action_net_exit(). Return true for other cases. + */ +static inline bool tcf_exts_get_net(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + exts->net = maybe_get_net(exts->net); + return exts->net != NULL; +#else + return true; +#endif +} + +static inline void tcf_exts_put_net(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + if (exts->net) + put_net(exts->net); +#endif +} + static inline void tcf_exts_to_list(const struct tcf_exts *exts, struct list_head *actions) { @@ -203,13 +376,12 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, - struct tcf_exts *exts, bool ovr); + struct tcf_exts *exts, bool ovr, + struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); -int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, - struct net_device **hw_dev); /** * struct tcf_pkt_info - packet information @@ -363,7 +535,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer) { switch (layer) { case TCF_LAYER_LINK: - return skb->data; + return skb_mac_header(skb); case TCF_LAYER_NETWORK: return skb_network_header(skb); case TCF_LAYER_TRANSPORT: @@ -385,13 +557,16 @@ static inline int tcf_valid_offset(const struct sk_buff *skb, #include <net/net_namespace.h> static inline int -tcf_change_indev(struct net *net, struct nlattr *indev_tlv) +tcf_change_indev(struct net *net, struct nlattr *indev_tlv, + struct netlink_ext_ack *extack) { char indev[IFNAMSIZ]; struct net_device *dev; - if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) + if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) { + NL_SET_ERR_MSG(extack, "Interface name too long"); return -EINVAL; + } dev = __dev_get_by_name(net, indev); if (!dev) return -ENODEV; @@ -409,23 +584,27 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ +int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, + enum tc_setup_type type, void *type_data, bool err_stop); + +enum tc_block_command { + TC_BLOCK_BIND, + TC_BLOCK_UNBIND, +}; + +struct tc_block_offload { + enum tc_block_command command; + enum tcf_block_binder_type binder_type; + struct tcf_block *block; +}; + struct tc_cls_common_offload { u32 chain_index; __be16 protocol; u32 prio; - u32 classid; + struct netlink_ext_ack *extack; }; -static inline void -tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, - const struct tcf_proto *tp) -{ - cls_common->chain_index = tp->chain->index; - cls_common->protocol = tp->protocol; - cls_common->prio = tp->prio; - cls_common->classid = tp->classid; -} - struct tc_cls_u32_knode { struct tcf_exts *exts; struct tc_u32_sel *sel; @@ -463,10 +642,31 @@ struct tc_cls_u32_offload { static inline bool tc_can_offload(const struct net_device *dev) { - if (!(dev->features & NETIF_F_HW_TC)) + return dev->features & NETIF_F_HW_TC; +} + +static inline bool tc_can_offload_extack(const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + bool can = tc_can_offload(dev); + + if (!can) + NL_SET_ERR_MSG(extack, "TC offload is disabled on net device"); + + return can; +} + +static inline bool +tc_cls_can_offload_and_chain0(const struct net_device *dev, + struct tc_cls_common_offload *common) +{ + if (!tc_can_offload_extack(dev, common->extack)) return false; - if (!dev->netdev_ops->ndo_setup_tc) + if (common->chain_index) { + NL_SET_ERR_MSG(common->extack, + "Driver supports only offload of chain 0"); return false; + } return true; } @@ -475,13 +675,6 @@ static inline bool tc_skip_hw(u32 flags) return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false; } -static inline bool tc_should_offload(const struct net_device *dev, u32 flags) -{ - if (tc_skip_hw(flags)) - return false; - return tc_can_offload(dev); -} - static inline bool tc_skip_sw(u32 flags) { return (flags & TCA_CLS_FLAGS_SKIP_SW) ? true : false; @@ -504,6 +697,18 @@ static inline bool tc_in_hw(u32 flags) return (flags & TCA_CLS_FLAGS_IN_HW) ? true : false; } +static inline void +tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, + const struct tcf_proto *tp, u32 flags, + struct netlink_ext_ack *extack) +{ + cls_common->chain_index = tp->chain->index; + cls_common->protocol = tp->protocol; + cls_common->prio = tp->prio; + if (tc_skip_sw(flags)) + cls_common->extack = extack; +} + enum tc_fl_command { TC_CLSFLOWER_REPLACE, TC_CLSFLOWER_DESTROY, @@ -518,7 +723,7 @@ struct tc_cls_flower_offload { struct fl_flow_key *mask; struct fl_flow_key *key; struct tcf_exts *exts; - bool egress_dev; + u32 classid; }; enum tc_matchall_command { @@ -534,9 +739,7 @@ struct tc_cls_matchall_offload { }; enum tc_clsbpf_command { - TC_CLSBPF_ADD, - TC_CLSBPF_REPLACE, - TC_CLSBPF_DESTROY, + TC_CLSBPF_OFFLOAD, TC_CLSBPF_STATS, }; @@ -545,11 +748,20 @@ struct tc_cls_bpf_offload { enum tc_clsbpf_command command; struct tcf_exts *exts; struct bpf_prog *prog; + struct bpf_prog *oldprog; const char *name; bool exts_integrated; - u32 gen_flags; }; +struct tc_mqprio_qopt_offload { + /* struct tc_mqprio_qopt must always be the first element */ + struct tc_mqprio_qopt qopt; + u16 mode; + u16 shaper; + u32 flags; + u64 min_rate[TC_QOPT_MAX_QUEUE]; + u64 max_rate[TC_QOPT_MAX_QUEUE]; +}; /* This structure holds cookie structure that is passed from user * to the kernel for actions and classifiers @@ -558,4 +770,61 @@ struct tc_cookie { u8 *data; u32 len; }; + +struct tc_qopt_offload_stats { + struct gnet_stats_basic_packed *bstats; + struct gnet_stats_queue *qstats; +}; + +enum tc_red_command { + TC_RED_REPLACE, + TC_RED_DESTROY, + TC_RED_STATS, + TC_RED_XSTATS, +}; + +struct tc_red_qopt_offload_params { + u32 min; + u32 max; + u32 probability; + bool is_ecn; + struct gnet_stats_queue *qstats; +}; + +struct tc_red_qopt_offload { + enum tc_red_command command; + u32 handle; + u32 parent; + union { + struct tc_red_qopt_offload_params set; + struct tc_qopt_offload_stats stats; + struct red_stats *xstats; + }; +}; + +enum tc_prio_command { + TC_PRIO_REPLACE, + TC_PRIO_DESTROY, + TC_PRIO_STATS, +}; + +struct tc_prio_qopt_offload_params { + int bands; + u8 priomap[TC_PRIO_MAX + 1]; + /* In case that a prio qdisc is offloaded and now is changed to a + * non-offloadedable config, it needs to update the backlog & qlen + * values to negate the HW backlog & qlen values (and only them). + */ + struct gnet_stats_queue *qstats; +}; + +struct tc_prio_qopt_offload { + enum tc_prio_command command; + u32 handle; + u32 parent; + union { + struct tc_prio_qopt_offload_params replace_params; + struct tc_qopt_offload_stats stats; + }; +}; #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index b3869f97d37d..815b92a23936 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -5,7 +5,9 @@ #include <linux/jiffies.h> #include <linux/ktime.h> #include <linux/if_vlan.h> +#include <linux/netdevice.h> #include <net/sch_generic.h> +#include <net/net_namespace.h> #include <uapi/linux/pkt_sched.h> #define DEFAULT_TX_QUEUE_LEN 1000 @@ -87,7 +89,8 @@ extern struct Qdisc_ops pfifo_head_drop_qdisc_ops; int fifo_set_limit(struct Qdisc *q, unsigned int limit); struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, - unsigned int limit); + unsigned int limit, + struct netlink_ext_ack *extack); int register_qdisc(struct Qdisc_ops *qops); int unregister_qdisc(struct Qdisc_ops *qops); @@ -97,22 +100,24 @@ int qdisc_set_default(const char *id); void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); -struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, - struct nlattr *tab); + struct nlattr *tab, + struct netlink_ext_ack *extack); void qdisc_put_rtab(struct qdisc_rate_table *tab); void qdisc_put_stab(struct qdisc_size_table *tab); void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); -int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, - struct net_device *dev, struct netdev_queue *txq, - spinlock_t *root_lock, bool validate); +bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, + struct net_device *dev, struct netdev_queue *txq, + spinlock_t *root_lock, bool validate); void __qdisc_run(struct Qdisc *q); static inline void qdisc_run(struct Qdisc *q) { - if (qdisc_run_begin(q)) + if (qdisc_run_begin(q)) { __qdisc_run(q); + qdisc_run_end(q); + } } static inline __be16 tc_skb_protocol(const struct sk_buff *skb) @@ -134,17 +139,18 @@ static inline unsigned int psched_mtu(const struct net_device *dev) return dev->mtu + dev->hard_header_len; } -static inline bool is_classid_clsact_ingress(u32 classid) +static inline struct net *qdisc_net(struct Qdisc *q) { - /* This also returns true for ingress qdisc */ - return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && - TC_H_MIN(classid) != TC_H_MIN(TC_H_MIN_EGRESS); + return dev_net(q->dev_queue->dev); } -static inline bool is_classid_clsact_egress(u32 classid) -{ - return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && - TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS); -} +struct tc_cbs_qopt_offload { + u8 enable; + s32 queue; + s32 hicredit; + s32 locredit; + s32 idleslope; + s32 sendslope; +}; #endif diff --git a/include/net/red.h b/include/net/red.h index 9a9347710701..9665582c4687 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -168,6 +168,17 @@ static inline void red_set_vars(struct red_vars *v) v->qcount = -1; } +static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog) +{ + if (fls(qth_min) + Wlog > 32) + return false; + if (fls(qth_max) + Wlog > 32) + return false; + if (qth_max < qth_min) + return false; + return true; +} + static inline void red_set_parms(struct red_parms *p, u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog, u8 Scell_log, u8 *stab, u32 max_P) @@ -179,7 +190,7 @@ static inline void red_set_parms(struct red_parms *p, p->qth_max = qth_max << Wlog; p->Wlog = Wlog; p->Plog = Plog; - if (delta < 0) + if (delta <= 0) delta = 1; p->qth_delta = delta; if (!max_P) { diff --git a/include/net/regulatory.h b/include/net/regulatory.h index ebc5a2ed8631..f83cacce3308 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -78,7 +78,7 @@ struct regulatory_request { int wiphy_idx; enum nl80211_reg_initiator initiator; enum nl80211_user_reg_hint_type user_reg_hint_type; - char alpha2[2]; + char alpha2[3]; enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 23e22054aa60..347015515a7d 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -150,6 +150,8 @@ struct fastopen_queue { spinlock_t lock; int qlen; /* # of pending (TCP_SYN_RECV) reqs */ int max_qlen; /* != 0 iff TFO is currently enabled */ + + struct tcp_fastopen_context __rcu *ctx; /* cipher context for cookie */ }; /** struct request_sock_queue - queue of request_socks diff --git a/include/net/route.h b/include/net/route.h index d538e6db1afe..1eb9ce470e25 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -217,7 +217,7 @@ unsigned int inet_addr_type_dev_table(struct net *net, const struct net_device *dev, __be32 addr); void ip_rt_multicast_event(struct in_device *); -int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg); +int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 7b938fbeebc1..14b6b3af8918 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -13,10 +13,10 @@ enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = 1, }; -int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); +int rtnl_register_module(struct module *owner, int protocol, int msgtype, + rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); @@ -94,9 +94,6 @@ struct rtnl_link_ops { int slave_maxtype; const struct nla_policy *slave_policy; - int (*slave_validate)(struct nlattr *tb[], - struct nlattr *data[], - struct netlink_ext_ack *extack); int (*slave_changelink)(struct net_device *dev, struct net_device *slave_dev, struct nlattr *tb[], @@ -155,8 +152,6 @@ struct rtnl_af_ops { size_t (*get_stats_af_size)(const struct net_device *dev); }; -void __rtnl_af_unregister(struct rtnl_af_ops *ops); - void rtnl_af_register(struct rtnl_af_ops *ops); void rtnl_af_unregister(struct rtnl_af_ops *ops); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 236bfe5b2ffe..e2ab13687fb9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -71,6 +71,8 @@ struct Qdisc { * qdisc_tree_decrease_qlen() should stop. */ #define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ +#define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */ +#define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; @@ -87,15 +89,14 @@ struct Qdisc { /* * For performance sake on SMP, we put highly modified fields at the end */ - struct sk_buff *gso_skb ____cacheline_aligned_in_smp; + struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; - struct sk_buff *skb_bad_txq; - struct rcu_head rcu_head; + struct sk_buff_head skb_bad_txq; int padded; refcount_t refcnt; @@ -150,19 +151,23 @@ struct Qdisc_class_ops { /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); int (*graft)(struct Qdisc *, unsigned long cl, - struct Qdisc *, struct Qdisc **); + struct Qdisc *, struct Qdisc **, + struct netlink_ext_ack *extack); struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl); void (*qlen_notify)(struct Qdisc *, unsigned long); /* Class manipulation routines */ unsigned long (*find)(struct Qdisc *, u32 classid); int (*change)(struct Qdisc *, u32, u32, - struct nlattr **, unsigned long *); + struct nlattr **, unsigned long *, + struct netlink_ext_ack *); int (*delete)(struct Qdisc *, unsigned long); void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ - struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long); + struct tcf_block * (*tcf_block)(struct Qdisc *sch, + unsigned long arg, + struct netlink_ext_ack *extack); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); @@ -179,6 +184,7 @@ struct Qdisc_ops { const struct Qdisc_class_ops *cl_ops; char id[IFNAMSIZ]; int priv_size; + unsigned int static_flags; int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, @@ -186,15 +192,26 @@ struct Qdisc_ops { struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); - int (*init)(struct Qdisc *, struct nlattr *arg); + int (*init)(struct Qdisc *sch, struct nlattr *arg, + struct netlink_ext_ack *extack); void (*reset)(struct Qdisc *); void (*destroy)(struct Qdisc *); - int (*change)(struct Qdisc *, struct nlattr *arg); - void (*attach)(struct Qdisc *); + int (*change)(struct Qdisc *sch, + struct nlattr *arg, + struct netlink_ext_ack *extack); + void (*attach)(struct Qdisc *sch); + int (*change_tx_queue_len)(struct Qdisc *, unsigned int); int (*dump)(struct Qdisc *, struct sk_buff *); int (*dump_stats)(struct Qdisc *, struct gnet_dump *); + void (*ingress_block_set)(struct Qdisc *sch, + u32 block_index); + void (*egress_block_set)(struct Qdisc *sch, + u32 block_index); + u32 (*ingress_block_get)(struct Qdisc *sch); + u32 (*egress_block_get)(struct Qdisc *sch); + struct module *owner; }; @@ -217,14 +234,18 @@ struct tcf_proto_ops { const struct tcf_proto *, struct tcf_result *); int (*init)(struct tcf_proto*); - void (*destroy)(struct tcf_proto*); + void (*destroy)(struct tcf_proto *tp, + struct netlink_ext_ack *extack); void* (*get)(struct tcf_proto*, u32 handle); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - void **, bool); - int (*delete)(struct tcf_proto*, void *, bool*); + void **, bool, + struct netlink_ext_ack *); + int (*delete)(struct tcf_proto *tp, void *arg, + bool *last, + struct netlink_ext_ack *); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); void (*bind_class)(void *, u32, unsigned long); @@ -246,8 +267,6 @@ struct tcf_proto { /* All the rest */ u32 prio; - u32 classid; - struct Qdisc *q; void *data; const struct tcf_proto_ops *ops; struct tcf_chain *chain; @@ -262,9 +281,11 @@ struct qdisc_skb_cb { unsigned char data[QDISC_CB_PRIV_LEN]; }; +typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); + struct tcf_chain { struct tcf_proto __rcu *filter_chain; - struct tcf_proto __rcu **p_filter_chain; + struct list_head filter_chain_list; struct list_head list; struct tcf_block *block; u32 index; /* chain index */ @@ -273,9 +294,33 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; - struct work_struct work; + u32 index; /* block index for shared blocks */ + unsigned int refcnt; + struct net *net; + struct Qdisc *q; + struct list_head cb_list; + struct list_head owner_list; + bool keep_dst; + unsigned int offloadcnt; /* Number of oddloaded filters */ + unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ }; +static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) +{ + if (*flags & TCA_CLS_FLAGS_IN_HW) + return; + *flags |= TCA_CLS_FLAGS_IN_HW; + block->offloadcnt++; +} + +static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) +{ + if (!(*flags & TCA_CLS_FLAGS_IN_HW)) + return; + *flags &= ~TCA_CLS_FLAGS_IN_HW; + block->offloadcnt--; +} + static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; @@ -284,11 +329,31 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) BUILD_BUG_ON(sizeof(qcb->data) < sz); } +static inline int qdisc_qlen_cpu(const struct Qdisc *q) +{ + return this_cpu_ptr(q->cpu_qstats)->qlen; +} + static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; } +static inline int qdisc_qlen_sum(const struct Qdisc *q) +{ + __u32 qlen = 0; + int i; + + if (q->flags & TCQ_F_NOLOCK) { + for_each_possible_cpu(i) + qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen; + } else { + qlen = q->q.qlen; + } + + return qlen; +} + static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb) { return (struct qdisc_skb_cb *)skb->cb; @@ -361,9 +426,6 @@ static inline void sch_tree_unlock(const struct Qdisc *q) spin_unlock_bh(qdisc_root_sleeping_lock(q)); } -#define tcf_tree_lock(tp) sch_tree_lock((tp)->q) -#define tcf_tree_unlock(tp) sch_tree_unlock((tp)->q) - extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; @@ -413,6 +475,13 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id) return NULL; } +static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid) +{ + u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY; + + return (hwtc < netdev_get_num_tc(dev)) ? hwtc : -EINVAL; +} + int qdisc_class_hash_init(struct Qdisc_class_hash *); void qdisc_class_hash_insert(struct Qdisc_class_hash *, struct Qdisc_class_common *); @@ -421,6 +490,7 @@ void qdisc_class_hash_remove(struct Qdisc_class_hash *, void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); void qdisc_class_hash_destroy(struct Qdisc_class_hash *); +int dev_qdisc_change_tx_queue_len(struct net_device *dev); void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); @@ -433,9 +503,12 @@ void qdisc_destroy(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, unsigned int len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, - const struct Qdisc_ops *ops); + const struct Qdisc_ops *ops, + struct netlink_ext_ack *extack); +void qdisc_free(struct Qdisc *qdisc); struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, - const struct Qdisc_ops *ops, u32 parentid); + const struct Qdisc_ops *ops, u32 parentid, + struct netlink_ext_ack *extack); void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab); int skb_do_redirect(struct sk_buff *); @@ -621,12 +694,39 @@ static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch, sch->qstats.backlog -= qdisc_pkt_len(skb); } +static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch, + const struct sk_buff *skb) +{ + this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); +} + static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch, const struct sk_buff *skb) { sch->qstats.backlog += qdisc_pkt_len(skb); } +static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch, + const struct sk_buff *skb) +{ + this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); +} + +static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch) +{ + this_cpu_inc(sch->cpu_qstats->qlen); +} + +static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch) +{ + this_cpu_dec(sch->cpu_qstats->qlen); +} + +static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch) +{ + this_cpu_inc(sch->cpu_qstats->requeues); +} + static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count) { sch->qstats.drops += count; @@ -757,26 +857,30 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) /* generic pseudo peek method for non-work-conserving qdisc */ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) { + struct sk_buff *skb = skb_peek(&sch->gso_skb); + /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ - if (!sch->gso_skb) { - sch->gso_skb = sch->dequeue(sch); - if (sch->gso_skb) { + if (!skb) { + skb = sch->dequeue(sch); + + if (skb) { + __skb_queue_head(&sch->gso_skb, skb); /* it's still part of the queue */ - qdisc_qstats_backlog_inc(sch, sch->gso_skb); + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } } - return sch->gso_skb; + return skb; } /* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) { - struct sk_buff *skb = sch->gso_skb; + struct sk_buff *skb = skb_peek(&sch->gso_skb); if (skb) { - sch->gso_skb = NULL; + skb = __skb_dequeue(&sch->gso_skb); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { @@ -834,6 +938,14 @@ static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) qdisc_qstats_drop(sch); } +static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + __qdisc_drop(skb, to_free); + qdisc_qstats_cpu_drop(sch); + + return NET_XMIT_DROP; +} static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) @@ -896,4 +1008,36 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } +/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc. + * The fast path only needs to access filter list and to update stats + */ +struct mini_Qdisc { + struct tcf_proto *filter_list; + struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_queue __percpu *cpu_qstats; + struct rcu_head rcu; +}; + +static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq, + const struct sk_buff *skb) +{ + bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb); +} + +static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq) +{ + this_cpu_inc(miniq->cpu_qstats->drops); +} + +struct mini_Qdisc_pair { + struct mini_Qdisc miniq1; + struct mini_Qdisc miniq2; + struct mini_Qdisc __rcu **p_miniq; +}; + +void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, + struct tcf_proto *tp_head); +void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, + struct mini_Qdisc __rcu **p_miniq); + #endif diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 4a5b9a306c69..32ee65a30aff 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -48,31 +48,32 @@ static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum) /* This uses the crypto implementation of crc32c, which is either * implemented w/ hardware support or resolves to __crc32c_le(). */ - return crc32c(sum, buff, len); + return (__force __wsum)crc32c((__force __u32)sum, buff, len); } static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, int offset, int len) { - return __crc32c_le_combine(csum, csum2, len); + return (__force __wsum)__crc32c_le_combine((__force __u32)csum, + (__force __u32)csum2, len); } static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { struct sctphdr *sh = sctp_hdr(skb); - __le32 ret, old = sh->checksum; const struct skb_checksum_ops ops = { .update = sctp_csum_update, .combine = sctp_csum_combine, }; + __le32 old = sh->checksum; + __wsum new; sh->checksum = 0; - ret = cpu_to_le32(~__skb_checksum(skb, offset, skb->len - offset, - ~(__u32)0, &ops)); + new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, &ops); sh->checksum = old; - return ret; + return cpu_to_le32((__force __u32)new); } #endif /* __sctp_checksum_h__ */ diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index deaafa9b09cb..20ff237c5eb2 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -145,12 +145,13 @@ SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) -#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA) +#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \ + a->chunk_hdr->type == SCTP_CID_I_DATA) /* Calculate the actual data size in a data chunk */ -#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\ - - (unsigned long)(c->chunk_hdr)\ - - sizeof(struct sctp_data_chunk))) +#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \ + (unsigned long)(c->chunk_hdr) - \ + sctp_datachk_len(&c->asoc->stream))) /* Internal error codes */ enum sctp_ierror { diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d7d8cba01469..f7ae6b0a21d0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -107,7 +107,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); void sctp_data_ready(struct sock *sk); -unsigned int sctp_poll(struct file *file, struct socket *sock, +__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); void sctp_copy_sock(struct sock *newsk, struct sock *sk, @@ -116,7 +116,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); -int sctp_transport_walk_start(struct rhashtable_iter *iter); +void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter); @@ -195,6 +195,11 @@ void sctp_remaddr_proc_exit(struct net *net); int sctp_offload_init(void); /* + * sctp/stream_sched.c + */ +void sctp_sched_ops_init(void); + +/* * sctp/stream.c */ int sctp_send_reset_streams(struct sctp_association *asoc, @@ -439,12 +444,13 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) int frag = pmtu; frag -= sp->pf->af->net_header_len; - frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk); + frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream); if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); + frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN - + sctp_datachk_len(&asoc->stream))); return frag; } diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 88233cf8b8d4..2883c43c5258 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -72,7 +72,7 @@ typedef enum sctp_disposition (sctp_state_fn_t) ( const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); -typedef void (sctp_timer_event_t) (unsigned long); +typedef void (sctp_timer_event_t) (struct timer_list *); struct sctp_sm_table_entry { sctp_state_fn_t *fn; const char *name; @@ -197,10 +197,14 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc, struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc, const __u32 lowest_tsn, const struct sctp_chunk *chunk); -struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, +struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc, + __u8 flags, int paylen, gfp_t gfp); +struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc, + __u32 new_cum_tsn, size_t nstreams, + struct sctp_ifwdtsn_skip *skiplist); +struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc, const struct sctp_sndrcvinfo *sinfo, - int len, const __u8 flags, - __u16 ssn, gfp_t gfp); + int len, __u8 flags, gfp_t gfp); struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, const __u32 lowest_tsn); struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc); @@ -314,10 +318,10 @@ int sctp_do_sm(struct net *net, enum sctp_event event_type, void *event_arg, gfp_t gfp); /* 2nd level prototypes */ -void sctp_generate_t3_rtx_event(unsigned long peer); -void sctp_generate_heartbeat_event(unsigned long peer); -void sctp_generate_reconf_event(unsigned long peer); -void sctp_generate_proto_unreach_event(unsigned long peer); +void sctp_generate_t3_rtx_event(struct timer_list *t); +void sctp_generate_heartbeat_event(struct timer_list *t); +void sctp_generate_reconf_event(struct timer_list *t); +void sctp_generate_proto_unreach_event(struct timer_list *t); void sctp_ootb_pkt_free(struct sctp_packet *packet); @@ -342,7 +346,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sizeof(struct sctp_data_chunk); + size -= sctp_datahdr_len(&chunk->asoc->stream); return size; } @@ -358,6 +362,12 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) typecheck(__u32, b) && \ ((__s32)((a) - (b)) <= 0)) +/* Compare two MIDs */ +#define MID_lt(a, b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) < 0)) + /* Compare two SSNs */ #define SSN_lt(a,b) \ (typecheck(__u16, a) && \ diff --git a/include/net/sctp/stream_interleave.h b/include/net/sctp/stream_interleave.h new file mode 100644 index 000000000000..6657711c8bc4 --- /dev/null +++ b/include/net/sctp/stream_interleave.h @@ -0,0 +1,61 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * These are definitions used by the stream schedulers, defined in RFC + * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11) + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. + * + * Please send any bug reports or fixes you make to the + * email addresses: + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Xin Long <lucien.xin@gmail.com> + */ + +#ifndef __sctp_stream_interleave_h__ +#define __sctp_stream_interleave_h__ + +struct sctp_stream_interleave { + __u16 data_chunk_len; + __u16 ftsn_chunk_len; + /* (I-)DATA process */ + struct sctp_chunk *(*make_datafrag)(const struct sctp_association *asoc, + const struct sctp_sndrcvinfo *sinfo, + int len, __u8 flags, gfp_t gfp); + void (*assign_number)(struct sctp_chunk *chunk); + bool (*validate_data)(struct sctp_chunk *chunk); + int (*ulpevent_data)(struct sctp_ulpq *ulpq, + struct sctp_chunk *chunk, gfp_t gfp); + int (*enqueue_event)(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event); + void (*renege_events)(struct sctp_ulpq *ulpq, + struct sctp_chunk *chunk, gfp_t gfp); + void (*start_pd)(struct sctp_ulpq *ulpq, gfp_t gfp); + void (*abort_pd)(struct sctp_ulpq *ulpq, gfp_t gfp); + /* (I-)FORWARD-TSN process */ + void (*generate_ftsn)(struct sctp_outq *q, __u32 ctsn); + bool (*validate_ftsn)(struct sctp_chunk *chunk); + void (*report_ftsn)(struct sctp_ulpq *ulpq, __u32 ftsn); + void (*handle_ftsn)(struct sctp_ulpq *ulpq, + struct sctp_chunk *chunk); +}; + +void sctp_stream_interleave_init(struct sctp_stream *stream); + +#endif /* __sctp_stream_interleave_h__ */ diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h new file mode 100644 index 000000000000..5c5da48f65e7 --- /dev/null +++ b/include/net/sctp/stream_sched.h @@ -0,0 +1,77 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * These are definitions used by the stream schedulers, defined in RFC + * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11) + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. + * + * Please send any bug reports or fixes you make to the + * email addresses: + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> + */ + +#ifndef __sctp_stream_sched_h__ +#define __sctp_stream_sched_h__ + +struct sctp_sched_ops { + /* Property handling for a given stream */ + int (*set)(struct sctp_stream *stream, __u16 sid, __u16 value, + gfp_t gfp); + int (*get)(struct sctp_stream *stream, __u16 sid, __u16 *value); + + /* Init the specific scheduler */ + int (*init)(struct sctp_stream *stream); + /* Init a stream */ + int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); + /* Frees the entire thing */ + void (*free)(struct sctp_stream *stream); + + /* Enqueue a chunk */ + void (*enqueue)(struct sctp_outq *q, struct sctp_datamsg *msg); + /* Dequeue a chunk */ + struct sctp_chunk *(*dequeue)(struct sctp_outq *q); + /* Called only if the chunk fit the packet */ + void (*dequeue_done)(struct sctp_outq *q, struct sctp_chunk *chunk); + /* Sched all chunks already enqueued */ + void (*sched_all)(struct sctp_stream *steam); + /* Unched all chunks already enqueued */ + void (*unsched_all)(struct sctp_stream *steam); +}; + +int sctp_sched_set_sched(struct sctp_association *asoc, + enum sctp_sched_type sched); +int sctp_sched_get_sched(struct sctp_association *asoc); +int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid, + __u16 value, gfp_t gfp); +int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid, + __u16 *value); +void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch); + +void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch); +int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp); +struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream); + +void sctp_sched_ops_register(enum sctp_sched_type sched, + struct sctp_sched_ops *sched_ops); +void sctp_sched_ops_prio_init(void); +void sctp_sched_ops_rr_init(void); + +#endif /* __sctp_stream_sched_h__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0477945de1a3..03e92dda1813 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -89,6 +89,7 @@ struct sctp_stream; #include <net/sctp/tsnmap.h> #include <net/sctp/ulpevent.h> #include <net/sctp/ulpqueue.h> +#include <net/sctp/stream_interleave.h> /* Structures useful for managing bind/connect. */ @@ -202,12 +203,17 @@ struct sctp_sock { /* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */ __u32 param_flags; - struct sctp_initmsg initmsg; struct sctp_rtoinfo rtoinfo; struct sctp_paddrparams paddrparam; - struct sctp_event_subscribe subscribe; struct sctp_assocparams assocparams; + /* + * These two structures must be grouped together for the usercopy + * whitelist region. + */ + struct sctp_event_subscribe subscribe; + struct sctp_initmsg initmsg; + int user_frag; __u32 autoclose; @@ -217,6 +223,7 @@ struct sctp_sock { disable_fragments:1, v4mapped:1, frag_interleave:1, + strm_interleave:1, recvrcvinfo:1, recvnxtinfo:1, data_ready_signalled:1; @@ -380,6 +387,7 @@ struct sctp_sender_hb_info { int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp); +int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid); void sctp_stream_free(struct sctp_stream *stream); void sctp_stream_clear(struct sctp_stream *stream); void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new); @@ -396,6 +404,28 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new); #define sctp_ssn_skip(stream, type, sid, ssn) \ ((stream)->type[sid].ssn = ssn + 1) +/* What is the current MID number for this stream? */ +#define sctp_mid_peek(stream, type, sid) \ + ((stream)->type[sid].mid) + +/* Return the next MID number for this stream. */ +#define sctp_mid_next(stream, type, sid) \ + ((stream)->type[sid].mid++) + +/* Skip over this mid and all below. */ +#define sctp_mid_skip(stream, type, sid, mid) \ + ((stream)->type[sid].mid = mid + 1) + +#define sctp_stream_in(asoc, sid) (&(asoc)->stream.in[sid]) + +/* What is the current MID_uo number for this stream? */ +#define sctp_mid_uo_peek(stream, type, sid) \ + ((stream)->type[sid].mid_uo) + +/* Return the next MID_uo number for this stream. */ +#define sctp_mid_uo_next(stream, type, sid) \ + ((stream)->type[sid].mid_uo++) + /* * Pointers to address related SCTP functions. * (i.e. things that depend on the address family.) @@ -502,7 +532,8 @@ struct sctp_datamsg { /* Did the messenge fail to send? */ int send_error; u8 send_failed:1, - can_delay; /* should this message be Nagle delayed */ + can_delay:1, /* should this message be Nagle delayed */ + abandoned:1; /* should this message be abandoned */ }; struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *, @@ -529,8 +560,12 @@ struct sctp_chunk { /* How many times this chunk have been sent, for prsctp RTX policy */ int sent_count; - /* This is our link to the per-transport transmitted list. */ - struct list_head transmitted_list; + union { + /* This is our link to the per-transport transmitted list. */ + struct list_head transmitted_list; + /* List in specific stream outq */ + struct list_head stream_list; + }; /* This field is used by chunks that hold fragmented data. * For the first fragment this is the list that holds the rest of @@ -568,6 +603,8 @@ struct sctp_chunk { struct sctp_addiphdr *addip_hdr; struct sctp_fwdtsn_hdr *fwdtsn_hdr; struct sctp_authhdr *auth_hdr; + struct sctp_idatahdr *idata_hdr; + struct sctp_ifwdtsn_hdr *ifwdtsn_hdr; } subh; __u8 *chunk_end; @@ -614,6 +651,7 @@ struct sctp_chunk { __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ +#define has_mid has_ssn singleton:1, /* Only chunk in the packet? */ end_of_packet:1, /* Last chunk in the packet? */ ecn_ce_done:1, /* Have we processed the ECN CE bit? */ @@ -640,6 +678,11 @@ void sctp_init_addrs(struct sctp_chunk *, union sctp_addr *, union sctp_addr *); const union sctp_addr *sctp_source(const struct sctp_chunk *chunk); +static inline __u16 sctp_chunk_stream_no(struct sctp_chunk *ch) +{ + return ntohs(ch->subh.data_hdr->stream); +} + enum { SCTP_ADDR_NEW, /* new address added to assoc/ep */ SCTP_ADDR_SRC, /* address can be used as source */ @@ -955,7 +998,7 @@ void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *t); -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); @@ -1012,6 +1055,9 @@ struct sctp_outq { /* Data pending that has never been transmitted. */ struct list_head out_chunk_list; + /* Stream scheduler being used */ + struct sctp_sched_ops *sched; + unsigned int out_qlen; /* Total length of queued data chunks. */ /* Error of send failed, may used in SCTP_SEND_FAILED event. */ @@ -1059,6 +1105,7 @@ void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); void sctp_prsctp_prune(struct sctp_association *asoc, struct sctp_sndrcvinfo *sinfo, int msg_len); +void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); /* Uncork and flush an outqueue. */ static inline void sctp_outq_cork(struct sctp_outq *q) { @@ -1315,15 +1362,53 @@ struct sctp_inithdr_host { __u32 initial_tsn; }; +struct sctp_stream_priorities { + /* List of priorities scheduled */ + struct list_head prio_sched; + /* List of streams scheduled */ + struct list_head active; + /* The next stream stream in line */ + struct sctp_stream_out_ext *next; + __u16 prio; +}; + +struct sctp_stream_out_ext { + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + struct list_head outq; /* chunks enqueued by this stream */ + union { + struct { + /* Scheduled streams list */ + struct list_head prio_list; + struct sctp_stream_priorities *prio_head; + }; + /* Fields used by RR scheduler */ + struct { + struct list_head rr_list; + }; + }; +}; + struct sctp_stream_out { - __u16 ssn; - __u8 state; - __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; - __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + union { + __u32 mid; + __u16 ssn; + }; + __u32 mid_uo; + struct sctp_stream_out_ext *ext; + __u8 state; }; struct sctp_stream_in { - __u16 ssn; + union { + __u32 mid; + __u16 ssn; + }; + __u32 mid_uo; + __u32 fsn; + __u32 fsn_uo; + char pd_mode; + char pd_mode_uo; }; struct sctp_stream { @@ -1331,11 +1416,48 @@ struct sctp_stream { struct sctp_stream_in *in; __u16 outcnt; __u16 incnt; + /* Current stream being sent, if any */ + struct sctp_stream_out *out_curr; + union { + /* Fields used by priority scheduler */ + struct { + /* List of priorities scheduled */ + struct list_head prio_list; + }; + /* Fields used by RR scheduler */ + struct { + /* List of streams scheduled */ + struct list_head rr_list; + /* The next stream stream in line */ + struct sctp_stream_out_ext *rr_next; + }; + }; + struct sctp_stream_interleave *si; }; #define SCTP_STREAM_CLOSED 0x00 #define SCTP_STREAM_OPEN 0x01 +static inline __u16 sctp_datachk_len(const struct sctp_stream *stream) +{ + return stream->si->data_chunk_len; +} + +static inline __u16 sctp_datahdr_len(const struct sctp_stream *stream) +{ + return stream->si->data_chunk_len - sizeof(struct sctp_chunkhdr); +} + +static inline __u16 sctp_ftsnchk_len(const struct sctp_stream *stream) +{ + return stream->si->ftsn_chunk_len; +} + +static inline __u16 sctp_ftsnhdr_len(const struct sctp_stream *stream) +{ + return stream->si->ftsn_chunk_len - sizeof(struct sctp_chunkhdr); +} + /* SCTP_GET_ASSOC_STATS counters */ struct sctp_priv_assoc_stats { /* Maximum observed rto in the association during subsequent @@ -1884,6 +2006,7 @@ struct sctp_association { __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ temp:1, /* Is it a temporary association? */ force_delay:1, + intl_enable:1, prsctp_enable:1, reconf_enable:1; diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 231dc42f1da6..51b4e0626c34 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -45,19 +45,29 @@ /* A structure to carry information to the ULP (e.g. Sockets API) */ /* Warning: This sits inside an skb.cb[] area. Be very careful of * growing this structure as it is at the maximum limit now. + * + * sctp_ulpevent is saved in sk->cb(48 bytes), whose last 4 bytes + * have been taken by sock_skb_cb, So here it has to use 'packed' + * to make sctp_ulpevent fit into the rest 44 bytes. */ struct sctp_ulpevent { struct sctp_association *asoc; struct sctp_chunk *chunk; unsigned int rmem_len; - __u32 ppid; + union { + __u32 mid; + __u16 ssn; + }; + union { + __u32 ppid; + __u32 fsn; + }; __u32 tsn; __u32 cumtsn; __u16 stream; - __u16 ssn; __u16 flags; __u16 msg_flags; -}; +} __packed; /* Retrieve the skb this event sits inside of. */ static inline struct sk_buff *sctp_event2skb(const struct sctp_ulpevent *ev) @@ -112,7 +122,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( struct sctp_ulpevent *sctp_ulpevent_make_pdapi( const struct sctp_association *asoc, - __u32 indication, gfp_t gfp); + __u32 indication, __u32 sid, __u32 seq, + __u32 flags, gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_adaptation_indication( const struct sctp_association *asoc, gfp_t gfp); @@ -140,6 +151,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event( const struct sctp_association *asoc, __u16 flags, __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp); +struct sctp_ulpevent *sctp_make_reassembled_event( + struct net *net, struct sk_buff_head *queue, + struct sk_buff *f_frag, struct sk_buff *l_frag); + void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *); void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index e0dce07b8794..bb0ecba3db2b 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -45,6 +45,7 @@ struct sctp_ulpq { char pd_mode; struct sctp_association *asoc; struct sk_buff_head reasm; + struct sk_buff_head reasm_uo; struct sk_buff_head lobby; }; @@ -76,11 +77,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc); void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn); void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *, __u32); -#endif /* __sctp_ulpqueue_h__ */ - - - - - +__u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, + struct sk_buff_head *list, __u16 needed); +#endif /* __sctp_ulpqueue_h__ */ diff --git a/include/net/sock.h b/include/net/sock.h index a6b9a8d1a6df..169c92afcafa 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -60,7 +60,7 @@ #include <linux/sched.h> #include <linux/wait.h> #include <linux/cgroup-defs.h> - +#include <linux/rbtree.h> #include <linux/filter.h> #include <linux/rculist_nulls.h> #include <linux/poll.h> @@ -72,6 +72,7 @@ #include <net/tcp_states.h> #include <linux/net_tstamp.h> #include <net/smc.h> +#include <net/l3mdev.h> /* * This structure really needs to be cleaned up. @@ -267,6 +268,7 @@ struct sock_common { * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments + * @sk_pacing_shift: scaling factor for TCP Small Queues * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct @@ -397,7 +399,10 @@ struct sock { int sk_wmem_queued; refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; - struct sk_buff *sk_send_head; + union { + struct sk_buff *sk_send_head; + struct rb_root tcp_rtx_queue; + }; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; @@ -436,7 +441,6 @@ struct sock { #define SK_FL_TYPE_MASK 0xffff0000 #endif - kmemcheck_bitfield_begin(flags); unsigned int sk_padding : 1, sk_kern_sock : 1, sk_no_check_tx : 1, @@ -445,9 +449,8 @@ struct sock { sk_protocol : 8, sk_type : 16; #define SK_PROTOCOL_MAX U8_MAX - kmemcheck_bitfield_end(flags); - u16 sk_gso_max_segs; + u8 sk_pacing_shift; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; @@ -683,11 +686,7 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { - if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && - sk->sk_family == AF_INET6) - hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); - else - hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) @@ -734,10 +733,10 @@ static inline void sk_add_bind_node(struct sock *sk, * */ #define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \ - for (pos = rcu_dereference((head)->first); \ + for (pos = rcu_dereference(hlist_first_rcu(head)); \ pos != NULL && \ ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ - pos = rcu_dereference(pos->next)) + pos = rcu_dereference(hlist_next_rcu(pos))) static inline struct user_namespace *sk_user_ns(struct sock *sk) { @@ -1098,14 +1097,20 @@ struct proto { */ unsigned long *memory_pressure; long *sysctl_mem; + int *sysctl_wmem; int *sysctl_rmem; + u32 sysctl_wmem_offset; + u32 sysctl_rmem_offset; + int max_header; bool no_autobind; struct kmem_cache *slab; unsigned int obj_size; - int slab_flags; + slab_flags_t slab_flags; + size_t useroffset; /* Usercopy region offset */ + size_t usersize; /* Usercopy region size */ struct percpu_counter *orphan_count; @@ -1260,6 +1265,7 @@ proto_memory_pressure(struct proto *prot) /* Called with local bh disabled */ void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); int sock_prot_inuse_get(struct net *net, struct proto *proto); +int sock_inuse_get(struct net *net); #else static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc) @@ -1443,10 +1449,8 @@ do { \ } while (0) #ifdef CONFIG_LOCKDEP -static inline bool lockdep_sock_is_held(const struct sock *csk) +static inline bool lockdep_sock_is_held(const struct sock *sk) { - struct sock *sk = (struct sock *)csk; - return lockdep_is_held(&sk->sk_lock) || lockdep_is_held(&sk->sk_lock.slock); } @@ -1512,6 +1516,11 @@ static inline bool sock_owned_by_user(const struct sock *sk) return sk->sk_lock.owned; } +static inline bool sock_owned_by_user_nocheck(const struct sock *sk) +{ + return sk->sk_lock.owned; +} + /* no reclassification while locks are held */ static inline bool sock_allow_reclassification(const struct sock *csk) { @@ -1576,7 +1585,7 @@ int sock_no_connect(struct socket *, struct sockaddr *, int, int); int sock_no_socketpair(struct socket *, struct socket *); int sock_no_accept(struct socket *, struct socket *, int, bool); int sock_no_getname(struct socket *, struct sockaddr *, int *, int); -unsigned int sock_no_poll(struct file *, struct socket *, +__poll_t sock_no_poll(struct file *, struct socket *, struct poll_table_struct *); int sock_no_ioctl(struct socket *, unsigned int, unsigned long); int sock_no_listen(struct socket *, int); @@ -2330,31 +2339,6 @@ static inline bool sk_listener(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); } -/** - * sk_state_load - read sk->sk_state for lockless contexts - * @sk: socket pointer - * - * Paired with sk_state_store(). Used in places we do not hold socket lock : - * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... - */ -static inline int sk_state_load(const struct sock *sk) -{ - return smp_load_acquire(&sk->sk_state); -} - -/** - * sk_state_store - update sk->sk_state - * @sk: socket pointer - * @newstate: new state - * - * Paired with sk_state_load(). Should be used in contexts where - * state change might impact lockless readers. - */ -static inline void sk_state_store(struct sock *sk, int newstate) -{ - smp_store_release(&sk->sk_state, newstate); -} - void sock_enable_timestamp(struct sock *sk, int flag); int sock_get_timestamp(struct sock *, struct timeval __user *); int sock_get_timestampns(struct sock *, struct timespec __user *); @@ -2387,4 +2371,52 @@ extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; +static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) +{ + /* Does this proto have per netns sysctl_wmem ? */ + if (proto->sysctl_wmem_offset) + return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset); + + return *proto->sysctl_wmem; +} + +static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) +{ + /* Does this proto have per netns sysctl_rmem ? */ + if (proto->sysctl_rmem_offset) + return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset); + + return *proto->sysctl_rmem; +} + +/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) + * Some wifi drivers need to tweak it to get more chunks. + * They can use this helper from their ndo_start_xmit() + */ +static inline void sk_pacing_shift_update(struct sock *sk, int val) +{ + if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val) + return; + sk->sk_pacing_shift = val; +} + +/* if a socket is bound to a device, check that the given device + * index is either the same or that the socket is bound to an L3 + * master device and the given device index is also enslaved to + * that L3 master + */ +static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) +{ + int mdif; + + if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif) + return true; + + mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif); + if (mdif && mdif == sk->sk_bound_dev_if) + return true; + + return false; +} + #endif /* _SOCK_H */ diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d767b7991887..39bc855d7fee 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -51,6 +51,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, + SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, }; struct switchdev_attr { @@ -75,6 +76,7 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, SWITCHDEV_OBJ_ID_PORT_MDB, + SWITCHDEV_OBJ_ID_HOST_MDB, }; struct switchdev_obj { diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index 781f3433a0be..9470fd7e4350 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -6,10 +6,16 @@ #include <net/act_api.h> #include <linux/tc_act/tc_csum.h> +struct tcf_csum_params { + int action; + u32 update_flags; + struct rcu_head rcu; +}; + struct tcf_csum { struct tc_action common; - u32 update_flags; + struct tcf_csum_params __rcu *params; }; #define to_tcf_csum(a) ((struct tcf_csum *)a) @@ -24,7 +30,13 @@ static inline bool is_tcf_csum(const struct tc_action *a) static inline u32 tcf_csum_update_flags(const struct tc_action *a) { - return to_tcf_csum(a)->update_flags; + u32 update_flags; + + rcu_read_lock(); + update_flags = rcu_dereference(to_tcf_csum(a)->params)->update_flags; + rcu_read_unlock(); + + return update_flags; } #endif /* __NET_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index e82d93346b63..ef8dd0db70ce 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -34,6 +34,11 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act, return false; } +static inline bool is_tcf_gact_ok(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_OK, false); +} + static inline bool is_tcf_gact_shot(const struct tc_action *a) { return __is_tcf_gact_act(a, TC_ACT_SHOT, false); diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index ba6667125bcd..86d13b01b39d 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -7,12 +7,18 @@ #include <linux/rtnetlink.h> #include <linux/module.h> -struct tcf_ife_info { - struct tc_action common; +struct tcf_ife_params { u8 eth_dst[ETH_ALEN]; u8 eth_src[ETH_ALEN]; u16 eth_type; u16 flags; + + struct rcu_head rcu; +}; + +struct tcf_ife_info { + struct tc_action common; + struct tcf_ife_params __rcu *params; /* list of metaids allowed */ struct list_head metalist; }; @@ -41,7 +47,7 @@ struct tcf_meta_ops { struct module *owner; }; -#define MODULE_ALIAS_IFE_META(metan) MODULE_ALIAS("ifemeta" __stringify_1(metan)) +#define MODULE_ALIAS_IFE_META(metan) MODULE_ALIAS("ife-meta-" metan) int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi); diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index b2dbbfaefd22..a2e9cbca5c9e 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,7 +8,6 @@ struct tcf_mirred { struct tc_action common; int tcfm_eaction; - int tcfm_ifindex; bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; struct list_head tcfm_list; @@ -33,9 +32,9 @@ static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a) return false; } -static inline int tcf_mirred_ifindex(const struct tc_action *a) +static inline struct net_device *tcf_mirred_dev(const struct tc_action *a) { - return to_mirred(a)->tcfm_ifindex; + return rtnl_dereference(to_mirred(a)->tcfm_dev); } #endif /* __NET_TC_MIR_H */ diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h index 524cee4f4c81..01dbfea32672 100644 --- a/include/net/tc_act/tc_sample.h +++ b/include/net/tc_act/tc_sample.h @@ -14,7 +14,6 @@ struct tcf_sample { struct psample_group __rcu *psample_group; u32 psample_group_num; struct list_head tcfm_list; - struct rcu_head rcu; }; #define to_sample(a) ((struct tcf_sample *)a) diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index c2090df944ff..22ae260d6869 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -13,12 +13,17 @@ #include <net/act_api.h> #include <linux/tc_act/tc_vlan.h> +struct tcf_vlan_params { + int tcfv_action; + u16 tcfv_push_vid; + __be16 tcfv_push_proto; + u8 tcfv_push_prio; + struct rcu_head rcu; +}; + struct tcf_vlan { struct tc_action common; - int tcfv_action; - u16 tcfv_push_vid; - __be16 tcfv_push_proto; - u8 tcfv_push_prio; + struct tcf_vlan_params __rcu *vlan_p; }; #define to_vlan(a) ((struct tcf_vlan *)a) @@ -33,22 +38,45 @@ static inline bool is_tcf_vlan(const struct tc_action *a) static inline u32 tcf_vlan_action(const struct tc_action *a) { - return to_vlan(a)->tcfv_action; + u32 tcfv_action; + + rcu_read_lock(); + tcfv_action = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_action; + rcu_read_unlock(); + + return tcfv_action; } static inline u16 tcf_vlan_push_vid(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_vid; + u16 tcfv_push_vid; + + rcu_read_lock(); + tcfv_push_vid = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_vid; + rcu_read_unlock(); + + return tcfv_push_vid; } static inline __be16 tcf_vlan_push_proto(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_proto; + __be16 tcfv_push_proto; + + rcu_read_lock(); + tcfv_push_proto = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_proto; + rcu_read_unlock(); + + return tcfv_push_proto; } static inline u8 tcf_vlan_push_prio(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_prio; -} + u8 tcfv_push_prio; + rcu_read_lock(); + tcfv_push_prio = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_prio; + rcu_read_unlock(); + + return tcfv_push_prio; +} #endif /* __NET_TC_VLAN_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index e6d0002a1b0b..e3fc667f9ac2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -45,9 +45,6 @@ #include <linux/seq_file.h> #include <linux/memcontrol.h> - -#include <linux/bpf.h> -#include <linux/filter.h> #include <linux/bpf-cgroup.h> extern struct inet_hashinfo tcp_hashinfo; @@ -191,6 +188,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); * experimental options. See draft-ietf-tcpm-experimental-options-00.txt */ #define TCPOPT_FASTOPEN_MAGIC 0xF989 +#define TCPOPT_SMC_MAGIC 0xE2D4C3D9 /* * TCP option lengths @@ -203,6 +201,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_MD5SIG 18 #define TCPOLEN_FASTOPEN_BASE 2 #define TCPOLEN_EXP_FASTOPEN_BASE 4 +#define TCPOLEN_EXP_SMC_BASE 6 /* But this is what stacks really send out. */ #define TCPOLEN_TSTAMP_ALIGNED 12 @@ -213,6 +212,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_SACK_PERBLOCK 8 #define TCPOLEN_MD5SIG_ALIGNED 20 #define TCPOLEN_MSS_ALIGNED 4 +#define TCPOLEN_EXP_SMC_BASE_ALIGNED 8 /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ @@ -240,41 +240,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_fastopen; -extern int sysctl_tcp_retrans_collapse; -extern int sysctl_tcp_stdurg; -extern int sysctl_tcp_rfc1337; -extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_fack; -extern int sysctl_tcp_reordering; -extern int sysctl_tcp_max_reordering; -extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; -extern int sysctl_tcp_wmem[3]; -extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_app_win; -extern int sysctl_tcp_adv_win_scale; -extern int sysctl_tcp_frto; -extern int sysctl_tcp_nometrics_save; -extern int sysctl_tcp_moderate_rcvbuf; -extern int sysctl_tcp_tso_win_divisor; -extern int sysctl_tcp_workaround_signed_windows; -extern int sysctl_tcp_slow_start_after_idle; -extern int sysctl_tcp_thin_linear_timeouts; -extern int sysctl_tcp_thin_dupack; -extern int sysctl_tcp_early_retrans; -extern int sysctl_tcp_recovery; -#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_limit_output_bytes; -extern int sysctl_tcp_challenge_ack_limit; -extern int sysctl_tcp_min_tso_segs; -extern int sysctl_tcp_min_rtt_wlen; -extern int sysctl_tcp_autocorking; -extern int sysctl_tcp_invalid_ratelimit; -extern int sysctl_tcp_pacing_ss_ratio; -extern int sysctl_tcp_pacing_ca_ratio; +#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ +#define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -414,10 +384,10 @@ void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); -void tcp_disable_fack(struct tcp_sock *tp); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); -unsigned int tcp_poll(struct file *file, struct socket *sock, +void tcp_init_transfer(struct sock *sk, int bpf_op); +__poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); @@ -551,7 +521,13 @@ void tcp_xmit_retransmit_queue(struct sock *); void tcp_simple_retransmit(struct sock *); void tcp_enter_recovery(struct sock *sk, bool ece_ack); int tcp_trim_head(struct sock *, struct sk_buff *, u32); -int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); +enum tcp_queue { + TCP_FRAG_IN_WRITE_QUEUE, + TCP_FRAG_IN_RTX_QUEUE, +}; +int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, + struct sk_buff *skb, u32 len, + unsigned int mss_now, gfp_t gfp); void tcp_send_probe0(struct sock *); void tcp_send_partial(struct sock *); @@ -563,7 +539,7 @@ void tcp_push_one(struct sock *, unsigned int mss_now); void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); -bool tcp_schedule_loss_probe(struct sock *sk); +bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto); void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb); @@ -796,16 +772,10 @@ struct tcp_skb_cb { u16 tcp_gso_segs; u16 tcp_gso_size; }; - - /* Used to stash the receive timestamp while this skb is in the - * out of order queue, as skb->tstamp is overwritten by the - * rbnode. - */ - ktime_t swtstamp; }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ - __u8 sacked; /* State flags for SACK/FACK. */ + __u8 sacked; /* State flags for SACK. */ #define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */ #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ #define TCPCB_LOST 0x04 /* SKB is lost */ @@ -874,12 +844,11 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb) } #endif -/* TCP_SKB_CB reference means this can not be used from early demux */ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) return true; #endif return false; @@ -984,6 +953,7 @@ struct rate_sample { u32 prior_in_flight; /* in flight before this ACK */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ + bool is_ack_delayed; /* is this (likely) a delayed ACK? */ }; struct tcp_congestion_ops { @@ -1032,8 +1002,8 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); void tcp_assign_congestion_control(struct sock *sk); void tcp_init_congestion_control(struct sock *sk); void tcp_cleanup_congestion_control(struct sock *sk); -int tcp_set_default_congestion_control(const char *name); -void tcp_get_default_congestion_control(char *name); +int tcp_set_default_congestion_control(struct net *net, const char *name); +void tcp_get_default_congestion_control(struct net *net, char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); @@ -1047,7 +1017,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find_key(u32 key); -u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); +u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca); #ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); #else @@ -1086,7 +1056,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct rate_sample *rs); + bool is_sack_reneg, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK @@ -1095,7 +1065,6 @@ void tcp_rate_check_app_limited(struct sock *sk); * * tcp_is_sack - SACK enabled * tcp_is_reno - No SACK - * tcp_is_fack - FACK enabled, implies SACK enabled */ static inline int tcp_is_sack(const struct tcp_sock *tp) { @@ -1107,16 +1076,6 @@ static inline bool tcp_is_reno(const struct tcp_sock *tp) return !tcp_is_sack(tp); } -static inline bool tcp_is_fack(const struct tcp_sock *tp) -{ - return tp->rx_opt.sack_ok & TCP_FACK_ENABLED; -} - -static inline void tcp_enable_fack(struct tcp_sock *tp) -{ - tp->rx_opt.sack_ok |= TCP_FACK_ENABLED; -} - static inline unsigned int tcp_left_out(const struct tcp_sock *tp) { return tp->sacked_out + tp->lost_out; @@ -1309,7 +1268,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sysctl_tcp_slow_start_after_idle || tp->packets_out || + if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; @@ -1318,13 +1277,14 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) } /* Determine a window scaling and initial window to offer. */ -void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, +void tcp_select_initial_window(const struct sock *sk, int __space, + __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd); -static inline int tcp_win_from_space(int space) +static inline int tcp_win_from_space(const struct sock *sk, int space) { - int tcp_adv_win_scale = sysctl_tcp_adv_win_scale; + int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; return tcp_adv_win_scale <= 0 ? (space>>(-tcp_adv_win_scale)) : @@ -1334,13 +1294,13 @@ static inline int tcp_win_from_space(int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk->sk_rcvbuf - + return tcp_win_from_space(sk, sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)); } static inline int tcp_full_space(const struct sock *sk) { - return tcp_win_from_space(sk->sk_rcvbuf); + return tcp_win_from_space(sk, sk->sk_rcvbuf); } extern void tcp_openreq_init_rwin(struct request_sock *req, @@ -1548,8 +1508,7 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, /* From tcp_fastopen.c */ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie, int *syn_loss, - unsigned long *last_syn_loss); + struct tcp_fastopen_cookie *cookie); void tcp_fastopen_cache_set(struct sock *sk, u16 mss, struct tcp_fastopen_cookie *cookie, bool syn_lost, u16 try_exp); @@ -1561,14 +1520,16 @@ struct tcp_fastopen_request { int copied; /* queued in tcp_connect() */ }; void tcp_free_fastopen_req(struct tcp_sock *tp); - -extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; -int tcp_fastopen_reset_cipher(void *key, unsigned int len); +void tcp_fastopen_destroy_cipher(struct sock *sk); +void tcp_fastopen_ctx_destroy(struct net *net); +int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, + void *key, unsigned int len); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct tcp_fastopen_cookie *foc); -void tcp_fastopen_init_key_once(bool publish); + struct tcp_fastopen_cookie *foc, + const struct dst_entry *dst); +void tcp_fastopen_init_key_once(struct net *net); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); @@ -1585,7 +1546,7 @@ extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; void tcp_fastopen_active_disable(struct sock *sk); bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); -void tcp_fastopen_active_timeout_reset(void); +void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired); /* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. @@ -1601,52 +1562,46 @@ enum tcp_chrono { void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type); void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type); -/* write queue abstraction */ -static inline void tcp_write_queue_purge(struct sock *sk) +/* This helper is needed, because skb->tcp_tsorted_anchor uses + * the same memory storage than skb->destructor/_skb_refdst + */ +static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb) { - struct sk_buff *skb; - - tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) - sk_wmem_free_skb(sk, skb); - sk_mem_reclaim(sk); - tcp_clear_all_retrans_hints(tcp_sk(sk)); + skb->destructor = NULL; + skb->_skb_refdst = 0UL; } -static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) -{ - return skb_peek(&sk->sk_write_queue); +#define tcp_skb_tsorted_save(skb) { \ + unsigned long _save = skb->_skb_refdst; \ + skb->_skb_refdst = 0UL; + +#define tcp_skb_tsorted_restore(skb) \ + skb->_skb_refdst = _save; \ } -static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk) +void tcp_write_queue_purge(struct sock *sk); + +static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk) { - return skb_peek_tail(&sk->sk_write_queue); + return skb_rb_first(&sk->tcp_rtx_queue); } -static inline struct sk_buff *tcp_write_queue_next(const struct sock *sk, - const struct sk_buff *skb) +static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) { - return skb_queue_next(&sk->sk_write_queue, skb); + return skb_peek(&sk->sk_write_queue); } -static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk, - const struct sk_buff *skb) +static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk) { - return skb_queue_prev(&sk->sk_write_queue, skb); + return skb_peek_tail(&sk->sk_write_queue); } -#define tcp_for_write_queue(skb, sk) \ - skb_queue_walk(&(sk)->sk_write_queue, skb) - -#define tcp_for_write_queue_from(skb, sk) \ - skb_queue_walk_from(&(sk)->sk_write_queue, skb) - #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) static inline struct sk_buff *tcp_send_head(const struct sock *sk) { - return sk->sk_send_head; + return skb_peek(&sk->sk_write_queue); } static inline bool tcp_skb_is_last(const struct sock *sk, @@ -1655,27 +1610,25 @@ static inline bool tcp_skb_is_last(const struct sock *sk, return skb_queue_is_last(&sk->sk_write_queue, skb); } -static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb) +static inline bool tcp_write_queue_empty(const struct sock *sk) { - if (tcp_skb_is_last(sk, skb)) - sk->sk_send_head = NULL; - else - sk->sk_send_head = tcp_write_queue_next(sk, skb); + return skb_queue_empty(&sk->sk_write_queue); } -static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) +static inline bool tcp_rtx_queue_empty(const struct sock *sk) { - if (sk->sk_send_head == skb_unlinked) { - sk->sk_send_head = NULL; - tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - } - if (tcp_sk(sk)->highest_sack == skb_unlinked) - tcp_sk(sk)->highest_sack = NULL; + return RB_EMPTY_ROOT(&sk->tcp_rtx_queue); } -static inline void tcp_init_send_head(struct sock *sk) +static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk) { - sk->sk_send_head = NULL; + return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk); +} + +static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) +{ + if (tcp_write_queue_empty(sk)) + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); } static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb) @@ -1688,26 +1641,8 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb __tcp_add_write_queue_tail(sk, skb); /* Queue it, remembering where we must start sending. */ - if (sk->sk_send_head == NULL) { - sk->sk_send_head = skb; + if (sk->sk_write_queue.next == skb) tcp_chrono_start(sk, TCP_CHRONO_BUSY); - - if (tcp_sk(sk)->highest_sack == NULL) - tcp_sk(sk)->highest_sack = skb; - } -} - -static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb) -{ - __skb_queue_head(&sk->sk_write_queue, skb); -} - -/* Insert buff after skb on the write queue of sk. */ -static inline void tcp_insert_write_queue_after(struct sk_buff *skb, - struct sk_buff *buff, - struct sock *sk) -{ - __skb_queue_after(&sk->sk_write_queue, skb, buff); } /* Insert new before skb on the write queue of sk. */ @@ -1716,19 +1651,27 @@ static inline void tcp_insert_write_queue_before(struct sk_buff *new, struct sock *sk) { __skb_queue_before(&sk->sk_write_queue, skb, new); - - if (sk->sk_send_head == skb) - sk->sk_send_head = new; } static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) { + tcp_skb_tsorted_anchor_cleanup(skb); __skb_unlink(skb, &sk->sk_write_queue); } -static inline bool tcp_write_queue_empty(struct sock *sk) +void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb); + +static inline void tcp_rtx_queue_unlink(struct sk_buff *skb, struct sock *sk) { - return skb_queue_empty(&sk->sk_write_queue); + tcp_skb_tsorted_anchor_cleanup(skb); + rb_erase(&skb->rbnode, &sk->tcp_rtx_queue); +} + +static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct sock *sk) +{ + list_del(&skb->tcp_tsorted_anchor); + tcp_rtx_queue_unlink(skb, sk); + sk_wmem_free_skb(sk, skb); } static inline void tcp_push_pending_frames(struct sock *sk) @@ -1757,8 +1700,7 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp) static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb) { - tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL : - tcp_write_queue_next(sk, skb); + tcp_sk(sk)->highest_sack = skb_rb_next(skb); } static inline struct sk_buff *tcp_highest_sack(struct sock *sk) @@ -1768,7 +1710,7 @@ static inline struct sk_buff *tcp_highest_sack(struct sock *sk) static inline void tcp_highest_sack_reset(struct sock *sk) { - tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk); + tcp_sk(sk)->highest_sack = tcp_rtx_queue_head(sk); } /* Called when old skb is about to be deleted and replaced by new skb */ @@ -1934,11 +1876,12 @@ extern void tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); +extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs); /* At how many usecs into the future should the RTO fire? */ static inline s64 tcp_rto_delta_us(const struct sock *sk) { - const struct sk_buff *skb = tcp_write_queue_head(sk); + const struct sk_buff *skb = tcp_rtx_queue_head(sk); u32 rto = inet_csk(sk)->icsk_rto; u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto); @@ -2040,6 +1983,11 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); #define TCP_ULP_MAX 128 #define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) +enum { + TCP_ULP_TLS, + TCP_ULP_BPF, +}; + struct tcp_ulp_ops { struct list_head list; @@ -2048,12 +1996,15 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); + int uid; char name[TCP_ULP_NAME_MAX]; + bool user_visible; struct module *owner; }; int tcp_register_ulp(struct tcp_ulp_ops *type); void tcp_unregister_ulp(struct tcp_ulp_ops *type); int tcp_set_ulp(struct sock *sk, const char *name); +int tcp_set_ulp_id(struct sock *sk, const int ulp); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); @@ -2063,17 +2014,21 @@ void tcp_cleanup_ulp(struct sock *sk); * program loaded). */ #ifdef CONFIG_BPF -static inline int tcp_call_bpf(struct sock *sk, int op) +static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) { struct bpf_sock_ops_kern sock_ops; int ret; - if (sk_fullsock(sk)) + memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); + if (sk_fullsock(sk)) { + sock_ops.is_fullsock = 1; sock_owned_by_me(sk); + } - memset(&sock_ops, 0, sizeof(sock_ops)); sock_ops.sk = sk; sock_ops.op = op; + if (nargs > 0) + memcpy(sock_ops.args, args, nargs * sizeof(*args)); ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); if (ret == 0) @@ -2082,18 +2037,46 @@ static inline int tcp_call_bpf(struct sock *sk, int op) ret = -1; return ret; } + +static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2) +{ + u32 args[2] = {arg1, arg2}; + + return tcp_call_bpf(sk, op, 2, args); +} + +static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2, + u32 arg3) +{ + u32 args[3] = {arg1, arg2, arg3}; + + return tcp_call_bpf(sk, op, 3, args); +} + #else -static inline int tcp_call_bpf(struct sock *sk, int op) +static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) { return -EPERM; } + +static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2) +{ + return -EPERM; +} + +static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2, + u32 arg3) +{ + return -EPERM; +} + #endif static inline u32 tcp_timeout_init(struct sock *sk) { int timeout; - timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT); + timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL); if (timeout <= 0) timeout = TCP_TIMEOUT_INIT; @@ -2104,7 +2087,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk) { int rwnd; - rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT); + rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL); if (rwnd < 0) rwnd = 0; @@ -2113,6 +2096,10 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk) static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) { - return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1); + return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); } + +#if IS_ENABLED(CONFIG_SMC) +extern struct static_key_false tcp_have_smc; +#endif #endif /* _TCP_H */ diff --git a/include/net/tipc.h b/include/net/tipc.h new file mode 100644 index 000000000000..07670ec022a7 --- /dev/null +++ b/include/net/tipc.h @@ -0,0 +1,62 @@ +/* + * include/net/tipc.h: Include file for TIPC message header routines + * + * Copyright (c) 2017 Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_HDR_H +#define _TIPC_HDR_H + +#include <linux/random.h> + +#define KEEPALIVE_MSG_MASK 0x0e080000 /* LINK_PROTOCOL + MSG_IS_KEEPALIVE */ + +struct tipc_basic_hdr { + __be32 w[4]; +}; + +static inline u32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr) +{ + u32 w0 = ntohl(hdr->w[0]); + bool keepalive_msg = (w0 & KEEPALIVE_MSG_MASK) == KEEPALIVE_MSG_MASK; + int key; + + /* Return source node identity as key */ + if (likely(!keepalive_msg)) + return hdr->w[3]; + + /* Spread PROBE/PROBE_REPLY messages across the cores */ + get_random_bytes(&key, sizeof(key)); + return key; +} + +#endif diff --git a/include/net/tls.h b/include/net/tls.h index b89d397dd62f..4913430ab807 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -35,6 +35,11 @@ #define _TLS_OFFLOAD_H #include <linux/types.h> +#include <asm/byteorder.h> +#include <linux/crypto.h> +#include <linux/socket.h> +#include <linux/tcp.h> +#include <net/tcp.h> #include <uapi/linux/tls.h> @@ -53,6 +58,7 @@ struct tls_sw_context { struct crypto_aead *aead_send; + struct crypto_wait async_wait; /* Sending context */ char aad_space[TLS_AAD_SPACE_SIZE]; @@ -83,6 +89,8 @@ struct tls_context { void *priv_ctx; + u8 tx_conf:2; + u16 prepend_size; u16 tag_size; u16 overhead_size; @@ -97,7 +105,6 @@ struct tls_context { u16 pending_open_record_frags; int (*push_pending_record)(struct sock *sk, int flags); - void (*free_resources)(struct sock *sk); void (*sk_write_space)(struct sock *sk); void (*sk_proto_close)(struct sock *sk, long timeout); @@ -122,6 +129,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); +void tls_sw_free_tx_resources(struct sock *sk); void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); void tls_icsk_clean_acked(struct sock *sk); @@ -164,7 +172,7 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) static inline void tls_err_abort(struct sock *sk) { - sk->sk_err = -EBADMSG; + sk->sk_err = EBADMSG; sk->sk_error_report(sk); } @@ -212,6 +220,21 @@ static inline void tls_fill_prepend(struct tls_context *ctx, ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size); } +static inline void tls_make_aad(char *buf, + size_t size, + char *record_sequence, + int record_sequence_size, + unsigned char record_type) +{ + memcpy(buf, record_sequence, record_sequence_size); + + buf[8] = record_type; + buf[9] = TLS_1_2_VERSION_MAJOR; + buf[10] = TLS_1_2_VERSION_MINOR; + buf[11] = size >> 8; + buf[12] = size & 0xFF; +} + static inline struct tls_context *tls_get_ctx(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/include/net/udp.h b/include/net/udp.h index 6c759c8594e2..850a8e581cce 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -275,7 +275,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); int udp_init_sock(struct sock *sk); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); -unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); +__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, bool is_ipv6); diff --git a/include/net/udplite.h b/include/net/udplite.h index 81bdbf97319b..9185e45b997f 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -64,6 +64,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) UDP_SKB_CB(skb)->cscov = cscov; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; + skb->csum_valid = 0; } return 0; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 13223396dc64..ad73d8b3fcc2 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -146,7 +146,7 @@ struct vxlanhdr_gpe { np_applied:1, instance_applied:1, version:2, -reserved_flags2:2; + reserved_flags2:2; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved_flags2:2, version:2, @@ -301,7 +301,7 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, l4_hdr = ipv6_hdr(skb)->nexthdr; break; default: - return features;; + return features; } if ((l4_hdr == IPPROTO_UDP) && diff --git a/include/net/wext.h b/include/net/wext.h index e51f067fdb3a..aa192a670304 100644 --- a/include/net/wext.h +++ b/include/net/wext.h @@ -7,7 +7,7 @@ struct net; #ifdef CONFIG_WEXT_CORE -int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, +int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg); int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, unsigned long arg); @@ -15,7 +15,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, struct iw_statistics *get_wireless_stats(struct net_device *dev); int call_commit_handler(struct net_device *dev); #else -static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, +static inline int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg) { return -EINVAL; diff --git a/include/net/xdp.h b/include/net/xdp.h new file mode 100644 index 000000000000..b2362ddfa694 --- /dev/null +++ b/include/net/xdp.h @@ -0,0 +1,48 @@ +/* include/net/xdp.h + * + * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. + * Released under terms in GPL version 2. See COPYING. + */ +#ifndef __LINUX_NET_XDP_H__ +#define __LINUX_NET_XDP_H__ + +/** + * DOC: XDP RX-queue information + * + * The XDP RX-queue info (xdp_rxq_info) is associated with the driver + * level RX-ring queues. It is information that is specific to how + * the driver have configured a given RX-ring queue. + * + * Each xdp_buff frame received in the driver carry a (pointer) + * reference to this xdp_rxq_info structure. This provides the XDP + * data-path read-access to RX-info for both kernel and bpf-side + * (limited subset). + * + * For now, direct access is only safe while running in NAPI/softirq + * context. Contents is read-mostly and must not be updated during + * driver NAPI/softirq poll. + * + * The driver usage API is a register and unregister API. + * + * The struct is not directly tied to the XDP prog. A new XDP prog + * can be attached as long as it doesn't change the underlying + * RX-ring. If the RX-ring does change significantly, the NIC driver + * naturally need to stop the RX-ring before purging and reallocating + * memory. In that process the driver MUST call unregistor (which + * also apply for driver shutdown and unload). The register API is + * also mandatory during RX-ring setup. + */ + +struct xdp_rxq_info { + struct net_device *dev; + u32 queue_index; + u32 reg_state; +} ____cacheline_aligned; /* perf critical, avoid false-sharing */ + +int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index); +void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); +void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); +bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); + +#endif /* __LINUX_NET_XDP_H__ */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e015e164bac0..7d2077665c0b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -968,7 +968,7 @@ static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_c /* A struct encoding bundle of transformations to apply to some set of flow. * - * dst->child points to the next element of bundle. + * xdst->child points to the next element of bundle. * dst->xfrm points to an instanse of transformer. * * Due to unfortunate limitations of current routing cache, which we @@ -984,6 +984,8 @@ struct xfrm_dst { struct rt6_info rt6; } u; struct dst_entry *route; + struct dst_entry *child; + struct dst_entry *path; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; u32 xfrm_genid; @@ -994,7 +996,35 @@ struct xfrm_dst { u32 path_cookie; }; +static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst) +{ +#ifdef CONFIG_XFRM + if (dst->xfrm) { + const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst; + + return xdst->path; + } +#endif + return (struct dst_entry *) dst; +} + +static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst) +{ +#ifdef CONFIG_XFRM + if (dst->xfrm) { + struct xfrm_dst *xdst = (struct xfrm_dst *) dst; + return xdst->child; + } +#endif + return NULL; +} + #ifdef CONFIG_XFRM +static inline void xfrm_dst_set_child(struct xfrm_dst *xdst, struct dst_entry *child) +{ + xdst->child = child; +} + static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) { xfrm_pols_put(xdst->pols, xdst->num_pols); @@ -1021,6 +1051,7 @@ struct xfrm_offload { #define XFRM_GSO_SEGMENT 16 #define XFRM_GRO 32 #define XFRM_ESP_NO_TRAILER 64 +#define XFRM_DEV_RESUME 128 __u32 status; #define CRYPTO_SUCCESS 1 @@ -1570,6 +1601,9 @@ int xfrm_init_state(struct xfrm_state *x); int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); +int xfrm_trans_queue(struct sk_buff *skb, + int (*finish)(struct net *, struct sock *, + struct sk_buff *)); int xfrm_output_resume(struct sk_buff *skb, int err); int xfrm_output(struct sock *sk, struct sk_buff *skb); int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); @@ -1765,22 +1799,22 @@ static inline int xfrm_acquire_is_on(struct net *net) } #endif -static inline int aead_len(struct xfrm_algo_aead *alg) +static inline unsigned int aead_len(struct xfrm_algo_aead *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_alg_len(const struct xfrm_algo *alg) +static inline unsigned int xfrm_alg_len(const struct xfrm_algo *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg) +static inline unsigned int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn) +static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn) { return sizeof(*replay_esn) + replay_esn->bmp_len * sizeof(__u32); } @@ -1844,34 +1878,53 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { return skb->sp->xvec[skb->sp->len - 1]; } +#endif + static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) { +#ifdef CONFIG_XFRM struct sec_path *sp = skb->sp; if (!sp || !sp->olen || sp->len != sp->olen) return NULL; return &sp->ovec[sp->olen - 1]; -} +#else + return NULL; #endif +} void __net_init xfrm_dev_init(void); #ifdef CONFIG_XFRM_OFFLOAD -int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features); +void xfrm_dev_resume(struct sk_buff *skb); +void xfrm_dev_backlog(struct softnet_data *sd); +struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); +static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) +{ + struct xfrm_state_offload *xso = &x->xso; + + if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn) + xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); +} + static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) { struct xfrm_state *x = dst->xfrm; + struct xfrm_dst *xdst; if (!x || !x->type_offload) return false; - if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) && - !dst->child->xfrm) + xdst = (struct xfrm_dst *) dst; + if (!x->xso.offload_handle && !xdst->child->xfrm) + return true; + if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) && + !xdst->child->xfrm) return true; return false; @@ -1891,15 +1944,24 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) struct net_device *dev = xso->dev; if (dev && dev->xfrmdev_ops) { - dev->xfrmdev_ops->xdo_dev_state_free(x); + if (dev->xfrmdev_ops->xdo_dev_state_free) + dev->xfrmdev_ops->xdo_dev_state_free(x); xso->dev = NULL; dev_put(dev); } } #else -static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) +static inline void xfrm_dev_resume(struct sk_buff *skb) { - return 0; +} + +static inline void xfrm_dev_backlog(struct softnet_data *sd) +{ +} + +static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) +{ + return skb; } static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo) @@ -1920,6 +1982,10 @@ static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x return false; } +static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) +{ +} + static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) { return false; |
