diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-05 12:31:59 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-05 12:31:59 -0700 |
commit | 5518b69b76680a4f2df96b1deca260059db0c2de (patch) | |
tree | f33cd1519c8efb4590500f2f9617400be233238c /include/net | |
parent | 8ad06e56dcbc1984ef0ff8f6e3c19982c5809f73 (diff) | |
parent | 0e72582270c07850b92cac351c8b97d4f9c123b9 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Reasonably busy this cycle, but perhaps not as busy as in the 4.12
merge window:
1) Several optimizations for UDP processing under high load from
Paolo Abeni.
2) Support pacing internally in TCP when using the sch_fq packet
scheduler for this is not practical. From Eric Dumazet.
3) Support mutliple filter chains per qdisc, from Jiri Pirko.
4) Move to 1ms TCP timestamp clock, from Eric Dumazet.
5) Add batch dequeueing to vhost_net, from Jason Wang.
6) Flesh out more completely SCTP checksum offload support, from
Davide Caratti.
7) More plumbing of extended netlink ACKs, from David Ahern, Pablo
Neira Ayuso, and Matthias Schiffer.
8) Add devlink support to nfp driver, from Simon Horman.
9) Add RTM_F_FIB_MATCH flag to RTM_GETROUTE queries, from Roopa
Prabhu.
10) Add stack depth tracking to BPF verifier and use this information
in the various eBPF JITs. From Alexei Starovoitov.
11) Support XDP on qed device VFs, from Yuval Mintz.
12) Introduce BPF PROG ID for better introspection of installed BPF
programs. From Martin KaFai Lau.
13) Add bpf_set_hash helper for TC bpf programs, from Daniel Borkmann.
14) For loads, allow narrower accesses in bpf verifier checking, from
Yonghong Song.
15) Support MIPS in the BPF selftests and samples infrastructure, the
MIPS eBPF JIT will be merged in via the MIPS GIT tree. From David
Daney.
16) Support kernel based TLS, from Dave Watson and others.
17) Remove completely DST garbage collection, from Wei Wang.
18) Allow installing TCP MD5 rules using prefixes, from Ivan
Delalande.
19) Add XDP support to Intel i40e driver, from Björn Töpel
20) Add support for TC flower offload in nfp driver, from Simon
Horman, Pieter Jansen van Vuuren, Benjamin LaHaise, Jakub
Kicinski, and Bert van Leeuwen.
21) IPSEC offloading support in mlx5, from Ilan Tayari.
22) Add HW PTP support to macb driver, from Rafal Ozieblo.
23) Networking refcount_t conversions, From Elena Reshetova.
24) Add sock_ops support to BPF, from Lawrence Brako. This is useful
for tuning the TCP sockopt settings of a group of applications,
currently via CGROUPs"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1899 commits)
net: phy: dp83867: add workaround for incorrect RX_CTRL pin strap
dt-bindings: phy: dp83867: provide a workaround for incorrect RX_CTRL pin strap
cxgb4: Support for get_ts_info ethtool method
cxgb4: Add PTP Hardware Clock (PHC) support
cxgb4: time stamping interface for PTP
nfp: default to chained metadata prepend format
nfp: remove legacy MAC address lookup
nfp: improve order of interfaces in breakout mode
net: macb: remove extraneous return when MACB_EXT_DESC is defined
bpf: add missing break in for the TCP_BPF_SNDCWND_CLAMP case
bpf: fix return in load_bpf_file
mpls: fix rtm policy in mpls_getroute
net, ax25: convert ax25_cb.refcount from atomic_t to refcount_t
net, ax25: convert ax25_route.refcount from atomic_t to refcount_t
net, ax25: convert ax25_uid_assoc.refcount from atomic_t to refcount_t
net, sctp: convert sctp_ep_common.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_transport.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_chunk.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_datamsg.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_auth_bytes.refcnt from atomic_t to refcount_t
...
Diffstat (limited to 'include/net')
65 files changed, 1034 insertions, 363 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index cfa2ae33da9a..26ffd8333f50 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -42,6 +42,7 @@ struct tc_action { struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie *act_cookie; + struct tcf_chain *goto_chain; }; #define tcf_head common.tcfa_head #define tcf_index common.tcfa_index @@ -180,12 +181,12 @@ int tcf_unregister_action(struct tc_action_ops *a, int tcf_action_destroy(struct list_head *actions, int bind); int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); -int tcf_action_init(struct net *net, struct nlattr *nla, - struct nlattr *est, char *n, int ovr, - int bind, struct list_head *); -struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, - struct nlattr *est, char *n, int ovr, - int bind); +int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, + struct nlattr *est, char *name, int ovr, int bind, + struct list_head *actions); +struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, + struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind); int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index b43a4eec3cec..6df79e96a780 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -48,11 +48,15 @@ struct prefix_info { struct in6_addr prefix; }; - #include <linux/netdevice.h> #include <net/if_inet6.h> #include <net/ipv6.h> +struct in6_validator_info { + struct in6_addr i6vi_addr; + struct inet6_dev *i6vi_dev; +}; + #define IN6_ADDR_HSIZE_SHIFT 4 #define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) @@ -278,6 +282,10 @@ int register_inet6addr_notifier(struct notifier_block *nb); int unregister_inet6addr_notifier(struct notifier_block *nb); int inet6addr_notifier_call_chain(unsigned long val, void *v); +int register_inet6addr_validator_notifier(struct notifier_block *nb); +int unregister_inet6addr_validator_notifier(struct notifier_block *nb); +int inet6addr_validator_notifier_call_chain(unsigned long val, void *v); + void inet6_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv6_devconf *devconf); @@ -308,7 +316,7 @@ static inline struct inet6_dev *in6_dev_get(const struct net_device *dev) rcu_read_lock(); idev = rcu_dereference(dev->ip6_ptr); if (idev) - atomic_inc(&idev->refcnt); + refcount_inc(&idev->refcnt); rcu_read_unlock(); return idev; } @@ -324,36 +332,36 @@ void in6_dev_finish_destroy(struct inet6_dev *idev); static inline void in6_dev_put(struct inet6_dev *idev) { - if (atomic_dec_and_test(&idev->refcnt)) + if (refcount_dec_and_test(&idev->refcnt)) in6_dev_finish_destroy(idev); } static inline void __in6_dev_put(struct inet6_dev *idev) { - atomic_dec(&idev->refcnt); + refcount_dec(&idev->refcnt); } static inline void in6_dev_hold(struct inet6_dev *idev) { - atomic_inc(&idev->refcnt); + refcount_inc(&idev->refcnt); } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); static inline void in6_ifa_put(struct inet6_ifaddr *ifp) { - if (atomic_dec_and_test(&ifp->refcnt)) + if (refcount_dec_and_test(&ifp->refcnt)) inet6_ifa_finish_destroy(ifp); } static inline void __in6_ifa_put(struct inet6_ifaddr *ifp) { - atomic_dec(&ifp->refcnt); + refcount_dec(&ifp->refcnt); } static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) { - atomic_inc(&ifp->refcnt); + refcount_inc(&ifp->refcnt); } diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index b5f5187f488c..c172709787af 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -33,6 +33,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct sockaddr_rxrpc *, struct key *, unsigned long, + s64, gfp_t, rxrpc_notify_rx_t); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, @@ -46,5 +47,6 @@ void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t); +void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); #endif /* _NET_RXRPC_H */ diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 75e612a45824..678e4d6fa317 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -4,6 +4,7 @@ #include <linux/socket.h> #include <linux/un.h> #include <linux/mutex.h> +#include <linux/refcount.h> #include <net/sock.h> void unix_inflight(struct user_struct *user, struct file *fp); @@ -21,7 +22,7 @@ extern spinlock_t unix_table_lock; extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { - atomic_t refcnt; + refcount_t refcnt; int len; unsigned int hash; struct sockaddr_un name[0]; diff --git a/include/net/arp.h b/include/net/arp.h index 65619a2de6f4..17d90e4e8dc5 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -28,7 +28,7 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 rcu_read_lock_bh(); n = __ipv4_neigh_lookup_noref(dev, key); - if (n && !atomic_inc_not_zero(&n->refcnt)) + if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock_bh(); diff --git a/include/net/ax25.h b/include/net/ax25.h index e602f8177ebf..c4a0cf6f0810 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -11,7 +11,7 @@ #include <linux/timer.h> #include <linux/list.h> #include <linux/slab.h> -#include <linux/atomic.h> +#include <linux/refcount.h> #include <net/neighbour.h> #include <net/sock.h> @@ -158,7 +158,7 @@ enum { typedef struct ax25_uid_assoc { struct hlist_node uid_node; - atomic_t refcount; + refcount_t refcount; kuid_t uid; ax25_address call; } ax25_uid_assoc; @@ -167,11 +167,11 @@ typedef struct ax25_uid_assoc { hlist_for_each_entry(__ax25, list, uid_node) #define ax25_uid_hold(ax25) \ - atomic_inc(&((ax25)->refcount)) + refcount_inc(&((ax25)->refcount)) static inline void ax25_uid_put(ax25_uid_assoc *assoc) { - if (atomic_dec_and_test(&assoc->refcount)) { + if (refcount_dec_and_test(&assoc->refcount)) { kfree(assoc); } } @@ -185,7 +185,7 @@ typedef struct { typedef struct ax25_route { struct ax25_route *next; - atomic_t refcount; + refcount_t refcount; ax25_address callsign; struct net_device *dev; ax25_digi *digipeat; @@ -194,14 +194,14 @@ typedef struct ax25_route { static inline void ax25_hold_route(ax25_route *ax25_rt) { - atomic_inc(&ax25_rt->refcount); + refcount_inc(&ax25_rt->refcount); } void __ax25_put_route(ax25_route *ax25_rt); static inline void ax25_put_route(ax25_route *ax25_rt) { - if (atomic_dec_and_test(&ax25_rt->refcount)) + if (refcount_dec_and_test(&ax25_rt->refcount)) __ax25_put_route(ax25_rt); } @@ -244,7 +244,7 @@ typedef struct ax25_cb { unsigned char window; struct timer_list timer, dtimer; struct sock *sk; /* Backlink to socket */ - atomic_t refcount; + refcount_t refcount; } ax25_cb; struct ax25_sock { @@ -266,11 +266,11 @@ static inline struct ax25_cb *sk_to_ax25(const struct sock *sk) hlist_for_each_entry(__ax25, list, ax25_node) #define ax25_cb_hold(__ax25) \ - atomic_inc(&((__ax25)->refcount)) + refcount_inc(&((__ax25)->refcount)) static __inline__ void ax25_cb_put(ax25_cb *ax25) { - if (atomic_dec_and_test(&ax25->refcount)) { + if (refcount_dec_and_test(&ax25->refcount)) { kfree(ax25->digipeat); kfree(ax25); } diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 99aa5e5e3100..fe98f0a5bef0 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -399,6 +399,7 @@ enum { #define HCI_LE_PING 0x10 #define HCI_LE_DATA_LEN_EXT 0x20 #define HCI_LE_EXT_SCAN_POLICY 0x80 +#define HCI_LE_CHAN_SEL_ALG2 0x40 /* Connection modes */ #define HCI_CM_ACTIVE 0x0000 @@ -1498,6 +1499,13 @@ struct hci_rp_le_read_max_data_len { __le16 rx_time; } __packed; +#define HCI_OP_LE_SET_DEFAULT_PHY 0x2031 +struct hci_cp_le_set_default_phy { + __u8 all_phys; + __u8 tx_phys; + __u8 rx_phys; +} __packed; + /* ---- HCI Events ---- */ #define HCI_EV_INQUIRY_COMPLETE 0x01 diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 1797235cd590..d79d28f5318c 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -104,6 +104,8 @@ struct bond_option { int __bond_opt_set(struct bonding *bond, unsigned int option, struct bond_opt_value *val); +int __bond_opt_set_notify(struct bonding *bond, unsigned int option, + struct bond_opt_value *val); int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf); const struct bond_opt_value *bond_opt_parse(const struct bond_option *opt, diff --git a/include/net/calipso.h b/include/net/calipso.h index b1b30cd36601..5f95b11a04bf 100644 --- a/include/net/calipso.h +++ b/include/net/calipso.h @@ -38,7 +38,7 @@ #include <linux/skbuff.h> #include <net/netlabel.h> #include <net/request_sock.h> -#include <linux/atomic.h> +#include <linux/refcount.h> #include <asm/unaligned.h> /* known doi values */ @@ -57,7 +57,7 @@ struct calipso_doi { u32 doi; u32 type; - atomic_t refcount; + refcount_t refcount; struct list_head list; struct rcu_head rcu; }; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b083e6cbae8c..f12fa5245a45 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -649,6 +649,7 @@ struct survey_info { * @wep_keys: static WEP keys, if not NULL points to an array of * CFG80211_MAX_WEP_KEYS WEP keys * @wep_tx_key: key index (0..3) of the default TX static WEP key + * @psk: PSK (for devices supporting 4-way-handshake offload) */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -662,6 +663,7 @@ struct cfg80211_crypto_settings { bool control_port_no_encrypt; struct key_params *wep_keys; int wep_tx_key; + const u8 *psk; }; /** @@ -1441,6 +1443,9 @@ struct mesh_config { * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a] * @basic_rates: basic rates to use when creating the mesh * @beacon_rate: bitrate to be used for beacons + * @userspace_handles_dfs: whether user space controls DFS operation, i.e. + * changes the channel when a radar is detected. This is required + * to operate on DFS channels. * * These parameters are fixed when the mesh is created. */ @@ -1462,6 +1467,7 @@ struct mesh_setup { int mcast_rate[NUM_NL80211_BANDS]; u32 basic_rates; struct cfg80211_bitrate_mask beacon_rate; + bool userspace_handles_dfs; }; /** @@ -2106,6 +2112,8 @@ struct cfg80211_bss_selection { * @fils_erp_rrk: ERP re-authentication Root Key (rRK) used to derive additional * keys in FILS or %NULL if not specified. * @fils_erp_rrk_len: Length of @fils_erp_rrk in octets. + * @want_1x: indicates user-space supports and wants to use 802.1X driver + * offload of 4-way handshake. */ struct cfg80211_connect_params { struct ieee80211_channel *channel; @@ -2138,6 +2146,7 @@ struct cfg80211_connect_params { u16 fils_erp_next_seq_num; const u8 *fils_erp_rrk; size_t fils_erp_rrk_len; + bool want_1x; }; /** @@ -2560,6 +2569,23 @@ struct cfg80211_nan_func { }; /** + * struct cfg80211_pmk_conf - PMK configuration + * + * @aa: authenticator address + * @pmk_len: PMK length in bytes. + * @pmk: the PMK material + * @pmk_r0_name: PMK-R0 Name. NULL if not applicable (i.e., the PMK + * is not PMK-R0). When pmk_r0_name is not NULL, the pmk field + * holds PMK-R0. + */ +struct cfg80211_pmk_conf { + const u8 *aa; + u8 pmk_len; + const u8 *pmk; + const u8 *pmk_r0_name; +}; + +/** * struct cfg80211_ops - backend description for wireless configuration * * This struct is registered by fullmac card drivers and/or wireless stacks @@ -2875,6 +2901,13 @@ struct cfg80211_nan_func { * All other parameters must be ignored. * * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS + * + * @set_pmk: configure the PMK to be used for offloaded 802.1X 4-Way handshake. + * If not deleted through @del_pmk the PMK remains valid until disconnect + * upon which the driver should clear it. + * (invoked with the wireless_dev mutex held) + * @del_pmk: delete the previously configured PMK for the given authenticator. + * (invoked with the wireless_dev mutex held) */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3163,6 +3196,11 @@ struct cfg80211_ops { int (*set_multicast_to_unicast)(struct wiphy *wiphy, struct net_device *dev, const bool enabled); + + int (*set_pmk)(struct wiphy *wiphy, struct net_device *dev, + const struct cfg80211_pmk_conf *conf); + int (*del_pmk)(struct wiphy *wiphy, struct net_device *dev, + const u8 *aa); }; /* @@ -5403,6 +5441,9 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length + * @authorized: true if the 802.1X authentication was done by the driver or is + * not needed (e.g., when Fast Transition protocol was used), false + * otherwise. Ignored for networks that don't use 802.1X authentication. */ struct cfg80211_roam_info { struct ieee80211_channel *channel; @@ -5412,6 +5453,7 @@ struct cfg80211_roam_info { size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; + bool authorized; }; /** diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index a34b141f125f..880adb2f2afd 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -41,6 +41,7 @@ #include <net/netlabel.h> #include <net/request_sock.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <asm/unaligned.h> /* known doi values */ @@ -85,7 +86,7 @@ struct cipso_v4_doi { } map; u8 tags[CIPSO_V4_TAG_MAXCNT]; - atomic_t refcount; + refcount_t refcount; struct list_head list; struct rcu_head rcu; }; diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index f2ca135ddcc9..81210a8b8d7c 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -2,6 +2,7 @@ #define _NET_DN_FIB_H #include <linux/netlink.h> +#include <linux/refcount.h> extern const struct nla_policy rtm_dn_policy[]; @@ -28,7 +29,7 @@ struct dn_fib_info { struct dn_fib_info *fib_next; struct dn_fib_info *fib_prev; int fib_treeref; - atomic_t fib_clntref; + refcount_t fib_clntref; int fib_dead; unsigned int fib_flags; int fib_protocol; @@ -130,7 +131,7 @@ void dn_fib_free_info(struct dn_fib_info *fi); static inline void dn_fib_info_put(struct dn_fib_info *fi) { - if (atomic_dec_and_test(&fi->fib_clntref)) + if (refcount_dec_and_test(&fi->fib_clntref)) dn_fib_free_info(fi); } diff --git a/include/net/dsa.h b/include/net/dsa.h index 8e24677b1c62..58969b9a090c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -20,6 +20,7 @@ #include <linux/of.h> #include <linux/ethtool.h> #include <net/devlink.h> +#include <net/switchdev.h> struct tc_action; struct phy_device; @@ -27,13 +28,14 @@ struct fixed_phy_status; enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = 0, + DSA_TAG_PROTO_BRCM, DSA_TAG_PROTO_DSA, - DSA_TAG_PROTO_TRAILER, DSA_TAG_PROTO_EDSA, - DSA_TAG_PROTO_BRCM, - DSA_TAG_PROTO_QCA, - DSA_TAG_PROTO_MTK, + DSA_TAG_PROTO_KSZ, DSA_TAG_PROTO_LAN9303, + DSA_TAG_PROTO_MTK, + DSA_TAG_PROTO_QCA, + DSA_TAG_PROTO_TRAILER, DSA_TAG_LAST, /* MUST BE LAST */ }; @@ -120,27 +122,16 @@ struct dsa_switch_tree { */ struct dsa_platform_data *pd; - /* - * Reference to network device to use, and which tagging - * protocol to use. - */ - struct net_device *master_netdev; + /* Copy of tag_ops->rcv for faster access in hot path */ struct sk_buff * (*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); /* - * Original copy of the master netdev ethtool_ops - */ - struct ethtool_ops master_ethtool_ops; - const struct ethtool_ops *master_orig_ethtool_ops; - - /* - * The switch and port to which the CPU is attached. + * The switch port to which the CPU is attached. */ - struct dsa_switch *cpu_switch; - s8 cpu_port; + struct dsa_port *cpu_dp; /* * Data for the individual switch chips. @@ -180,12 +171,18 @@ struct dsa_port { struct dsa_switch *ds; unsigned int index; const char *name; + struct dsa_port *cpu_dp; struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; u8 stp_state; struct net_device *bridge_dev; struct devlink_port devlink_port; + /* + * Original copy of the master netdev ethtool_ops + */ + struct ethtool_ops ethtool_ops; + const struct ethtool_ops *orig_ethtool_ops; }; struct dsa_switch { @@ -224,11 +221,6 @@ struct dsa_switch { s8 rtable[DSA_MAX_SWITCHES]; /* - * The lower device this switch uses to talk to the host - */ - struct net_device *master_netdev; - - /* * Slave mii_bus and devices for the individual ports. */ u32 dsa_port_mask; @@ -251,7 +243,7 @@ struct dsa_switch { static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { - return !!(ds == ds->dst->cpu_switch && p == ds->dst->cpu_port); + return !!(ds->cpu_port_mask & (1 << p)); } static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) @@ -279,28 +271,12 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) * Else return the (DSA) port number that connects to the * switch that is one hop closer to the cpu. */ - if (dst->cpu_switch == ds) - return dst->cpu_port; + if (dst->cpu_dp->ds == ds) + return dst->cpu_dp->index; else - return ds->rtable[dst->cpu_switch->index]; + return ds->rtable[dst->cpu_dp->ds->index]; } -struct switchdev_trans; -struct switchdev_obj; -struct switchdev_obj_port_fdb; -struct switchdev_obj_port_mdb; -struct switchdev_obj_port_vlan; - -#define DSA_NOTIFIER_BRIDGE_JOIN 1 -#define DSA_NOTIFIER_BRIDGE_LEAVE 2 - -/* DSA_NOTIFIER_BRIDGE_* */ -struct dsa_notifier_bridge_info { - struct net_device *br; - int sw_index; - int port; -}; - struct dsa_switch_ops { /* * Legacy probing. @@ -410,7 +386,7 @@ struct dsa_switch_ops { const struct switchdev_obj_port_vlan *vlan); int (*port_vlan_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_vlan *vlan, - int (*cb)(struct switchdev_obj *obj)); + switchdev_obj_dump_cb_t *cb); /* * Forwarding database @@ -425,7 +401,7 @@ struct dsa_switch_ops { const struct switchdev_obj_port_fdb *fdb); int (*port_fdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, - int (*cb)(struct switchdev_obj *obj)); + switchdev_obj_dump_cb_t *cb); /* * Multicast database @@ -440,7 +416,7 @@ struct dsa_switch_ops { const struct switchdev_obj_port_mdb *mdb); int (*port_mdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_mdb *mdb, - int (*cb)(struct switchdev_obj *obj)); + switchdev_obj_dump_cb_t *cb); /* * RXNFC @@ -480,23 +456,18 @@ struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); struct net_device *dsa_dev_to_net_device(struct device *dev); -static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) -{ - return dst->rcv != NULL; -} - +/* Keep inline for faster access in hot path */ static inline bool netdev_uses_dsa(struct net_device *dev) { #if IS_ENABLED(CONFIG_NET_DSA) - if (dev->dsa_ptr != NULL) - return dsa_uses_tagged_protocol(dev->dsa_ptr); + return dev->dsa_ptr && dev->dsa_ptr->rcv; #endif return false; } struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n); void dsa_unregister_switch(struct dsa_switch *ds); -int dsa_register_switch(struct dsa_switch *ds, struct device *dev); +int dsa_register_switch(struct dsa_switch *ds); #ifdef CONFIG_PM_SLEEP int dsa_switch_suspend(struct dsa_switch *ds); int dsa_switch_resume(struct dsa_switch *ds); diff --git a/include/net/dst.h b/include/net/dst.h index cfc043784166..f73611ec4017 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -31,9 +31,9 @@ struct sk_buff; struct dst_entry { + struct net_device *dev; struct rcu_head rcu_head; struct dst_entry *child; - struct net_device *dev; struct dst_ops *ops; unsigned long _metrics; unsigned long expires; @@ -51,13 +51,11 @@ struct dst_entry { #define DST_HOST 0x0001 #define DST_NOXFRM 0x0002 #define DST_NOPOLICY 0x0004 -#define DST_NOHASH 0x0008 -#define DST_NOCACHE 0x0010 -#define DST_NOCOUNT 0x0020 -#define DST_FAKE_RTABLE 0x0040 -#define DST_XFRM_TUNNEL 0x0080 -#define DST_XFRM_QUEUE 0x0100 -#define DST_METADATA 0x0200 +#define DST_NOCOUNT 0x0008 +#define DST_FAKE_RTABLE 0x0010 +#define DST_XFRM_TUNNEL 0x0020 +#define DST_XFRM_QUEUE 0x0040 +#define DST_METADATA 0x0080 short error; @@ -253,7 +251,7 @@ static inline void dst_hold(struct dst_entry *dst) * __pad_to_align_refcnt declaration in struct dst_entry */ BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); - atomic_inc(&dst->__refcnt); + WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); } static inline void dst_use(struct dst_entry *dst, unsigned long time) @@ -278,6 +276,8 @@ static inline struct dst_entry *dst_clone(struct dst_entry *dst) void dst_release(struct dst_entry *dst); +void dst_release_immediate(struct dst_entry *dst); + static inline void refdst_drop(unsigned long refdst) { if (!(refdst & SKB_DST_NOREF)) @@ -334,10 +334,7 @@ static inline void skb_dst_force(struct sk_buff *skb) */ static inline bool dst_hold_safe(struct dst_entry *dst) { - if (dst->flags & DST_NOCACHE) - return atomic_inc_not_zero(&dst->__refcnt); - dst_hold(dst); - return true; + return atomic_inc_not_zero(&dst->__refcnt); } /** @@ -423,26 +420,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, void dst_init(struct dst_entry *dst, struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags); -void __dst_free(struct dst_entry *dst); struct dst_entry *dst_destroy(struct dst_entry *dst); - -static inline void dst_free(struct dst_entry *dst) -{ - if (dst->obsolete > 0) - return; - if (!atomic_read(&dst->__refcnt)) { - dst = dst_destroy(dst); - if (!dst) - return; - } - __dst_free(dst); -} - -static inline void dst_rcu_free(struct rcu_head *head) -{ - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst_free(dst); -} +void dst_dev_put(struct dst_entry *dst); static inline void dst_confirm(struct dst_entry *dst) { @@ -505,8 +484,6 @@ static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) return dst; } -void dst_subsys_init(void); - /* Flags for xfrm_lookup flags argument. */ enum { XFRM_LOOKUP_ICMP = 1 << 0, diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 701fc814d0af..a803129a4849 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -5,10 +5,22 @@ #include <net/ip_tunnels.h> #include <net/dst.h> +enum metadata_type { + METADATA_IP_TUNNEL, + METADATA_HW_PORT_MUX, +}; + +struct hw_port_info { + struct net_device *lower_dev; + u32 port_id; +}; + struct metadata_dst { struct dst_entry dst; + enum metadata_type type; union { struct ip_tunnel_info tun_info; + struct hw_port_info port_info; } u; }; @@ -27,7 +39,7 @@ static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; - if (md_dst) + if (md_dst && md_dst->type == METADATA_IP_TUNNEL) return &md_dst->u.tun_info; dst = skb_dst(skb); @@ -55,22 +67,33 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, a = (const struct metadata_dst *) skb_dst(skb_a); b = (const struct metadata_dst *) skb_dst(skb_b); - if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len) + if (!a != !b || a->type != b->type) return 1; - return memcmp(&a->u.tun_info, &b->u.tun_info, - sizeof(a->u.tun_info) + a->u.tun_info.options_len); + switch (a->type) { + case METADATA_HW_PORT_MUX: + return memcmp(&a->u.port_info, &b->u.port_info, + sizeof(a->u.port_info)); + case METADATA_IP_TUNNEL: + return memcmp(&a->u.tun_info, &b->u.tun_info, + sizeof(a->u.tun_info) + + a->u.tun_info.options_len); + default: + return 1; + } } void metadata_dst_free(struct metadata_dst *); -struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); -struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); +struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, + gfp_t flags); +struct metadata_dst __percpu * +metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); static inline struct metadata_dst *tun_rx_dst(int md_size) { struct metadata_dst *tun_dst; - tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC); + tun_dst = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!tun_dst) return NULL; @@ -85,11 +108,11 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) int md_size; struct metadata_dst *new_md; - if (!md_dst) + if (!md_dst || md_dst->type != METADATA_IP_TUNNEL) return ERR_PTR(-EINVAL); md_size = md_dst->u.tun_info.options_len; - new_md = metadata_dst_alloc(md_size, GFP_ATOMIC); + new_md = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!new_md) return ERR_PTR(-ENOMEM); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 76c7300626d6..c487bfa2f479 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -5,6 +5,7 @@ #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/fib_rules.h> +#include <linux/refcount.h> #include <net/flow.h> #include <net/rtnetlink.h> @@ -29,7 +30,7 @@ struct fib_rule { struct fib_rule __rcu *ctarget; struct net *fr_net; - atomic_t refcnt; + refcount_t refcnt; u32 pref; int suppress_ifgroup; int suppress_prefixlen; @@ -103,12 +104,12 @@ struct fib_rules_ops { static inline void fib_rule_get(struct fib_rule *rule) { - atomic_inc(&rule->refcnt); + refcount_inc(&rule->refcnt); } static inline void fib_rule_put(struct fib_rule *rule) { - if (atomic_dec_and_test(&rule->refcnt)) + if (refcount_dec_and_test(&rule->refcnt)) kfree_rcu(rule, rcu); } diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 8d21d448daa9..e2663e900b0a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -157,6 +157,24 @@ struct flow_dissector_key_eth_addrs { unsigned char src[ETH_ALEN]; }; +/** + * struct flow_dissector_key_tcp: + * @flags: flags + */ +struct flow_dissector_key_tcp { + __be16 flags; +}; + +/** + * struct flow_dissector_key_ip: + * @tos: tos + * @ttl: ttl + */ +struct flow_dissector_key_ip { + __u8 tos; + __u8 ttl; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -177,6 +195,8 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */ + FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */ + FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 68b88192b00c..c59a098221db 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -128,7 +128,6 @@ static inline int genl_err_attr(struct genl_info *info, int err, * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps - * @ops_list: operations list */ struct genl_ops { const struct nla_policy *policy; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f656f9051aca..d4088d1a688d 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -17,6 +17,7 @@ #include <net/snmp.h> #include <linux/ipv6.h> +#include <linux/refcount.h> /* inet6_dev.if_flags */ @@ -45,7 +46,7 @@ struct inet6_ifaddr { /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */ __u32 valid_lft; __u32 prefered_lft; - atomic_t refcnt; + refcount_t refcnt; spinlock_t lock; int state; @@ -126,7 +127,7 @@ struct ifmcaddr6 { struct timer_list mca_timer; unsigned int mca_flags; int mca_users; - atomic_t mca_refcnt; + refcount_t mca_refcnt; spinlock_t mca_lock; unsigned long mca_cstamp; unsigned long mca_tstamp; @@ -146,7 +147,7 @@ struct ifacaddr6 { struct rt6_info *aca_rt; struct ifacaddr6 *aca_next; int aca_users; - atomic_t aca_refcnt; + refcount_t aca_refcnt; unsigned long aca_cstamp; unsigned long aca_tstamp; }; @@ -187,7 +188,7 @@ struct inet6_dev { struct ifacaddr6 *ac_list; rwlock_t lock; - atomic_t refcnt; + refcount_t refcnt; __u32 if_flags; int dead; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c7a577976bec..13e4c89a8231 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -75,6 +75,8 @@ struct inet_connection_sock_af_ops { * @icsk_pmtu_cookie Last pmtu seen by socket * @icsk_ca_ops Pluggable congestion control hook * @icsk_af_ops Operations which are AF_INET{4,6} specific + * @icsk_ulp_ops Pluggable ULP control hook + * @icsk_ulp_data ULP private data * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -97,6 +99,8 @@ struct inet_connection_sock { __u32 icsk_pmtu_cookie; const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; + const struct tcp_ulp_ops *icsk_ulp_ops; + void *icsk_ulp_data; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, icsk_ca_setsockopt:1, diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 5894730ec82a..440c1e9d0623 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -50,7 +50,7 @@ struct inet_frag_queue { spinlock_t lock; struct timer_list timer; struct hlist_node list; - atomic_t refcnt; + refcount_t refcnt; struct sk_buff *fragments; struct sk_buff *fragments_tail; ktime_t stamp; @@ -92,7 +92,7 @@ struct inet_frags { */ u32 rnd; seqlock_t rnd_seqlock; - int qsize; + unsigned int qsize; unsigned int (*hashfn)(const struct inet_frag_queue *); bool (*match)(const struct inet_frag_queue *q, @@ -129,7 +129,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { - if (atomic_dec_and_test(&q->refcnt)) + if (refcount_dec_and_test(&q->refcnt)) inet_frag_destroy(q, f); } diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1178931288cb..5026b1f08bb8 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -32,7 +32,7 @@ #include <net/tcp_states.h> #include <net/netns/hash.h> -#include <linux/atomic.h> +#include <linux/refcount.h> #include <asm/byteorder.h> /* This is for all connections with a full identity, no wildcards. @@ -334,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net, sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, dport, dif, &refcounted); - if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } @@ -359,7 +359,6 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, refcounted); } -u32 sk_ehashfn(const struct sock *sk); u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport); diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 235c7811a86a..f2a215fc78e4 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -46,7 +46,7 @@ struct inet_peer { struct rcu_head gc_rcu; }; /* - * Once inet_peer is queued for deletion (refcnt == -1), following field + * Once inet_peer is queued for deletion (refcnt == 0), following field * is not available: rid * We can share memory with rcu_head to help keep inet_peer small. */ @@ -60,7 +60,7 @@ struct inet_peer { /* following fields might be frequently dirtied */ __u32 dtime; /* the time of last use of not referenced entries */ - atomic_t refcnt; + refcount_t refcnt; }; struct inet_peer_base { diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c979c878df1c..1a88008cc6f5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -170,7 +170,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) static inline u32 rt6_get_cookie(const struct rt6_info *rt) { if (rt->rt6i_flags & RTF_PCPU || - (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from)) + (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) rt = (struct rt6_info *)(rt->dst.from); return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; @@ -277,7 +277,8 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), void *arg); int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc); + struct nl_info *info, struct mx6_config *mxc, + struct netlink_ext_ack *extack); int fib6_del(struct rt6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f5e625f53367..0fbf73dd531a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -90,7 +90,7 @@ void ip6_route_cleanup(void); int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg); -int ip6_route_add(struct fib6_config *cfg); +int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack); int ip6_ins_rt(struct rt6_info *); int ip6_del_rt(struct rt6_info *); @@ -116,7 +116,6 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); -int icmp6_dst_gc(void); void fib6_force_start_gc(struct net *net); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f7f6aa789c61..41d580c6185f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -23,6 +23,7 @@ #include <net/inetpeer.h> #include <linux/percpu.h> #include <linux/notifier.h> +#include <linux/refcount.h> struct fib_config { u8 fc_dst_len; @@ -105,7 +106,7 @@ struct fib_info { struct hlist_node fib_lhash; struct net *fib_net; int fib_treeref; - atomic_t fib_clntref; + refcount_t fib_clntref; unsigned int fib_flags; unsigned char fib_dead; unsigned char fib_protocol; @@ -136,6 +137,7 @@ struct fib_rule; struct fib_table; struct fib_result { + __be32 prefix; unsigned char prefixlen; unsigned char nh_sel; unsigned char type; @@ -263,8 +265,10 @@ struct fib_table { int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); -int fib_table_insert(struct net *, struct fib_table *, struct fib_config *); -int fib_table_delete(struct net *, struct fib_table *, struct fib_config *); +int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, + struct netlink_ext_ack *extack); +int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, + struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); @@ -427,12 +431,12 @@ void free_fib_info(struct fib_info *fi); static inline void fib_info_hold(struct fib_info *fi) { - atomic_inc(&fi->fib_clntref); + refcount_inc(&fi->fib_clntref); } static inline void fib_info_put(struct fib_info *fi) { - if (atomic_dec_and_test(&fi->fib_clntref)) + if (refcount_dec_and_test(&fi->fib_clntref)) free_fib_info(fi); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3e505bbff8ca..6eac5cf8f1e6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -16,6 +16,7 @@ #include <linux/ipv6.h> #include <linux/hardirq.h> #include <linux/jhash.h> +#include <linux/refcount.h> #include <net/if_inet6.h> #include <net/ndisc.h> #include <net/flow.h> @@ -203,7 +204,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { - atomic_t refcnt; + refcount_t refcnt; /* Length of this structure */ int tot_len; @@ -265,7 +266,7 @@ static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) rcu_read_lock(); opt = rcu_dereference(np->opt); if (opt) { - if (!atomic_inc_not_zero(&opt->refcnt)) + if (!refcount_inc_not_zero(&opt->refcnt)) opt = NULL; else opt = rcu_pointer_handoff(opt); @@ -276,7 +277,7 @@ static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) static inline void txopt_put(struct ipv6_txoptions *opt) { - if (opt && atomic_dec_and_test(&opt->refcnt)) + if (opt && refcount_dec_and_test(&opt->refcnt)) kfree_rcu(opt, rcu); } diff --git a/include/net/ipx.h b/include/net/ipx.h index e5cff6811b30..af32b97b5ddd 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -14,6 +14,7 @@ #include <linux/ipx.h> #include <linux/list.h> #include <linux/slab.h> +#include <linux/refcount.h> struct ipx_address { __be32 net; @@ -54,7 +55,7 @@ struct ipx_interface { /* IPX address */ __be32 if_netnum; unsigned char if_node[IPX_NODE_LEN]; - atomic_t refcnt; + refcount_t refcnt; /* physical device info */ struct net_device *if_dev; @@ -80,7 +81,7 @@ struct ipx_route { unsigned char ir_routed; unsigned char ir_router_node[IPX_NODE_LEN]; struct list_head node; /* node in ipx_routes list */ - atomic_t refcnt; + refcount_t refcnt; }; struct ipx_cb { @@ -139,7 +140,7 @@ const char *ipx_device_name(struct ipx_interface *intrfc); static __inline__ void ipxitf_hold(struct ipx_interface *intrfc) { - atomic_inc(&intrfc->refcnt); + refcount_inc(&intrfc->refcnt); } void ipxitf_down(struct ipx_interface *intrfc); @@ -157,18 +158,18 @@ int ipxrtr_ioctl(unsigned int cmd, void __user *arg); static __inline__ void ipxitf_put(struct ipx_interface *intrfc) { - if (atomic_dec_and_test(&intrfc->refcnt)) + if (refcount_dec_and_test(&intrfc->refcnt)) ipxitf_down(intrfc); } static __inline__ void ipxrtr_hold(struct ipx_route *rt) { - atomic_inc(&rt->refcnt); + refcount_inc(&rt->refcnt); } static __inline__ void ipxrtr_put(struct ipx_route *rt) { - if (atomic_dec_and_test(&rt->refcnt)) + if (refcount_dec_and_test(&rt->refcnt)) kfree(rt); } #endif /* _NET_INET_IPX_H_ */ diff --git a/include/net/lapb.h b/include/net/lapb.h index 9510f8725f03..85e773742f4e 100644 --- a/include/net/lapb.h +++ b/include/net/lapb.h @@ -1,6 +1,7 @@ #ifndef _LAPB_H #define _LAPB_H #include <linux/lapb.h> +#include <linux/refcount.h> #define LAPB_HEADER_LEN 20 /* LAPB over Ethernet + a bit more */ @@ -101,7 +102,7 @@ struct lapb_cb { struct lapb_frame frmr_data; unsigned char frmr_type; - atomic_t refcnt; + refcount_t refcnt; }; /* lapb_iface.c */ diff --git a/include/net/llc.h b/include/net/llc.h index e8e61d4fb458..dc35f25eb679 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -55,7 +55,7 @@ struct llc_sap { unsigned char state; unsigned char p_bit; unsigned char f_bit; - atomic_t refcnt; + refcount_t refcnt; int (*rcv_func)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, @@ -113,14 +113,14 @@ struct llc_sap *llc_sap_open(unsigned char lsap, struct net_device *orig_dev)); static inline void llc_sap_hold(struct llc_sap *sap) { - atomic_inc(&sap->refcnt); + refcount_inc(&sap->refcnt); } void llc_sap_close(struct llc_sap *sap); static inline void llc_sap_put(struct llc_sap *sap) { - if (atomic_dec_and_test(&sap->refcnt)) + if (refcount_dec_and_test(&sap->refcnt)) llc_sap_close(sap); } diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index ebfe237aad7e..7c26863b8cf4 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -35,7 +35,8 @@ struct lwtunnel_state { struct lwtunnel_encap_ops { int (*build_state)(struct nlattr *encap, unsigned int family, const void *cfg, - struct lwtunnel_state **ts); + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack); void (*destroy_state)(struct lwtunnel_state *lws); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*input)(struct sk_buff *skb); @@ -107,12 +108,15 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); -int lwtunnel_valid_encap_type(u16 encap_type); -int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len); +int lwtunnel_valid_encap_type(u16 encap_type, + struct netlink_ext_ack *extack); +int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, + struct netlink_ext_ack *extack); int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, - struct lwtunnel_state **lws); + struct lwtunnel_state **lws, + struct netlink_ext_ack *extack); int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); @@ -172,11 +176,14 @@ static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, return -EOPNOTSUPP; } -static inline int lwtunnel_valid_encap_type(u16 encap_type) +static inline int lwtunnel_valid_encap_type(u16 encap_type, + struct netlink_ext_ack *extack) { + NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel"); return -EOPNOTSUPP; } -static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) +static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, + struct netlink_ext_ack *extack) { /* return 0 since we are not walking attr looking for * RTA_ENCAP_TYPE attribute on nexthops. @@ -187,7 +194,8 @@ static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) static inline int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, - struct lwtunnel_state **lws) + struct lwtunnel_state **lws, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 76ed24a201eb..b2b5419467cc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4205,6 +4205,22 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, int max_rates); /** + * ieee80211_sta_set_expected_throughput - set the expected tpt for a station + * + * Call this function to notify mac80211 about a change in expected throughput + * to a station. A driver for a device that does rate control in firmware can + * call this function when the expected throughput estimate towards a station + * changes. The information is used to tune the CoDel AQM applied to traffic + * going towards that station (which can otherwise be too aggressive and cause + * slow stations to starve). + * + * @pubsta: the station to set throughput for. + * @thr: the current expected throughput in kbps. + */ +void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, + u32 thr); + +/** * ieee80211_tx_status - transmit status callback * * Call this function for all transmitted frames after they have been @@ -5436,6 +5452,9 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, */ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); +void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr, + unsigned int bit); + /** * ieee80211_start_rx_ba_session_offl - start a Rx BA session * @@ -5450,8 +5469,13 @@ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); * @addr: station mac address * @tid: the rx tid */ -void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, - const u8 *addr, u16 tid); +static inline void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, + const u8 *addr, u16 tid) +{ + if (WARN_ON(tid >= IEEE80211_NUM_TIDS)) + return; + ieee80211_manage_rx_ba_offl(vif, addr, tid); +} /** * ieee80211_stop_rx_ba_session_offl - stop a Rx BA session @@ -5467,8 +5491,13 @@ void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, * @addr: station mac address * @tid: the rx tid */ -void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, - const u8 *addr, u16 tid); +static inline void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, + const u8 *addr, u16 tid) +{ + if (WARN_ON(tid >= IEEE80211_NUM_TIDS)) + return; + ieee80211_manage_rx_ba_offl(vif, addr, tid + IEEE80211_NUM_TIDS); +} /* Rate control API */ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 1036c902d2c9..31b1bb11ba3f 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -384,7 +384,7 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref(dev, pkey); - if (n && !atomic_inc_not_zero(&n->refcnt)) + if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock_bh(); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index e4dd3a214034..afc39e3a3f7c 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -17,6 +17,7 @@ */ #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rcupdate.h> @@ -76,7 +77,7 @@ struct neigh_parms { void *sysctl_table; int dead; - atomic_t refcnt; + refcount_t refcnt; struct rcu_head rcu_head; int reachable_time; @@ -137,7 +138,7 @@ struct neighbour { unsigned long confirmed; unsigned long updated; rwlock_t lock; - atomic_t refcnt; + refcount_t refcnt; struct sk_buff_head arp_queue; unsigned int arp_queue_len_bytes; struct timer_list timer; @@ -317,6 +318,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); +bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb); @@ -394,12 +396,12 @@ void neigh_sysctl_unregister(struct neigh_parms *p); static inline void __neigh_parms_put(struct neigh_parms *parms) { - atomic_dec(&parms->refcnt); + refcount_dec(&parms->refcnt); } static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) { - atomic_inc(&parms->refcnt); + refcount_inc(&parms->refcnt); return parms; } @@ -409,18 +411,18 @@ static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) static inline void neigh_release(struct neighbour *neigh) { - if (atomic_dec_and_test(&neigh->refcnt)) + if (refcount_dec_and_test(&neigh->refcnt)) neigh_destroy(neigh); } static inline struct neighbour * neigh_clone(struct neighbour *neigh) { if (neigh) - atomic_inc(&neigh->refcnt); + refcount_inc(&neigh->refcnt); return neigh; } -#define neigh_hold(n) atomic_inc(&(n)->refcnt) +#define neigh_hold(n) refcount_inc(&(n)->refcnt) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fe80bb48ab1f..31a2b51bef2c 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -5,6 +5,7 @@ #define __NET_NET_NAMESPACE_H #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/list.h> #include <linux/sysctl.h> @@ -46,7 +47,7 @@ struct netns_ipvs; #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { - atomic_t passive; /* To decided when the network + refcount_t passive; /* To decided when the network * namespace should be freed. */ atomic_t count; /* To decided when the network @@ -158,6 +159,7 @@ extern struct net init_net; struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net); +void net_ns_barrier(void); #else /* CONFIG_NET_NS */ #include <linux/sched.h> #include <linux/nsproxy.h> @@ -168,6 +170,8 @@ static inline struct net *copy_net_ns(unsigned long flags, return ERR_PTR(-EINVAL); return old_net; } + +static inline void net_ns_barrier(void) {} #endif /* CONFIG_NET_NS */ diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 0b0c35c37125..925524ede6c8 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -8,7 +8,7 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); if (likely(skb->nf_bridge)) - atomic_set(&(skb->nf_bridge->use), 1); + refcount_set(&(skb->nf_bridge->use), 1); return skb->nf_bridge; } diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8ece3612d0cd..48407569585d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -225,9 +225,13 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, u32 seq); /* Iterate over all conntracks: if iter returns true, it's deleted. */ -void nf_ct_iterate_cleanup(struct net *net, - int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report); +void nf_ct_iterate_cleanup_net(struct net *net, + int (*iter)(struct nf_conn *i, void *data), + void *data, u32 portid, int report); + +/* also set unconfirmed conntracks as dying. Only use in module exit path. */ +void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), + void *data); struct nf_conntrack_zone; diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e01559b4d781..6d14b36e3a49 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -71,7 +71,7 @@ struct nf_conntrack_l3proto { struct module *me; }; -extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; +extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL /* Protocol pernet registration. */ @@ -100,7 +100,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; static inline struct nf_conntrack_l3proto * __nf_ct_l3proto_find(u_int16_t l3proto) { - if (unlikely(l3proto >= AF_MAX)) + if (unlikely(l3proto >= NFPROTO_NUMPROTO)) return &nf_conntrack_l3proto_generic; return rcu_dereference(nf_ct_l3protos[l3proto]); } diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8a8bab8d7b15..bd5be0d691d5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -281,6 +281,23 @@ struct nft_set_estimate { enum nft_set_class space; }; +/** + * struct nft_set_type - nf_tables set type + * + * @select_ops: function to select nft_set_ops + * @ops: default ops, used when no select_ops functions is present + * @list: used internally + * @owner: module reference + */ +struct nft_set_type { + const struct nft_set_ops *(*select_ops)(const struct nft_ctx *, + const struct nft_set_desc *desc, + u32 flags); + const struct nft_set_ops *ops; + struct list_head list; + struct module *owner; +}; + struct nft_set_ext; struct nft_expr; @@ -297,8 +314,6 @@ struct nft_expr; * @privsize: function to return size of set private data * @init: initialize private data of new set instance * @destroy: destroy private data of set instance - * @list: nf_tables_set_ops list node - * @owner: module reference * @elemsize: element private size * @features: features supported by the implementation */ @@ -336,7 +351,8 @@ struct nft_set_ops { struct nft_set *set, struct nft_set_iter *iter); - unsigned int (*privsize)(const struct nlattr * const nla[]); + unsigned int (*privsize)(const struct nlattr * const nla[], + const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est); @@ -345,14 +361,13 @@ struct nft_set_ops { const struct nlattr * const nla[]); void (*destroy)(const struct nft_set *set); - struct list_head list; - struct module *owner; unsigned int elemsize; u32 features; + const struct nft_set_type *type; }; -int nft_register_set(struct nft_set_ops *ops); -void nft_unregister_set(struct nft_set_ops *ops); +int nft_register_set(struct nft_set_type *type); +void nft_unregister_set(struct nft_set_type *type); /** * struct nft_set - nf_tables set instance diff --git a/include/net/netlabel.h b/include/net/netlabel.h index efe98068880f..72d6435fc16c 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -37,7 +37,7 @@ #include <linux/in6.h> #include <net/netlink.h> #include <net/request_sock.h> -#include <linux/atomic.h> +#include <linux/refcount.h> struct cipso_v4_doi; struct calipso_doi; @@ -136,7 +136,7 @@ struct netlbl_audit { * */ struct netlbl_lsm_cache { - atomic_t refcount; + refcount_t refcount; void (*free) (const void *data); void *data; }; @@ -295,7 +295,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) cache = kzalloc(sizeof(*cache), flags); if (cache) - atomic_set(&cache->refcount, 1); + refcount_set(&cache->refcount, 1); return cache; } @@ -309,7 +309,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) */ static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache) { - if (!atomic_dec_and_test(&cache->refcount)) + if (!refcount_dec_and_test(&cache->refcount)) return; if (cache->free) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index cd686c4fb32d..9a14a0850b0e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -122,6 +122,9 @@ struct netns_ipv4 { int sysctl_tcp_fin_timeout; unsigned int sysctl_tcp_notsent_lowat; int sysctl_tcp_tw_reuse; + int sysctl_tcp_sack; + int sysctl_tcp_window_scaling; + int sysctl_tcp_timestamps; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; diff --git a/include/net/netrom.h b/include/net/netrom.h index 110350aca3df..443a4ffca7aa 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -11,6 +11,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <net/sock.h> +#include <linux/refcount.h> #define NR_NETWORK_LEN 15 #define NR_TRANSPORT_LEN 5 @@ -93,7 +94,7 @@ struct nr_neigh { unsigned short count; unsigned int number; unsigned char failed; - atomic_t refcount; + refcount_t refcount; }; struct nr_route { @@ -109,7 +110,7 @@ struct nr_node { unsigned char which; unsigned char count; struct nr_route routes[3]; - atomic_t refcount; + refcount_t refcount; spinlock_t node_lock; }; @@ -118,21 +119,21 @@ struct nr_node { *********************************************************************/ #define nr_node_hold(__nr_node) \ - atomic_inc(&((__nr_node)->refcount)) + refcount_inc(&((__nr_node)->refcount)) static __inline__ void nr_node_put(struct nr_node *nr_node) { - if (atomic_dec_and_test(&nr_node->refcount)) { + if (refcount_dec_and_test(&nr_node->refcount)) { kfree(nr_node); } } #define nr_neigh_hold(__nr_neigh) \ - atomic_inc(&((__nr_neigh)->refcount)) + refcount_inc(&((__nr_neigh)->refcount)) static __inline__ void nr_neigh_put(struct nr_neigh *nr_neigh) { - if (atomic_dec_and_test(&nr_neigh->refcount)) { + if (refcount_dec_and_test(&nr_neigh->refcount)) { if (nr_neigh->ax25) ax25_cb_put(nr_neigh->ax25); kfree(nr_neigh->digipeat); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 269fd78bb0ae..537d0a0ad4c4 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -18,11 +18,32 @@ int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); #ifdef CONFIG_NET_CLS -void tcf_destroy_chain(struct tcf_proto __rcu **fl); +struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, + bool create); +void tcf_chain_put(struct tcf_chain *chain); +int tcf_block_get(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain); +void tcf_block_put(struct tcf_block *block); +int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode); + #else -static inline void tcf_destroy_chain(struct tcf_proto __rcu **fl) +static inline +int tcf_block_get(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain) +{ + return 0; +} + +static inline void tcf_block_put(struct tcf_block *block) { } + +static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + return TC_ACT_UNSPEC; +} #endif static inline unsigned long @@ -136,6 +157,25 @@ static inline void tcf_exts_to_list(const struct tcf_exts *exts, #endif } +static inline void +tcf_exts_stats_update(const struct tcf_exts *exts, + u64 bytes, u64 packets, u64 lastuse) +{ +#ifdef CONFIG_NET_CLS_ACT + int i; + + preempt_disable(); + + for (i = 0; i < exts->nr_actions; i++) { + struct tc_action *a = exts->actions[i]; + + tcf_action_stats_update(a, bytes, packets, lastuse); + } + + preempt_enable(); +#endif +} + /** * tcf_exts_exec - execute tc filter extensions * @skb: socket buffer diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index bec46f63f10c..2579c209ea51 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -113,9 +113,6 @@ static inline void qdisc_run(struct Qdisc *q) __qdisc_run(q); } -int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode); - static inline __be16 tc_skb_protocol(const struct sk_buff *skb) { /* We need to take extra care in case the skb came via diff --git a/include/net/request_sock.h b/include/net/request_sock.h index a12a5d25b27e..23e22054aa60 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -19,6 +19,7 @@ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/bug.h> +#include <linux/refcount.h> #include <net/sock.h> @@ -29,7 +30,7 @@ struct proto; struct request_sock_ops { int family; - int obj_size; + unsigned int obj_size; struct kmem_cache *slab; char *slab_name; int (*rtx_syn_ack)(const struct sock *sk, @@ -89,7 +90,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, return NULL; req->rsk_listener = NULL; if (attach_listener) { - if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) { + if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { kmem_cache_free(ops->slab, req); return NULL; } @@ -100,7 +101,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, sk_node_init(&req_to_sk(req)->sk_node); sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; - atomic_set(&req->rsk_refcnt, 0); + refcount_set(&req->rsk_refcnt, 0); return req; } @@ -108,7 +109,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, static inline void reqsk_free(struct request_sock *req) { /* temporary debugging */ - WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0); + WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); req->rsk_ops->destructor(req); if (req->rsk_listener) @@ -119,7 +120,7 @@ static inline void reqsk_free(struct request_sock *req) static inline void reqsk_put(struct request_sock *req) { - if (atomic_dec_and_test(&req->rsk_refcnt)) + if (refcount_dec_and_test(&req->rsk_refcnt)) reqsk_free(req); } diff --git a/include/net/route.h b/include/net/route.h index 2cc0e14c6359..cb0a76d9dde1 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -113,13 +113,16 @@ struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); -struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *flp, - const struct sk_buff *skb); +struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp, + const struct sk_buff *skb); +struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp, + struct fib_result *res, + const struct sk_buff *skb); static inline struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp) { - return __ip_route_output_key_hash(net, flp, NULL); + return ip_route_output_key_hash(net, flp, NULL); } struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, @@ -175,6 +178,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin); +int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin, + struct fib_result *res); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin) @@ -184,7 +190,9 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, rcu_read_lock(); err = ip_route_input_noref(skb, dst, src, tos, devin); if (!err) - skb_dst_force(skb); + skb_dst_force_safe(skb); + if (!skb_dst(skb)) + err = -EINVAL; rcu_read_unlock(); return err; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 78fa5fe32947..abe6b733d473 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -63,15 +63,18 @@ struct rtnl_link_ops { int maxtype; const struct nla_policy *policy; int (*validate)(struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); int (*newlink)(struct net *src_net, struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); int (*changelink)(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); void (*dellink)(struct net_device *dev, struct list_head *head); @@ -88,11 +91,13 @@ struct rtnl_link_ops { int slave_maxtype; const struct nla_policy *slave_policy; int (*slave_validate)(struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); int (*slave_changelink)(struct net_device *dev, struct net_device *slave_dev, struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); size_t (*get_slave_size)(const struct net_device *dev, const struct net_device *slave_dev); int (*fill_slave_info)(struct sk_buff *skb, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 22e52093bfda..1c123e2b2415 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -8,6 +8,8 @@ #include <linux/pkt_cls.h> #include <linux/percpu.h> #include <linux/dynamic_queue_limits.h> +#include <linux/list.h> +#include <linux/refcount.h> #include <net/gen_stats.h> #include <net/rtnetlink.h> @@ -94,7 +96,7 @@ struct Qdisc { struct sk_buff *skb_bad_txq; struct rcu_head rcu_head; int padded; - atomic_t refcnt; + refcount_t refcnt; spinlock_t busylock ____cacheline_aligned_in_smp; }; @@ -153,7 +155,7 @@ struct Qdisc_class_ops { void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ - struct tcf_proto __rcu ** (*tcf_chain)(struct Qdisc *, unsigned long); + struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long); bool (*tcf_cl_offload)(u32 classid); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); @@ -192,8 +194,13 @@ struct Qdisc_ops { struct tcf_result { - unsigned long class; - u32 classid; + union { + struct { + unsigned long class; + u32 classid; + }; + const struct tcf_proto *goto_tp; + }; }; struct tcf_proto_ops { @@ -236,6 +243,7 @@ struct tcf_proto { struct Qdisc *q; void *data; const struct tcf_proto_ops *ops; + struct tcf_chain *chain; struct rcu_head rcu; }; @@ -247,6 +255,19 @@ struct qdisc_skb_cb { unsigned char data[QDISC_CB_PRIV_LEN]; }; +struct tcf_chain { + struct tcf_proto __rcu *filter_chain; + struct tcf_proto __rcu **p_filter_chain; + struct list_head list; + struct tcf_block *block; + u32 index; /* chain index */ + unsigned int refcnt; +}; + +struct tcf_block { + struct list_head chain_list; +}; + static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h index 9b9fb122b31f..e5c57d0a082d 100644 --- a/include/net/sctp/auth.h +++ b/include/net/sctp/auth.h @@ -31,6 +31,7 @@ #define __sctp_auth_h__ #include <linux/list.h> +#include <linux/refcount.h> struct sctp_endpoint; struct sctp_association; @@ -53,7 +54,7 @@ struct sctp_hmac { * over SCTP-AUTH */ struct sctp_auth_bytes { - atomic_t refcnt; + refcount_t refcnt; __u32 len; __u8 data[]; }; @@ -76,7 +77,7 @@ static inline void sctp_auth_key_hold(struct sctp_auth_bytes *key) if (!key) return; - atomic_inc(&key->refcnt); + refcount_inc(&key->refcnt); } void sctp_auth_key_put(struct sctp_auth_bytes *key); @@ -97,8 +98,10 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, struct sctp_hmac_algo_param *hmacs); int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, __be16 hmac_id); -int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc); -int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc); +int sctp_auth_send_cid(enum sctp_cid chunk, + const struct sctp_association *asoc); +int sctp_auth_recv_cid(enum sctp_cid chunk, + const struct sctp_association *asoc); void sctp_auth_calculate_hmac(const struct sctp_association *asoc, struct sk_buff *skb, struct sctp_auth_chunk *auth, gfp_t gfp); diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index d4a20d00461c..d4679e7a5ed5 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -132,7 +132,7 @@ typedef union { struct sctp_association *asoc; struct sctp_transport *transport; struct sctp_bind_addr *bp; - sctp_init_chunk_t *init; + struct sctp_init_chunk *init; struct sctp_ulpevent *ulpevent; struct sctp_packet *packet; sctp_sackhdr_t *sackh; @@ -173,7 +173,7 @@ SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc) SCTP_ARG_CONSTRUCTOR(TRANSPORT, struct sctp_transport *, transport) SCTP_ARG_CONSTRUCTOR(BA, struct sctp_bind_addr *, bp) -SCTP_ARG_CONSTRUCTOR(PEER_INIT, sctp_init_chunk_t *, init) +SCTP_ARG_CONSTRUCTOR(PEER_INIT, struct sctp_init_chunk *, init) SCTP_ARG_CONSTRUCTOR(ULPEVENT, struct sctp_ulpevent *, ulpevent) SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) SCTP_ARG_CONSTRUCTOR(SACKH, sctp_sackhdr_t *, sackh) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index b07a745ab69f..9b18044c551e 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -130,7 +130,7 @@ typedef enum { */ typedef union { - sctp_cid_t chunk; + enum sctp_cid chunk; sctp_event_timeout_t timeout; sctp_event_other_t other; sctp_event_primitive_t primitive; @@ -141,7 +141,7 @@ static inline sctp_subtype_t \ SCTP_ST_## _name (_type _arg) \ { sctp_subtype_t _retval; _retval._elt = _arg; return _retval; } -SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, sctp_cid_t, chunk) +SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) @@ -152,7 +152,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) /* Calculate the actual data size in a data chunk */ #define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\ - (unsigned long)(c->chunk_hdr)\ - - sizeof(sctp_data_chunk_t))) + - sizeof(struct sctp_data_chunk))) /* Internal error codes */ typedef enum { diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 069582ee5d7f..a9519a06a23b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -470,7 +470,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ - ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ + ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\ pos.v += SCTP_PAD4(ntohs(pos.p->length))) #define sctp_walk_errors(err, chunk_hdr)\ @@ -478,7 +478,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ - sizeof(sctp_chunkhdr_t));\ + sizeof(struct sctp_chunkhdr));\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 47113f2c4b0a..860f378333b5 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -325,19 +325,17 @@ void sctp_generate_heartbeat_event(unsigned long peer); void sctp_generate_reconf_event(unsigned long peer); void sctp_generate_proto_unreach_event(unsigned long peer); -void sctp_ootb_pkt_free(struct sctp_packet *); +void sctp_ootb_pkt_free(struct sctp_packet *packet); -struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *, - const struct sctp_association *, - struct sctp_chunk *, +struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + struct sctp_chunk *chunk, gfp_t gfp, int *err, struct sctp_chunk **err_chk_p); -int sctp_addip_addr_config(struct sctp_association *, sctp_param_t, - struct sockaddr_storage*, int); /* 3rd level prototypes */ -__u32 sctp_generate_tag(const struct sctp_endpoint *); -__u32 sctp_generate_tsn(const struct sctp_endpoint *); +__u32 sctp_generate_tag(const struct sctp_endpoint *ep); +__u32 sctp_generate_tsn(const struct sctp_endpoint *ep); /* Extern declarations for major data structures. */ extern sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES]; @@ -349,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sizeof(sctp_data_chunk_t); + size -= sizeof(struct sctp_data_chunk); return size; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a8b38e123f97..5ab29af8ca8a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -310,9 +310,10 @@ struct sctp_cookie { __u32 adaptation_ind; - __u8 auth_random[sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH]; + __u8 auth_random[sizeof(struct sctp_paramhdr) + + SCTP_AUTH_RANDOM_LENGTH]; __u8 auth_hmacs[SCTP_AUTH_NUM_HMACS * sizeof(__u16) + 2]; - __u8 auth_chunks[sizeof(sctp_paramhdr_t) + SCTP_AUTH_MAX_CHUNKS]; + __u8 auth_chunks[sizeof(struct sctp_paramhdr) + SCTP_AUTH_MAX_CHUNKS]; /* This is a shim for my peer's INIT packet, followed by * a copy of the raw address list of the association. @@ -377,10 +378,11 @@ typedef struct sctp_sender_hb_info { __u64 hb_nonce; } sctp_sender_hb_info_t; -int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp); -int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp); +int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + gfp_t gfp); void sctp_stream_free(struct sctp_stream *stream); void sctp_stream_clear(struct sctp_stream *stream); +void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new); /* What is the current SSN number for this stream? */ #define sctp_ssn_peek(stream, type, sid) \ @@ -494,7 +496,7 @@ struct sctp_datamsg { /* Chunks waiting to be submitted to lower layer. */ struct list_head chunks; /* Reference counting. */ - atomic_t refcnt; + refcount_t refcnt; /* When is this message no longer interesting to the peer? */ unsigned long expires_at; /* Did the messenge fail to send? */ @@ -522,7 +524,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *); struct sctp_chunk { struct list_head list; - atomic_t refcnt; + refcount_t refcnt; /* How many times this chunk have been sent, for prsctp RTX policy */ int sent_count; @@ -733,7 +735,7 @@ struct sctp_transport { struct rhlist_head node; /* Reference counting. */ - atomic_t refcnt; + refcount_t refcnt; /* RTO-Pending : A flag used to track if one of the DATA * chunks sent to this address is currently being * used to compute a RTT. If this flag is 0, @@ -1172,7 +1174,7 @@ struct sctp_ep_common { * refcnt - Reference count access to this object. * dead - Do not attempt to use this object. */ - atomic_t refcnt; + refcount_t refcnt; bool dead; /* What socket does this endpoint belong to? */ @@ -1296,11 +1298,11 @@ int sctp_has_association(struct net *net, const union sctp_addr *laddr, int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - sctp_cid_t, sctp_init_chunk_t *peer_init, + enum sctp_cid cid, struct sctp_init_chunk *peer_init, struct sctp_chunk *chunk, struct sctp_chunk **err_chunk); int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk, const union sctp_addr *peer, - sctp_init_chunk_t *init, gfp_t gfp); + struct sctp_init_chunk *init, gfp_t gfp); __u32 sctp_generate_tag(const struct sctp_endpoint *); __u32 sctp_generate_tsn(const struct sctp_endpoint *); @@ -1750,7 +1752,7 @@ struct sctp_association { __u32 default_rcv_context; /* Stream arrays */ - struct sctp_stream *stream; + struct sctp_stream stream; /* All outbound chunks go through this structure. */ struct sctp_outq outqueue; @@ -1952,8 +1954,8 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *, const union sctp_addr *, const union sctp_addr *); void sctp_assoc_migrate(struct sctp_association *, struct sock *); -void sctp_assoc_update(struct sctp_association *old, - struct sctp_association *new); +int sctp_assoc_update(struct sctp_association *old, + struct sctp_association *new); __u32 sctp_association_get_next_tsn(struct sctp_association *); diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index b94006f6fbdd..031bf16d1521 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -8,10 +8,11 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); -u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr); +u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr); u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport); -u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr); +u32 secure_tcpv6_ts_off(const struct net *net, + const __be32 *saddr, const __be32 *daddr); u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, diff --git a/include/net/sock.h b/include/net/sock.h index f97da141d920..48e4d5c38f85 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -66,6 +66,7 @@ #include <linux/poll.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <net/dst.h> #include <net/checksum.h> #include <net/tcp_states.h> @@ -219,7 +220,7 @@ struct sock_common { u32 skc_tw_rcv_nxt; /* struct tcp_timewait_sock */ }; - atomic_t skc_refcnt; + refcount_t skc_refcnt; /* private: */ int skc_dontcopy_end[0]; union { @@ -253,6 +254,7 @@ struct sock_common { * @sk_ll_usec: usecs to busypoll when there is no data * @sk_allocation: allocation mode * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) + * @sk_pacing_status: Pacing status (requested, handled by sch_fq) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes * @sk_padding: unused element for alignment @@ -389,14 +391,14 @@ struct sock { /* ===== cache line for TX ===== */ int sk_wmem_queued; - atomic_t sk_wmem_alloc; + refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; struct sk_buff *sk_send_head; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; __u32 sk_dst_pending_confirm; - /* Note: 32bit hole on 64bit arches */ + u32 sk_pacing_status; /* see enum sk_pacing */ long sk_sndtimeo; struct timer_list sk_timer; __u32 sk_priority; @@ -475,6 +477,12 @@ struct sock { struct rcu_head sk_rcu; }; +enum sk_pacing { + SK_PACING_NONE = 0, + SK_PACING_NEEDED = 1, + SK_PACING_FQ = 2, +}; + #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) #define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk))) @@ -604,7 +612,7 @@ static inline bool __sk_del_node_init(struct sock *sk) static __always_inline void sock_hold(struct sock *sk) { - atomic_inc(&sk->sk_refcnt); + refcount_inc(&sk->sk_refcnt); } /* Ungrab socket in the context, which assumes that socket refcnt @@ -612,7 +620,7 @@ static __always_inline void sock_hold(struct sock *sk) */ static __always_inline void __sock_put(struct sock *sk) { - atomic_dec(&sk->sk_refcnt); + refcount_dec(&sk->sk_refcnt); } static inline bool sk_del_node_init(struct sock *sk) @@ -621,7 +629,7 @@ static inline bool sk_del_node_init(struct sock *sk) if (rc) { /* paranoid for a while -acme */ - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; @@ -643,7 +651,7 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk) if (rc) { /* paranoid for a while -acme */ - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; @@ -900,7 +908,10 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) static inline void sk_incoming_cpu_update(struct sock *sk) { - sk->sk_incoming_cpu = raw_smp_processor_id(); + int cpu = raw_smp_processor_id(); + + if (unlikely(sk->sk_incoming_cpu != cpu)) + sk->sk_incoming_cpu = cpu; } static inline void sock_rps_record_flow_hash(__u32 hash) @@ -1073,6 +1084,7 @@ struct proto { bool (*stream_memory_free)(const struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); + void (*leave_memory_pressure)(struct sock *sk); atomic_long_t *memory_allocated; /* Current allocated memory. */ struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* @@ -1081,7 +1093,7 @@ struct proto { * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ - int *memory_pressure; + unsigned long *memory_pressure; long *sysctl_mem; int *sysctl_wmem; int *sysctl_rmem; @@ -1133,9 +1145,9 @@ static inline void sk_refcnt_debug_dec(struct sock *sk) static inline void sk_refcnt_debug_release(const struct sock *sk) { - if (atomic_read(&sk->sk_refcnt) != 1) + if (refcount_read(&sk->sk_refcnt) != 1) printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", - sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); + sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt)); } #else /* SOCK_REFCNT_DEBUG */ #define sk_refcnt_debug_inc(sk) do { } while (0) @@ -1186,25 +1198,6 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) return !!*sk->sk_prot->memory_pressure; } -static inline void sk_leave_memory_pressure(struct sock *sk) -{ - int *memory_pressure = sk->sk_prot->memory_pressure; - - if (!memory_pressure) - return; - - if (*memory_pressure) - *memory_pressure = 0; -} - -static inline void sk_enter_memory_pressure(struct sock *sk) -{ - if (!sk->sk_prot->enter_memory_pressure) - return; - - sk->sk_prot->enter_memory_pressure(sk); -} - static inline long sk_memory_allocated(const struct sock *sk) { @@ -1644,7 +1637,7 @@ void sock_init_data(struct socket *sock, struct sock *sk); /* Ungrab socket and destroy it, if it was the last reference. */ static inline void sock_put(struct sock *sk) { - if (atomic_dec_and_test(&sk->sk_refcnt)) + if (refcount_dec_and_test(&sk->sk_refcnt)) sk_free(sk); } /* Generic version of sock_put(), dealing with all sockets @@ -1919,7 +1912,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro */ static inline int sk_wmem_alloc_get(const struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) - 1; + return refcount_read(&sk->sk_wmem_alloc) - 1; } /** @@ -2034,8 +2027,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); -int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, - unsigned int flags, +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, + struct sk_buff *skb, unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb)); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); @@ -2062,7 +2055,7 @@ static inline unsigned long sock_wspace(struct sock *sk) int amt = 0; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { - amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc); if (amt < 0) amt = 0; } @@ -2143,7 +2136,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); */ static inline bool sock_writeable(const struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); + return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); } static inline gfp_t gfp_any(void) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 929d6af321cd..8ae9e3b6392e 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -46,6 +46,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_PORT_PARENT_ID, SWITCHDEV_ATTR_ID_PORT_STP_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, + SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, SWITCHDEV_ATTR_ID_PORT_MROUTER, SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, @@ -62,6 +63,7 @@ struct switchdev_attr { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ + unsigned long brport_flags_support; /* PORT_BRIDGE_FLAGS_SUPPORT */ bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ @@ -153,8 +155,11 @@ struct switchdev_ops { }; enum switchdev_notifier_type { - SWITCHDEV_FDB_ADD = 1, - SWITCHDEV_FDB_DEL, + SWITCHDEV_FDB_ADD_TO_BRIDGE = 1, + SWITCHDEV_FDB_DEL_TO_BRIDGE, + SWITCHDEV_FDB_ADD_TO_DEVICE, + SWITCHDEV_FDB_DEL_TO_DEVICE, + SWITCHDEV_FDB_OFFLOADED, }; struct switchdev_notifier_info { @@ -212,6 +217,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b); + +#define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops)) #else static inline void switchdev_deferred_process(void) @@ -317,6 +324,8 @@ static inline bool switchdev_port_same_parent_id(struct net_device *a, return false; } +#define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0) + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index b6f173910226..d576374c4d6f 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -15,7 +15,7 @@ struct tcf_gact { }; #define to_gact(a) ((struct tcf_gact *)a) -static inline bool is_tcf_gact_shot(const struct tc_action *a) +static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) { #ifdef CONFIG_NET_CLS_ACT struct tcf_gact *gact; @@ -24,10 +24,21 @@ static inline bool is_tcf_gact_shot(const struct tc_action *a) return false; gact = to_gact(a); - if (gact->tcf_action == TC_ACT_SHOT) + if (gact->tcf_action == act) return true; #endif return false; } + +static inline bool is_tcf_gact_shot(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_SHOT); +} + +static inline bool is_tcf_gact_trap(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_TRAP); +} + #endif /* __NET_TC_GACT_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index be6223c586fa..70483296157f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -46,6 +46,10 @@ #include <linux/seq_file.h> #include <linux/memcontrol.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/bpf-cgroup.h> + extern struct inet_hashinfo tcp_hashinfo; extern struct percpu_counter tcp_orphan_count; @@ -237,9 +241,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_timestamps; -extern int sysctl_tcp_window_scaling; -extern int sysctl_tcp_sack; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; @@ -279,7 +280,7 @@ extern int sysctl_tcp_pacing_ca_ratio; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; -extern int tcp_memory_pressure; +extern unsigned long tcp_memory_pressure; /* optimized version of sk_under_memory_pressure() for TCP sockets */ static inline bool tcp_under_memory_pressure(const struct sock *sk) @@ -353,6 +354,8 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, + size_t size, int flags); void tcp_release_cb(struct sock *sk); void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); @@ -427,7 +430,7 @@ void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -void tcp_parse_options(const struct sk_buff *skb, +void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); @@ -519,8 +522,9 @@ static inline u32 tcp_cookie_time(void) u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); -__u32 cookie_init_timestamp(struct request_sock *req); -bool cookie_timestamp_decode(struct tcp_options_received *opt); +u64 cookie_init_timestamp(struct request_sock *req); +bool cookie_timestamp_decode(const struct net *net, + struct tcp_options_received *opt); bool cookie_ecn_ok(const struct tcp_options_received *opt, const struct net *net, const struct dst_entry *dst); @@ -574,6 +578,7 @@ void tcp_fin(struct sock *sk); void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) { + hrtimer_cancel(&tcp_sk(sk)->pacing_timer); inet_csk_clear_xmit_timers(sk); } @@ -699,17 +704,61 @@ u32 __tcp_select_window(struct sock *sk); void tcp_send_window_probe(struct sock *sk); -/* TCP timestamps are only 32-bits, this causes a slight - * complication on 64-bit systems since we store a snapshot - * of jiffies in the buffer control blocks below. We decided - * to use only the low 32-bits of jiffies and hide the ugly - * casts with the following macro. +/* TCP uses 32bit jiffies to save some space. + * Note that this is different from tcp_time_stamp, which + * historically has been the same until linux-4.13. + */ +#define tcp_jiffies32 ((u32)jiffies) + +/* + * Deliver a 32bit value for TCP timestamp option (RFC 7323) + * It is no longer tied to jiffies, but to 1 ms clock. + * Note: double check if you want to use tcp_jiffies32 instead of this. + */ +#define TCP_TS_HZ 1000 + +static inline u64 tcp_clock_ns(void) +{ + return local_clock(); +} + +static inline u64 tcp_clock_us(void) +{ + return div_u64(tcp_clock_ns(), NSEC_PER_USEC); +} + +/* This should only be used in contexts where tp->tcp_mstamp is up to date */ +static inline u32 tcp_time_stamp(const struct tcp_sock *tp) +{ + return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ); +} + +/* Could use tcp_clock_us() / 1000, but this version uses a single divide */ +static inline u32 tcp_time_stamp_raw(void) +{ + return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ); +} + + +/* Refresh 1us clock of a TCP socket, + * ensuring monotically increasing values. */ -#define tcp_time_stamp ((__u32)(jiffies)) +static inline void tcp_mstamp_refresh(struct tcp_sock *tp) +{ + u64 val = tcp_clock_us(); + + if (val > tp->tcp_mstamp) + tp->tcp_mstamp = val; +} + +static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) +{ + return max_t(s64, t1 - t0, 0); +} static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) { - return skb->skb_mstamp.stamp_jiffies; + return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ); } @@ -774,9 +823,9 @@ struct tcp_skb_cb { /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ - struct skb_mstamp first_tx_mstamp; + u64 first_tx_mstamp; /* when we reached the "delivered" count */ - struct skb_mstamp delivered_mstamp; + u64 delivered_mstamp; } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -892,7 +941,7 @@ struct ack_sample { * A sample is invalid if "delivered" or "interval_us" is negative. */ struct rate_sample { - struct skb_mstamp prior_mstamp; /* starting timestamp for interval */ + u64 prior_mstamp; /* starting timestamp for interval */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ s32 delivered; /* number of packets delivered over interval */ long interval_us; /* time for tp->delivered to incr "delivered" */ @@ -955,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); -int tcp_set_congestion_control(struct sock *sk, const char *name); +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load); +void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca); u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); @@ -1241,7 +1292,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) if (!sysctl_tcp_slow_start_after_idle || tp->packets_out || ca_ops->cong_control) return; - delta = tcp_time_stamp - tp->lsndtime; + delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) tcp_cwnd_restart(sk, delta); } @@ -1277,6 +1328,7 @@ extern void tcp_openreq_init_rwin(struct request_sock *req, const struct dst_entry *dst); void tcp_enter_memory_pressure(struct sock *sk); +void tcp_leave_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { @@ -1303,8 +1355,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) { const struct inet_connection_sock *icsk = &tp->inet_conn; - return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime, - tcp_time_stamp - tp->rcv_tstamp); + return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime, + tcp_jiffies32 - tp->rcv_tstamp); } static inline int tcp_fin_time(const struct sock *sk) @@ -1395,6 +1447,7 @@ struct tcp_md5sig_key { u8 keylen; u8 family; /* AF_INET or AF_INET6 */ union tcp_md5_addr addr; + u8 prefixlen; u8 key[TCP_MD5SIG_MAXKEYLEN]; struct rcu_head rcu; }; @@ -1438,9 +1491,10 @@ struct tcp_md5sig_pool { int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - int family, const u8 *newkey, u8 newkeylen, gfp_t gfp); + int family, u8 prefixlen, const u8 *newkey, u8 newkeylen, + gfp_t gfp); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, - int family); + int family, u8 prefixlen); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); @@ -1800,6 +1854,7 @@ struct tcp_sock_af_ops { const struct sock *sk, const struct sk_buff *skb); int (*md5_parse)(struct sock *sk, + int optname, char __user *optval, int optlen); #endif @@ -1825,7 +1880,7 @@ struct tcp_request_sock_ops { struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, const struct request_sock *req); u32 (*init_seq)(const struct sk_buff *skb); - u32 (*init_ts_off)(const struct sk_buff *skb); + u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, @@ -1858,7 +1913,7 @@ void tcp_init(void); /* tcp_recovery.c */ extern void tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, - const struct skb_mstamp *xmit_time); + u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); /* @@ -1945,4 +2000,89 @@ static inline void tcp_listendrop(const struct sock *sk) __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); } +enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); + +/* + * Interface for adding Upper Level Protocols over TCP + */ + +#define TCP_ULP_NAME_MAX 16 +#define TCP_ULP_MAX 128 +#define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) + +struct tcp_ulp_ops { + struct list_head list; + + /* initialize ulp */ + int (*init)(struct sock *sk); + /* cleanup ulp */ + void (*release)(struct sock *sk); + + char name[TCP_ULP_NAME_MAX]; + struct module *owner; +}; +int tcp_register_ulp(struct tcp_ulp_ops *type); +void tcp_unregister_ulp(struct tcp_ulp_ops *type); +int tcp_set_ulp(struct sock *sk, const char *name); +void tcp_get_available_ulp(char *buf, size_t len); +void tcp_cleanup_ulp(struct sock *sk); + +/* Call BPF_SOCK_OPS program that returns an int. If the return value + * is < 0, then the BPF op failed (for example if the loaded BPF + * program does not support the chosen operation or there is no BPF + * program loaded). + */ +#ifdef CONFIG_BPF +static inline int tcp_call_bpf(struct sock *sk, int op) +{ + struct bpf_sock_ops_kern sock_ops; + int ret; + + if (sk_fullsock(sk)) + sock_owned_by_me(sk); + + memset(&sock_ops, 0, sizeof(sock_ops)); + sock_ops.sk = sk; + sock_ops.op = op; + + ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); + if (ret == 0) + ret = sock_ops.reply; + else + ret = -1; + return ret; +} +#else +static inline int tcp_call_bpf(struct sock *sk, int op) +{ + return -EPERM; +} +#endif + +static inline u32 tcp_timeout_init(struct sock *sk) +{ + int timeout; + + timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT); + + if (timeout <= 0) + timeout = TCP_TIMEOUT_INIT; + return timeout; +} + +static inline u32 tcp_rwnd_init_bpf(struct sock *sk) +{ + int rwnd; + + rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT); + + if (rwnd < 0) + rwnd = 0; + return rwnd; +} + +static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) +{ + return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1); +} #endif /* _TCP_H */ diff --git a/include/net/tls.h b/include/net/tls.h new file mode 100644 index 000000000000..b89d397dd62f --- /dev/null +++ b/include/net/tls.h @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _TLS_OFFLOAD_H +#define _TLS_OFFLOAD_H + +#include <linux/types.h> + +#include <uapi/linux/tls.h> + + +/* Maximum data size carried in a TLS record */ +#define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14) + +#define TLS_HEADER_SIZE 5 +#define TLS_NONCE_OFFSET TLS_HEADER_SIZE + +#define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type) + +#define TLS_RECORD_TYPE_DATA 0x17 + +#define TLS_AAD_SPACE_SIZE 13 + +struct tls_sw_context { + struct crypto_aead *aead_send; + + /* Sending context */ + char aad_space[TLS_AAD_SPACE_SIZE]; + + unsigned int sg_plaintext_size; + int sg_plaintext_num_elem; + struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS]; + + unsigned int sg_encrypted_size; + int sg_encrypted_num_elem; + struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS]; + + /* AAD | sg_plaintext_data | sg_tag */ + struct scatterlist sg_aead_in[2]; + /* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */ + struct scatterlist sg_aead_out[2]; +}; + +enum { + TLS_PENDING_CLOSED_RECORD +}; + +struct tls_context { + union { + struct tls_crypto_info crypto_send; + struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128; + }; + + void *priv_ctx; + + u16 prepend_size; + u16 tag_size; + u16 overhead_size; + u16 iv_size; + char *iv; + u16 rec_seq_size; + char *rec_seq; + + struct scatterlist *partially_sent_record; + u16 partially_sent_offset; + unsigned long flags; + + u16 pending_open_record_frags; + int (*push_pending_record)(struct sock *sk, int flags); + void (*free_resources)(struct sock *sk); + + void (*sk_write_space)(struct sock *sk); + void (*sk_proto_close)(struct sock *sk, long timeout); + + int (*setsockopt)(struct sock *sk, int level, + int optname, char __user *optval, + unsigned int optlen); + int (*getsockopt)(struct sock *sk, int level, + int optname, char __user *optval, + int __user *optlen); +}; + +int wait_on_pending_writer(struct sock *sk, long *timeo); +int tls_sk_query(struct sock *sk, int optname, char __user *optval, + int __user *optlen); +int tls_sk_attach(struct sock *sk, int optname, char __user *optval, + unsigned int optlen); + + +int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx); +int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tls_sw_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags); +void tls_sw_close(struct sock *sk, long timeout); + +void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); +void tls_icsk_clean_acked(struct sock *sk); + +int tls_push_sg(struct sock *sk, struct tls_context *ctx, + struct scatterlist *sg, u16 first_offset, + int flags); +int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx, + int flags, long *timeo); + +static inline bool tls_is_pending_closed_record(struct tls_context *ctx) +{ + return test_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); +} + +static inline int tls_complete_pending_work(struct sock *sk, + struct tls_context *ctx, + int flags, long *timeo) +{ + int rc = 0; + + if (unlikely(sk->sk_write_pending)) + rc = wait_on_pending_writer(sk, timeo); + + if (!rc && tls_is_pending_closed_record(ctx)) + rc = tls_push_pending_closed_record(sk, ctx, flags, timeo); + + return rc; +} + +static inline bool tls_is_partially_sent_record(struct tls_context *ctx) +{ + return !!ctx->partially_sent_record; +} + +static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) +{ + return tls_ctx->pending_open_record_frags; +} + +static inline void tls_err_abort(struct sock *sk) +{ + sk->sk_err = -EBADMSG; + sk->sk_error_report(sk); +} + +static inline bool tls_bigint_increment(unsigned char *seq, int len) +{ + int i; + + for (i = len - 1; i >= 0; i--) { + ++seq[i]; + if (seq[i] != 0) + break; + } + + return (i == -1); +} + +static inline void tls_advance_record_sn(struct sock *sk, + struct tls_context *ctx) +{ + if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size)) + tls_err_abort(sk); + tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + ctx->iv_size); +} + +static inline void tls_fill_prepend(struct tls_context *ctx, + char *buf, + size_t plaintext_len, + unsigned char record_type) +{ + size_t pkt_len, iv_size = ctx->iv_size; + + pkt_len = plaintext_len + iv_size + ctx->tag_size; + + /* we cover nonce explicit here as well, so buf should be of + * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE + */ + buf[0] = record_type; + buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.version); + buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.version); + /* we can use IV for nonce explicit according to spec */ + buf[3] = pkt_len >> 8; + buf[4] = pkt_len & 0xFF; + memcpy(buf + TLS_NONCE_OFFSET, + ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size); +} + +static inline struct tls_context *tls_get_ctx(const struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ulp_data; +} + +static inline struct tls_sw_context *tls_sw_ctx( + const struct tls_context *tls_ctx) +{ + return (struct tls_sw_context *)tls_ctx->priv_ctx; +} + +static inline struct tls_offload_context *tls_offload_ctx( + const struct tls_context *tls_ctx) +{ + return (struct tls_offload_context *)tls_ctx->priv_ctx; +} + +int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, + unsigned char *record_type); + +#endif /* _TLS_OFFLOAD_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 3391dbd73959..972ce4baab6b 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -249,13 +249,8 @@ void udp_destruct_sock(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); -static inline struct sk_buff * -__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked, - int *off, int *err) -{ - return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - udp_skb_destructor, peeked, off, err); -} +struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, + int noblock, int *peeked, int *off, int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *err) { @@ -307,6 +302,67 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport); +/* UDP uses skb->dev_scratch to cache as much information as possible and avoid + * possibly multiple cache miss on dequeue() + */ +#if BITS_PER_LONG == 64 + +/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on + * cold cache lines at recvmsg time. + * skb->len can be stored on 16 bits since the udp header has been already + * validated and pulled. + */ +struct udp_dev_scratch { + u32 truesize; + u16 len; + bool is_linear; + bool csum_unnecessary; +}; + +static inline unsigned int udp_skb_len(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->len; +} + +static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary; +} + +static inline bool udp_skb_is_linear(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear; +} + +#else +static inline unsigned int udp_skb_len(struct sk_buff *skb) +{ + return skb->len; +} + +static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) +{ + return skb_csum_unnecessary(skb); +} + +static inline bool udp_skb_is_linear(struct sk_buff *skb) +{ + return !skb_is_nonlinear(skb); +} +#endif + +static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, + struct iov_iter *to) +{ + int n, copy = len - off; + + n = copy_to_iter(skb->data + off, copy, to); + if (n == copy) + return 0; + + return -EFAULT; +} + /* * SNMP statistics for UDP and UDP-Lite */ diff --git a/include/net/udplite.h b/include/net/udplite.h index ea340524f99b..b7a18f63d86d 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -26,8 +26,8 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, /* Designate sk as UDP-Lite socket */ static inline int udplite_sk_init(struct sock *sk) { + udp_init_sock(sk); udp_sk(sk)->pcflag = UDPLITE_BIT; - sk->sk_destruct = udp_destruct_sock; return 0; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 49a59202f85e..3f430e38ab82 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -183,7 +183,7 @@ struct vxlan_sock { struct hlist_node hlist; struct socket *sock; struct hlist_head vni_list[VNI_HASH_SIZE]; - atomic_t refcnt; + refcount_t refcnt; u32 flags; }; @@ -221,9 +221,17 @@ struct vxlan_config { bool no_share; }; +struct vxlan_dev_node { + struct hlist_node hlist; + struct vxlan_dev *vxlan; +}; + /* Pseudo network device */ struct vxlan_dev { - struct hlist_node hlist; /* vni hash table */ + struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ +#endif struct list_head next; /* vxlan's per namespace list */ struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */ #if IS_ENABLED(CONFIG_IPV6) @@ -232,7 +240,6 @@ struct vxlan_dev { struct net_device *dev; struct net *net; /* netns for packet i/o */ struct vxlan_rdst default_dst; /* default destination */ - u32 flags; /* VXLAN_F_* in vxlan.h */ struct timer_list age_timer; spinlock_t hash_lock; @@ -259,6 +266,7 @@ struct vxlan_dev { #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 #define VXLAN_F_COLLECT_METADATA 0x2000 #define VXLAN_F_GPE 0x4000 +#define VXLAN_F_IPV6_LINKLOCAL 0x8000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable @@ -273,6 +281,7 @@ struct vxlan_dev { /* Flags that can be set together with VXLAN_F_GPE. */ #define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ VXLAN_F_IPV6 | \ + VXLAN_F_IPV6_LINKLOCAL | \ VXLAN_F_UDP_ZERO_CSUM_TX | \ VXLAN_F_UDP_ZERO_CSUM6_TX | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ diff --git a/include/net/x25.h b/include/net/x25.h index 6d30a01d281d..2609b57bd459 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -11,6 +11,7 @@ #define _X25_H #include <linux/x25.h> #include <linux/slab.h> +#include <linux/refcount.h> #include <net/sock.h> #define X25_ADDR_LEN 16 @@ -129,7 +130,7 @@ struct x25_route { struct x25_address address; unsigned int sigdigits; struct net_device *dev; - atomic_t refcnt; + refcount_t refcnt; }; struct x25_neigh { @@ -141,7 +142,7 @@ struct x25_neigh { unsigned long t20; struct timer_list t20timer; unsigned long global_facil_mask; - atomic_t refcnt; + refcount_t refcnt; }; struct x25_sock { @@ -242,12 +243,12 @@ void x25_link_free(void); /* x25_neigh.c */ static __inline__ void x25_neigh_hold(struct x25_neigh *nb) { - atomic_inc(&nb->refcnt); + refcount_inc(&nb->refcnt); } static __inline__ void x25_neigh_put(struct x25_neigh *nb) { - if (atomic_dec_and_test(&nb->refcnt)) + if (refcount_dec_and_test(&nb->refcnt)) kfree(nb); } @@ -265,12 +266,12 @@ void x25_route_free(void); static __inline__ void x25_route_hold(struct x25_route *rt) { - atomic_inc(&rt->refcnt); + refcount_inc(&rt->refcnt); } static __inline__ void x25_route_put(struct x25_route *rt) { - if (atomic_dec_and_test(&rt->refcnt)) + if (refcount_dec_and_test(&rt->refcnt)) kfree(rt); } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 62f5a259e597..c0916ab18d32 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -13,6 +13,7 @@ #include <linux/mutex.h> #include <linux/audit.h> #include <linux/slab.h> +#include <linux/refcount.h> #include <net/sock.h> #include <net/dst.h> @@ -137,7 +138,7 @@ struct xfrm_state { struct hlist_node bysrc; struct hlist_node byspi; - atomic_t refcnt; + refcount_t refcnt; spinlock_t lock; struct xfrm_id id; @@ -559,7 +560,7 @@ struct xfrm_policy { /* This lock only affects elements except for entry. */ rwlock_t lock; - atomic_t refcnt; + refcount_t refcnt; struct timer_list timer; struct flow_cache_object flo; @@ -631,7 +632,8 @@ struct xfrm_mgr { u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k); + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap); bool (*is_alive)(const struct km_event *c); }; @@ -814,14 +816,14 @@ static inline void xfrm_audit_state_icvfail(struct xfrm_state *x, static inline void xfrm_pol_hold(struct xfrm_policy *policy) { if (likely(policy != NULL)) - atomic_inc(&policy->refcnt); + refcount_inc(&policy->refcnt); } void xfrm_policy_destroy(struct xfrm_policy *policy); static inline void xfrm_pol_put(struct xfrm_policy *policy) { - if (atomic_dec_and_test(&policy->refcnt)) + if (refcount_dec_and_test(&policy->refcnt)) xfrm_policy_destroy(policy); } @@ -836,18 +838,18 @@ void __xfrm_state_destroy(struct xfrm_state *); static inline void __xfrm_state_put(struct xfrm_state *x) { - atomic_dec(&x->refcnt); + refcount_dec(&x->refcnt); } static inline void xfrm_state_put(struct xfrm_state *x) { - if (atomic_dec_and_test(&x->refcnt)) + if (refcount_dec_and_test(&x->refcnt)) __xfrm_state_destroy(x); } static inline void xfrm_state_hold(struct xfrm_state *x) { - atomic_inc(&x->refcnt); + refcount_inc(&x->refcnt); } static inline bool addr_match(const void *token1, const void *token2, @@ -1028,7 +1030,7 @@ struct xfrm_offload { }; struct sec_path { - atomic_t refcnt; + refcount_t refcnt; int len; int olen; @@ -1049,7 +1051,7 @@ static inline struct sec_path * secpath_get(struct sec_path *sp) { if (sp) - atomic_inc(&sp->refcnt); + refcount_inc(&sp->refcnt); return sp; } @@ -1058,7 +1060,7 @@ void __secpath_destroy(struct sec_path *sp); static inline void secpath_put(struct sec_path *sp) { - if (sp && atomic_dec_and_test(&sp->refcnt)) + if (sp && refcount_dec_and_test(&sp->refcnt)) __secpath_destroy(sp); } @@ -1675,13 +1677,16 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k); + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap); struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, - struct xfrm_migrate *m); + struct xfrm_migrate *m, + struct xfrm_encap_tmpl *encap); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, - struct xfrm_kmaddress *k, struct net *net); + struct xfrm_kmaddress *k, struct net *net, + struct xfrm_encap_tmpl *encap); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); |