summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
author Dmitry Torokhov <dmitry.torokhov@gmail.com> 2018-04-04 16:11:49 -0700
committer Dmitry Torokhov <dmitry.torokhov@gmail.com> 2018-04-04 16:11:49 -0700
commit 664b0bae0b87f69bc9deb098f5e0158b9cf18e04 (patch)
tree d5841492b396ff483723b9339c7c11dc33b67688 /include/net
parent 567b9b549cfa1cbc202762ae97b5385c29ade1e3 (diff)
parent 04bb1719c4de94700056241d4c0fe3c1413f5aff (diff)
Merge branch 'next' into for-linus
Prepare input updates for 4.17 merge window.
Diffstat (limited to 'include/net')
-rw-r--r--include/net/act_api.h56
-rw-r--r--include/net/addrconf.h12
-rw-r--r--include/net/af_rxrpc.h7
-rw-r--r--include/net/af_vsock.h20
-rw-r--r--include/net/arp.h3
-rw-r--r--include/net/bluetooth/bluetooth.h5
-rw-r--r--include/net/bluetooth/hci.h2
-rw-r--r--include/net/bonding.h7
-rw-r--r--include/net/caif/cfpkt.h27
-rw-r--r--include/net/cfg80211.h60
-rw-r--r--include/net/devlink.h115
-rw-r--r--include/net/dn.h7
-rw-r--r--include/net/dn_nsp.h1
-rw-r--r--include/net/dn_route.h1
-rw-r--r--include/net/dsa.h191
-rw-r--r--include/net/dst.h62
-rw-r--r--include/net/dst_metadata.h6
-rw-r--r--include/net/erspan.h240
-rw-r--r--include/net/fib_notifier.h3
-rw-r--r--include/net/flow_dissector.h12
-rw-r--r--include/net/fq.h7
-rw-r--r--include/net/fq_impl.h72
-rw-r--r--include/net/gen_stats.h3
-rw-r--r--include/net/genetlink.h11
-rw-r--r--include/net/gue.h18
-rw-r--r--include/net/inet_connection_sock.h12
-rw-r--r--include/net/inet_ecn.h5
-rw-r--r--include/net/inet_frag.h2
-rw-r--r--include/net/inet_hashtables.h29
-rw-r--r--include/net/inet_sock.h31
-rw-r--r--include/net/inet_timewait_sock.h8
-rw-r--r--include/net/ip.h10
-rw-r--r--include/net/ip6_fib.h93
-rw-r--r--include/net/ip6_route.h16
-rw-r--r--include/net/ip6_tunnel.h4
-rw-r--r--include/net/ip_fib.h3
-rw-r--r--include/net/ip_tunnels.h8
-rw-r--r--include/net/ip_vs.h9
-rw-r--r--include/net/ipv6.h66
-rw-r--r--include/net/iucv/af_iucv.h2
-rw-r--r--include/net/llc_c_ac.h8
-rw-r--r--include/net/mac80211.h28
-rw-r--r--include/net/neighbour.h4
-rw-r--r--include/net/net_namespace.h18
-rw-r--r--include/net/netevent.h1
-rw-r--r--include/net/netfilter/ipv4/nf_conntrack_ipv4.h14
-rw-r--r--include/net/netfilter/ipv6/nf_conntrack_ipv6.h14
-rw-r--r--include/net/netfilter/nf_conntrack.h7
-rw-r--r--include/net/netfilter/nf_conntrack_count.h17
-rw-r--r--include/net/netfilter/nf_conntrack_l4proto.h44
-rw-r--r--include/net/netfilter/nf_flow_table.h126
-rw-r--r--include/net/netfilter/nf_queue.h2
-rw-r--r--include/net/netfilter/nf_tables.h138
-rw-r--r--include/net/netfilter/nf_tables_ipv4.h27
-rw-r--r--include/net/netfilter/nf_tables_ipv6.h29
-rw-r--r--include/net/netns/can.h4
-rw-r--r--include/net/netns/core.h5
-rw-r--r--include/net/netns/ipv4.h41
-rw-r--r--include/net/netns/ipv6.h9
-rw-r--r--include/net/netns/netfilter.h12
-rw-r--r--include/net/netns/nftables.h8
-rw-r--r--include/net/netns/sctp.h5
-rw-r--r--include/net/nsh.h3
-rw-r--r--include/net/phonet/phonet.h6
-rw-r--r--include/net/pkt_cls.h355
-rw-r--r--include/net/pkt_sched.h38
-rw-r--r--include/net/red.h13
-rw-r--r--include/net/regulatory.h2
-rw-r--r--include/net/request_sock.h2
-rw-r--r--include/net/route.h2
-rw-r--r--include/net/rtnetlink.h9
-rw-r--r--include/net/sch_generic.h200
-rw-r--r--include/net/sctp/checksum.h13
-rw-r--r--include/net/sctp/constants.h9
-rw-r--r--include/net/sctp/sctp.h14
-rw-r--r--include/net/sctp/sm.h28
-rw-r--r--include/net/sctp/stream_interleave.h61
-rw-r--r--include/net/sctp/stream_sched.h77
-rw-r--r--include/net/sctp/structs.h145
-rw-r--r--include/net/sctp/ulpevent.h23
-rw-r--r--include/net/sctp/ulpqueue.h10
-rw-r--r--include/net/sock.h116
-rw-r--r--include/net/switchdev.h2
-rw-r--r--include/net/tc_act/tc_csum.h16
-rw-r--r--include/net/tc_act/tc_gact.h5
-rw-r--r--include/net/tc_act/tc_ife.h12
-rw-r--r--include/net/tc_act/tc_mirred.h5
-rw-r--r--include/net/tc_act/tc_sample.h1
-rw-r--r--include/net/tc_act/tc_vlan.h46
-rw-r--r--include/net/tcp.h293
-rw-r--r--include/net/tipc.h62
-rw-r--r--include/net/tls.h27
-rw-r--r--include/net/udp.h2
-rw-r--r--include/net/udplite.h1
-rw-r--r--include/net/vxlan.h4
-rw-r--r--include/net/wext.h4
-rw-r--r--include/net/xdp.h48
-rw-r--r--include/net/xfrm.h90
98 files changed, 2760 insertions, 791 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 1e6df0eb058f..6ed9692f20bd 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -14,7 +14,6 @@
struct tcf_idrinfo {
spinlock_t lock;
struct idr action_idr;
- struct net *net;
};
struct tc_action_ops;
@@ -87,7 +86,7 @@ struct tc_action_ops {
int (*act)(struct sk_buff *, const struct tc_action *,
struct tcf_result *);
int (*dump)(struct sk_buff *, struct tc_action *, int, int);
- void (*cleanup)(struct tc_action *, int bind);
+ void (*cleanup)(struct tc_action *);
int (*lookup)(struct net *, struct tc_action **, u32);
int (*init)(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act, int ovr,
@@ -95,8 +94,7 @@ struct tc_action_ops {
int (*walk)(struct net *, struct sk_buff *,
struct netlink_callback *, int, const struct tc_action_ops *);
void (*stats_update)(struct tc_action *, u64, u32, u64);
- int (*get_dev)(const struct tc_action *a, struct net *net,
- struct net_device **mirred_dev);
+ struct net_device *(*get_dev)(const struct tc_action *a);
};
struct tc_action_net {
@@ -106,7 +104,7 @@ struct tc_action_net {
static inline
int tc_action_net_init(struct tc_action_net *tn,
- const struct tc_action_ops *ops, struct net *net)
+ const struct tc_action_ops *ops)
{
int err = 0;
@@ -114,7 +112,6 @@ int tc_action_net_init(struct tc_action_net *tn,
if (!tn->idrinfo)
return -ENOMEM;
tn->ops = ops;
- tn->idrinfo->net = net;
spin_lock_init(&tn->idrinfo->lock);
idr_init(&tn->idrinfo->action_idr);
return err;
@@ -123,12 +120,19 @@ int tc_action_net_init(struct tc_action_net *tn,
void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
struct tcf_idrinfo *idrinfo);
-static inline void tc_action_net_exit(struct tc_action_net *tn)
+static inline void tc_action_net_exit(struct list_head *net_list,
+ unsigned int id)
{
+ struct net *net;
+
rtnl_lock();
- tcf_idrinfo_destroy(tn->ops, tn->idrinfo);
+ list_for_each_entry(net, net_list, exit_list) {
+ struct tc_action_net *tn = net_generic(net, id);
+
+ tcf_idrinfo_destroy(tn->ops, tn->idrinfo);
+ kfree(tn->idrinfo);
+ }
rtnl_unlock();
- kfree(tn->idrinfo);
}
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
@@ -180,4 +184,38 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
#endif
}
+typedef int tc_setup_cb_t(enum tc_setup_type type,
+ void *type_data, void *cb_priv);
+
+#ifdef CONFIG_NET_CLS_ACT
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv);
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv);
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+ enum tc_setup_type type, void *type_data,
+ bool err_stop);
+#else
+static inline
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ return 0;
+}
+
+static inline
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+}
+
+static inline
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+ enum tc_setup_type type, void *type_data,
+ bool err_stop)
+{
+ return 0;
+}
+#endif
+
#endif
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 35f5aabd432f..c4185a7b0e90 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -56,11 +56,9 @@ struct prefix_info {
struct in6_validator_info {
struct in6_addr i6vi_addr;
struct inet6_dev *i6vi_dev;
+ struct netlink_ext_ack *extack;
};
-#define IN6_ADDR_HSIZE_SHIFT 4
-#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT)
-
int addrconf_init(void);
void addrconf_cleanup(void);
@@ -95,8 +93,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
u32 banned_flags);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
-int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard);
+bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
@@ -182,7 +180,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout)
*/
int ipv6_addr_label_init(void);
void ipv6_addr_label_cleanup(void);
-void ipv6_addr_label_rtnl_register(void);
+int ipv6_addr_label_rtnl_register(void);
u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr,
int type, int ifindex);
@@ -208,7 +206,7 @@ void ipv6_mc_remap(struct inet6_dev *idev);
void ipv6_mc_init_dev(struct inet6_dev *idev);
void ipv6_mc_destroy_dev(struct inet6_dev *idev);
int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed);
-void addrconf_dad_failure(struct inet6_ifaddr *ifp);
+void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp);
bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
const struct in6_addr *src_addr);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 3ac79150291f..2b3a6eec4570 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -49,17 +49,19 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
unsigned long,
s64,
gfp_t,
- rxrpc_notify_rx_t);
+ rxrpc_notify_rx_t,
+ bool);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
struct msghdr *, size_t,
rxrpc_notify_end_tx_t);
int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
- void *, size_t, size_t *, bool, u32 *);
+ void *, size_t, size_t *, bool, u32 *, u16 *);
bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
u32, int, const char *);
void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
struct sockaddr_rxrpc *);
+u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
@@ -67,5 +69,6 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
struct sockaddr_rxrpc *, struct key *);
int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
enum rxrpc_call_completion *, u32 *);
+u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *);
#endif /* _NET_RXRPC_H */
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index f9fb566e75cf..9324ac2d9ff2 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -22,11 +22,13 @@
#include "vsock_addr.h"
-/* vsock-specific sock->sk_state constants */
-#define VSOCK_SS_LISTEN 255
-
#define LAST_RESERVED_PORT 1023
+#define VSOCK_HASH_SIZE 251
+extern struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+extern struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+extern spinlock_t vsock_table_lock;
+
#define vsock_sk(__sk) ((struct vsock_sock *)__sk)
#define sk_vsock(__vsk) (&(__vsk)->sk)
@@ -175,6 +177,18 @@ const struct vsock_transport *vsock_core_get_transport(void);
/**** UTILS ****/
+/* vsock_table_lock must be held */
+static inline bool __vsock_in_bound_table(struct vsock_sock *vsk)
+{
+ return !list_empty(&vsk->bound_table);
+}
+
+/* vsock_table_lock must be held */
+static inline bool __vsock_in_connected_table(struct vsock_sock *vsk)
+{
+ return !list_empty(&vsk->connected_table);
+}
+
void vsock_release_pending(struct sock *pending);
void vsock_add_pending(struct sock *listener, struct sock *pending);
void vsock_remove_pending(struct sock *listener, struct sock *pending);
diff --git a/include/net/arp.h b/include/net/arp.h
index dc8cd47f883b..977aabfcdc03 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -20,6 +20,9 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32
static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
{
+ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+ key = INADDR_ANY;
+
return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev);
}
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 020142bb9735..ec9d6bc65855 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -147,6 +147,9 @@ void bt_err_ratelimited(const char *fmt, ...);
#define bt_dev_dbg(hdev, fmt, ...) \
BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+#define bt_dev_err_ratelimited(hdev, fmt, ...) \
+ BT_ERR_RATELIMITED("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+
/* Connection and socket states */
enum {
BT_CONNECTED = 1, /* Equal to TCP_ESTABLISHED to make net code happy */
@@ -268,7 +271,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags);
int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags);
-uint bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait);
int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo);
int bt_sock_wait_ready(struct sock *sk, unsigned long flags);
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index fe98f0a5bef0..1668211297a9 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -273,7 +273,7 @@ enum {
#define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */
#define HCI_POWER_OFF_TIMEOUT msecs_to_jiffies(5000) /* 5 seconds */
#define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */
-#define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */
+#define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */
/* HCI data types */
#define HCI_COMMAND_PKT 0x01
diff --git a/include/net/bonding.h b/include/net/bonding.h
index b2e68657a216..f801fc940b29 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -330,7 +330,6 @@ static inline void bond_set_active_slave(struct slave *slave)
slave->backup = 0;
bond_queue_slave_event(slave);
bond_lower_state_changed(slave);
- rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
}
}
@@ -340,7 +339,6 @@ static inline void bond_set_backup_slave(struct slave *slave)
slave->backup = 1;
bond_queue_slave_event(slave);
bond_lower_state_changed(slave);
- rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
}
}
@@ -353,7 +351,6 @@ static inline void bond_set_slave_state(struct slave *slave,
slave->backup = slave_state;
if (notify) {
bond_lower_state_changed(slave);
- rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
bond_queue_slave_event(slave);
slave->should_notify = 0;
} else {
@@ -385,7 +382,6 @@ static inline void bond_slave_state_notify(struct bonding *bond)
bond_for_each_slave(bond, tmp, iter) {
if (tmp->should_notify) {
bond_lower_state_changed(tmp);
- rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_ATOMIC);
tmp->should_notify = 0;
}
}
@@ -596,7 +592,8 @@ void bond_destroy_sysfs(struct bond_net *net);
void bond_prepare_sysfs_group(struct bonding *bond);
int bond_sysfs_slave_add(struct slave *slave);
void bond_sysfs_slave_del(struct slave *slave);
-int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev);
+int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
+ struct netlink_ext_ack *extack);
int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb);
int bond_set_carrier(struct bonding *bond);
diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h
index fe328c52c46b..801489bb14c3 100644
--- a/include/net/caif/cfpkt.h
+++ b/include/net/caif/cfpkt.h
@@ -32,6 +32,33 @@ void cfpkt_destroy(struct cfpkt *pkt);
*/
int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len);
+static inline u8 cfpkt_extr_head_u8(struct cfpkt *pkt)
+{
+ u8 tmp;
+
+ cfpkt_extr_head(pkt, &tmp, 1);
+
+ return tmp;
+}
+
+static inline u16 cfpkt_extr_head_u16(struct cfpkt *pkt)
+{
+ __le16 tmp;
+
+ cfpkt_extr_head(pkt, &tmp, 2);
+
+ return le16_to_cpu(tmp);
+}
+
+static inline u32 cfpkt_extr_head_u32(struct cfpkt *pkt)
+{
+ __le32 tmp;
+
+ cfpkt_extr_head(pkt, &tmp, 4);
+
+ return le32_to_cpu(tmp);
+}
+
/*
* Peek header from packet.
* Reads data from packet without changing packet.
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f12fa5245a45..81174f9b8d14 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -815,6 +815,8 @@ struct cfg80211_csa_settings {
u8 count;
};
+#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
+
/**
* struct iface_combination_params - input parameters for interface combinations
*
@@ -1773,6 +1775,8 @@ enum cfg80211_signal_type {
* by %parent_bssid.
* @parent_bssid: the BSS according to which %parent_tsf is set. This is set to
* the BSS that requested the scan in which the beacon/probe was received.
+ * @chains: bitmask for filled values in @chain_signal.
+ * @chain_signal: per-chain signal strength of last received BSS in dBm.
*/
struct cfg80211_inform_bss {
struct ieee80211_channel *chan;
@@ -1781,6 +1785,8 @@ struct cfg80211_inform_bss {
u64 boottime_ns;
u64 parent_tsf;
u8 parent_bssid[ETH_ALEN] __aligned(2);
+ u8 chains;
+ s8 chain_signal[IEEE80211_MAX_CHAINS];
};
/**
@@ -1824,6 +1830,8 @@ struct cfg80211_bss_ies {
* that holds the beacon data. @beacon_ies is still valid, of course, and
* points to the same data as hidden_beacon_bss->beacon_ies in that case.
* @signal: signal strength value (type depends on the wiphy's signal_type)
+ * @chains: bitmask for filled values in @chain_signal.
+ * @chain_signal: per-chain signal strength of last received BSS in dBm.
* @priv: private area for driver use, has at least wiphy->bss_priv_size bytes
*/
struct cfg80211_bss {
@@ -1842,6 +1850,8 @@ struct cfg80211_bss {
u16 capability;
u8 bssid[ETH_ALEN];
+ u8 chains;
+ s8 chain_signal[IEEE80211_MAX_CHAINS];
u8 priv[0] __aligned(sizeof(void *));
};
@@ -2021,6 +2031,9 @@ struct cfg80211_disassoc_request {
* @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask
* will be used in ht_capa. Un-supported values will be ignored.
* @ht_capa_mask: The bits of ht_capa which are to be used.
+ * @wep_keys: static WEP keys, if not NULL points to an array of
+ * CFG80211_MAX_WEP_KEYS WEP keys
+ * @wep_tx_key: key index (0..3) of the default TX static WEP key
*/
struct cfg80211_ibss_params {
const u8 *ssid;
@@ -2037,6 +2050,8 @@ struct cfg80211_ibss_params {
int mcast_rate[NUM_NL80211_BANDS];
struct ieee80211_ht_cap ht_capa;
struct ieee80211_ht_cap ht_capa_mask;
+ struct key_params *wep_keys;
+ int wep_tx_key;
};
/**
@@ -3226,7 +3241,6 @@ struct cfg80211_ops {
* @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
* @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing
* auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH.
- * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans.
* @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the
* firmware.
* @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP.
@@ -4347,19 +4361,6 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
}
/**
- * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11
- * @skb: the 802.3 frame
- * @addr: the device MAC address
- * @iftype: the virtual interface type
- * @bssid: the network bssid (used only for iftype STATION and ADHOC)
- * @qos: build 802.11 QoS data frame
- * Return: 0 on success, or a negative error code.
- */
-int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
- enum nl80211_iftype iftype, const u8 *bssid,
- bool qos);
-
-/**
* ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame
*
* Decode an IEEE 802.11 A-MSDU and convert it to a list of 802.3 frames.
@@ -5441,9 +5442,6 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid,
* @req_ie_len: association request IEs length
* @resp_ie: association response IEs (may be %NULL)
* @resp_ie_len: assoc response IEs length
- * @authorized: true if the 802.1X authentication was done by the driver or is
- * not needed (e.g., when Fast Transition protocol was used), false
- * otherwise. Ignored for networks that don't use 802.1X authentication.
*/
struct cfg80211_roam_info {
struct ieee80211_channel *channel;
@@ -5453,7 +5451,6 @@ struct cfg80211_roam_info {
size_t req_ie_len;
const u8 *resp_ie;
size_t resp_ie_len;
- bool authorized;
};
/**
@@ -5478,6 +5475,23 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
gfp_t gfp);
/**
+ * cfg80211_port_authorized - notify cfg80211 of successful security association
+ *
+ * @dev: network device
+ * @bssid: the BSSID of the AP
+ * @gfp: allocation flags
+ *
+ * This function should be called by a driver that supports 4 way handshake
+ * offload after a security association was successfully established (i.e.,
+ * the 4 way handshake was completed successfully). The call to this function
+ * should be preceded by a call to cfg80211_connect_result(),
+ * cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to
+ * indicate the 802.11 association.
+ */
+void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
+ gfp_t gfp);
+
+/**
* cfg80211_disconnected - notify cfg80211 that connection was dropped
*
* @dev: network device
@@ -5576,7 +5590,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
* cfg80211_rx_mgmt - notification of received, unprocessed management frame
* @wdev: wireless device receiving the frame
* @freq: Frequency on which the frame was received in MHz
- * @sig_dbm: signal strength in mBm, or 0 if unknown
+ * @sig_dbm: signal strength in dBm, or 0 if unknown
* @buf: Management frame (header + body)
* @len: length of the frame data
* @flags: flags, as defined in enum nl80211_rxmgmt_flags
@@ -5755,7 +5769,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
* @frame: the frame
* @len: length of the frame
* @freq: frequency the frame was received on
- * @sig_dbm: signal strength in mBm, or 0 if unknown
+ * @sig_dbm: signal strength in dBm, or 0 if unknown
*
* Use this function to report to userspace when a beacon was
* received. It is not useful to call this when there is no
@@ -5934,7 +5948,8 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
* @ies: the IE buffer
* @ielen: the length of the IE buffer
* @ids: an array with element IDs that are allowed before
- * the split
+ * the split. A WLAN_EID_EXTENSION value means that the next
+ * EID in the list is a sub-element of the EXTENSION IE.
* @n_ids: the size of the element ID array
* @after_ric: array IE types that come after the RIC element
* @n_after_ric: size of the @after_ric array
@@ -5965,7 +5980,8 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
* @ies: the IE buffer
* @ielen: the length of the IE buffer
* @ids: an array with element IDs that are allowed before
- * the split
+ * the split. A WLAN_EID_EXTENSION value means that the next
+ * EID in the list is a sub-element of the EXTENSION IE.
* @n_ids: the size of the element ID array
* @offset: offset where to start splitting in the buffer
*
diff --git a/include/net/devlink.h b/include/net/devlink.h
index b9654e133599..6545b03e97f7 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -26,10 +26,12 @@ struct devlink {
struct list_head port_list;
struct list_head sb_list;
struct list_head dpipe_table_list;
+ struct list_head resource_list;
struct devlink_dpipe_headers *dpipe_headers;
const struct devlink_ops *ops;
struct device *dev;
possible_net_t _net;
+ struct mutex lock;
char priv[0] __aligned(NETDEV_ALIGN);
};
@@ -181,6 +183,9 @@ struct devlink_dpipe_table_ops;
* @counters_enabled: indicates if counters are active
* @counter_control_extern: indicates if counter control is in dpipe or
* external tool
+ * @resource_valid: indicates that the resource id is valid
+ * @resource_id: relative resource this table is related to
+ * @resource_units: number of resource units consumed per table entry
* @table_ops: table operations
* @rcu: rcu
*/
@@ -190,6 +195,9 @@ struct devlink_dpipe_table {
const char *name;
bool counters_enabled;
bool counter_control_extern;
+ bool resource_valid;
+ u64 resource_id;
+ u64 resource_units;
struct devlink_dpipe_table_ops *table_ops;
struct rcu_head rcu;
};
@@ -223,7 +231,63 @@ struct devlink_dpipe_headers {
unsigned int headers_count;
};
+/**
+ * struct devlink_resource_ops - resource ops
+ * @occ_get: get the occupied size
+ * @size_validate: validate the size of the resource before update, reload
+ * is needed for changes to take place
+ */
+struct devlink_resource_ops {
+ u64 (*occ_get)(struct devlink *devlink);
+ int (*size_validate)(struct devlink *devlink, u64 size,
+ struct netlink_ext_ack *extack);
+};
+
+/**
+ * struct devlink_resource_size_params - resource's size parameters
+ * @size_min: minimum size which can be set
+ * @size_max: maximum size which can be set
+ * @size_granularity: size granularity
+ * @unit: resource's basic unit
+ */
+struct devlink_resource_size_params {
+ u64 size_min;
+ u64 size_max;
+ u64 size_granularity;
+ enum devlink_resource_unit unit;
+};
+
+/**
+ * struct devlink_resource - devlink resource
+ * @name: name of the resource
+ * @id: id, per devlink instance
+ * @size: size of the resource
+ * @size_new: updated size of the resource, reload is needed
+ * @size_valid: valid in case the total size of the resource is valid
+ * including its children
+ * @parent: parent resource
+ * @size_params: size parameters
+ * @list: parent list
+ * @resource_list: list of child resources
+ * @resource_ops: resource ops
+ */
+struct devlink_resource {
+ const char *name;
+ u64 id;
+ u64 size;
+ u64 size_new;
+ bool size_valid;
+ struct devlink_resource *parent;
+ struct devlink_resource_size_params *size_params;
+ struct list_head list;
+ struct list_head resource_list;
+ const struct devlink_resource_ops *resource_ops;
+};
+
+#define DEVLINK_RESOURCE_ID_PARENT_TOP 0
+
struct devlink_ops {
+ int (*reload)(struct devlink *devlink);
int (*port_type_set)(struct devlink_port *devlink_port,
enum devlink_port_type port_type);
int (*port_split)(struct devlink *devlink, unsigned int port_index,
@@ -332,6 +396,23 @@ extern struct devlink_dpipe_header devlink_dpipe_header_ethernet;
extern struct devlink_dpipe_header devlink_dpipe_header_ipv4;
extern struct devlink_dpipe_header devlink_dpipe_header_ipv6;
+int devlink_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ bool top_hierarchy,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ struct devlink_resource_size_params *size_params,
+ const struct devlink_resource_ops *resource_ops);
+void devlink_resources_unregister(struct devlink *devlink,
+ struct devlink_resource *resource);
+int devlink_resource_size_get(struct devlink *devlink,
+ u64 resource_id,
+ u64 *p_resource_size);
+int devlink_dpipe_table_resource_set(struct devlink *devlink,
+ const char *table_name, u64 resource_id,
+ u64 resource_units);
+
#else
static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -468,6 +549,40 @@ devlink_dpipe_match_put(struct sk_buff *skb,
return 0;
}
+static inline int
+devlink_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ bool top_hierarchy,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ struct devlink_resource_size_params *size_params,
+ const struct devlink_resource_ops *resource_ops)
+{
+ return 0;
+}
+
+static inline void
+devlink_resources_unregister(struct devlink *devlink,
+ struct devlink_resource *resource)
+{
+}
+
+static inline int
+devlink_resource_size_get(struct devlink *devlink, u64 resource_id,
+ u64 *p_resource_size)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+devlink_dpipe_table_resource_set(struct devlink *devlink,
+ const char *table_name, u64 resource_id,
+ u64 resource_units)
+{
+ return -EOPNOTSUPP;
+}
+
#endif
#endif /* _NET_DEVLINK_H_ */
diff --git a/include/net/dn.h b/include/net/dn.h
index fc0036228d20..56ab0726c641 100644
--- a/include/net/dn.h
+++ b/include/net/dn.h
@@ -123,13 +123,6 @@ struct dn_scp /* Session Control Port */
unsigned long keepalive;
void (*keepalive_fxn)(struct sock *sk);
- /*
- * This stuff is for the fast timer for delayed acks
- */
- struct timer_list delack_timer;
- int delack_pending;
- void (*delack_fxn)(struct sock *sk);
-
};
static inline struct dn_scp *DN_SK(struct sock *sk)
diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h
index 3a3e33d18456..413a15e5339c 100644
--- a/include/net/dn_nsp.h
+++ b/include/net/dn_nsp.h
@@ -17,7 +17,6 @@
void dn_nsp_send_data_ack(struct sock *sk);
void dn_nsp_send_oth_ack(struct sock *sk);
-void dn_nsp_delayed_ack(struct sock *sk);
void dn_send_conn_ack(struct sock *sk);
void dn_send_conn_conf(struct sock *sk, gfp_t gfp);
void dn_nsp_send_disc(struct sock *sk, unsigned char type,
diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index 55df9939bca2..342d2503cba5 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -69,6 +69,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev,
*/
struct dn_route {
struct dst_entry dst;
+ struct dn_route __rcu *dn_next;
struct neighbour *n;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index dd44d6ce1097..6cb602dd970c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -29,6 +29,7 @@ struct fixed_phy_status;
enum dsa_tag_protocol {
DSA_TAG_PROTO_NONE = 0,
DSA_TAG_PROTO_BRCM,
+ DSA_TAG_PROTO_BRCM_PREPEND,
DSA_TAG_PROTO_DSA,
DSA_TAG_PROTO_EDSA,
DSA_TAG_PROTO_KSZ,
@@ -116,13 +117,13 @@ struct dsa_switch_tree {
struct raw_notifier_head nh;
/* Tree identifier */
- u32 tree;
+ unsigned int index;
/* Number of switches attached to this tree */
struct kref refcount;
/* Has this tree been applied to the hardware? */
- bool applied;
+ bool setup;
/*
* Configuration data for the platform device that owns
@@ -130,11 +131,6 @@ struct dsa_switch_tree {
*/
struct dsa_platform_data *pd;
- /* Copy of tag_ops->rcv for faster access in hot path */
- struct sk_buff * (*rcv)(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt);
-
/*
* The switch port to which the CPU is attached.
*/
@@ -144,12 +140,6 @@ struct dsa_switch_tree {
* Data for the individual switch chips.
*/
struct dsa_switch *ds[DSA_MAX_SWITCHES];
-
- /*
- * Tagging protocol operations for adding and removing an
- * encapsulation tag.
- */
- const struct dsa_device_ops *tag_ops;
};
/* TC matchall action types, only mirroring for now */
@@ -175,11 +165,33 @@ struct dsa_mall_tc_entry {
struct dsa_port {
+ /* A CPU port is physically connected to a master device.
+ * A user port exposed to userspace has a slave device.
+ */
+ union {
+ struct net_device *master;
+ struct net_device *slave;
+ };
+
+ /* CPU port tagging operations used by master or slave devices */
+ const struct dsa_device_ops *tag_ops;
+
+ /* Copies for faster access in master receive hot path */
+ struct dsa_switch_tree *dst;
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt);
+
+ enum {
+ DSA_PORT_TYPE_UNUSED = 0,
+ DSA_PORT_TYPE_CPU,
+ DSA_PORT_TYPE_DSA,
+ DSA_PORT_TYPE_USER,
+ } type;
+
struct dsa_switch *ds;
unsigned int index;
const char *name;
- struct dsa_port *cpu_dp;
- struct net_device *netdev;
+ const struct dsa_port *cpu_dp;
struct device_node *dn;
unsigned int ageing_time;
u8 stp_state;
@@ -188,7 +200,6 @@ struct dsa_port {
/*
* Original copy of the master netdev ethtool_ops
*/
- struct ethtool_ops ethtool_ops;
const struct ethtool_ops *orig_ethtool_ops;
};
@@ -199,7 +210,7 @@ struct dsa_switch {
* Parent switch tree, and switch index.
*/
struct dsa_switch_tree *dst;
- int index;
+ unsigned int index;
/* Listener for switch fabric events */
struct notifier_block nb;
@@ -230,9 +241,6 @@ struct dsa_switch {
/*
* Slave mii_bus and devices for the individual ports.
*/
- u32 dsa_port_mask;
- u32 cpu_port_mask;
- u32 enabled_port_mask;
u32 phys_mii_mask;
struct mii_bus *slave_mii_bus;
@@ -251,51 +259,81 @@ struct dsa_switch {
struct dsa_port ports[];
};
+static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p)
+{
+ return &ds->ports[p];
+}
+
+static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p)
+{
+ return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED;
+}
+
static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
{
- return !!(ds->cpu_port_mask & (1 << p));
+ return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_CPU;
}
static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p)
{
- return !!((ds->dsa_port_mask) & (1 << p));
+ return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_DSA;
}
-static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p)
+static inline bool dsa_is_user_port(struct dsa_switch *ds, int p)
{
- return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p);
+ return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_USER;
}
-static inline u8 dsa_upstream_port(struct dsa_switch *ds)
+static inline u32 dsa_user_ports(struct dsa_switch *ds)
{
- struct dsa_switch_tree *dst = ds->dst;
+ u32 mask = 0;
+ int p;
- /*
- * If this is the root switch (i.e. the switch that connects
- * to the CPU), return the cpu port number on this switch.
- * Else return the (DSA) port number that connects to the
- * switch that is one hop closer to the cpu.
- */
- if (dst->cpu_dp->ds == ds)
- return dst->cpu_dp->index;
+ for (p = 0; p < ds->num_ports; p++)
+ if (dsa_is_user_port(ds, p))
+ mask |= BIT(p);
+
+ return mask;
+}
+
+/* Return the local port used to reach an arbitrary switch port */
+static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device,
+ int port)
+{
+ if (device == ds->index)
+ return port;
else
- return ds->rtable[dst->cpu_dp->ds->index];
+ return ds->rtable[device];
+}
+
+/* Return the local port used to reach the dedicated CPU port */
+static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port)
+{
+ const struct dsa_port *dp = dsa_to_port(ds, port);
+ const struct dsa_port *cpu_dp = dp->cpu_dp;
+
+ if (!cpu_dp)
+ return port;
+
+ return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index);
}
typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
bool is_static, void *data);
struct dsa_switch_ops {
+#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
/*
* Legacy probing.
*/
const char *(*probe)(struct device *dsa_dev,
struct device *host_dev, int sw_addr,
void **priv);
+#endif
- enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds);
+ enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds,
+ int port);
int (*setup)(struct dsa_switch *ds);
- int (*set_addr)(struct dsa_switch *ds, u8 *addr);
u32 (*get_phy_flags)(struct dsa_switch *ds, int port);
/*
@@ -382,12 +420,10 @@ struct dsa_switch_ops {
*/
int (*port_vlan_filtering)(struct dsa_switch *ds, int port,
bool vlan_filtering);
- int (*port_vlan_prepare)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans);
- void (*port_vlan_add)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans);
+ int (*port_vlan_prepare)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan);
+ void (*port_vlan_add)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan);
int (*port_vlan_del)(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan);
/*
@@ -403,12 +439,10 @@ struct dsa_switch_ops {
/*
* Multicast database
*/
- int (*port_mdb_prepare)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans);
- void (*port_mdb_add)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans);
+ int (*port_mdb_prepare)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb);
+ void (*port_mdb_add)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb);
int (*port_mdb_del)(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb);
/*
@@ -442,11 +476,20 @@ struct dsa_switch_driver {
const struct dsa_switch_ops *ops;
};
+#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
/* Legacy driver registration */
void register_switch_driver(struct dsa_switch_driver *type);
void unregister_switch_driver(struct dsa_switch_driver *type);
struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev);
+#else
+static inline void register_switch_driver(struct dsa_switch_driver *type) { }
+static inline void unregister_switch_driver(struct dsa_switch_driver *type) { }
+static inline struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev)
+{
+ return NULL;
+}
+#endif
struct net_device *dsa_dev_to_net_device(struct device *dev);
/* Keep inline for faster access in hot path */
@@ -475,4 +518,54 @@ static inline int dsa_switch_resume(struct dsa_switch *ds)
}
#endif /* CONFIG_PM_SLEEP */
+enum dsa_notifier_type {
+ DSA_PORT_REGISTER,
+ DSA_PORT_UNREGISTER,
+};
+
+struct dsa_notifier_info {
+ struct net_device *dev;
+};
+
+struct dsa_notifier_register_info {
+ struct dsa_notifier_info info; /* must be first */
+ struct net_device *master;
+ unsigned int port_number;
+ unsigned int switch_number;
+};
+
+static inline struct net_device *
+dsa_notifier_info_to_dev(const struct dsa_notifier_info *info)
+{
+ return info->dev;
+}
+
+#if IS_ENABLED(CONFIG_NET_DSA)
+int register_dsa_notifier(struct notifier_block *nb);
+int unregister_dsa_notifier(struct notifier_block *nb);
+int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+ struct dsa_notifier_info *info);
+#else
+static inline int register_dsa_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int unregister_dsa_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+ struct dsa_notifier_info *info)
+{
+ return NOTIFY_DONE;
+}
+#endif
+
+/* Broadcom tag specific helpers to insert and extract queue/port number.
+ * The packed value holds the port above bit 7 and the queue in the low
+ * 8 bits. Every macro argument is parenthesized so that callers may pass
+ * arbitrary expressions (e.g. a conditional) without precedence surprises.
+ */
+#define BRCM_TAG_SET_PORT_QUEUE(p, q) ((p) << 8 | (q))
+#define BRCM_TAG_GET_PORT(v) ((v) >> 8)
+#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
+
#endif
diff --git a/include/net/dst.h b/include/net/dst.h
index 694c2e6ae618..c63d2c37f6e9 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -34,13 +34,9 @@ struct sk_buff;
struct dst_entry {
struct net_device *dev;
- struct rcu_head rcu_head;
- struct dst_entry *child;
struct dst_ops *ops;
unsigned long _metrics;
unsigned long expires;
- struct dst_entry *path;
- struct dst_entry *from;
#ifdef CONFIG_XFRM
struct xfrm_state *xfrm;
#else
@@ -59,8 +55,6 @@ struct dst_entry {
#define DST_XFRM_QUEUE 0x0040
#define DST_METADATA 0x0080
- short error;
-
/* A non-zero value of dst->obsolete forces by-hand validation
* of the route entry. Positive values are set by the generic
* dst layer to indicate that the entry has been forcefully
@@ -76,35 +70,24 @@ struct dst_entry {
#define DST_OBSOLETE_KILL -2
unsigned short header_len; /* more space at head required */
unsigned short trailer_len; /* space to reserve at tail */
- unsigned short __pad3;
-
-#ifdef CONFIG_IP_ROUTE_CLASSID
- __u32 tclassid;
-#else
- __u32 __pad2;
-#endif
-#ifdef CONFIG_64BIT
- /*
- * Align __refcnt to a 64 bytes alignment
- * (L1_CACHE_SIZE would be too much)
- */
- long __pad_to_align_refcnt[2];
-#endif
/*
* __refcnt wants to be on a different cache line from
* input/output/ops or performance tanks badly
*/
- atomic_t __refcnt; /* client references */
+#ifdef CONFIG_64BIT
+ atomic_t __refcnt; /* 64-bit offset 64 */
+#endif
int __use;
unsigned long lastuse;
struct lwtunnel_state *lwtstate;
- union {
- struct dst_entry *next;
- struct rtable __rcu *rt_next;
- struct rt6_info *rt6_next;
- struct dn_route __rcu *dn_next;
- };
+ struct rcu_head rcu_head;
+ short error;
+ short __pad;
+ __u32 tclassid;
+#ifndef CONFIG_64BIT
+ atomic_t __refcnt; /* 32-bit offset 64 */
+#endif
};
struct dst_metrics {
@@ -250,23 +233,24 @@ static inline void dst_hold(struct dst_entry *dst)
{
/*
* If your kernel compilation stops here, please check
- * __pad_to_align_refcnt declaration in struct dst_entry
+ * the placement of __refcnt in struct dst_entry
*/
BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63);
WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0);
}
-static inline void dst_use(struct dst_entry *dst, unsigned long time)
+static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
{
- dst_hold(dst);
- dst->__use++;
- dst->lastuse = time;
+ if (unlikely(time != dst->lastuse)) {
+ dst->__use++;
+ dst->lastuse = time;
+ }
}
-static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
+static inline void dst_hold_and_use(struct dst_entry *dst, unsigned long time)
{
- dst->__use++;
- dst->lastuse = time;
+ dst_hold(dst);
+ dst_use_noref(dst, time);
}
static inline struct dst_entry *dst_clone(struct dst_entry *dst)
@@ -520,4 +504,12 @@ static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
}
#endif
+static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (dst && dst->ops->update_pmtu)
+ dst->ops->update_pmtu(dst, NULL, skb, mtu);
+}
+
#endif /* _NET_DST_H */
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 91bc7bdf6bf5..56cb3c38569a 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -25,7 +25,7 @@ struct metadata_dst {
} u;
};
-static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
+static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb)
{
struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb);
@@ -35,7 +35,8 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
return NULL;
}
-static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
+static inline struct ip_tunnel_info *
+skb_tunnel_info(const struct sk_buff *skb)
{
struct metadata_dst *md_dst = skb_metadata_dst(skb);
struct dst_entry *dst;
@@ -87,6 +88,7 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
void metadata_dst_free(struct metadata_dst *);
struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type,
gfp_t flags);
+void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst);
struct metadata_dst __percpu *
metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags);
diff --git a/include/net/erspan.h b/include/net/erspan.h
index ca94fc86865e..d044aa60cc76 100644
--- a/include/net/erspan.h
+++ b/include/net/erspan.h
@@ -15,7 +15,7 @@
* s, Recur, Flags, Version fields only S (bit 03) is set to 1. The
* other fields are set to zero, so only a sequence number follows.
*
- * ERSPAN Type II header (8 octets [42:49])
+ * ERSPAN Version 1 (Type II) header (8 octets [42:49])
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -24,11 +24,31 @@
* | Reserved | Index |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
+ *
+ * ERSPAN Version 2 (Type III) header (12 octets [42:53])
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Ver | VLAN | COS |BSO|T| Session ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Timestamp |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SGT |P| FT | Hw ID |D|Gra|O|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Platform Specific SubHeader (8 octets, optional)
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Platf ID | Platform Specific Info |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Platform Specific Info |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
* GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
*/
-#define ERSPAN_VERSION 0x1
+#include <uapi/linux/erspan.h>
+#define ERSPAN_VERSION 0x1 /* ERSPAN type II */
#define VER_MASK 0xf000
#define VLAN_MASK 0x0fff
#define COS_MASK 0xe000
@@ -37,6 +57,19 @@
#define ID_MASK 0x03ff
#define INDEX_MASK 0xfffff
+#define ERSPAN_VERSION2 0x2 /* ERSPAN type III*/
+#define BSO_MASK EN_MASK
+#define SGT_MASK 0xffff0000
+#define P_MASK 0x8000
+#define FT_MASK 0x7c00
+#define HWID_MASK 0x03f0
+#define DIR_MASK 0x0008
+#define GRA_MASK 0x0006
+#define O_MASK 0x0001
+
+#define HWID_OFFSET 4
+#define DIR_OFFSET 3
+
enum erspan_encap_type {
ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */
ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */
@@ -44,18 +77,199 @@ enum erspan_encap_type {
ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */
};
-struct erspan_metadata {
- __be32 index; /* type II */
-};
+#define ERSPAN_V1_MDSIZE 4
+#define ERSPAN_V2_MDSIZE 8
-struct erspanhdr {
- __be16 ver_vlan;
-#define VER_OFFSET 12
- __be16 session_id;
-#define COS_OFFSET 13
-#define EN_OFFSET 11
-#define T_OFFSET 10
- struct erspan_metadata md;
+struct erspan_base_hdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 vlan_upper:4,
+ ver:4;
+ __u8 vlan:8;
+ __u8 session_id_upper:2,
+ t:1,
+ en:2,
+ cos:3;
+ __u8 session_id:8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 ver: 4,
+ vlan_upper:4;
+ __u8 vlan:8;
+ __u8 cos:3,
+ en:2,
+ t:1,
+ session_id_upper:2;
+ __u8 session_id:8;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
};
+static inline void set_session_id(struct erspan_base_hdr *ershdr, u16 id)
+{
+ ershdr->session_id = id & 0xff;
+ ershdr->session_id_upper = (id >> 8) & 0x3;
+}
+
+static inline u16 get_session_id(const struct erspan_base_hdr *ershdr)
+{
+ return (ershdr->session_id_upper << 8) + ershdr->session_id;
+}
+
+static inline void set_vlan(struct erspan_base_hdr *ershdr, u16 vlan)
+{
+ ershdr->vlan = vlan & 0xff;
+ ershdr->vlan_upper = (vlan >> 8) & 0xf;
+}
+
+static inline u16 get_vlan(const struct erspan_base_hdr *ershdr)
+{
+ return (ershdr->vlan_upper << 8) + ershdr->vlan;
+}
+
+static inline void set_hwid(struct erspan_md2 *md2, u8 hwid)
+{
+ md2->hwid = hwid & 0xf;
+ md2->hwid_upper = (hwid >> 4) & 0x3;
+}
+
+static inline u8 get_hwid(const struct erspan_md2 *md2)
+{
+ return (md2->hwid_upper << 4) + md2->hwid;
+}
+
+static inline int erspan_hdr_len(int version)
+{
+ return sizeof(struct erspan_base_hdr) +
+ (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE);
+}
+
+static inline u8 tos_to_cos(u8 tos)
+{
+ u8 dscp, cos;
+
+ dscp = tos >> 2;
+ cos = dscp >> 3;
+ return cos;
+}
+
+static inline void erspan_build_header(struct sk_buff *skb,
+ u32 id, u32 index,
+ bool truncate, bool is_ipv4)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ enum erspan_encap_type enc_type;
+ struct erspan_base_hdr *ershdr;
+ struct qtag_prefix {
+ __be16 eth_type;
+ __be16 tci;
+ } *qp;
+ u16 vlan_tci = 0;
+ u8 tos;
+ __be32 *idx;
+
+ tos = is_ipv4 ? ip_hdr(skb)->tos :
+ (ipv6_hdr(skb)->priority << 4) +
+ (ipv6_hdr(skb)->flow_lbl[0] >> 4);
+
+ enc_type = ERSPAN_ENCAP_NOVLAN;
+
+ /* If mirrored packet has vlan tag, extract tci and
+ * preserve vlan header in the mirrored frame.
+ */
+ if (eth->h_proto == htons(ETH_P_8021Q)) {
+ qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+ vlan_tci = ntohs(qp->tci);
+ enc_type = ERSPAN_ENCAP_INFRAME;
+ }
+
+ skb_push(skb, sizeof(*ershdr) + ERSPAN_V1_MDSIZE);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE);
+
+ /* Build base header */
+ ershdr->ver = ERSPAN_VERSION;
+ ershdr->cos = tos_to_cos(tos);
+ ershdr->en = enc_type;
+ ershdr->t = truncate;
+ set_vlan(ershdr, vlan_tci);
+ set_session_id(ershdr, id);
+
+ /* Build metadata */
+ idx = (__be32 *)(ershdr + 1);
+ *idx = htonl(index & INDEX_MASK);
+}
+
+/* ERSPAN GRA: timestamp granularity
+ * 00b --> granularity = 100 microseconds
+ * 01b --> granularity = 100 nanoseconds
+ * 10b --> granularity = IEEE 1588
+ * Here we only support 100 microseconds.
+ */
+static inline __be32 erspan_get_timestamp(void)
+{
+ u64 h_usecs;
+ ktime_t kt;
+
+ kt = ktime_get_real();
+ h_usecs = ktime_divns(kt, 100 * NSEC_PER_USEC);
+
+ /* The ERSPAN timestamp field is only 32 bits wide, so at
+ * 100 usec granularity it wraps around roughly every 5 days.
+ */
+ return htonl((u32)h_usecs);
+}
+
+static inline void erspan_build_header_v2(struct sk_buff *skb,
+ u32 id, u8 direction, u16 hwid,
+ bool truncate, bool is_ipv4)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ struct erspan_base_hdr *ershdr;
+ struct erspan_md2 *md2;
+ struct qtag_prefix {
+ __be16 eth_type;
+ __be16 tci;
+ } *qp;
+ u16 vlan_tci = 0;
+ u8 gra = 0; /* 100 usec */
+ u8 bso = 0; /* Bad/Short/Oversized */
+ u8 sgt = 0;
+ u8 tos;
+
+ tos = is_ipv4 ? ip_hdr(skb)->tos :
+ (ipv6_hdr(skb)->priority << 4) +
+ (ipv6_hdr(skb)->flow_lbl[0] >> 4);
+
+ /* Unlike v1, v2 does not have En field,
+ * so only extract vlan tci field.
+ */
+ if (eth->h_proto == htons(ETH_P_8021Q)) {
+ qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+ vlan_tci = ntohs(qp->tci);
+ }
+
+ skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
+
+ /* Build base header */
+ ershdr->ver = ERSPAN_VERSION2;
+ ershdr->cos = tos_to_cos(tos);
+ ershdr->en = bso;
+ ershdr->t = truncate;
+ set_vlan(ershdr, vlan_tci);
+ set_session_id(ershdr, id);
+
+ /* Build metadata */
+ md2 = (struct erspan_md2 *)(ershdr + 1);
+ md2->timestamp = erspan_get_timestamp();
+ md2->sgt = htons(sgt);
+ md2->p = 1;
+ md2->ft = 0;
+ md2->dir = direction;
+ md2->gra = gra;
+ md2->o = 0;
+ set_hwid(md2, hwid);
+}
+
#endif
diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
index 669b9716dc7a..c91ec732afd6 100644
--- a/include/net/fib_notifier.h
+++ b/include/net/fib_notifier.h
@@ -9,6 +9,7 @@
struct fib_notifier_info {
struct net *net;
int family;
+ struct netlink_ext_ack *extack;
};
enum fib_event_type {
@@ -20,6 +21,8 @@ enum fib_event_type {
FIB_EVENT_RULE_DEL,
FIB_EVENT_NH_ADD,
FIB_EVENT_NH_DEL,
+ FIB_EVENT_VIF_ADD,
+ FIB_EVENT_VIF_DEL,
};
struct fib_notifier_ops {
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 22aba321282d..9a074776f70b 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -84,11 +84,11 @@ struct flow_dissector_key_ipv6_addrs {
};
/**
- * struct flow_dissector_key_tipc_addrs:
- * @srcnode: source node address
+ * struct flow_dissector_key_tipc:
+ * @key: source node address combined with selector
*/
-struct flow_dissector_key_tipc_addrs {
- __be32 srcnode;
+struct flow_dissector_key_tipc {
+ __be32 key;
};
/**
@@ -100,7 +100,7 @@ struct flow_dissector_key_addrs {
union {
struct flow_dissector_key_ipv4_addrs v4addrs;
struct flow_dissector_key_ipv6_addrs v6addrs;
- struct flow_dissector_key_tipc_addrs tipcaddrs;
+ struct flow_dissector_key_tipc tipckey;
};
};
@@ -192,7 +192,7 @@ enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */
FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
- FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */
+ FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */
FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */
FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */
FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */
diff --git a/include/net/fq.h b/include/net/fq.h
index 6d8521a30c5c..ac944a686840 100644
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -90,6 +90,13 @@ typedef void fq_skb_free_t(struct fq *,
struct fq_flow *,
struct sk_buff *);
+/* Return %true to filter (drop) the frame. */
+typedef bool fq_skb_filter_t(struct fq *,
+ struct fq_tin *,
+ struct fq_flow *,
+ struct sk_buff *,
+ void *);
+
typedef struct fq_flow *fq_flow_get_default_t(struct fq *,
struct fq_tin *,
int idx,
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index ac1a2317941e..be7c0fab3478 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -12,24 +12,22 @@
/* functions that are embedded into includer */
-static struct sk_buff *fq_flow_dequeue(struct fq *fq,
- struct fq_flow *flow)
+static void fq_adjust_removal(struct fq *fq,
+ struct fq_flow *flow,
+ struct sk_buff *skb)
{
struct fq_tin *tin = flow->tin;
- struct fq_flow *i;
- struct sk_buff *skb;
-
- lockdep_assert_held(&fq->lock);
-
- skb = __skb_dequeue(&flow->queue);
- if (!skb)
- return NULL;
tin->backlog_bytes -= skb->len;
tin->backlog_packets--;
flow->backlog -= skb->len;
fq->backlog--;
fq->memory_usage -= skb->truesize;
+}
+
+static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow)
+{
+ struct fq_flow *i;
if (flow->backlog == 0) {
list_del_init(&flow->backlogchain);
@@ -43,6 +41,21 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq,
list_move_tail(&flow->backlogchain,
&i->backlogchain);
}
+}
+
+static struct sk_buff *fq_flow_dequeue(struct fq *fq,
+ struct fq_flow *flow)
+{
+ struct sk_buff *skb;
+
+ lockdep_assert_held(&fq->lock);
+
+ skb = __skb_dequeue(&flow->queue);
+ if (!skb)
+ return NULL;
+
+ fq_adjust_removal(fq, flow, skb);
+ fq_rejigger_backlog(fq, flow);
return skb;
}
@@ -191,6 +204,45 @@ static void fq_tin_enqueue(struct fq *fq,
}
}
+static void fq_flow_filter(struct fq *fq,
+ struct fq_flow *flow,
+ fq_skb_filter_t filter_func,
+ void *filter_data,
+ fq_skb_free_t free_func)
+{
+ struct fq_tin *tin = flow->tin;
+ struct sk_buff *skb, *tmp;
+
+ lockdep_assert_held(&fq->lock);
+
+ skb_queue_walk_safe(&flow->queue, skb, tmp) {
+ if (!filter_func(fq, tin, flow, skb, filter_data))
+ continue;
+
+ __skb_unlink(skb, &flow->queue);
+ fq_adjust_removal(fq, flow, skb);
+ free_func(fq, tin, flow, skb);
+ }
+
+ fq_rejigger_backlog(fq, flow);
+}
+
+static void fq_tin_filter(struct fq *fq,
+ struct fq_tin *tin,
+ fq_skb_filter_t filter_func,
+ void *filter_data,
+ fq_skb_free_t free_func)
+{
+ struct fq_flow *flow;
+
+ lockdep_assert_held(&fq->lock);
+
+ list_for_each_entry(flow, &tin->new_flows, flowchain)
+ fq_flow_filter(fq, flow, filter_func, filter_data, free_func);
+ list_for_each_entry(flow, &tin->old_flows, flowchain)
+ fq_flow_filter(fq, flow, filter_func, filter_data, free_func);
+}
+
static void fq_flow_reset(struct fq *fq,
struct fq_flow *flow,
fq_skb_free_t free_func)
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 304f7aa9cc01..0304ba2ae353 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -49,6 +49,9 @@ int gnet_stats_copy_rate_est(struct gnet_dump *d,
int gnet_stats_copy_queue(struct gnet_dump *d,
struct gnet_stats_queue __percpu *cpu_q,
struct gnet_stats_queue *q, __u32 qlen);
+void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu_q,
+ const struct gnet_stats_queue *q, __u32 qlen);
int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
int gnet_stats_finish_copy(struct gnet_dump *d);
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 5ac169a735f4..decf6012a401 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -154,15 +154,12 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
/**
* genlmsg_nlhdr - Obtain netlink header from user specified header
* @user_hdr: user header as returned from genlmsg_put()
- * @family: generic netlink family
*
* Returns pointer to netlink header.
*/
-static inline struct nlmsghdr *
-genlmsg_nlhdr(void *user_hdr, const struct genl_family *family)
+static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr)
{
return (struct nlmsghdr *)((char *)user_hdr -
- family->hdrsize -
GENL_HDRLEN -
NLMSG_HDRLEN);
}
@@ -190,16 +187,14 @@ static inline int genlmsg_parse(const struct nlmsghdr *nlh,
* genl_dump_check_consistent - check if sequence is consistent and advertise if not
* @cb: netlink callback structure that stores the sequence number
* @user_hdr: user header as returned from genlmsg_put()
- * @family: generic netlink family
*
* Cf. nl_dump_check_consistent(), this just provides a wrapper to make it
* simpler to use with generic netlink.
*/
static inline void genl_dump_check_consistent(struct netlink_callback *cb,
- void *user_hdr,
- const struct genl_family *family)
+ void *user_hdr)
{
- nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr, family));
+ nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr));
}
/**
diff --git a/include/net/gue.h b/include/net/gue.h
index 2fdb29ca74c2..fdad41469b65 100644
--- a/include/net/gue.h
+++ b/include/net/gue.h
@@ -44,10 +44,10 @@ struct guehdr {
#else
#error "Please fix <asm/byteorder.h>"
#endif
- __u8 proto_ctype;
- __u16 flags;
+ __u8 proto_ctype;
+ __be16 flags;
};
- __u32 word;
+ __be32 word;
};
};
@@ -84,11 +84,10 @@ static inline size_t guehdr_priv_flags_len(__be32 flags)
* if there is an unknown standard or private flags, or the options length for
* the flags exceeds the options length specific in hlen of the GUE header.
*/
-static inline int validate_gue_flags(struct guehdr *guehdr,
- size_t optlen)
+static inline int validate_gue_flags(struct guehdr *guehdr, size_t optlen)
{
+ __be16 flags = guehdr->flags;
size_t len;
- __be32 flags = guehdr->flags;
if (flags & ~GUE_FLAGS_ALL)
return 1;
@@ -101,12 +100,13 @@ static inline int validate_gue_flags(struct guehdr *guehdr,
/* Private flags are last four bytes accounted in
* guehdr_flags_len
*/
- flags = *(__be32 *)((void *)&guehdr[1] + len - GUE_LEN_PRIV);
+ __be32 pflags = *(__be32 *)((void *)&guehdr[1] +
+ len - GUE_LEN_PRIV);
- if (flags & ~GUE_PFLAGS_ALL)
+ if (pflags & ~GUE_PFLAGS_ALL)
return 1;
- len += guehdr_priv_flags_len(flags);
+ len += guehdr_priv_flags_len(pflags);
if (len > optlen)
return 1;
}
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 13e4c89a8231..c1a93ce35e62 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops {
* @icsk_af_ops Operations which are AF_INET{4,6} specific
* @icsk_ulp_ops Pluggable ULP control hook
* @icsk_ulp_data ULP private data
+ * @icsk_listen_portaddr_node hash node in the port+address listener hashtable
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
* @icsk_pending: Scheduled timer event
@@ -101,6 +102,7 @@ struct inet_connection_sock {
const struct inet_connection_sock_af_ops *icsk_af_ops;
const struct tcp_ulp_ops *icsk_ulp_ops;
void *icsk_ulp_data;
+ struct hlist_node icsk_listen_portaddr_node;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
__u8 icsk_ca_state:6,
icsk_ca_setsockopt:1,
@@ -169,9 +171,9 @@ enum inet_csk_ack_state_t {
};
void inet_csk_init_xmit_timers(struct sock *sk,
- void (*retransmit_handler)(unsigned long),
- void (*delack_handler)(unsigned long),
- void (*keepalive_handler)(unsigned long));
+ void (*retransmit_handler)(struct timer_list *),
+ void (*delack_handler)(struct timer_list *),
+ void (*keepalive_handler)(struct timer_list *));
void inet_csk_clear_xmit_timers(struct sock *sk);
static inline void inet_csk_schedule_ack(struct sock *sk)
@@ -305,10 +307,10 @@ void inet_csk_prepare_forced_close(struct sock *sk);
/*
* LISTEN is a special case for poll..
*/
-static inline unsigned int inet_csk_listen_poll(const struct sock *sk)
+static inline __poll_t inet_csk_listen_poll(const struct sock *sk)
{
return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ?
- (POLLIN | POLLRDNORM) : 0;
+ (EPOLLIN | EPOLLRDNORM) : 0;
}
int inet_csk_listen_start(struct sock *sk, int backlog);
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index d30e4c869438..482a1b705362 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -134,11 +134,6 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
return 1;
}
-static inline void IP6_ECN_clear(struct ipv6hdr *iph)
-{
- *(__be32*)iph &= ~htonl(INET_ECN_MASK << 20);
-}
-
static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
{
dscp &= ~INET_ECN_MASK;
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index a6e4edd8d4a2..351f0c3cdcd9 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -96,7 +96,7 @@ struct inet_frags {
void (*constructor)(struct inet_frag_queue *q,
const void *arg);
void (*destructor)(struct inet_frag_queue *);
- void (*frag_expire)(unsigned long data);
+ void (*frag_expire)(struct timer_list *t);
struct kmem_cache *frags_cachep;
const char *frags_cache_name;
};
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 2dbbbff5e1e3..9141e95529e7 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -111,6 +111,7 @@ struct inet_bind_hashbucket {
*/
struct inet_listen_hashbucket {
spinlock_t lock;
+ unsigned int count;
struct hlist_head head;
};
@@ -132,12 +133,13 @@ struct inet_hashinfo {
/* Ok, let's try this, I give up, we do need a local binding
* TCP hash as well as the others for fast bind/connect.
*/
+ struct kmem_cache *bind_bucket_cachep;
struct inet_bind_hashbucket *bhash;
-
unsigned int bhash_size;
- /* 4 bytes hole on 64 bit */
- struct kmem_cache *bind_bucket_cachep;
+ /* The 2nd listener table hashed by local port and address */
+ unsigned int lhash2_mask;
+ struct inet_listen_hashbucket *lhash2;
/* All the above members are written once at bootup and
* never written again _or_ are predominantly read-access.
@@ -145,14 +147,25 @@ struct inet_hashinfo {
* Now align to a new cache line as all the following members
* might be often dirty.
*/
- /* All sockets in TCP_LISTEN state will be in here. This is the only
- * table where wildcard'd TCP sockets can exist. Hash function here
- * is just local port number.
+ /* All sockets in TCP_LISTEN state will be in listening_hash.
+ * This is the only table where wildcard'd TCP sockets can
+ * exist. listening_hash is only hashed by local port number.
+ * If lhash2 is initialized, the same socket will also be hashed
+ * to lhash2 by port and address.
*/
struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE]
____cacheline_aligned_in_smp;
};
+#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
+ hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
+
+static inline struct inet_listen_hashbucket *
+inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash)
+{
+ return &h->lhash2[hash & h->lhash2_mask];
+}
+
static inline struct inet_ehash_bucket *inet_ehash_bucket(
struct inet_hashinfo *hashinfo,
unsigned int hash)
@@ -208,6 +221,10 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child);
void inet_put_port(struct sock *sk);
void inet_hashinfo_init(struct inet_hashinfo *h);
+void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
+ unsigned long numentries, int scale,
+ unsigned long low_limit,
+ unsigned long high_limit);
bool inet_ehash_insert(struct sock *sk, struct sock *osk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index db8162dd8c0b..0a671c32d6b9 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -17,7 +17,6 @@
#define _INET_SOCK_H
#include <linux/bitops.h>
-#include <linux/kmemcheck.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/jhash.h>
@@ -84,7 +83,6 @@ struct inet_request_sock {
#define ireq_state req.__req_common.skc_state
#define ireq_family req.__req_common.skc_family
- kmemcheck_bitfield_begin(flags);
u16 snd_wscale : 4,
rcv_wscale : 4,
tstamp_ok : 1,
@@ -92,8 +90,8 @@ struct inet_request_sock {
wscale_ok : 1,
ecn_ok : 1,
acked : 1,
- no_srccheck: 1;
- kmemcheck_bitfield_end(flags);
+ no_srccheck: 1,
+ smc_ok : 1;
u32 ir_mark;
union {
struct ip_options_rcu __rcu *ireq_opt;
@@ -293,6 +291,31 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
int inet_sk_rebuild_header(struct sock *sk);
+/**
+ * inet_sk_state_load - read sk->sk_state for lockless contexts
+ * @sk: socket pointer
+ *
+ * Paired with inet_sk_state_store(). Used in places we don't hold socket lock:
+ * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
+ */
+static inline int inet_sk_state_load(const struct sock *sk)
+{
+ /* state change might impact lockless readers. */
+ return smp_load_acquire(&sk->sk_state);
+}
+
+/**
+ * inet_sk_state_store - update sk->sk_state
+ * @sk: socket pointer
+ * @newstate: new state
+ *
+ * Paired with inet_sk_state_load(). Should be used in contexts where
+ * state change might impact lockless readers.
+ */
+void inet_sk_state_store(struct sock *sk, int newstate);
+
+void inet_sk_set_state(struct sock *sk, int state);
+
static inline unsigned int __inet_ehashfn(const __be32 laddr,
const __u16 lport,
const __be32 faddr,
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 6a75d67a30fd..899495589a7e 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -15,8 +15,6 @@
#ifndef _INET_TIMEWAIT_SOCK_
#define _INET_TIMEWAIT_SOCK_
-
-#include <linux/kmemcheck.h>
#include <linux/list.h>
#include <linux/timer.h>
#include <linux/types.h>
@@ -69,14 +67,12 @@ struct inet_timewait_sock {
/* Socket demultiplex comparisons on incoming packets. */
/* these three are in inet_sock */
__be16 tw_sport;
- kmemcheck_bitfield_begin(flags);
/* And these are ours. */
unsigned int tw_kill : 1,
tw_transparent : 1,
tw_flowlabel : 20,
tw_pad : 2, /* 2 bits hole */
tw_tos : 8;
- kmemcheck_bitfield_end(flags);
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
};
@@ -97,8 +93,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
struct inet_timewait_death_row *dr,
const int state);
-void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
- struct inet_hashinfo *hashinfo);
+void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
+ struct inet_hashinfo *hashinfo);
void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo,
bool rearm);
diff --git a/include/net/ip.h b/include/net/ip.h
index 9896f46cbbf1..746abff9ce51 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -26,14 +26,17 @@
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/skbuff.h>
+#include <linux/jhash.h>
#include <net/inet_sock.h>
#include <net/route.h>
#include <net/snmp.h>
#include <net/flow.h>
#include <net/flow_dissector.h>
+#include <net/netns/hash.h>
#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */
+#define IPV4_MIN_MTU 68 /* RFC 791 */
struct sock;
@@ -521,6 +524,13 @@ static inline unsigned int ipv4_addr_hash(__be32 ip)
return (__force unsigned int) ip;
}
+static inline u32 ipv4_portaddr_hash(const struct net *net,
+ __be32 saddr,
+ unsigned int port)
+{
+ return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
+}
+
bool ip_call_ra_chain(struct sk_buff *skb);
/*
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index d060d711a624..34ec321d6a03 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -29,6 +29,14 @@
#define FIB6_TABLE_HASHSZ 1
#endif
+#define RT6_DEBUG 2
+
+#if RT6_DEBUG >= 3
+#define RT6_TRACE(x...) pr_debug(x)
+#else
+#define RT6_TRACE(x...) do { ; } while (0)
+#endif
+
struct rt6_info;
struct fib6_config {
@@ -60,25 +68,30 @@ struct fib6_config {
};
struct fib6_node {
- struct fib6_node *parent;
- struct fib6_node *left;
- struct fib6_node *right;
+ struct fib6_node __rcu *parent;
+ struct fib6_node __rcu *left;
+ struct fib6_node __rcu *right;
#ifdef CONFIG_IPV6_SUBTREES
- struct fib6_node *subtree;
+ struct fib6_node __rcu *subtree;
#endif
- struct rt6_info *leaf;
+ struct rt6_info __rcu *leaf;
__u16 fn_bit; /* bit key */
__u16 fn_flags;
int fn_sernum;
- struct rt6_info *rr_ptr;
+ struct rt6_info __rcu *rr_ptr;
struct rcu_head rcu;
};
+struct fib6_gc_args {
+ int timeout;
+ int more;
+};
+
#ifndef CONFIG_IPV6_SUBTREES
#define FIB6_SUBTREE(fn) NULL
#else
-#define FIB6_SUBTREE(fn) ((fn)->subtree)
+#define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1))
#endif
struct mx6_config {
@@ -98,8 +111,26 @@ struct rt6key {
struct fib6_table;
+struct rt6_exception_bucket {
+ struct hlist_head chain;
+ int depth;
+};
+
+struct rt6_exception {
+ struct hlist_node hlist;
+ struct rt6_info *rt6i;
+ unsigned long stamp;
+ struct rcu_head rcu;
+};
+
+#define FIB6_EXCEPTION_BUCKET_SIZE_SHIFT 10
+#define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT)
+#define FIB6_MAX_DEPTH 5
+
struct rt6_info {
struct dst_entry dst;
+ struct rt6_info __rcu *rt6_next;
+ struct rt6_info *from;
/*
* Tail elements of dst_entry (__refcnt etc.)
@@ -118,6 +149,7 @@ struct rt6_info {
*/
struct list_head rt6i_siblings;
unsigned int rt6i_nsiblings;
+ atomic_t rt6i_nh_upper_bound;
atomic_t rt6i_ref;
@@ -134,14 +166,27 @@ struct rt6_info {
struct inet6_dev *rt6i_idev;
struct rt6_info * __percpu *rt6i_pcpu;
+ struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
u32 rt6i_metric;
u32 rt6i_pmtu;
/* more non-fragment space at head required */
+ int rt6i_nh_weight;
unsigned short rt6i_nfheader_len;
u8 rt6i_protocol;
+ u8 exception_bucket_flushed:1,
+ should_flush:1,
+ unused:6;
};
+#define for_each_fib6_node_rt_rcu(fn) \
+ for (rt = rcu_dereference((fn)->leaf); rt; \
+ rt = rcu_dereference(rt->rt6_next))
+
+#define for_each_fib6_walker_rt(w) \
+ for (rt = (w)->leaf; rt; \
+ rt = rcu_dereference_protected(rt->rt6_next, 1))
+
static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
{
return ((struct rt6_info *)dst)->rt6i_idev;
@@ -163,11 +208,9 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
{
struct rt6_info *rt;
- for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES);
- rt = (struct rt6_info *)rt->dst.from);
+ for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from);
if (rt && rt != rt0)
rt0->dst.expires = rt->dst.expires;
-
dst_set_expires(&rt0->dst, timeout);
rt0->rt6i_flags |= RTF_EXPIRES;
}
@@ -188,6 +231,8 @@ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
if (fn) {
*cookie = fn->fn_sernum;
+ /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */
+ smp_rmb();
status = true;
}
@@ -200,8 +245,8 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
u32 cookie = 0;
if (rt->rt6i_flags & RTF_PCPU ||
- (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
- rt = (struct rt6_info *)(rt->dst.from);
+ (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
+ rt = rt->from;
rt6_get_cookie_safe(rt, &cookie);
@@ -248,7 +293,6 @@ struct fib6_walker {
struct fib6_node *root, *node;
struct rt6_info *leaf;
enum fib6_walk_state state;
- bool prune;
unsigned int skip;
unsigned int count;
int (*func)(struct fib6_walker *);
@@ -256,12 +300,15 @@ struct fib6_walker {
};
struct rt6_statistics {
- __u32 fib_nodes;
- __u32 fib_route_nodes;
- __u32 fib_rt_alloc; /* permanent routes */
- __u32 fib_rt_entries; /* rt entries in table */
- __u32 fib_rt_cache; /* cache routes */
- __u32 fib_discarded_routes;
+ __u32 fib_nodes; /* all fib6 nodes */
+ __u32 fib_route_nodes; /* intermediate nodes */
+ __u32 fib_rt_entries; /* rt entries in fib table */
+ __u32 fib_rt_cache; /* cached rt entries in exception table */
+ __u32 fib_discarded_routes; /* total number of routes deleted */
+
+ /* The following stats are not protected by any lock */
+ atomic_t fib_rt_alloc; /* total number of routes alloced */
+ atomic_t fib_rt_uncache; /* rt entries in uncached list */
};
#define RTN_TL_ROOT 0x0001
@@ -277,7 +324,7 @@ struct rt6_statistics {
struct fib6_table {
struct hlist_node tb6_hlist;
u32 tb6_id;
- rwlock_t tb6_lock;
+ spinlock_t tb6_lock;
struct fib6_node tb6_root;
struct inet_peer_base tb6_peers;
unsigned int flags;
@@ -325,7 +372,8 @@ struct fib6_node *fib6_lookup(struct fib6_node *root,
struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *daddr, int dst_len,
- const struct in6_addr *saddr, int src_len);
+ const struct in6_addr *saddr, int src_len,
+ bool exact_match);
void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
void *arg);
@@ -358,6 +406,9 @@ void __net_exit fib6_notifier_exit(struct net *net);
unsigned int fib6_tables_seq_read(struct net *net);
int fib6_tables_dump(struct net *net, struct notifier_block *nb);
+void fib6_update_sernum(struct rt6_info *rt);
+void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt);
+
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
int fib6_rules_init(void);
void fib6_rules_cleanup(void);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index bee528135cf1..27d23a65f3cd 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
+static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
+{
+ return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+ RTF_GATEWAY;
+}
+
void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
@@ -96,6 +102,11 @@ int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack);
int ip6_ins_rt(struct rt6_info *);
int ip6_del_rt(struct rt6_info *);
+void rt6_flush_exceptions(struct rt6_info *rt);
+int rt6_remove_exception_rt(struct rt6_info *rt);
+void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
+ unsigned long now);
+
static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
const struct in6_addr *daddr,
unsigned int prefs,
@@ -160,10 +171,13 @@ struct rt6_rtnl_dump_arg {
};
int rt6_dump_route(struct rt6_info *rt, void *p_arg);
-void rt6_ifdown(struct net *net, struct net_device *dev);
void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
+void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
+void rt6_disable_ip(struct net_device *dev, unsigned long event);
+void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
+void rt6_multipath_rebalance(struct rt6_info *rt);
static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
{
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index d66f70f63734..236e40ba06bf 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -36,6 +36,10 @@ struct __ip6_tnl_parm {
__be32 o_key;
__u32 fwmark;
+ __u32 index; /* ERSPAN type II index */
+ __u8 erspan_ver; /* ERSPAN version */
+ __u8 dir; /* ERSPAN direction */
+ __u16 hwid; /* ERSPAN hardware ID */
};
/* IPv6 tunnel */
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 1a7f7e424320..f80524396c06 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -122,9 +122,6 @@ struct fib_info {
#define fib_rtt fib_metrics->metrics[RTAX_RTT-1]
#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
int fib_nhs;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- int fib_weight;
-#endif
struct rcu_head rcu;
struct fib_nh fib_nh[0];
#define fib_dev fib_nh[0].nh_dev
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index eb2321a13506..1f16773cfd76 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -116,8 +116,11 @@ struct ip_tunnel {
u32 o_seqno; /* The last output seqno */
int tun_hlen; /* Precalculated header length */
- /* This field used only by ERSPAN */
+ /* These four fields used only by ERSPAN */
u32 index; /* ERSPAN type II index */
+ u8 erspan_ver; /* ERSPAN version */
+ u8 dir; /* ERSPAN direction */
+ u16 hwid; /* ERSPAN hardware ID */
struct dst_cache dst_cache;
@@ -259,7 +262,8 @@ int ip_tunnel_get_iflink(const struct net_device *dev);
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname);
-void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);
+void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
+ struct rtnl_link_ops *ops);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 5d08c1950e7d..eb0bec043c96 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -69,8 +69,7 @@ struct ip_vs_iphdr {
};
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
- int len, void *buffer,
- const struct ip_vs_iphdr *ipvsh)
+ int len, void *buffer)
{
return skb_header_pointer(skb, offset, len, buffer);
}
@@ -984,12 +983,12 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
{
- return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]);
+ return READ_ONCE(ipvs->sysctl_sync_threshold[1]);
}
static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
{
- return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period);
+ return READ_ONCE(ipvs->sysctl_sync_refresh_period);
}
static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
@@ -1014,7 +1013,7 @@ static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
{
- return ACCESS_ONCE(ipvs->sysctl_sync_ports);
+ return READ_ONCE(ipvs->sysctl_sync_ports);
}
static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 6eac5cf8f1e6..8606c9113d3f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -22,6 +22,7 @@
#include <net/flow.h>
#include <net/flow_dissector.h>
#include <net/snmp.h>
+#include <net/netns/hash.h>
#define SIN6_LEN_RFC2133 24
@@ -51,6 +52,46 @@
#define IPV6_DEFAULT_HOPLIMIT 64
#define IPV6_DEFAULT_MCASTHOPS 1
+/* Limits on Hop-by-Hop and Destination options.
+ *
+ * Per RFC8200 there is no limit on the maximum number or lengths of options in
+ * Hop-by-Hop or Destination options other than the packet must fit in an MTU.
+ * We allow configurable limits in order to mitigate potential denial of
+ * service attacks.
+ *
+ * There are three limits that may be set:
+ * - Limit the number of options in a Hop-by-Hop or Destination options
+ * extension header
+ * - Limit the byte length of a Hop-by-Hop or Destination options extension
+ * header
+ * - Disallow unknown options
+ *
+ * The limits are expressed in corresponding sysctls:
+ *
+ * ipv6.sysctl.max_dst_opts_cnt
+ * ipv6.sysctl.max_hbh_opts_cnt
+ * ipv6.sysctl.max_dst_opts_len
+ * ipv6.sysctl.max_hbh_opts_len
+ *
+ * max_*_opts_cnt is the number of TLVs that are allowed for Destination
+ * options or Hop-by-Hop options. If the number is less than zero then unknown
+ * TLVs are disallowed and the number of known options that are allowed is the
+ * absolute value. Setting the value to INT_MAX indicates no limit.
+ *
+ * max_*_opts_len is the length limit in bytes of a Destination or
+ * Hop-by-Hop options extension header. Setting the value to INT_MAX
+ * indicates no length limit.
+ *
+ * If a limit is exceeded when processing an extension header the packet is
+ * silently discarded.
+ */
+
+/* Default limits for Hop-by-Hop and Destination options */
+#define IP6_DEFAULT_MAX_DST_OPTS_CNT 8
+#define IP6_DEFAULT_MAX_HBH_OPTS_CNT 8
+#define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */
+#define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */
+
/*
* Addr type
*
@@ -291,6 +332,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
int flags);
int ip6_flowlabel_init(void);
void ip6_flowlabel_cleanup(void);
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np);
static inline void fl6_sock_release(struct ip6_flowlabel *fl)
{
@@ -300,8 +342,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl)
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info);
-int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
- struct icmp6hdr *thdr, int len);
+void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len);
int ip6_ra_control(struct sock *sk, int sel);
@@ -633,6 +675,22 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
cpu_to_be32(0x0000ffff))) == 0UL;
}
+static inline u32 ipv6_portaddr_hash(const struct net *net,
+ const struct in6_addr *addr6,
+ unsigned int port)
+{
+ unsigned int hash, mix = net_hash_mix(net);
+
+ if (ipv6_addr_any(addr6))
+ hash = jhash_1word(0, mix);
+ else if (ipv6_addr_v4mapped(addr6))
+ hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
+ else
+ hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
+
+ return hash ^ port;
+}
+
/*
* Check for a RFC 4843 ORCHID address
* (Overlay Routable Cryptographic Hash Identifiers)
@@ -727,7 +785,7 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add
__be32 ipv6_select_ident(struct net *net,
const struct in6_addr *daddr,
const struct in6_addr *saddr);
-void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb);
+__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb);
int ip6_dst_hoplimit(struct dst_entry *dst);
@@ -912,6 +970,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
&inet6_sk(sk)->cork);
}
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
+
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index 070e93a17c59..f4c21b5a1242 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -153,7 +153,7 @@ struct iucv_sock_list {
atomic_t autobind_name;
};
-unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
+__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
poll_table *wait);
void iucv_sock_link(struct iucv_sock_list *l, struct sock *s);
void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s);
diff --git a/include/net/llc_c_ac.h b/include/net/llc_c_ac.h
index f3be818e73c1..e766300b3e99 100644
--- a/include/net/llc_c_ac.h
+++ b/include/net/llc_c_ac.h
@@ -171,10 +171,10 @@ int llc_conn_ac_rst_sendack_flag(struct sock *sk, struct sk_buff *skb);
int llc_conn_ac_send_i_rsp_as_ack(struct sock *sk, struct sk_buff *skb);
int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb);
-void llc_conn_busy_tmr_cb(unsigned long timeout_data);
-void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data);
-void llc_conn_ack_tmr_cb(unsigned long timeout_data);
-void llc_conn_rej_tmr_cb(unsigned long timeout_data);
+void llc_conn_busy_tmr_cb(struct timer_list *t);
+void llc_conn_pf_cycle_tmr_cb(struct timer_list *t);
+void llc_conn_ack_tmr_cb(struct timer_list *t);
+void llc_conn_rej_tmr_cb(struct timer_list *t);
void llc_conn_set_p_flag(struct sock *sk, u8 value);
#endif /* LLC_C_AC_H */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 885690fa39c8..c96511fa9198 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1552,6 +1552,9 @@ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif);
* @IEEE80211_KEY_FLAG_RESERVE_TAILROOM: This flag should be set by the
* driver for a key to indicate that sufficient tailroom must always
* be reserved for ICV or MIC, even when HW encryption is enabled.
+ * @IEEE80211_KEY_FLAG_PUT_MIC_SPACE: This flag should be set by the driver for
+ * a TKIP key if it only requires MIC space. Do not set together with
+ * @IEEE80211_KEY_FLAG_GENERATE_MMIC on the same key.
*/
enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0),
@@ -1562,6 +1565,7 @@ enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_PUT_IV_SPACE = BIT(5),
IEEE80211_KEY_FLAG_RX_MGMT = BIT(6),
IEEE80211_KEY_FLAG_RESERVE_TAILROOM = BIT(7),
+ IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8),
};
/**
@@ -1593,8 +1597,8 @@ struct ieee80211_key_conf {
u8 icv_len;
u8 iv_len;
u8 hw_key_idx;
- u8 flags;
s8 keyidx;
+ u16 flags;
u8 keylen;
u8 key[0];
};
@@ -2056,6 +2060,9 @@ struct ieee80211_txq {
* The stack will not do fragmentation.
* The callback for @set_frag_threshold should be set as well.
*
+ * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on
+ * TDLS links.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2098,6 +2105,7 @@ enum ieee80211_hw_flags {
IEEE80211_HW_TX_FRAG_LIST,
IEEE80211_HW_REPORTS_LOW_ACK,
IEEE80211_HW_SUPPORTS_TX_FRAG,
+ IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -4141,7 +4149,7 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid);
* The TX headroom reserved by mac80211 for its own tx_status functions.
* This is enough for the radiotap header.
*/
-#define IEEE80211_TX_STATUS_HEADROOM 14
+#define IEEE80211_TX_STATUS_HEADROOM ALIGN(14, 4)
/**
* ieee80211_sta_set_buffered - inform mac80211 about driver-buffered frames
@@ -4470,18 +4478,24 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
* ieee80211_nullfunc_get - retrieve a nullfunc template
* @hw: pointer obtained from ieee80211_alloc_hw().
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @qos_ok: QoS NDP is acceptable to the caller, this should be set
+ * if at all possible
*
* Creates a Nullfunc template which can, for example, uploaded to
* hardware. The template must be updated after association so that correct
* BSSID and address is used.
*
+ * If @qos_ok is set and the association is to an AP with QoS/WMM, the
+ * returned packet will be QoS NDP.
+ *
* Note: Caller (or hardware) is responsible for setting the
* &IEEE80211_FCTL_PM bit as well as Duration and Sequence Control fields.
*
* Return: The nullfunc template. %NULL on error.
*/
struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ bool qos_ok);
/**
* ieee80211_probereq_get - retrieve a Probe Request template
@@ -5441,8 +5455,14 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
*/
void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn);
+/**
+ * ieee80211_manage_rx_ba_offl - helper to queue an RX BA work
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback
+ * @addr: station mac address
+ * @tid: the rx tid
+ */
void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr,
- unsigned int bit);
+ unsigned int tid);
/**
* ieee80211_start_rx_ba_session_offl - start a Rx BA session
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index a964366a7ef5..e421f86af043 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -191,8 +191,8 @@ struct neigh_hash_table {
struct neigh_table {
int family;
- int entry_size;
- int key_len;
+ unsigned int entry_size;
+ unsigned int key_len;
__be16 protocol;
__u32 (*hash)(const void *pkey,
const struct net_device *dev,
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 10f99dafd5ac..f306b2aa15a4 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -51,7 +51,7 @@ struct net {
refcount_t passive; /* To decided when the network
* namespace should be freed.
*/
- atomic_t count; /* To decided when the network
+ refcount_t count; /* To decided when the network
* namespace should be shut down.
*/
spinlock_t rules_mod_lock;
@@ -195,7 +195,7 @@ void __put_net(struct net *net);
static inline struct net *get_net(struct net *net)
{
- atomic_inc(&net->count);
+ refcount_inc(&net->count);
return net;
}
@@ -206,14 +206,14 @@ static inline struct net *maybe_get_net(struct net *net)
* exists. If the reference count is zero this
* function fails and returns NULL.
*/
- if (!atomic_inc_not_zero(&net->count))
+ if (!refcount_inc_not_zero(&net->count))
net = NULL;
return net;
}
static inline void put_net(struct net *net)
{
- if (atomic_dec_and_test(&net->count))
+ if (refcount_dec_and_test(&net->count))
__put_net(net);
}
@@ -223,6 +223,11 @@ int net_eq(const struct net *net1, const struct net *net2)
return net1 == net2;
}
+static inline int check_net(const struct net *net)
+{
+ return refcount_read(&net->count) != 0;
+}
+
void net_drop_ns(void *);
#else
@@ -247,6 +252,11 @@ int net_eq(const struct net *net1, const struct net *net2)
return 1;
}
+static inline int check_net(const struct net *net)
+{
+ return 1;
+}
+
#define net_drop_ns NULL
#endif
diff --git a/include/net/netevent.h b/include/net/netevent.h
index f728d9cad170..40e7bab68490 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -26,6 +26,7 @@ enum netevent_notif_type {
NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
+ NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */
};
int register_netevent_notifier(struct notifier_block *nb);
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 2cc728ef8cd0..73f825732326 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -11,19 +11,19 @@
#define _NF_CONNTRACK_IPV4_H
-extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
+const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
#ifdef CONFIG_NF_CT_PROTO_DCCP
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4;
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4;
#endif
int nf_conntrack_ipv4_compat_init(void);
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 79a335c0d8b8..effa8dfba68c 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -2,19 +2,19 @@
#ifndef _NF_CONNTRACK_IPV6_H
#define _NF_CONNTRACK_IPV6_H
-extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
+extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
#ifdef CONFIG_NF_CT_PROTO_DCCP
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6;
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6;
#endif
#include <linux/sysctl.h>
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 792c3f6d30ce..062dc19b5840 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -213,11 +213,6 @@ static inline bool nf_ct_kill(struct nf_conn *ct)
return nf_ct_delete(ct, 0, 0);
}
-/* These are for NAT. Icky. */
-extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
- enum ip_conntrack_dir dir,
- u32 seq);
-
/* Set all unconfirmed conntrack as dying */
void nf_ct_unconfirmed_destroy(struct net *);
@@ -285,7 +280,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
struct kernel_param;
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
+int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
int nf_conntrack_hash_resize(unsigned int hashsize);
extern struct hlist_nulls_head *nf_conntrack_hash;
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
new file mode 100644
index 000000000000..adf8db44cf86
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -0,0 +1,17 @@
+#ifndef _NF_CONNTRACK_COUNT_H
+#define _NF_CONNTRACK_COUNT_H
+
+struct nf_conncount_data;
+
+struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
+ unsigned int keylen);
+void nf_conncount_destroy(struct net *net, unsigned int family,
+ struct nf_conncount_data *data);
+
+unsigned int nf_conncount_count(struct net *net,
+ struct nf_conncount_data *data,
+ const u32 *key,
+ unsigned int family,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone);
+#endif
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 510192eb7e9d..a7220eef9aee 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -27,6 +27,9 @@ struct nf_conntrack_l4proto {
/* Resolve clashes on insertion races. */
bool allow_clash;
+ /* protoinfo nlattr size, closes a hole */
+ u16 nlattr_size;
+
/* Try to fill in the third arg: dataoff is offset past network protocol
hdr. Return true if possible. */
bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff,
@@ -43,7 +46,6 @@ struct nf_conntrack_l4proto {
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeouts);
/* Called when a new connection for this protocol found;
@@ -67,8 +69,6 @@ struct nf_conntrack_l4proto {
/* convert protoinfo to nfnetink attributes */
int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
struct nf_conn *ct);
- /* Calculate protoinfo nlattr size */
- int (*nlattr_size)(void);
/* convert nfnetlink attributes to protoinfo */
int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct);
@@ -76,13 +76,11 @@ struct nf_conntrack_l4proto {
int (*tuple_to_nlattr)(struct sk_buff *skb,
const struct nf_conntrack_tuple *t);
/* Calculate tuple nlattr size */
- int (*nlattr_tuple_size)(void);
+ unsigned int (*nlattr_tuple_size)(void);
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
const struct nla_policy *nla_policy;
- size_t nla_size;
-
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
struct {
int (*nlattr_to_obj)(struct nlattr *tb[],
@@ -110,7 +108,7 @@ struct nf_conntrack_l4proto {
};
/* Existing built-in generic protocol */
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
#define MAX_NF_CT_PROTO 256
@@ -127,18 +125,18 @@ int nf_ct_l4proto_pernet_register_one(struct net *net,
void nf_ct_l4proto_pernet_unregister_one(struct net *net,
const struct nf_conntrack_l4proto *proto);
int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *const proto[],
+ const struct nf_conntrack_l4proto *const proto[],
unsigned int num_proto);
void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *const proto[],
+ const struct nf_conntrack_l4proto *const proto[],
unsigned int num_proto);
/* Protocol global registration. */
-int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto);
+int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto);
void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
-int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[],
+int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[],
unsigned int num_proto);
-void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[],
+void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[],
unsigned int num_proto);
/* Generic netlink helpers */
@@ -146,15 +144,27 @@ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple);
int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
-int nf_ct_port_nlattr_tuple_size(void);
+unsigned int nf_ct_port_nlattr_tuple_size(void);
extern const struct nla_policy nf_ct_port_nla_policy[];
#ifdef CONFIG_SYSCTL
-#define LOG_INVALID(net, proto) \
- ((net)->ct.sysctl_log_invalid == (proto) || \
- (net)->ct.sysctl_log_invalid == IPPROTO_RAW)
+__printf(3, 4) __cold
+void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
+ const struct nf_conn *ct,
+ const char *fmt, ...);
+__printf(5, 6) __cold
+void nf_l4proto_log_invalid(const struct sk_buff *skb,
+ struct net *net,
+ u16 pf, u8 protonum,
+ const char *fmt, ...);
#else
-static inline int LOG_INVALID(struct net *net, int proto) { return 0; }
+static inline __printf(5, 6) __cold
+void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net,
+ u16 pf, u8 protonum, const char *fmt, ...) {}
+static inline __printf(3, 4) __cold
+void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
+ const struct nf_conn *ct,
+ const char *fmt, ...) { }
#endif /* CONFIG_SYSCTL */
#endif /*_NF_CONNTRACK_PROTOCOL_H*/
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
new file mode 100644
index 000000000000..833752dd0c58
--- /dev/null
+++ b/include/net/netfilter/nf_flow_table.h
@@ -0,0 +1,126 @@
+#ifndef _NF_FLOW_TABLE_H
+#define _NF_FLOW_TABLE_H
+
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/rcupdate.h>
+#include <net/dst.h>
+
+struct nf_flowtable;
+
+struct nf_flowtable_type {
+ struct list_head list;
+ int family;
+ void (*gc)(struct work_struct *work);
+ void (*free)(struct nf_flowtable *ft);
+ const struct rhashtable_params *params;
+ nf_hookfn *hook;
+ struct module *owner;
+};
+
+struct nf_flowtable {
+ struct rhashtable rhashtable;
+ const struct nf_flowtable_type *type;
+ struct delayed_work gc_work;
+};
+
+enum flow_offload_tuple_dir {
+ FLOW_OFFLOAD_DIR_ORIGINAL,
+ FLOW_OFFLOAD_DIR_REPLY,
+ __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY,
+};
+#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1)
+
+struct flow_offload_tuple {
+ union {
+ struct in_addr src_v4;
+ struct in6_addr src_v6;
+ };
+ union {
+ struct in_addr dst_v4;
+ struct in6_addr dst_v6;
+ };
+ struct {
+ __be16 src_port;
+ __be16 dst_port;
+ };
+
+ int iifidx;
+
+ u8 l3proto;
+ u8 l4proto;
+ u8 dir;
+
+ int oifidx;
+
+ struct dst_entry *dst_cache;
+};
+
+struct flow_offload_tuple_rhash {
+ struct rhash_head node;
+ struct flow_offload_tuple tuple;
+};
+
+#define FLOW_OFFLOAD_SNAT 0x1
+#define FLOW_OFFLOAD_DNAT 0x2
+#define FLOW_OFFLOAD_DYING 0x4
+
+struct flow_offload {
+ struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
+ u32 flags;
+ union {
+ /* Your private driver data here. */
+ u32 timeout;
+ };
+};
+
+#define NF_FLOW_TIMEOUT (30 * HZ)
+
+struct nf_flow_route {
+ struct {
+ struct dst_entry *dst;
+ int ifindex;
+ } tuple[FLOW_OFFLOAD_DIR_MAX];
+};
+
+struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
+ struct nf_flow_route *route);
+void flow_offload_free(struct flow_offload *flow);
+
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
+struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
+ struct flow_offload_tuple *tuple);
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data);
+
+void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
+
+void nf_flow_table_free(struct nf_flowtable *flow_table);
+void nf_flow_offload_work_gc(struct work_struct *work);
+extern const struct rhashtable_params nf_flow_offload_rhash_params;
+
+void flow_offload_dead(struct flow_offload *flow);
+
+int nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
+int nf_flow_dnat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
+
+struct flow_ports {
+ __be16 source, dest;
+};
+
+unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
+unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
+
+#define MODULE_ALIAS_NF_FLOWTABLE(family) \
+ MODULE_ALIAS("nf-flowtable-" __stringify(family))
+
+#endif /* _NF_FLOW_TABLE_H */
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 814058d0f167..a50a69f5334c 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -25,7 +25,7 @@ struct nf_queue_entry {
struct nf_queue_handler {
int (*outfn)(struct nf_queue_entry *entry,
unsigned int queuenum);
- unsigned int (*nf_hook_drop)(struct net *net);
+ void (*nf_hook_drop)(struct net *net);
};
void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 079c69cae2f6..663b015dace5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -9,6 +9,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/u64_stats_sync.h>
+#include <net/netfilter/nf_flow_table.h>
#include <net/netlink.h>
#define NFT_JUMP_STACK_SIZE 16
@@ -54,8 +55,8 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
pkt->xt.state = state;
}
-static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
- struct sk_buff *skb)
+static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
pkt->tprot_set = false;
pkt->tprot = 0;
@@ -63,14 +64,6 @@ static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
pkt->xt.fragoff = 0;
}
-static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- nft_set_pktinfo(pkt, skb, state);
- nft_set_pktinfo_proto_unspec(pkt, skb);
-}
-
/**
* struct nft_verdict - nf_tables verdict
*
@@ -150,22 +143,22 @@ static inline void nft_data_debug(const struct nft_data *data)
* struct nft_ctx - nf_tables rule/set context
*
* @net: net namespace
- * @afi: address family info
* @table: the table the chain is contained in
* @chain: the chain the rule is contained in
* @nla: netlink attributes
* @portid: netlink portID of the original message
* @seq: netlink sequence number
+ * @family: protocol family
* @report: notify via unicast netlink message
*/
struct nft_ctx {
struct net *net;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
const struct nlattr * const *nla;
u32 portid;
u32 seq;
+ u8 family;
bool report;
};
@@ -312,6 +305,7 @@ struct nft_expr;
* @flush: deactivate element in the next generation
* @remove: remove element from set
* @walk: iterate over all set elemeennts
+ * @get: get set elements
* @privsize: function to return size of set private data
* @init: initialize private data of new set instance
* @destroy: destroy private data of set instance
@@ -351,6 +345,10 @@ struct nft_set_ops {
void (*walk)(const struct nft_ctx *ctx,
struct nft_set *set,
struct nft_set_iter *iter);
+ void * (*get)(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ unsigned int flags);
unsigned int (*privsize)(const struct nlattr * const nla[],
const struct nft_set_desc *desc);
@@ -376,6 +374,7 @@ void nft_unregister_set(struct nft_set_type *type);
* @list: table set list node
* @bindings: list of set bindings
* @name: name of the set
+ * @handle: unique handle of the set
* @ktype: key type (numeric type defined by userspace, not used in the kernel)
* @dtype: data type (verdict or numeric type defined by userspace)
* @objtype: object type (see NFT_OBJECT_* definitions)
@@ -398,6 +397,7 @@ struct nft_set {
struct list_head list;
struct list_head bindings;
char *name;
+ u64 handle;
u32 ktype;
u32 dtype;
u32 objtype;
@@ -419,6 +419,11 @@ struct nft_set {
__attribute__((aligned(__alignof__(u64))));
};
+static inline bool nft_set_is_anonymous(const struct nft_set *set)
+{
+ return set->flags & NFT_SET_ANONYMOUS;
+}
+
static inline void *nft_set_priv(const struct nft_set *set)
{
return (void *)set->data;
@@ -878,7 +883,7 @@ enum nft_chain_type {
* @family: address family
* @owner: module owner
* @hook_mask: mask of valid hooks
- * @hooks: hookfn overrides
+ * @hooks: array of hook functions
*/
struct nf_chain_type {
const char *name;
@@ -900,8 +905,6 @@ struct nft_stats {
struct u64_stats_sync syncp;
};
-#define NFT_HOOK_OPS_MAX 2
-
/**
* struct nft_base_chain - nf_tables base chain
*
@@ -913,7 +916,7 @@ struct nft_stats {
* @dev_name: device name that this base chain is attached to (if any)
*/
struct nft_base_chain {
- struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
+ struct nf_hook_ops ops;
const struct nf_chain_type *type;
u8 policy;
u8 flags;
@@ -943,10 +946,13 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
* @chains: chains in the table
* @sets: sets in the table
* @objects: stateful objects in the table
+ * @flowtables: flow tables in the table
* @hgenerator: handle generator state
+ * @handle: table handle
* @use: number of chain references to this table
* @flags: table flag (see enum nft_table_flags)
* @genmask: generation mask
+ * @family: address family
* @name: name of the table
*/
struct nft_table {
@@ -954,46 +960,16 @@ struct nft_table {
struct list_head chains;
struct list_head sets;
struct list_head objects;
+ struct list_head flowtables;
u64 hgenerator;
+ u64 handle;
u32 use;
- u16 flags:14,
+ u16 family:6,
+ flags:8,
genmask:2;
char *name;
};
-enum nft_af_flags {
- NFT_AF_NEEDS_DEV = (1 << 0),
-};
-
-/**
- * struct nft_af_info - nf_tables address family info
- *
- * @list: used internally
- * @family: address family
- * @nhooks: number of hooks in this family
- * @owner: module owner
- * @tables: used internally
- * @flags: family flags
- * @nops: number of hook ops in this family
- * @hook_ops_init: initialization function for chain hook ops
- * @hooks: hookfn overrides for packet validation
- */
-struct nft_af_info {
- struct list_head list;
- int family;
- unsigned int nhooks;
- struct module *owner;
- struct list_head tables;
- u32 flags;
- unsigned int nops;
- void (*hook_ops_init)(struct nf_hook_ops *,
- unsigned int);
- nf_hookfn *hooks[NF_MAX_HOOKS];
-};
-
-int nft_register_afinfo(struct net *, struct nft_af_info *);
-void nft_unregister_afinfo(struct net *, struct nft_af_info *);
-
int nft_register_chain_type(const struct nf_chain_type *);
void nft_unregister_chain_type(const struct nf_chain_type *);
@@ -1011,9 +987,9 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
* @name: name of this stateful object
* @genmask: generation mask
* @use: number of references to this stateful object
- * @data: object data, layout depends on type
+ * @handle: unique object handle
* @ops: object operations
- * @data: pointer to object data
+ * @data: object data, layout depends on type
*/
struct nft_object {
struct list_head list;
@@ -1021,6 +997,7 @@ struct nft_object {
struct nft_table *table;
u32 genmask:2,
use:30;
+ u64 handle;
/* runtime data below here */
const struct nft_object_ops *ops ____cacheline_aligned;
unsigned char data[]
@@ -1092,6 +1069,46 @@ int nft_register_obj(struct nft_object_type *obj_type);
void nft_unregister_obj(struct nft_object_type *obj_type);
/**
+ * struct nft_flowtable - nf_tables flow table
+ *
+ * @list: flow table list node in table list
+ * @table: the table the flow table is contained in
+ * @name: name of this flow table
+ * @hooknum: hook number
+ * @priority: hook priority
+ * @ops_len: number of hooks in array
+ * @genmask: generation mask
+ * @use: number of references to this flow table
+ * @handle: unique object handle
+ * @data: rhashtable and garbage collector
+ * @ops: array of hooks
+ */
+struct nft_flowtable {
+ struct list_head list;
+ struct nft_table *table;
+ char *name;
+ int hooknum;
+ int priority;
+ int ops_len;
+ u32 genmask:2,
+ use:30;
+ u64 handle;
+ /* runtime data below here */
+ struct nf_hook_ops *ops ____cacheline_aligned;
+ struct nf_flowtable data;
+};
+
+struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
+ const struct nlattr *nla,
+ u8 genmask);
+void nft_flow_table_iterate(struct net *net,
+ void (*iter)(struct nf_flowtable *flowtable, void *data),
+ void *data);
+
+void nft_register_flowtable_type(struct nf_flowtable_type *type);
+void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
+
+/**
* struct nft_traceinfo - nft tracing information and state
*
* @pkt: pktinfo currently processed
@@ -1120,12 +1137,6 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
void nft_trace_notify(struct nft_traceinfo *info);
-#define nft_dereference(p) \
- nfnl_dereference(p, NFNL_SUBSYS_NFTABLES)
-
-#define MODULE_ALIAS_NFT_FAMILY(family) \
- MODULE_ALIAS("nft-afinfo-" __stringify(family))
-
#define MODULE_ALIAS_NFT_CHAIN(family, name) \
MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)
@@ -1165,8 +1176,8 @@ static inline u8 nft_genmask_next(const struct net *net)
static inline u8 nft_genmask_cur(const struct net *net)
{
- /* Use ACCESS_ONCE() to prevent refetching the value for atomicity */
- return 1 << ACCESS_ONCE(net->nft.gencursor);
+ /* Use READ_ONCE() to prevent refetching the value for atomicity */
+ return 1 << READ_ONCE(net->nft.gencursor);
}
#define NFT_GENMASK_ANY ((1 << 0) | (1 << 1))
@@ -1327,4 +1338,11 @@ struct nft_trans_obj {
#define nft_trans_obj(trans) \
(((struct nft_trans_obj *)trans->data)->obj)
+struct nft_trans_flowtable {
+ struct nft_flowtable *flowtable;
+};
+
+#define nft_trans_flowtable(trans) \
+ (((struct nft_trans_flowtable *)trans->data)->flowtable)
+
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index f0896ba456c4..ed7b511f0a59 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -5,15 +5,11 @@
#include <net/netfilter/nf_tables.h>
#include <net/ip.h>
-static inline void
-nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
struct iphdr *ip;
- nft_set_pktinfo(pkt, skb, state);
-
ip = ip_hdr(pkt->skb);
pkt->tprot_set = true;
pkt->tprot = ip->protocol;
@@ -21,10 +17,8 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
}
-static inline int
-__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
struct iphdr *iph, _iph;
u32 len, thoff;
@@ -52,16 +46,11 @@ __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
return 0;
}
-static inline void
-nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
- nft_set_pktinfo(pkt, skb, state);
- if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0)
- nft_set_pktinfo_proto_unspec(pkt, skb);
+ if (__nft_set_pktinfo_ipv4_validate(pkt, skb) < 0)
+ nft_set_pktinfo_unspec(pkt, skb);
}
-extern struct nft_af_info nft_af_ipv4;
-
#endif
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index b8065b72f56e..dabe6fdb553a 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -5,20 +5,16 @@
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/ipv6.h>
-static inline void
-nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
unsigned int flags = IP6_FH_F_AUTH;
int protohdr, thoff = 0;
unsigned short frag_off;
- nft_set_pktinfo(pkt, skb, state);
-
protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
if (protohdr < 0) {
- nft_set_pktinfo_proto_unspec(pkt, skb);
+ nft_set_pktinfo_unspec(pkt, skb);
return;
}
@@ -28,10 +24,8 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
pkt->xt.fragoff = frag_off;
}
-static inline int
-__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
unsigned int flags = IP6_FH_F_AUTH;
@@ -68,16 +62,11 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
#endif
}
-static inline void
-nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
- nft_set_pktinfo(pkt, skb, state);
- if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0)
- nft_set_pktinfo_proto_unspec(pkt, skb);
+ if (__nft_set_pktinfo_ipv6_validate(pkt, skb) < 0)
+ nft_set_pktinfo_unspec(pkt, skb);
}
-extern struct nft_af_info nft_af_ipv6;
-
#endif
diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index ecf238b8862c..ca9bd9fba5b5 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -8,7 +8,7 @@
#include <linux/spinlock.h>
-struct dev_rcv_lists;
+struct can_dev_rcv_lists;
struct s_stats;
struct s_pstats;
@@ -28,7 +28,7 @@ struct netns_can {
#endif
/* receive filters subscribed for 'all' CAN devices */
- struct dev_rcv_lists *can_rx_alldev_list;
+ struct can_dev_rcv_lists *can_rx_alldev_list;
spinlock_t can_rcvlists_lock;
struct timer_list can_stattimer;/* timer for statistics update */
struct s_stats *can_stats; /* packet statistics */
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 0ad4d0c71228..36c2d998a43c 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -11,7 +11,10 @@ struct netns_core {
int sysctl_somaxconn;
- struct prot_inuse __percpu *inuse;
+#ifdef CONFIG_PROC_FS
+ int __percpu *sock_inuse;
+ struct prot_inuse __percpu *prot_inuse;
+#endif
};
#endif
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 8fcff2837484..44668c29701a 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -37,6 +37,8 @@ struct inet_timewait_death_row {
int sysctl_max_tw_buckets;
};
+struct tcp_fastopen_context;
+
struct netns_ipv4 {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
@@ -53,6 +55,7 @@ struct netns_ipv4 {
struct fib_table __rcu *fib_main;
struct fib_table __rcu *fib_default;
#endif
+ bool fib_has_custom_local_routes;
#ifdef CONFIG_IP_ROUTE_CLASSID
int fib_num_tclassid_users;
#endif
@@ -126,8 +129,43 @@ struct netns_ipv4 {
int sysctl_tcp_sack;
int sysctl_tcp_window_scaling;
int sysctl_tcp_timestamps;
+ int sysctl_tcp_early_retrans;
+ int sysctl_tcp_recovery;
+ int sysctl_tcp_thin_linear_timeouts;
+ int sysctl_tcp_slow_start_after_idle;
+ int sysctl_tcp_retrans_collapse;
+ int sysctl_tcp_stdurg;
+ int sysctl_tcp_rfc1337;
+ int sysctl_tcp_abort_on_overflow;
+ int sysctl_tcp_fack;
+ int sysctl_tcp_max_reordering;
+ int sysctl_tcp_dsack;
+ int sysctl_tcp_app_win;
+ int sysctl_tcp_adv_win_scale;
+ int sysctl_tcp_frto;
+ int sysctl_tcp_nometrics_save;
+ int sysctl_tcp_moderate_rcvbuf;
+ int sysctl_tcp_tso_win_divisor;
+ int sysctl_tcp_workaround_signed_windows;
+ int sysctl_tcp_limit_output_bytes;
+ int sysctl_tcp_challenge_ack_limit;
+ int sysctl_tcp_min_tso_segs;
+ int sysctl_tcp_min_rtt_wlen;
+ int sysctl_tcp_autocorking;
+ int sysctl_tcp_invalid_ratelimit;
+ int sysctl_tcp_pacing_ss_ratio;
+ int sysctl_tcp_pacing_ca_ratio;
+ int sysctl_tcp_wmem[3];
+ int sysctl_tcp_rmem[3];
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
+ int sysctl_tcp_fastopen;
+ const struct tcp_congestion_ops __rcu *tcp_congestion_control;
+ struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
+ spinlock_t tcp_fastopen_ctx_lock;
+ unsigned int sysctl_tcp_fastopen_blackhole_timeout;
+ atomic_t tfo_active_disable_times;
+ unsigned long tfo_active_disable_stamp;
#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
@@ -163,6 +201,9 @@ struct netns_ipv4 {
struct fib_notifier_ops *notifier_ops;
unsigned int fib_seq; /* protected by rtnl_mutex */
+ struct fib_notifier_ops *ipmr_notifier_ops;
+ unsigned int ipmr_seq; /* protected by rtnl_mutex */
+
atomic_t rt_genid;
};
#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index dc825a5ddd7f..987cc4569cb8 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -38,6 +38,10 @@ struct netns_sysctl_ipv6 {
int idgen_delay;
int flowlabel_state_ranges;
int flowlabel_reflect;
+ int max_dst_opts_cnt;
+ int max_hbh_opts_cnt;
+ int max_dst_opts_len;
+ int max_hbh_opts_len;
};
struct netns_ipv6 {
@@ -90,6 +94,11 @@ struct netns_ipv6 {
atomic_t fib6_sernum;
struct seg6_pernet_data *seg6_data;
struct fib_notifier_ops *notifier_ops;
+ struct {
+ struct hlist_head head;
+ spinlock_t lock;
+ u32 seq;
+ } ip6addrlbl_table;
};
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index cc00af2ac2d7..ca043342c0eb 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -17,7 +17,17 @@ struct netns_nf {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header;
#endif
- struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+ struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS];
+ struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS];
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS];
+#endif
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
+#endif
+#if IS_ENABLED(CONFIG_DECNET)
+ struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
+#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
bool defrag_ipv4;
#endif
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 4109b5f3010f..48134353411d 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,14 +7,8 @@
struct nft_af_info;
struct netns_nftables {
- struct list_head af_info;
+ struct list_head tables;
struct list_head commit_list;
- struct nft_af_info *ipv4;
- struct nft_af_info *ipv6;
- struct nft_af_info *inet;
- struct nft_af_info *arp;
- struct nft_af_info *bridge;
- struct nft_af_info *netdev;
unsigned int base_seq;
u8 gencursor;
};
diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index ebc813277662..0db7fb3e4e15 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -122,9 +122,12 @@ struct netns_sctp {
/* Flag to indicate if PR-CONFIG is enabled. */
int reconf_enable;
- /* Flag to idicate if SCTP-AUTH is enabled */
+ /* Flag to indicate if SCTP-AUTH is enabled */
int auth_enable;
+ /* Flag to indicate if stream interleave is enabled */
+ int intl_enable;
+
/*
* Policy to control SCTP IPv4 address scoping
* 0 - Disable IPv4 address scoping
diff --git a/include/net/nsh.h b/include/net/nsh.h
index a1eaea20be96..350b1ad11c7f 100644
--- a/include/net/nsh.h
+++ b/include/net/nsh.h
@@ -304,4 +304,7 @@ static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags,
NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
}
+int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh);
+int nsh_pop(struct sk_buff *skb);
+
#endif /* __NET_NSH_H */
diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index 039cc29cb4a8..51e1a2a45d02 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -108,8 +108,10 @@ struct phonet_protocol {
int sock_type;
};
-int phonet_proto_register(unsigned int protocol, struct phonet_protocol *pp);
-void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp);
+int phonet_proto_register(unsigned int protocol,
+ const struct phonet_protocol *pp);
+void phonet_proto_unregister(unsigned int protocol,
+ const struct phonet_protocol *pp);
int phonet_sysctl_init(void);
void phonet_sysctl_exit(void);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 70ca2437740e..87406252f0a3 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -19,22 +19,84 @@ struct tcf_walker {
int register_tcf_proto_ops(struct tcf_proto_ops *ops);
int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
+enum tcf_block_binder_type {
+ TCF_BLOCK_BINDER_TYPE_UNSPEC,
+ TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
+ TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
+};
+
+struct tcf_block_ext_info {
+ enum tcf_block_binder_type binder_type;
+ tcf_chain_head_change_t *chain_head_change;
+ void *chain_head_change_priv;
+ u32 block_index;
+};
+
+struct tcf_block_cb;
bool tcf_queue_work(struct work_struct *work);
#ifdef CONFIG_NET_CLS
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
bool create);
void tcf_chain_put(struct tcf_chain *chain);
+void tcf_block_netif_keep_dst(struct tcf_block *block);
int tcf_block_get(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain);
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ struct netlink_ext_ack *extack);
+int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei,
+ struct netlink_ext_ack *extack);
void tcf_block_put(struct tcf_block *block);
+void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei);
+
+static inline bool tcf_block_shared(struct tcf_block *block)
+{
+ return block->index;
+}
+
+static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
+{
+ WARN_ON(tcf_block_shared(block));
+ return block->q;
+}
+
+static inline struct net_device *tcf_block_dev(struct tcf_block *block)
+{
+ return tcf_block_q(block)->dev_queue->dev;
+}
+
+void *tcf_block_cb_priv(struct tcf_block_cb *block_cb);
+struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident);
+void tcf_block_cb_incref(struct tcf_block_cb *block_cb);
+unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb);
+struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv);
+int tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv);
+void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb);
+void tcf_block_cb_unregister(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident);
+
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode);
#else
static inline
int tcf_block_get(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain)
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ struct netlink_ext_ack *extack)
+{
+ return 0;
+}
+
+static inline
+int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei,
+ struct netlink_ext_ack *extack)
{
return 0;
}
@@ -43,6 +105,86 @@ static inline void tcf_block_put(struct tcf_block *block)
{
}
+static inline
+void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
+{
+}
+
+static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
+{
+ return NULL;
+}
+
+static inline struct net_device *tcf_block_dev(struct tcf_block *block)
+{
+ return NULL;
+}
+
+static inline
+int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb,
+ void *cb_priv)
+{
+ return 0;
+}
+
+static inline
+void tc_setup_cb_block_unregister(struct tcf_block *block, tc_setup_cb_t *cb,
+ void *cb_priv)
+{
+}
+
+static inline
+void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
+{
+ return NULL;
+}
+
+static inline
+struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident)
+{
+ return NULL;
+}
+
+static inline
+void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
+{
+}
+
+static inline
+unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
+{
+ return 0;
+}
+
+static inline
+struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv)
+{
+ return NULL;
+}
+
+static inline
+int tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv)
+{
+ return 0;
+}
+
+static inline
+void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+{
+}
+
+static inline
+void tcf_block_cb_unregister(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident)
+{
+}
+
static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
@@ -57,36 +199,43 @@ __cls_set_class(unsigned long *clp, unsigned long cl)
}
static inline unsigned long
-cls_set_class(struct tcf_proto *tp, unsigned long *clp,
- unsigned long cl)
+cls_set_class(struct Qdisc *q, unsigned long *clp, unsigned long cl)
{
unsigned long old_cl;
-
- tcf_tree_lock(tp);
+
+ sch_tree_lock(q);
old_cl = __cls_set_class(clp, cl);
- tcf_tree_unlock(tp);
-
+ sch_tree_unlock(q);
return old_cl;
}
static inline void
tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base)
{
+ struct Qdisc *q = tp->chain->block->q;
unsigned long cl;
- cl = tp->q->ops->cl_ops->bind_tcf(tp->q, base, r->classid);
- cl = cls_set_class(tp, &r->class, cl);
+ /* Check q as it is not set for shared blocks. In that case,
+ * setting class is not supported.
+ */
+ if (!q)
+ return;
+ cl = q->ops->cl_ops->bind_tcf(q, base, r->classid);
+ cl = cls_set_class(q, &r->class, cl);
if (cl)
- tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
+ q->ops->cl_ops->unbind_tcf(q, cl);
}
static inline void
tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
{
+ struct Qdisc *q = tp->chain->block->q;
unsigned long cl;
+ if (!q)
+ return;
if ((cl = __cls_set_class(&r->class, 0)) != 0)
- tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
+ q->ops->cl_ops->unbind_tcf(q, cl);
}
struct tcf_exts {
@@ -94,6 +243,7 @@ struct tcf_exts {
__u32 type; /* for backward compat(TCA_OLD_COMPAT) */
int nr_actions;
struct tc_action **actions;
+ struct net *net;
#endif
/* Map to export classifier specific extension TLV types to the
* generic extensions API. Unsupported extensions must be set to 0.
@@ -107,6 +257,7 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police)
#ifdef CONFIG_NET_CLS_ACT
exts->type = 0;
exts->nr_actions = 0;
+ exts->net = NULL;
exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
GFP_KERNEL);
if (!exts->actions)
@@ -117,6 +268,28 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police)
return 0;
}
+/* Return false if the netns is being destroyed in cleanup_net(). Callers
+ * need to do cleanup synchronously in this case, otherwise they may race with
+ * tc_action_net_exit(). Return true for other cases.
+ */
+static inline bool tcf_exts_get_net(struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ exts->net = maybe_get_net(exts->net);
+ return exts->net != NULL;
+#else
+ return true;
+#endif
+}
+
+static inline void tcf_exts_put_net(struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ if (exts->net)
+ put_net(exts->net);
+#endif
+}
+
static inline void tcf_exts_to_list(const struct tcf_exts *exts,
struct list_head *actions)
{
@@ -203,13 +376,12 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
struct nlattr **tb, struct nlattr *rate_tlv,
- struct tcf_exts *exts, bool ovr);
+ struct tcf_exts *exts, bool ovr,
+ struct netlink_ext_ack *extack);
void tcf_exts_destroy(struct tcf_exts *exts);
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
-int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
- struct net_device **hw_dev);
/**
* struct tcf_pkt_info - packet information
@@ -363,7 +535,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
{
switch (layer) {
case TCF_LAYER_LINK:
- return skb->data;
+ return skb_mac_header(skb);
case TCF_LAYER_NETWORK:
return skb_network_header(skb);
case TCF_LAYER_TRANSPORT:
@@ -385,13 +557,16 @@ static inline int tcf_valid_offset(const struct sk_buff *skb,
#include <net/net_namespace.h>
static inline int
-tcf_change_indev(struct net *net, struct nlattr *indev_tlv)
+tcf_change_indev(struct net *net, struct nlattr *indev_tlv,
+ struct netlink_ext_ack *extack)
{
char indev[IFNAMSIZ];
struct net_device *dev;
- if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ)
+ if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "Interface name too long");
return -EINVAL;
+ }
dev = __dev_get_by_name(net, indev);
if (!dev)
return -ENODEV;
@@ -409,23 +584,27 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
}
#endif /* CONFIG_NET_CLS_IND */
+int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
+ enum tc_setup_type type, void *type_data, bool err_stop);
+
+enum tc_block_command {
+ TC_BLOCK_BIND,
+ TC_BLOCK_UNBIND,
+};
+
+struct tc_block_offload {
+ enum tc_block_command command;
+ enum tcf_block_binder_type binder_type;
+ struct tcf_block *block;
+};
+
struct tc_cls_common_offload {
u32 chain_index;
__be16 protocol;
u32 prio;
- u32 classid;
+ struct netlink_ext_ack *extack;
};
-static inline void
-tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
- const struct tcf_proto *tp)
-{
- cls_common->chain_index = tp->chain->index;
- cls_common->protocol = tp->protocol;
- cls_common->prio = tp->prio;
- cls_common->classid = tp->classid;
-}
-
struct tc_cls_u32_knode {
struct tcf_exts *exts;
struct tc_u32_sel *sel;
@@ -463,10 +642,31 @@ struct tc_cls_u32_offload {
static inline bool tc_can_offload(const struct net_device *dev)
{
- if (!(dev->features & NETIF_F_HW_TC))
+ return dev->features & NETIF_F_HW_TC;
+}
+
+static inline bool tc_can_offload_extack(const struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ bool can = tc_can_offload(dev);
+
+ if (!can)
+ NL_SET_ERR_MSG(extack, "TC offload is disabled on net device");
+
+ return can;
+}
+
+static inline bool
+tc_cls_can_offload_and_chain0(const struct net_device *dev,
+ struct tc_cls_common_offload *common)
+{
+ if (!tc_can_offload_extack(dev, common->extack))
return false;
- if (!dev->netdev_ops->ndo_setup_tc)
+ if (common->chain_index) {
+ NL_SET_ERR_MSG(common->extack,
+ "Driver supports only offload of chain 0");
return false;
+ }
return true;
}
@@ -475,13 +675,6 @@ static inline bool tc_skip_hw(u32 flags)
return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false;
}
-static inline bool tc_should_offload(const struct net_device *dev, u32 flags)
-{
- if (tc_skip_hw(flags))
- return false;
- return tc_can_offload(dev);
-}
-
static inline bool tc_skip_sw(u32 flags)
{
return (flags & TCA_CLS_FLAGS_SKIP_SW) ? true : false;
@@ -504,6 +697,18 @@ static inline bool tc_in_hw(u32 flags)
return (flags & TCA_CLS_FLAGS_IN_HW) ? true : false;
}
+static inline void
+tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
+ const struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
+{
+ cls_common->chain_index = tp->chain->index;
+ cls_common->protocol = tp->protocol;
+ cls_common->prio = tp->prio;
+ if (tc_skip_sw(flags))
+ cls_common->extack = extack;
+}
+
enum tc_fl_command {
TC_CLSFLOWER_REPLACE,
TC_CLSFLOWER_DESTROY,
@@ -518,7 +723,7 @@ struct tc_cls_flower_offload {
struct fl_flow_key *mask;
struct fl_flow_key *key;
struct tcf_exts *exts;
- bool egress_dev;
+ u32 classid;
};
enum tc_matchall_command {
@@ -534,9 +739,7 @@ struct tc_cls_matchall_offload {
};
enum tc_clsbpf_command {
- TC_CLSBPF_ADD,
- TC_CLSBPF_REPLACE,
- TC_CLSBPF_DESTROY,
+ TC_CLSBPF_OFFLOAD,
TC_CLSBPF_STATS,
};
@@ -545,11 +748,20 @@ struct tc_cls_bpf_offload {
enum tc_clsbpf_command command;
struct tcf_exts *exts;
struct bpf_prog *prog;
+ struct bpf_prog *oldprog;
const char *name;
bool exts_integrated;
- u32 gen_flags;
};
+struct tc_mqprio_qopt_offload {
+ /* struct tc_mqprio_qopt must always be the first element */
+ struct tc_mqprio_qopt qopt;
+ u16 mode;
+ u16 shaper;
+ u32 flags;
+ u64 min_rate[TC_QOPT_MAX_QUEUE];
+ u64 max_rate[TC_QOPT_MAX_QUEUE];
+};
/* This structure holds cookie structure that is passed from user
* to the kernel for actions and classifiers
@@ -558,4 +770,61 @@ struct tc_cookie {
u8 *data;
u32 len;
};
+
+struct tc_qopt_offload_stats {
+ struct gnet_stats_basic_packed *bstats;
+ struct gnet_stats_queue *qstats;
+};
+
+enum tc_red_command {
+ TC_RED_REPLACE,
+ TC_RED_DESTROY,
+ TC_RED_STATS,
+ TC_RED_XSTATS,
+};
+
+struct tc_red_qopt_offload_params {
+ u32 min;
+ u32 max;
+ u32 probability;
+ bool is_ecn;
+ struct gnet_stats_queue *qstats;
+};
+
+struct tc_red_qopt_offload {
+ enum tc_red_command command;
+ u32 handle;
+ u32 parent;
+ union {
+ struct tc_red_qopt_offload_params set;
+ struct tc_qopt_offload_stats stats;
+ struct red_stats *xstats;
+ };
+};
+
+enum tc_prio_command {
+ TC_PRIO_REPLACE,
+ TC_PRIO_DESTROY,
+ TC_PRIO_STATS,
+};
+
+struct tc_prio_qopt_offload_params {
+ int bands;
+ u8 priomap[TC_PRIO_MAX + 1];
+ /* In case that a prio qdisc is offloaded and now is changed to a
+ * non-offloadable config, it needs to update the backlog & qlen
+ * values to negate the HW backlog & qlen values (and only them).
+ */
+ struct gnet_stats_queue *qstats;
+};
+
+struct tc_prio_qopt_offload {
+ enum tc_prio_command command;
+ u32 handle;
+ u32 parent;
+ union {
+ struct tc_prio_qopt_offload_params replace_params;
+ struct tc_qopt_offload_stats stats;
+ };
+};
#endif
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index b3869f97d37d..815b92a23936 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -5,7 +5,9 @@
#include <linux/jiffies.h>
#include <linux/ktime.h>
#include <linux/if_vlan.h>
+#include <linux/netdevice.h>
#include <net/sch_generic.h>
+#include <net/net_namespace.h>
#include <uapi/linux/pkt_sched.h>
#define DEFAULT_TX_QUEUE_LEN 1000
@@ -87,7 +89,8 @@ extern struct Qdisc_ops pfifo_head_drop_qdisc_ops;
int fifo_set_limit(struct Qdisc *q, unsigned int limit);
struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
- unsigned int limit);
+ unsigned int limit,
+ struct netlink_ext_ack *extack);
int register_qdisc(struct Qdisc_ops *qops);
int unregister_qdisc(struct Qdisc_ops *qops);
@@ -97,22 +100,24 @@ int qdisc_set_default(const char *id);
void qdisc_hash_add(struct Qdisc *q, bool invisible);
void qdisc_hash_del(struct Qdisc *q);
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
-struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
- struct nlattr *tab);
+ struct nlattr *tab,
+ struct netlink_ext_ack *extack);
void qdisc_put_rtab(struct qdisc_rate_table *tab);
void qdisc_put_stab(struct qdisc_size_table *tab);
void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc);
-int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
- struct net_device *dev, struct netdev_queue *txq,
- spinlock_t *root_lock, bool validate);
+bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+ struct net_device *dev, struct netdev_queue *txq,
+ spinlock_t *root_lock, bool validate);
void __qdisc_run(struct Qdisc *q);
static inline void qdisc_run(struct Qdisc *q)
{
- if (qdisc_run_begin(q))
+ if (qdisc_run_begin(q)) {
__qdisc_run(q);
+ qdisc_run_end(q);
+ }
}
static inline __be16 tc_skb_protocol(const struct sk_buff *skb)
@@ -134,17 +139,18 @@ static inline unsigned int psched_mtu(const struct net_device *dev)
return dev->mtu + dev->hard_header_len;
}
-static inline bool is_classid_clsact_ingress(u32 classid)
+static inline struct net *qdisc_net(struct Qdisc *q)
{
- /* This also returns true for ingress qdisc */
- return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) &&
- TC_H_MIN(classid) != TC_H_MIN(TC_H_MIN_EGRESS);
+ return dev_net(q->dev_queue->dev);
}
-static inline bool is_classid_clsact_egress(u32 classid)
-{
- return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) &&
- TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS);
-}
+struct tc_cbs_qopt_offload {
+ u8 enable;
+ s32 queue;
+ s32 hicredit;
+ s32 locredit;
+ s32 idleslope;
+ s32 sendslope;
+};
#endif
diff --git a/include/net/red.h b/include/net/red.h
index 9a9347710701..9665582c4687 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -168,6 +168,17 @@ static inline void red_set_vars(struct red_vars *v)
v->qcount = -1;
}
+static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
+{
+ if (fls(qth_min) + Wlog > 32)
+ return false;
+ if (fls(qth_max) + Wlog > 32)
+ return false;
+ if (qth_max < qth_min)
+ return false;
+ return true;
+}
+
static inline void red_set_parms(struct red_parms *p,
u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
u8 Scell_log, u8 *stab, u32 max_P)
@@ -179,7 +190,7 @@ static inline void red_set_parms(struct red_parms *p,
p->qth_max = qth_max << Wlog;
p->Wlog = Wlog;
p->Plog = Plog;
- if (delta < 0)
+ if (delta <= 0)
delta = 1;
p->qth_delta = delta;
if (!max_P) {
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index ebc5a2ed8631..f83cacce3308 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -78,7 +78,7 @@ struct regulatory_request {
int wiphy_idx;
enum nl80211_reg_initiator initiator;
enum nl80211_user_reg_hint_type user_reg_hint_type;
- char alpha2[2];
+ char alpha2[3];
enum nl80211_dfs_regions dfs_region;
bool intersect;
bool processed;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 23e22054aa60..347015515a7d 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -150,6 +150,8 @@ struct fastopen_queue {
spinlock_t lock;
int qlen; /* # of pending (TCP_SYN_RECV) reqs */
int max_qlen; /* != 0 iff TFO is currently enabled */
+
+ struct tcp_fastopen_context __rcu *ctx; /* cipher context for cookie */
};
/** struct request_sock_queue - queue of request_socks
diff --git a/include/net/route.h b/include/net/route.h
index d538e6db1afe..1eb9ce470e25 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -217,7 +217,7 @@ unsigned int inet_addr_type_dev_table(struct net *net,
const struct net_device *dev,
__be32 addr);
void ip_rt_multicast_event(struct in_device *);
-int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg);
+int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt);
void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
struct rtable *rt_dst_alloc(struct net_device *dev,
unsigned int flags, u16 type,
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 7b938fbeebc1..14b6b3af8918 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -13,10 +13,10 @@ enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = 1,
};
-int __rtnl_register(int protocol, int msgtype,
- rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
+int rtnl_register_module(struct module *owner, int protocol, int msgtype,
+ rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
int rtnl_unregister(int protocol, int msgtype);
void rtnl_unregister_all(int protocol);
@@ -94,9 +94,6 @@ struct rtnl_link_ops {
int slave_maxtype;
const struct nla_policy *slave_policy;
- int (*slave_validate)(struct nlattr *tb[],
- struct nlattr *data[],
- struct netlink_ext_ack *extack);
int (*slave_changelink)(struct net_device *dev,
struct net_device *slave_dev,
struct nlattr *tb[],
@@ -155,8 +152,6 @@ struct rtnl_af_ops {
size_t (*get_stats_af_size)(const struct net_device *dev);
};
-void __rtnl_af_unregister(struct rtnl_af_ops *ops);
-
void rtnl_af_register(struct rtnl_af_ops *ops);
void rtnl_af_unregister(struct rtnl_af_ops *ops);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 236bfe5b2ffe..e2ab13687fb9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -71,6 +71,8 @@ struct Qdisc {
* qdisc_tree_decrease_qlen() should stop.
*/
#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */
+#define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */
+#define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */
u32 limit;
const struct Qdisc_ops *ops;
struct qdisc_size_table __rcu *stab;
@@ -87,15 +89,14 @@ struct Qdisc {
/*
* For performance sake on SMP, we put highly modified fields at the end
*/
- struct sk_buff *gso_skb ____cacheline_aligned_in_smp;
+ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
struct qdisc_skb_head q;
struct gnet_stats_basic_packed bstats;
seqcount_t running;
struct gnet_stats_queue qstats;
unsigned long state;
struct Qdisc *next_sched;
- struct sk_buff *skb_bad_txq;
- struct rcu_head rcu_head;
+ struct sk_buff_head skb_bad_txq;
int padded;
refcount_t refcnt;
@@ -150,19 +151,23 @@ struct Qdisc_class_ops {
/* Child qdisc manipulation */
struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *);
int (*graft)(struct Qdisc *, unsigned long cl,
- struct Qdisc *, struct Qdisc **);
+ struct Qdisc *, struct Qdisc **,
+ struct netlink_ext_ack *extack);
struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl);
void (*qlen_notify)(struct Qdisc *, unsigned long);
/* Class manipulation routines */
unsigned long (*find)(struct Qdisc *, u32 classid);
int (*change)(struct Qdisc *, u32, u32,
- struct nlattr **, unsigned long *);
+ struct nlattr **, unsigned long *,
+ struct netlink_ext_ack *);
int (*delete)(struct Qdisc *, unsigned long);
void (*walk)(struct Qdisc *, struct qdisc_walker * arg);
/* Filter manipulation */
- struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long);
+ struct tcf_block * (*tcf_block)(struct Qdisc *sch,
+ unsigned long arg,
+ struct netlink_ext_ack *extack);
unsigned long (*bind_tcf)(struct Qdisc *, unsigned long,
u32 classid);
void (*unbind_tcf)(struct Qdisc *, unsigned long);
@@ -179,6 +184,7 @@ struct Qdisc_ops {
const struct Qdisc_class_ops *cl_ops;
char id[IFNAMSIZ];
int priv_size;
+ unsigned int static_flags;
int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch,
@@ -186,15 +192,26 @@ struct Qdisc_ops {
struct sk_buff * (*dequeue)(struct Qdisc *);
struct sk_buff * (*peek)(struct Qdisc *);
- int (*init)(struct Qdisc *, struct nlattr *arg);
+ int (*init)(struct Qdisc *sch, struct nlattr *arg,
+ struct netlink_ext_ack *extack);
void (*reset)(struct Qdisc *);
void (*destroy)(struct Qdisc *);
- int (*change)(struct Qdisc *, struct nlattr *arg);
- void (*attach)(struct Qdisc *);
+ int (*change)(struct Qdisc *sch,
+ struct nlattr *arg,
+ struct netlink_ext_ack *extack);
+ void (*attach)(struct Qdisc *sch);
+ int (*change_tx_queue_len)(struct Qdisc *, unsigned int);
int (*dump)(struct Qdisc *, struct sk_buff *);
int (*dump_stats)(struct Qdisc *, struct gnet_dump *);
+ void (*ingress_block_set)(struct Qdisc *sch,
+ u32 block_index);
+ void (*egress_block_set)(struct Qdisc *sch,
+ u32 block_index);
+ u32 (*ingress_block_get)(struct Qdisc *sch);
+ u32 (*egress_block_get)(struct Qdisc *sch);
+
struct module *owner;
};
@@ -217,14 +234,18 @@ struct tcf_proto_ops {
const struct tcf_proto *,
struct tcf_result *);
int (*init)(struct tcf_proto*);
- void (*destroy)(struct tcf_proto*);
+ void (*destroy)(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack);
void* (*get)(struct tcf_proto*, u32 handle);
int (*change)(struct net *net, struct sk_buff *,
struct tcf_proto*, unsigned long,
u32 handle, struct nlattr **,
- void **, bool);
- int (*delete)(struct tcf_proto*, void *, bool*);
+ void **, bool,
+ struct netlink_ext_ack *);
+ int (*delete)(struct tcf_proto *tp, void *arg,
+ bool *last,
+ struct netlink_ext_ack *);
void (*walk)(struct tcf_proto*, struct tcf_walker *arg);
void (*bind_class)(void *, u32, unsigned long);
@@ -246,8 +267,6 @@ struct tcf_proto {
/* All the rest */
u32 prio;
- u32 classid;
- struct Qdisc *q;
void *data;
const struct tcf_proto_ops *ops;
struct tcf_chain *chain;
@@ -262,9 +281,11 @@ struct qdisc_skb_cb {
unsigned char data[QDISC_CB_PRIV_LEN];
};
+typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
+
struct tcf_chain {
struct tcf_proto __rcu *filter_chain;
- struct tcf_proto __rcu **p_filter_chain;
+ struct list_head filter_chain_list;
struct list_head list;
struct tcf_block *block;
u32 index; /* chain index */
@@ -273,9 +294,33 @@ struct tcf_chain {
struct tcf_block {
struct list_head chain_list;
- struct work_struct work;
+ u32 index; /* block index for shared blocks */
+ unsigned int refcnt;
+ struct net *net;
+ struct Qdisc *q;
+ struct list_head cb_list;
+ struct list_head owner_list;
+ bool keep_dst;
+ unsigned int offloadcnt; /* Number of offloaded filters */
+ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
};
+static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
+{
+ if (*flags & TCA_CLS_FLAGS_IN_HW)
+ return;
+ *flags |= TCA_CLS_FLAGS_IN_HW;
+ block->offloadcnt++;
+}
+
+static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
+{
+ if (!(*flags & TCA_CLS_FLAGS_IN_HW))
+ return;
+ *flags &= ~TCA_CLS_FLAGS_IN_HW;
+ block->offloadcnt--;
+}
+
static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
{
struct qdisc_skb_cb *qcb;
@@ -284,11 +329,31 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
BUILD_BUG_ON(sizeof(qcb->data) < sz);
}
+static inline int qdisc_qlen_cpu(const struct Qdisc *q)
+{
+ return this_cpu_ptr(q->cpu_qstats)->qlen;
+}
+
static inline int qdisc_qlen(const struct Qdisc *q)
{
return q->q.qlen;
}
+static inline int qdisc_qlen_sum(const struct Qdisc *q)
+{
+ __u32 qlen = 0;
+ int i;
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ for_each_possible_cpu(i)
+ qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
+ } else {
+ qlen = q->q.qlen;
+ }
+
+ return qlen;
+}
+
static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
{
return (struct qdisc_skb_cb *)skb->cb;
@@ -361,9 +426,6 @@ static inline void sch_tree_unlock(const struct Qdisc *q)
spin_unlock_bh(qdisc_root_sleeping_lock(q));
}
-#define tcf_tree_lock(tp) sch_tree_lock((tp)->q)
-#define tcf_tree_unlock(tp) sch_tree_unlock((tp)->q)
-
extern struct Qdisc noop_qdisc;
extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_fast_ops;
@@ -413,6 +475,13 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id)
return NULL;
}
+static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid)
+{
+ u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY;
+
+ return (hwtc < netdev_get_num_tc(dev)) ? hwtc : -EINVAL;
+}
+
int qdisc_class_hash_init(struct Qdisc_class_hash *);
void qdisc_class_hash_insert(struct Qdisc_class_hash *,
struct Qdisc_class_common *);
@@ -421,6 +490,7 @@ void qdisc_class_hash_remove(struct Qdisc_class_hash *,
void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *);
void qdisc_class_hash_destroy(struct Qdisc_class_hash *);
+int dev_qdisc_change_tx_queue_len(struct net_device *dev);
void dev_init_scheduler(struct net_device *dev);
void dev_shutdown(struct net_device *dev);
void dev_activate(struct net_device *dev);
@@ -433,9 +503,12 @@ void qdisc_destroy(struct Qdisc *qdisc);
void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n,
unsigned int len);
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
- const struct Qdisc_ops *ops);
+ const struct Qdisc_ops *ops,
+ struct netlink_ext_ack *extack);
+void qdisc_free(struct Qdisc *qdisc);
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
- const struct Qdisc_ops *ops, u32 parentid);
+ const struct Qdisc_ops *ops, u32 parentid,
+ struct netlink_ext_ack *extack);
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
const struct qdisc_size_table *stab);
int skb_do_redirect(struct sk_buff *);
@@ -621,12 +694,39 @@ static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch,
sch->qstats.backlog -= qdisc_pkt_len(skb);
}
+static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch,
+ const struct sk_buff *skb)
+{
+ this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
+}
+
static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch,
const struct sk_buff *skb)
{
sch->qstats.backlog += qdisc_pkt_len(skb);
}
+static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
+ const struct sk_buff *skb)
+{
+ this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
+}
+
+static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
+{
+ this_cpu_inc(sch->cpu_qstats->qlen);
+}
+
+static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
+{
+ this_cpu_dec(sch->cpu_qstats->qlen);
+}
+
+static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
+{
+ this_cpu_inc(sch->cpu_qstats->requeues);
+}
+
static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)
{
sch->qstats.drops += count;
@@ -757,26 +857,30 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
/* generic pseudo peek method for non-work-conserving qdisc */
static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
{
+ struct sk_buff *skb = skb_peek(&sch->gso_skb);
+
/* we can reuse ->gso_skb because peek isn't called for root qdiscs */
- if (!sch->gso_skb) {
- sch->gso_skb = sch->dequeue(sch);
- if (sch->gso_skb) {
+ if (!skb) {
+ skb = sch->dequeue(sch);
+
+ if (skb) {
+ __skb_queue_head(&sch->gso_skb, skb);
/* it's still part of the queue */
- qdisc_qstats_backlog_inc(sch, sch->gso_skb);
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
}
}
- return sch->gso_skb;
+ return skb;
}
/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
{
- struct sk_buff *skb = sch->gso_skb;
+ struct sk_buff *skb = skb_peek(&sch->gso_skb);
if (skb) {
- sch->gso_skb = NULL;
+ skb = __skb_dequeue(&sch->gso_skb);
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
} else {
@@ -834,6 +938,14 @@ static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
qdisc_qstats_drop(sch);
}
+static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ __qdisc_drop(skb, to_free);
+ qdisc_qstats_cpu_drop(sch);
+
+ return NET_XMIT_DROP;
+}
static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
@@ -896,4 +1008,36 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
}
+/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc.
+ * The fast path only needs to access filter list and to update stats
+ */
+struct mini_Qdisc {
+ struct tcf_proto *filter_list;
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ struct gnet_stats_queue __percpu *cpu_qstats;
+ struct rcu_head rcu;
+};
+
+static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
+ const struct sk_buff *skb)
+{
+ bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
+}
+
+static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
+{
+ this_cpu_inc(miniq->cpu_qstats->drops);
+}
+
+struct mini_Qdisc_pair {
+ struct mini_Qdisc miniq1;
+ struct mini_Qdisc miniq2;
+ struct mini_Qdisc __rcu **p_miniq;
+};
+
+void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
+ struct tcf_proto *tp_head);
+void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
+ struct mini_Qdisc __rcu **p_miniq);
+
#endif
diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h
index 4a5b9a306c69..32ee65a30aff 100644
--- a/include/net/sctp/checksum.h
+++ b/include/net/sctp/checksum.h
@@ -48,31 +48,32 @@ static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum)
/* This uses the crypto implementation of crc32c, which is either
* implemented w/ hardware support or resolves to __crc32c_le().
*/
- return crc32c(sum, buff, len);
+ return (__force __wsum)crc32c((__force __u32)sum, buff, len);
}
static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2,
int offset, int len)
{
- return __crc32c_le_combine(csum, csum2, len);
+ return (__force __wsum)__crc32c_le_combine((__force __u32)csum,
+ (__force __u32)csum2, len);
}
static inline __le32 sctp_compute_cksum(const struct sk_buff *skb,
unsigned int offset)
{
struct sctphdr *sh = sctp_hdr(skb);
- __le32 ret, old = sh->checksum;
const struct skb_checksum_ops ops = {
.update = sctp_csum_update,
.combine = sctp_csum_combine,
};
+ __le32 old = sh->checksum;
+ __wsum new;
sh->checksum = 0;
- ret = cpu_to_le32(~__skb_checksum(skb, offset, skb->len - offset,
- ~(__u32)0, &ops));
+ new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, &ops);
sh->checksum = old;
- return ret;
+ return cpu_to_le32((__force __u32)new);
}
#endif /* __sctp_checksum_h__ */
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index deaafa9b09cb..20ff237c5eb2 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -145,12 +145,13 @@ SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other)
SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive)
-#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA)
+#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \
+ a->chunk_hdr->type == SCTP_CID_I_DATA)
/* Calculate the actual data size in a data chunk */
-#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\
- - (unsigned long)(c->chunk_hdr)\
- - sizeof(struct sctp_data_chunk)))
+#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \
+ (unsigned long)(c->chunk_hdr) - \
+ sctp_datachk_len(&c->asoc->stream)))
/* Internal error codes */
enum sctp_ierror {
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index d7d8cba01469..f7ae6b0a21d0 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -107,7 +107,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
int sctp_inet_listen(struct socket *sock, int backlog);
void sctp_write_space(struct sock *sk);
void sctp_data_ready(struct sock *sk);
-unsigned int sctp_poll(struct file *file, struct socket *sock,
+__poll_t sctp_poll(struct file *file, struct socket *sock,
poll_table *wait);
void sctp_sock_rfree(struct sk_buff *skb);
void sctp_copy_sock(struct sock *newsk, struct sock *sk,
@@ -116,7 +116,7 @@ extern struct percpu_counter sctp_sockets_allocated;
int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
-int sctp_transport_walk_start(struct rhashtable_iter *iter);
+void sctp_transport_walk_start(struct rhashtable_iter *iter);
void sctp_transport_walk_stop(struct rhashtable_iter *iter);
struct sctp_transport *sctp_transport_get_next(struct net *net,
struct rhashtable_iter *iter);
@@ -195,6 +195,11 @@ void sctp_remaddr_proc_exit(struct net *net);
int sctp_offload_init(void);
/*
+ * sctp/stream_sched.c
+ */
+void sctp_sched_ops_init(void);
+
+/*
* sctp/stream.c
*/
int sctp_send_reset_streams(struct sctp_association *asoc,
@@ -439,12 +444,13 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
int frag = pmtu;
frag -= sp->pf->af->net_header_len;
- frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk);
+ frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream);
if (asoc->user_frag)
frag = min_t(int, frag, asoc->user_frag);
- frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN));
+ frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN -
+ sctp_datachk_len(&asoc->stream)));
return frag;
}
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 88233cf8b8d4..2883c43c5258 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -72,7 +72,7 @@ typedef enum sctp_disposition (sctp_state_fn_t) (
const union sctp_subtype type,
void *arg,
struct sctp_cmd_seq *commands);
-typedef void (sctp_timer_event_t) (unsigned long);
+typedef void (sctp_timer_event_t) (struct timer_list *);
struct sctp_sm_table_entry {
sctp_state_fn_t *fn;
const char *name;
@@ -197,10 +197,14 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
const __u32 lowest_tsn,
const struct sctp_chunk *chunk);
-struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
+struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc,
+ __u8 flags, int paylen, gfp_t gfp);
+struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc,
+ __u32 new_cum_tsn, size_t nstreams,
+ struct sctp_ifwdtsn_skip *skiplist);
+struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
const struct sctp_sndrcvinfo *sinfo,
- int len, const __u8 flags,
- __u16 ssn, gfp_t gfp);
+ int len, __u8 flags, gfp_t gfp);
struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
const __u32 lowest_tsn);
struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc);
@@ -314,10 +318,10 @@ int sctp_do_sm(struct net *net, enum sctp_event event_type,
void *event_arg, gfp_t gfp);
/* 2nd level prototypes */
-void sctp_generate_t3_rtx_event(unsigned long peer);
-void sctp_generate_heartbeat_event(unsigned long peer);
-void sctp_generate_reconf_event(unsigned long peer);
-void sctp_generate_proto_unreach_event(unsigned long peer);
+void sctp_generate_t3_rtx_event(struct timer_list *t);
+void sctp_generate_heartbeat_event(struct timer_list *t);
+void sctp_generate_reconf_event(struct timer_list *t);
+void sctp_generate_proto_unreach_event(struct timer_list *t);
void sctp_ootb_pkt_free(struct sctp_packet *packet);
@@ -342,7 +346,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
__u16 size;
size = ntohs(chunk->chunk_hdr->length);
- size -= sizeof(struct sctp_data_chunk);
+ size -= sctp_datahdr_len(&chunk->asoc->stream);
return size;
}
@@ -358,6 +362,12 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
typecheck(__u32, b) && \
((__s32)((a) - (b)) <= 0))
+/* Compare two MIDs */
+#define MID_lt(a, b) \
+ (typecheck(__u32, a) && \
+ typecheck(__u32, b) && \
+ ((__s32)((a) - (b)) < 0))
+
/* Compare two SSNs */
#define SSN_lt(a,b) \
(typecheck(__u16, a) && \
diff --git a/include/net/sctp/stream_interleave.h b/include/net/sctp/stream_interleave.h
new file mode 100644
index 000000000000..6657711c8bc4
--- /dev/null
+++ b/include/net/sctp/stream_interleave.h
@@ -0,0 +1,61 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * These are definitions used by stream interleaving (I-DATA), defined in RFC
+ * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11)
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresses:
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Xin Long <lucien.xin@gmail.com>
+ */
+
+#ifndef __sctp_stream_interleave_h__
+#define __sctp_stream_interleave_h__
+
+struct sctp_stream_interleave {
+ __u16 data_chunk_len;
+ __u16 ftsn_chunk_len;
+ /* (I-)DATA process */
+ struct sctp_chunk *(*make_datafrag)(const struct sctp_association *asoc,
+ const struct sctp_sndrcvinfo *sinfo,
+ int len, __u8 flags, gfp_t gfp);
+ void (*assign_number)(struct sctp_chunk *chunk);
+ bool (*validate_data)(struct sctp_chunk *chunk);
+ int (*ulpevent_data)(struct sctp_ulpq *ulpq,
+ struct sctp_chunk *chunk, gfp_t gfp);
+ int (*enqueue_event)(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event);
+ void (*renege_events)(struct sctp_ulpq *ulpq,
+ struct sctp_chunk *chunk, gfp_t gfp);
+ void (*start_pd)(struct sctp_ulpq *ulpq, gfp_t gfp);
+ void (*abort_pd)(struct sctp_ulpq *ulpq, gfp_t gfp);
+ /* (I-)FORWARD-TSN process */
+ void (*generate_ftsn)(struct sctp_outq *q, __u32 ctsn);
+ bool (*validate_ftsn)(struct sctp_chunk *chunk);
+ void (*report_ftsn)(struct sctp_ulpq *ulpq, __u32 ftsn);
+ void (*handle_ftsn)(struct sctp_ulpq *ulpq,
+ struct sctp_chunk *chunk);
+};
+
+void sctp_stream_interleave_init(struct sctp_stream *stream);
+
+#endif /* __sctp_stream_interleave_h__ */
diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
new file mode 100644
index 000000000000..5c5da48f65e7
--- /dev/null
+++ b/include/net/sctp/stream_sched.h
@@ -0,0 +1,77 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * These are definitions used by the stream schedulers, defined in RFC
+ * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11)
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresses:
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#ifndef __sctp_stream_sched_h__
+#define __sctp_stream_sched_h__
+
+struct sctp_sched_ops {
+ /* Property handling for a given stream */
+ int (*set)(struct sctp_stream *stream, __u16 sid, __u16 value,
+ gfp_t gfp);
+ int (*get)(struct sctp_stream *stream, __u16 sid, __u16 *value);
+
+ /* Init the specific scheduler */
+ int (*init)(struct sctp_stream *stream);
+ /* Init a stream */
+ int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp);
+ /* Frees the entire thing */
+ void (*free)(struct sctp_stream *stream);
+
+ /* Enqueue a chunk */
+ void (*enqueue)(struct sctp_outq *q, struct sctp_datamsg *msg);
+ /* Dequeue a chunk */
+ struct sctp_chunk *(*dequeue)(struct sctp_outq *q);
+ /* Called only if the chunk fit the packet */
+ void (*dequeue_done)(struct sctp_outq *q, struct sctp_chunk *chunk);
+ /* Sched all chunks already enqueued */
+ void (*sched_all)(struct sctp_stream *steam);
+ /* Unsched all chunks already enqueued */
+ void (*unsched_all)(struct sctp_stream *steam);
+};
+
+int sctp_sched_set_sched(struct sctp_association *asoc,
+ enum sctp_sched_type sched);
+int sctp_sched_get_sched(struct sctp_association *asoc);
+int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid,
+ __u16 value, gfp_t gfp);
+int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
+ __u16 *value);
+void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch);
+
+void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch);
+int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp);
+struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream);
+
+void sctp_sched_ops_register(enum sctp_sched_type sched,
+ struct sctp_sched_ops *sched_ops);
+void sctp_sched_ops_prio_init(void);
+void sctp_sched_ops_rr_init(void);
+
+#endif /* __sctp_stream_sched_h__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0477945de1a3..03e92dda1813 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -89,6 +89,7 @@ struct sctp_stream;
#include <net/sctp/tsnmap.h>
#include <net/sctp/ulpevent.h>
#include <net/sctp/ulpqueue.h>
+#include <net/sctp/stream_interleave.h>
/* Structures useful for managing bind/connect. */
@@ -202,12 +203,17 @@ struct sctp_sock {
/* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */
__u32 param_flags;
- struct sctp_initmsg initmsg;
struct sctp_rtoinfo rtoinfo;
struct sctp_paddrparams paddrparam;
- struct sctp_event_subscribe subscribe;
struct sctp_assocparams assocparams;
+ /*
+ * These two structures must be grouped together for the usercopy
+ * whitelist region.
+ */
+ struct sctp_event_subscribe subscribe;
+ struct sctp_initmsg initmsg;
+
int user_frag;
__u32 autoclose;
@@ -217,6 +223,7 @@ struct sctp_sock {
disable_fragments:1,
v4mapped:1,
frag_interleave:1,
+ strm_interleave:1,
recvrcvinfo:1,
recvnxtinfo:1,
data_ready_signalled:1;
@@ -380,6 +387,7 @@ struct sctp_sender_hb_info {
int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
gfp_t gfp);
+int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid);
void sctp_stream_free(struct sctp_stream *stream);
void sctp_stream_clear(struct sctp_stream *stream);
void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new);
@@ -396,6 +404,28 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new);
#define sctp_ssn_skip(stream, type, sid, ssn) \
((stream)->type[sid].ssn = ssn + 1)
+/* What is the current MID number for this stream? */
+#define sctp_mid_peek(stream, type, sid) \
+ ((stream)->type[sid].mid)
+
+/* Return the next MID number for this stream. */
+#define sctp_mid_next(stream, type, sid) \
+ ((stream)->type[sid].mid++)
+
+/* Skip over this mid and all below. */
+#define sctp_mid_skip(stream, type, sid, mid) \
+ ((stream)->type[sid].mid = mid + 1)
+
+#define sctp_stream_in(asoc, sid) (&(asoc)->stream.in[sid])
+
+/* What is the current MID_uo number for this stream? */
+#define sctp_mid_uo_peek(stream, type, sid) \
+ ((stream)->type[sid].mid_uo)
+
+/* Return the next MID_uo number for this stream. */
+#define sctp_mid_uo_next(stream, type, sid) \
+ ((stream)->type[sid].mid_uo++)
+
/*
* Pointers to address related SCTP functions.
* (i.e. things that depend on the address family.)
@@ -502,7 +532,8 @@ struct sctp_datamsg {
/* Did the messenge fail to send? */
int send_error;
u8 send_failed:1,
- can_delay; /* should this message be Nagle delayed */
+ can_delay:1, /* should this message be Nagle delayed */
+ abandoned:1; /* should this message be abandoned */
};
struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *,
@@ -529,8 +560,12 @@ struct sctp_chunk {
/* How many times this chunk have been sent, for prsctp RTX policy */
int sent_count;
- /* This is our link to the per-transport transmitted list. */
- struct list_head transmitted_list;
+ union {
+ /* This is our link to the per-transport transmitted list. */
+ struct list_head transmitted_list;
+ /* List in specific stream outq */
+ struct list_head stream_list;
+ };
/* This field is used by chunks that hold fragmented data.
* For the first fragment this is the list that holds the rest of
@@ -568,6 +603,8 @@ struct sctp_chunk {
struct sctp_addiphdr *addip_hdr;
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
struct sctp_authhdr *auth_hdr;
+ struct sctp_idatahdr *idata_hdr;
+ struct sctp_ifwdtsn_hdr *ifwdtsn_hdr;
} subh;
__u8 *chunk_end;
@@ -614,6 +651,7 @@ struct sctp_chunk {
__u16 rtt_in_progress:1, /* This chunk used for RTT calc? */
has_tsn:1, /* Does this chunk have a TSN yet? */
has_ssn:1, /* Does this chunk have a SSN yet? */
+#define has_mid has_ssn
singleton:1, /* Only chunk in the packet? */
end_of_packet:1, /* Last chunk in the packet? */
ecn_ce_done:1, /* Have we processed the ECN CE bit? */
@@ -640,6 +678,11 @@ void sctp_init_addrs(struct sctp_chunk *, union sctp_addr *,
union sctp_addr *);
const union sctp_addr *sctp_source(const struct sctp_chunk *chunk);
+static inline __u16 sctp_chunk_stream_no(struct sctp_chunk *ch)
+{
+ return ntohs(ch->subh.data_hdr->stream);
+}
+
enum {
SCTP_ADDR_NEW, /* new address added to assoc/ep */
SCTP_ADDR_SRC, /* address can be used as source */
@@ -955,7 +998,7 @@ void sctp_transport_burst_limited(struct sctp_transport *);
void sctp_transport_burst_reset(struct sctp_transport *);
unsigned long sctp_transport_timeout(struct sctp_transport *);
void sctp_transport_reset(struct sctp_transport *t);
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
void sctp_transport_immediate_rtx(struct sctp_transport *);
void sctp_transport_dst_release(struct sctp_transport *t);
void sctp_transport_dst_confirm(struct sctp_transport *t);
@@ -1012,6 +1055,9 @@ struct sctp_outq {
/* Data pending that has never been transmitted. */
struct list_head out_chunk_list;
+ /* Stream scheduler being used */
+ struct sctp_sched_ops *sched;
+
unsigned int out_qlen; /* Total length of queued data chunks. */
/* Error of send failed, may used in SCTP_SEND_FAILED event. */
@@ -1059,6 +1105,7 @@ void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
void sctp_prsctp_prune(struct sctp_association *asoc,
struct sctp_sndrcvinfo *sinfo, int msg_len);
+void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
/* Uncork and flush an outqueue. */
static inline void sctp_outq_cork(struct sctp_outq *q)
{
@@ -1315,15 +1362,53 @@ struct sctp_inithdr_host {
__u32 initial_tsn;
};
+struct sctp_stream_priorities {
+ /* List of priorities scheduled */
+ struct list_head prio_sched;
+ /* List of streams scheduled */
+ struct list_head active;
+ /* The next stream in line */
+ struct sctp_stream_out_ext *next;
+ __u16 prio;
+};
+
+struct sctp_stream_out_ext {
+ __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
+ __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
+ struct list_head outq; /* chunks enqueued by this stream */
+ union {
+ struct {
+ /* Scheduled streams list */
+ struct list_head prio_list;
+ struct sctp_stream_priorities *prio_head;
+ };
+ /* Fields used by RR scheduler */
+ struct {
+ struct list_head rr_list;
+ };
+ };
+};
+
struct sctp_stream_out {
- __u16 ssn;
- __u8 state;
- __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
- __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
+ union {
+ __u32 mid;
+ __u16 ssn;
+ };
+ __u32 mid_uo;
+ struct sctp_stream_out_ext *ext;
+ __u8 state;
};
struct sctp_stream_in {
- __u16 ssn;
+ union {
+ __u32 mid;
+ __u16 ssn;
+ };
+ __u32 mid_uo;
+ __u32 fsn;
+ __u32 fsn_uo;
+ char pd_mode;
+ char pd_mode_uo;
};
struct sctp_stream {
@@ -1331,11 +1416,48 @@ struct sctp_stream {
struct sctp_stream_in *in;
__u16 outcnt;
__u16 incnt;
+ /* Current stream being sent, if any */
+ struct sctp_stream_out *out_curr;
+ union {
+ /* Fields used by priority scheduler */
+ struct {
+ /* List of priorities scheduled */
+ struct list_head prio_list;
+ };
+ /* Fields used by RR scheduler */
+ struct {
+ /* List of streams scheduled */
+ struct list_head rr_list;
+ /* The next stream in line */
+ struct sctp_stream_out_ext *rr_next;
+ };
+ };
+ struct sctp_stream_interleave *si;
};
#define SCTP_STREAM_CLOSED 0x00
#define SCTP_STREAM_OPEN 0x01
+static inline __u16 sctp_datachk_len(const struct sctp_stream *stream)
+{
+ return stream->si->data_chunk_len;
+}
+
+static inline __u16 sctp_datahdr_len(const struct sctp_stream *stream)
+{
+ return stream->si->data_chunk_len - sizeof(struct sctp_chunkhdr);
+}
+
+static inline __u16 sctp_ftsnchk_len(const struct sctp_stream *stream)
+{
+ return stream->si->ftsn_chunk_len;
+}
+
+static inline __u16 sctp_ftsnhdr_len(const struct sctp_stream *stream)
+{
+ return stream->si->ftsn_chunk_len - sizeof(struct sctp_chunkhdr);
+}
+
/* SCTP_GET_ASSOC_STATS counters */
struct sctp_priv_assoc_stats {
/* Maximum observed rto in the association during subsequent
@@ -1884,6 +2006,7 @@ struct sctp_association {
__u8 need_ecne:1, /* Need to send an ECNE Chunk? */
temp:1, /* Is it a temporary association? */
force_delay:1,
+ intl_enable:1,
prsctp_enable:1,
reconf_enable:1;
diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index 231dc42f1da6..51b4e0626c34 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -45,19 +45,29 @@
/* A structure to carry information to the ULP (e.g. Sockets API) */
/* Warning: This sits inside an skb.cb[] area. Be very careful of
* growing this structure as it is at the maximum limit now.
+ *
+ * sctp_ulpevent is saved in skb->cb (48 bytes), whose last 4 bytes
+ * have been taken by sock_skb_cb, so here it has to use 'packed'
+ * to make sctp_ulpevent fit into the remaining 44 bytes.
*/
struct sctp_ulpevent {
struct sctp_association *asoc;
struct sctp_chunk *chunk;
unsigned int rmem_len;
- __u32 ppid;
+ union {
+ __u32 mid;
+ __u16 ssn;
+ };
+ union {
+ __u32 ppid;
+ __u32 fsn;
+ };
__u32 tsn;
__u32 cumtsn;
__u16 stream;
- __u16 ssn;
__u16 flags;
__u16 msg_flags;
-};
+} __packed;
/* Retrieve the skb this event sits inside of. */
static inline struct sk_buff *sctp_event2skb(const struct sctp_ulpevent *ev)
@@ -112,7 +122,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event(
struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
const struct sctp_association *asoc,
- __u32 indication, gfp_t gfp);
+ __u32 indication, __u32 sid, __u32 seq,
+ __u32 flags, gfp_t gfp);
struct sctp_ulpevent *sctp_ulpevent_make_adaptation_indication(
const struct sctp_association *asoc, gfp_t gfp);
@@ -140,6 +151,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event(
const struct sctp_association *asoc, __u16 flags,
__u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp);
+struct sctp_ulpevent *sctp_make_reassembled_event(
+ struct net *net, struct sk_buff_head *queue,
+ struct sk_buff *f_frag, struct sk_buff *l_frag);
+
void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
struct msghdr *);
void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event,
diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h
index e0dce07b8794..bb0ecba3db2b 100644
--- a/include/net/sctp/ulpqueue.h
+++ b/include/net/sctp/ulpqueue.h
@@ -45,6 +45,7 @@ struct sctp_ulpq {
char pd_mode;
struct sctp_association *asoc;
struct sk_buff_head reasm;
+ struct sk_buff_head reasm_uo;
struct sk_buff_head lobby;
};
@@ -76,11 +77,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc);
void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn);
void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *, __u32);
-#endif /* __sctp_ulpqueue_h__ */
-
-
-
-
-
+__u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq,
+ struct sk_buff_head *list, __u16 needed);
+#endif /* __sctp_ulpqueue_h__ */
diff --git a/include/net/sock.h b/include/net/sock.h
index a6b9a8d1a6df..169c92afcafa 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -60,7 +60,7 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/cgroup-defs.h>
-
+#include <linux/rbtree.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/poll.h>
@@ -72,6 +72,7 @@
#include <net/tcp_states.h>
#include <linux/net_tstamp.h>
#include <net/smc.h>
+#include <net/l3mdev.h>
/*
* This structure really needs to be cleaned up.
@@ -267,6 +268,7 @@ struct sock_common {
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
* @sk_gso_max_size: Maximum GSO segment size to build
* @sk_gso_max_segs: Maximum number of GSO segments
+ * @sk_pacing_shift: scaling factor for TCP Small Queues
* @sk_lingertime: %SO_LINGER l_linger setting
* @sk_backlog: always used with the per-socket spinlock held
* @sk_callback_lock: used with the callbacks in the end of this struct
@@ -397,7 +399,10 @@ struct sock {
int sk_wmem_queued;
refcount_t sk_wmem_alloc;
unsigned long sk_tsq_flags;
- struct sk_buff *sk_send_head;
+ union {
+ struct sk_buff *sk_send_head;
+ struct rb_root tcp_rtx_queue;
+ };
struct sk_buff_head sk_write_queue;
__s32 sk_peek_off;
int sk_write_pending;
@@ -436,7 +441,6 @@ struct sock {
#define SK_FL_TYPE_MASK 0xffff0000
#endif
- kmemcheck_bitfield_begin(flags);
unsigned int sk_padding : 1,
sk_kern_sock : 1,
sk_no_check_tx : 1,
@@ -445,9 +449,8 @@ struct sock {
sk_protocol : 8,
sk_type : 16;
#define SK_PROTOCOL_MAX U8_MAX
- kmemcheck_bitfield_end(flags);
-
u16 sk_gso_max_segs;
+ u8 sk_pacing_shift;
unsigned long sk_lingertime;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
@@ -683,11 +686,7 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
- if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
- sk->sk_family == AF_INET6)
- hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
- else
- hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
}
static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
@@ -734,10 +733,10 @@ static inline void sk_add_bind_node(struct sock *sk,
*
*/
#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \
- for (pos = rcu_dereference((head)->first); \
+ for (pos = rcu_dereference(hlist_first_rcu(head)); \
pos != NULL && \
({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
- pos = rcu_dereference(pos->next))
+ pos = rcu_dereference(hlist_next_rcu(pos)))
static inline struct user_namespace *sk_user_ns(struct sock *sk)
{
@@ -1098,14 +1097,20 @@ struct proto {
*/
unsigned long *memory_pressure;
long *sysctl_mem;
+
int *sysctl_wmem;
int *sysctl_rmem;
+ u32 sysctl_wmem_offset;
+ u32 sysctl_rmem_offset;
+
int max_header;
bool no_autobind;
struct kmem_cache *slab;
unsigned int obj_size;
- int slab_flags;
+ slab_flags_t slab_flags;
+ size_t useroffset; /* Usercopy region offset */
+ size_t usersize; /* Usercopy region size */
struct percpu_counter *orphan_count;
@@ -1260,6 +1265,7 @@ proto_memory_pressure(struct proto *prot)
/* Called with local bh disabled */
void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
int sock_prot_inuse_get(struct net *net, struct proto *proto);
+int sock_inuse_get(struct net *net);
#else
static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
int inc)
@@ -1443,10 +1449,8 @@ do { \
} while (0)
#ifdef CONFIG_LOCKDEP
-static inline bool lockdep_sock_is_held(const struct sock *csk)
+static inline bool lockdep_sock_is_held(const struct sock *sk)
{
- struct sock *sk = (struct sock *)csk;
-
return lockdep_is_held(&sk->sk_lock) ||
lockdep_is_held(&sk->sk_lock.slock);
}
@@ -1512,6 +1516,11 @@ static inline bool sock_owned_by_user(const struct sock *sk)
return sk->sk_lock.owned;
}
+static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
+{
+ return sk->sk_lock.owned;
+}
+
/* no reclassification while locks are held */
static inline bool sock_allow_reclassification(const struct sock *csk)
{
@@ -1576,7 +1585,7 @@ int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
int sock_no_accept(struct socket *, struct socket *, int, bool);
int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
-unsigned int sock_no_poll(struct file *, struct socket *,
+__poll_t sock_no_poll(struct file *, struct socket *,
struct poll_table_struct *);
int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
int sock_no_listen(struct socket *, int);
@@ -2330,31 +2339,6 @@ static inline bool sk_listener(const struct sock *sk)
return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
}
-/**
- * sk_state_load - read sk->sk_state for lockless contexts
- * @sk: socket pointer
- *
- * Paired with sk_state_store(). Used in places we do not hold socket lock :
- * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
- */
-static inline int sk_state_load(const struct sock *sk)
-{
- return smp_load_acquire(&sk->sk_state);
-}
-
-/**
- * sk_state_store - update sk->sk_state
- * @sk: socket pointer
- * @newstate: new state
- *
- * Paired with sk_state_load(). Should be used in contexts where
- * state change might impact lockless readers.
- */
-static inline void sk_state_store(struct sock *sk, int newstate)
-{
- smp_store_release(&sk->sk_state, newstate);
-}
-
void sock_enable_timestamp(struct sock *sk, int flag);
int sock_get_timestamp(struct sock *, struct timeval __user *);
int sock_get_timestampns(struct sock *, struct timespec __user *);
@@ -2387,4 +2371,52 @@ extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
+static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
+{
+ /* Does this proto have per netns sysctl_wmem ? */
+ if (proto->sysctl_wmem_offset)
+ return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+
+ return *proto->sysctl_wmem;
+}
+
+static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
+{
+ /* Does this proto have per netns sysctl_rmem ? */
+ if (proto->sysctl_rmem_offset)
+ return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+
+ return *proto->sysctl_rmem;
+}
+
+/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
+ * Some wifi drivers need to tweak it to get more chunks.
+ * They can use this helper from their ndo_start_xmit()
+ */
+static inline void sk_pacing_shift_update(struct sock *sk, int val)
+{
+ if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val)
+ return;
+ sk->sk_pacing_shift = val;
+}
+
+/* if a socket is bound to a device, check that the given device
+ * index is either the same or that the socket is bound to an L3
+ * master device and the given device index is also enslaved to
+ * that L3 master
+ */
+static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
+{
+ int mdif;
+
+ if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)
+ return true;
+
+ mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif);
+ if (mdif && mdif == sk->sk_bound_dev_if)
+ return true;
+
+ return false;
+}
+
#endif /* _SOCK_H */
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d767b7991887..39bc855d7fee 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -51,6 +51,7 @@ enum switchdev_attr_id {
SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
+ SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
};
struct switchdev_attr {
@@ -75,6 +76,7 @@ enum switchdev_obj_id {
SWITCHDEV_OBJ_ID_UNDEFINED,
SWITCHDEV_OBJ_ID_PORT_VLAN,
SWITCHDEV_OBJ_ID_PORT_MDB,
+ SWITCHDEV_OBJ_ID_HOST_MDB,
};
struct switchdev_obj {
diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h
index 781f3433a0be..9470fd7e4350 100644
--- a/include/net/tc_act/tc_csum.h
+++ b/include/net/tc_act/tc_csum.h
@@ -6,10 +6,16 @@
#include <net/act_api.h>
#include <linux/tc_act/tc_csum.h>
+struct tcf_csum_params {
+ int action;
+ u32 update_flags;
+ struct rcu_head rcu;
+};
+
struct tcf_csum {
struct tc_action common;
- u32 update_flags;
+ struct tcf_csum_params __rcu *params;
};
#define to_tcf_csum(a) ((struct tcf_csum *)a)
@@ -24,7 +30,13 @@ static inline bool is_tcf_csum(const struct tc_action *a)
static inline u32 tcf_csum_update_flags(const struct tc_action *a)
{
- return to_tcf_csum(a)->update_flags;
+ u32 update_flags;
+
+ rcu_read_lock();
+ update_flags = rcu_dereference(to_tcf_csum(a)->params)->update_flags;
+ rcu_read_unlock();
+
+ return update_flags;
}
#endif /* __NET_TC_CSUM_H */
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index e82d93346b63..ef8dd0db70ce 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -34,6 +34,11 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act,
return false;
}
+static inline bool is_tcf_gact_ok(const struct tc_action *a)
+{
+ return __is_tcf_gact_act(a, TC_ACT_OK, false);
+}
+
static inline bool is_tcf_gact_shot(const struct tc_action *a)
{
return __is_tcf_gact_act(a, TC_ACT_SHOT, false);
diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h
index ba6667125bcd..86d13b01b39d 100644
--- a/include/net/tc_act/tc_ife.h
+++ b/include/net/tc_act/tc_ife.h
@@ -7,12 +7,18 @@
#include <linux/rtnetlink.h>
#include <linux/module.h>
-struct tcf_ife_info {
- struct tc_action common;
+struct tcf_ife_params {
u8 eth_dst[ETH_ALEN];
u8 eth_src[ETH_ALEN];
u16 eth_type;
u16 flags;
+
+ struct rcu_head rcu;
+};
+
+struct tcf_ife_info {
+ struct tc_action common;
+ struct tcf_ife_params __rcu *params;
/* list of metaids allowed */
struct list_head metalist;
};
@@ -41,7 +47,7 @@ struct tcf_meta_ops {
struct module *owner;
};
-#define MODULE_ALIAS_IFE_META(metan) MODULE_ALIAS("ifemeta" __stringify_1(metan))
+#define MODULE_ALIAS_IFE_META(metan) MODULE_ALIAS("ife-meta-" metan)
int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi);
int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi);
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index b2dbbfaefd22..a2e9cbca5c9e 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -8,7 +8,6 @@
struct tcf_mirred {
struct tc_action common;
int tcfm_eaction;
- int tcfm_ifindex;
bool tcfm_mac_header_xmit;
struct net_device __rcu *tcfm_dev;
struct list_head tcfm_list;
@@ -33,9 +32,9 @@ static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a)
return false;
}
-static inline int tcf_mirred_ifindex(const struct tc_action *a)
+static inline struct net_device *tcf_mirred_dev(const struct tc_action *a)
{
- return to_mirred(a)->tcfm_ifindex;
+ return rtnl_dereference(to_mirred(a)->tcfm_dev);
}
#endif /* __NET_TC_MIR_H */
diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h
index 524cee4f4c81..01dbfea32672 100644
--- a/include/net/tc_act/tc_sample.h
+++ b/include/net/tc_act/tc_sample.h
@@ -14,7 +14,6 @@ struct tcf_sample {
struct psample_group __rcu *psample_group;
u32 psample_group_num;
struct list_head tcfm_list;
- struct rcu_head rcu;
};
#define to_sample(a) ((struct tcf_sample *)a)
diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index c2090df944ff..22ae260d6869 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -13,12 +13,17 @@
#include <net/act_api.h>
#include <linux/tc_act/tc_vlan.h>
+struct tcf_vlan_params {
+ int tcfv_action;
+ u16 tcfv_push_vid;
+ __be16 tcfv_push_proto;
+ u8 tcfv_push_prio;
+ struct rcu_head rcu;
+};
+
struct tcf_vlan {
struct tc_action common;
- int tcfv_action;
- u16 tcfv_push_vid;
- __be16 tcfv_push_proto;
- u8 tcfv_push_prio;
+ struct tcf_vlan_params __rcu *vlan_p;
};
#define to_vlan(a) ((struct tcf_vlan *)a)
@@ -33,22 +38,45 @@ static inline bool is_tcf_vlan(const struct tc_action *a)
static inline u32 tcf_vlan_action(const struct tc_action *a)
{
- return to_vlan(a)->tcfv_action;
+ u32 tcfv_action;
+
+ rcu_read_lock();
+ tcfv_action = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_action;
+ rcu_read_unlock();
+
+ return tcfv_action;
}
static inline u16 tcf_vlan_push_vid(const struct tc_action *a)
{
- return to_vlan(a)->tcfv_push_vid;
+ u16 tcfv_push_vid;
+
+ rcu_read_lock();
+ tcfv_push_vid = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_vid;
+ rcu_read_unlock();
+
+ return tcfv_push_vid;
}
static inline __be16 tcf_vlan_push_proto(const struct tc_action *a)
{
- return to_vlan(a)->tcfv_push_proto;
+ __be16 tcfv_push_proto;
+
+ rcu_read_lock();
+ tcfv_push_proto = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_proto;
+ rcu_read_unlock();
+
+ return tcfv_push_proto;
}
static inline u8 tcf_vlan_push_prio(const struct tc_action *a)
{
- return to_vlan(a)->tcfv_push_prio;
-}
+ u8 tcfv_push_prio;
+ rcu_read_lock();
+ tcfv_push_prio = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_prio;
+ rcu_read_unlock();
+
+ return tcfv_push_prio;
+}
#endif /* __NET_TC_VLAN_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e6d0002a1b0b..e3fc667f9ac2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -45,9 +45,6 @@
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
-
-#include <linux/bpf.h>
-#include <linux/filter.h>
#include <linux/bpf-cgroup.h>
extern struct inet_hashinfo tcp_hashinfo;
@@ -191,6 +188,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
* experimental options. See draft-ietf-tcpm-experimental-options-00.txt
*/
#define TCPOPT_FASTOPEN_MAGIC 0xF989
+#define TCPOPT_SMC_MAGIC 0xE2D4C3D9
/*
* TCP option lengths
@@ -203,6 +201,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_MD5SIG 18
#define TCPOLEN_FASTOPEN_BASE 2
#define TCPOLEN_EXP_FASTOPEN_BASE 4
+#define TCPOLEN_EXP_SMC_BASE 6
/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED 12
@@ -213,6 +212,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_SACK_PERBLOCK 8
#define TCPOLEN_MD5SIG_ALIGNED 20
#define TCPOLEN_MSS_ALIGNED 4
+#define TCPOLEN_EXP_SMC_BASE_ALIGNED 8
/* Flags in tp->nonagle */
#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
@@ -240,41 +240,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
/* sysctl variables for tcp */
-extern int sysctl_tcp_fastopen;
-extern int sysctl_tcp_retrans_collapse;
-extern int sysctl_tcp_stdurg;
-extern int sysctl_tcp_rfc1337;
-extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans;
-extern int sysctl_tcp_fack;
-extern int sysctl_tcp_reordering;
-extern int sysctl_tcp_max_reordering;
-extern int sysctl_tcp_dsack;
extern long sysctl_tcp_mem[3];
-extern int sysctl_tcp_wmem[3];
-extern int sysctl_tcp_rmem[3];
-extern int sysctl_tcp_app_win;
-extern int sysctl_tcp_adv_win_scale;
-extern int sysctl_tcp_frto;
-extern int sysctl_tcp_nometrics_save;
-extern int sysctl_tcp_moderate_rcvbuf;
-extern int sysctl_tcp_tso_win_divisor;
-extern int sysctl_tcp_workaround_signed_windows;
-extern int sysctl_tcp_slow_start_after_idle;
-extern int sysctl_tcp_thin_linear_timeouts;
-extern int sysctl_tcp_thin_dupack;
-extern int sysctl_tcp_early_retrans;
-extern int sysctl_tcp_recovery;
-#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */
-extern int sysctl_tcp_limit_output_bytes;
-extern int sysctl_tcp_challenge_ack_limit;
-extern int sysctl_tcp_min_tso_segs;
-extern int sysctl_tcp_min_rtt_wlen;
-extern int sysctl_tcp_autocorking;
-extern int sysctl_tcp_invalid_ratelimit;
-extern int sysctl_tcp_pacing_ss_ratio;
-extern int sysctl_tcp_pacing_ca_ratio;
+#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */
+#define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */
extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
@@ -414,10 +384,10 @@ void tcp_update_metrics(struct sock *sk);
void tcp_init_metrics(struct sock *sk);
void tcp_metrics_init(void);
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
-void tcp_disable_fack(struct tcp_sock *tp);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
-unsigned int tcp_poll(struct file *file, struct socket *sock,
+void tcp_init_transfer(struct sock *sk, int bpf_op);
+__poll_t tcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int tcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
@@ -551,7 +521,13 @@ void tcp_xmit_retransmit_queue(struct sock *);
void tcp_simple_retransmit(struct sock *);
void tcp_enter_recovery(struct sock *sk, bool ece_ack);
int tcp_trim_head(struct sock *, struct sk_buff *, u32);
-int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);
+enum tcp_queue {
+ TCP_FRAG_IN_WRITE_QUEUE,
+ TCP_FRAG_IN_RTX_QUEUE,
+};
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ struct sk_buff *skb, u32 len,
+ unsigned int mss_now, gfp_t gfp);
void tcp_send_probe0(struct sock *);
void tcp_send_partial(struct sock *);
@@ -563,7 +539,7 @@ void tcp_push_one(struct sock *, unsigned int mss_now);
void tcp_send_ack(struct sock *sk);
void tcp_send_delayed_ack(struct sock *sk);
void tcp_send_loss_probe(struct sock *sk);
-bool tcp_schedule_loss_probe(struct sock *sk);
+bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto);
void tcp_skb_collapse_tstamp(struct sk_buff *skb,
const struct sk_buff *next_skb);
@@ -796,16 +772,10 @@ struct tcp_skb_cb {
u16 tcp_gso_segs;
u16 tcp_gso_size;
};
-
- /* Used to stash the receive timestamp while this skb is in the
- * out of order queue, as skb->tstamp is overwritten by the
- * rbnode.
- */
- ktime_t swtstamp;
};
__u8 tcp_flags; /* TCP header flags. (tcp[13]) */
- __u8 sacked; /* State flags for SACK/FACK. */
+ __u8 sacked; /* State flags for SACK. */
#define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */
#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
#define TCPCB_LOST 0x04 /* SKB is lost */
@@ -874,12 +844,11 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb)
}
#endif
-/* TCP_SKB_CB reference means this can not be used from early demux */
static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
- skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
+ skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
return true;
#endif
return false;
@@ -984,6 +953,7 @@ struct rate_sample {
u32 prior_in_flight; /* in flight before this ACK */
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
+ bool is_ack_delayed; /* is this (likely) a delayed ACK? */
};
struct tcp_congestion_ops {
@@ -1032,8 +1002,8 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
void tcp_assign_congestion_control(struct sock *sk);
void tcp_init_congestion_control(struct sock *sk);
void tcp_cleanup_congestion_control(struct sock *sk);
-int tcp_set_default_congestion_control(const char *name);
-void tcp_get_default_congestion_control(char *name);
+int tcp_set_default_congestion_control(struct net *net, const char *name);
+void tcp_get_default_congestion_control(struct net *net, char *name);
void tcp_get_available_congestion_control(char *buf, size_t len);
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
@@ -1047,7 +1017,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
extern struct tcp_congestion_ops tcp_reno;
struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
-u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca);
+u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
#ifdef CONFIG_INET
char *tcp_ca_get_name_by_key(u32 key, char *buffer);
#else
@@ -1086,7 +1056,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs);
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- struct rate_sample *rs);
+ bool is_sack_reneg, struct rate_sample *rs);
void tcp_rate_check_app_limited(struct sock *sk);
/* These functions determine how the current flow behaves in respect of SACK
@@ -1095,7 +1065,6 @@ void tcp_rate_check_app_limited(struct sock *sk);
*
* tcp_is_sack - SACK enabled
* tcp_is_reno - No SACK
- * tcp_is_fack - FACK enabled, implies SACK enabled
*/
static inline int tcp_is_sack(const struct tcp_sock *tp)
{
@@ -1107,16 +1076,6 @@ static inline bool tcp_is_reno(const struct tcp_sock *tp)
return !tcp_is_sack(tp);
}
-static inline bool tcp_is_fack(const struct tcp_sock *tp)
-{
- return tp->rx_opt.sack_ok & TCP_FACK_ENABLED;
-}
-
-static inline void tcp_enable_fack(struct tcp_sock *tp)
-{
- tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
-}
-
static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
{
return tp->sacked_out + tp->lost_out;
@@ -1309,7 +1268,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
s32 delta;
- if (!sysctl_tcp_slow_start_after_idle || tp->packets_out ||
+ if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
ca_ops->cong_control)
return;
delta = tcp_jiffies32 - tp->lsndtime;
@@ -1318,13 +1277,14 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
}
/* Determine a window scaling and initial window to offer. */
-void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
+void tcp_select_initial_window(const struct sock *sk, int __space,
+ __u32 mss, __u32 *rcv_wnd,
__u32 *window_clamp, int wscale_ok,
__u8 *rcv_wscale, __u32 init_rcv_wnd);
-static inline int tcp_win_from_space(int space)
+static inline int tcp_win_from_space(const struct sock *sk, int space)
{
- int tcp_adv_win_scale = sysctl_tcp_adv_win_scale;
+ int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
return tcp_adv_win_scale <= 0 ?
(space>>(-tcp_adv_win_scale)) :
@@ -1334,13 +1294,13 @@ static inline int tcp_win_from_space(int space)
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
- return tcp_win_from_space(sk->sk_rcvbuf -
+ return tcp_win_from_space(sk, sk->sk_rcvbuf -
atomic_read(&sk->sk_rmem_alloc));
}
static inline int tcp_full_space(const struct sock *sk)
{
- return tcp_win_from_space(sk->sk_rcvbuf);
+ return tcp_win_from_space(sk, sk->sk_rcvbuf);
}
extern void tcp_openreq_init_rwin(struct request_sock *req,
@@ -1548,8 +1508,7 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
/* From tcp_fastopen.c */
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
- struct tcp_fastopen_cookie *cookie, int *syn_loss,
- unsigned long *last_syn_loss);
+ struct tcp_fastopen_cookie *cookie);
void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
struct tcp_fastopen_cookie *cookie, bool syn_lost,
u16 try_exp);
@@ -1561,14 +1520,16 @@ struct tcp_fastopen_request {
int copied; /* queued in tcp_connect() */
};
void tcp_free_fastopen_req(struct tcp_sock *tp);
-
-extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-int tcp_fastopen_reset_cipher(void *key, unsigned int len);
+void tcp_fastopen_destroy_cipher(struct sock *sk);
+void tcp_fastopen_ctx_destroy(struct net *net);
+int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
+ void *key, unsigned int len);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct tcp_fastopen_cookie *foc);
-void tcp_fastopen_init_key_once(bool publish);
+ struct tcp_fastopen_cookie *foc,
+ const struct dst_entry *dst);
+void tcp_fastopen_init_key_once(struct net *net);
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie);
bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
@@ -1585,7 +1546,7 @@ extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
void tcp_fastopen_active_disable(struct sock *sk);
bool tcp_fastopen_active_should_disable(struct sock *sk);
void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
-void tcp_fastopen_active_timeout_reset(void);
+void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
/* Latencies incurred by various limits for a sender. They are
* chronograph-like stats that are mutually exclusive.
@@ -1601,52 +1562,46 @@ enum tcp_chrono {
void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type);
void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type);
-/* write queue abstraction */
-static inline void tcp_write_queue_purge(struct sock *sk)
+/* This helper is needed, because skb->tcp_tsorted_anchor uses
+ * the same memory storage as skb->destructor/_skb_refdst
+ */
+static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb)
{
- struct sk_buff *skb;
-
- tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
- while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
- sk_wmem_free_skb(sk, skb);
- sk_mem_reclaim(sk);
- tcp_clear_all_retrans_hints(tcp_sk(sk));
+ skb->destructor = NULL;
+ skb->_skb_refdst = 0UL;
}
-static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
-{
- return skb_peek(&sk->sk_write_queue);
+#define tcp_skb_tsorted_save(skb) { \
+ unsigned long _save = skb->_skb_refdst; \
+ skb->_skb_refdst = 0UL;
+
+#define tcp_skb_tsorted_restore(skb) \
+ skb->_skb_refdst = _save; \
}
-static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk)
+void tcp_write_queue_purge(struct sock *sk);
+
+static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
{
- return skb_peek_tail(&sk->sk_write_queue);
+ return skb_rb_first(&sk->tcp_rtx_queue);
}
-static inline struct sk_buff *tcp_write_queue_next(const struct sock *sk,
- const struct sk_buff *skb)
+static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
{
- return skb_queue_next(&sk->sk_write_queue, skb);
+ return skb_peek(&sk->sk_write_queue);
}
-static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk,
- const struct sk_buff *skb)
+static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk)
{
- return skb_queue_prev(&sk->sk_write_queue, skb);
+ return skb_peek_tail(&sk->sk_write_queue);
}
-#define tcp_for_write_queue(skb, sk) \
- skb_queue_walk(&(sk)->sk_write_queue, skb)
-
-#define tcp_for_write_queue_from(skb, sk) \
- skb_queue_walk_from(&(sk)->sk_write_queue, skb)
-
#define tcp_for_write_queue_from_safe(skb, tmp, sk) \
skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
static inline struct sk_buff *tcp_send_head(const struct sock *sk)
{
- return sk->sk_send_head;
+ return skb_peek(&sk->sk_write_queue);
}
static inline bool tcp_skb_is_last(const struct sock *sk,
@@ -1655,27 +1610,25 @@ static inline bool tcp_skb_is_last(const struct sock *sk,
return skb_queue_is_last(&sk->sk_write_queue, skb);
}
-static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb)
+static inline bool tcp_write_queue_empty(const struct sock *sk)
{
- if (tcp_skb_is_last(sk, skb))
- sk->sk_send_head = NULL;
- else
- sk->sk_send_head = tcp_write_queue_next(sk, skb);
+ return skb_queue_empty(&sk->sk_write_queue);
}
-static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
+static inline bool tcp_rtx_queue_empty(const struct sock *sk)
{
- if (sk->sk_send_head == skb_unlinked) {
- sk->sk_send_head = NULL;
- tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
- }
- if (tcp_sk(sk)->highest_sack == skb_unlinked)
- tcp_sk(sk)->highest_sack = NULL;
+ return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
}
-static inline void tcp_init_send_head(struct sock *sk)
+static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
{
- sk->sk_send_head = NULL;
+ return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
+}
+
+static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
+{
+ if (tcp_write_queue_empty(sk))
+ tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
}
static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
@@ -1688,26 +1641,8 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb
__tcp_add_write_queue_tail(sk, skb);
/* Queue it, remembering where we must start sending. */
- if (sk->sk_send_head == NULL) {
- sk->sk_send_head = skb;
+ if (sk->sk_write_queue.next == skb)
tcp_chrono_start(sk, TCP_CHRONO_BUSY);
-
- if (tcp_sk(sk)->highest_sack == NULL)
- tcp_sk(sk)->highest_sack = skb;
- }
-}
-
-static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
-{
- __skb_queue_head(&sk->sk_write_queue, skb);
-}
-
-/* Insert buff after skb on the write queue of sk. */
-static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
- struct sk_buff *buff,
- struct sock *sk)
-{
- __skb_queue_after(&sk->sk_write_queue, skb, buff);
}
/* Insert new before skb on the write queue of sk. */
@@ -1716,19 +1651,27 @@ static inline void tcp_insert_write_queue_before(struct sk_buff *new,
struct sock *sk)
{
__skb_queue_before(&sk->sk_write_queue, skb, new);
-
- if (sk->sk_send_head == skb)
- sk->sk_send_head = new;
}
static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
{
+ tcp_skb_tsorted_anchor_cleanup(skb);
__skb_unlink(skb, &sk->sk_write_queue);
}
-static inline bool tcp_write_queue_empty(struct sock *sk)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb);
+
+static inline void tcp_rtx_queue_unlink(struct sk_buff *skb, struct sock *sk)
{
- return skb_queue_empty(&sk->sk_write_queue);
+ tcp_skb_tsorted_anchor_cleanup(skb);
+ rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
+}
+
+static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct sock *sk)
+{
+ list_del(&skb->tcp_tsorted_anchor);
+ tcp_rtx_queue_unlink(skb, sk);
+ sk_wmem_free_skb(sk, skb);
}
static inline void tcp_push_pending_frames(struct sock *sk)
@@ -1757,8 +1700,7 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
{
- tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
- tcp_write_queue_next(sk, skb);
+ tcp_sk(sk)->highest_sack = skb_rb_next(skb);
}
static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
@@ -1768,7 +1710,7 @@ static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
static inline void tcp_highest_sack_reset(struct sock *sk)
{
- tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
+ tcp_sk(sk)->highest_sack = tcp_rtx_queue_head(sk);
}
/* Called when old skb is about to be deleted and replaced by new skb */
@@ -1934,11 +1876,12 @@ extern void tcp_rack_mark_lost(struct sock *sk);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
u64 xmit_time);
extern void tcp_rack_reo_timeout(struct sock *sk);
+extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);
/* At how many usecs into the future should the RTO fire? */
static inline s64 tcp_rto_delta_us(const struct sock *sk)
{
- const struct sk_buff *skb = tcp_write_queue_head(sk);
+ const struct sk_buff *skb = tcp_rtx_queue_head(sk);
u32 rto = inet_csk(sk)->icsk_rto;
u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
@@ -2040,6 +1983,11 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);
#define TCP_ULP_MAX 128
#define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX)
+enum {
+ TCP_ULP_TLS,
+ TCP_ULP_BPF,
+};
+
struct tcp_ulp_ops {
struct list_head list;
@@ -2048,12 +1996,15 @@ struct tcp_ulp_ops {
/* cleanup ulp */
void (*release)(struct sock *sk);
+ int uid;
char name[TCP_ULP_NAME_MAX];
+ bool user_visible;
struct module *owner;
};
int tcp_register_ulp(struct tcp_ulp_ops *type);
void tcp_unregister_ulp(struct tcp_ulp_ops *type);
int tcp_set_ulp(struct sock *sk, const char *name);
+int tcp_set_ulp_id(struct sock *sk, const int ulp);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);
@@ -2063,17 +2014,21 @@ void tcp_cleanup_ulp(struct sock *sk);
* program loaded).
*/
#ifdef CONFIG_BPF
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
struct bpf_sock_ops_kern sock_ops;
int ret;
- if (sk_fullsock(sk))
+ memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+ if (sk_fullsock(sk)) {
+ sock_ops.is_fullsock = 1;
sock_owned_by_me(sk);
+ }
- memset(&sock_ops, 0, sizeof(sock_ops));
sock_ops.sk = sk;
sock_ops.op = op;
+ if (nargs > 0)
+ memcpy(sock_ops.args, args, nargs * sizeof(*args));
ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
if (ret == 0)
@@ -2082,18 +2037,46 @@ static inline int tcp_call_bpf(struct sock *sk, int op)
ret = -1;
return ret;
}
+
+/* Convenience wrapper around tcp_call_bpf() for callers that pass exactly
+ * two u32 arguments: they are packed into a local array and forwarded.
+ * Returns whatever tcp_call_bpf() returns.
+ */
+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+	u32 bpf_args[] = { arg1, arg2 };
+
+	return tcp_call_bpf(sk, op, sizeof(bpf_args) / sizeof(bpf_args[0]),
+			    bpf_args);
+}
+
+/* Convenience wrapper around tcp_call_bpf() for callers that pass exactly
+ * three u32 arguments: they are packed into a local array and forwarded.
+ * Returns whatever tcp_call_bpf() returns.
+ */
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+				    u32 arg3)
+{
+	u32 bpf_args[] = { arg1, arg2, arg3 };
+
+	return tcp_call_bpf(sk, op, sizeof(bpf_args) / sizeof(bpf_args[0]),
+			    bpf_args);
+}
+
#else
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
return -EPERM;
}
+
+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+ return -EPERM;
+}
+
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+ u32 arg3)
+{
+ return -EPERM;
+}
+
#endif
static inline u32 tcp_timeout_init(struct sock *sk)
{
int timeout;
- timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT);
+ timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL);
if (timeout <= 0)
timeout = TCP_TIMEOUT_INIT;
@@ -2104,7 +2087,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
{
int rwnd;
- rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
+ rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL);
if (rwnd < 0)
rwnd = 0;
@@ -2113,6 +2096,10 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
{
- return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
+ return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}
+
+#if IS_ENABLED(CONFIG_SMC)
+extern struct static_key_false tcp_have_smc;
+#endif
#endif /* _TCP_H */
diff --git a/include/net/tipc.h b/include/net/tipc.h
new file mode 100644
index 000000000000..07670ec022a7
--- /dev/null
+++ b/include/net/tipc.h
@@ -0,0 +1,62 @@
+/*
+ * include/net/tipc.h: Include file for TIPC message header routines
+ *
+ * Copyright (c) 2017 Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_HDR_H
+#define _TIPC_HDR_H
+
+#include <linux/random.h>
+
+#define KEEPALIVE_MSG_MASK 0x0e080000 /* LINK_PROTOCOL + MSG_IS_KEEPALIVE */
+
+struct tipc_basic_hdr {
+ __be32 w[4];
+};
+
+/* Compute the key used to spread received TIPC messages across cores.
+ *
+ * @hdr: the first four 32-bit header words, stored in network byte order
+ *
+ * Returns header word 3 unchanged (i.e. still in network byte order) for
+ * ordinary messages, or a fresh random value when all KEEPALIVE_MSG_MASK
+ * bits are set in word 0.
+ */
+static inline u32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr)
+{
+	u32 w0 = ntohl(hdr->w[0]);
+	bool keepalive_msg = (w0 & KEEPALIVE_MSG_MASK) == KEEPALIVE_MSG_MASK;
+	u32 key;	/* unsigned, to match the return type */
+
+	/* Return source node identity as key. The word is returned without
+	 * byte-swapping (presumably it is only consumed as an opaque key -
+	 * verify against callers); __force makes the __be32 -> u32
+	 * conversion explicit for sparse.
+	 */
+	if (likely(!keepalive_msg))
+		return (__force u32)hdr->w[3];
+
+	/* Spread PROBE/PROBE_REPLY messages across the cores */
+	get_random_bytes(&key, sizeof(key));
+	return key;
+}
+
+#endif
diff --git a/include/net/tls.h b/include/net/tls.h
index b89d397dd62f..4913430ab807 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -35,6 +35,11 @@
#define _TLS_OFFLOAD_H
#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/crypto.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
#include <uapi/linux/tls.h>
@@ -53,6 +58,7 @@
struct tls_sw_context {
struct crypto_aead *aead_send;
+ struct crypto_wait async_wait;
/* Sending context */
char aad_space[TLS_AAD_SPACE_SIZE];
@@ -83,6 +89,8 @@ struct tls_context {
void *priv_ctx;
+ u8 tx_conf:2;
+
u16 prepend_size;
u16 tag_size;
u16 overhead_size;
@@ -97,7 +105,6 @@ struct tls_context {
u16 pending_open_record_frags;
int (*push_pending_record)(struct sock *sk, int flags);
- void (*free_resources)(struct sock *sk);
void (*sk_write_space)(struct sock *sk);
void (*sk_proto_close)(struct sock *sk, long timeout);
@@ -122,6 +129,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void tls_sw_close(struct sock *sk, long timeout);
+void tls_sw_free_tx_resources(struct sock *sk);
void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
void tls_icsk_clean_acked(struct sock *sk);
@@ -164,7 +172,7 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
static inline void tls_err_abort(struct sock *sk)
{
- sk->sk_err = -EBADMSG;
+ sk->sk_err = EBADMSG;
sk->sk_error_report(sk);
}
@@ -212,6 +220,21 @@ static inline void tls_fill_prepend(struct tls_context *ctx,
ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
}
+/* Fill @buf for one record: copy @record_sequence_size bytes of
+ * @record_sequence to the start of @buf, then write a five-byte header
+ * at bytes 8..12: @record_type, the TLS 1.2 major and minor version
+ * bytes, and the low 16 bits of @size with the high byte first.
+ * The header offset is fixed at 8 regardless of @record_sequence_size.
+ */
+static inline void tls_make_aad(char *buf,
+				size_t size,
+				char *record_sequence,
+				int record_sequence_size,
+				unsigned char record_type)
+{
+	char *hdr = buf + 8;	/* header always starts at byte 8 */
+
+	memcpy(buf, record_sequence, record_sequence_size);
+
+	*hdr++ = record_type;
+	*hdr++ = TLS_1_2_VERSION_MAJOR;
+	*hdr++ = TLS_1_2_VERSION_MINOR;
+	*hdr++ = size >> 8;
+	*hdr = size & 0xFF;
+}
+
static inline struct tls_context *tls_get_ctx(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/include/net/udp.h b/include/net/udp.h
index 6c759c8594e2..850a8e581cce 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -275,7 +275,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int udp_init_sock(struct sock *sk);
int __udp_disconnect(struct sock *sk, int flags);
int udp_disconnect(struct sock *sk, int flags);
-unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
bool is_ipv6);
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 81bdbf97319b..9185e45b997f 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -64,6 +64,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
UDP_SKB_CB(skb)->cscov = cscov;
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_valid = 0;
}
return 0;
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 13223396dc64..ad73d8b3fcc2 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -146,7 +146,7 @@ struct vxlanhdr_gpe {
np_applied:1,
instance_applied:1,
version:2,
-reserved_flags2:2;
+ reserved_flags2:2;
#elif defined(__BIG_ENDIAN_BITFIELD)
u8 reserved_flags2:2,
version:2,
@@ -301,7 +301,7 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
l4_hdr = ipv6_hdr(skb)->nexthdr;
break;
default:
- return features;;
+ return features;
}
if ((l4_hdr == IPPROTO_UDP) &&
diff --git a/include/net/wext.h b/include/net/wext.h
index e51f067fdb3a..aa192a670304 100644
--- a/include/net/wext.h
+++ b/include/net/wext.h
@@ -7,7 +7,7 @@
struct net;
#ifdef CONFIG_WEXT_CORE
-int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, unsigned int cmd,
void __user *arg);
int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
unsigned long arg);
@@ -15,7 +15,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
struct iw_statistics *get_wireless_stats(struct net_device *dev);
int call_commit_handler(struct net_device *dev);
#else
-static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
+static inline int wext_handle_ioctl(struct net *net, unsigned int cmd,
void __user *arg)
{
return -EINVAL;
diff --git a/include/net/xdp.h b/include/net/xdp.h
new file mode 100644
index 000000000000..b2362ddfa694
--- /dev/null
+++ b/include/net/xdp.h
@@ -0,0 +1,48 @@
+/* include/net/xdp.h
+ *
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * Released under terms in GPL version 2. See COPYING.
+ */
+#ifndef __LINUX_NET_XDP_H__
+#define __LINUX_NET_XDP_H__
+
+/**
+ * DOC: XDP RX-queue information
+ *
+ * The XDP RX-queue info (xdp_rxq_info) is associated with the driver
+ * level RX-ring queues. It is information that is specific to how
+ * the driver has configured a given RX-ring queue.
+ *
+ * Each xdp_buff frame received in the driver carries a (pointer)
+ * reference to this xdp_rxq_info structure. This provides the XDP
+ * data-path read-access to RX-info for both kernel and bpf-side
+ * (limited subset).
+ *
+ * For now, direct access is only safe while running in NAPI/softirq
+ * context. Contents are read-mostly and must not be updated during
+ * driver NAPI/softirq poll.
+ *
+ * The driver usage API is a register and unregister API.
+ *
+ * The struct is not directly tied to the XDP prog. A new XDP prog
+ * can be attached as long as it doesn't change the underlying
+ * RX-ring. If the RX-ring does change significantly, the NIC driver
+ * naturally needs to stop the RX-ring before purging and reallocating
+ * memory. In that process the driver MUST call unregister (which
+ * also applies for driver shutdown and unload). The register API is
+ * also mandatory during RX-ring setup.
+ */
+
+struct xdp_rxq_info {
+ struct net_device *dev;
+ u32 queue_index;
+ u32 reg_state;
+} ____cacheline_aligned; /* perf critical, avoid false-sharing */
+
+int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index);
+void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
+void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
+bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
+
+#endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e015e164bac0..7d2077665c0b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -968,7 +968,7 @@ static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_c
/* A struct encoding bundle of transformations to apply to some set of flow.
*
- * dst->child points to the next element of bundle.
+ * xdst->child points to the next element of bundle.
* dst->xfrm points to an instanse of transformer.
*
* Due to unfortunate limitations of current routing cache, which we
@@ -984,6 +984,8 @@ struct xfrm_dst {
struct rt6_info rt6;
} u;
struct dst_entry *route;
+ struct dst_entry *child;
+ struct dst_entry *path;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int num_pols, num_xfrms;
u32 xfrm_genid;
@@ -994,7 +996,35 @@ struct xfrm_dst {
u32 path_cookie;
};
+/* Return the path dst for @dst. With CONFIG_XFRM enabled and dst->xfrm
+ * set, @dst is cast to struct xfrm_dst and its ->path is returned;
+ * in every other case @dst itself is returned with const dropped.
+ */
+static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
+{
+#ifdef CONFIG_XFRM
+	if (dst->xfrm)
+		return ((const struct xfrm_dst *)dst)->path;
+#endif
+	return (struct dst_entry *)dst;
+}
+
+/* Return the next element of the bundle (xdst->child) when CONFIG_XFRM is
+ * enabled and dst->xfrm is set; return NULL otherwise.
+ */
+static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
+{
+#ifdef CONFIG_XFRM
+	if (dst->xfrm) {
+		/* Read-only access: keep the const qualifier, as
+		 * xfrm_dst_path() does, instead of casting it away.
+		 */
+		const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst;
+
+		return xdst->child;
+	}
+#endif
+	return NULL;
+}
+
#ifdef CONFIG_XFRM
+static inline void xfrm_dst_set_child(struct xfrm_dst *xdst, struct dst_entry *child)
+{
+ xdst->child = child;
+}
+
static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
{
xfrm_pols_put(xdst->pols, xdst->num_pols);
@@ -1021,6 +1051,7 @@ struct xfrm_offload {
#define XFRM_GSO_SEGMENT 16
#define XFRM_GRO 32
#define XFRM_ESP_NO_TRAILER 64
+#define XFRM_DEV_RESUME 128
__u32 status;
#define CRYPTO_SUCCESS 1
@@ -1570,6 +1601,9 @@ int xfrm_init_state(struct xfrm_state *x);
int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
+int xfrm_trans_queue(struct sk_buff *skb,
+ int (*finish)(struct net *, struct sock *,
+ struct sk_buff *));
int xfrm_output_resume(struct sk_buff *skb, int err);
int xfrm_output(struct sock *sk, struct sk_buff *skb);
int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
@@ -1765,22 +1799,22 @@ static inline int xfrm_acquire_is_on(struct net *net)
}
#endif
-static inline int aead_len(struct xfrm_algo_aead *alg)
+static inline unsigned int aead_len(struct xfrm_algo_aead *alg)
{
return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
}
-static inline int xfrm_alg_len(const struct xfrm_algo *alg)
+static inline unsigned int xfrm_alg_len(const struct xfrm_algo *alg)
{
return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
}
-static inline int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg)
+static inline unsigned int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg)
{
return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
}
-static inline int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn)
+static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn)
{
return sizeof(*replay_esn) + replay_esn->bmp_len * sizeof(__u32);
}
@@ -1844,34 +1878,53 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
{
return skb->sp->xvec[skb->sp->len - 1];
}
+#endif
+
static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
{
+#ifdef CONFIG_XFRM
struct sec_path *sp = skb->sp;
if (!sp || !sp->olen || sp->len != sp->olen)
return NULL;
return &sp->ovec[sp->olen - 1];
-}
+#else
+ return NULL;
#endif
+}
void __net_init xfrm_dev_init(void);
#ifdef CONFIG_XFRM_OFFLOAD
-int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
+void xfrm_dev_resume(struct sk_buff *skb);
+void xfrm_dev_backlog(struct softnet_data *sd);
+struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again);
int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
struct xfrm_user_offload *xuo);
bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
+/* Invoke the offload device's optional xdo_dev_state_advance_esn()
+ * callback for @x. Does nothing when the state has no offload device,
+ * the device has no xfrmdev_ops, or the callback is not provided.
+ */
+static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
+{
+	struct net_device *dev = x->xso.dev;
+
+	/* Check xfrmdev_ops itself before dereferencing it, matching the
+	 * guard used by xfrm_dev_state_free().
+	 */
+	if (dev && dev->xfrmdev_ops &&
+	    dev->xfrmdev_ops->xdo_dev_state_advance_esn)
+		dev->xfrmdev_ops->xdo_dev_state_advance_esn(x);
+}
+
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
struct xfrm_state *x = dst->xfrm;
+ struct xfrm_dst *xdst;
if (!x || !x->type_offload)
return false;
- if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) &&
- !dst->child->xfrm)
+ xdst = (struct xfrm_dst *) dst;
+ if (!x->xso.offload_handle && !xdst->child->xfrm)
+ return true;
+ if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
+ !xdst->child->xfrm)
return true;
return false;
@@ -1891,15 +1944,24 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
struct net_device *dev = xso->dev;
if (dev && dev->xfrmdev_ops) {
- dev->xfrmdev_ops->xdo_dev_state_free(x);
+ if (dev->xfrmdev_ops->xdo_dev_state_free)
+ dev->xfrmdev_ops->xdo_dev_state_free(x);
xso->dev = NULL;
dev_put(dev);
}
}
#else
-static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
+static inline void xfrm_dev_resume(struct sk_buff *skb)
{
- return 0;
+}
+
+static inline void xfrm_dev_backlog(struct softnet_data *sd)
+{
+}
+
+static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
+{
+ return skb;
}
static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo)
@@ -1920,6 +1982,10 @@ static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x
return false;
}
+static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
+{
+}
+
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
return false;