From ccdb2d17df9f07c291d43b0aeea7c90e4c020489 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 15 Jul 2017 19:40:20 +0200 Subject: ip6: fix PMTU discovery when using /127 subnets The definition of an "anycast destination address" has been tweaked as a side-effect of commit 2647a9b07032 ("ipv6: Remove external dependency on rt6i_gateway and RTF_ANYCAST"). The first address of a point-to-point /127 subnet is now considered as an anycast address. This prevents ICMPv6 errors to be returned to a sender of such a subnet and breaks PMTU discovery. This can be reproduced with: ip link add name out6 type veth peer name in6 ip link add name out7 type veth peer name in7 ip link set mtu 1400 dev out7 ip link set mtu 1400 dev in7 ip netns add next-hop ip netns add next-next-hop ip link set netns next-hop dev in6 ip link set netns next-hop dev out7 ip link set netns next-next-hop dev in7 ip link set up dev out6 ip addr add 2001:db8:1::12/127 dev out6 ip netns exec next-hop ip link set up dev in6 ip netns exec next-hop ip link set up dev out7 ip netns exec next-hop ip addr add 2001:db8:1::13/127 dev in6 ip netns exec next-hop ip addr add 2001:db8:1::14/127 dev out7 ip netns exec next-hop ip route add default via 2001:db8:1::15 ip netns exec next-hop sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec next-next-hop ip link set up dev in7 ip netns exec next-next-hop ip addr add 2001:db8:1::15/127 dev in7 ip netns exec next-next-hop ip addr add 2001:db8:1::50/128 dev in7 ip netns exec next-next-hop ip route add default via 2001:db8:1::14 ip netns exec next-next-hop sysctl -qw net.ipv6.conf.all.forwarding=1 ip route add 2001:db8:1::48/123 via 2001:db8:1::13 sleep 4 ping -M do -s 1452 -c 3 2001:db8:1::50 || true ip route get 2001:db8:1::50 Before the patch, we get: 2001:db8:1::50 from :: via 2001:db8:1::13 dev out6 src 2001:db8:1::12 metric 1024 pref medium After the patch, we get: 2001:db8:1::50 via 2001:db8:1::13 dev out6 src 2001:db8:1::12 metric 0 cache expires 578sec mtu 1400 pref medium Fixes: 2647a9b07032 ("ipv6: Remove external dependency on rt6i_gateway and RTF_ANYCAST") Signed-off-by: Vincent Bernat Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 199056933dcb..907d39a42f6b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -194,7 +194,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, struct rt6_info *rt = (struct rt6_info *)dst; return rt->rt6i_flags & RTF_ANYCAST || - (rt->rt6i_dst.plen != 128 && + (rt->rt6i_dst.plen < 127 && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } -- cgit v1.2.3 From b02db702face3791889a4fcf06691c086648ee89 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:57 +0800 Subject: sctp: remove the typedef sctp_random_param_t This patch is to remove the typedef sctp_random_param_t, and replace with struct sctp_random_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5ab29af8ca8a..f22c079fd1f1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1556,7 +1556,7 @@ struct sctp_association { * and authenticated chunk list. All that is part of the * cookie and these are just pointers to those locations */ - sctp_random_param_t *peer_random; + struct sctp_random_param *peer_random; sctp_chunks_param_t *peer_chunks; sctp_hmac_algo_param_t *peer_hmacs; } peer; -- cgit v1.2.3 From a762a9d94d44980e3690f9de87b918376daa6428 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:58 +0800 Subject: sctp: remove the typedef sctp_chunks_param_t This patch is to remove the typedef sctp_chunks_param_t, and replace with struct sctp_chunks_param in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f22c079fd1f1..8042e6380b0e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1557,7 +1557,7 @@ struct sctp_association { * cookie and these are just pointers to those locations */ struct sctp_random_param *peer_random; - sctp_chunks_param_t *peer_chunks; + struct sctp_chunks_param *peer_chunks; sctp_hmac_algo_param_t *peer_hmacs; } peer; -- cgit v1.2.3 From 1474774a7f0daf9878fd9537a24714f419e744ed Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:59 +0800 Subject: sctp: remove the typedef sctp_hmac_algo_param_t This patch is to remove the typedef sctp_hmac_algo_param_t, and replace with struct sctp_hmac_algo_param in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8042e6380b0e..66cd7639b912 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1558,7 +1558,7 @@ struct sctp_association { */ struct sctp_random_param *peer_random; struct sctp_chunks_param *peer_chunks; - sctp_hmac_algo_param_t *peer_hmacs; + struct sctp_hmac_algo_param *peer_hmacs; } peer; /* State : A state variable indicating what state the -- cgit v1.2.3 From 27eac47b00789522ba00501b0838026e1ecb6f05 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 17 Jul 2017 11:35:54 +0200 Subject: net/unix: drop obsolete fd-recursion limits All unix sockets now account inflight FDs to the respective sender. This was introduced in: commit 712f4aad406bb1ed67f3f98d04c044191f0ff593 Author: willy tarreau Date: Sun Jan 10 07:54:56 2016 +0100 unix: properly account for FDs passed over unix sockets and further refined in: commit 415e3d3e90ce9e18727e8843ae343eda5a58fad6 Author: Hannes Frederic Sowa Date: Wed Feb 3 02:11:03 2016 +0100 unix: correctly track in-flight fds in sending process user_struct Hence, regardless of the stacking depth of FDs, the total number of inflight FDs is limited, and accounted. There is no known way for a local user to exceed those limits or exploit the accounting. Furthermore, the GC logic is independent of the recursion/stacking depth as well. It solely depends on the total number of inflight FDs, regardless of their layout. Lastly, the current `recursion_level' suffers a TOCTOU race, since it checks and inherits depths only at queue time. If we consider `A<-B' to mean `queue-B-on-A', the following sequence circumvents the recursion level easily: A<-B B<-C C<-D ... Y<-Z resulting in: A<-B<-C<-...<-Z With all of this in mind, lets drop the recursion limit. It has no additional security value, anymore. On the contrary, it randomly confuses message brokers that try to forward file-descriptors, since any sendmsg(2) call can fail spuriously with ETOOMANYREFS if a client maliciously modifies the FD while inflight. Cc: Alban Crequy Cc: Simon McVittie Signed-off-by: David Herrmann Reviewed-by: Tom Gundersen Signed-off-by: David S. Miller --- include/net/af_unix.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 678e4d6fa317..3b3194b2fc65 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -58,7 +58,6 @@ struct unix_sock { struct list_head link; atomic_long_t inflight; spinlock_t lock; - unsigned char recursion_level; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 -- cgit v1.2.3 From b145425f269a17ed344d737f746b844dfac60c82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Jul 2017 02:56:10 -0700 Subject: inetpeer: remove AVL implementation in favor of RB tree As discussed in Faro during Netfilter Workshop 2017, RB trees can be used with RCU, using a seqlock. Note that net/rxrpc/conn_service.c is already using this. This patch converts inetpeer from AVL tree to RB tree, since it allows to remove private AVL implementation in favor of shared RB code. $ size net/ipv4/inetpeer.before net/ipv4/inetpeer.after text data bss dec hex filename 3195 40 128 3363 d23 net/ipv4/inetpeer.before 1562 24 0 1586 632 net/ipv4/inetpeer.after The same technique can be used to speed up net/netfilter/nft_set_rbtree.c (removing rwlock contention in fast path) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index f2a215fc78e4..950ed182f62f 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -33,18 +33,12 @@ struct inetpeer_addr { }; struct inet_peer { - /* group together avl_left,avl_right,v4daddr to speedup lookups */ - struct inet_peer __rcu *avl_left, *avl_right; + struct rb_node rb_node; struct inetpeer_addr daddr; - __u32 avl_height; u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ unsigned long rate_last; - union { - struct list_head gc_list; - struct rcu_head gc_rcu; - }; /* * Once inet_peer is queued for deletion (refcnt == 0), following field * is not available: rid @@ -55,7 +49,6 @@ struct inet_peer { atomic_t rid; /* Frag reception counter */ }; struct rcu_head rcu; - struct inet_peer *gc_next; }; /* following fields might be frequently dirtied */ @@ -64,7 +57,7 @@ struct inet_peer { }; struct inet_peer_base { - struct inet_peer __rcu *root; + struct rb_root rb_root; seqlock_t lock; int total; }; -- cgit v1.2.3 From 09c7570480f7544ffbf8e6db365208b0b0c154c6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:26 +0200 Subject: xfrm: remove flow cache After rcu conversions performance degradation in forward tests isn't that noticeable anymore. See next patch for some numbers. A followup patcg could then also remove genid from the policies as we do not cache bundles anymore. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/flow.h | 34 ---------------------------------- include/net/flowcache.h | 25 ------------------------- include/net/netns/xfrm.h | 11 ----------- include/net/xfrm.h | 8 -------- 4 files changed, 78 deletions(-) delete mode 100644 include/net/flowcache.h (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index bae198b3039e..f3dc61b29bb5 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family) return 0; } -#define FLOW_DIR_IN 0 -#define FLOW_DIR_OUT 1 -#define FLOW_DIR_FWD 2 - -struct net; -struct sock; -struct flow_cache_ops; - -struct flow_cache_object { - const struct flow_cache_ops *ops; -}; - -struct flow_cache_ops { - struct flow_cache_object *(*get)(struct flow_cache_object *); - int (*check)(struct flow_cache_object *); - void (*delete)(struct flow_cache_object *); -}; - -typedef struct flow_cache_object *(*flow_resolve_t)( - struct net *net, const struct flowi *key, u16 family, - u8 dir, struct flow_cache_object *oldobj, void *ctx); - -struct flow_cache_object *flow_cache_lookup(struct net *net, - const struct flowi *key, u16 family, - u8 dir, flow_resolve_t resolver, - void *ctx); -int flow_cache_init(struct net *net); -void flow_cache_fini(struct net *net); -void flow_cache_hp_init(void); - -void flow_cache_flush(struct net *net); -void flow_cache_flush_deferred(struct net *net); -extern atomic_t flow_cache_genid; - __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) diff --git a/include/net/flowcache.h b/include/net/flowcache.h deleted file mode 100644 index 51eb971e8973..000000000000 --- a/include/net/flowcache.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _NET_FLOWCACHE_H -#define _NET_FLOWCACHE_H - -#include -#include -#include -#include - -struct flow_cache_percpu { - struct hlist_head *hash_table; - unsigned int hash_count; - u32 hash_rnd; - int hash_rnd_recalc; - struct tasklet_struct flush_tasklet; -}; - -struct flow_cache { - u32 hash_shift; - struct flow_cache_percpu __percpu *percpu; - struct hlist_node node; - unsigned int low_watermark; - unsigned int high_watermark; - struct timer_list rnd_timer; -}; -#endif /* _NET_FLOWCACHE_H */ diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 27bb9633c69d..611521646dd4 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,7 +6,6 @@ #include #include #include -#include struct ctl_table_header; @@ -73,16 +72,6 @@ struct netns_xfrm { spinlock_t xfrm_state_lock; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; - - /* flow cache part */ - struct flow_cache flow_cache_global; - atomic_t flow_cache_genid; - struct list_head flow_cache_gc_list; - atomic_t flow_cache_gc_count; - spinlock_t flow_cache_gc_lock; - struct work_struct flow_cache_gc_work; - struct work_struct flow_cache_flush_work; - struct mutex flow_flush_sem; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c0916ab18d32..e0feba2ce76a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -563,7 +563,6 @@ struct xfrm_policy { refcount_t refcnt; struct timer_list timer; - struct flow_cache_object flo; atomic_t genid; u32 priority; u32 index; @@ -978,7 +977,6 @@ struct xfrm_dst { struct rt6_info rt6; } u; struct dst_entry *route; - struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; u32 xfrm_genid; @@ -1226,9 +1224,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk) } } -void xfrm_garbage_collect(struct net *net); -void xfrm_garbage_collect_deferred(struct net *net); - #else static inline void xfrm_sk_free_policy(struct sock *sk) {} @@ -1263,9 +1258,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, { return 1; } -static inline void xfrm_garbage_collect(struct net *net) -{ -} #endif static __inline__ -- cgit v1.2.3 From ec30d78c14a813db39a647b6a348b4286ba4abf5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:27 +0200 Subject: xfrm: add xdst pcpu cache retain last used xfrm_dst in a pcpu cache. On next request, reuse this dst if the policies are the same. The cache will not help with strict RR workloads as there is no hit. The cache packet-path part is reasonably small, the notifier part is needed so we do not add long hangs when a device is dismantled but some pcpu xdst still holds a reference, there are also calls to the flush operation when userspace deletes SAs so modules can be removed (there is no hit. We need to run the dst_release on the correct cpu to avoid races with packet path. This is done by adding a work_struct for each cpu and then doing the actual test/release on each affected cpu via schedule_work_on(). Test results using 4 network namespaces and null encryption: ns1 ns2 -> ns3 -> ns4 netperf -> xfrm/null enc -> xfrm/null dec -> netserver what TCP_STREAM UDP_STREAM UDP_RR Flow cache: 14644.61 294.35 327231.64 No flow cache: 14349.81 242.64 202301.72 Pcpu cache: 14629.70 292.21 205595.22 UDP tests used 64byte packets, tests ran for one minute each, value is average over ten iterations. 'Flow cache' is 'net-next', 'No flow cache' is net-next plus this series but without this patch. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e0feba2ce76a..afb4929d7232 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c); +void xfrm_policy_cache_flush(void); void km_state_notify(struct xfrm_state *x, const struct km_event *c); struct xfrm_tmpl; -- cgit v1.2.3 From bb4d991a28cc86a2dfbeefeff32911ca9f779c18 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 19 Jul 2017 15:41:26 -0700 Subject: tcp: adjust tail loss probe timeout This patch adjusts the timeout formula to schedule the TCP loss probe (TLP). The previous formula uses 2*SRTT or 1.5*RTT + DelayACKMax if only one packet is in flight. It keeps a lower bound of 10 msec which is too large for short RTT connections (e.g. within a data-center). The new formula = 2*RTT + (inflight == 1 ? 200ms : 2ticks) which performs better for short and fast connections. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 70483296157f..4f056ea79df2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #endif #define TCP_RTO_MAX ((unsigned)(120*HZ)) #define TCP_RTO_MIN ((unsigned)(HZ/5)) +#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now * used as a fallback RTO for the @@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes * for local resources. */ -#define TCP_REO_TIMEOUT_MIN (2000) /* Min RACK reordering timeout in usec */ - #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ #define TCP_KEEPALIVE_INTVL (75*HZ) -- cgit v1.2.3 From e7d53ad3239de636ea478fc003d3652b49b8e593 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 18 Jul 2017 16:23:56 -0400 Subject: net: dsa: unexport dsa_is_port_initialized The dsa_is_port_initialized helper is only used by dsa_switch_resume and dsa_switch_suspend, if CONFIG_PM_SLEEP is enabled. Make it static to dsa.c. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 58969b9a090c..88da272d20d0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -256,11 +256,6 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); } -static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) -{ - return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; -} - static inline u8 dsa_upstream_port(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; -- cgit v1.2.3 From 9f08ea848117ab521efcfd3e004d8e1a0edc640c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 18 Jul 2017 20:18:09 +0200 Subject: netfilter: nf_tables: keep chain counters away from hot path These chain counters are only used by the iptables-compat tool, that allow users to use the x_tables extensions from the existing nf_tables framework. This patch makes nf_tables by ~5% for the general usecase, ie. native nft users, where no chain counters are used at all. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 8f690effec37..424684c33771 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -49,6 +49,8 @@ struct nft_payload_set { }; extern const struct nft_expr_ops nft_payload_fast_ops; + +extern struct static_key_false nft_counters_enabled; extern struct static_key_false nft_trace_enabled; #endif /* _NET_NF_TABLES_CORE_H */ -- cgit v1.2.3 From 296d8ee37c50f139d934bdefbab85509b2e4a525 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 21 Jul 2017 12:49:30 +0200 Subject: net: add infrastructure to un-offload UDP tunnel port This adds a new NETDEV_UDP_TUNNEL_DROP_INFO event, similar to NETDEV_UDP_TUNNEL_PUSH_INFO, to signal to un-offload ports. This also adds udp_tunnel_drop_rx_port(), which calls ndo_udp_tunnel_del. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/udp_tunnel.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 02c5be037451..10cce0dd4450 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -115,6 +115,8 @@ struct udp_tunnel_info { /* Notify network devices of offloadable types */ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); +void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type); void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); @@ -124,6 +126,12 @@ static inline void udp_tunnel_get_rx_info(struct net_device *dev) call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); } +static inline void udp_tunnel_drop_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); +} + /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, -- cgit v1.2.3 From 787310859d8d1a72545db2343fb3ac8f765b0f35 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:32 +0800 Subject: sctp: remove the typedef sctp_sackhdr_t This patch is to remove the typedef sctp_sackhdr_t, and replace with struct sctp_sackhdr in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index d4679e7a5ed5..1d5f6ff3f440 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -135,7 +135,7 @@ typedef union { struct sctp_init_chunk *init; struct sctp_ulpevent *ulpevent; struct sctp_packet *packet; - sctp_sackhdr_t *sackh; + struct sctp_sackhdr *sackh; struct sctp_datamsg *msg; } sctp_arg_t; @@ -176,7 +176,7 @@ SCTP_ARG_CONSTRUCTOR(BA, struct sctp_bind_addr *, bp) SCTP_ARG_CONSTRUCTOR(PEER_INIT, struct sctp_init_chunk *, init) SCTP_ARG_CONSTRUCTOR(ULPEVENT, struct sctp_ulpevent *, ulpevent) SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) -SCTP_ARG_CONSTRUCTOR(SACKH, sctp_sackhdr_t *, sackh) +SCTP_ARG_CONSTRUCTOR(SACKH, struct sctp_sackhdr *, sackh) SCTP_ARG_CONSTRUCTOR(DATAMSG, struct sctp_datamsg *, msg) static inline sctp_arg_t SCTP_FORCE(void) -- cgit v1.2.3 From e42e24c3cc072088756d84ef07b492ac2a3ae2e5 Mon Sep 17 00:00:00 2001 From: Matvejchikov Ilya Date: Mon, 24 Jul 2017 16:02:12 +0400 Subject: tcp: remove redundant argument from tcp_rcv_established() The last (4th) argument of tcp_rcv_established() is redundant as it always equals to skb->len and the skb itself is always passed as 2th agrument. There is no reason to have it. Signed-off-by: Ilya V. Matveychikov Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4f056ea79df2..12d68335acd4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -362,7 +362,7 @@ void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len); + const struct tcphdr *th); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); -- cgit v1.2.3 From dce4551cb2adb1ac9a30f8ab5299d614392b3cff Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 25 Jul 2017 17:57:47 +0200 Subject: udp: preserve head state for IP_CMSG_PASSSEC Paul Moore reported a SELinux/IP_PASSSEC regression caused by missing skb->sp at recvmsg() time. We need to preserve the skb head state to process the IP_CMSG_PASSSEC cmsg. With this commit we avoid releasing the skb head state in the BH even if a secpath is attached to the current skb, and stores the skb status (with/without head states) in the scratch area, so that we can access it at skb deallocation time, without incurring in cache-miss penalties. This also avoids misusing the skb CB for ipv6 packets, as introduced by the commit 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing"). Clean a bit the scratch area helpers implementation, to reduce the code differences between 32 and 64 bits build. Reported-by: Paul Moore Fixes: 0a463c78d25b ("udp: avoid a cache miss on dequeue") Fixes: 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing") Signed-off-by: Paolo Abeni Tested-by: Paul Moore Signed-off-by: David S. Miller --- include/net/udp.h | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index 972ce4baab6b..56ce2d2a612d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -305,33 +305,44 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() */ -#if BITS_PER_LONG == 64 - -/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on - * cold cache lines at recvmsg time. - * skb->len can be stored on 16 bits since the udp header has been already - * validated and pulled. - */ struct udp_dev_scratch { - u32 truesize; + /* skb->truesize and the stateless bit are embedded in a single field; + * do not use a bitfield since the compiler emits better/smaller code + * this way + */ + u32 _tsize_state; + +#if BITS_PER_LONG == 64 + /* len and the bit needed to compute skb_csum_unnecessary + * will be on cold cache lines at recvmsg time. + * skb->len can be stored on 16 bits since the udp header has been + * already validated and pulled. + */ u16 len; bool is_linear; bool csum_unnecessary; +#endif }; +static inline struct udp_dev_scratch *udp_skb_scratch(struct sk_buff *skb) +{ + return (struct udp_dev_scratch *)&skb->dev_scratch; +} + +#if BITS_PER_LONG == 64 static inline unsigned int udp_skb_len(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->len; + return udp_skb_scratch(skb)->len; } static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary; + return udp_skb_scratch(skb)->csum_unnecessary; } static inline bool udp_skb_is_linear(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear; + return udp_skb_scratch(skb)->is_linear; } #else -- cgit v1.2.3 From 6b84202c946cd3da3a8daa92c682510e9ed80321 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jul 2017 16:24:59 +0800 Subject: sctp: fix the check for _sctp_walk_params and _sctp_walk_errors Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") tried to fix the issue that it may overstep the chunk end for _sctp_walk_{params, errors} with 'chunk_end > offset(length) + sizeof(length)'. But it introduced a side effect: When processing INIT, it verifies the chunks with 'param.v == chunk_end' after iterating all params by sctp_walk_params(). With the check 'chunk_end > offset(length) + sizeof(length)', it would return when the last param is not yet accessed. Because the last param usually is fwdtsn supported param whose size is 4 and 'chunk_end == offset(length) + sizeof(length)' This is a badly issue even causing sctp couldn't process 4-shakes. Client would always get abort when connecting to server, due to the failure of INIT chunk verification on server. The patch is to use 'chunk_end <= offset(length) + sizeof(length)' instead of 'chunk_end < offset(length) + sizeof(length)' for both _sctp_walk_params and _sctp_walk_errors. Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 980807d7506f..45fd4c6056b5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -469,7 +469,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ - (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ + (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\ (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\ @@ -481,7 +481,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(struct sctp_chunkhdr));\ - ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ + ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\ (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ -- cgit v1.2.3 From c9f2c1ae123a751d4e4f949144500219354d5ee1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 Jul 2017 14:45:09 +0200 Subject: udp6: fix socket leak on early demux When an early demuxed packet reaches __udp6_lib_lookup_skb(), the sk reference is retrieved and used, but the relevant reference count is leaked and the socket destructor is never called. Beyond leaking the sk memory, if there are pending UDP packets in the receive queue, even the related accounted memory is leaked. In the long run, this will cause persistent forward allocation errors and no UDP skbs (both ipv4 and ipv6) will be able to reach the user-space. Fix this by explicitly accessing the early demux reference before the lookup, and properly decreasing the socket reference count after usage. Also drop the skb_steal_sock() in __udp6_lib_lookup_skb(), and the now obsoleted comment about "socket cache". The newly added code is derived from the current ipv4 code for the similar path. v1 -> v2: fixed the __udp6_lib_rcv() return code for resubmission, as suggested by Eric Reported-by: Sam Edwards Reported-by: Marc Haber Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast") Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index 56ce2d2a612d..cc8036987dcb 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -260,6 +260,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, } void udp_v4_early_demux(struct sk_buff *skb); +void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); -- cgit v1.2.3 From 64c83d837329531252a1a0f0dfdd4fd607e1d8e9 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 30 Jul 2017 13:24:49 -0400 Subject: net netlink: Add new type NLA_BITFIELD32 Generic bitflags attribute content sent to the kernel by user. With this netlink attr type the user can either set or unset a flag in the kernel. The value is a bitmap that defines the bit values being set The selector is a bitmask that defines which value bit is to be considered. A check is made to ensure the rules that a kernel subsystem always conforms to bitflags the kernel already knows about. i.e if the user tries to set a bit flag that is not understood then the _it will be rejected_. In the most basic form, the user specifies the attribute policy as: [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags }, where myvalidflags is the bit mask of the flags the kernel understands. If the user _does not_ provide myvalidflags then the attribute will also be rejected. Examples: value = 0x0, and selector = 0x1 implies we are selecting bit 1 and we want to set its value to 0. value = 0x2, and selector = 0x2 implies we are selecting bit 2 and we want to set its value to 1. Suggested-by: Jiri Pirko Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/netlink.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index ef8e6c3a80a6..82dd298b40c7 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -178,6 +178,7 @@ enum { NLA_S16, NLA_S32, NLA_S64, + NLA_BITFIELD32, __NLA_TYPE_MAX, }; @@ -206,6 +207,7 @@ enum { * NLA_MSECS Leaving the length field zero will verify the * given type fits, using it verifies minimum length * just like "All other" + * NLA_BITFIELD32 A 32-bit bitmap/bitselector attribute * All other Minimum length of attribute payload * * Example: @@ -213,11 +215,13 @@ enum { * [ATTR_FOO] = { .type = NLA_U16 }, * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, + * [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags }, * }; */ struct nla_policy { u16 type; u16 len; + void *validation_data; }; /** @@ -1202,6 +1206,18 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla) return tmp; } +/** + * nla_get_bitfield32 - return payload of 32 bitfield attribute + * @nla: nla_bitfield32 attribute + */ +static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla) +{ + struct nla_bitfield32 tmp; + + nla_memcpy(&tmp, nla, sizeof(tmp)); + return tmp; +} + /** * nla_memdup - duplicate attribute memory (kmemdup) * @src: netlink attribute to duplicate from -- cgit v1.2.3 From ac7b848390036dadd4351899d2a23748075916bd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jul 2017 00:02:31 +0200 Subject: netfilter: expect: add and use nf_ct_expect_iterate helpers We have several spots that open-code a expect walk, add a helper that is similar to nf_ct_iterate_destroy/nf_ct_iterate_cleanup. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_expect.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 2ba54feaccd8..818def011110 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -107,6 +107,11 @@ void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); bool nf_ct_remove_expect(struct nf_conntrack_expect *exp); +void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data), void *data); +void nf_ct_expect_iterate_net(struct net *net, + bool (*iter)(struct nf_conntrack_expect *e, void *data), + void *data, u32 portid, int report); + /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me); -- cgit v1.2.3 From 84657984c26fd0b64743a397f3a1a587fa4b575a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jul 2017 00:02:32 +0200 Subject: netfilter: add and use nf_ct_unconfirmed_destroy This also removes __nf_ct_unconfirmed_destroy() call from nf_ct_iterate_cleanup_net, so that function can be used only when missing conntracks from unconfirmed list isn't a problem. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 48407569585d..6e6f678aaac7 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -224,6 +224,9 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); +/* Set all unconfirmed conntrack as dying */ +void nf_ct_unconfirmed_destroy(struct net *); + /* Iterate over all conntracks: if iter returns true, it's deleted. */ void nf_ct_iterate_cleanup_net(struct net *net, int (*iter)(struct nf_conn *i, void *data), -- cgit v1.2.3 From 2cf0c8b3e6942ecafe6ebb1a6d0328a81641bf39 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:40 +0200 Subject: netlink: Introduce nla_strdup() This is similar to strdup() for netlink string attributes. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index ef8e6c3a80a6..c8c2eb5ae55e 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -247,6 +247,7 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); +char *nla_strdup(const struct nlattr *nla, gfp_t flags); int nla_memcpy(void *dest, const struct nlattr *src, int count); int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); int nla_strcmp(const struct nlattr *nla, const char *str); -- cgit v1.2.3 From e46abbcc05aa8a16b0e7f5c94e86d11af9aa2770 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:41 +0200 Subject: netfilter: nf_tables: Allow table names of up to 255 chars Allocate all table names dynamically to allow for arbitrary lengths but introduce NFT_NAME_MAXLEN as an upper sanity boundary. It's value was chosen to allow using a domain name as per RFC 1035. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index bd5be0d691d5..05ecf78ec078 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -957,7 +957,7 @@ struct nft_table { u32 use; u16 flags:14, genmask:2; - char name[NFT_TABLE_MAXNAMELEN]; + char *name; }; enum nft_af_flags { -- cgit v1.2.3 From b7263e071aba736cea9e71cdf2e76dfa7aebd039 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:42 +0200 Subject: netfilter: nf_tables: Allow chain name of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 05ecf78ec078..be1610162ee0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -859,7 +859,7 @@ struct nft_chain { u16 level; u8 flags:6, genmask:2; - char name[NFT_CHAIN_MAXNAMELEN]; + char *name; }; enum nft_chain_type { @@ -1272,7 +1272,7 @@ struct nft_trans_set { struct nft_trans_chain { bool update; - char name[NFT_CHAIN_MAXNAMELEN]; + char *name; struct nft_stats __percpu *stats; u8 policy; }; -- cgit v1.2.3 From 387454901bd62022ac1b04e15bd8d4fcc60bbed4 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:43 +0200 Subject: netfilter: nf_tables: Allow set names of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index be1610162ee0..66ba62fa7d90 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -396,7 +396,7 @@ void nft_unregister_set(struct nft_set_type *type); struct nft_set { struct list_head list; struct list_head bindings; - char name[NFT_SET_MAXNAMELEN]; + char *name; u32 ktype; u32 dtype; u32 objtype; -- cgit v1.2.3 From 615095752100748e221028fc96163c2b78185ae4 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:44 +0200 Subject: netfilter: nf_tables: Allow object names of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 66ba62fa7d90..f9795fe394f3 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1016,7 +1016,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type, */ struct nft_object { struct list_head list; - char name[NFT_OBJ_MAXNAMELEN]; + char *name; struct nft_table *table; u32 genmask:2, use:30; -- cgit v1.2.3 From 4d3a57f23dec59f0a2362e63540b2d01b37afe0a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Jul 2017 11:22:04 +0200 Subject: netfilter: conntrack: do not enable connection tracking unless needed Discussion during NFWS 2017 in Faro has shown that the current conntrack behaviour is unreasonable. Even if conntrack module is loaded on behalf of a single net namespace, its turned on for all namespaces, which is expensive. Commit 481fa373476 ("netfilter: conntrack: add nf_conntrack_default_on sysctl") attempted to provide an alternative to the 'default on' behaviour by adding a sysctl to change it. However, as Eric points out, the sysctl only becomes available once the module is loaded, and then its too late. So we either have to move the sysctl to the core, or, alternatively, change conntrack to become active only once the rule set requires this. This does the latter, conntrack is only enabled when a rule needs it. Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 6d14b36e3a49..1b8de164d744 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -73,21 +73,6 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; -#ifdef CONFIG_SYSCTL -/* Protocol pernet registration. */ -int nf_ct_l3proto_pernet_register(struct net *net, - struct nf_conntrack_l3proto *proto); -#else -static inline int nf_ct_l3proto_pernet_register(struct net *n, - struct nf_conntrack_l3proto *p) -{ - return 0; -} -#endif - -void nf_ct_l3proto_pernet_unregister(struct net *net, - struct nf_conntrack_l3proto *proto); - /* Protocol global registration. */ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto); void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto); -- cgit v1.2.3 From e7942d0633c47c791ece6afa038be9cf977226de Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:18 +0200 Subject: tcp: remove prequeue support prequeue is a tcp receive optimization that moves part of rx processing from bh to process context. This only works if the socket being processed belongs to a process that is blocked in recv on that socket. In practice, this doesn't happen anymore that often because nowadays servers tend to use an event driven (epoll) model. Even normal client applications (web browsers) commonly use many tcp connections in parallel. This has measureable impact only in netperf (which uses plain recv and thus allows prequeue use) from host to locally running vm (~4%), however, there were no changes when using netperf between two physical hosts with ixgbe interfaces. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 12d68335acd4..93f115cfc8f8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1244,17 +1244,6 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) __tcp_checksum_complete(skb); } -/* Prequeue for VJ style copy to user, combined with checksumming. */ - -static inline void tcp_prequeue_init(struct tcp_sock *tp) -{ - tp->ucopy.task = NULL; - tp->ucopy.len = 0; - tp->ucopy.memory = 0; - skb_queue_head_init(&tp->ucopy.prequeue); -} - -bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); int tcp_filter(struct sock *sk, struct sk_buff *skb); -- cgit v1.2.3 From b6690b14386698ce2c19309abad3f17656bdfaea Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:20 +0200 Subject: tcp: remove low_latency sysctl Was only checked by the removed prequeue code. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 93f115cfc8f8..8507c81fb0e9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -256,7 +256,6 @@ extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; -extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; -- cgit v1.2.3 From 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:21 +0200 Subject: tcp: remove header prediction Like prequeue, I am not sure this is overly useful nowadays. If we receive a train of packets, GRO will aggregate them if the headers are the same (HP predates GRO by several years) so we don't get a per-packet benefit, only a per-aggregated-packet one. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8507c81fb0e9..8f11b82b5b5a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -631,29 +631,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } -static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) -{ - tp->pred_flags = htonl((tp->tcp_header_len << 26) | - ntohl(TCP_FLAG_ACK) | - snd_wnd); -} - -static inline void tcp_fast_path_on(struct tcp_sock *tp) -{ - __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); -} - -static inline void tcp_fast_path_check(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && - tp->rcv_wnd && - atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && - !tp->urg_data) - tcp_fast_path_on(tp); -} - /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(struct sock *sk) { -- cgit v1.2.3 From 573aeb0492be3d0e5be9796a0c91abde794c1e36 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:22 +0200 Subject: tcp: remove CA_ACK_SLOWPATH re-indent tcp_ack, and remove CA_ACK_SLOWPATH; it is always set now. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8f11b82b5b5a..3ecb62811004 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -880,9 +880,8 @@ enum tcp_ca_event { /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ enum tcp_ca_ack_event_flags { - CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */ - CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */ - CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */ + CA_ACK_WIN_UPDATE = (1 << 0), /* ACK updated window */ + CA_ACK_ECE = (1 << 1), /* ECE bit is set on ack */ }; /* -- cgit v1.2.3 From 306b13eb3cf9515a8214bbf5d69d811371d05792 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 28 Jul 2017 16:22:41 -0700 Subject: proto_ops: Add locked held versions of sendmsg and sendpage Add new proto_ops sendmsg_locked and sendpage_locked that can be called when the socket lock is already held. Correspondingly, add kernel_sendmsg_locked and kernel_sendpage_locked as front end functions. These functions will be used in zero proxy so that we can take the socket lock in a ULP sendmsg/sendpage and then directly call the backend transport proto_ops functions. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/sock.h | 3 +++ include/net/tcp.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 7c0632c7e870..393c38e9f6aa 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1582,11 +1582,14 @@ int sock_no_shutdown(struct socket *, int); int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *); int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int); int sock_no_sendmsg(struct socket *, struct msghdr *, size_t); +int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len); int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); +ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page, + int offset, size_t size, int flags); /* * Functions to fill in entries in struct proto_ops when a protocol diff --git a/include/net/tcp.h b/include/net/tcp.h index 3ecb62811004..bb1881b4ce48 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -350,8 +350,11 @@ int tcp_v4_rcv(struct sk_buff *skb); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, + size_t size, int flags); ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tcp_release_cb(struct sock *sk); -- cgit v1.2.3 From bbb03029a899679d73e62d7e6ae80348cc5d0054 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 28 Jul 2017 16:22:43 -0700 Subject: strparser: Generalize strparser Generalize strparser from more than just being used in conjunction with read_sock. strparser will also be used in the send path with zero proxy. The primary change is to create strp_process function that performs the critical processing on skbs. The documentation is also updated to reflect the new uses. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 119 +++++++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 57 deletions(-) (limited to 'include/net') diff --git a/include/net/strparser.h b/include/net/strparser.h index 0c28ad97c52f..4fe966a0ad92 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -18,26 +18,26 @@ #define STRP_STATS_INCR(stat) ((stat)++) struct strp_stats { - unsigned long long rx_msgs; - unsigned long long rx_bytes; - unsigned int rx_mem_fail; - unsigned int rx_need_more_hdr; - unsigned int rx_msg_too_big; - unsigned int rx_msg_timeouts; - unsigned int rx_bad_hdr_len; + unsigned long long msgs; + unsigned long long bytes; + unsigned int mem_fail; + unsigned int need_more_hdr; + unsigned int msg_too_big; + unsigned int msg_timeouts; + unsigned int bad_hdr_len; }; struct strp_aggr_stats { - unsigned long long rx_msgs; - unsigned long long rx_bytes; - unsigned int rx_mem_fail; - unsigned int rx_need_more_hdr; - unsigned int rx_msg_too_big; - unsigned int rx_msg_timeouts; - unsigned int rx_bad_hdr_len; - unsigned int rx_aborts; - unsigned int rx_interrupted; - unsigned int rx_unrecov_intr; + unsigned long long msgs; + unsigned long long bytes; + unsigned int mem_fail; + unsigned int need_more_hdr; + unsigned int msg_too_big; + unsigned int msg_timeouts; + unsigned int bad_hdr_len; + unsigned int aborts; + unsigned int interrupted; + unsigned int unrecov_intr; }; struct strparser; @@ -48,16 +48,18 @@ struct strp_callbacks { void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); int (*read_sock_done)(struct strparser *strp, int err); void (*abort_parser)(struct strparser *strp, int err); + void (*lock)(struct strparser *strp); + void (*unlock)(struct strparser *strp); }; -struct strp_rx_msg { +struct strp_msg { int full_len; int offset; }; -static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb) +static inline struct strp_msg *strp_msg(struct sk_buff *skb) { - return (struct strp_rx_msg *)((void *)skb->cb + + return (struct strp_msg *)((void *)skb->cb + offsetof(struct qdisc_skb_cb, data)); } @@ -65,18 +67,18 @@ static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb) struct strparser { struct sock *sk; - u32 rx_stopped : 1; - u32 rx_paused : 1; - u32 rx_aborted : 1; - u32 rx_interrupted : 1; - u32 rx_unrecov_intr : 1; - - struct sk_buff **rx_skb_nextp; - struct timer_list rx_msg_timer; - struct sk_buff *rx_skb_head; - unsigned int rx_need_bytes; - struct delayed_work rx_delayed_work; - struct work_struct rx_work; + u32 stopped : 1; + u32 paused : 1; + u32 aborted : 1; + u32 interrupted : 1; + u32 unrecov_intr : 1; + + struct sk_buff **skb_nextp; + struct timer_list msg_timer; + struct sk_buff *skb_head; + unsigned int need_bytes; + struct delayed_work delayed_work; + struct work_struct work; struct strp_stats stats; struct strp_callbacks cb; }; @@ -84,7 +86,7 @@ struct strparser { /* Must be called with lock held for attached socket */ static inline void strp_pause(struct strparser *strp) { - strp->rx_paused = 1; + strp->paused = 1; } /* May be called without holding lock for attached socket */ @@ -97,37 +99,37 @@ static inline void save_strp_stats(struct strparser *strp, #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += \ strp->stats._stat) - SAVE_PSOCK_STATS(rx_msgs); - SAVE_PSOCK_STATS(rx_bytes); - SAVE_PSOCK_STATS(rx_mem_fail); - SAVE_PSOCK_STATS(rx_need_more_hdr); - SAVE_PSOCK_STATS(rx_msg_too_big); - SAVE_PSOCK_STATS(rx_msg_timeouts); - SAVE_PSOCK_STATS(rx_bad_hdr_len); + SAVE_PSOCK_STATS(msgs); + SAVE_PSOCK_STATS(bytes); + SAVE_PSOCK_STATS(mem_fail); + SAVE_PSOCK_STATS(need_more_hdr); + SAVE_PSOCK_STATS(msg_too_big); + SAVE_PSOCK_STATS(msg_timeouts); + SAVE_PSOCK_STATS(bad_hdr_len); #undef SAVE_PSOCK_STATS - if (strp->rx_aborted) - agg_stats->rx_aborts++; - if (strp->rx_interrupted) - agg_stats->rx_interrupted++; - if (strp->rx_unrecov_intr) - agg_stats->rx_unrecov_intr++; + if (strp->aborted) + agg_stats->aborts++; + if (strp->interrupted) + agg_stats->interrupted++; + if (strp->unrecov_intr) + agg_stats->unrecov_intr++; } static inline void aggregate_strp_stats(struct strp_aggr_stats *stats, struct strp_aggr_stats *agg_stats) { #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat) - SAVE_PSOCK_STATS(rx_msgs); - SAVE_PSOCK_STATS(rx_bytes); - SAVE_PSOCK_STATS(rx_mem_fail); - SAVE_PSOCK_STATS(rx_need_more_hdr); - SAVE_PSOCK_STATS(rx_msg_too_big); - SAVE_PSOCK_STATS(rx_msg_timeouts); - SAVE_PSOCK_STATS(rx_bad_hdr_len); - SAVE_PSOCK_STATS(rx_aborts); - SAVE_PSOCK_STATS(rx_interrupted); - SAVE_PSOCK_STATS(rx_unrecov_intr); + SAVE_PSOCK_STATS(msgs); + SAVE_PSOCK_STATS(bytes); + SAVE_PSOCK_STATS(mem_fail); + SAVE_PSOCK_STATS(need_more_hdr); + SAVE_PSOCK_STATS(msg_too_big); + SAVE_PSOCK_STATS(msg_timeouts); + SAVE_PSOCK_STATS(bad_hdr_len); + SAVE_PSOCK_STATS(aborts); + SAVE_PSOCK_STATS(interrupted); + SAVE_PSOCK_STATS(unrecov_intr); #undef SAVE_PSOCK_STATS } @@ -135,8 +137,11 @@ static inline void aggregate_strp_stats(struct strp_aggr_stats *stats, void strp_done(struct strparser *strp); void strp_stop(struct strparser *strp); void strp_check_rcv(struct strparser *strp); -int strp_init(struct strparser *strp, struct sock *csk, +int strp_init(struct strparser *strp, struct sock *sk, struct strp_callbacks *cb); void strp_data_ready(struct strparser *strp); +int strp_process(struct strparser *strp, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len, + size_t max_msg_size, long timeo); #endif /* __NET_STRPARSER_H_ */ -- cgit v1.2.3 From 46587e4a312780a63132483bc8678c397eca6aff Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 1 Aug 2017 16:32:39 -0400 Subject: net: dsa: remove PHY device argument from .set_eee The DSA switch operations for EEE are only meant to configure a port's MAC EEE settings. The port's PHY EEE settings are accessed by the DSA layer and must be made available via a proper PHY driver. In order to reduce this confusion, remove the phy_device argument from the .set_eee operation. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 88da272d20d0..ce46db323394 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -335,7 +335,6 @@ struct dsa_switch_ops { * EEE setttings */ int (*set_eee)(struct dsa_switch *ds, int port, - struct phy_device *phydev, struct ethtool_eee *e); int (*get_eee)(struct dsa_switch *ds, int port, struct ethtool_eee *e); -- cgit v1.2.3 From 08f500610f39809c107f206cba1f799c98c38054 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 1 Aug 2017 16:32:41 -0400 Subject: net: dsa: rename switch EEE ops To avoid confusion with the PHY EEE settings, rename the .set_eee and .get_eee ops to respectively .set_mac_eee and .get_mac_eee. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index ce46db323394..0b1a0622b33c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -332,12 +332,12 @@ struct dsa_switch_ops { struct phy_device *phy); /* - * EEE setttings + * Port's MAC EEE settings */ - int (*set_eee)(struct dsa_switch *ds, int port, - struct ethtool_eee *e); - int (*get_eee)(struct dsa_switch *ds, int port, - struct ethtool_eee *e); + int (*set_mac_eee)(struct dsa_switch *ds, int port, + struct ethtool_eee *e); + int (*get_mac_eee)(struct dsa_switch *ds, int port, + struct ethtool_eee *e); /* EEPROM access */ int (*get_eeprom_len)(struct dsa_switch *ds); -- cgit v1.2.3 From ffdb5211da1c20354f1b40c204b6cf6c29c68161 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Tue, 1 Aug 2017 12:49:08 +0300 Subject: xfrm: Auto-load xfrm offload modules IPSec crypto offload depends on the protocol-specific offload module (such as esp_offload.ko). When the user installs an SA with crypto-offload, load the offload module automatically, in the same way that the protocol module is loaded (such as esp.ko) Signed-off-by: Ilan Tayari Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index afb4929d7232..5a360100136c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -43,6 +43,8 @@ MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap)) #define MODULE_ALIAS_XFRM_TYPE(family, proto) \ MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto)) +#define MODULE_ALIAS_XFRM_OFFLOAD_TYPE(family, proto) \ + MODULE_ALIAS("xfrm-offload-" __stringify(family) "-" __stringify(proto)) #ifdef CONFIG_XFRM_STATISTICS #define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) @@ -1558,7 +1560,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x); int xfrm_state_mtu(struct xfrm_state *x, int mtu); -int __xfrm_init_state(struct xfrm_state *x, bool init_replay); +int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); -- cgit v1.2.3 From f70f250a77313b542531e1ff7a449cd0ccd83ec0 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 1 Aug 2017 12:49:10 +0300 Subject: net: Allow IPsec GSO for local sockets This patch allows local sockets to make use of XFRM GSO code path. Signed-off-by: Steffen Klassert Signed-off-by: Ilan Tayari --- include/net/xfrm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5a360100136c..18d7de34a5c3 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1858,6 +1858,20 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); +static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) +{ + struct xfrm_state *x = dst->xfrm; + + if (!x || !x->type_offload) + return false; + + if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) && + !dst->child->xfrm) + return true; + + return false; +} + static inline void xfrm_dev_state_delete(struct xfrm_state *x) { struct xfrm_state_offload *xso = &x->xso; @@ -1900,6 +1914,11 @@ static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x { return false; } + +static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) +{ + return false; +} #endif static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) -- cgit v1.2.3 From 2a04aabf5c96c9e25df488949b21223bcc623815 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 1 Aug 2017 12:25:01 +0200 Subject: netfilter: constify nf_conntrack_l3/4proto parameters When a nf_conntrack_l3/4proto parameter is not on the left hand side of an assignment, its address is not taken, and it is not passed to a function that may modify its fields, then it can be declared as const. This change is useful from a documentation point of view, and can possibly facilitate making some nf_conntrack_l3/4proto structures const subsequently. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 14 +++++++------- include/net/netfilter/nf_conntrack_timeout.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7032e044bbe2..b6e27cafb1d9 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -125,23 +125,23 @@ struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto); -void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); +void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); /* Protocol pernet registration. */ int nf_ct_l4proto_pernet_register_one(struct net *net, - struct nf_conntrack_l4proto *proto); + const struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_pernet_unregister_one(struct net *net, - struct nf_conntrack_l4proto *proto); + const struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *proto[], + struct nf_conntrack_l4proto *const proto[], unsigned int num_proto); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *proto[], - unsigned int num_proto); + struct nf_conntrack_l4proto *const proto[], + unsigned int num_proto); /* Protocol global registration. */ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[], unsigned int num_proto); void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[], diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index d40b89355fdd..b222957062b5 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -68,7 +68,7 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct, static inline unsigned int * nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_conn_timeout *timeout_ext; -- cgit v1.2.3 From 2202e35d47fff379fc744e6bd3c111b018cc77df Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 2 Aug 2017 09:56:06 +0200 Subject: ipv4: fib: Remove unused functions Previous patches converted users of these functions to provide offload indication using the nexthop's flags instead of the FIB info's. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 41d580c6185f..ef8992d49bc3 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -124,7 +124,6 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; #endif - unsigned int fib_offload_cnt; struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev @@ -177,18 +176,6 @@ struct fib_result_nl { __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); -static inline void fib_info_offload_inc(struct fib_info *fi) -{ - fi->fib_offload_cnt++; - fi->fib_flags |= RTNH_F_OFFLOAD; -} - -static inline void fib_info_offload_dec(struct fib_info *fi) -{ - if (--fi->fib_offload_cnt == 0) - fi->fib_flags &= ~RTNH_F_OFFLOAD; -} - #define FIB_RES_SADDR(net, res) \ ((FIB_RES_NH(res).nh_saddr_genid == \ atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ -- cgit v1.2.3 From d8238d9dab8fbea22dd04f4e77639c7f7b83eef7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:11 +0800 Subject: sctp: remove the typedef sctp_errhdr_t This patch is to remove the typedef sctp_errhdr_t, and replace with struct sctp_errhdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 45fd4c6056b5..84650fed1e6a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -479,13 +479,13 @@ for (pos.v = chunk->member;\ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ -for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ +for (err = (struct sctp_errhdr *)((void *)chunk_hdr + \ sizeof(struct sctp_chunkhdr));\ - ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\ + ((void *)err + offsetof(struct sctp_errhdr, length) + sizeof(err->length) <=\ (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ - ntohs(err->length) >= sizeof(sctp_errhdr_t); \ - err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) + ntohs(err->length) >= sizeof(struct sctp_errhdr); \ + err = (struct sctp_errhdr *)((void *)err + SCTP_PAD4(ntohs(err->length)))) #define sctp_walk_fwdtsn(pos, chunk)\ _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk)) -- cgit v1.2.3 From 04b1d4e50e82536c12da00ee04a77510c459c844 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:11 +0200 Subject: net: core: Make the FIB notification chain generic The FIB notification chain is currently soley used by IPv4 code. However, we're going to introduce IPv6 FIB offload support, which requires these notification as well. As explained in commit c3852ef7f2f8 ("ipv4: fib: Replay events when registering FIB notifier"), upon registration to the chain, the callee receives a full dump of the FIB tables and rules by traversing all the net namespaces. The integrity of the dump is ensured by a per-namespace sequence counter that is incremented whenever a change to the tables or rules occurs. In order to allow more address families to use the chain, each family is expected to register its fib_notifier_ops in its pernet init. These operations allow the common code to read the family's sequence counter as well as dump its tables and rules in the given net namespace. Additionally, a 'family' parameter is added to sent notifications, so that listeners could distinguish between the different families. Implement the common code that allows listeners to register to the chain and for address families to register their fib_notifier_ops. Subsequent patches will implement these operations in IPv6. In the future, ipmr and ip6mr will be extended to provide these notifications as well. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/fib_notifier.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/net/ip_fib.h | 30 ++++++++---------------------- include/net/net_namespace.h | 1 + include/net/netns/ipv4.h | 1 + 4 files changed, 54 insertions(+), 22 deletions(-) create mode 100644 include/net/fib_notifier.h (limited to 'include/net') diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h new file mode 100644 index 000000000000..241475224f74 --- /dev/null +++ b/include/net/fib_notifier.h @@ -0,0 +1,44 @@ +#ifndef __NET_FIB_NOTIFIER_H +#define __NET_FIB_NOTIFIER_H + +#include +#include +#include + +struct fib_notifier_info { + struct net *net; + int family; +}; + +enum fib_event_type { + FIB_EVENT_ENTRY_REPLACE, + FIB_EVENT_ENTRY_APPEND, + FIB_EVENT_ENTRY_ADD, + FIB_EVENT_ENTRY_DEL, + FIB_EVENT_RULE_ADD, + FIB_EVENT_RULE_DEL, + FIB_EVENT_NH_ADD, + FIB_EVENT_NH_DEL, +}; + +struct fib_notifier_ops { + int family; + struct list_head list; + unsigned int (*fib_seq_read)(struct net *net); + int (*fib_dump)(struct net *net, struct notifier_block *nb); + struct rcu_head rcu; +}; + +int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info); +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)); +int unregister_fib_notifier(struct notifier_block *nb); +struct fib_notifier_ops * +fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net); +void fib_notifier_ops_unregister(struct fib_notifier_ops *ops); + +#endif diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ef8992d49bc3..c0295c3ec5f3 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -188,10 +189,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); #define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \ FIB_RES_SADDR(net, res)) -struct fib_notifier_info { - struct net *net; -}; - struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ u32 dst; @@ -212,25 +209,14 @@ struct fib_nh_notifier_info { struct fib_nh *fib_nh; }; -enum fib_event_type { - FIB_EVENT_ENTRY_REPLACE, - FIB_EVENT_ENTRY_APPEND, - FIB_EVENT_ENTRY_ADD, - FIB_EVENT_ENTRY_DEL, - FIB_EVENT_RULE_ADD, - FIB_EVENT_RULE_DEL, - FIB_EVENT_NH_ADD, - FIB_EVENT_NH_DEL, -}; - -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)); -int unregister_fib_notifier(struct notifier_block *nb); -int call_fib_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_notifier_info *info); -int call_fib_notifiers(struct net *net, enum fib_event_type event_type, +int call_fib4_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, struct fib_notifier_info *info); +int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + +int __net_init fib4_notifier_init(struct net *net); +void __net_exit fib4_notifier_exit(struct net *net); void fib_notify(struct net *net, struct notifier_block *nb); #ifdef CONFIG_IP_MULTIPLE_TABLES diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1c401bd4c2e0..57faa375eab9 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -88,6 +88,7 @@ struct net { /* core fib_rules */ struct list_head rules_ops; + struct list_head fib_notifier_ops; /* protected by net_mutex */ struct net_device *loopback_dev; /* The loopback */ struct netns_core core; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9a14a0850b0e..20d061c805e3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -159,6 +159,7 @@ struct netns_ipv4 { int sysctl_fib_multipath_hash_policy; #endif + struct fib_notifier_ops *notifier_ops; unsigned int fib_seq; /* protected by rtnl_mutex */ atomic_t rt_genid; -- cgit v1.2.3 From 1b2a4440858857f2f93bb2ec5bb3a60f4fcc25be Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:14 +0200 Subject: net: fib_rules: Implement notification logic in core Unlike the routing tables, the FIB rules share a common core, so instead of replicating the same logic for each address family we can simply dump the rules and send notifications from the core itself. To protect the integrity of the dump, a rules-specific sequence counter is added for each address family and incremented whenever a rule is added or deleted (under RTNL). Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/fib_rules.h | 9 +++++++++ include/net/ip_fib.h | 24 ++++++++++++------------ 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index c487bfa2f479..3d7f1cefc6f5 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,7 @@ #include #include #include +#include struct fib_kuid_range { kuid_t start; @@ -57,6 +58,7 @@ struct fib_rules_ops { int addr_size; int unresolved_rules; int nr_goto_rules; + unsigned int fib_rules_seq; int (*action)(struct fib_rule *, struct flowi *, int, @@ -89,6 +91,11 @@ struct fib_rules_ops { struct rcu_head rcu; }; +struct fib_rule_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct fib_rule *rule; +}; + #define FRA_GENERIC_POLICY \ [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ @@ -143,6 +150,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); bool fib_rule_matchall(const struct fib_rule *rule); +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family); +unsigned int fib_rules_seq_read(struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c0295c3ec5f3..1a7f7e424320 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -199,11 +199,6 @@ struct fib_entry_notifier_info { u32 tb_id; }; -struct fib_rule_notifier_info { - struct fib_notifier_info info; /* must be first */ - struct fib_rule *rule; -}; - struct fib_nh_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_nh *fib_nh; @@ -219,13 +214,6 @@ int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); void fib_notify(struct net *net, struct notifier_block *nb); -#ifdef CONFIG_IP_MULTIPLE_TABLES -void fib_rules_notify(struct net *net, struct notifier_block *nb); -#else -static inline void fib_rules_notify(struct net *net, struct notifier_block *nb) -{ -} -#endif struct fib_table { struct hlist_node tb_hlist; @@ -298,6 +286,16 @@ static inline bool fib4_rule_default(const struct fib_rule *rule) return true; } +static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} + +static inline unsigned int fib4_rules_seq_read(struct net *net) +{ + return 0; +} + #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); @@ -343,6 +341,8 @@ out: } bool fib4_rule_default(const struct fib_rule *rule); +int fib4_rules_dump(struct net *net, struct notifier_block *nb); +unsigned int fib4_rules_seq_read(struct net *net); #endif /* CONFIG_IP_MULTIPLE_TABLES */ -- cgit v1.2.3 From e3ea973159d53559c5ae9a9dbc824da9aba6cac0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:15 +0200 Subject: ipv6: fib_rules: Check if rule is a default rule As explained in commit 3c71006d15fd ("ipv4: fib_rules: Check if rule is a default rule"), drivers supporting IPv6 FIB offload need to be able to sanitize the rules they don't support and potentially flush their tables. Add an IPv6 helper to check if a FIB rule is a default rule. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1a88008cc6f5..6000b0dc51ee 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -295,6 +295,7 @@ int ipv6_route_open(struct inode *inode, struct file *file); #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); +bool fib6_rule_default(const struct fib_rule *rule); #else static inline int fib6_rules_init(void) { @@ -304,5 +305,9 @@ static inline void fib6_rules_cleanup(void) { return ; } +static inline bool fib6_rule_default(const struct fib_rule *rule) +{ + return true; +} #endif #endif -- cgit v1.2.3 From 16ab6d7d4d8cc037bb4be12c2b849ac92787e1ff Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:16 +0200 Subject: ipv6: fib: Add FIB notifiers callbacks We're about to add IPv6 FIB offload support, so implement the necessary callbacks in IPv6 code, which will later allow us to add routes and rules notifications. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 11 +++++++++++ include/net/netns/ipv6.h | 1 + 2 files changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6000b0dc51ee..be8ddf3253dc 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -16,10 +16,12 @@ #include #include #include +#include #include #include #include #include +#include #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 @@ -292,6 +294,15 @@ int fib6_init(void); int ipv6_route_open(struct inode *inode, struct file *file); +int call_fib6_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info); +int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + +int __net_init fib6_notifier_init(struct net *net); +void __net_exit fib6_notifier_exit(struct net *net); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index de7745e2edcc..abdf3b40303b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -86,6 +86,7 @@ struct netns_ipv6 { atomic_t dev_addr_genid; atomic_t fib6_sernum; struct seg6_pernet_data *seg6_data; + struct fib_notifier_ops *notifier_ops; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) -- cgit v1.2.3 From df77fe4d9865c6354372876632bcbceeda84f6c8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:17 +0200 Subject: ipv6: fib: Add in-kernel notifications for route add / delete As with IPv4, allow listeners of the FIB notification chain to receive notifications whenever a route is added, replaced or deleted. This is done by placing calls to the FIB notification chain in the two lowest level functions that end up performing these operations - namely, fib6_add_rt2node() and fib6_del_route(). Unlike IPv4, APPEND notifications aren't sent as the kernel doesn't distinguish between "append" (NLM_F_CREATE|NLM_F_APPEND) and "prepend" (NLM_F_CREATE). If NLM_F_EXCL isn't set, duplicate routes are always added after the existing duplicate routes. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index be8ddf3253dc..e2b292b79e99 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -258,6 +258,11 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_table *, struct flowi6 *, int); +struct fib6_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct rt6_info *rt; +}; + /* * exported functions */ -- cgit v1.2.3 From dcb18f762f6ac83a6dc9cdc26dd694dcc167beb7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:18 +0200 Subject: ipv6: fib_rules: Dump rules during registration to FIB chain Allow users of the FIB notification chain to receive a complete view of the IPv6 FIB rules upon registration to the chain. The integrity of the dump is ensured by a per-family sequence counter that is incremented (under RTNL) whenever a rule is added or deleted. All the sequence counters are read (under RTNL) and summed, prior and after the dump. In case the counters differ, then the dump is either restarted or the registration fails. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index e2b292b79e99..dbe5537809f5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -312,6 +312,8 @@ void __net_exit fib6_notifier_exit(struct net *net); int fib6_rules_init(void); void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); +int fib6_rules_dump(struct net *net, struct notifier_block *nb); +unsigned int fib6_rules_seq_read(struct net *net); #else static inline int fib6_rules_init(void) { @@ -325,5 +327,13 @@ static inline bool fib6_rule_default(const struct fib_rule *rule) { return true; } +static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} +static inline unsigned int fib6_rules_seq_read(struct net *net) +{ + return 0; +} #endif #endif -- cgit v1.2.3 From e1ee0a5ba35d999caef94d659b4cb842e63aeb68 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:19 +0200 Subject: ipv6: fib: Dump tables during registration to FIB chain Dump all the FIB tables in each net namespace upon registration to the FIB notification chain so that the callee will have a complete view of the tables. The integrity of the dump is ensured by a per-table sequence counter that is incremented (under write lock) whenever a route is added or deleted from the table. All the sequence counters are read (under each table's read lock) and summed, prior and after the dump. In case the counters differ, then the dump is either restarted or the registration fails. While it's possible for a table to be modified after its counter has been read, this isn't really a problem. In case it happened before it was read the second time, then the comparison at the end will fail. If it happened afterwards, then we're guaranteed to be notified about the change, as the notification block is registered prior to the second read. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index dbe5537809f5..0b3052157e6b 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -235,6 +235,7 @@ struct fib6_table { struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; + unsigned int fib_seq; #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -308,6 +309,9 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); +unsigned int fib6_tables_seq_read(struct net *net); +int fib6_tables_dump(struct net *net, struct notifier_block *nb); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); -- cgit v1.2.3 From a460aa83963b185a32a6377eb486b6e613ac8e38 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:25 +0200 Subject: ipv6: fib: Add helpers to hold / drop a reference on rt6_info Similar to commit 1c677b3d2828 ("ipv4: fib: Add fib_info_hold() helper") and commit b423cb10807b ("ipv4: fib: Export free_fib_info()") add an helper to hold a reference on rt6_info and export rt6_release() to drop it and potentially release the route. This is needed so that drivers capable of FIB offload could hold a reference on the route before queueing it for offload and drop it after the route has been programmed to the device's tables. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0b3052157e6b..1d790ea40ea7 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -187,6 +187,22 @@ static inline void ip6_rt_put(struct rt6_info *rt) dst_release(&rt->dst); } +void rt6_free_pcpu(struct rt6_info *non_pcpu_rt); + +static inline void rt6_hold(struct rt6_info *rt) +{ + atomic_inc(&rt->rt6i_ref); +} + +static inline void rt6_release(struct rt6_info *rt) +{ + if (atomic_dec_and_test(&rt->rt6i_ref)) { + rt6_free_pcpu(rt); + dst_dev_put(&rt->dst); + dst_release(&rt->dst); + } +} + enum fib6_walk_state { #ifdef CONFIG_IPV6_SUBTREES FWS_S, -- cgit v1.2.3 From e1a10ef7fa876f8510aaec36ea5c0cf34baba410 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 3 Aug 2017 09:19:52 -0400 Subject: tcp: introduce tcp_rto_delta_us() helper for xmit timer fix Pure refactor. This helper will be required in the xmit timer fix later in the patch series. (Because the TLP logic will want to make this calculation.) Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 70483296157f..ada65e767b28 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1916,6 +1916,16 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); +/* At how many usecs into the future should the RTO fire? */ +static inline s64 tcp_rto_delta_us(const struct sock *sk) +{ + const struct sk_buff *skb = tcp_write_queue_head(sk); + u32 rto = inet_csk(sk)->icsk_rto; + u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto); + + return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; +} + /* * Save and compile IPv4 options, return a pointer to it */ -- cgit v1.2.3 From 98ba0bd5505dcbb90322a4be07bcfe6b8a18c73f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:37 -0400 Subject: sock: allocate skbs from optmem Add sock_omalloc and sock_ofree to be able to allocate control skbs, for instance for looping errors onto sk_error_queue. The transmit budget (sk_wmem_alloc) is involved in transmit skb shaping, most notably in TCP Small Queues. Using this budget for control packets would impact transmission. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 393c38e9f6aa..0f778d3c4300 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1531,6 +1531,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); void __sock_wfree(struct sk_buff *skb); void sock_wfree(struct sk_buff *skb); +struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, + gfp_t priority); void skb_orphan_partial(struct sk_buff *skb); void sock_rfree(struct sk_buff *skb); void sock_efree(struct sk_buff *skb); -- cgit v1.2.3 From 52267790ef52d7513879238ca9fac22c1733e0e3 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:39 -0400 Subject: sock: add MSG_ZEROCOPY The kernel supports zerocopy sendmsg in virtio and tap. Expand the infrastructure to support other socket types. Introduce a completion notification channel over the socket error queue. Notifications are returned with ee_origin SO_EE_ORIGIN_ZEROCOPY. ee_errno is 0 to avoid blocking the send/recv path on receiving notifications. Add reference counting, to support the skb split, merge, resize and clone operations possible with SOCK_STREAM and other socket types. The patch does not yet modify any datapaths. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 0f778d3c4300..fe1a0bc25cd3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -294,6 +294,7 @@ struct sock_common { * @sk_stamp: time stamp of last packet received * @sk_tsflags: SO_TIMESTAMPING socket options * @sk_tskey: counter to disambiguate concurrent tstamp requests + * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals * @sk_user_data: RPC layer private data * @sk_frag: cached page frag @@ -462,6 +463,7 @@ struct sock { u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; + atomic_t sk_zckey; struct socket *sk_socket; void *sk_user_data; #ifdef CONFIG_SECURITY -- cgit v1.2.3 From 4ebc1e3cfcd8778e2150bdb799b19e85348b8efa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:28:57 +0200 Subject: net: sched: remove unneeded tcf_em_tree_change Since tcf_em_tree_validate could be always called on a newly created filter, there is no need for this change function. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 537d0a0ad4c4..f4462ec8b2f4 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -332,26 +332,6 @@ int tcf_em_tree_dump(struct sk_buff *, struct tcf_ematch_tree *, int); int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *, struct tcf_pkt_info *); -/** - * tcf_em_tree_change - replace ematch tree of a running classifier - * - * @tp: classifier kind handle - * @dst: destination ematch tree variable - * @src: source ematch tree (temporary tree from tcf_em_tree_validate) - * - * This functions replaces the ematch tree in @dst with the ematch - * tree in @src. The classifier in charge of the ematch tree may be - * running. - */ -static inline void tcf_em_tree_change(struct tcf_proto *tp, - struct tcf_ematch_tree *dst, - struct tcf_ematch_tree *src) -{ - tcf_tree_lock(tp); - memcpy(dst, src, sizeof(*dst)); - tcf_tree_unlock(tp); -} - /** * tcf_em_tree_match - evaulate an ematch tree * @@ -386,7 +366,6 @@ struct tcf_ematch_tree { #define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0) #define tcf_em_tree_destroy(t) do { (void)(t); } while(0) #define tcf_em_tree_dump(skb, t, tlv) (0) -#define tcf_em_tree_change(tp, dst, src) do { } while(0) #define tcf_em_tree_match(skb, t, info) ((void)(info), 1) #endif /* CONFIG_NET_EMATCH */ -- cgit v1.2.3 From 3bcc0cec818fa969fe555b44443347211ed787a3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:28:58 +0200 Subject: net: sched: change names of action number helpers to be aligned with the rest The rest of the helpers are named tcf_exts_*, so change the name of the action number helpers to be aligned. While at it, change to inline functions. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f4462ec8b2f4..7f2563636df0 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -199,17 +199,35 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, return 0; } +/** + * tcf_exts_has_actions - check if at least one action is present + * @exts: tc filter extensions handle + * + * Returns true if at least one action is present. + */ +static inline bool tcf_exts_has_actions(struct tcf_exts *exts) +{ #ifdef CONFIG_NET_CLS_ACT + return exts->nr_actions; +#else + return false; +#endif +} -#define tc_no_actions(_exts) ((_exts)->nr_actions == 0) -#define tc_single_action(_exts) ((_exts)->nr_actions == 1) - -#else /* CONFIG_NET_CLS_ACT */ - -#define tc_no_actions(_exts) true -#define tc_single_action(_exts) false - -#endif /* CONFIG_NET_CLS_ACT */ +/** + * tcf_exts_has_one_action - check if exactly one action is present + * @exts: tc filter extensions handle + * + * Returns true if exactly one action is present. + */ +static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + return exts->nr_actions == 1; +#else + return false; +#endif +} int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, -- cgit v1.2.3 From af69afc551eb9f9b1f2cc3295e2dfcdaa1dc948d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:28:59 +0200 Subject: net: sched: use tcf_exts_has_actions in tcf_exts_exec Use the tcf_exts_has_actions helper instead or directly testing exts->nr_actions in tcf_exts_exec. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 7f2563636df0..322a2823cc6a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -176,29 +176,6 @@ tcf_exts_stats_update(const struct tcf_exts *exts, #endif } -/** - * tcf_exts_exec - execute tc filter extensions - * @skb: socket buffer - * @exts: tc filter extensions handle - * @res: desired result - * - * Executes all configured extensions. Returns 0 on a normal execution, - * a negative number if the filter must be considered unmatched or - * a positive action code (TC_ACT_*) which must be returned to the - * underlying layer. - */ -static inline int -tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, - struct tcf_result *res) -{ -#ifdef CONFIG_NET_CLS_ACT - if (exts->nr_actions) - return tcf_action_exec(skb, exts->actions, exts->nr_actions, - res); -#endif - return 0; -} - /** * tcf_exts_has_actions - check if at least one action is present * @exts: tc filter extensions handle @@ -229,6 +206,29 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) #endif } +/** + * tcf_exts_exec - execute tc filter extensions + * @skb: socket buffer + * @exts: tc filter extensions handle + * @res: desired result + * + * Executes all configured extensions. Returns 0 on a normal execution, + * a negative number if the filter must be considered unmatched or + * a positive action code (TC_ACT_*) which must be returned to the + * underlying layer. + */ +static inline int +tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, + struct tcf_result *res) +{ +#ifdef CONFIG_NET_CLS_ACT + if (tcf_exts_has_actions(exts)) + return tcf_action_exec(skb, exts->actions, exts->nr_actions, + res); +#endif + return 0; +} + int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr); -- cgit v1.2.3 From 6fc6d06e5371507e68c6904a3423622b0e465b64 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:00 +0200 Subject: net: sched: remove redundant helpers tcf_exts_is_predicative and tcf_exts_is_available These two helpers are doing the same as tcf_exts_has_actions, so remove them and use tcf_exts_has_actions instead. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 322a2823cc6a..817badf733b5 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -113,36 +113,6 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) return 0; } -/** - * tcf_exts_is_predicative - check if a predicative extension is present - * @exts: tc filter extensions handle - * - * Returns 1 if a predicative extension is present, i.e. an extension which - * might cause further actions and thus overrule the regular tcf_result. - */ -static inline int -tcf_exts_is_predicative(struct tcf_exts *exts) -{ -#ifdef CONFIG_NET_CLS_ACT - return exts->nr_actions; -#else - return 0; -#endif -} - -/** - * tcf_exts_is_available - check if at least one extension is present - * @exts: tc filter extensions handle - * - * Returns 1 if at least one extension is present. - */ -static inline int -tcf_exts_is_available(struct tcf_exts *exts) -{ - /* All non-predicative extensions must be added here. */ - return tcf_exts_is_predicative(exts); -} - static inline void tcf_exts_to_list(const struct tcf_exts *exts, struct list_head *actions) { -- cgit v1.2.3 From af089e701adfb5898fee00a56ea4bb421edc308d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:01 +0200 Subject: net: sched: fix return value of tcf_exts_exec Return the defined TC_ACT_OK instead of 0. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 817badf733b5..61ce521688b2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -182,7 +182,7 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) * @exts: tc filter extensions handle * @res: desired result * - * Executes all configured extensions. Returns 0 on a normal execution, + * Executes all configured extensions. Returns TC_ACT_OK on a normal execution, * a negative number if the filter must be considered unmatched or * a positive action code (TC_ACT_*) which must be returned to the * underlying layer. @@ -196,7 +196,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, return tcf_action_exec(skb, exts->actions, exts->nr_actions, res); #endif - return 0; + return TC_ACT_OK; } int tcf_exts_validate(struct net *net, struct tcf_proto *tp, -- cgit v1.2.3 From ec1a9cca0e13391167567964fd04e61a39d6a4ae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:02 +0200 Subject: net: sched: remove check for number of actions in tcf_exts_exec Leave it to tcf_action_exec to return TC_ACT_OK in case there is no action present. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 61ce521688b2..b8959c9a190d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -192,9 +192,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, struct tcf_result *res) { #ifdef CONFIG_NET_CLS_ACT - if (tcf_exts_has_actions(exts)) - return tcf_action_exec(skb, exts->actions, exts->nr_actions, - res); + return tcf_action_exec(skb, exts->actions, exts->nr_actions, res); #endif return TC_ACT_OK; } -- cgit v1.2.3 From 9b0d4446b56904b59ae3809913b0ac760fa941a6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:15 +0200 Subject: net: sched: avoid atomic swap in tcf_exts_change tcf_exts_change is always called on newly created exts, which are not used on fastpath. Therefore, simple struct copy is enough. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index b8959c9a190d..e0c54f111467 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -201,8 +201,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr); void tcf_exts_destroy(struct tcf_exts *exts); -void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, - struct tcf_exts *src); +void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, -- cgit v1.2.3 From 91ed1e666a4ea2e260452a7d7d311ac5ae852cba Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 3 Aug 2017 18:07:06 +0200 Subject: ip/options: explicitly provide net ns to __ip_options_echo() __ip_options_echo() uses the current network namespace, and currently retrives it via skb->dst->dev. This commit adds an explicit 'net' argument to __ip_options_echo() and update all the call sites to provide it, usually via a simpler sock_net(). After this change, __ip_options_echo() no more needs to access skb->dst and we can drop a couple of hack to preserve such info in the rx path. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/ip.h | 9 +++++---- include/net/tcp.h | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 821cedcc8e73..9e59dcf1787a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -567,11 +567,12 @@ int ip_forward(struct sk_buff *skb); void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag); -int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, - const struct ip_options *sopt); -static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) +int __ip_options_echo(struct net *net, struct ip_options *dopt, + struct sk_buff *skb, const struct ip_options *sopt); +static inline int ip_options_echo(struct net *net, struct ip_options *dopt, + struct sk_buff *skb) { - return __ip_options_echo(dopt, skb, &IPCB(skb)->opt); + return __ip_options_echo(net, dopt, skb, &IPCB(skb)->opt); } void ip_options_fragment(struct sk_buff *skb); diff --git a/include/net/tcp.h b/include/net/tcp.h index bb1881b4ce48..5173fecde495 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1885,7 +1885,8 @@ extern void tcp_rack_reo_timeout(struct sock *sk); /* * Save and compile IPv4 options, return a pointer to it */ -static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) +static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net, + struct sk_buff *skb) { const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; struct ip_options_rcu *dopt = NULL; @@ -1894,7 +1895,7 @@ static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) int opt_size = sizeof(*dopt) + opt->optlen; dopt = kmalloc(opt_size, GFP_ATOMIC); - if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) { + if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) { kfree(dopt); dopt = NULL; } -- cgit v1.2.3 From 233e7936c84b7d7dd90c10e2ba27abb5ab42956f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:51 +0800 Subject: sctp: remove the typedef sctp_lower_cwnd_t This patch is to remove the typedef sctp_lower_cwnd_t, and replace with enum sctp_lower_cwnd in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 9b18044c551e..761064ee3ac5 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -378,12 +378,12 @@ typedef enum { } sctp_retransmit_reason_t; /* Reasons to lower cwnd. */ -typedef enum { +enum sctp_lower_cwnd { SCTP_LOWER_CWND_T3_RTX, SCTP_LOWER_CWND_FAST_RTX, SCTP_LOWER_CWND_ECNE, SCTP_LOWER_CWND_INACTIVE, -} sctp_lower_cwnd_t; +}; /* SCTP-AUTH Necessary constants */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 66cd7639b912..53802d8872d7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -950,7 +950,8 @@ int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32); -void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); +void sctp_transport_lower_cwnd(struct sctp_transport *t, + enum sctp_lower_cwnd reason); void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); -- cgit v1.2.3 From 125c29820252bfa5bf8081e75618e4ee7e9487da Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:52 +0800 Subject: sctp: remove the typedef sctp_retransmit_reason_t This patch is to remove the typedef sctp_retransmit_reason_t, and replace with enum sctp_retransmit_reason in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 761064ee3ac5..922fba5880d6 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -370,12 +370,12 @@ typedef enum { peer */ /* Reasons to retransmit. */ -typedef enum { +enum sctp_retransmit_reason { SCTP_RTXR_T3_RTX, SCTP_RTXR_FAST_RTX, SCTP_RTXR_PMTUD, SCTP_RTXR_T1_RTX, -} sctp_retransmit_reason_t; +}; /* Reasons to lower cwnd. */ enum sctp_lower_cwnd { diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 53802d8872d7..5e872acee6e3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1054,8 +1054,8 @@ int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *); int sctp_outq_is_empty(const struct sctp_outq *); void sctp_outq_restart(struct sctp_outq *); -void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, - sctp_retransmit_reason_t); +void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, + enum sctp_retransmit_reason reason); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); void sctp_prsctp_prune(struct sctp_association *asoc, -- cgit v1.2.3 From 701ef3e6c74be771a76be39817941e68e7228644 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:53 +0800 Subject: sctp: remove the typedef sctp_scope_policy_t This patch is to remove the typedef sctp_scope_policy_t and keep it's members as an anonymous enum. It is also to define SCTP_SCOPE_POLICY_MAX to replace the num 3 in sysctl.c to make codes clear. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 922fba5880d6..acb03eb64842 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -341,12 +341,14 @@ typedef enum { SCTP_SCOPE_UNUSABLE, /* IPv4 unusable addresses */ } sctp_scope_t; -typedef enum { +enum { SCTP_SCOPE_POLICY_DISABLE, /* Disable IPv4 address scoping */ SCTP_SCOPE_POLICY_ENABLE, /* Enable IPv4 address scoping */ SCTP_SCOPE_POLICY_PRIVATE, /* Follow draft but allow IPv4 private addresses */ SCTP_SCOPE_POLICY_LINK, /* Follow draft but allow IPv4 link local addresses */ -} sctp_scope_policy_t; +}; + +#define SCTP_SCOPE_POLICY_MAX SCTP_SCOPE_POLICY_LINK /* Based on IPv4 scoping , * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24, -- cgit v1.2.3 From 1c662018d2d41ecc5550cbd74d29d2d32c164ed3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:54 +0800 Subject: sctp: remove the typedef sctp_scope_t This patch is to remove the typedef sctp_scope_t, and replace with enum sctp_scope in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/sctp.h | 4 ++-- include/net/sctp/structs.h | 17 +++++++++-------- 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index acb03eb64842..0503bb70e5d9 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -333,13 +333,13 @@ typedef enum { * At this point, the IPv6 scopes will be mapped to these internal scopes * as much as possible. */ -typedef enum { +enum sctp_scope { SCTP_SCOPE_GLOBAL, /* IPv4 global addresses */ SCTP_SCOPE_PRIVATE, /* IPv4 private addresses */ SCTP_SCOPE_LINK, /* IPv4 link local address */ SCTP_SCOPE_LOOPBACK, /* IPv4 loopback address */ SCTP_SCOPE_UNUSABLE, /* IPv4 unusable addresses */ -} sctp_scope_t; +}; enum { SCTP_SCOPE_POLICY_DISABLE, /* Disable IPv4 address scoping */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 84650fed1e6a..ca66b033ec38 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -94,8 +94,8 @@ /* * sctp/protocol.c */ -int sctp_copy_local_addr_list(struct net *, struct sctp_bind_addr *, - sctp_scope_t, gfp_t gfp, int flags); +int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *addr, + enum sctp_scope, gfp_t gfp, int flags); struct sctp_pf *sctp_get_pf_specific(sa_family_t family); int sctp_register_pf(struct sctp_pf *, sa_family_t); void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5e872acee6e3..d771d418481f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -449,7 +449,7 @@ struct sctp_af { int (*addr_valid) (union sctp_addr *, struct sctp_sock *, const struct sk_buff *); - sctp_scope_t (*scope) (union sctp_addr *); + enum sctp_scope (*scope)(union sctp_addr *); void (*inaddr_any) (union sctp_addr *, __be16); int (*is_any) (const union sctp_addr *); int (*available) (union sctp_addr *, @@ -1111,7 +1111,7 @@ void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port); void sctp_bind_addr_free(struct sctp_bind_addr *); int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, - sctp_scope_t scope, gfp_t gfp, + enum sctp_scope scope, gfp_t gfp, int flags); int sctp_bind_addr_dup(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, @@ -1135,8 +1135,9 @@ union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len, __u16 port, gfp_t gfp); -sctp_scope_t sctp_scope(const union sctp_addr *); -int sctp_in_scope(struct net *net, const union sctp_addr *addr, const sctp_scope_t scope); +enum sctp_scope sctp_scope(const union sctp_addr *addr); +int sctp_in_scope(struct net *net, const union sctp_addr *addr, + const enum sctp_scope scope); int sctp_is_any(struct sock *sk, const union sctp_addr *addr); int sctp_is_ep_boundall(struct sock *sk); @@ -1925,8 +1926,8 @@ static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base) struct sctp_association * -sctp_association_new(const struct sctp_endpoint *, const struct sock *, - sctp_scope_t scope, gfp_t gfp); +sctp_association_new(const struct sctp_endpoint *ep, const struct sock *sk, + enum sctp_scope scope, gfp_t gfp); void sctp_association_free(struct sctp_association *); void sctp_association_put(struct sctp_association *); void sctp_association_hold(struct sctp_association *); @@ -1967,8 +1968,8 @@ void sctp_assoc_set_primary(struct sctp_association *, struct sctp_transport *); void sctp_assoc_del_nonprimary_peers(struct sctp_association *, struct sctp_transport *); -int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *, - sctp_scope_t, gfp_t); +int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, + enum sctp_scope scope, gfp_t gfp); int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *, struct sctp_cookie*, gfp_t gfp); -- cgit v1.2.3 From 0ceaeebe28d46e49c865f88a3e3ca75f6cf13e1f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:55 +0800 Subject: sctp: remove the typedef sctp_transport_cmd_t This patch is to remove the typedef sctp_transport_cmd_t, and replace with enum sctp_transport_cmd in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 0503bb70e5d9..8ce6d3263e41 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -320,11 +320,11 @@ typedef enum { } sctp_xmit_t; /* These are the commands for manipulating transports. */ -typedef enum { +enum sctp_transport_cmd { SCTP_TRANSPORT_UP, SCTP_TRANSPORT_DOWN, SCTP_TRANSPORT_PF, -} sctp_transport_cmd_t; +}; /* These are the address scopes defined mainly for IPv4 addresses * based on draft of SCTP IPv4 scoping . diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d771d418481f..d098d1c4ed74 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1947,9 +1947,10 @@ void sctp_assoc_del_peer(struct sctp_association *asoc, const union sctp_addr *addr); void sctp_assoc_rm_peer(struct sctp_association *asoc, struct sctp_transport *peer); -void sctp_assoc_control_transport(struct sctp_association *, - struct sctp_transport *, - sctp_transport_cmd_t, sctp_sn_error_t); +void sctp_assoc_control_transport(struct sctp_association *asoc, + struct sctp_transport *transport, + enum sctp_transport_cmd command, + sctp_sn_error_t error); struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32); struct sctp_transport *sctp_assoc_is_match(struct sctp_association *, struct net *, -- cgit v1.2.3 From 8496561430df6e2c21b4ed37bd93604d3acaf5d6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:56 +0800 Subject: sctp: remove the typedef sctp_sock_state_t This patch is to remove the typedef sctp_sock_state_t, and replace with enum sctp_sock_state in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 6 +++--- include/net/sctp/sctp.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 8ce6d3263e41..049868ea7ae9 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -42,7 +42,7 @@ #include #include /* For ipv6hdr. */ -#include /* For TCP states used in sctp_sock_state_t */ +#include /* For TCP states used in enum sctp_sock_state */ /* Value used for stream negotiation. */ enum { SCTP_MAX_STREAM = 0xffff }; @@ -214,13 +214,13 @@ typedef enum { * - A socket in SCTP_SS_ESTABLISHED state indicates that it has a single * association. */ -typedef enum { +enum sctp_sock_state { SCTP_SS_CLOSED = TCP_CLOSE, SCTP_SS_LISTENING = TCP_LISTEN, SCTP_SS_ESTABLISHING = TCP_SYN_SENT, SCTP_SS_ESTABLISHED = TCP_ESTABLISHED, SCTP_SS_CLOSING = TCP_CLOSE_WAIT, -} sctp_sock_state_t; +}; /* These functions map various type to printable names. */ const char *sctp_cname(const sctp_subtype_t); /* chunk types */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ca66b033ec38..0022bc713434 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -565,7 +565,8 @@ static inline int __sctp_state(const struct sctp_association *asoc, /* Is the socket in this state? */ #define sctp_sstate(sk, state) __sctp_sstate((sk), (SCTP_SS_##state)) -static inline int __sctp_sstate(const struct sock *sk, sctp_sock_state_t state) +static inline int __sctp_sstate(const struct sock *sk, + enum sctp_sock_state state) { return sk->sk_state == state; } -- cgit v1.2.3 From 86b36f2a9b9ea58dd2100b0e6f1f45a1f67ee95e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:57 +0800 Subject: sctp: remove the typedef sctp_xmit_t This patch is to remove the typedef sctp_xmit_t, and replace with enum sctp_xmit in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 049868ea7ae9..311b2e3773e8 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -312,12 +312,12 @@ enum { SCTP_MAX_GABS = 16 }; /* These return values describe the success or failure of a number of * routines which form the lower interface to SCTP_outqueue. */ -typedef enum { +enum sctp_xmit { SCTP_XMIT_OK, SCTP_XMIT_PMTU_FULL, SCTP_XMIT_RWND_FULL, SCTP_XMIT_DELAY, -} sctp_xmit_t; +}; /* These are the commands for manipulating transports. */ enum sctp_transport_cmd { diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d098d1c4ed74..6fab67e7f1e5 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -697,10 +697,11 @@ struct sctp_packet { void sctp_packet_init(struct sctp_packet *, struct sctp_transport *, __u16 sport, __u16 dport); void sctp_packet_config(struct sctp_packet *, __u32 vtag, int); -sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *, - struct sctp_chunk *, int, gfp_t); -sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *, - struct sctp_chunk *); +enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk, + int one_packet, gfp_t gfp); +enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk); int sctp_packet_transmit(struct sctp_packet *, gfp_t); void sctp_packet_free(struct sctp_packet *); -- cgit v1.2.3 From 4785c7ae1848244da3435ba5269f6288c15975c7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:58 +0800 Subject: sctp: remove the typedef sctp_ierror_t This patch is to remove the typedef sctp_ierror_t, and replace with enum sctp_ierror in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 311b2e3773e8..9a694653b708 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -155,8 +155,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) - sizeof(struct sctp_data_chunk))) /* Internal error codes */ -typedef enum { - +enum sctp_ierror { SCTP_IERROR_NO_ERROR = 0, SCTP_IERROR_BASE = 1000, SCTP_IERROR_NO_COOKIE, @@ -177,7 +176,7 @@ typedef enum { SCTP_IERROR_PROTO_VIOLATION, SCTP_IERROR_ERROR, SCTP_IERROR_ABORT, -} sctp_ierror_t; +}; -- cgit v1.2.3 From 5210601945f5aedaf2d7f13a88436e27a39c6a8a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:59 +0800 Subject: sctp: remove the typedef sctp_state_t This patch is to remove the typedef sctp_state_t, and replace with enum sctp_state in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 4 ++-- include/net/sctp/constants.h | 4 ++-- include/net/sctp/sctp.h | 2 +- include/net/sctp/sm.h | 10 +++++----- include/net/sctp/structs.h | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 1d5f6ff3f440..be12ec946628 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -126,7 +126,7 @@ typedef union { __u8 u8; int error; __be16 err; - sctp_state_t state; + enum sctp_state state; sctp_event_timeout_t to; struct sctp_chunk *chunk; struct sctp_association *asoc; @@ -167,7 +167,7 @@ SCTP_ARG_CONSTRUCTOR(U16, __u16, u16) SCTP_ARG_CONSTRUCTOR(U8, __u8, u8) SCTP_ARG_CONSTRUCTOR(ERROR, int, error) SCTP_ARG_CONSTRUCTOR(PERR, __be16, err) /* protocol error */ -SCTP_ARG_CONSTRUCTOR(STATE, sctp_state_t, state) +SCTP_ARG_CONSTRUCTOR(STATE, enum sctp_state, state) SCTP_ARG_CONSTRUCTOR(TO, sctp_event_timeout_t, to) SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 9a694653b708..db9f40b657ba 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -181,7 +181,7 @@ enum sctp_ierror { /* SCTP state defines for internal state machine */ -typedef enum { +enum sctp_state { SCTP_STATE_CLOSED = 0, SCTP_STATE_COOKIE_WAIT = 1, @@ -192,7 +192,7 @@ typedef enum { SCTP_STATE_SHUTDOWN_RECEIVED = 6, SCTP_STATE_SHUTDOWN_ACK_SENT = 7, -} sctp_state_t; +}; #define SCTP_STATE_MAX SCTP_STATE_SHUTDOWN_ACK_SENT #define SCTP_STATE_NUM_STATES (SCTP_STATE_MAX + 1) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 0022bc713434..24ff7931d38c 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -558,7 +558,7 @@ static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style) /* Is the association in this state? */ #define sctp_state(asoc, state) __sctp_state((asoc), (SCTP_STATE_##state)) static inline int __sctp_state(const struct sctp_association *asoc, - sctp_state_t state) + enum sctp_state state) { return asoc->state == state; } diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 860f378333b5..281e8d1e0752 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -175,10 +175,10 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); -const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *, - sctp_event_t, - sctp_state_t, - sctp_subtype_t); +const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, + sctp_event_t event_type, + enum sctp_state state, + sctp_subtype_t event_subtype); int sctp_chunk_iif(const struct sctp_chunk *); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, @@ -313,7 +313,7 @@ struct sctp_chunk *sctp_process_strreset_resp( /* Prototypes for statetable processing. */ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, - sctp_state_t state, + enum sctp_state state, struct sctp_endpoint *, struct sctp_association *asoc, void *event_arg, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6fab67e7f1e5..fbe6e81b889b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1577,7 +1577,7 @@ struct sctp_association { * * State takes values from SCTP_STATE_*. */ - sctp_state_t state; + enum sctp_state state; /* Overall : The overall association error count. * Error Count : [Clear this any time I get something.] -- cgit v1.2.3 From dc1e0e6eb8b2e2c6b5c6cdfa4f0cc789e9516de5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:00 +0800 Subject: sctp: remove the typedef sctp_event_primitive_t This patch is to remove the typedef sctp_event_primitive_t, and replace with enum sctp_event_primitive in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index db9f40b657ba..162ee954a6af 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -110,7 +110,7 @@ typedef enum { #define SCTP_NUM_OTHER_TYPES (SCTP_EVENT_OTHER_MAX + 1) /* These are primitive requests from the ULP. */ -typedef enum { +enum sctp_event_primitive { SCTP_PRIMITIVE_ASSOCIATE = 0, SCTP_PRIMITIVE_SHUTDOWN, SCTP_PRIMITIVE_ABORT, @@ -118,7 +118,7 @@ typedef enum { SCTP_PRIMITIVE_REQUESTHEARTBEAT, SCTP_PRIMITIVE_ASCONF, SCTP_PRIMITIVE_RECONF, -} sctp_event_primitive_t; +}; #define SCTP_EVENT_PRIMITIVE_MAX SCTP_PRIMITIVE_RECONF #define SCTP_NUM_PRIMITIVE_TYPES (SCTP_EVENT_PRIMITIVE_MAX + 1) @@ -133,7 +133,7 @@ typedef union { enum sctp_cid chunk; sctp_event_timeout_t timeout; sctp_event_other_t other; - sctp_event_primitive_t primitive; + enum sctp_event_primitive primitive; } sctp_subtype_t; #define SCTP_SUBTYPE_CONSTRUCTOR(_name, _type, _elt) \ @@ -144,7 +144,7 @@ SCTP_ST_## _name (_type _arg) \ SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other) -SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) +SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) #define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA) -- cgit v1.2.3 From a0f098d0385a40f9c4c8a2ce48d075f77ea7edd8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:01 +0800 Subject: sctp: remove the typedef sctp_event_other_t This patch is to remove the typedef sctp_event_other_t, and replace with enum sctp_event_other in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 162ee954a6af..fd8a80ec573f 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -101,10 +101,10 @@ typedef enum { #define SCTP_EVENT_TIMEOUT_MAX SCTP_EVENT_TIMEOUT_AUTOCLOSE #define SCTP_NUM_TIMEOUT_TYPES (SCTP_EVENT_TIMEOUT_MAX + 1) -typedef enum { +enum sctp_event_other { SCTP_EVENT_NO_PENDING_TSN = 0, SCTP_EVENT_ICMP_PROTO_UNREACH, -} sctp_event_other_t; +}; #define SCTP_EVENT_OTHER_MAX SCTP_EVENT_ICMP_PROTO_UNREACH #define SCTP_NUM_OTHER_TYPES (SCTP_EVENT_OTHER_MAX + 1) @@ -132,7 +132,7 @@ enum sctp_event_primitive { typedef union { enum sctp_cid chunk; sctp_event_timeout_t timeout; - sctp_event_other_t other; + enum sctp_event_other other; enum sctp_event_primitive primitive; } sctp_subtype_t; @@ -143,7 +143,7 @@ SCTP_ST_## _name (_type _arg) \ SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) -SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other) +SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) -- cgit v1.2.3 From 19cd1592a24754e16d48398812d5f69b63f674dd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:02 +0800 Subject: sctp: remove the typedef sctp_event_timeout_t This patch is to remove the typedef sctp_event_timeout_t, and replace with enum sctp_event_timeout in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 4 ++-- include/net/sctp/constants.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index be12ec946628..376cb78b6247 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -127,7 +127,7 @@ typedef union { int error; __be16 err; enum sctp_state state; - sctp_event_timeout_t to; + enum sctp_event_timeout to; struct sctp_chunk *chunk; struct sctp_association *asoc; struct sctp_transport *transport; @@ -168,7 +168,7 @@ SCTP_ARG_CONSTRUCTOR(U8, __u8, u8) SCTP_ARG_CONSTRUCTOR(ERROR, int, error) SCTP_ARG_CONSTRUCTOR(PERR, __be16, err) /* protocol error */ SCTP_ARG_CONSTRUCTOR(STATE, enum sctp_state, state) -SCTP_ARG_CONSTRUCTOR(TO, sctp_event_timeout_t, to) +SCTP_ARG_CONSTRUCTOR(TO, enum sctp_event_timeout, to) SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc) SCTP_ARG_CONSTRUCTOR(TRANSPORT, struct sctp_transport *, transport) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index fd8a80ec573f..fb931f07e83a 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -84,7 +84,7 @@ typedef enum { * SCTP_ULP_* to the list of possible chunks. */ -typedef enum { +enum sctp_event_timeout { SCTP_EVENT_TIMEOUT_NONE = 0, SCTP_EVENT_TIMEOUT_T1_COOKIE, SCTP_EVENT_TIMEOUT_T1_INIT, @@ -96,7 +96,7 @@ typedef enum { SCTP_EVENT_TIMEOUT_RECONF, SCTP_EVENT_TIMEOUT_SACK, SCTP_EVENT_TIMEOUT_AUTOCLOSE, -} sctp_event_timeout_t; +}; #define SCTP_EVENT_TIMEOUT_MAX SCTP_EVENT_TIMEOUT_AUTOCLOSE #define SCTP_NUM_TIMEOUT_TYPES (SCTP_EVENT_TIMEOUT_MAX + 1) @@ -131,7 +131,7 @@ enum sctp_event_primitive { typedef union { enum sctp_cid chunk; - sctp_event_timeout_t timeout; + enum sctp_event_timeout timeout; enum sctp_event_other other; enum sctp_event_primitive primitive; } sctp_subtype_t; @@ -142,7 +142,7 @@ SCTP_ST_## _name (_type _arg) \ { sctp_subtype_t _retval; _retval._elt = _arg; return _retval; } SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) -SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) +SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, enum sctp_event_timeout, timeout) SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) -- cgit v1.2.3 From 61f0eb072294a148f707335d4d7f858b2af73770 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:03 +0800 Subject: sctp: remove the typedef sctp_event_t This patch is to remove the typedef sctp_event_t, and replace with enum sctp_event in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 6 ++---- include/net/sctp/sm.h | 12 +++++------- 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index fb931f07e83a..3181e0f95b60 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -71,14 +71,12 @@ enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM }; SCTP_NUM_AUTH_CHUNK_TYPES) /* These are the different flavours of event. */ -typedef enum { - +enum sctp_event { SCTP_EVENT_T_CHUNK = 1, SCTP_EVENT_T_TIMEOUT, SCTP_EVENT_T_OTHER, SCTP_EVENT_T_PRIMITIVE - -} sctp_event_t; +}; /* As a convenience for the state machine, we append SCTP_EVENT_* and * SCTP_ULP_* to the list of possible chunks. diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 281e8d1e0752..96f54cff7964 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -176,7 +176,7 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, - sctp_event_t event_type, + enum sctp_event event_type, enum sctp_state state, sctp_subtype_t event_subtype); int sctp_chunk_iif(const struct sctp_chunk *); @@ -312,12 +312,10 @@ struct sctp_chunk *sctp_process_strreset_resp( /* Prototypes for statetable processing. */ -int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, - enum sctp_state state, - struct sctp_endpoint *, - struct sctp_association *asoc, - void *event_arg, - gfp_t gfp); +int sctp_do_sm(struct net *net, enum sctp_event event_type, + sctp_subtype_t subtype, enum sctp_state state, + struct sctp_endpoint *ep, struct sctp_association *asoc, + void *event_arg, gfp_t gfp); /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(unsigned long peer); -- cgit v1.2.3 From bfc6f8270fefb323662d1d7713f940149f27b7f1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:04 +0800 Subject: sctp: remove the typedef sctp_subtype_t This patch is to remove the typedef sctp_subtype_t, and replace with union sctp_subtype in the places where it's using this typedef. Note that it doesn't fix many indents although it should, as sctp_disposition_t's removal would mess them up again. So better to fix them when removing sctp_disposition_t in later patch. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 18 +++++++++--------- include/net/sctp/sm.h | 13 +++++++------ 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 3181e0f95b60..deaafa9b09cb 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -124,20 +124,20 @@ enum sctp_event_primitive { /* We define here a utility type for manipulating subtypes. * The subtype constructors all work like this: * - * sctp_subtype_t foo = SCTP_ST_CHUNK(SCTP_CID_INIT); + * union sctp_subtype foo = SCTP_ST_CHUNK(SCTP_CID_INIT); */ -typedef union { +union sctp_subtype { enum sctp_cid chunk; enum sctp_event_timeout timeout; enum sctp_event_other other; enum sctp_event_primitive primitive; -} sctp_subtype_t; +}; #define SCTP_SUBTYPE_CONSTRUCTOR(_name, _type, _elt) \ -static inline sctp_subtype_t \ +static inline union sctp_subtype \ SCTP_ST_## _name (_type _arg) \ -{ sctp_subtype_t _retval; _retval._elt = _arg; return _retval; } +{ union sctp_subtype _retval; _retval._elt = _arg; return _retval; } SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, enum sctp_event_timeout, timeout) @@ -220,10 +220,10 @@ enum sctp_sock_state { }; /* These functions map various type to printable names. */ -const char *sctp_cname(const sctp_subtype_t); /* chunk types */ -const char *sctp_oname(const sctp_subtype_t); /* other events */ -const char *sctp_tname(const sctp_subtype_t); /* timeouts */ -const char *sctp_pname(const sctp_subtype_t); /* primitives */ +const char *sctp_cname(const union sctp_subtype id); /* chunk types */ +const char *sctp_oname(const union sctp_subtype id); /* other events */ +const char *sctp_tname(const union sctp_subtype id); /* timeouts */ +const char *sctp_pname(const union sctp_subtype id); /* primitives */ /* This is a table of printable names of sctp_state_t's. */ extern const char *const sctp_state_tbl[]; diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 96f54cff7964..1e7651c3b158 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -73,7 +73,7 @@ typedef struct { typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, const struct sctp_endpoint *, const struct sctp_association *, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *); typedef void (sctp_timer_event_t) (unsigned long); @@ -175,10 +175,11 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); -const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, - enum sctp_event event_type, - enum sctp_state state, - sctp_subtype_t event_subtype); +const sctp_sm_table_entry_t *sctp_sm_lookup_event( + struct net *net, + enum sctp_event event_type, + enum sctp_state state, + union sctp_subtype event_subtype); int sctp_chunk_iif(const struct sctp_chunk *); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, @@ -313,7 +314,7 @@ struct sctp_chunk *sctp_process_strreset_resp( /* Prototypes for statetable processing. */ int sctp_do_sm(struct net *net, enum sctp_event event_type, - sctp_subtype_t subtype, enum sctp_state state, + union sctp_subtype subtype, enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, gfp_t gfp); -- cgit v1.2.3 From 3e0e82664322290a59189f7c2bcb39b0de932505 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:19 +0200 Subject: net: sched: make egress_dev flag part of flower offload struct Since this is specific to flower now, make it part of the flower offload struct. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e0c54f111467..8213acdfdf5a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -504,6 +504,7 @@ struct tc_cls_flower_offload { struct fl_flow_key *mask; struct fl_flow_key *key; struct tcf_exts *exts; + bool egress_dev; }; enum tc_matchall_command { -- cgit v1.2.3 From 5fd9fc4e207dba0c05cafe78417952b4c4ca02dc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:29 +0200 Subject: net: sched: push cls related args into cls_common structure As ndo_setup_tc is generic offload op for whole tc subsystem, does not really make sense to have cls-specific args. So move them under cls_common structurure which is embedded in all cls structs. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 8213acdfdf5a..ffaddf72108e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -405,6 +405,21 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ +struct tc_cls_common_offload { + u32 handle; + u32 chain_index; + __be16 protocol; +}; + +static inline void +tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, + const struct tcf_proto *tp) +{ + cls_common->handle = tp->q->handle; + cls_common->chain_index = tp->chain->index; + cls_common->protocol = tp->protocol; +} + struct tc_cls_u32_knode { struct tcf_exts *exts; struct tc_u32_sel *sel; @@ -431,6 +446,7 @@ enum tc_clsu32_command { }; struct tc_cls_u32_offload { + struct tc_cls_common_offload common; /* knode values */ enum tc_clsu32_command command; union { @@ -497,6 +513,7 @@ enum tc_fl_command { }; struct tc_cls_flower_offload { + struct tc_cls_common_offload common; enum tc_fl_command command; u32 prio; unsigned long cookie; @@ -513,6 +530,7 @@ enum tc_matchall_command { }; struct tc_cls_matchall_offload { + struct tc_cls_common_offload common; enum tc_matchall_command command; struct tcf_exts *exts; unsigned long cookie; @@ -526,6 +544,7 @@ enum tc_clsbpf_command { }; struct tc_cls_bpf_offload { + struct tc_cls_common_offload common; enum tc_clsbpf_command command; struct tcf_exts *exts; struct bpf_prog *prog; -- cgit v1.2.3 From d7c1c8d2e53be974b5c72e31d7d35f6d9737fe84 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:30 +0200 Subject: net: sched: move prio into cls_common prio is not cls_flower specific, but it is meaningful for all classifiers. Seems that only mlxsw cares about the value. Obviously, cls offload in other drivers is broken. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ffaddf72108e..572083af02ac 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -409,6 +409,7 @@ struct tc_cls_common_offload { u32 handle; u32 chain_index; __be16 protocol; + u32 prio; }; static inline void @@ -418,6 +419,7 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, cls_common->handle = tp->q->handle; cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; + cls_common->prio = tp->prio; } struct tc_cls_u32_knode { @@ -515,7 +517,6 @@ enum tc_fl_command { struct tc_cls_flower_offload { struct tc_cls_common_offload common; enum tc_fl_command command; - u32 prio; unsigned long cookie; struct flow_dissector *dissector; struct fl_flow_key *mask; -- cgit v1.2.3 From fb74c27735f0a34e76dbf1972084e984ad2ea145 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:16 -0700 Subject: net: ipv4: add second dif to udp socket lookups Add a second device index, sdif, to udp socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Early demux lookups are handled in the next patch as part of INET_MATCH changes. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip.h | 10 ++++++++++ include/net/udp.h | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 9e59dcf1787a..39db596eb89f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -78,6 +78,16 @@ struct ipcm_cookie { #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb)) +/* return enslaved device index if relevant */ +static inline int inet_sdif(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) + return IPCB(skb)->iif; +#endif + return 0; +} + struct ip_ra_chain { struct ip_ra_chain __rcu *next; struct sock *sk; diff --git a/include/net/udp.h b/include/net/udp.h index cc8036987dcb..826c713d5a48 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -287,7 +287,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif, + __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport); -- cgit v1.2.3 From 3fa6f616a7a4d0bdf4d877d530456d8a5c3b109b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:17 -0700 Subject: net: ipv4: add second dif to inet socket lookups Add a second device index, sdif, to inet socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. TCP moves the data in the cb. Prior to tcp_v4_rcv (e.g., early demux) the ingress index is obtained from IPCB using inet_sdif and after the cb move in tcp_v4_rcv the tcp_v4_sdif helper is used. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 31 +++++++++++++++++-------------- include/net/tcp.h | 10 ++++++++++ 2 files changed, 27 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 5026b1f08bb8..2dbbbff5e1e3 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -221,16 +221,16 @@ struct sock *__inet_lookup_listener(struct net *net, const __be32 saddr, const __be16 sport, const __be32 daddr, const unsigned short hnum, - const int dif); + const int dif, const int sdif); static inline struct sock *inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif) + __be32 daddr, __be16 dport, int dif, int sdif) { return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport, - daddr, ntohs(dport), dif); + daddr, ntohs(dport), dif, sdif); } /* Socket demux engine toys. */ @@ -262,22 +262,24 @@ static inline struct sock *inet_lookup_listener(struct net *net, (((__force __u64)(__be32)(__daddr)) << 32) | \ ((__force __u64)(__be32)(__saddr))) #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \ +#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_addrpair == (__cookie)) && \ (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const int __name __deprecated __attribute__((unused)) -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \ +#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_daddr == (__saddr)) && \ ((__sk)->sk_rcv_saddr == (__daddr)) && \ (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* 64-bit arch */ @@ -288,7 +290,7 @@ struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, - const int dif); + const int dif, const int sdif); static inline struct sock * inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, @@ -297,7 +299,7 @@ static inline struct sock * const int dif) { return __inet_lookup_established(net, hashinfo, saddr, sport, daddr, - ntohs(dport), dif); + ntohs(dport), dif, 0); } static inline struct sock *__inet_lookup(struct net *net, @@ -305,20 +307,20 @@ static inline struct sock *__inet_lookup(struct net *net, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif, + const int dif, const int sdif, bool *refcounted) { u16 hnum = ntohs(dport); struct sock *sk; sk = __inet_lookup_established(net, hashinfo, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, - sport, daddr, hnum, dif); + sport, daddr, hnum, dif, sdif); } static inline struct sock *inet_lookup(struct net *net, @@ -332,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net, bool refcounted; sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, - dport, dif, &refcounted); + dport, dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -344,6 +346,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, int doff, const __be16 sport, const __be16 dport, + const int sdif, bool *refcounted) { struct sock *sk = skb_steal_sock(skb); @@ -355,7 +358,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, doff, iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), + iph->daddr, dport, inet_iif(skb), sdif, refcounted); } diff --git a/include/net/tcp.h b/include/net/tcp.h index 5173fecde495..2b89f1ab8552 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -840,6 +840,16 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) return false; } +/* TCP_SKB_CB reference means this can not be used from early demux */ +static inline int tcp_v4_sdif(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + return TCP_SKB_CB(skb)->header.h4.iif; +#endif + return 0; +} + /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. */ -- cgit v1.2.3 From 67359930e185c491b47cb958d5f1d6c1af4598a2 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:18 -0700 Subject: net: ipv4: add second dif to raw socket lookups Add a second device index, sdif, to raw socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/raw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/raw.h b/include/net/raw.h index 57c33dd22ec4..99d26d0c4a19 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -26,7 +26,7 @@ extern struct proto raw_prot; extern struct raw_hashinfo raw_v4_hashinfo; struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, - __be32 laddr, int dif); + __be32 laddr, int dif, int sdif); int raw_abort(struct sock *sk, int err); void raw_icmp_error(struct sk_buff *, int, u32); -- cgit v1.2.3 From 1801b570dd2ae50b90231f283e79a9a94fbe7875 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:20 -0700 Subject: net: ipv6: add second dif to udp socket lookups Add a second device index, sdif, to udp socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Early demux lookups are handled in the next patch as part of INET_MATCH changes. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index 826c713d5a48..20dcdca4e85c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -298,7 +298,7 @@ struct sock *udp6_lib_lookup(struct net *net, struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *tbl, + int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport); -- cgit v1.2.3 From 4297a0ef085729af98adab9131d128c576ed3044 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:21 -0700 Subject: net: ipv6: add second dif to inet6 socket lookups Add a second device index, sdif, to inet6 socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. TCP moves the data in the cb. Prior to tcp_v4_rcv (e.g., early demux) the ingress index is obtained from IPCB using inet_sdif and after tcp_v4_rcv tcp_v4_sdif is used. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 22 +++++++++++++--------- include/net/tcp.h | 10 ++++++++++ 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index b87becacd9d3..6e91e38a31da 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -49,7 +49,8 @@ struct sock *__inet6_lookup_established(struct net *net, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, - const u16 hnum, const int dif); + const u16 hnum, const int dif, + const int sdif); struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, @@ -57,7 +58,8 @@ struct sock *inet6_lookup_listener(struct net *net, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, - const unsigned short hnum, const int dif); + const unsigned short hnum, + const int dif, const int sdif); static inline struct sock *__inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, @@ -66,24 +68,25 @@ static inline struct sock *__inet6_lookup(struct net *net, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif, + const int dif, const int sdif, bool *refcounted) { struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, - sport, daddr, hnum, dif); + sport, daddr, hnum, + dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, sdif); } static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, - int iif, + int iif, int sdif, bool *refcounted) { struct sock *sk = skb_steal_sock(skb); @@ -95,7 +98,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, doff, &ipv6_hdr(skb)->saddr, sport, &ipv6_hdr(skb)->daddr, ntohs(dport), - iif, refcounted); + iif, sdif, refcounted); } struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, @@ -107,13 +110,14 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, int inet6_hash(struct sock *sk); #endif /* IS_ENABLED(CONFIG_IPV6) */ -#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ +#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* _INET6_HASHTABLES_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 2b89f1ab8552..999f3efe572b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -827,6 +827,16 @@ static inline int tcp_v6_iif(const struct sk_buff *skb) return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; } + +/* TCP_SKB_CB reference means this can not be used from early demux */ +static inline int tcp_v6_sdif(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags)) + return TCP_SKB_CB(skb)->header.h6.iif; +#endif + return 0; +} #endif /* TCP_SKB_CB reference means this can not be used from early demux */ -- cgit v1.2.3 From 5108ab4bf446fa9ad2c71f5fc1d839067b72636f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:22 -0700 Subject: net: ipv6: add second dif to raw socket lookups Add a second device index, sdif, to raw socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/rawv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/rawv6.h b/include/net/rawv6.h index cbe4e9de1894..4addc5c988e0 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -6,7 +6,7 @@ extern struct raw_hashinfo raw_v6_hashinfo; struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, - const struct in6_addr *rmt_addr, int dif); + const struct in6_addr *rmt_addr, int dif, int sdif); int raw_abort(struct sock *sk, int err); -- cgit v1.2.3 From 8113c095672f6504b23eba6edf4a57b5f7f744af Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 4 Aug 2017 21:31:43 -0700 Subject: net_sched: use void pointer for filter handle Now we use 'unsigned long fh' as a pointer in every place, it is safe to convert it to a void pointer now. This gets rid of many casts to pointer. Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- include/net/sch_generic.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 572083af02ac..0f78e6560b2d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -11,7 +11,7 @@ struct tcf_walker { int stop; int skip; int count; - int (*fn)(struct tcf_proto *, unsigned long node, struct tcf_walker *); + int (*fn)(struct tcf_proto *, void *node, struct tcf_walker *); }; int register_tcf_proto_ops(struct tcf_proto_ops *ops); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1c123e2b2415..e79f5ad1c5f3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -213,16 +213,16 @@ struct tcf_proto_ops { int (*init)(struct tcf_proto*); void (*destroy)(struct tcf_proto*); - unsigned long (*get)(struct tcf_proto*, u32 handle); + void* (*get)(struct tcf_proto*, u32 handle); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - unsigned long *, bool); - int (*delete)(struct tcf_proto*, unsigned long, bool*); + void **, bool); + int (*delete)(struct tcf_proto*, void *, bool*); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); /* rtnetlink specific */ - int (*dump)(struct net*, struct tcf_proto*, unsigned long, + int (*dump)(struct net*, struct tcf_proto*, void *, struct sk_buff *skb, struct tcmsg*); struct module *owner; -- cgit v1.2.3 From b04c80d3a7e228cfb832cdb1c9ce8151f174669c Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Sat, 5 Aug 2017 12:38:25 +0200 Subject: ipv6: sr: export SRH insertion functions This patch exports the seg6_do_srh_encap() and seg6_do_srh_inline() functions. It also removes the CONFIG_IPV6_SEG6_INLINE knob that enabled the compilation of seg6_do_srh_inline(). This function is now built-in. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/net/seg6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/seg6.h b/include/net/seg6.h index 4e0357517d79..a32abb040e1d 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -58,5 +58,7 @@ extern int seg6_iptunnel_init(void); extern void seg6_iptunnel_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); +extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); +extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); #endif -- cgit v1.2.3 From d1df6fd8a1d22d37cffa0075ab8ad423ce656777 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Sat, 5 Aug 2017 12:38:26 +0200 Subject: ipv6: sr: define core operations for seg6local lightweight tunnel This patch implements a new type of lightweight tunnel named seg6local. A seg6local lwt is defined by a type of action and a set of parameters. The action represents the operation to perform on the packets matching the lwt's route, and is not necessarily an encapsulation. The set of parameters are arguments for the processing function. Each action is defined in a struct seg6_action_desc within seg6_action_table[]. This structure contains the action, mandatory attributes, the processing function, and a static headroom size required by the action. The mandatory attributes are encoded as a bitmask field. The static headroom is set to a non-zero value when the processing function always add a constant number of bytes to the skb (e.g. the header size for encapsulations). To facilitate rtnetlink-related operations such as parsing, fill_encap, and cmp_encap, each type of action parameter is associated to three function pointers, in seg6_action_params[]. All actions defined in seg6_local.h are detailed in [1]. [1] https://tools.ietf.org/html/draft-filsfils-spring-srv6-network-programming-01 Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/net/seg6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/seg6.h b/include/net/seg6.h index a32abb040e1d..5379f550f521 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -56,6 +56,8 @@ extern int seg6_init(void); extern void seg6_exit(void); extern int seg6_iptunnel_init(void); extern void seg6_iptunnel_exit(void); +extern int seg6_local_init(void); +extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); -- cgit v1.2.3 From 6c2c1dcb185f1e44e1c895781dbaba40195234f9 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:39 +0300 Subject: net: dsa: Change DSA slave FDB API to be switchdev independent In order to support FDB add/del to be on a notifier chain the slave API need to be changed to be switchdev independent. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 0b1a0622b33c..ba11005dac8c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -386,13 +386,11 @@ struct dsa_switch_ops { * Forwarding database */ int (*port_fdb_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans); + const unsigned char *addr, u16 vid); void (*port_fdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans); + const unsigned char *addr, u16 vid); int (*port_fdb_del)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb); + const unsigned char *addr, u16 vid); int (*port_fdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, switchdev_obj_dump_cb_t *cb); -- cgit v1.2.3 From 1b6dd556c3045ca5fa31cc1e98a4a43afa680e1e Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:40 +0300 Subject: net: dsa: Remove prepare phase for FDB The prepare phase for FDB add is unneeded because most of DSA devices can have failures during bus transactions (SPI, I2C, etc.), thus, the prepare phase cannot guarantee success of the commit stage. The support for learning FDB through notification chain, which will be introduced in the following patches, will provide the ability to notify back the bridge about successful offload. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index ba11005dac8c..446fc438cb80 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -385,9 +385,7 @@ struct dsa_switch_ops { /* * Forwarding database */ - int (*port_fdb_prepare)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - void (*port_fdb_add)(struct dsa_switch *ds, int port, + int (*port_fdb_add)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); int (*port_fdb_del)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); -- cgit v1.2.3 From c069fcd82c571953b8aaf68769afe9ccb1aa7a9f Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:46 +0300 Subject: net: dsa: Remove support for bypass bridge port attributes/vlan set The bridge port attributes/vlan for DSA devices should be set only from bridge code. Furthermore, The vlans are synced totally with the bridge so there is no need for special dump support. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 446fc438cb80..d7b9bdd0e768 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -378,10 +378,6 @@ struct dsa_switch_ops { struct switchdev_trans *trans); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); - int (*port_vlan_dump)(struct dsa_switch *ds, int port, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb); - /* * Forwarding database */ -- cgit v1.2.3 From dc0cbff3ff9fe331160c2be2b3f47564e247137d Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:48 +0300 Subject: net: dsa: Remove redundant MDB dump support Currently the MDB HW database is synced with the bridge's one, thus, There is no need to support special dump functionality. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index d7b9bdd0e768..4ef185997b6f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -400,10 +400,6 @@ struct dsa_switch_ops { struct switchdev_trans *trans); int (*port_mdb_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); - int (*port_mdb_dump)(struct dsa_switch *ds, int port, - struct switchdev_obj_port_mdb *mdb, - switchdev_obj_dump_cb_t *cb); - /* * RXNFC */ -- cgit v1.2.3 From 2bedde1abbef5eec211308f0293dd7681b0513ec Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:49 +0300 Subject: net: dsa: Move FDB dump implementation inside DSA >From all switchdev devices only DSA requires special FDB dump. This is due to lack of ability for syncing the hardware learned FDBs with the bridge. Due to this it is removed from switchdev and moved inside DSA. Signed-off-by: Arkadi Sharshevsky Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++-- include/net/switchdev.h | 12 ------------ 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 4ef185997b6f..a4f66dbb4b7c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -272,6 +272,8 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) return ds->rtable[dst->cpu_dp->ds->index]; } +typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, + bool is_static, void *data); struct dsa_switch_ops { /* * Legacy probing. @@ -386,8 +388,7 @@ struct dsa_switch_ops { int (*port_fdb_del)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); int (*port_fdb_dump)(struct dsa_switch *ds, int port, - struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb); + dsa_fdb_dump_cb_t *cb, void *data); /* * Multicast database diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8ae9e3b6392e..d2637a685ca6 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -208,9 +208,6 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); -int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, - struct net_device *filter_dev, int *idx); void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); @@ -309,15 +306,6 @@ static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], return -EOPNOTSUPP; } -static inline int switchdev_port_fdb_dump(struct sk_buff *skb, - struct netlink_callback *cb, - struct net_device *dev, - struct net_device *filter_dev, - int *idx) -{ - return *idx; -} - static inline bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { -- cgit v1.2.3 From 29ab586c3d83f81c435e269cace9a1619afb5bbd Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:51 +0300 Subject: net: switchdev: Remove bridge bypass support from switchdev Currently the bridge port flags, vlans, FDBs and MDBs can be offloaded through the bridge code, making the switchdev's SELF bridge bypass implementation to be redundant. This implies several changes: - No need for dump infra in switchdev, DSA's special case is handled privately. - Remove obj_dump from switchdev_ops. - FDBs are removed from obj_add/del routines, due to the fact that they are offloaded through the bridge notification chain. - The switchdev_port_bridge_xx() and switchdev_port_fdb_xx() functions can be removed. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Vivien Didelot Acked-by: Jiri Pirko Reviewed-by: Ivan Vecera Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/switchdev.h | 75 ------------------------------------------------- 1 file changed, 75 deletions(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d2637a685ca6..d767b7991887 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -74,7 +74,6 @@ struct switchdev_attr { enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, - SWITCHDEV_OBJ_ID_PORT_FDB, SWITCHDEV_OBJ_ID_PORT_MDB, }; @@ -97,17 +96,6 @@ struct switchdev_obj_port_vlan { #define SWITCHDEV_OBJ_PORT_VLAN(obj) \ container_of(obj, struct switchdev_obj_port_vlan, obj) -/* SWITCHDEV_OBJ_ID_PORT_FDB */ -struct switchdev_obj_port_fdb { - struct switchdev_obj obj; - unsigned char addr[ETH_ALEN]; - u16 vid; - u16 ndm_state; -}; - -#define SWITCHDEV_OBJ_PORT_FDB(obj) \ - container_of(obj, struct switchdev_obj_port_fdb, obj) - /* SWITCHDEV_OBJ_ID_PORT_MDB */ struct switchdev_obj_port_mdb { struct switchdev_obj obj; @@ -135,8 +123,6 @@ typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj); * @switchdev_port_obj_add: Add an object to port (see switchdev_obj_*). * * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj_*). - * - * @switchdev_port_obj_dump: Dump port objects (see switchdev_obj_*). */ struct switchdev_ops { int (*switchdev_port_attr_get)(struct net_device *dev, @@ -149,9 +135,6 @@ struct switchdev_ops { struct switchdev_trans *trans); int (*switchdev_port_obj_del)(struct net_device *dev, const struct switchdev_obj *obj); - int (*switchdev_port_obj_dump)(struct net_device *dev, - struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb); }; enum switchdev_notifier_type { @@ -189,25 +172,10 @@ int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj); int switchdev_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj); -int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info); -int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask, - int nlflags); -int switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, - u16 vid, u16 nlm_flags); -int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, - u16 vid); void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); @@ -246,13 +214,6 @@ static inline int switchdev_port_obj_del(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_port_obj_dump(struct net_device *dev, - const struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb) -{ - return -EOPNOTSUPP; -} - static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; @@ -270,42 +231,6 @@ static inline int call_switchdev_notifiers(unsigned long val, return NOTIFY_DONE; } -static inline int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, - u32 seq, struct net_device *dev, - u32 filter_mask, int nlflags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, - u16 vid, u16 nlm_flags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - return -EOPNOTSUPP; -} - static inline bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { -- cgit v1.2.3 From feca7d8c135bc1527b244fe817b8b6498066ccec Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Tue, 8 Aug 2017 20:23:49 +0200 Subject: net: ipv6: avoid overhead when no custom FIB rules are installed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user hasn't installed any custom rules, don't go through the whole FIB rules layer. This is pretty similar to f4530fa574df (ipv4: Avoid overhead when no custom FIB rules are installed). Using a micro-benchmark module [1], timing ip6_route_output() with get_cycles(), with 40,000 routes in the main routing table, before this patch: min=606 max=12911 count=627 average=1959 95th=4903 90th=3747 50th=1602 mad=821 table=254 avgdepth=21.8 maxdepth=39 value │ ┊ count 600 │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ 199 880 │▒▒▒░░░░░░░░░░░░░░░░ 43 1160 │▒▒▒░░░░░░░░░░░░░░░░░░░░ 48 1440 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░ 43 1720 │▒▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░ 59 2000 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 50 2280 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 26 2560 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 31 2840 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 28 3120 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 17 3400 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 17 3680 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 3960 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 11 4240 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 6 4520 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 6 4800 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 9 After: min=544 max=11687 count=627 average=1776 95th=4546 90th=3585 50th=1227 mad=565 table=254 avgdepth=21.8 maxdepth=39 value │ ┊ count 540 │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ 201 800 │▒▒▒▒▒░░░░░░░░░░░░░░░░ 63 1060 │▒▒▒▒▒░░░░░░░░░░░░░░░░░░░░░ 68 1320 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░ 39 1580 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 32 1840 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 32 2100 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 34 2360 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 33 2620 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 26 2880 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 22 3140 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 9 3400 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 3660 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 9 3920 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 4180 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 4440 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 At the frequency of the host during the bench (~ 3.7 GHz), this is about a 100 ns difference on the median value. A next step would be to collapse local and main tables, as in 0ddcf43d5d4a (ipv4: FIB Local/MAIN table collapse). [1]: https://github.com/vincentbernat/network-lab/blob/master/lab-routes-ipv6/kbench_mod.c Signed-off-by: Vincent Bernat Reviewed-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index abdf3b40303b..0e50bf3ed097 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -65,6 +65,7 @@ struct netns_ipv6 { unsigned int ip6_rt_gc_expire; unsigned long ip6_rt_last_gc; #ifdef CONFIG_IPV6_MULTIPLE_TABLES + bool fib6_has_custom_rules; struct rt6_info *ip6_prohibit_entry; struct rt6_info *ip6_blk_hole_entry; struct fib6_table *fib6_local_tbl; -- cgit v1.2.3 From 04c2cf34362f133be09878bd752f8b014318b59a Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Tue, 11 Jul 2017 10:07:25 +0300 Subject: mac80211: add api to start ba session timer expired flow Some drivers handle rx buffer reordering internally (and by extension handle also the rx ba session timer internally), but do not ofload the addba/delba negotiation. Add an api for these drivers to properly tear-down the ba session, including sending a delba. Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- include/net/mac80211.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b2b5419467cc..f8149ca192b4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5499,6 +5499,21 @@ static inline void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, ieee80211_manage_rx_ba_offl(vif, addr, tid + IEEE80211_NUM_TIDS); } +/** + * ieee80211_rx_ba_timer_expired - stop a Rx BA session due to timeout + * + * Some device drivers do not offload AddBa/DelBa negotiation, but handle rx + * buffer reording internally, and therefore also handle the session timer. + * + * Trigger the timeout flow, which sends a DelBa. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback + * @addr: station mac address + * @tid: the rx tid + */ +void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, + const u8 *addr, unsigned int tid); + /* Rate control API */ /** -- cgit v1.2.3 From b97bac64a589d0158cf866e8995e831030f68f4f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Aug 2017 20:41:48 +0200 Subject: rtnetlink: make rtnl_register accept a flags parameter This change allows us to later indicate to rtnetlink core that certain doit functions should be called without acquiring rtnl_mutex. This change should have no effect, we simply replace the last (now unused) calcit argument with the new flag. Signed-off-by: Florian Westphal Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index abe6b733d473..ac32460a0adb 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -7,12 +7,11 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); -typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, rtnl_calcit_func); + rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); void rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, rtnl_calcit_func); + rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); -- cgit v1.2.3 From 62256f98f244fbb1c7a10465e1ee412f209d8978 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Aug 2017 20:41:52 +0200 Subject: rtnetlink: add RTNL_FLAG_DOIT_UNLOCKED Allow callers to tell rtnetlink core that its doit callback should be invoked without holding rtnl mutex. Signed-off-by: Florian Westphal Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index ac32460a0adb..21837ca68ecc 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -8,6 +8,10 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); +enum rtnl_link_flags { + RTNL_FLAG_DOIT_UNLOCKED = 1, +}; + int __rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); void rtnl_register(int protocol, int msgtype, -- cgit v1.2.3 From 68277a2c9d32fd9090247a5c08aaf1353049c0b1 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 9 Aug 2017 14:41:16 +0200 Subject: net-next: dsa: move struct dsa_device_ops to the global header file We need to access this struct from within the flow_dissector to fix dissection for packets coming in on DSA devices. Signed-off-by: Muciri Gatimu Signed-off-by: Shashidhar Lakkavalli Signed-off-by: John Crispin Signed-off-by: David S. Miller --- include/net/dsa.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index a4f66dbb4b7c..65d7804c6f69 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -101,6 +101,13 @@ struct dsa_platform_data { struct packet_type; +struct dsa_device_ops { + struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev); +}; + struct dsa_switch_tree { struct list_head list; -- cgit v1.2.3 From 598a968011ffc4d624934995fe46d06bd450cdf4 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 9 Aug 2017 14:41:17 +0200 Subject: net-next: dsa: add flow_dissect callback to struct dsa_device_ops When the flow dissector first sees packets coming in on a DSA devices the 802.3 header wont be located where the code expects it to be as the tag is still present. Adding this new callback allows a DSA device to provide a new function that the flow_dissector can use to get the correct protocol and offset of the network header. Signed-off-by: Muciri Gatimu Signed-off-by: Shashidhar Lakkavalli Signed-off-by: John Crispin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 65d7804c6f69..7f46b521313e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -106,6 +106,8 @@ struct dsa_device_ops { struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); + int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, + int *offset); }; struct dsa_switch_tree { -- cgit v1.2.3 From 077fbac405bfc6d41419ad6c1725804ad4e9887c Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 11 Aug 2017 02:11:33 +0900 Subject: net: xfrm: support setting an output mark. On systems that use mark-based routing it may be necessary for routing lookups to use marks in order for packets to be routed correctly. An example of such a system is Android, which uses socket marks to route packets via different networks. Currently, routing lookups in tunnel mode always use a mark of zero, making routing incorrect on such systems. This patch adds a new output_mark element to the xfrm state and a corresponding XFRMA_OUTPUT_MARK netlink attribute. The output mark differs from the existing xfrm mark in two ways: 1. The xfrm mark is used to match xfrm policies and states, while the xfrm output mark is used to set the mark (and influence the routing) of the packets emitted by those states. 2. The existing mark is constrained to be a subset of the bits of the originating socket or transformed packet, but the output mark is arbitrary and depends only on the state. The use of a separate mark provides additional flexibility. For example: - A packet subject to two transforms (e.g., transport mode inside tunnel mode) can have two different output marks applied to it, one for the transport mode SA and one for the tunnel mode SA. - On a system where socket marks determine routing, the packets emitted by an IPsec tunnel can be routed based on a mark that is determined by the tunnel, not by the marks of the unencrypted packets. - Support for setting the output marks can be introduced without breaking any existing setups that employ both mark-based routing and xfrm tunnel mode. Simply changing the code to use the xfrm mark for routing output packets could xfrm mark could change behaviour in a way that breaks these setups. If the output mark is unspecified or set to zero, the mark is not set or changed. Tested: make allyesconfig; make -j64 Tested: https://android-review.googlesource.com/452776 Signed-off-by: Lorenzo Colitti Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 18d7de34a5c3..9c7b70cce6d6 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -165,6 +165,7 @@ struct xfrm_state { int header_len; int trailer_len; u32 extra_flags; + u32 output_mark; } props; struct xfrm_lifetime_cfg lft; @@ -298,10 +299,12 @@ struct xfrm_policy_afinfo { struct dst_entry *(*dst_lookup)(struct net *net, int tos, int oif, const xfrm_address_t *saddr, - const xfrm_address_t *daddr); + const xfrm_address_t *daddr, + u32 mark); int (*get_saddr)(struct net *net, int oif, xfrm_address_t *saddr, - xfrm_address_t *daddr); + xfrm_address_t *daddr, + u32 mark); void (*decode_session)(struct sk_buff *skb, struct flowi *fl, int reverse); @@ -1640,7 +1643,7 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, - int family); + int family, u32 mark); struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); -- cgit v1.2.3 From afa6c45429f6e5ddd1eb6b77a36358f9c4b789da Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:45 +0800 Subject: sctp: remove the unused typedef sctp_packet_phandler_t Remove this function typedef, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index fbe6e81b889b..73e9509c057e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -657,8 +657,6 @@ struct sctp_sockaddr_entry { #define SCTP_ADDRESS_TICK_DELAY 500 -typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *); - /* This structure holds lists of chunks as we are assembling for * transmission. */ -- cgit v1.2.3 From edf903f83ebca988e04a39f515ab6eacb92055df Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:46 +0800 Subject: sctp: remove the typedef sctp_sender_hb_info_t This patch is to remove the typedef sctp_sender_hb_info_t, and replace with struct sctp_sender_hb_info in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 73e9509c057e..60033a263193 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -371,12 +371,12 @@ union sctp_params { * chunk is sent and the destination transport address to which this * HEARTBEAT is sent (see Section 8.3). */ -typedef struct sctp_sender_hb_info { +struct sctp_sender_hb_info { struct sctp_paramhdr param_hdr; union sctp_addr daddr; unsigned long sent_at; __u64 hb_nonce; -} sctp_sender_hb_info_t; +}; int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp); -- cgit v1.2.3 From 74439f344b1becd57ec822bc0e2c1a4cbf240a53 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:47 +0800 Subject: sctp: remove the typedef sctp_endpoint_type_t This patch is to remove the typedef sctp_endpoint_type_t, and replace with enum sctp_endpoint_type in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 60033a263193..937187f3bffc 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1142,10 +1142,10 @@ int sctp_is_ep_boundall(struct sock *sk); /* What type of endpoint? */ -typedef enum { +enum sctp_endpoint_type { SCTP_EP_TYPE_SOCKET, SCTP_EP_TYPE_ASSOCIATION, -} sctp_endpoint_type_t; +}; /* * A common base class to bridge the implmentation view of a @@ -1169,7 +1169,7 @@ struct sctp_ep_common { int hashent; /* Runtime type information. What kind of endpoint is this? */ - sctp_endpoint_type_t type; + enum sctp_endpoint_type type; /* Some fields to help us manage this object. * refcnt - Reference count access to this object. -- cgit v1.2.3 From a05437ac5deb100f94e290ad4c5eef3e78f4b6bb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:48 +0800 Subject: sctp: remove the typedef sctp_cmsgs_t This patch is to remove the typedef sctp_cmsgs_t, and replace with struct sctp_cmsgs in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 937187f3bffc..e171d3a3d2b4 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1985,11 +1985,11 @@ int sctp_cmp_addr_exact(const union sctp_addr *ss1, struct sctp_chunk *sctp_get_ecne_prepend(struct sctp_association *asoc); /* A convenience structure to parse out SCTP specific CMSGs. */ -typedef struct sctp_cmsgs { +struct sctp_cmsgs { struct sctp_initmsg *init; struct sctp_sndrcvinfo *srinfo; struct sctp_sndinfo *sinfo; -} sctp_cmsgs_t; +}; /* Structure for tracking memory objects */ typedef struct { -- cgit v1.2.3 From d38ef5ae35b0960f3219f1cf0203e19819e757c7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:49 +0800 Subject: sctp: remove the typedef sctp_dbg_objcnt_entry_t This patch is to remove the typedef sctp_dbg_objcnt_entry_t, and replace with struct sctp_dbg_objcnt_entry in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e171d3a3d2b4..b6d75b37f84d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1992,9 +1992,9 @@ struct sctp_cmsgs { }; /* Structure for tracking memory objects */ -typedef struct { +struct sctp_dbg_objcnt_entry { char *label; atomic_t *counter; -} sctp_dbg_objcnt_entry_t; +}; #endif /* __sctp_structs_h__ */ -- cgit v1.2.3 From b7ef2618a0bf75c1e480b05739b0c5f2a42081cd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:50 +0800 Subject: sctp: remove the typedef sctp_socket_type_t This patch is to remove the typedef sctp_socket_type_t, and replace with enum sctp_socket_type in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 3 ++- include/net/sctp/structs.h | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 24ff7931d38c..06b4f515e157 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -550,7 +550,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport) /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) -static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style) +static inline int __sctp_style(const struct sock *sk, + enum sctp_socket_type style) { return sctp_sk(sk)->type == style; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index b6d75b37f84d..0477945de1a3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -150,18 +150,18 @@ extern struct sctp_globals { #define sctp_checksum_disable (sctp_globals.checksum_disable) /* SCTP Socket type: UDP or TCP style. */ -typedef enum { +enum sctp_socket_type { SCTP_SOCKET_UDP = 0, SCTP_SOCKET_UDP_HIGH_BANDWIDTH, SCTP_SOCKET_TCP -} sctp_socket_type_t; +}; /* Per socket SCTP information. */ struct sctp_sock { /* inet_sock has to be the first member of sctp_sock */ struct inet_sock inet; /* What kind of a socket is this? */ - sctp_socket_type_t type; + enum sctp_socket_type type; /* PF_ family specific functions. */ struct sctp_pf *pf; -- cgit v1.2.3 From e2c3108ab25b4dbab3821e8b6084bfb73afb655c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:51 +0800 Subject: sctp: remove the typedef sctp_cmd_t This patch is to remove the typedef sctp_cmd_t, and replace with enum sctp_cmd in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 376cb78b6247..4e9e589b8f18 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -196,15 +196,15 @@ static inline sctp_arg_t SCTP_NULL(void) return retval; } -typedef struct { +struct sctp_cmd { sctp_arg_t obj; sctp_verb_t verb; -} sctp_cmd_t; +}; typedef struct { - sctp_cmd_t cmds[SCTP_MAX_NUM_COMMANDS]; - sctp_cmd_t *last_used_slot; - sctp_cmd_t *next_cmd; + struct sctp_cmd cmds[SCTP_MAX_NUM_COMMANDS]; + struct sctp_cmd *last_used_slot; + struct sctp_cmd *next_cmd; } sctp_cmd_seq_t; @@ -228,7 +228,7 @@ static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq) static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, sctp_arg_t obj) { - sctp_cmd_t *cmd = seq->last_used_slot - 1; + struct sctp_cmd *cmd = seq->last_used_slot - 1; BUG_ON(cmd < seq->cmds); @@ -240,7 +240,7 @@ static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, /* Return the next command structure in an sctp_cmd_seq. * Return NULL at the end of the sequence. */ -static inline sctp_cmd_t *sctp_next_cmd(sctp_cmd_seq_t *seq) +static inline struct sctp_cmd *sctp_next_cmd(sctp_cmd_seq_t *seq) { if (seq->next_cmd <= seq->last_used_slot) return NULL; -- cgit v1.2.3 From a85bbeb221d860097859f110ba1321f2b0653f07 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:52 +0800 Subject: sctp: remove the typedef sctp_cmd_seq_t This patch is to remove the typedef sctp_cmd_seq_t, and replace with struct sctp_cmd_seq in the places where it's using this typedef. Note that it doesn't fix many indents although it should, as sctp_disposition_t's removal would mess them up again. So better to fix them when removing sctp_disposition_t in the later patch. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 14 +++++++------- include/net/sctp/sm.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 4e9e589b8f18..cbf6798866ef 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -110,7 +110,7 @@ typedef enum { SCTP_CMD_LAST } sctp_verb_t; -/* How many commands can you put in an sctp_cmd_seq_t? +/* How many commands can you put in an struct sctp_cmd_seq? * This is a rather arbitrary number, ideally derived from a careful * analysis of the state functions, but in reality just taken from * thin air in the hopes othat we don't trigger a kernel panic. @@ -201,17 +201,17 @@ struct sctp_cmd { sctp_verb_t verb; }; -typedef struct { +struct sctp_cmd_seq { struct sctp_cmd cmds[SCTP_MAX_NUM_COMMANDS]; struct sctp_cmd *last_used_slot; struct sctp_cmd *next_cmd; -} sctp_cmd_seq_t; +}; /* Initialize a block of memory as a command sequence. * Return 0 if the initialization fails. */ -static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq) +static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq) { /* cmds[] is filled backwards to simplify the overflow BUG() check */ seq->last_used_slot = seq->cmds + SCTP_MAX_NUM_COMMANDS; @@ -220,12 +220,12 @@ static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq) } -/* Add a command to an sctp_cmd_seq_t. +/* Add a command to an struct sctp_cmd_seq. * * Use the SCTP_* constructors defined by SCTP_ARG_CONSTRUCTOR() above * to wrap data which goes in the obj argument. */ -static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, +static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb, sctp_arg_t obj) { struct sctp_cmd *cmd = seq->last_used_slot - 1; @@ -240,7 +240,7 @@ static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, /* Return the next command structure in an sctp_cmd_seq. * Return NULL at the end of the sequence. */ -static inline struct sctp_cmd *sctp_next_cmd(sctp_cmd_seq_t *seq) +static inline struct sctp_cmd *sctp_next_cmd(struct sctp_cmd_seq *seq) { if (seq->next_cmd <= seq->last_used_slot) return NULL; diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 1e7651c3b158..9af64b98d96b 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -75,7 +75,7 @@ typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, const struct sctp_association *, const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *); + struct sctp_cmd_seq *); typedef void (sctp_timer_event_t) (unsigned long); typedef struct { sctp_state_fn_t *fn; -- cgit v1.2.3 From c488b7704ed0eed18e11f9b685931558735f2a68 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:53 +0800 Subject: sctp: remove the typedef sctp_arg_t This patch is to remove the typedef sctp_arg_t, and replace with union sctp_arg in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index cbf6798866ef..f5fc425b5a4f 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -117,7 +117,7 @@ typedef enum { */ #define SCTP_MAX_NUM_COMMANDS 20 -typedef union { +union sctp_arg { void *zero_all; /* Set to NULL to clear the entire union */ __s32 i32; __u32 u32; @@ -137,24 +137,24 @@ typedef union { struct sctp_packet *packet; struct sctp_sackhdr *sackh; struct sctp_datamsg *msg; -} sctp_arg_t; +}; /* We are simulating ML type constructors here. * * SCTP_ARG_CONSTRUCTOR(NAME, TYPE, ELT) builds a function called * SCTP_NAME() which takes an argument of type TYPE and returns an - * sctp_arg_t. It does this by inserting the sole argument into the - * ELT union element of a local sctp_arg_t. + * union sctp_arg. It does this by inserting the sole argument into + * the ELT union element of a local union sctp_arg. * * E.g., SCTP_ARG_CONSTRUCTOR(I32, __s32, i32) builds SCTP_I32(arg), - * which takes an __s32 and returns a sctp_arg_t containing the + * which takes an __s32 and returns a union sctp_arg containing the * __s32. So, after foo = SCTP_I32(arg), foo.i32 == arg. */ #define SCTP_ARG_CONSTRUCTOR(name, type, elt) \ -static inline sctp_arg_t \ +static inline union sctp_arg \ SCTP_## name (type arg) \ -{ sctp_arg_t retval;\ +{ union sctp_arg retval;\ retval.zero_all = NULL;\ retval.elt = arg;\ return retval;\ @@ -179,25 +179,25 @@ SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) SCTP_ARG_CONSTRUCTOR(SACKH, struct sctp_sackhdr *, sackh) SCTP_ARG_CONSTRUCTOR(DATAMSG, struct sctp_datamsg *, msg) -static inline sctp_arg_t SCTP_FORCE(void) +static inline union sctp_arg SCTP_FORCE(void) { return SCTP_I32(1); } -static inline sctp_arg_t SCTP_NOFORCE(void) +static inline union sctp_arg SCTP_NOFORCE(void) { return SCTP_I32(0); } -static inline sctp_arg_t SCTP_NULL(void) +static inline union sctp_arg SCTP_NULL(void) { - sctp_arg_t retval; + union sctp_arg retval; retval.zero_all = NULL; return retval; } struct sctp_cmd { - sctp_arg_t obj; + union sctp_arg obj; sctp_verb_t verb; }; @@ -226,7 +226,7 @@ static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq) * to wrap data which goes in the obj argument. */ static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb, - sctp_arg_t obj) + union sctp_arg obj) { struct sctp_cmd *cmd = seq->last_used_slot - 1; -- cgit v1.2.3 From e08af95df1130883762b388a19bb150ae5d16c09 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:54 +0800 Subject: sctp: remove the typedef sctp_verb_t This patch is to remove the typedef sctp_verb_t, and replace with enum sctp_verb in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index f5fc425b5a4f..b55c6a48a206 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -40,7 +40,7 @@ #include -typedef enum { +enum sctp_verb { SCTP_CMD_NOP = 0, /* Do nothing. */ SCTP_CMD_NEW_ASOC, /* Register a new association. */ SCTP_CMD_DELETE_TCB, /* Delete the current association. */ @@ -108,7 +108,7 @@ typedef enum { SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ SCTP_CMD_SET_ASOC, /* Restore association context */ SCTP_CMD_LAST -} sctp_verb_t; +}; /* How many commands can you put in an struct sctp_cmd_seq? * This is a rather arbitrary number, ideally derived from a careful @@ -198,7 +198,7 @@ static inline union sctp_arg SCTP_NULL(void) struct sctp_cmd { union sctp_arg obj; - sctp_verb_t verb; + enum sctp_verb verb; }; struct sctp_cmd_seq { @@ -225,8 +225,8 @@ static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq) * Use the SCTP_* constructors defined by SCTP_ARG_CONSTRUCTOR() above * to wrap data which goes in the obj argument. */ -static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb, - union sctp_arg obj) +static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, + enum sctp_verb verb, union sctp_arg obj) { struct sctp_cmd *cmd = seq->last_used_slot - 1; -- cgit v1.2.3 From eb662a6a9b2a4ee3c76bbfc4c23f4dc56859b0f3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:55 +0800 Subject: sctp: remove the unused typedef sctp_sm_command_t Remove this typedef including the struct, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 9af64b98d96b..3ca75a6c4b5e 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -65,11 +65,6 @@ typedef enum { SCTP_DISPOSITION_BUG, /* This is a bug. */ } sctp_disposition_t; -typedef struct { - int name; - int action; -} sctp_sm_command_t; - typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, const struct sctp_endpoint *, const struct sctp_association *, -- cgit v1.2.3 From 8ee821aea39c6bf4142c9319adecea6d3e1af4a2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:56 +0800 Subject: sctp: remove the typedef sctp_sm_table_entry_t This patch is to remove the typedef sctp_sm_table_entry_t, and replace with struct sctp_sm_table_entry in the places where it's using this typedef. It is also to fix some indents. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 3ca75a6c4b5e..7ad240228a0f 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -72,10 +72,10 @@ typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, void *arg, struct sctp_cmd_seq *); typedef void (sctp_timer_event_t) (unsigned long); -typedef struct { +struct sctp_sm_table_entry { sctp_state_fn_t *fn; const char *name; -} sctp_sm_table_entry_t; +}; /* A naming convention of "sctp_sf_xxx" applies to all the state functions * currently in use. @@ -170,7 +170,7 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); -const sctp_sm_table_entry_t *sctp_sm_lookup_event( +const struct sctp_sm_table_entry *sctp_sm_lookup_event( struct net *net, enum sctp_event event_type, enum sctp_state state, -- cgit v1.2.3 From 172a1599ba88df7147f6503a75686fb89c8a1f3f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:57 +0800 Subject: sctp: remove the typedef sctp_disposition_t This patch is to remove the typedef sctp_disposition_t, and replace with enum sctp_disposition in the places where it's using this typedef. It's also to fix the indent for many functions' defination. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 7ad240228a0f..33077f317995 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -53,7 +53,7 @@ /* * Possible values for the disposition are: */ -typedef enum { +enum sctp_disposition { SCTP_DISPOSITION_DISCARD, /* No further processing. */ SCTP_DISPOSITION_CONSUME, /* Process return values normally. */ SCTP_DISPOSITION_NOMEM, /* We ran out of memory--recover. */ @@ -63,14 +63,15 @@ typedef enum { SCTP_DISPOSITION_NOT_IMPL, /* This entry is not implemented. */ SCTP_DISPOSITION_ERROR, /* This is plain old user error. */ SCTP_DISPOSITION_BUG, /* This is a bug. */ -} sctp_disposition_t; - -typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, - const struct sctp_endpoint *, - const struct sctp_association *, - const union sctp_subtype type, - void *arg, - struct sctp_cmd_seq *); +}; + +typedef enum sctp_disposition (sctp_state_fn_t) ( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands); typedef void (sctp_timer_event_t) (unsigned long); struct sctp_sm_table_entry { sctp_state_fn_t *fn; -- cgit v1.2.3 From 327c0dab8d1301cd866816de8c7f32eb872cabac Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:58 +0800 Subject: sctp: fix some indents in sm_make_chunk.c There are some bad indents of functions' defination in sm_make_chunk.c. They have been there since beginning, it was probably caused by that the typedef sctp_chunk_t was replaced with struct sctp_chunk. So it's the best time to fix them in this patchset, it's also to fix some bad indents in other functions' defination in sm_make_chunk.c. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 158 +++++++++++++++++++++++++------------------------- 1 file changed, 79 insertions(+), 79 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 33077f317995..2db3d3a9ce1d 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -184,68 +184,69 @@ __u32 sctp_generate_verification_tag(void); void sctp_populate_tie_tags(__u8 *cookie, __u32 curTag, __u32 hisTag); /* Prototypes for chunk-building functions. */ -struct sctp_chunk *sctp_make_init(const struct sctp_association *, - const struct sctp_bind_addr *, - gfp_t gfp, int vparam_len); -struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *, - const struct sctp_chunk *, - const gfp_t gfp, - const int unkparam_len); -struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_cwr(const struct sctp_association *, +struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, + const struct sctp_bind_addr *bp, + gfp_t gfp, int vparam_len); +struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const gfp_t gfp, const int unkparam_len); +struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc, const __u32 lowest_tsn, - const struct sctp_chunk *); -struct sctp_chunk * sctp_make_datafrag_empty(struct sctp_association *, - const struct sctp_sndrcvinfo *sinfo, - int len, const __u8 flags, - __u16 ssn, gfp_t gfp); -struct sctp_chunk *sctp_make_ecne(const struct sctp_association *, - const __u32); -struct sctp_chunk *sctp_make_sack(const struct sctp_association *); + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, + const struct sctp_sndrcvinfo *sinfo, + int len, const __u8 flags, + __u16 ssn, gfp_t gfp); +struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, + const __u32 lowest_tsn); +struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc); struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_shutdown_complete(const struct sctp_association *, - const struct sctp_chunk *); -void sctp_init_cause(struct sctp_chunk *, __be16 cause, size_t); -struct sctp_chunk *sctp_make_abort(const struct sctp_association *, - const struct sctp_chunk *, - const size_t hint); -struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *, - const struct sctp_chunk *, - __u32 tsn); -struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *, - struct msghdr *, size_t msg_len); -struct sctp_chunk *sctp_make_abort_violation(const struct sctp_association *, - const struct sctp_chunk *, - const __u8 *, - const size_t ); -struct sctp_chunk *sctp_make_violation_paramlen(const struct sctp_association *, - const struct sctp_chunk *, - struct sctp_paramhdr *); -struct sctp_chunk *sctp_make_violation_max_retrans(const struct sctp_association *, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *, - const struct sctp_transport *); -struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *, - const struct sctp_chunk *, - const void *payload, - const size_t paylen); -struct sctp_chunk *sctp_make_op_error(const struct sctp_association *, - const struct sctp_chunk *chunk, - __be16 cause_code, - const void *payload, - size_t paylen, - size_t reserve_tail); - -struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *, - union sctp_addr *, - struct sockaddr *, - int, __be16); + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_shutdown_complete( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen); +struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const size_t hint); +struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + __u32 tsn); +struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, + struct msghdr *msg, size_t msg_len); +struct sctp_chunk *sctp_make_abort_violation( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const __u8 *payload, + const size_t paylen); +struct sctp_chunk *sctp_make_violation_paramlen( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + struct sctp_paramhdr *param); +struct sctp_chunk *sctp_make_violation_max_retrans( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, + const struct sctp_transport *transport); +struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const void *payload, + const size_t paylen); +struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + __be16 cause_code, const void *payload, + size_t paylen, size_t reserve_tail); + +struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, + union sctp_addr *laddr, + struct sockaddr *addrs, + int addrcnt, __be16 flags); struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, union sctp_addr *addr); bool sctp_verify_asconf(const struct sctp_association *asoc, @@ -259,27 +260,25 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, __u32 new_cum_tsn, size_t nstreams, struct sctp_fwdtsn_skip *skiplist); struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc); -struct sctp_chunk *sctp_make_strreset_req( - const struct sctp_association *asoc, - __u16 stream_num, __u16 *stream_list, - bool out, bool in); +struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc, + __u16 stream_num, __u16 *stream_list, + bool out, bool in); struct sctp_chunk *sctp_make_strreset_tsnreq( - const struct sctp_association *asoc); + const struct sctp_association *asoc); struct sctp_chunk *sctp_make_strreset_addstrm( - const struct sctp_association *asoc, - __u16 out, __u16 in); -struct sctp_chunk *sctp_make_strreset_resp( - const struct sctp_association *asoc, - __u32 result, __u32 sn); -struct sctp_chunk *sctp_make_strreset_tsnresp( - struct sctp_association *asoc, - __u32 result, __u32 sn, - __u32 sender_tsn, __u32 receiver_tsn); + const struct sctp_association *asoc, + __u16 out, __u16 in); +struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc, + __u32 result, __u32 sn); +struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc, + __u32 result, __u32 sn, + __u32 sender_tsn, + __u32 receiver_tsn); bool sctp_verify_reconf(const struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_paramhdr **errp); -void sctp_chunk_assign_tsn(struct sctp_chunk *); -void sctp_chunk_assign_ssn(struct sctp_chunk *); +void sctp_chunk_assign_tsn(struct sctp_chunk *chunk); +void sctp_chunk_assign_ssn(struct sctp_chunk *chunk); /* Prototypes for stream-processing functions. */ struct sctp_chunk *sctp_process_strreset_outreq( @@ -322,11 +321,12 @@ void sctp_generate_proto_unreach_event(unsigned long peer); void sctp_ootb_pkt_free(struct sctp_packet *packet); -struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - struct sctp_chunk *chunk, - gfp_t gfp, int *err, - struct sctp_chunk **err_chk_p); +struct sctp_association *sctp_unpack_cookie( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + struct sctp_chunk *chunk, + gfp_t gfp, int *err, + struct sctp_chunk **err_chk_p); /* 3rd level prototypes */ __u32 sctp_generate_tag(const struct sctp_endpoint *ep); -- cgit v1.2.3 From 861932ecc36063196c80ca3e240502b0f6d0e977 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:31 +0200 Subject: net: sched: Add helpers to identify classids Offloading drivers need to understand what qdisc class a filter is added to. Currently they only need to identify ingress, clsact->ingress and clsact->egress. So provide these helpers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2579c209ea51..259bc191ba59 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -5,6 +5,7 @@ #include #include #include +#include #define DEFAULT_TX_QUEUE_LEN 1000 @@ -132,4 +133,17 @@ static inline unsigned int psched_mtu(const struct net_device *dev) return dev->mtu + dev->hard_header_len; } +static inline bool is_classid_clsact_ingress(u32 classid) +{ + /* This also returns true for ingress qdisc */ + return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && + TC_H_MIN(classid) != TC_H_MIN(TC_H_MIN_EGRESS); +} + +static inline bool is_classid_clsact_egress(u32 classid) +{ + return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && + TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS); +} + #endif -- cgit v1.2.3 From 7690f2a51d8afe51ac97e7fae66b081f192a7158 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:32 +0200 Subject: net: sched: propagate classid down to offload drivers Drivers need classid to decide they support this specific qdisc+class or not. So propagate it down via the tc_cls_common_offload struct. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f78e6560b2d..1f1de20e584f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -410,6 +410,7 @@ struct tc_cls_common_offload { u32 chain_index; __be16 protocol; u32 prio; + u32 classid; }; static inline void @@ -420,6 +421,7 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; cls_common->prio = tp->prio; + cls_common->classid = tp->classid; } struct tc_cls_u32_knode { -- cgit v1.2.3 From 237f79d24ebe1eb9b5651b7342ba5cc9d9b8f222 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:34 +0200 Subject: net: sched: remove handle propagation down to the drivers There is no longer need to use handle in drivers, so remove it from tc_cls_common_offload struct. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 1f1de20e584f..bd9dd79357fe 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -406,7 +406,6 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) #endif /* CONFIG_NET_CLS_IND */ struct tc_cls_common_offload { - u32 handle; u32 chain_index; __be16 protocol; u32 prio; @@ -417,7 +416,6 @@ static inline void tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, const struct tcf_proto *tp) { - cls_common->handle = tp->q->handle; cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; cls_common->prio = tp->prio; -- cgit v1.2.3 From 7b06e8aed283081010596c98a67f06c595affe51 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:35 +0200 Subject: net: sched: remove cops->tcf_cl_offload cops->tcf_cl_offload is no longer needed, as the drivers check what they can and cannot offload using the classid identify helpers. So remove this. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 14 +++----------- include/net/sch_generic.h | 1 - 2 files changed, 3 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bd9dd79357fe..e80edd8879ef 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -457,19 +457,12 @@ struct tc_cls_u32_offload { }; }; -static inline bool tc_can_offload(const struct net_device *dev, - const struct tcf_proto *tp) +static inline bool tc_can_offload(const struct net_device *dev) { - const struct Qdisc *sch = tp->q; - const struct Qdisc_class_ops *cops = sch->ops->cl_ops; - if (!(dev->features & NETIF_F_HW_TC)) return false; if (!dev->netdev_ops->ndo_setup_tc) return false; - if (cops && cops->tcf_cl_offload) - return cops->tcf_cl_offload(tp->classid); - return true; } @@ -478,12 +471,11 @@ static inline bool tc_skip_hw(u32 flags) return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false; } -static inline bool tc_should_offload(const struct net_device *dev, - const struct tcf_proto *tp, u32 flags) +static inline bool tc_should_offload(const struct net_device *dev, u32 flags) { if (tc_skip_hw(flags)) return false; - return tc_can_offload(dev, tp); + return tc_can_offload(dev); } static inline bool tc_skip_sw(u32 flags) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e79f5ad1c5f3..5865db91976b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -156,7 +156,6 @@ struct Qdisc_class_ops { /* Filter manipulation */ struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long); - bool (*tcf_cl_offload)(u32 classid); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); -- cgit v1.2.3 From ad729bc9acfb7c47112964b4877ef5404578ed13 Mon Sep 17 00:00:00 2001 From: Andreas Born Date: Thu, 10 Aug 2017 06:41:44 +0200 Subject: bonding: require speed/duplex only for 802.3ad, alb and tlb The patch c4adfc822bf5 ("bonding: make speed, duplex setting consistent with link state") puts the link state to down if bond_update_speed_duplex() cannot retrieve speed and duplex settings. Assumably the patch was written with 802.3ad mode in mind which relies on link speed/duplex settings. For other modes like active-backup these settings are not required. Thus, only for these other modes, this patch reintroduces support for slaves that do not support reporting speed or duplex such as wireless devices. This fixes the regression reported in bug 196547 (https://bugzilla.kernel.org/show_bug.cgi?id=196547). Fixes: c4adfc822bf5 ("bonding: make speed, duplex setting consistent with link state") Signed-off-by: Andreas Born Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- include/net/bonding.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/bonding.h b/include/net/bonding.h index b00508d22e0a..b2e68657a216 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -277,6 +277,11 @@ static inline bool bond_is_lb(const struct bonding *bond) BOND_MODE(bond) == BOND_MODE_ALB; } +static inline bool bond_needs_speed_duplex(const struct bonding *bond) +{ + return BOND_MODE(bond) == BOND_MODE_8023AD || bond_is_lb(bond); +} + static inline bool bond_is_nondyn_tlb(const struct bonding *bond) { return (BOND_MODE(bond) == BOND_MODE_TLB) && -- cgit v1.2.3 From e4dde4127396f0c8f1c2e11b3ecc5baf4f8628bf Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 11 Aug 2017 18:31:24 +0200 Subject: net: fix compilation when busy poll is not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIN_NAPI_ID is used in various places outside of CONFIG_NET_RX_BUSY_POLL wrapping, so when it's not set we run into build errors such as: net/core/dev.c: In function 'dev_get_by_napi_id': net/core/dev.c:886:16: error: ‘MIN_NAPI_ID’ undeclared (first use in this function) if (napi_id < MIN_NAPI_ID) ^~~~~~~~~~~ Thus, have MIN_NAPI_ID always defined to fix these errors. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/busy_poll.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 8ffd434676b7..71c72a939bf8 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -29,18 +29,18 @@ #include #include -#ifdef CONFIG_NET_RX_BUSY_POLL - -struct napi_struct; -extern unsigned int sysctl_net_busy_read __read_mostly; -extern unsigned int sysctl_net_busy_poll __read_mostly; - /* 0 - Reserved to indicate value not set * 1..NR_CPUS - Reserved for sender_cpu * NR_CPUS+1..~0 - Region available for NAPI IDs */ #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) +#ifdef CONFIG_NET_RX_BUSY_POLL + +struct napi_struct; +extern unsigned int sysctl_net_busy_read __read_mostly; +extern unsigned int sysctl_net_busy_poll __read_mostly; + static inline bool net_busy_loop_on(void) { return sysctl_net_busy_poll; -- cgit v1.2.3 From fd851ba9caa9a63fdbb72a2e6ed5560c0989e999 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Aug 2017 10:48:53 -0700 Subject: udp: harden copy_linear_skb() syzkaller got crashes with CONFIG_HARDENED_USERCOPY=y configs. Issue here is that recvfrom() can be used with user buffer of Z bytes, and SO_PEEK_OFF of X bytes, from a skb with Y bytes, and following condition : Z < X < Y kernel BUG at mm/usercopy.c:72! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 2917 Comm: syzkaller842281 Not tainted 4.13.0-rc3+ #16 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801d2fa40c0 task.stack: ffff8801d1fe8000 RIP: 0010:report_usercopy mm/usercopy.c:64 [inline] RIP: 0010:__check_object_size+0x3ad/0x500 mm/usercopy.c:264 RSP: 0018:ffff8801d1fef8a8 EFLAGS: 00010286 RAX: 0000000000000078 RBX: ffffffff847102c0 RCX: 0000000000000000 RDX: 0000000000000078 RSI: 1ffff1003a3fded5 RDI: ffffed003a3fdf09 RBP: ffff8801d1fef998 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801d1ea480e R13: fffffffffffffffa R14: ffffffff84710280 R15: dffffc0000000000 FS: 0000000001360880(0000) GS:ffff8801dc000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000202ecfe4 CR3: 00000001d1ff8000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: check_object_size include/linux/thread_info.h:108 [inline] check_copy_size include/linux/thread_info.h:139 [inline] copy_to_iter include/linux/uio.h:105 [inline] copy_linear_skb include/net/udp.h:371 [inline] udpv6_recvmsg+0x1040/0x1af0 net/ipv6/udp.c:395 inet_recvmsg+0x14c/0x5f0 net/ipv4/af_inet.c:793 sock_recvmsg_nosec net/socket.c:792 [inline] sock_recvmsg+0xc9/0x110 net/socket.c:799 SYSC_recvfrom+0x2d6/0x570 net/socket.c:1788 SyS_recvfrom+0x40/0x50 net/socket.c:1760 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: b65ac44674dd ("udp: try to avoid 2 cache miss on dequeue") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Signed-off-by: David S. Miller --- include/net/udp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index cc8036987dcb..e9b1d1eacb59 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -368,6 +368,8 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, { int n, copy = len - off; + if (copy < 0) + return -EINVAL; n = copy_to_iter(skb->data + off, copy, to); if (n == copy) return 0; -- cgit v1.2.3 From 42b7305905be52e467bbc346b0f2f95ad44eb1a0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 14 Aug 2017 21:31:38 +0200 Subject: udp: fix linear skb reception with PEEK_OFF copy_linear_skb() is broken; both of its callers actually expect 'len' to be the amount we are trying to copy, not the offset of the end. Fix it keeping the meanings of arguments in sync with what the callers (both of them) expect. Also restore a saner behavior on EFAULT (i.e. preserving the iov_iter position in case of failure): The commit fd851ba9caa9 ("udp: harden copy_linear_skb()") avoids the more destructive effect of the buggy copy_linear_skb(), e.g. no more invalid memory access, but said function still behaves incorrectly: when peeking with offset it can fail with EINVAL instead of copying the appropriate amount of memory. Reported-by: Sasha Levin Fixes: b65ac44674dd ("udp: try to avoid 2 cache miss on dequeue") Fixes: fd851ba9caa9 ("udp: harden copy_linear_skb()") Signed-off-by: Al Viro Acked-by: Paolo Abeni Tested-by: Sasha Levin Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index e9b1d1eacb59..586de4b811b5 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -366,14 +366,13 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb) static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, struct iov_iter *to) { - int n, copy = len - off; + int n; - if (copy < 0) - return -EINVAL; - n = copy_to_iter(skb->data + off, copy, to); - if (n == copy) + n = copy_to_iter(skb->data + off, len, to); + if (n == len) return 0; + iov_iter_revert(to, n); return -EFAULT; } -- cgit v1.2.3 From fe4007999599c02598c17b643e8de43e487d48e8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Aug 2017 09:09:49 +0200 Subject: ipv6: fib: Provide offload indication using nexthop flags IPv6 routes currently lack nexthop flags as in IPv4. This has several implications. In the forwarding path, it requires us to check the carrier state of the nexthop device and potentially ignore a linkdown route, instead of checking for RTNH_F_LINKDOWN. It also requires capable drivers to use the user facing IPv6-specific route flags to provide offload indication, instead of using the nexthop flags as in IPv4. Add nexthop flags to IPv6 routes in the 40 bytes hole and use it to provide offload indication instead of the RTF_OFFLOAD flag, which is removed while it's still not part of any official kernel release. In the near future we would like to use the field for the RTNH_F_{LINKDOWN,DEAD} flags, but this change is more involved and might not be ready in time for the current cycle. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1d790ea40ea7..71c1646298ae 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -120,6 +120,8 @@ struct rt6_info { atomic_t rt6i_ref; + unsigned int rt6i_nh_flags; + /* These are in a separate cache line. */ struct rt6key rt6i_dst ____cacheline_aligned_in_smp; u32 rt6i_flags; -- cgit v1.2.3 From 12d94a804946af291e24b80fc53ec86264765781 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Aug 2017 04:09:51 -0700 Subject: ipv6: fix NULL dereference in ip6_route_dev_notify() Based on a syzkaller report [1], I found that a per cpu allocation failure in snmp6_alloc_dev() would then lead to NULL dereference in ip6_route_dev_notify(). It seems this is a very old bug, thus no Fixes tag in this submission. Let's add in6_dev_put_clear() helper, as we will probably use it elsewhere (once available/present in net-next) [1] kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 17294 Comm: syz-executor6 Not tainted 4.13.0-rc2+ #10 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff88019f456680 task.stack: ffff8801c6e58000 RIP: 0010:__read_once_size include/linux/compiler.h:250 [inline] RIP: 0010:atomic_read arch/x86/include/asm/atomic.h:26 [inline] RIP: 0010:refcount_sub_and_test+0x7d/0x1b0 lib/refcount.c:178 RSP: 0018:ffff8801c6e5f1b0 EFLAGS: 00010202 RAX: 0000000000000037 RBX: dffffc0000000000 RCX: ffffc90005d25000 RDX: ffff8801c6e5f218 RSI: ffffffff82342bbf RDI: 0000000000000001 RBP: ffff8801c6e5f240 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff10038dcbe37 R13: 0000000000000006 R14: 0000000000000001 R15: 00000000000001b8 FS: 00007f21e0429700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001ddbc22000 CR3: 00000001d632b000 CR4: 00000000001426e0 DR0: 0000000020000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: refcount_dec_and_test+0x1a/0x20 lib/refcount.c:211 in6_dev_put include/net/addrconf.h:335 [inline] ip6_route_dev_notify+0x1c9/0x4a0 net/ipv6/route.c:3732 notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1678 call_netdevice_notifiers net/core/dev.c:1694 [inline] rollback_registered_many+0x91c/0xe80 net/core/dev.c:7107 rollback_registered+0x1be/0x3c0 net/core/dev.c:7149 register_netdevice+0xbcd/0xee0 net/core/dev.c:7587 register_netdev+0x1a/0x30 net/core/dev.c:7669 loopback_net_init+0x76/0x160 drivers/net/loopback.c:214 ops_init+0x10a/0x570 net/core/net_namespace.c:118 setup_net+0x313/0x710 net/core/net_namespace.c:294 copy_net_ns+0x27c/0x580 net/core/net_namespace.c:418 create_new_namespaces+0x425/0x880 kernel/nsproxy.c:107 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:206 SYSC_unshare kernel/fork.c:2347 [inline] SyS_unshare+0x653/0xfa0 kernel/fork.c:2297 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4512c9 RSP: 002b:00007f21e0428c08 EFLAGS: 00000216 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 0000000000718150 RCX: 00000000004512c9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000062020200 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004b973d R13: 00000000ffffffff R14: 000000002001d000 R15: 00000000000002dd Code: 50 2b 34 82 c7 00 f1 f1 f1 f1 c7 40 04 04 f2 f2 f2 c7 40 08 f3 f3 f3 f3 e8 a1 43 39 ff 4c 89 f8 48 8b 95 70 ff ff ff 48 c1 e8 03 <0f> b6 0c 18 4c 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RIP: __read_once_size include/linux/compiler.h:250 [inline] RSP: ffff8801c6e5f1b0 RIP: atomic_read arch/x86/include/asm/atomic.h:26 [inline] RSP: ffff8801c6e5f1b0 RIP: refcount_sub_and_test+0x7d/0x1b0 lib/refcount.c:178 RSP: ffff8801c6e5f1b0 ---[ end trace e441d046c6410d31 ]--- Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- include/net/addrconf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 6df79e96a780..f44ff2476758 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -336,6 +336,16 @@ static inline void in6_dev_put(struct inet6_dev *idev) in6_dev_finish_destroy(idev); } +static inline void in6_dev_put_clear(struct inet6_dev **pidev) +{ + struct inet6_dev *idev = *pidev; + + if (idev) { + in6_dev_put(idev); + *pidev = NULL; + } +} + static inline void __in6_dev_put(struct inet6_dev *idev) { refcount_dec(&idev->refcnt); -- cgit v1.2.3 From c780a049f9bf442314335372c9abc4548bfe3e44 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 11:09:12 -0700 Subject: ipv4: better IP_MAX_MTU enforcement While working on yet another syzkaller report, I found that our IP_MAX_MTU enforcements were not properly done. gcc seems to reload dev->mtu for min(dev->mtu, IP_MAX_MTU), and final result can be bigger than IP_MAX_MTU :/ This is a problem because device mtu can be changed on other cpus or threads. While this patch does not fix the issue I am working on, it is probably worth addressing it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 821cedcc8e73..0cf7f5a65fe6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -352,7 +352,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, !forwarding) return dst_mtu(dst); - return min(dst->dev->mtu, IP_MAX_MTU); + return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, @@ -364,7 +364,7 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); } - return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU); + return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); } u32 ip_idents_reserve(u32 hash, int segs); -- cgit v1.2.3 From a0917e0bc6efc05834c0c1eafebd579a9c75e6e9 Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Fri, 18 Aug 2017 15:04:54 -0400 Subject: datagram: When peeking datagrams with offset < 0 don't skip empty skbs Due to commit e6afc8ace6dd5cef5e812f26c72579da8806f5ac ("udp: remove headers from UDP packets before queueing"), when udp packets are being peeked the requested extra offset is always 0 as there is no need to skip the udp header. However, when the offset is 0 and the next skb is of length 0, it is only returned once. The behaviour can be seen with the following python script: from socket import *; f=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0); g=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0); f.bind(('::', 0)); addr=('::1', f.getsockname()[1]); g.sendto(b'', addr) g.sendto(b'b', addr) print(f.recvfrom(10, MSG_PEEK)); print(f.recvfrom(10, MSG_PEEK)); Where the expected output should be the empty string twice. Instead, make sk_peek_offset return negative values, and pass those values to __skb_try_recv_datagram/__skb_try_recv_from_queue. If the passed offset to __skb_try_recv_from_queue is negative, the checked skb is never skipped. __skb_try_recv_from_queue will then ensure the offset is reset back to 0 if a peek is requested without an offset, unless no packets are found. Also simplify the if condition in __skb_try_recv_from_queue. If _off is greater then 0, and off is greater then or equal to skb->len, then (_off || skb->len) must always be true assuming skb->len >= 0 is always true. Also remove a redundant check around a call to sk_peek_offset in af_unix.c, as it double checked if MSG_PEEK was set in the flags. V2: - Moved the negative fixup into __skb_try_recv_from_queue, and remove now redundant checks - Fix peeking in udp{,v6}_recvmsg to report the right value when the offset is 0 V3: - Marked new branch in __skb_try_recv_from_queue as unlikely. Signed-off-by: Matthew Dawson Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 7c0632c7e870..aeeec62992ca 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -507,9 +507,7 @@ int sk_set_peek_off(struct sock *sk, int val); static inline int sk_peek_offset(struct sock *sk, int flags) { if (unlikely(flags & MSG_PEEK)) { - s32 off = READ_ONCE(sk->sk_peek_off); - if (off >= 0) - return off; + return READ_ONCE(sk->sk_peek_off); } return 0; -- cgit v1.2.3 From 9620fef27ed2cdb37bf6fd028f32bea2ef5119a8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Aug 2017 12:08:07 -0700 Subject: ipv4: convert dst_metrics.refcnt from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index f73611ec4017..93568bd0a352 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -107,7 +108,7 @@ struct dst_entry { struct dst_metrics { u32 metrics[RTAX_MAX]; - atomic_t refcnt; + refcount_t refcnt; }; extern const struct dst_metrics dst_default_metrics; -- cgit v1.2.3 From 68a66d149a8c78ec6720f268597302883e48e9fa Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sat, 19 Aug 2017 15:37:07 +0300 Subject: net_sched: fix order of queue length updates in qdisc_replace() This important to call qdisc_tree_reduce_backlog() after changing queue length. Parent qdisc should deactivate class in ->qlen_notify() called from qdisc_tree_reduce_backlog() but this happens only if qdisc->q.qlen in zero. Missed class deactivations leads to crashes/warnings at picking packets from empty qdisc and corrupting state at reactivating this class in future. Signed-off-by: Konstantin Khlebnikov Fixes: 86a7996cc8a0 ("net_sched: introduce qdisc_replace() helper") Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1c123e2b2415..67f815e5d525 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -806,8 +806,11 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, old = *pold; *pold = new; if (old != NULL) { - qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog); + unsigned int qlen = old->q.qlen; + unsigned int backlog = old->qstats.backlog; + qdisc_reset(old); + qdisc_tree_reduce_backlog(old, qlen, backlog); } sch_tree_unlock(sch); -- cgit v1.2.3 From 89e49506bc62520f93e64a278293444319a6aebb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 17 Aug 2017 16:47:00 +0200 Subject: dsa: remove unused net_device arg from handlers compile tested only, but saw no warnings/errors with allmodconfig build. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/dsa.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 7f46b521313e..398ca8d70ccd 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -104,8 +104,7 @@ struct packet_type; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev); + struct packet_type *pt); int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); }; @@ -134,8 +133,7 @@ struct dsa_switch_tree { /* Copy of tag_ops->rcv for faster access in hot path */ struct sk_buff * (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev); + struct packet_type *pt); /* * The switch port to which the CPU is attached. -- cgit v1.2.3 From 7d3f0cd43feea1636dd7746f22fe8249b34d1b79 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 18 Aug 2017 15:23:24 +0800 Subject: net: sched: Add the invalid handle check in qdisc_class_find Add the invalid handle "0" check to avoid unnecessary search, because the qdisc uses the skb->priority as the handle value to look up, and it is "0" usually. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 5865db91976b..107c52432245 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -393,6 +393,9 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id) struct Qdisc_class_common *cl; unsigned int h; + if (!id) + return NULL; + h = qdisc_class_hash(id, hash->hashmask); hlist_for_each_entry(cl, &hash->hash[h], hnode) { if (cl->classid == id) -- cgit v1.2.3 From c5cff8561d2d0006e972bd114afd51f082fee77c Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 21 Aug 2017 09:47:10 -0700 Subject: ipv6: add rcu grace period before freeing fib6_node We currently keep rt->rt6i_node pointing to the fib6_node for the route. And some functions make use of this pointer to dereference the fib6_node from rt structure, e.g. rt6_check(). However, as there is neither refcount nor rcu taken when dereferencing rt->rt6i_node, it could potentially cause crashes as rt->rt6i_node could be set to NULL by other CPUs when doing a route deletion. This patch introduces an rcu grace period before freeing fib6_node and makes sure the functions that dereference it takes rcu_read_lock(). Note: there is no "Fixes" tag because this bug was there in a very early stage. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1a88008cc6f5..e9c59db92942 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -70,6 +70,7 @@ struct fib6_node { __u16 fn_flags; int fn_sernum; struct rt6_info *rr_ptr; + struct rcu_head rcu; }; #ifndef CONFIG_IPV6_SUBTREES @@ -167,13 +168,40 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } +/* Function to safely get fn->sernum for passed in rt + * and store result in passed in cookie. + * Return true if we can get cookie safely + * Return false if not + */ +static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, + u32 *cookie) +{ + struct fib6_node *fn; + bool status = false; + + rcu_read_lock(); + fn = rcu_dereference(rt->rt6i_node); + + if (fn) { + *cookie = fn->fn_sernum; + status = true; + } + + rcu_read_unlock(); + return status; +} + static inline u32 rt6_get_cookie(const struct rt6_info *rt) { + u32 cookie = 0; + if (rt->rt6i_flags & RTF_PCPU || (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) rt = (struct rt6_info *)(rt->dst.from); - return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + rt6_get_cookie_safe(rt, &cookie); + + return cookie; } static inline void ip6_rt_put(struct rt6_info *rt) -- cgit v1.2.3 From 111993692741a7044e6c01b428cecf1071de3d0b Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 21 Aug 2017 23:33:49 -0700 Subject: tcp: Remove the unused parameter for tcp_try_fastopen. Signed-off-by: Tonghao Zhang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index afdab3781425..a995004ae946 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1533,8 +1533,7 @@ int tcp_fastopen_reset_cipher(void *key, unsigned int len); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct dst_entry *dst); + struct tcp_fastopen_cookie *foc); void tcp_fastopen_init_key_once(bool publish); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); -- cgit v1.2.3 From 84e54fe0a5eaed696dee4019c396f8396f5a908b Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 22 Aug 2017 09:40:28 -0700 Subject: gre: introduce native tunnel support for ERSPAN The patch adds ERSPAN type II tunnel support. The implementation is based on the draft at [1]. One of the purposes is for Linux box to be able to receive ERSPAN monitoring traffic sent from the Cisco switch, by creating a ERSPAN tunnel device. In addition, the patch also adds ERSPAN TX, so Linux virtual switch can redirect monitored traffic to the ERSPAN tunnel device. The traffic will be encapsulated into ERSPAN and sent out. The implementation reuses tunnel key as ERSPAN session ID, and field 'erspan' as ERSPAN Index fields: ./ip link add dev ers11 type erspan seq key 100 erspan 123 \ local 172.16.1.200 remote 172.16.1.100 To use the above device as ERSPAN receiver, configure Nexus 5000 switch as below: monitor session 100 type erspan-source erspan-id 123 vrf default destination ip 172.16.1.200 source interface Ethernet1/11 both source interface Ethernet1/12 both no shut monitor erspan origin ip-address 172.16.1.100 global [1] https://tools.ietf.org/html/draft-foschiano-erspan-01 [2] iproute2 patch: http://marc.info/?l=linux-netdev&m=150306086924951&w=2 [3] test script: http://marc.info/?l=linux-netdev&m=150231021807304&w=2 Signed-off-by: William Tu Signed-off-by: Meenakshi Vohra Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Signed-off-by: David S. Miller --- include/net/erspan.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++ include/net/ip_tunnels.h | 3 +++ 2 files changed, 64 insertions(+) create mode 100644 include/net/erspan.h (limited to 'include/net') diff --git a/include/net/erspan.h b/include/net/erspan.h new file mode 100644 index 000000000000..ca94fc86865e --- /dev/null +++ b/include/net/erspan.h @@ -0,0 +1,61 @@ +#ifndef __LINUX_ERSPAN_H +#define __LINUX_ERSPAN_H + +/* + * GRE header for ERSPAN encapsulation (8 octets [34:41]) -- 8 bytes + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |0|0|0|1|0|00000|000000000|00000| Protocol Type for ERSPAN | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Sequence Number (increments per packet per session) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Note that in the above GRE header [RFC1701] out of the C, R, K, S, + * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The + * other fields are set to zero, so only a sequence number follows. + * + * ERSPAN Type II header (8 octets [42:49]) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Ver | VLAN | COS | En|T| Session ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Index | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB + */ + +#define ERSPAN_VERSION 0x1 + +#define VER_MASK 0xf000 +#define VLAN_MASK 0x0fff +#define COS_MASK 0xe000 +#define EN_MASK 0x1800 +#define T_MASK 0x0400 +#define ID_MASK 0x03ff +#define INDEX_MASK 0xfffff + +enum erspan_encap_type { + ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ + ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ + ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */ + ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */ +}; + +struct erspan_metadata { + __be32 index; /* type II */ +}; + +struct erspanhdr { + __be16 ver_vlan; +#define VER_OFFSET 12 + __be16 session_id; +#define COS_OFFSET 13 +#define EN_OFFSET 11 +#define T_OFFSET 10 + struct erspan_metadata md; +}; + +#endif diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 520809912f03..625c29329372 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -115,6 +115,9 @@ struct ip_tunnel { u32 o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ + /* This field used only by ERSPAN */ + u32 index; /* ERSPAN type II index */ + struct dst_cache dst_cache; struct ip_tunnel_parm parms; -- cgit v1.2.3 From 98aaa913b4ed250324429f0a9e6d5f77a3b5276c Mon Sep 17 00:00:00 2001 From: Mike Maloney Date: Tue, 22 Aug 2017 17:08:48 -0400 Subject: tcp: Extend SOF_TIMESTAMPING_RX_SOFTWARE to TCP recvmsg When SOF_TIMESTAMPING_RX_SOFTWARE is enabled for tcp sockets, return the timestamp corresponding to the highest sequence number data returned. Previously the skb->tstamp is overwritten when a TCP packet is placed in the out of order queue. While the packet is in the ooo queue, save the timestamp in the TCB_SKB_CB. This space is shared with the gso_* options which are only used on the tx path, and a previously unused 4 byte hole. When skbs are coalesced either in the sk_receive_queue or the out_of_order_queue always choose the timestamp of the appended skb to maintain the invariant of returning the timestamp of the last byte in the recvmsg buffer. Signed-off-by: Mike Maloney Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/tcp.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index a995004ae946..c614ff135b66 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -774,6 +774,12 @@ struct tcp_skb_cb { u16 tcp_gso_segs; u16 tcp_gso_size; }; + + /* Used to stash the receive timestamp while this skb is in the + * out of order queue, as skb->tstamp is overwritten by the + * rbnode. + */ + ktime_t swtstamp; }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ @@ -790,7 +796,8 @@ struct tcp_skb_cb { __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ __u8 txstamp_ack:1, /* Record TX timestamp for ack? */ eor:1, /* Is skb MSG_EOR marked? */ - unused:6; + has_rxtstamp:1, /* SKB has a RX timestamp */ + unused:5; __u32 ack_seq; /* Sequence number ACK'd */ union { struct { -- cgit v1.2.3 From f9cbe9a556afca9e82df9aebe4412d93769566b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20T=C3=A9nart?= Date: Wed, 23 Aug 2017 09:46:54 +0200 Subject: net: define the TSO header size in net/tso.h The TSO header size was defined in many drivers. Factorize the code and define its size in net/tso.h. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/net/tso.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/tso.h b/include/net/tso.h index b7be852bfe9d..9a56c39e6d0a 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -3,6 +3,8 @@ #include +#define TSO_HEADER_SIZE 128 + struct tso_t { int next_frag_idx; void *data; -- cgit v1.2.3 From e457d86ada27cbd2f46ded75d4b4bc06e26d0e2e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 23 Aug 2017 10:08:19 +0200 Subject: net: sched: add couple of goto_chain helpers Add helpers to find out if a gact instance is goto_chain termination action and to get chain index. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/tc_act/tc_gact.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index d576374c4d6f..41afe1ce7b16 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -15,7 +15,8 @@ struct tcf_gact { }; #define to_gact(a) ((struct tcf_gact *)a) -static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) +static inline bool __is_tcf_gact_act(const struct tc_action *a, int act, + bool is_ext) { #ifdef CONFIG_NET_CLS_ACT struct tcf_gact *gact; @@ -24,7 +25,8 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) return false; gact = to_gact(a); - if (gact->tcf_action == act) + if ((!is_ext && gact->tcf_action == act) || + (is_ext && TC_ACT_EXT_CMP(gact->tcf_action, act))) return true; #endif @@ -33,12 +35,22 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) static inline bool is_tcf_gact_shot(const struct tc_action *a) { - return __is_tcf_gact_act(a, TC_ACT_SHOT); + return __is_tcf_gact_act(a, TC_ACT_SHOT, false); } static inline bool is_tcf_gact_trap(const struct tc_action *a) { - return __is_tcf_gact_act(a, TC_ACT_TRAP); + return __is_tcf_gact_act(a, TC_ACT_TRAP, false); +} + +static inline bool is_tcf_gact_goto_chain(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_GOTO_CHAIN, true); +} + +static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a) +{ + return a->goto_chain->index; } #endif /* __NET_TC_GACT_H */ -- cgit v1.2.3 From 1177009131bee310421f5c04c43d3777cbacbdc8 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:39:59 +0200 Subject: devlink: Add Ethernet header for dpipe This will be used by the IPv4 host table which will be introduced in the following patches. This header is global and can be reused by many drivers. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index ed7687bbf5d0..ddb8b5227580 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -328,6 +328,7 @@ int devlink_dpipe_action_put(struct sk_buff *skb, struct devlink_dpipe_action *action); int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_match *match); +extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; #else -- cgit v1.2.3 From 3fb886ecea93605a8ea14e258ff3158b8966781e Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:40:00 +0200 Subject: devlink: Add IPv4 header for dpipe This will be used by the IPv4 host table which will be introduced in the following patches. This header is global and can be reused by many drivers. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index ddb8b5227580..8ff8a6f77f29 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -329,6 +329,7 @@ int devlink_dpipe_action_put(struct sk_buff *skb, int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_match *match); extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; +extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; #else -- cgit v1.2.3 From ffd3cdccf214cf0df08856a6738544076c4cd548 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:40:02 +0200 Subject: devlink: Add support for dynamic table size Up until now the dpipe table's size was static and known at registration time. The host table does not have constant size and it is resized in dynamic manner. In order to support this behavior the size is changed to be obtained dynamically via an op. This patch also adjust the current dpipe table for the new API. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 8ff8a6f77f29..e96272b2cfec 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -178,7 +178,6 @@ struct devlink_dpipe_table_ops; * struct devlink_dpipe_table - table object * @priv: private * @name: table name - * @size: maximum number of entries * @counters_enabled: indicates if counters are active * @counter_control_extern: indicates if counter control is in dpipe or * external tool @@ -189,7 +188,6 @@ struct devlink_dpipe_table { void *priv; struct list_head list; const char *name; - u64 size; bool counters_enabled; bool counter_control_extern; struct devlink_dpipe_table_ops *table_ops; @@ -204,6 +202,7 @@ struct devlink_dpipe_table { * @counters_set_update - when changing the counter status hardware sync * maybe needed to allocate/free counter related * resources + * @size_get - get size */ struct devlink_dpipe_table_ops { int (*actions_dump)(void *priv, struct sk_buff *skb); @@ -211,6 +210,7 @@ struct devlink_dpipe_table_ops { int (*entries_dump)(void *priv, bool counters_enabled, struct devlink_dpipe_dump_ctx *dump_ctx); int (*counters_set_update)(void *priv, bool enable); + u64 (*size_get)(void *priv); }; /** @@ -311,8 +311,7 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); int devlink_dpipe_table_register(struct devlink *devlink, const char *table_name, struct devlink_dpipe_table_ops *table_ops, - void *priv, u64 size, - bool counter_control_extern); + void *priv, bool counter_control_extern); void devlink_dpipe_table_unregister(struct devlink *devlink, const char *table_name); int devlink_dpipe_headers_register(struct devlink *devlink, -- cgit v1.2.3 From 3580732448f128c39e7325912bc4368ade5dce7d Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:40:03 +0200 Subject: devlink: Move dpipe entry clear function into devlink The entry clear routine can be shared between the drivers, thus it is moved inside devlink. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index e96272b2cfec..047a0c54f652 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -323,6 +323,7 @@ int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx); int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, struct devlink_dpipe_entry *entry); int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx); +void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry); int devlink_dpipe_action_put(struct sk_buff *skb, struct devlink_dpipe_action *action); int devlink_dpipe_match_put(struct sk_buff *skb, @@ -448,6 +449,11 @@ devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) return 0; } +static inline void +devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry) +{ +} + static inline int devlink_dpipe_action_put(struct sk_buff *skb, struct devlink_dpipe_action *action) -- cgit v1.2.3 From 0d03510038bda70b5a4a252e8216822e6ce0cbdb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:02 +0200 Subject: netfilter: conntrack: compute l3proto nla size at compile time avoids a pointer and allows struct to be const later on. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 1b8de164d744..6a27ffea7480 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -20,6 +20,9 @@ struct nf_conntrack_l3proto { /* L3 Protocol Family number. ex) PF_INET */ u_int16_t l3proto; + /* size of tuple nlattr, fills a hole */ + u16 nla_size; + /* Protocol name */ const char *name; @@ -49,23 +52,17 @@ struct nf_conntrack_l3proto { int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum); +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); - - /* Called when netns wants to use connection tracking */ - int (*net_ns_get)(struct net *); - void (*net_ns_put)(struct net *); - - /* - * Calculate size of tuple nlattr - */ - int (*nlattr_tuple_size)(void); - int (*nlattr_to_tuple)(struct nlattr *tb[], struct nf_conntrack_tuple *t); const struct nla_policy *nla_policy; +#endif - size_t nla_size; + /* Called when netns wants to use connection tracking */ + int (*net_ns_get)(struct net *); + void (*net_ns_put)(struct net *); /* Module (if any) which this is connected to. */ struct module *me; -- cgit v1.2.3 From a3134d537f8209f5b149d7ed9f287047158845f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:03 +0200 Subject: netfilter: conntrack: remove protocol name from l3proto struct no need to waste storage for something that is only needed in one place and can be deduced from protocol number. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 6a27ffea7480..e31861e4fa6a 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -23,9 +23,6 @@ struct nf_conntrack_l3proto { /* size of tuple nlattr, fills a hole */ u16 nla_size; - /* Protocol name */ - const char *name; - /* * Try to fill in the third arg: nhoff is offset of l3 proto * hdr. Return true if possible. -- cgit v1.2.3 From 09ec82f5af99d1e35614eb0844b920fc335a313d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:04 +0200 Subject: netfilter: conntrack: remove protocol name from l4proto struct no need to waste storage for something that is only needed in one place and can be deduced from protocol number. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index b6e27cafb1d9..47c16bae5e00 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -108,9 +108,6 @@ struct nf_conntrack_l4proto { /* Return the per-net protocol part. */ struct nf_proto_net *(*get_net_proto)(struct net *net); - /* Protocol name */ - const char *name; - /* Module (if any) which this is connected to. */ struct module *me; }; -- cgit v1.2.3 From 036c69400a34cf8f6d39c88325dd510462809e7b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:05 +0200 Subject: netfilter: conntrack: reduce size of l4protocol trackers can use u16 for both, shrinks size by another 8 bytes. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 47c16bae5e00..15c58dd3f701 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -92,12 +92,12 @@ struct nf_conntrack_l4proto { #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) struct { - size_t obj_size; int (*nlattr_to_obj)(struct nlattr *tb[], struct net *net, void *data); int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); - unsigned int nlattr_max; + u16 obj_size; + u16 nlattr_max; const struct nla_policy *nla_policy; } ctnl_timeout; #endif -- cgit v1.2.3 From 91950833dd5a34ac6336aa88da6d43aaeb56ac6d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:06 +0200 Subject: netfilter: conntrack: place print_tuple in procfs part CONFIG_NF_CONNTRACK_PROCFS is deprecated, no need to use a function pointer in the trackers for this. Place the printf formatting in the one place that uses it. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 4 ---- include/net/netfilter/nf_conntrack_l4proto.h | 4 ---- 2 files changed, 8 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e31861e4fa6a..dabb53b0913c 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -37,10 +37,6 @@ struct nf_conntrack_l3proto { bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig); - /* Print out the per-protocol part of the tuple. */ - void (*print_tuple)(struct seq_file *s, - const struct nf_conntrack_tuple *); - /* * Called before tracking. * *dataoff: offset of protocol header (TCP, UDP,...) in skb diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 15c58dd3f701..7e8da04a5eb6 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -61,10 +61,6 @@ struct nf_conntrack_l4proto { /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); - /* Print out the per-protocol part of the tuple. Return like seq_* */ - void (*print_tuple)(struct seq_file *s, - const struct nf_conntrack_tuple *); - /* Print out the private part of the conntrack. */ void (*print_conntrack)(struct seq_file *s, struct nf_conn *); -- cgit v1.2.3 From ea48cc83cf612fddb4e8868369348b9f936cfedb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:07 +0200 Subject: netfilter: conntrack: print_conntrack only needed if CONFIG_NF_CONNTRACK_PROCFS Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7e8da04a5eb6..4976ef92dc78 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -61,9 +61,6 @@ struct nf_conntrack_l4proto { /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); - /* Print out the private part of the conntrack. */ - void (*print_conntrack)(struct seq_file *s, struct nf_conn *); - /* Return the array of timeouts for this protocol. */ unsigned int *(*get_timeouts)(struct net *net); @@ -96,6 +93,10 @@ struct nf_conntrack_l4proto { u16 nlattr_max; const struct nla_policy *nla_policy; } ctnl_timeout; +#endif +#ifdef CONFIG_NF_CONNTRACK_PROCFS + /* Print out the private part of the conntrack. */ + void (*print_conntrack)(struct seq_file *s, struct nf_conn *); #endif unsigned int *net_id; /* Init l4proto pernet data */ -- cgit v1.2.3 From b3480fe059ac9121b5714205b4ddae14b59ef4be Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:08 +0200 Subject: netfilter: conntrack: make protocol tracker pointers const Doesn't change generated code, but will make it easier to eventually make the actual trackers themselvers const. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 6 +++--- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- include/net/netfilter/nf_conntrack_timeout.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index dabb53b0913c..6269deecbee7 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -64,10 +64,10 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; /* Protocol global registration. */ -int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto); -void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto); +int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto); +void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto); -struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); +const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); /* Existing built-in protocols */ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 4976ef92dc78..d4933d56809d 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -114,10 +114,10 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO 256 -struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, +const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto); -struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, +const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto); void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index b222957062b5..483d104fa254 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -16,7 +16,7 @@ struct ctnl_timeout { refcount_t refcnt; char name[CTNL_TIMEOUT_NAME_MAX]; __u16 l3num; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; char data[0]; }; -- cgit v1.2.3 From 22b6722bfa591ba03d6a0c5521b600d4ab2d9a27 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 23 Aug 2017 09:55:41 +0200 Subject: ipv6: Add sysctl for per namespace flow label reflection Reflecting IPv6 Flow Label at server nodes is useful in environments that employ multipath routing to load balance the requests. As "IPv6 Flow Label Reflection" standard draft [1] points out - ICMPv6 PTB error messages generated in response to a downstream packets from the server can be routed by a load balancer back to the original server without looking at transport headers, if the server applies the flow label reflection. This enables the Path MTU Discovery past the ECMP router in load-balance or anycast environments where each server node is reachable by only one path. Introduce a sysctl to enable flow label reflection per net namespace for all newly created sockets. Same could be earlier achieved only per socket by setting the IPV6_FL_F_REFLECT flag for the IPV6_FLOWLABEL_MGR socket option. [1] https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01 Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 0e50bf3ed097..2544f9760a42 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -36,6 +36,7 @@ struct netns_sysctl_ipv6 { int idgen_retries; int idgen_delay; int flowlabel_state_ranges; + int flowlabel_reflect; }; struct netns_ipv6 { -- cgit v1.2.3 From 790c6056686cc4dd5b149b330bbd5ae208d4d721 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2017 18:10:46 -0700 Subject: devlink: Fix devlink_dpipe_table_register() stub signature. One too many arguments compared to the non-stub version. Reported-by: kbuild test robot Fixes: ffd3cdccf214 ("devlink: Add support for dynamic table size") Signed-off-by: David S. Miller --- include/net/devlink.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 047a0c54f652..aaf7178127a2 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -402,8 +402,7 @@ static inline int devlink_dpipe_table_register(struct devlink *devlink, const char *table_name, struct devlink_dpipe_table_ops *table_ops, - void *priv, u64 size, - bool counter_control_extern) + void *priv, bool counter_control_extern) { return 0; } -- cgit v1.2.3 From 29825717123fb9cfb9e709327d565c2f2fa89903 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 23 Aug 2017 09:58:28 +0200 Subject: net: Extend struct flowi6 with multipath hash Allow for functions that fill out the IPv6 flow info to also pass a hash computed over the skb contents. The hash value will drive the multipath routing decisions. This is intended for special treatment of ICMPv6 errors, where we would like to make a routing decision based on the flow identifying the offending IPv6 datagram that triggered the error, rather than the flow of the ICMP error itself. Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/flow.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index f3dc61b29bb5..eb60cee30b44 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -149,6 +149,7 @@ struct flowi6 { #define fl6_ipsec_spi uli.spi #define fl6_mh_type uli.mht.type #define fl6_gre_key uli.gre_key + __u32 mp_hash; } __attribute__((__aligned__(BITS_PER_LONG/8))); struct flowidn { -- cgit v1.2.3 From 23aebdacb05dab9efdf22b9e0413491cbd5f128f Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 23 Aug 2017 09:58:29 +0200 Subject: ipv6: Compute multipath hash for ICMP errors from offending packet When forwarding or sending out an ICMPv6 error, look at the embedded packet that triggered the error and compute a flow hash over its headers. This let's us route the ICMP error together with the flow it belongs to when multipath (ECMP) routing is in use, which in turn makes Path MTU Discovery work in ECMP load-balanced or anycast setups (RFC 7690). Granted, end-hosts behind the ECMP router (aka servers) need to reflect the IPv6 Flow Label for PMTUD to work. The code is organized to be in parallel with ipv4 stack: ip_multipath_l3_keys -> ip6_multipath_l3_keys fib_multipath_hash -> rt6_multipath_hash Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 907d39a42f6b..882bc3c7ccde 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -115,6 +115,7 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); +u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); -- cgit v1.2.3 From 551143d8d954fe398324a5caa276f518466c428b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 24 Aug 2017 21:12:28 -0700 Subject: net_sched: fix a refcount_t issue with noop_qdisc syzkaller reported a refcount_t warning [1] Issue here is that noop_qdisc refcnt was never really considered as a true refcount, since qdisc_destroy() found TCQ_F_BUILTIN set : if (qdisc->flags & TCQ_F_BUILTIN || !refcount_dec_and_test(&qdisc->refcnt))) return; Meaning that all atomic_inc() we did on noop_qdisc.refcnt were not really needed, but harmless until refcount_t came. To fix this problem, we simply need to not increment noop_qdisc.refcnt, since we never decrement it. [1] refcount_t: increment on 0; use-after-free. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 21754 at lib/refcount.c:152 refcount_inc+0x47/0x50 lib/refcount.c:152 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 21754 Comm: syz-executor7 Not tainted 4.13.0-rc6+ #20 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 panic+0x1e4/0x417 kernel/panic.c:180 __warn+0x1c4/0x1d9 kernel/panic.c:541 report_bug+0x211/0x2d0 lib/bug.c:183 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:190 do_trap_no_signal arch/x86/kernel/traps.c:224 [inline] do_trap+0x260/0x390 arch/x86/kernel/traps.c:273 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:310 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:323 invalid_op+0x1e/0x30 arch/x86/entry/entry_64.S:846 RIP: 0010:refcount_inc+0x47/0x50 lib/refcount.c:152 RSP: 0018:ffff8801c43477a0 EFLAGS: 00010282 RAX: 000000000000002b RBX: ffffffff86093c14 RCX: 0000000000000000 RDX: 000000000000002b RSI: ffffffff8159314e RDI: ffffed0038868ee8 RBP: ffff8801c43477a8 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff86093ac0 R13: 0000000000000001 R14: ffff8801d0f3bac0 R15: dffffc0000000000 attach_default_qdiscs net/sched/sch_generic.c:792 [inline] dev_activate+0x7d3/0xaa0 net/sched/sch_generic.c:833 __dev_open+0x227/0x330 net/core/dev.c:1380 __dev_change_flags+0x695/0x990 net/core/dev.c:6726 dev_change_flags+0x88/0x140 net/core/dev.c:6792 dev_ifsioc+0x5a6/0x930 net/core/dev_ioctl.c:256 dev_ioctl+0x2bc/0xf90 net/core/dev_ioctl.c:554 sock_do_ioctl+0x94/0xb0 net/socket.c:968 sock_ioctl+0x2c2/0x440 net/socket.c:1058 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 Fixes: 7b9364050246 ("net, sched: convert Qdisc.refcnt from atomic_t to refcount_t") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Reshetova, Elena Signed-off-by: David S. Miller --- include/net/sch_generic.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 67f815e5d525..c1109cdbbfa6 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -101,6 +101,13 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; }; +static inline void qdisc_refcount_inc(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return; + refcount_inc(&qdisc->refcnt); +} + static inline bool qdisc_is_running(const struct Qdisc *qdisc) { return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; -- cgit v1.2.3 From 3fd87127073292538047adf1c9c757e9cab0dd56 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Aug 2017 14:38:51 -0700 Subject: strparser: initialize all callbacks commit bbb03029a899 ("strparser: Generalize strparser") added more function pointers to 'struct strp_callbacks'; however, kcm_attach() was not updated to initialize them. This could cause the ->lock() and/or ->unlock() function pointers to be set to garbage values, causing a crash in strp_work(). Fix the bug by moving the callback structs into static memory, so unspecified members are zeroed. Also constify them while we're at it. This bug was found by syzkaller, which encountered the following splat: IP: 0x55 PGD 3b1ca067 P4D 3b1ca067 PUD 3b12f067 PMD 0 Oops: 0010 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 2 PID: 1194 Comm: kworker/u8:1 Not tainted 4.13.0-rc4-next-20170811 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: kstrp strp_work task: ffff88006bb0e480 task.stack: ffff88006bb10000 RIP: 0010:0x55 RSP: 0018:ffff88006bb17540 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff88006ce4bd60 RCX: 0000000000000000 RDX: 1ffff1000d9c97bd RSI: 0000000000000000 RDI: ffff88006ce4bc48 RBP: ffff88006bb17558 R08: ffffffff81467ab2 R09: 0000000000000000 R10: ffff88006bb17438 R11: ffff88006bb17940 R12: ffff88006ce4bc48 R13: ffff88003c683018 R14: ffff88006bb17980 R15: ffff88003c683000 FS: 0000000000000000(0000) GS:ffff88006de00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000055 CR3: 000000003c145000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: process_one_work+0xbf3/0x1bc0 kernel/workqueue.c:2098 worker_thread+0x223/0x1860 kernel/workqueue.c:2233 kthread+0x35e/0x430 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Code: Bad RIP value. RIP: 0x55 RSP: ffff88006bb17540 CR2: 0000000000000055 ---[ end trace f0e4920047069cee ]--- Here is a C reproducer (requires CONFIG_BPF_SYSCALL=y and CONFIG_AF_KCM=y): #include #include #include #include #include #include #include #include static const struct bpf_insn bpf_insns[3] = { { .code = 0xb7 }, /* BPF_MOV64_IMM(0, 0) */ { .code = 0x95 }, /* BPF_EXIT_INSN() */ }; static const union bpf_attr bpf_attr = { .prog_type = 1, .insn_cnt = 2, .insns = (uintptr_t)&bpf_insns, .license = (uintptr_t)"", }; int main(void) { int bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &bpf_attr, sizeof(bpf_attr)); int inet_fd = socket(AF_INET, SOCK_STREAM, 0); int kcm_fd = socket(AF_KCM, SOCK_DGRAM, 0); ioctl(kcm_fd, SIOCKCMATTACH, &(struct kcm_attach) { .fd = inet_fd, .bpf_fd = bpf_fd }); } Fixes: bbb03029a899 ("strparser: Generalize strparser") Cc: Dmitry Vyukov Cc: Tom Herbert Signed-off-by: Eric Biggers Signed-off-by: David S. Miller --- include/net/strparser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/strparser.h b/include/net/strparser.h index 4fe966a0ad92..7dc131d62ad5 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -138,7 +138,7 @@ void strp_done(struct strparser *strp); void strp_stop(struct strparser *strp); void strp_check_rcv(struct strparser *strp); int strp_init(struct strparser *strp, struct sock *sk, - struct strp_callbacks *cb); + const struct strp_callbacks *cb); void strp_data_ready(struct strparser *strp); int strp_process(struct strparser *strp, struct sk_buff *orig_skb, unsigned int orig_offset, size_t orig_len, -- cgit v1.2.3 From 32d99d0b670299720dd0db92a974c9612c230889 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Fri, 25 Aug 2017 09:56:44 +0200 Subject: ipv6: sr: add support for ip4ip6 encapsulation This patch enables the SRv6 encapsulation mode to carry an IPv4 payload. All the infrastructure was already present, I just had to add a parameter to seg6_do_srh_encap() to specify the inner packet protocol, and perform some additional checks. Usage example: ip route add 1.2.3.4 encap seg6 mode encap segs fc00::1,fc00::2 dev eth0 Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/net/seg6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/seg6.h b/include/net/seg6.h index 5379f550f521..099bad59dc90 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -60,7 +60,8 @@ extern int seg6_local_init(void); extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); -extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); +extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, + int proto); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); #endif -- cgit v1.2.3 From ebfa00c5745660fe7f0a91eea88d4dff658486c4 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 25 Aug 2017 13:10:12 +0200 Subject: tcp: fix refcnt leak with ebpf congestion control There are a few bugs around refcnt handling in the new BPF congestion control setsockopt: - The new ca is assigned to icsk->icsk_ca_ops even in the case where we cannot get a reference on it. This would lead to a use after free, since that ca is going away soon. - Changing the congestion control case doesn't release the refcnt on the previous ca. - In the reinit case, we first leak a reference on the old ca, then we call tcp_reinit_congestion_control on the ca that we have just assigned, leading to deinitializing the wrong ca (->release of the new ca on the old ca's data) and releasing the refcount on the ca that we actually want to use. This is visible by building (for example) BIC as a module and setting net.ipv4.tcp_congestion_control=bic, and using tcp_cong_kern.c from samples/bpf. This patch fixes the refcount issues, and moves reinit back into tcp core to avoid passing a ca pointer back to BPF. Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control") Signed-off-by: Sabrina Dubroca Acked-by: Lawrence Brakmo Signed-off-by: David S. Miller --- include/net/tcp.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index ada65e767b28..f642a39f9eee 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1004,9 +1004,7 @@ void tcp_get_default_congestion_control(char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); -int tcp_set_congestion_control(struct sock *sk, const char *name, bool load); -void tcp_reinit_congestion_control(struct sock *sk, - const struct tcp_congestion_ops *ca); +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit); u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); -- cgit v1.2.3 From 143976ce992fcf3bfc0f4d15d5726bb492dcf262 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 24 Aug 2017 16:51:29 -0700 Subject: net_sched: remove tc class reference counting For TC classes, their ->get() and ->put() are always paired, and the reference counting is completely useless, because: 1) For class modification and dumping paths, we already hold RTNL lock, so all of these ->get(),->change(),->put() are atomic. 2) For filter bindiing/unbinding, we use other reference counter than this one, and they should have RTNL lock too. 3) For ->qlen_notify(), it is special because it is called on ->enqueue() path, but we already hold qdisc tree lock there, and we hold this tree lock when graft or delete the class too, so it should not be gone or changed until we release the tree lock. Therefore, this patch removes ->get() and ->put(), but: 1) Adds a new ->find() to find the pointer to a class by classid, no refcnt. 2) Move the original class destroy upon the last refcnt into ->delete(), right after releasing tree lock. This is fine because the class is already removed from hash when holding the lock. For those who also use ->put() as ->unbind(), just rename them to reflect this change. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1688f0f6c7ba..d817585aa5df 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -147,8 +147,7 @@ struct Qdisc_class_ops { void (*qlen_notify)(struct Qdisc *, unsigned long); /* Class manipulation routines */ - unsigned long (*get)(struct Qdisc *, u32 classid); - void (*put)(struct Qdisc *, unsigned long); + unsigned long (*find)(struct Qdisc *, u32 classid); int (*change)(struct Qdisc *, u32, u32, struct nlattr **, unsigned long *); int (*delete)(struct Qdisc *, unsigned long); -- cgit v1.2.3 From 3cd904ecbb5d0bcf36dfca7e726bdfd6d3644334 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 24 Aug 2017 16:51:30 -0700 Subject: net_sched: kill u32_node pointer in Qdisc It is ugly to hide a u32-filter-specific pointer inside Qdisc, this breaks the TC layers: 1. Qdisc is a generic representation, should not have any specific data of any type 2. Qdisc layer is above filter layer, should only save filters in the list of struct tcf_proto. This pointer is used as the head of the chain of u32 hash tables, that is struct tc_u_hnode, because u32 filter is very special, it allows to create multiple hash tables within one qdisc and across multiple u32 filters. Instead of using this ugly pointer, we can just save it in a global hash table key'ed by (dev ifindex, qdisc handle), therefore we can still treat it as a per qdisc basis data structure conceptually. Of course, because of network namespaces, this key is not unique at all, but it is fine as we already have a pointer to Qdisc in struct tc_u_common, we can just compare the pointers when collision. And this only affects slow paths, has no impact to fast path, thanks to the pointer ->tp_c. Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d817585aa5df..c30b634c5f82 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -75,7 +75,6 @@ struct Qdisc { struct hlist_node hash; u32 handle; u32 parent; - void *u32_node; struct netdev_queue *dev_queue; -- cgit v1.2.3 From 64f0f5d18a47c703c85576375cc010e83dac6a48 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 25 Aug 2017 14:31:01 +0200 Subject: udp6: set rx_dst_cookie on rx_dst updates Currently, in the udp6 code, the dst cookie is not initialized/updated concurrently with the RX dst used by early demux. As a result, the dst_check() in the early_demux path always fails, the rx dst cache is always invalidated, and we can't really leverage significant gain from the demux lookup. Fix it adding udp6 specific variant of sk_rx_dst_set() and use it to set the dst cookie when the dst entry is really changed. The issue is there since the introduction of early demux for ipv6. Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast") Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index 586de4b811b5..626c2d8a70c5 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -260,7 +260,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, } void udp_v4_early_demux(struct sk_buff *skb); -void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); +bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); -- cgit v1.2.3 From 960632ece6949be1ab6f7a911faa4fa6e8305f4a Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Thu, 24 Aug 2017 00:08:32 +0200 Subject: netfilter: convert hook list to an array This converts the storage and layout of netfilter hook entries from a linked list to an array. After this commit, hook entries will be stored adjacent in memory. The next pointer is no longer required. The ops pointers are stored at the end of the array as they are only used in the register/unregister path and in the legacy br_netfilter code. nf_unregister_net_hooks() is slower than needed as it just calls nf_unregister_net_hook in a loop (i.e. at least n synchronize_net() calls), this will be addressed in followup patch. Test setup: - ixgbe 10gbit - netperf UDP_STREAM, 64 byte packets - 5 hooks: (raw + mangle prerouting, mangle+filter input, inet filter): empty mangle and raw prerouting, mangle and filter input hooks: 353.9 this patch: 364.2 Signed-off-by: Aaron Conole Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 2 +- include/net/netns/netfilter.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 4454719ff849..39468720fc19 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -10,9 +10,9 @@ struct nf_queue_entry { struct list_head list; struct sk_buff *skb; unsigned int id; + unsigned int hook_index; /* index in hook_entries->hook[] */ struct nf_hook_state state; - struct nf_hook_entry *hook; u16 size; /* sizeof(entry) + saved route keys */ /* extra space to store route keys */ diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cea396b53a60..72d66c8763d0 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -16,7 +16,7 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) bool defrag_ipv4; #endif -- cgit v1.2.3 From 1a66a836da630cd70f3639208da549b549ce576b Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 25 Aug 2017 09:21:28 -0700 Subject: gre: add collect_md mode to ERSPAN tunnel Similar to gre, vxlan, geneve, ipip tunnels, allow ERSPAN tunnels to operate in 'collect metadata' mode. bpf_skb_[gs]et_tunnel_key() helpers can make use of it right away. OVS can use it as well in the future. Signed-off-by: William Tu Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 625c29329372..992652856fe8 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -154,8 +154,10 @@ struct ip_tunnel { #define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_NOCACHE __cpu_to_be16(0x2000) +#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) -#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) +#define TUNNEL_OPTIONS_PRESENT \ + (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT) struct tnl_ptk_info { __be16 flags; -- cgit v1.2.3 From 4e587ea71bf924f7dac621f1351653bd41e446cb Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 25 Aug 2017 15:03:10 -0700 Subject: ipv6: fix sparse warning on rt6i_node Commit c5cff8561d2d adds rcu grace period before freeing fib6_node. This generates a new sparse warning on rt->rt6i_node related code: net/ipv6/route.c:1394:30: error: incompatible types in comparison expression (different address spaces) ./include/net/ip6_fib.h:187:14: error: incompatible types in comparison expression (different address spaces) This commit adds "__rcu" tag for rt6i_node and makes sure corresponding rcu API is used for it. After this fix, sparse no longer generates the above warning. Fixes: c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node") Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index e9c59db92942..af509f801084 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -105,7 +105,7 @@ struct rt6_info { * the same cache line. */ struct fib6_table *rt6i_table; - struct fib6_node *rt6i_node; + struct fib6_node __rcu *rt6i_node; struct in6_addr rt6i_gateway; -- cgit v1.2.3 From 5bf916ee0ab638c86edeaf4caeeade9ddf44d95d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 27 Aug 2017 17:03:33 +0200 Subject: irda: move include/net/irda into staging subdirectory And finally, move the irda include files into drivers/staging/irda/include/net/irda. Yes, it's a long path, but it makes it easy for us to just add a Makefile directory path addition and all of the net and drivers code "just works". Signed-off-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- include/net/irda/af_irda.h | 87 ---------- include/net/irda/crc.h | 29 ---- include/net/irda/discovery.h | 95 ----------- include/net/irda/ircomm_core.h | 106 ------------ include/net/irda/ircomm_event.h | 83 ---------- include/net/irda/ircomm_lmp.h | 36 ---- include/net/irda/ircomm_param.h | 147 ----------------- include/net/irda/ircomm_ttp.h | 37 ----- include/net/irda/ircomm_tty.h | 121 -------------- include/net/irda/ircomm_tty_attach.h | 92 ----------- include/net/irda/irda.h | 115 ------------- include/net/irda/irda_device.h | 285 -------------------------------- include/net/irda/iriap.h | 108 ------------ include/net/irda/iriap_event.h | 85 ---------- include/net/irda/irias_object.h | 108 ------------ include/net/irda/irlan_client.h | 42 ----- include/net/irda/irlan_common.h | 230 -------------------------- include/net/irda/irlan_eth.h | 32 ---- include/net/irda/irlan_event.h | 81 --------- include/net/irda/irlan_filter.h | 35 ---- include/net/irda/irlan_provider.h | 52 ------ include/net/irda/irlap.h | 311 ----------------------------------- include/net/irda/irlap_event.h | 129 --------------- include/net/irda/irlap_frame.h | 167 ------------------- include/net/irda/irlmp.h | 295 --------------------------------- include/net/irda/irlmp_event.h | 98 ----------- include/net/irda/irlmp_frame.h | 62 ------- include/net/irda/irmod.h | 109 ------------ include/net/irda/irqueue.h | 96 ----------- include/net/irda/irttp.h | 210 ----------------------- include/net/irda/parameters.h | 100 ----------- include/net/irda/qos.h | 101 ------------ include/net/irda/timer.h | 105 ------------ include/net/irda/wrapper.h | 58 ------- 34 files changed, 3847 deletions(-) delete mode 100644 include/net/irda/af_irda.h delete mode 100644 include/net/irda/crc.h delete mode 100644 include/net/irda/discovery.h delete mode 100644 include/net/irda/ircomm_core.h delete mode 100644 include/net/irda/ircomm_event.h delete mode 100644 include/net/irda/ircomm_lmp.h delete mode 100644 include/net/irda/ircomm_param.h delete mode 100644 include/net/irda/ircomm_ttp.h delete mode 100644 include/net/irda/ircomm_tty.h delete mode 100644 include/net/irda/ircomm_tty_attach.h delete mode 100644 include/net/irda/irda.h delete mode 100644 include/net/irda/irda_device.h delete mode 100644 include/net/irda/iriap.h delete mode 100644 include/net/irda/iriap_event.h delete mode 100644 include/net/irda/irias_object.h delete mode 100644 include/net/irda/irlan_client.h delete mode 100644 include/net/irda/irlan_common.h delete mode 100644 include/net/irda/irlan_eth.h delete mode 100644 include/net/irda/irlan_event.h delete mode 100644 include/net/irda/irlan_filter.h delete mode 100644 include/net/irda/irlan_provider.h delete mode 100644 include/net/irda/irlap.h delete mode 100644 include/net/irda/irlap_event.h delete mode 100644 include/net/irda/irlap_frame.h delete mode 100644 include/net/irda/irlmp.h delete mode 100644 include/net/irda/irlmp_event.h delete mode 100644 include/net/irda/irlmp_frame.h delete mode 100644 include/net/irda/irmod.h delete mode 100644 include/net/irda/irqueue.h delete mode 100644 include/net/irda/irttp.h delete mode 100644 include/net/irda/parameters.h delete mode 100644 include/net/irda/qos.h delete mode 100644 include/net/irda/timer.h delete mode 100644 include/net/irda/wrapper.h (limited to 'include/net') diff --git a/include/net/irda/af_irda.h b/include/net/irda/af_irda.h deleted file mode 100644 index 0df574931522..000000000000 --- a/include/net/irda/af_irda.h +++ /dev/null @@ -1,87 +0,0 @@ -/********************************************************************* - * - * Filename: af_irda.h - * Version: 1.0 - * Description: IrDA sockets declarations - * Status: Stable - * Author: Dag Brattli - * Created at: Tue Dec 9 21:13:12 1997 - * Modified at: Fri Jan 28 13:16:32 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef AF_IRDA_H -#define AF_IRDA_H - -#include -#include -#include /* struct iriap_cb */ -#include /* struct ias_value */ -#include /* struct lsap_cb */ -#include /* struct tsap_cb */ -#include /* struct discovery_t */ -#include - -/* IrDA Socket */ -struct irda_sock { - /* struct sock has to be the first member of irda_sock */ - struct sock sk; - __u32 saddr; /* my local address */ - __u32 daddr; /* peer address */ - - struct lsap_cb *lsap; /* LSAP used by Ultra */ - __u8 pid; /* Protocol IP (PID) used by Ultra */ - - struct tsap_cb *tsap; /* TSAP used by this connection */ - __u8 dtsap_sel; /* remote TSAP address */ - __u8 stsap_sel; /* local TSAP address */ - - __u32 max_sdu_size_rx; - __u32 max_sdu_size_tx; - __u32 max_data_size; - __u8 max_header_size; - struct qos_info qos_tx; - - __u16_host_order mask; /* Hint bits mask */ - __u16_host_order hints; /* Hint bits */ - - void *ckey; /* IrLMP client handle */ - void *skey; /* IrLMP service handle */ - - struct ias_object *ias_obj; /* Our service name + lsap in IAS */ - struct iriap_cb *iriap; /* Used to query remote IAS */ - struct ias_value *ias_result; /* Result of remote IAS query */ - - hashbin_t *cachelog; /* Result of discovery query */ - __u32 cachedaddr; /* Result of selective discovery query */ - - int nslots; /* Number of slots to use for discovery */ - - int errno; /* status of the IAS query */ - - wait_queue_head_t query_wait; /* Wait for the answer to a query */ - struct timer_list watchdog; /* Timeout for discovery */ - - LOCAL_FLOW tx_flow; - LOCAL_FLOW rx_flow; -}; - -static inline struct irda_sock *irda_sk(struct sock *sk) -{ - return (struct irda_sock *)sk; -} - -#endif /* AF_IRDA_H */ diff --git a/include/net/irda/crc.h b/include/net/irda/crc.h deleted file mode 100644 index f202296df9bb..000000000000 --- a/include/net/irda/crc.h +++ /dev/null @@ -1,29 +0,0 @@ -/********************************************************************* - * - * Filename: crc.h - * Version: - * Description: CRC routines - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Sun May 2 20:25:23 1999 - * Modified by: Dag Brattli - * - ********************************************************************/ - -#ifndef IRDA_CRC_H -#define IRDA_CRC_H - -#include -#include - -#define INIT_FCS 0xffff /* Initial FCS value */ -#define GOOD_FCS 0xf0b8 /* Good final FCS value */ - -/* Recompute the FCS with one more character appended. */ -#define irda_fcs(fcs, c) crc_ccitt_byte(fcs, c) - -/* Recompute the FCS with len bytes appended. */ -#define irda_calc_crc16(fcs, buf, len) crc_ccitt(fcs, buf, len) - -#endif diff --git a/include/net/irda/discovery.h b/include/net/irda/discovery.h deleted file mode 100644 index 63ae32530567..000000000000 --- a/include/net/irda/discovery.h +++ /dev/null @@ -1,95 +0,0 @@ -/********************************************************************* - * - * Filename: discovery.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Apr 6 16:53:53 1999 - * Modified at: Tue Oct 5 10:05:10 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef DISCOVERY_H -#define DISCOVERY_H - -#include - -#include -#include /* irda_queue_t */ -#include /* LAP_REASON */ - -#define DISCOVERY_EXPIRE_TIMEOUT (2*sysctl_discovery_timeout*HZ) -#define DISCOVERY_DEFAULT_SLOTS 0 - -/* - * This type is used by the protocols that transmit 16 bits words in - * little endian format. A little endian machine stores MSB of word in - * byte[1] and LSB in byte[0]. A big endian machine stores MSB in byte[0] - * and LSB in byte[1]. - * - * This structure is used in the code for things that are endian neutral - * but that fit in a word so that we can manipulate them efficiently. - * By endian neutral, I mean things that are really an array of bytes, - * and always used as such, for example the hint bits. Jean II - */ -typedef union { - __u16 word; - __u8 byte[2]; -} __u16_host_order; - -/* Types of discovery */ -typedef enum { - DISCOVERY_LOG, /* What's in our discovery log */ - DISCOVERY_ACTIVE, /* Doing our own discovery on the medium */ - DISCOVERY_PASSIVE, /* Peer doing discovery on the medium */ - EXPIRY_TIMEOUT, /* Entry expired due to timeout */ -} DISCOVERY_MODE; - -#define NICKNAME_MAX_LEN 21 - -/* Basic discovery information about a peer */ -typedef struct irda_device_info discinfo_t; /* linux/irda.h */ - -/* - * The DISCOVERY structure is used for both discovery requests and responses - */ -typedef struct discovery_t { - irda_queue_t q; /* Must be first! */ - - discinfo_t data; /* Basic discovery information */ - int name_len; /* Length of nickname */ - - LAP_REASON condition; /* More info about the discovery */ - int gen_addr_bit; /* Need to generate a new device - * address? */ - int nslots; /* Number of slots to use when - * discovering */ - unsigned long timestamp; /* Last time discovered */ - unsigned long firststamp; /* First time discovered */ -} discovery_t; - -void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *discovery); -void irlmp_add_discovery_log(hashbin_t *cachelog, hashbin_t *log); -void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force); -struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn, - __u16 mask, int old_entries); - -#endif diff --git a/include/net/irda/ircomm_core.h b/include/net/irda/ircomm_core.h deleted file mode 100644 index 2a580ce9edad..000000000000 --- a/include/net/irda/ircomm_core.h +++ /dev/null @@ -1,106 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_core.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 08:58:43 1999 - * Modified at: Mon Dec 13 11:52:29 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_CORE_H -#define IRCOMM_CORE_H - -#include -#include -#include - -#define IRCOMM_MAGIC 0x98347298 -#define IRCOMM_HEADER_SIZE 1 - -struct ircomm_cb; /* Forward decl. */ - -/* - * A small call-table, so we don't have to check the service-type whenever - * we want to do something - */ -typedef struct { - int (*data_request)(struct ircomm_cb *, struct sk_buff *, int clen); - int (*connect_request)(struct ircomm_cb *, struct sk_buff *, - struct ircomm_info *); - int (*connect_response)(struct ircomm_cb *, struct sk_buff *); - int (*disconnect_request)(struct ircomm_cb *, struct sk_buff *, - struct ircomm_info *); -} call_t; - -struct ircomm_cb { - irda_queue_t queue; - magic_t magic; - - notify_t notify; - call_t issue; - - int state; - int line; /* Which TTY line we are using */ - - struct tsap_cb *tsap; - struct lsap_cb *lsap; - - __u8 dlsap_sel; /* Destination LSAP/TSAP selector */ - __u8 slsap_sel; /* Source LSAP/TSAP selector */ - - __u32 saddr; /* Source device address (link we are using) */ - __u32 daddr; /* Destination device address */ - - int max_header_size; /* Header space we must reserve for each frame */ - int max_data_size; /* The amount of data we can fill in each frame */ - - LOCAL_FLOW flow_status; /* Used by ircomm_lmp */ - int pkt_count; /* Number of frames we have sent to IrLAP */ - - __u8 service_type; -}; - -extern hashbin_t *ircomm; - -struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line); -int ircomm_close(struct ircomm_cb *self); - -int ircomm_data_request(struct ircomm_cb *self, struct sk_buff *skb); -void ircomm_data_indication(struct ircomm_cb *self, struct sk_buff *skb); -void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb); -int ircomm_control_request(struct ircomm_cb *self, struct sk_buff *skb); -int ircomm_connect_request(struct ircomm_cb *self, __u8 dlsap_sel, - __u32 saddr, __u32 daddr, struct sk_buff *skb, - __u8 service_type); -void ircomm_connect_indication(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info); -void ircomm_connect_confirm(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info); -int ircomm_connect_response(struct ircomm_cb *self, struct sk_buff *userdata); -int ircomm_disconnect_request(struct ircomm_cb *self, struct sk_buff *userdata); -void ircomm_disconnect_indication(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info); -void ircomm_flow_request(struct ircomm_cb *self, LOCAL_FLOW flow); - -#define ircomm_is_connected(self) (self->state == IRCOMM_CONN) - -#endif diff --git a/include/net/irda/ircomm_event.h b/include/net/irda/ircomm_event.h deleted file mode 100644 index 5bbc32998d57..000000000000 --- a/include/net/irda/ircomm_event.h +++ /dev/null @@ -1,83 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_event.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Jun 6 23:51:13 1999 - * Modified at: Thu Jun 10 08:36:25 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_EVENT_H -#define IRCOMM_EVENT_H - -#include - -typedef enum { - IRCOMM_IDLE, - IRCOMM_WAITI, - IRCOMM_WAITR, - IRCOMM_CONN, -} IRCOMM_STATE; - -/* IrCOMM Events */ -typedef enum { - IRCOMM_CONNECT_REQUEST, - IRCOMM_CONNECT_RESPONSE, - IRCOMM_TTP_CONNECT_INDICATION, - IRCOMM_LMP_CONNECT_INDICATION, - IRCOMM_TTP_CONNECT_CONFIRM, - IRCOMM_LMP_CONNECT_CONFIRM, - - IRCOMM_LMP_DISCONNECT_INDICATION, - IRCOMM_TTP_DISCONNECT_INDICATION, - IRCOMM_DISCONNECT_REQUEST, - - IRCOMM_TTP_DATA_INDICATION, - IRCOMM_LMP_DATA_INDICATION, - IRCOMM_DATA_REQUEST, - IRCOMM_CONTROL_REQUEST, - IRCOMM_CONTROL_INDICATION, -} IRCOMM_EVENT; - -/* - * Used for passing information through the state-machine - */ -struct ircomm_info { - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - __u8 dlsap_sel; - LM_REASON reason; /* Reason for disconnect */ - __u32 max_data_size; - __u32 max_header_size; - - struct qos_info *qos; -}; - -extern const char *const ircomm_state[]; - -struct ircomm_cb; /* Forward decl. */ - -int ircomm_do_event(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info); -void ircomm_next_state(struct ircomm_cb *self, IRCOMM_STATE state); - -#endif diff --git a/include/net/irda/ircomm_lmp.h b/include/net/irda/ircomm_lmp.h deleted file mode 100644 index 5042a5021a04..000000000000 --- a/include/net/irda/ircomm_lmp.h +++ /dev/null @@ -1,36 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_lmp.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 10:06:07 1999 - * Modified at: Fri Aug 13 07:32:32 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_LMP_H -#define IRCOMM_LMP_H - -#include - -int ircomm_open_lsap(struct ircomm_cb *self); - -#endif diff --git a/include/net/irda/ircomm_param.h b/include/net/irda/ircomm_param.h deleted file mode 100644 index 1f67432321c4..000000000000 --- a/include/net/irda/ircomm_param.h +++ /dev/null @@ -1,147 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_param.h - * Version: 1.0 - * Description: Parameter handling for the IrCOMM protocol - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Jun 7 08:47:28 1999 - * Modified at: Wed Aug 25 13:46:33 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_PARAMS_H -#define IRCOMM_PARAMS_H - -#include - -/* Parameters common to all service types */ -#define IRCOMM_SERVICE_TYPE 0x00 -#define IRCOMM_PORT_TYPE 0x01 /* Only used in LM-IAS */ -#define IRCOMM_PORT_NAME 0x02 /* Only used in LM-IAS */ - -/* Parameters for both 3 wire and 9 wire */ -#define IRCOMM_DATA_RATE 0x10 -#define IRCOMM_DATA_FORMAT 0x11 -#define IRCOMM_FLOW_CONTROL 0x12 -#define IRCOMM_XON_XOFF 0x13 -#define IRCOMM_ENQ_ACK 0x14 -#define IRCOMM_LINE_STATUS 0x15 -#define IRCOMM_BREAK 0x16 - -/* Parameters for 9 wire */ -#define IRCOMM_DTE 0x20 -#define IRCOMM_DCE 0x21 -#define IRCOMM_POLL 0x22 - -/* Service type (details) */ -#define IRCOMM_3_WIRE_RAW 0x01 -#define IRCOMM_3_WIRE 0x02 -#define IRCOMM_9_WIRE 0x04 -#define IRCOMM_CENTRONICS 0x08 - -/* Port type (details) */ -#define IRCOMM_SERIAL 0x00 -#define IRCOMM_PARALLEL 0x01 - -/* Data format (details) */ -#define IRCOMM_WSIZE_5 0x00 -#define IRCOMM_WSIZE_6 0x01 -#define IRCOMM_WSIZE_7 0x02 -#define IRCOMM_WSIZE_8 0x03 - -#define IRCOMM_1_STOP_BIT 0x00 -#define IRCOMM_2_STOP_BIT 0x04 /* 1.5 if char len 5 */ - -#define IRCOMM_PARITY_DISABLE 0x00 -#define IRCOMM_PARITY_ENABLE 0x08 - -#define IRCOMM_PARITY_ODD 0x00 -#define IRCOMM_PARITY_EVEN 0x10 -#define IRCOMM_PARITY_MARK 0x20 -#define IRCOMM_PARITY_SPACE 0x30 - -/* Flow control */ -#define IRCOMM_XON_XOFF_IN 0x01 -#define IRCOMM_XON_XOFF_OUT 0x02 -#define IRCOMM_RTS_CTS_IN 0x04 -#define IRCOMM_RTS_CTS_OUT 0x08 -#define IRCOMM_DSR_DTR_IN 0x10 -#define IRCOMM_DSR_DTR_OUT 0x20 -#define IRCOMM_ENQ_ACK_IN 0x40 -#define IRCOMM_ENQ_ACK_OUT 0x80 - -/* Line status */ -#define IRCOMM_OVERRUN_ERROR 0x02 -#define IRCOMM_PARITY_ERROR 0x04 -#define IRCOMM_FRAMING_ERROR 0x08 - -/* DTE (Data terminal equipment) line settings */ -#define IRCOMM_DELTA_DTR 0x01 -#define IRCOMM_DELTA_RTS 0x02 -#define IRCOMM_DTR 0x04 -#define IRCOMM_RTS 0x08 - -/* DCE (Data communications equipment) line settings */ -#define IRCOMM_DELTA_CTS 0x01 /* Clear to send has changed */ -#define IRCOMM_DELTA_DSR 0x02 /* Data set ready has changed */ -#define IRCOMM_DELTA_RI 0x04 /* Ring indicator has changed */ -#define IRCOMM_DELTA_CD 0x08 /* Carrier detect has changed */ -#define IRCOMM_CTS 0x10 /* Clear to send is high */ -#define IRCOMM_DSR 0x20 /* Data set ready is high */ -#define IRCOMM_RI 0x40 /* Ring indicator is high */ -#define IRCOMM_CD 0x80 /* Carrier detect is high */ -#define IRCOMM_DCE_DELTA_ANY 0x0f - -/* - * Parameter state - */ -struct ircomm_params { - /* General control params */ - __u8 service_type; - __u8 port_type; - char port_name[32]; - - /* Control params for 3- and 9-wire service type */ - __u32 data_rate; /* Data rate in bps */ - __u8 data_format; - __u8 flow_control; - char xonxoff[2]; - char enqack[2]; - __u8 line_status; - __u8 _break; - - __u8 null_modem; - - /* Control params for 9-wire service type */ - __u8 dte; - __u8 dce; - __u8 poll; - - /* Control params for Centronics service type */ -}; - -struct ircomm_tty_cb; /* Forward decl. */ - -int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush); - -extern pi_param_info_t ircomm_param_info; - -#endif /* IRCOMM_PARAMS_H */ - diff --git a/include/net/irda/ircomm_ttp.h b/include/net/irda/ircomm_ttp.h deleted file mode 100644 index c5627288bca3..000000000000 --- a/include/net/irda/ircomm_ttp.h +++ /dev/null @@ -1,37 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_ttp.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 10:06:07 1999 - * Modified at: Fri Aug 13 07:32:22 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_TTP_H -#define IRCOMM_TTP_H - -#include - -int ircomm_open_tsap(struct ircomm_cb *self); - -#endif - diff --git a/include/net/irda/ircomm_tty.h b/include/net/irda/ircomm_tty.h deleted file mode 100644 index 8d4f588974bc..000000000000 --- a/include/net/irda/ircomm_tty.h +++ /dev/null @@ -1,121 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_tty.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Jun 6 23:24:22 1999 - * Modified at: Fri Jan 28 13:16:57 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_TTY_H -#define IRCOMM_TTY_H - -#include -#include -#include -#include /* struct tty_struct */ - -#include -#include -#include - -#define IRCOMM_TTY_PORTS 32 -#define IRCOMM_TTY_MAGIC 0x3432 -#define IRCOMM_TTY_MAJOR 161 -#define IRCOMM_TTY_MINOR 0 - -/* This is used as an initial value to max_header_size before the proper - * value is filled in (5 for ttp, 4 for lmp). This allow us to detect - * the state of the underlying connection. - Jean II */ -#define IRCOMM_TTY_HDR_UNINITIALISED 16 -/* Same for payload size. See qos.c for the smallest max data size */ -#define IRCOMM_TTY_DATA_UNINITIALISED (64 - IRCOMM_TTY_HDR_UNINITIALISED) - -/* - * IrCOMM TTY driver state - */ -struct ircomm_tty_cb { - irda_queue_t queue; /* Must be first */ - struct tty_port port; - magic_t magic; - - int state; /* Connect state */ - - struct ircomm_cb *ircomm; /* IrCOMM layer instance */ - - struct sk_buff *tx_skb; /* Transmit buffer */ - struct sk_buff *ctrl_skb; /* Control data buffer */ - - /* Parameters */ - struct ircomm_params settings; - - __u8 service_type; /* The service that we support */ - int client; /* True if we are a client */ - LOCAL_FLOW flow; /* IrTTP flow status */ - - int line; - - __u8 dlsap_sel; - __u8 slsap_sel; - - __u32 saddr; - __u32 daddr; - - __u32 max_data_size; /* Max data we can transmit in one packet */ - __u32 max_header_size; /* The amount of header space we must reserve */ - __u32 tx_data_size; /* Max data size of current tx_skb */ - - struct iriap_cb *iriap; /* Instance used for querying remote IAS */ - struct ias_object* obj; - void *skey; - void *ckey; - - struct timer_list watchdog_timer; - struct work_struct tqueue; - - /* Protect concurent access to : - * o self->ctrl_skb - * o self->tx_skb - * Maybe other things may gain to be protected as well... - * Jean II */ - spinlock_t spinlock; -}; - -void ircomm_tty_start(struct tty_struct *tty); -void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self); - -int ircomm_tty_tiocmget(struct tty_struct *tty); -int ircomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, - unsigned int clear); -int ircomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, - unsigned long arg); -void ircomm_tty_set_termios(struct tty_struct *tty, - struct ktermios *old_termios); - -#endif - - - - - - - diff --git a/include/net/irda/ircomm_tty_attach.h b/include/net/irda/ircomm_tty_attach.h deleted file mode 100644 index 20dcbdf258cf..000000000000 --- a/include/net/irda/ircomm_tty_attach.h +++ /dev/null @@ -1,92 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_tty_attach.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 15:55:18 1999 - * Modified at: Fri Dec 10 21:04:55 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_TTY_ATTACH_H -#define IRCOMM_TTY_ATTACH_H - -#include - -typedef enum { - IRCOMM_TTY_IDLE, - IRCOMM_TTY_SEARCH, - IRCOMM_TTY_QUERY_PARAMETERS, - IRCOMM_TTY_QUERY_LSAP_SEL, - IRCOMM_TTY_SETUP, - IRCOMM_TTY_READY, -} IRCOMM_TTY_STATE; - -/* IrCOMM TTY Events */ -typedef enum { - IRCOMM_TTY_ATTACH_CABLE, - IRCOMM_TTY_DETACH_CABLE, - IRCOMM_TTY_DATA_REQUEST, - IRCOMM_TTY_DATA_INDICATION, - IRCOMM_TTY_DISCOVERY_REQUEST, - IRCOMM_TTY_DISCOVERY_INDICATION, - IRCOMM_TTY_CONNECT_CONFIRM, - IRCOMM_TTY_CONNECT_INDICATION, - IRCOMM_TTY_DISCONNECT_REQUEST, - IRCOMM_TTY_DISCONNECT_INDICATION, - IRCOMM_TTY_WD_TIMER_EXPIRED, - IRCOMM_TTY_GOT_PARAMETERS, - IRCOMM_TTY_GOT_LSAPSEL, -} IRCOMM_TTY_EVENT; - -/* Used for passing information through the state-machine */ -struct ircomm_tty_info { - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - __u8 dlsap_sel; -}; - -extern const char *const ircomm_state[]; -extern const char *const ircomm_tty_state[]; - -int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event, - struct sk_buff *skb, struct ircomm_tty_info *info); - - -int ircomm_tty_attach_cable(struct ircomm_tty_cb *self); -void ircomm_tty_detach_cable(struct ircomm_tty_cb *self); -void ircomm_tty_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -void ircomm_tty_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb); -void ircomm_tty_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -int ircomm_tty_send_initial_parameters(struct ircomm_tty_cb *self); -void ircomm_tty_link_established(struct ircomm_tty_cb *self); - -#endif /* IRCOMM_TTY_ATTACH_H */ diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h deleted file mode 100644 index 92c8fb575213..000000000000 --- a/include/net/irda/irda.h +++ /dev/null @@ -1,115 +0,0 @@ -/********************************************************************* - * - * Filename: irda.h - * Version: 1.0 - * Description: IrDA common include file for kernel internal use - * Status: Stable - * Author: Dag Brattli - * Created at: Tue Dec 9 21:13:12 1997 - * Modified at: Fri Jan 28 13:16:32 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef NET_IRDA_H -#define NET_IRDA_H - -#include /* struct sk_buff */ -#include -#include /* sa_family_t in */ -#include - -typedef __u32 magic_t; - -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -/* Hack to do small backoff when setting media busy in IrLAP */ -#ifndef SMALL -#define SMALL 5 -#endif - -#ifndef IRDA_MIN /* Lets not mix this MIN with other header files */ -#define IRDA_MIN(a, b) (((a) < (b)) ? (a) : (b)) -#endif - -#ifndef IRDA_ALIGN -# define IRDA_ALIGN __attribute__((aligned)) -#endif - -#ifdef CONFIG_IRDA_DEBUG -#define IRDA_ASSERT(expr, func) \ -do { if(!(expr)) { \ - printk( "Assertion failed! %s:%s:%d %s\n", \ - __FILE__,__func__,__LINE__,(#expr) ); \ - func } } while (0) -#define IRDA_ASSERT_LABEL(label) label -#else -#define IRDA_ASSERT(expr, func) do { (void)(expr); } while (0) -#define IRDA_ASSERT_LABEL(label) -#endif /* CONFIG_IRDA_DEBUG */ - -/* - * Magic numbers used by Linux-IrDA. Random numbers which must be unique to - * give the best protection - */ - -#define IRTTY_MAGIC 0x2357 -#define LAP_MAGIC 0x1357 -#define LMP_MAGIC 0x4321 -#define LMP_LSAP_MAGIC 0x69333 -#define LMP_LAP_MAGIC 0x3432 -#define IRDA_DEVICE_MAGIC 0x63454 -#define IAS_MAGIC 0x007 -#define TTP_MAGIC 0x241169 -#define TTP_TSAP_MAGIC 0x4345 -#define IROBEX_MAGIC 0x341324 -#define HB_MAGIC 0x64534 -#define IRLAN_MAGIC 0x754 -#define IAS_OBJECT_MAGIC 0x34234 -#define IAS_ATTRIB_MAGIC 0x45232 -#define IRDA_TASK_MAGIC 0x38423 - -#define IAS_DEVICE_ID 0x0000 /* Defined by IrDA, IrLMP section 4.1 (page 68) */ -#define IAS_PNP_ID 0xd342 -#define IAS_OBEX_ID 0x34323 -#define IAS_IRLAN_ID 0x34234 -#define IAS_IRCOMM_ID 0x2343 -#define IAS_IRLPT_ID 0x9876 - -struct net_device; -struct packet_type; - -void irda_proc_register(void); -void irda_proc_unregister(void); - -int irda_sysctl_register(void); -void irda_sysctl_unregister(void); - -int irsock_init(void); -void irsock_cleanup(void); - -int irda_nl_register(void); -void irda_nl_unregister(void); - -int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev); - -#endif /* NET_IRDA_H */ diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h deleted file mode 100644 index 664bf8178412..000000000000 --- a/include/net/irda/irda_device.h +++ /dev/null @@ -1,285 +0,0 @@ -/********************************************************************* - * - * Filename: irda_device.h - * Version: 0.9 - * Description: Contains various declarations used by the drivers - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Apr 14 12:41:42 1998 - * Modified at: Mon Mar 20 09:08:57 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 1998 Thomas Davis, , - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -/* - * This header contains all the IrDA definitions a driver really - * needs, and therefore the driver should not need to include - * any other IrDA headers - Jean II - */ - -#ifndef IRDA_DEVICE_H -#define IRDA_DEVICE_H - -#include -#include -#include -#include /* struct sk_buff */ -#include -#include - -#include -#include -#include /* struct qos_info */ -#include /* irda_queue_t */ - -/* A few forward declarations (to make compiler happy) */ -struct irlap_cb; - -/* Some non-standard interface flags (should not conflict with any in if.h) */ -#define IFF_SIR 0x0001 /* Supports SIR speeds */ -#define IFF_MIR 0x0002 /* Supports MIR speeds */ -#define IFF_FIR 0x0004 /* Supports FIR speeds */ -#define IFF_VFIR 0x0008 /* Supports VFIR speeds */ -#define IFF_PIO 0x0010 /* Supports PIO transfer of data */ -#define IFF_DMA 0x0020 /* Supports DMA transfer of data */ -#define IFF_SHM 0x0040 /* Supports shared memory data transfers */ -#define IFF_DONGLE 0x0080 /* Interface has a dongle attached */ -#define IFF_AIR 0x0100 /* Supports Advanced IR (AIR) standards */ - -#define IO_XMIT 0x01 -#define IO_RECV 0x02 - -typedef enum { - IRDA_IRLAP, /* IrDA mode, and deliver to IrLAP */ - IRDA_RAW, /* IrDA mode */ - SHARP_ASK, - TV_REMOTE, /* Also known as Consumer Electronics IR */ -} INFRARED_MODE; - -typedef enum { - IRDA_TASK_INIT, /* All tasks are initialized with this state */ - IRDA_TASK_DONE, /* Signals that the task is finished */ - IRDA_TASK_WAIT, - IRDA_TASK_WAIT1, - IRDA_TASK_WAIT2, - IRDA_TASK_WAIT3, - IRDA_TASK_CHILD_INIT, /* Initializing child task */ - IRDA_TASK_CHILD_WAIT, /* Waiting for child task to finish */ - IRDA_TASK_CHILD_DONE /* Child task is finished */ -} IRDA_TASK_STATE; - -struct irda_task; -typedef int (*IRDA_TASK_CALLBACK) (struct irda_task *task); - -struct irda_task { - irda_queue_t q; - magic_t magic; - - IRDA_TASK_STATE state; - IRDA_TASK_CALLBACK function; - IRDA_TASK_CALLBACK finished; - - struct irda_task *parent; - struct timer_list timer; - - void *instance; /* Instance being called */ - void *param; /* Parameter to be used by instance */ -}; - -/* Dongle info */ -struct dongle_reg; -typedef struct { - struct dongle_reg *issue; /* Registration info */ - struct net_device *dev; /* Device we are attached to */ - struct irda_task *speed_task; /* Task handling speed change */ - struct irda_task *reset_task; /* Task handling reset */ - __u32 speed; /* Current speed */ - - /* Callbacks to the IrDA device driver */ - int (*set_mode)(struct net_device *, int mode); - int (*read)(struct net_device *dev, __u8 *buf, int len); - int (*write)(struct net_device *dev, __u8 *buf, int len); - int (*set_dtr_rts)(struct net_device *dev, int dtr, int rts); -} dongle_t; - -/* Dongle registration info */ -struct dongle_reg { - irda_queue_t q; /* Must be first */ - IRDA_DONGLE type; - - void (*open)(dongle_t *dongle, struct qos_info *qos); - void (*close)(dongle_t *dongle); - int (*reset)(struct irda_task *task); - int (*change_speed)(struct irda_task *task); - struct module *owner; -}; - -/* - * Per-packet information we need to hide inside sk_buff - * (must not exceed 48 bytes, check with struct sk_buff) - * The default_qdisc_pad field is a temporary hack. - */ -struct irda_skb_cb { - unsigned int default_qdisc_pad; - magic_t magic; /* Be sure that we can trust the information */ - __u32 next_speed; /* The Speed to be set *after* this frame */ - __u16 mtt; /* Minimum turn around time */ - __u16 xbofs; /* Number of xbofs required, used by SIR mode */ - __u16 next_xbofs; /* Number of xbofs required *after* this frame */ - void *context; /* May be used by drivers */ - void (*destructor)(struct sk_buff *skb); /* Used for flow control */ - __u16 xbofs_delay; /* Number of xbofs used for generating the mtt */ - __u8 line; /* Used by IrCOMM in IrLPT mode */ -}; - -/* Chip specific info */ -typedef struct { - int cfg_base; /* Config register IO base */ - int sir_base; /* SIR IO base */ - int fir_base; /* FIR IO base */ - int mem_base; /* Shared memory base */ - int sir_ext; /* Length of SIR iobase */ - int fir_ext; /* Length of FIR iobase */ - int irq, irq2; /* Interrupts used */ - int dma, dma2; /* DMA channel(s) used */ - int fifo_size; /* FIFO size */ - int irqflags; /* interrupt flags (ie, IRQF_SHARED) */ - int direction; /* Link direction, used by some FIR drivers */ - int enabled; /* Powered on? */ - int suspended; /* Suspended by APM */ - __u32 speed; /* Currently used speed */ - __u32 new_speed; /* Speed we must change to when Tx is finished */ - int dongle_id; /* Dongle or transceiver currently used */ -} chipio_t; - -/* IO buffer specific info (inspired by struct sk_buff) */ -typedef struct { - int state; /* Receiving state (transmit state not used) */ - int in_frame; /* True if receiving frame */ - - __u8 *head; /* start of buffer */ - __u8 *data; /* start of data in buffer */ - - int len; /* current length of data */ - int truesize; /* total allocated size of buffer */ - __u16 fcs; - - struct sk_buff *skb; /* ZeroCopy Rx in async_unwrap_char() */ -} iobuff_t; - -/* Maximum SIR frame (skb) that we expect to receive *unwrapped*. - * Max LAP MTU (I field) is 2048 bytes max (IrLAP 1.1, chapt 6.6.5, p40). - * Max LAP header is 2 bytes (for now). - * Max CRC is 2 bytes at SIR, 4 bytes at FIR. - * Need 1 byte for skb_reserve() to align IP header for IrLAN. - * Add a few extra bytes just to be safe (buffer is power of two anyway) - * Jean II */ -#define IRDA_SKB_MAX_MTU 2064 -/* Maximum SIR frame that we expect to send, wrapped (i.e. with XBOFS - * and escaped characters on top of above). */ -#define IRDA_SIR_MAX_FRAME 4269 - -/* The SIR unwrapper async_unwrap_char() will use a Rx-copy-break mechanism - * when using the optional ZeroCopy Rx, where only small frames are memcpy - * to a smaller skb to save memory. This is the threshold under which copy - * will happen (and over which it won't happen). - * Some FIR drivers may use this #define as well... - * This is the same value as various Ethernet drivers. - Jean II */ -#define IRDA_RX_COPY_THRESHOLD 256 - -/* Function prototypes */ -int irda_device_init(void); -void irda_device_cleanup(void); - -/* IrLAP entry points used by the drivers. - * We declare them here to avoid the driver pulling a whole bunch stack - * headers they don't really need - Jean II */ -struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos, - const char *hw_name); -void irlap_close(struct irlap_cb *self); - -/* Interface to be uses by IrLAP */ -void irda_device_set_media_busy(struct net_device *dev, int status); -int irda_device_is_media_busy(struct net_device *dev); -int irda_device_is_receiving(struct net_device *dev); - -/* Interface for internal use */ -static inline int irda_device_txqueue_empty(const struct net_device *dev) -{ - return qdisc_all_tx_empty(dev); -} -int irda_device_set_raw_mode(struct net_device* self, int status); -struct net_device *alloc_irdadev(int sizeof_priv); - -void irda_setup_dma(int channel, dma_addr_t buffer, int count, int mode); - -/* - * Function irda_get_mtt (skb) - * - * Utility function for getting the minimum turnaround time out of - * the skb, where it has been hidden in the cb field. - */ -static inline __u16 irda_get_mtt(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->mtt : 10000; -} - -/* - * Function irda_get_next_speed (skb) - * - * Extract the speed that should be set *after* this frame from the skb - * - * Note : return -1 for user space frames - */ -static inline __u32 irda_get_next_speed(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->next_speed : -1; -} - -/* - * Function irda_get_next_xbofs (skb) - * - * Extract the xbofs that should be set for this frame from the skb - * - * Note : default to 10 for user space frames - */ -static inline __u16 irda_get_xbofs(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->xbofs : 10; -} - -/* - * Function irda_get_next_xbofs (skb) - * - * Extract the xbofs that should be set *after* this frame from the skb - * - * Note : return -1 for user space frames - */ -static inline __u16 irda_get_next_xbofs(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->next_xbofs : -1; -} -#endif /* IRDA_DEVICE_H */ - - diff --git a/include/net/irda/iriap.h b/include/net/irda/iriap.h deleted file mode 100644 index fcc896491a95..000000000000 --- a/include/net/irda/iriap.h +++ /dev/null @@ -1,108 +0,0 @@ -/********************************************************************* - * - * Filename: iriap.h - * Version: 0.5 - * Description: Information Access Protocol (IAP) - * Status: Experimental. - * Author: Dag Brattli - * Created at: Thu Aug 21 00:02:07 1997 - * Modified at: Sat Dec 25 16:42:09 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997-1999 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRIAP_H -#define IRIAP_H - -#include -#include - -#include -#include -#include /* irda_queue_t */ -#include /* struct timer_list */ - -#define IAP_LST 0x80 -#define IAP_ACK 0x40 - -#define IAS_SERVER 0 -#define IAS_CLIENT 1 - -/* IrIAP Op-codes */ -#define GET_INFO_BASE 0x01 -#define GET_OBJECTS 0x02 -#define GET_VALUE 0x03 -#define GET_VALUE_BY_CLASS 0x04 -#define GET_OBJECT_INFO 0x05 -#define GET_ATTRIB_NAMES 0x06 - -#define IAS_SUCCESS 0 -#define IAS_CLASS_UNKNOWN 1 -#define IAS_ATTRIB_UNKNOWN 2 -#define IAS_DISCONNECT 10 - -typedef void (*CONFIRM_CALLBACK)(int result, __u16 obj_id, - struct ias_value *value, void *priv); - -struct iriap_cb { - irda_queue_t q; /* Must be first */ - magic_t magic; /* Magic cookie */ - - int mode; /* Client or server */ - - __u32 saddr; - __u32 daddr; - __u8 operation; - - struct sk_buff *request_skb; - struct lsap_cb *lsap; - __u8 slsap_sel; - - /* Client states */ - IRIAP_STATE client_state; - IRIAP_STATE call_state; - - /* Server states */ - IRIAP_STATE server_state; - IRIAP_STATE r_connect_state; - - CONFIRM_CALLBACK confirm; - void *priv; /* Used to identify client */ - - __u8 max_header_size; - __u32 max_data_size; - - struct timer_list watchdog_timer; -}; - -int iriap_init(void); -void iriap_cleanup(void); - -struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv, - CONFIRM_CALLBACK callback); -void iriap_close(struct iriap_cb *self); - -int iriap_getvaluebyclass_request(struct iriap_cb *self, - __u32 saddr, __u32 daddr, - char *name, char *attr); -void iriap_connect_request(struct iriap_cb *self); -void iriap_send_ack( struct iriap_cb *self); -void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb); - -void iriap_register_server(void); - -#endif - - diff --git a/include/net/irda/iriap_event.h b/include/net/irda/iriap_event.h deleted file mode 100644 index 89747f06d9eb..000000000000 --- a/include/net/irda/iriap_event.h +++ /dev/null @@ -1,85 +0,0 @@ -/********************************************************************* - * - * Filename: iriap_event.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Sun Oct 31 22:02:54 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRIAP_FSM_H -#define IRIAP_FSM_H - -/* Forward because of circular include dependecies */ -struct iriap_cb; - -/* IrIAP states */ -typedef enum { - /* Client */ - S_DISCONNECT, - S_CONNECTING, - S_CALL, - - /* S-Call */ - S_MAKE_CALL, - S_CALLING, - S_OUTSTANDING, - S_REPLYING, - S_WAIT_FOR_CALL, - S_WAIT_ACTIVE, - - /* Server */ - R_DISCONNECT, - R_CALL, - - /* R-Connect */ - R_WAITING, - R_WAIT_ACTIVE, - R_RECEIVING, - R_EXECUTE, - R_RETURNING, -} IRIAP_STATE; - -typedef enum { - IAP_CALL_REQUEST, - IAP_CALL_REQUEST_GVBC, - IAP_CALL_RESPONSE, - IAP_RECV_F_LST, - IAP_LM_DISCONNECT_INDICATION, - IAP_LM_CONNECT_INDICATION, - IAP_LM_CONNECT_CONFIRM, -} IRIAP_EVENT; - -void iriap_next_client_state (struct iriap_cb *self, IRIAP_STATE state); -void iriap_next_call_state (struct iriap_cb *self, IRIAP_STATE state); -void iriap_next_server_state (struct iriap_cb *self, IRIAP_STATE state); -void iriap_next_r_connect_state(struct iriap_cb *self, IRIAP_STATE state); - - -void iriap_do_client_event(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -void iriap_do_call_event (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); - -void iriap_do_server_event (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -void iriap_do_r_connect_event(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); - -#endif /* IRIAP_FSM_H */ - diff --git a/include/net/irda/irias_object.h b/include/net/irda/irias_object.h deleted file mode 100644 index 83f78081799c..000000000000 --- a/include/net/irda/irias_object.h +++ /dev/null @@ -1,108 +0,0 @@ -/********************************************************************* - * - * Filename: irias_object.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Thu Oct 1 22:49:50 1998 - * Modified at: Wed Dec 15 11:20:57 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef LM_IAS_OBJECT_H -#define LM_IAS_OBJECT_H - -#include -#include - -/* LM-IAS Attribute types */ -#define IAS_MISSING 0 -#define IAS_INTEGER 1 -#define IAS_OCT_SEQ 2 -#define IAS_STRING 3 - -/* Object ownership of attributes (user or kernel) */ -#define IAS_KERNEL_ATTR 0 -#define IAS_USER_ATTR 1 - -/* - * LM-IAS Object - */ -struct ias_object { - irda_queue_t q; /* Must be first! */ - magic_t magic; - - char *name; - int id; - hashbin_t *attribs; -}; - -/* - * Values used by LM-IAS attributes - */ -struct ias_value { - __u8 type; /* Value description */ - __u8 owner; /* Managed from user/kernel space */ - int charset; /* Only used by string type */ - int len; - - /* Value */ - union { - int integer; - char *string; - __u8 *oct_seq; - } t; -}; - -/* - * Attributes used by LM-IAS objects - */ -struct ias_attrib { - irda_queue_t q; /* Must be first! */ - int magic; - - char *name; /* Attribute name */ - struct ias_value *value; /* Attribute value */ -}; - -struct ias_object *irias_new_object(char *name, int id); -void irias_insert_object(struct ias_object *obj); -int irias_delete_object(struct ias_object *obj); -int irias_delete_attrib(struct ias_object *obj, struct ias_attrib *attrib, - int cleanobject); -void __irias_delete_object(struct ias_object *obj); - -void irias_add_integer_attrib(struct ias_object *obj, char *name, int value, - int user); -void irias_add_string_attrib(struct ias_object *obj, char *name, char *value, - int user); -void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets, - int len, int user); -int irias_object_change_attribute(char *obj_name, char *attrib_name, - struct ias_value *new_value); -struct ias_object *irias_find_object(char *name); -struct ias_attrib *irias_find_attrib(struct ias_object *obj, char *name); - -struct ias_value *irias_new_string_value(char *string); -struct ias_value *irias_new_integer_value(int integer); -struct ias_value *irias_new_octseq_value(__u8 *octseq , int len); -struct ias_value *irias_new_missing_value(void); -void irias_delete_value(struct ias_value *value); - -extern struct ias_value irias_missing; -extern hashbin_t *irias_objects; - -#endif diff --git a/include/net/irda/irlan_client.h b/include/net/irda/irlan_client.h deleted file mode 100644 index fa8455eda280..000000000000 --- a/include/net/irda/irlan_client.h +++ /dev/null @@ -1,42 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_client.h - * Version: 0.3 - * Description: IrDA LAN access layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Thu Apr 22 14:13:34 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_CLIENT_H -#define IRLAN_CLIENT_H - -#include -#include -#include -#include - -#include -#include - -void irlan_client_discovery_indication(discinfo_t *, DISCOVERY_MODE, void *); -void irlan_client_wakeup(struct irlan_cb *self, __u32 saddr, __u32 daddr); - -void irlan_client_parse_response(struct irlan_cb *self, struct sk_buff *skb); -void irlan_client_get_value_confirm(int result, __u16 obj_id, - struct ias_value *value, void *priv); -#endif diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h deleted file mode 100644 index 550c2d6ec7ff..000000000000 --- a/include/net/irda/irlan_common.h +++ /dev/null @@ -1,230 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_common.h - * Version: 0.8 - * Description: IrDA LAN access layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Sun Oct 31 19:41:24 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_H -#define IRLAN_H - -#include /* for HZ */ - -#include -#include -#include -#include -#include - -#include - -#define IRLAN_MTU 1518 -#define IRLAN_TIMEOUT 10*HZ /* 10 seconds */ - -/* Command packet types */ -#define CMD_GET_PROVIDER_INFO 0 -#define CMD_GET_MEDIA_CHAR 1 -#define CMD_OPEN_DATA_CHANNEL 2 -#define CMD_CLOSE_DATA_CHAN 3 -#define CMD_RECONNECT_DATA_CHAN 4 -#define CMD_FILTER_OPERATION 5 - -/* Some responses */ -#define RSP_SUCCESS 0 -#define RSP_INSUFFICIENT_RESOURCES 1 -#define RSP_INVALID_COMMAND_FORMAT 2 -#define RSP_COMMAND_NOT_SUPPORTED 3 -#define RSP_PARAM_NOT_SUPPORTED 4 -#define RSP_VALUE_NOT_SUPPORTED 5 -#define RSP_NOT_OPEN 6 -#define RSP_AUTHENTICATION_REQUIRED 7 -#define RSP_INVALID_PASSWORD 8 -#define RSP_PROTOCOL_ERROR 9 -#define RSP_ASYNCHRONOUS_ERROR 255 - -/* Media types */ -#define MEDIA_802_3 1 -#define MEDIA_802_5 2 - -/* Filter parameters */ -#define DATA_CHAN 1 -#define FILTER_TYPE 2 -#define FILTER_MODE 3 - -/* Filter types */ -#define IRLAN_DIRECTED 0x01 -#define IRLAN_FUNCTIONAL 0x02 -#define IRLAN_GROUP 0x04 -#define IRLAN_MAC_FRAME 0x08 -#define IRLAN_MULTICAST 0x10 -#define IRLAN_BROADCAST 0x20 -#define IRLAN_IPX_SOCKET 0x40 - -/* Filter modes */ -#define ALL 1 -#define FILTER 2 -#define NONE 3 - -/* Filter operations */ -#define GET 1 -#define CLEAR 2 -#define ADD 3 -#define REMOVE 4 -#define DYNAMIC 5 - -/* Access types */ -#define ACCESS_DIRECT 1 -#define ACCESS_PEER 2 -#define ACCESS_HOSTED 3 - -#define IRLAN_BYTE 0 -#define IRLAN_SHORT 1 -#define IRLAN_ARRAY 2 - -/* IrLAN sits on top if IrTTP */ -#define IRLAN_MAX_HEADER (TTP_HEADER+LMP_HEADER) -/* 1 byte for the command code and 1 byte for the parameter count */ -#define IRLAN_CMD_HEADER 2 - -#define IRLAN_STRING_PARAMETER_LEN(name, value) (1 + strlen((name)) + 2 \ - + strlen ((value))) -#define IRLAN_BYTE_PARAMETER_LEN(name) (1 + strlen((name)) + 2 + 1) -#define IRLAN_SHORT_PARAMETER_LEN(name) (1 + strlen((name)) + 2 + 2) - -/* - * IrLAN client - */ -struct irlan_client_cb { - int state; - - int open_retries; - - struct tsap_cb *tsap_ctrl; - __u32 max_sdu_size; - __u8 max_header_size; - - int access_type; /* Access type of provider */ - __u8 reconnect_key[255]; - __u8 key_len; - - __u16 recv_arb_val; - __u16 max_frame; - int filter_type; - - int unicast_open; - int broadcast_open; - - int tx_busy; - struct sk_buff_head txq; /* Transmit control queue */ - - struct iriap_cb *iriap; - - struct timer_list kick_timer; -}; - -/* - * IrLAN provider - */ -struct irlan_provider_cb { - int state; - - struct tsap_cb *tsap_ctrl; - __u32 max_sdu_size; - __u8 max_header_size; - - /* - * Store some values here which are used by the provider to parse - * the filter operations - */ - int data_chan; - int filter_type; - int filter_mode; - int filter_operation; - int filter_entry; - int access_type; /* Access type */ - __u16 send_arb_val; - - __u8 mac_address[ETH_ALEN]; /* Generated MAC address for peer device */ -}; - -/* - * IrLAN control block - */ -struct irlan_cb { - int magic; - struct list_head dev_list; - struct net_device *dev; /* Ethernet device structure*/ - - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - int disconnect_reason; /* Why we got disconnected */ - - int media; /* Media type */ - __u8 version[2]; /* IrLAN version */ - - struct tsap_cb *tsap_data; /* Data TSAP */ - - int use_udata; /* Use Unit Data transfers */ - - __u8 stsap_sel_data; /* Source data TSAP selector */ - __u8 dtsap_sel_data; /* Destination data TSAP selector */ - __u8 dtsap_sel_ctrl; /* Destination ctrl TSAP selector */ - - struct irlan_client_cb client; /* Client specific fields */ - struct irlan_provider_cb provider; /* Provider specific fields */ - - __u32 max_sdu_size; - __u8 max_header_size; - - wait_queue_head_t open_wait; - struct timer_list watchdog_timer; -}; - -void irlan_close(struct irlan_cb *self); -void irlan_close_tsaps(struct irlan_cb *self); - -int irlan_register_netdev(struct irlan_cb *self); -void irlan_ias_register(struct irlan_cb *self, __u8 tsap_sel); -void irlan_start_watchdog_timer(struct irlan_cb *self, int timeout); - -void irlan_open_data_tsap(struct irlan_cb *self); - -int irlan_run_ctrl_tx_queue(struct irlan_cb *self); - -struct irlan_cb *irlan_get_any(void); -void irlan_get_provider_info(struct irlan_cb *self); -void irlan_get_media_char(struct irlan_cb *self); -void irlan_open_data_channel(struct irlan_cb *self); -void irlan_close_data_channel(struct irlan_cb *self); -void irlan_set_multicast_filter(struct irlan_cb *self, int status); -void irlan_set_broadcast_filter(struct irlan_cb *self, int status); - -int irlan_insert_byte_param(struct sk_buff *skb, char *param, __u8 value); -int irlan_insert_short_param(struct sk_buff *skb, char *param, __u16 value); -int irlan_insert_string_param(struct sk_buff *skb, char *param, char *value); -int irlan_insert_array_param(struct sk_buff *skb, char *name, __u8 *value, - __u16 value_len); - -int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len); - -#endif - - diff --git a/include/net/irda/irlan_eth.h b/include/net/irda/irlan_eth.h deleted file mode 100644 index de5c81691f33..000000000000 --- a/include/net/irda/irlan_eth.h +++ /dev/null @@ -1,32 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_eth.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Thu Oct 15 08:36:58 1998 - * Modified at: Fri May 14 23:29:00 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_ETH_H -#define IRLAN_ETH_H - -struct net_device *alloc_irlandev(const char *name); -int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb); - -void irlan_eth_flow_indication( void *instance, void *sap, LOCAL_FLOW flow); -#endif diff --git a/include/net/irda/irlan_event.h b/include/net/irda/irlan_event.h deleted file mode 100644 index 018b5a77e610..000000000000 --- a/include/net/irda/irlan_event.h +++ /dev/null @@ -1,81 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_event.h - * Version: - * Description: LAN access - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Tue Feb 2 09:45:17 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_EVENT_H -#define IRLAN_EVENT_H - -#include -#include - -#include - -typedef enum { - IRLAN_IDLE, - IRLAN_QUERY, - IRLAN_CONN, - IRLAN_INFO, - IRLAN_MEDIA, - IRLAN_OPEN, - IRLAN_WAIT, - IRLAN_ARB, - IRLAN_DATA, - IRLAN_CLOSE, - IRLAN_SYNC -} IRLAN_STATE; - -typedef enum { - IRLAN_DISCOVERY_INDICATION, - IRLAN_IAS_PROVIDER_AVAIL, - IRLAN_IAS_PROVIDER_NOT_AVAIL, - IRLAN_LAP_DISCONNECT, - IRLAN_LMP_DISCONNECT, - IRLAN_CONNECT_COMPLETE, - IRLAN_DATA_INDICATION, - IRLAN_DATA_CONNECT_INDICATION, - IRLAN_RETRY_CONNECT, - - IRLAN_CONNECT_INDICATION, - IRLAN_GET_INFO_CMD, - IRLAN_GET_MEDIA_CMD, - IRLAN_OPEN_DATA_CMD, - IRLAN_FILTER_CONFIG_CMD, - - IRLAN_CHECK_CON_ARB, - IRLAN_PROVIDER_SIGNAL, - - IRLAN_WATCHDOG_TIMEOUT, -} IRLAN_EVENT; - -extern const char * const irlan_state[]; - -void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); - -void irlan_do_provider_event(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); - -void irlan_next_client_state(struct irlan_cb *self, IRLAN_STATE state); -void irlan_next_provider_state(struct irlan_cb *self, IRLAN_STATE state); - -#endif diff --git a/include/net/irda/irlan_filter.h b/include/net/irda/irlan_filter.h deleted file mode 100644 index a5a2539485bd..000000000000 --- a/include/net/irda/irlan_filter.h +++ /dev/null @@ -1,35 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_filter.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Fri Jan 29 15:24:08 1999 - * Modified at: Sun Feb 7 23:35:31 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_FILTER_H -#define IRLAN_FILTER_H - -void irlan_check_command_param(struct irlan_cb *self, char *param, - char *value); -void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb); -#ifdef CONFIG_PROC_FS -void irlan_print_filter(struct seq_file *seq, int filter_type); -#endif - -#endif /* IRLAN_FILTER_H */ diff --git a/include/net/irda/irlan_provider.h b/include/net/irda/irlan_provider.h deleted file mode 100644 index 92f3b0e1029b..000000000000 --- a/include/net/irda/irlan_provider.h +++ /dev/null @@ -1,52 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_provider.h - * Version: 0.1 - * Description: IrDA LAN access layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Sun May 9 12:26:11 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_SERVER_H -#define IRLAN_SERVER_H - -#include -#include -#include -#include - -#include - -void irlan_provider_ctrl_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb); - - -void irlan_provider_connect_response(struct irlan_cb *, struct tsap_cb *); - -int irlan_parse_open_data_cmd(struct irlan_cb *self, struct sk_buff *skb); -int irlan_provider_parse_command(struct irlan_cb *self, int cmd, - struct sk_buff *skb); - -void irlan_provider_send_reply(struct irlan_cb *self, int command, - int ret_code); -int irlan_provider_open_ctrl_tsap(struct irlan_cb *self); - -#endif - - diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h deleted file mode 100644 index 6f23e820618c..000000000000 --- a/include/net/irda/irlap.h +++ /dev/null @@ -1,311 +0,0 @@ -/********************************************************************* - * - * Filename: irlap.h - * Version: 0.8 - * Description: An IrDA LAP driver for Linux - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Fri Dec 10 13:21:17 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAP_H -#define IRLAP_H - -#include -#include -#include -#include - -#include /* irda_queue_t */ -#include /* struct qos_info */ -#include /* discovery_t */ -#include /* IRLAP_STATE, ... */ -#include /* struct notify_t */ - -#define CONFIG_IRDA_DYNAMIC_WINDOW 1 - -#define LAP_RELIABLE 1 -#define LAP_UNRELIABLE 0 - -#define LAP_ADDR_HEADER 1 /* IrLAP Address Header */ -#define LAP_CTRL_HEADER 1 /* IrLAP Control Header */ - -/* May be different when we get VFIR */ -#define LAP_MAX_HEADER (LAP_ADDR_HEADER + LAP_CTRL_HEADER) - -/* Each IrDA device gets a random 32 bits IRLAP device address */ -#define LAP_ALEN 4 - -#define BROADCAST 0xffffffff /* Broadcast device address */ -#define CBROADCAST 0xfe /* Connection broadcast address */ -#define XID_FORMAT 0x01 /* Discovery XID format */ - -/* Nobody seems to use this constant. */ -#define LAP_WINDOW_SIZE 8 -/* We keep the LAP queue very small to minimise the amount of buffering. - * this improve latency and reduce resource consumption. - * This work only because we have synchronous refilling of IrLAP through - * the flow control mechanism (via scheduler and IrTTP). - * 2 buffers is the minimum we can work with, one that we send while polling - * IrTTP, and another to know that we should not send the pf bit. - * Jean II */ -#define LAP_HIGH_THRESHOLD 2 -/* Some rare non TTP clients don't implement flow control, and - * so don't comply with the above limit (and neither with this one). - * For IAP and management, it doesn't matter, because they never transmit much. - *.For IrLPT, this should be fixed. - * - Jean II */ -#define LAP_MAX_QUEUE 10 -/* Please note that all IrDA management frames (LMP/TTP conn req/disc and - * IAS queries) fall in the second category and are sent to LAP even if TTP - * is stopped. This means that those frames will wait only a maximum of - * two (2) data frames before beeing sent on the "wire", which speed up - * new socket setup when the link is saturated. - * Same story for two sockets competing for the medium : if one saturates - * the LAP, when the other want to transmit it only has to wait for - * maximum three (3) packets (2 + one scheduling), which improve performance - * of delay sensitive applications. - * Jean II */ - -#define NR_EXPECTED 1 -#define NR_UNEXPECTED 0 -#define NR_INVALID -1 - -#define NS_EXPECTED 1 -#define NS_UNEXPECTED 0 -#define NS_INVALID -1 - -/* - * Meta information passed within the IrLAP state machine - */ -struct irlap_info { - __u8 caddr; /* Connection address */ - __u8 control; /* Frame type */ - __u8 cmd; - - __u32 saddr; - __u32 daddr; - - int pf; /* Poll/final bit set */ - - __u8 nr; /* Sequence number of next frame expected */ - __u8 ns; /* Sequence number of frame sent */ - - int S; /* Number of slots */ - int slot; /* Random chosen slot */ - int s; /* Current slot */ - - discovery_t *discovery; /* Discovery information */ -}; - -/* Main structure of IrLAP */ -struct irlap_cb { - irda_queue_t q; /* Must be first */ - magic_t magic; - - /* Device we are attached to */ - struct net_device *netdev; - char hw_name[2*IFNAMSIZ + 1]; - - /* Connection state */ - volatile IRLAP_STATE state; /* Current state */ - - /* Timers used by IrLAP */ - struct timer_list query_timer; - struct timer_list slot_timer; - struct timer_list discovery_timer; - struct timer_list final_timer; - struct timer_list poll_timer; - struct timer_list wd_timer; - struct timer_list backoff_timer; - - /* Media busy stuff */ - struct timer_list media_busy_timer; - int media_busy; - - /* Timeouts which will be different with different turn time */ - int slot_timeout; - int poll_timeout; - int final_timeout; - int wd_timeout; - - struct sk_buff_head txq; /* Frames to be transmitted */ - struct sk_buff_head txq_ultra; - - __u8 caddr; /* Connection address */ - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - - int retry_count; /* Times tried to establish connection */ - int add_wait; /* True if we are waiting for frame */ - - __u8 connect_pending; - __u8 disconnect_pending; - - /* To send a faster RR if tx queue empty */ -#ifdef CONFIG_IRDA_FAST_RR - int fast_RR_timeout; - int fast_RR; -#endif /* CONFIG_IRDA_FAST_RR */ - - int N1; /* N1 * F-timer = Negitiated link disconnect warning threshold */ - int N2; /* N2 * F-timer = Negitiated link disconnect time */ - int N3; /* Connection retry count */ - - int local_busy; - int remote_busy; - int xmitflag; - - __u8 vs; /* Next frame to be sent */ - __u8 vr; /* Next frame to be received */ - __u8 va; /* Last frame acked */ - int window; /* Nr of I-frames allowed to send */ - int window_size; /* Current negotiated window size */ - -#ifdef CONFIG_IRDA_DYNAMIC_WINDOW - __u32 line_capacity; /* Number of bytes allowed to send */ - __u32 bytes_left; /* Number of bytes still allowed to transmit */ -#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ - - struct sk_buff_head wx_list; - - __u8 ack_required; - - /* XID parameters */ - __u8 S; /* Number of slots */ - __u8 slot; /* Random chosen slot */ - __u8 s; /* Current slot */ - int frame_sent; /* Have we sent reply? */ - - hashbin_t *discovery_log; - discovery_t *discovery_cmd; - - __u32 speed; /* Link speed */ - - struct qos_info qos_tx; /* QoS requested by peer */ - struct qos_info qos_rx; /* QoS requested by self */ - struct qos_info *qos_dev; /* QoS supported by device */ - - notify_t notify; /* Callbacks to IrLMP */ - - int mtt_required; /* Minimum turnaround time required */ - int xbofs_delay; /* Nr of XBOF's used to MTT */ - int bofs_count; /* Negotiated extra BOFs */ - int next_bofs; /* Negotiated extra BOFs after next frame */ - - int mode; /* IrLAP mode (primary, secondary or monitor) */ -}; - -/* - * Function prototypes - */ -int irlap_init(void); -void irlap_cleanup(void); - -struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos, - const char *hw_name); -void irlap_close(struct irlap_cb *self); - -void irlap_connect_request(struct irlap_cb *self, __u32 daddr, - struct qos_info *qos, int sniff); -void irlap_connect_response(struct irlap_cb *self, struct sk_buff *skb); -void irlap_connect_indication(struct irlap_cb *self, struct sk_buff *skb); -void irlap_connect_confirm(struct irlap_cb *, struct sk_buff *skb); - -void irlap_data_indication(struct irlap_cb *, struct sk_buff *, int unreliable); -void irlap_data_request(struct irlap_cb *, struct sk_buff *, int unreliable); - -#ifdef CONFIG_IRDA_ULTRA -void irlap_unitdata_request(struct irlap_cb *, struct sk_buff *); -void irlap_unitdata_indication(struct irlap_cb *, struct sk_buff *); -#endif /* CONFIG_IRDA_ULTRA */ - -void irlap_disconnect_request(struct irlap_cb *); -void irlap_disconnect_indication(struct irlap_cb *, LAP_REASON reason); - -void irlap_status_indication(struct irlap_cb *, int quality_of_link); - -void irlap_test_request(__u8 *info, int len); - -void irlap_discovery_request(struct irlap_cb *, discovery_t *discovery); -void irlap_discovery_confirm(struct irlap_cb *, hashbin_t *discovery_log); -void irlap_discovery_indication(struct irlap_cb *, discovery_t *discovery); - -void irlap_reset_indication(struct irlap_cb *self); -void irlap_reset_confirm(void); - -void irlap_update_nr_received(struct irlap_cb *, int nr); -int irlap_validate_nr_received(struct irlap_cb *, int nr); -int irlap_validate_ns_received(struct irlap_cb *, int ns); - -int irlap_generate_rand_time_slot(int S, int s); -void irlap_initiate_connection_state(struct irlap_cb *); -void irlap_flush_all_queues(struct irlap_cb *); -void irlap_wait_min_turn_around(struct irlap_cb *, struct qos_info *); - -void irlap_apply_default_connection_parameters(struct irlap_cb *self); -void irlap_apply_connection_parameters(struct irlap_cb *self, int now); - -#define IRLAP_GET_HEADER_SIZE(self) (LAP_MAX_HEADER) -#define IRLAP_GET_TX_QUEUE_LEN(self) skb_queue_len(&self->txq) - -/* Return TRUE if the node is in primary mode (i.e. master) - * - Jean II */ -static inline int irlap_is_primary(struct irlap_cb *self) -{ - int ret; - switch(self->state) { - case LAP_XMIT_P: - case LAP_NRM_P: - ret = 1; - break; - case LAP_XMIT_S: - case LAP_NRM_S: - ret = 0; - break; - default: - ret = -1; - } - return ret; -} - -/* Clear a pending IrLAP disconnect. - Jean II */ -static inline void irlap_clear_disconnect(struct irlap_cb *self) -{ - self->disconnect_pending = FALSE; -} - -/* - * Function irlap_next_state (self, state) - * - * Switches state and provides debug information - * - */ -static inline void irlap_next_state(struct irlap_cb *self, IRLAP_STATE state) -{ - /* - if (!self || self->magic != LAP_MAGIC) - return; - - pr_debug("next LAP state = %s\n", irlap_state[state]); - */ - self->state = state; -} - -#endif diff --git a/include/net/irda/irlap_event.h b/include/net/irda/irlap_event.h deleted file mode 100644 index e4325fee1267..000000000000 --- a/include/net/irda/irlap_event.h +++ /dev/null @@ -1,129 +0,0 @@ -/********************************************************************* - * - * - * Filename: irlap_event.h - * Version: 0.1 - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sat Aug 16 00:59:29 1997 - * Modified at: Tue Dec 21 11:20:30 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRLAP_EVENT_H -#define IRLAP_EVENT_H - -#include - -/* A few forward declarations (to make compiler happy) */ -struct irlap_cb; -struct irlap_info; - -/* IrLAP States */ -typedef enum { - LAP_NDM, /* Normal disconnected mode */ - LAP_QUERY, - LAP_REPLY, - LAP_CONN, /* Connect indication */ - LAP_SETUP, /* Setting up connection */ - LAP_OFFLINE, /* A really boring state */ - LAP_XMIT_P, - LAP_PCLOSE, - LAP_NRM_P, /* Normal response mode as primary */ - LAP_RESET_WAIT, - LAP_RESET, - LAP_NRM_S, /* Normal response mode as secondary */ - LAP_XMIT_S, - LAP_SCLOSE, - LAP_RESET_CHECK, -} IRLAP_STATE; - -/* IrLAP Events */ -typedef enum { - /* Services events */ - DISCOVERY_REQUEST, - CONNECT_REQUEST, - CONNECT_RESPONSE, - DISCONNECT_REQUEST, - DATA_REQUEST, - RESET_REQUEST, - RESET_RESPONSE, - - /* Send events */ - SEND_I_CMD, - SEND_UI_FRAME, - - /* Receive events */ - RECV_DISCOVERY_XID_CMD, - RECV_DISCOVERY_XID_RSP, - RECV_SNRM_CMD, - RECV_TEST_CMD, - RECV_TEST_RSP, - RECV_UA_RSP, - RECV_DM_RSP, - RECV_RD_RSP, - RECV_I_CMD, - RECV_I_RSP, - RECV_UI_FRAME, - RECV_FRMR_RSP, - RECV_RR_CMD, - RECV_RR_RSP, - RECV_RNR_CMD, - RECV_RNR_RSP, - RECV_REJ_CMD, - RECV_REJ_RSP, - RECV_SREJ_CMD, - RECV_SREJ_RSP, - RECV_DISC_CMD, - - /* Timer events */ - SLOT_TIMER_EXPIRED, - QUERY_TIMER_EXPIRED, - FINAL_TIMER_EXPIRED, - POLL_TIMER_EXPIRED, - DISCOVERY_TIMER_EXPIRED, - WD_TIMER_EXPIRED, - BACKOFF_TIMER_EXPIRED, - MEDIA_BUSY_TIMER_EXPIRED, -} IRLAP_EVENT; - -/* - * Disconnect reason code - */ -typedef enum { /* FIXME check the two first reason codes */ - LAP_DISC_INDICATION=1, /* Received a disconnect request from peer */ - LAP_NO_RESPONSE, /* To many retransmits without response */ - LAP_RESET_INDICATION, /* To many retransmits, or invalid nr/ns */ - LAP_FOUND_NONE, /* No devices were discovered */ - LAP_MEDIA_BUSY, - LAP_PRIMARY_CONFLICT, -} LAP_REASON; - -extern const char *const irlap_state[]; - -void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -void irlap_print_event(IRLAP_EVENT event); - -int irlap_qos_negotiate(struct irlap_cb *self, struct sk_buff *skb); - -#endif diff --git a/include/net/irda/irlap_frame.h b/include/net/irda/irlap_frame.h deleted file mode 100644 index cbc12a926e5f..000000000000 --- a/include/net/irda/irlap_frame.h +++ /dev/null @@ -1,167 +0,0 @@ -/********************************************************************* - * - * Filename: irlap_frame.h - * Version: 0.9 - * Description: IrLAP frame declarations - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Aug 19 10:27:26 1997 - * Modified at: Sat Dec 25 21:07:26 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRLAP_FRAME_H -#define IRLAP_FRAME_H - -#include - -#include - -/* A few forward declarations (to make compiler happy) */ -struct irlap_cb; -struct discovery_t; - -/* Frame types and templates */ -#define INVALID 0xff - -/* Unnumbered (U) commands */ -#define SNRM_CMD 0x83 /* Set Normal Response Mode */ -#define DISC_CMD 0x43 /* Disconnect */ -#define XID_CMD 0x2f /* Exchange Station Identification */ -#define TEST_CMD 0xe3 /* Test */ - -/* Unnumbered responses */ -#define RNRM_RSP 0x83 /* Request Normal Response Mode */ -#define UA_RSP 0x63 /* Unnumbered Acknowledgement */ -#define FRMR_RSP 0x87 /* Frame Reject */ -#define DM_RSP 0x0f /* Disconnect Mode */ -#define RD_RSP 0x43 /* Request Disconnection */ -#define XID_RSP 0xaf /* Exchange Station Identification */ -#define TEST_RSP 0xe3 /* Test frame */ - -/* Supervisory (S) */ -#define RR 0x01 /* Receive Ready */ -#define REJ 0x09 /* Reject */ -#define RNR 0x05 /* Receive Not Ready */ -#define SREJ 0x0d /* Selective Reject */ - -/* Information (I) */ -#define I_FRAME 0x00 /* Information Format */ -#define UI_FRAME 0x03 /* Unnumbered Information */ - -#define CMD_FRAME 0x01 -#define RSP_FRAME 0x00 - -#define PF_BIT 0x10 /* Poll/final bit */ - -/* Some IrLAP field lengths */ -/* - * Only baud rate triplet is 4 bytes (PV can be 2 bytes). - * All others params (7) are 3 bytes, so that's 7*3 + 1*4 bytes. - */ -#define IRLAP_NEGOCIATION_PARAMS_LEN 25 -#define IRLAP_DISCOVERY_INFO_LEN 32 - -struct disc_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct xid_frame { - __u8 caddr; /* Connection address */ - __u8 control; - __u8 ident; /* Should always be XID_FORMAT */ - __le32 saddr; /* Source device address */ - __le32 daddr; /* Destination device address */ - __u8 flags; /* Discovery flags */ - __u8 slotnr; - __u8 version; -} __packed; - -struct test_frame { - __u8 caddr; /* Connection address */ - __u8 control; - __le32 saddr; /* Source device address */ - __le32 daddr; /* Destination device address */ -} __packed; - -struct ua_frame { - __u8 caddr; - __u8 control; - __le32 saddr; /* Source device address */ - __le32 daddr; /* Dest device address */ -} __packed; - -struct dm_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct rd_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct rr_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct i_frame { - __u8 caddr; - __u8 control; -} __packed; - -struct snrm_frame { - __u8 caddr; - __u8 control; - __le32 saddr; - __le32 daddr; - __u8 ncaddr; -} __packed; - -void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb); -void irlap_send_discovery_xid_frame(struct irlap_cb *, int S, __u8 s, - __u8 command, - struct discovery_t *discovery); -void irlap_send_snrm_frame(struct irlap_cb *, struct qos_info *); -void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr, - struct sk_buff *cmd); -void irlap_send_ua_response_frame(struct irlap_cb *, struct qos_info *); -void irlap_send_dm_frame(struct irlap_cb *self); -void irlap_send_rd_frame(struct irlap_cb *self); -void irlap_send_disc_frame(struct irlap_cb *self); -void irlap_send_rr_frame(struct irlap_cb *self, int command); - -void irlap_send_data_primary(struct irlap_cb *, struct sk_buff *); -void irlap_send_data_primary_poll(struct irlap_cb *, struct sk_buff *); -void irlap_send_data_secondary(struct irlap_cb *, struct sk_buff *); -void irlap_send_data_secondary_final(struct irlap_cb *, struct sk_buff *); -void irlap_resend_rejected_frames(struct irlap_cb *, int command); -void irlap_resend_rejected_frame(struct irlap_cb *self, int command); - -void irlap_send_ui_frame(struct irlap_cb *self, struct sk_buff *skb, - __u8 caddr, int command); - -int irlap_insert_qos_negotiation_params(struct irlap_cb *self, - struct sk_buff *skb); - -#endif diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h deleted file mode 100644 index f132924cc9da..000000000000 --- a/include/net/irda/irlmp.h +++ /dev/null @@ -1,295 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp.h - * Version: 0.9 - * Description: IrDA Link Management Protocol (LMP) layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 17 20:54:32 1997 - * Modified at: Fri Dec 10 13:23:01 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLMP_H -#define IRLMP_H - -#include /* for HZ */ - -#include - -#include -#include -#include /* LAP_MAX_HEADER, ... */ -#include -#include -#include - -/* LSAP-SEL's */ -#define LSAP_MASK 0x7f -#define LSAP_IAS 0x00 -#define LSAP_ANY 0xff -#define LSAP_MAX 0x6f /* 0x70-0x7f are reserved */ -#define LSAP_CONNLESS 0x70 /* Connectionless LSAP, mostly used for Ultra */ - -#define DEV_ADDR_ANY 0xffffffff - -#define LMP_HEADER 2 /* Dest LSAP + Source LSAP */ -#define LMP_CONTROL_HEADER 4 /* LMP_HEADER + opcode + parameter */ -#define LMP_PID_HEADER 1 /* Used by Ultra */ -#define LMP_MAX_HEADER (LMP_CONTROL_HEADER+LAP_MAX_HEADER) - -#define LM_MAX_CONNECTIONS 10 - -#define LM_IDLE_TIMEOUT 2*HZ /* 2 seconds for now */ - -typedef enum { - S_PNP = 0, - S_PDA, - S_COMPUTER, - S_PRINTER, - S_MODEM, - S_FAX, - S_LAN, - S_TELEPHONY, - S_COMM, - S_OBEX, - S_ANY, - S_END, -} SERVICE; - -/* For selective discovery */ -typedef void (*DISCOVERY_CALLBACK1) (discinfo_t *, DISCOVERY_MODE, void *); -/* For expiry (the same) */ -typedef void (*DISCOVERY_CALLBACK2) (discinfo_t *, DISCOVERY_MODE, void *); - -typedef struct { - irda_queue_t queue; /* Must be first */ - - __u16_host_order hints; /* Hint bits */ -} irlmp_service_t; - -typedef struct { - irda_queue_t queue; /* Must be first */ - - __u16_host_order hint_mask; - - DISCOVERY_CALLBACK1 disco_callback; /* Selective discovery */ - DISCOVERY_CALLBACK2 expir_callback; /* Selective expiration */ - void *priv; /* Used to identify client */ -} irlmp_client_t; - -/* - * Information about each logical LSAP connection - */ -struct lsap_cb { - irda_queue_t queue; /* Must be first */ - magic_t magic; - - unsigned long connected; /* set_bit used on this */ - int persistent; - - __u8 slsap_sel; /* Source (this) LSAP address */ - __u8 dlsap_sel; /* Destination LSAP address (if connected) */ -#ifdef CONFIG_IRDA_ULTRA - __u8 pid; /* Used by connectionless LSAP */ -#endif /* CONFIG_IRDA_ULTRA */ - struct sk_buff *conn_skb; /* Store skb here while connecting */ - - struct timer_list watchdog_timer; - - LSAP_STATE lsap_state; /* Connection state */ - notify_t notify; /* Indication/Confirm entry points */ - struct qos_info qos; /* QoS for this connection */ - - struct lap_cb *lap; /* Pointer to LAP connection structure */ -}; - -/* - * Used for caching the last slsap->dlsap->handle mapping - * - * We don't need to keep/match the remote address in the cache because - * we are associated with a specific LAP (which implies it). - * Jean II - */ -typedef struct { - int valid; - - __u8 slsap_sel; - __u8 dlsap_sel; - struct lsap_cb *lsap; -} CACHE_ENTRY; - -/* - * Information about each registered IrLAP layer - */ -struct lap_cb { - irda_queue_t queue; /* Must be first */ - magic_t magic; - - int reason; /* LAP disconnect reason */ - - IRLMP_STATE lap_state; - - struct irlap_cb *irlap; /* Instance of IrLAP layer */ - hashbin_t *lsaps; /* LSAP associated with this link */ - struct lsap_cb *flow_next; /* Next lsap to be polled for Tx */ - - __u8 caddr; /* Connection address */ - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - - struct qos_info *qos; /* LAP QoS for this session */ - struct timer_list idle_timer; - -#ifdef CONFIG_IRDA_CACHE_LAST_LSAP - /* The lsap cache was moved from struct irlmp_cb to here because - * it must be associated with the specific LAP. Also, this - * improves performance. - Jean II */ - CACHE_ENTRY cache; /* Caching last slsap->dlsap->handle mapping */ -#endif -}; - -/* - * Main structure for IrLMP - */ -struct irlmp_cb { - magic_t magic; - - __u8 conflict_flag; - - discovery_t discovery_cmd; /* Discovery command to use by IrLAP */ - discovery_t discovery_rsp; /* Discovery response to use by IrLAP */ - - /* Last lsap picked automatically by irlmp_find_free_slsap() */ - int last_lsap_sel; - - struct timer_list discovery_timer; - - hashbin_t *links; /* IrLAP connection table */ - hashbin_t *unconnected_lsaps; - hashbin_t *clients; - hashbin_t *services; - - hashbin_t *cachelog; /* Current discovery log */ - - int running; - - __u16_host_order hints; /* Hint bits */ -}; - -/* Prototype declarations */ -int irlmp_init(void); -void irlmp_cleanup(void); -struct lsap_cb *irlmp_open_lsap(__u8 slsap, notify_t *notify, __u8 pid); -void irlmp_close_lsap( struct lsap_cb *self); - -__u16 irlmp_service_to_hint(int service); -void *irlmp_register_service(__u16 hints); -int irlmp_unregister_service(void *handle); -void *irlmp_register_client(__u16 hint_mask, DISCOVERY_CALLBACK1 disco_clb, - DISCOVERY_CALLBACK2 expir_clb, void *priv); -int irlmp_unregister_client(void *handle); -int irlmp_update_client(void *handle, __u16 hint_mask, - DISCOVERY_CALLBACK1 disco_clb, - DISCOVERY_CALLBACK2 expir_clb, void *priv); - -void irlmp_register_link(struct irlap_cb *, __u32 saddr, notify_t *); -void irlmp_unregister_link(__u32 saddr); - -int irlmp_connect_request(struct lsap_cb *, __u8 dlsap_sel, - __u32 saddr, __u32 daddr, - struct qos_info *, struct sk_buff *); -void irlmp_connect_indication(struct lsap_cb *self, struct sk_buff *skb); -int irlmp_connect_response(struct lsap_cb *, struct sk_buff *); -void irlmp_connect_confirm(struct lsap_cb *, struct sk_buff *); -struct lsap_cb *irlmp_dup(struct lsap_cb *self, void *instance); - -void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, - struct sk_buff *userdata); -int irlmp_disconnect_request(struct lsap_cb *, struct sk_buff *userdata); - -void irlmp_discovery_confirm(hashbin_t *discovery_log, DISCOVERY_MODE mode); -void irlmp_discovery_request(int nslots); -discinfo_t *irlmp_get_discoveries(int *pn, __u16 mask, int nslots); -void irlmp_do_expiry(void); -void irlmp_do_discovery(int nslots); -discovery_t *irlmp_get_discovery_response(void); -void irlmp_discovery_expiry(discinfo_t *expiry, int number); - -int irlmp_data_request(struct lsap_cb *, struct sk_buff *); -void irlmp_data_indication(struct lsap_cb *, struct sk_buff *); - -int irlmp_udata_request(struct lsap_cb *, struct sk_buff *); -void irlmp_udata_indication(struct lsap_cb *, struct sk_buff *); - -#ifdef CONFIG_IRDA_ULTRA -int irlmp_connless_data_request(struct lsap_cb *, struct sk_buff *, __u8); -void irlmp_connless_data_indication(struct lsap_cb *, struct sk_buff *); -#endif /* CONFIG_IRDA_ULTRA */ - -void irlmp_status_indication(struct lap_cb *, LINK_STATUS link, LOCK_STATUS lock); -void irlmp_flow_indication(struct lap_cb *self, LOCAL_FLOW flow); - -LM_REASON irlmp_convert_lap_reason(LAP_REASON); - -static inline __u32 irlmp_get_saddr(const struct lsap_cb *self) -{ - return (self && self->lap) ? self->lap->saddr : 0; -} - -static inline __u32 irlmp_get_daddr(const struct lsap_cb *self) -{ - return (self && self->lap) ? self->lap->daddr : 0; -} - -const char *irlmp_reason_str(LM_REASON reason); - -extern int sysctl_discovery_timeout; -extern int sysctl_discovery_slots; -extern int sysctl_discovery; -extern int sysctl_lap_keepalive_time; /* in ms, default is LM_IDLE_TIMEOUT */ -extern struct irlmp_cb *irlmp; - -/* Check if LAP queue is full. - * Used by IrTTP for low control, see comments in irlap.h - Jean II */ -static inline int irlmp_lap_tx_queue_full(struct lsap_cb *self) -{ - if (self == NULL) - return 0; - if (self->lap == NULL) - return 0; - if (self->lap->irlap == NULL) - return 0; - - return IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD; -} - -/* After doing a irlmp_dup(), this get one of the two socket back into - * a state where it's waiting incoming connections. - * Note : this can be used *only* if the socket is not yet connected - * (i.e. NO irlmp_connect_response() done on this socket). - * - Jean II */ -static inline void irlmp_listen(struct lsap_cb *self) -{ - self->dlsap_sel = LSAP_ANY; - self->lap = NULL; - self->lsap_state = LSAP_DISCONNECTED; - /* Started when we received the LM_CONNECT_INDICATION */ - del_timer(&self->watchdog_timer); -} - -#endif diff --git a/include/net/irda/irlmp_event.h b/include/net/irda/irlmp_event.h deleted file mode 100644 index 9e4ec17a7449..000000000000 --- a/include/net/irda/irlmp_event.h +++ /dev/null @@ -1,98 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp_event.h - * Version: 0.1 - * Description: IrDA-LMP event handling - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Thu Jul 8 12:18:54 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997, 1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLMP_EVENT_H -#define IRLMP_EVENT_H - -/* A few forward declarations (to make compiler happy) */ -struct irlmp_cb; -struct lsap_cb; -struct lap_cb; -struct discovery_t; - -/* LAP states */ -typedef enum { - /* IrLAP connection control states */ - LAP_STANDBY, /* No LAP connection */ - LAP_U_CONNECT, /* Starting LAP connection */ - LAP_ACTIVE, /* LAP connection is active */ -} IRLMP_STATE; - -/* LSAP connection control states */ -typedef enum { - LSAP_DISCONNECTED, /* No LSAP connection */ - LSAP_CONNECT, /* Connect indication from peer */ - LSAP_CONNECT_PEND, /* Connect request from service user */ - LSAP_DATA_TRANSFER_READY, /* LSAP connection established */ - LSAP_SETUP, /* Trying to set up LSAP connection */ - LSAP_SETUP_PEND, /* Request to start LAP connection */ -} LSAP_STATE; - -typedef enum { - /* LSAP events */ - LM_CONNECT_REQUEST, - LM_CONNECT_CONFIRM, - LM_CONNECT_RESPONSE, - LM_CONNECT_INDICATION, - - LM_DISCONNECT_INDICATION, - LM_DISCONNECT_REQUEST, - - LM_DATA_REQUEST, - LM_UDATA_REQUEST, - LM_DATA_INDICATION, - LM_UDATA_INDICATION, - - LM_WATCHDOG_TIMEOUT, - - /* IrLAP events */ - LM_LAP_CONNECT_REQUEST, - LM_LAP_CONNECT_INDICATION, - LM_LAP_CONNECT_CONFIRM, - LM_LAP_DISCONNECT_INDICATION, - LM_LAP_DISCONNECT_REQUEST, - LM_LAP_DISCOVERY_REQUEST, - LM_LAP_DISCOVERY_CONFIRM, - LM_LAP_IDLE_TIMEOUT, -} IRLMP_EVENT; - -extern const char *const irlmp_state[]; -extern const char *const irlsap_state[]; - -void irlmp_watchdog_timer_expired(void *data); -void irlmp_discovery_timer_expired(void *data); -void irlmp_idle_timer_expired(void *data); - -void irlmp_do_lap_event(struct lap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb); -int irlmp_do_lsap_event(struct lsap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb); - -#endif /* IRLMP_EVENT_H */ - - - - diff --git a/include/net/irda/irlmp_frame.h b/include/net/irda/irlmp_frame.h deleted file mode 100644 index 1906eb71422e..000000000000 --- a/include/net/irda/irlmp_frame.h +++ /dev/null @@ -1,62 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp_frame.h - * Version: 0.9 - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Aug 19 02:09:59 1997 - * Modified at: Fri Dec 10 13:21:53 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997, 1999 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRMLP_FRAME_H -#define IRMLP_FRAME_H - -#include - -#include - -/* IrLMP frame opcodes */ -#define CONNECT_CMD 0x01 -#define CONNECT_CNF 0x81 -#define DISCONNECT 0x02 -#define ACCESSMODE_CMD 0x03 -#define ACCESSMODE_CNF 0x83 - -#define CONTROL_BIT 0x80 - -void irlmp_send_data_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap, - int expedited, struct sk_buff *skb); -void irlmp_send_lcf_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap, - __u8 opcode, struct sk_buff *skb); -void irlmp_link_data_indication(struct lap_cb *, struct sk_buff *, - int unreliable); -#ifdef CONFIG_IRDA_ULTRA -void irlmp_link_unitdata_indication(struct lap_cb *, struct sk_buff *); -#endif /* CONFIG_IRDA_ULTRA */ - -void irlmp_link_connect_indication(struct lap_cb *, __u32 saddr, __u32 daddr, - struct qos_info *qos, struct sk_buff *skb); -void irlmp_link_connect_request(__u32 daddr); -void irlmp_link_connect_confirm(struct lap_cb *self, struct qos_info *qos, - struct sk_buff *skb); -void irlmp_link_disconnect_indication(struct lap_cb *, struct irlap_cb *, - LAP_REASON reason, struct sk_buff *); -void irlmp_link_discovery_confirm(struct lap_cb *self, hashbin_t *log); -void irlmp_link_discovery_indication(struct lap_cb *, discovery_t *discovery); - -#endif diff --git a/include/net/irda/irmod.h b/include/net/irda/irmod.h deleted file mode 100644 index 86f0dbb8ee5d..000000000000 --- a/include/net/irda/irmod.h +++ /dev/null @@ -1,109 +0,0 @@ -/********************************************************************* - * - * Filename: irmod.h - * Version: 0.3 - * Description: IrDA module and utilities functions - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Dec 15 13:58:52 1997 - * Modified at: Fri Jan 28 13:15:24 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charg. - * - ********************************************************************/ - -#ifndef IRMOD_H -#define IRMOD_H - -/* Misc status information */ -typedef enum { - STATUS_OK, - STATUS_ABORTED, - STATUS_NO_ACTIVITY, - STATUS_NOISY, - STATUS_REMOTE, -} LINK_STATUS; - -typedef enum { - LOCK_NO_CHANGE, - LOCK_LOCKED, - LOCK_UNLOCKED, -} LOCK_STATUS; - -typedef enum { FLOW_STOP, FLOW_START } LOCAL_FLOW; - -/* - * IrLMP disconnect reasons. The order is very important, since they - * correspond to disconnect reasons sent in IrLMP disconnect frames, so - * please do not touch :-) - */ -typedef enum { - LM_USER_REQUEST = 1, /* User request */ - LM_LAP_DISCONNECT, /* Unexpected IrLAP disconnect */ - LM_CONNECT_FAILURE, /* Failed to establish IrLAP connection */ - LM_LAP_RESET, /* IrLAP reset */ - LM_INIT_DISCONNECT, /* Link Management initiated disconnect */ - LM_LSAP_NOTCONN, /* Data delivered on unconnected LSAP */ - LM_NON_RESP_CLIENT, /* Non responsive LM-MUX client */ - LM_NO_AVAIL_CLIENT, /* No available LM-MUX client */ - LM_CONN_HALF_OPEN, /* Connection is half open */ - LM_BAD_SOURCE_ADDR, /* Illegal source address (i.e 0x00) */ -} LM_REASON; -#define LM_UNKNOWN 0xff /* Unspecified disconnect reason */ - -/* A few forward declarations (to make compiler happy) */ -struct qos_info; /* in */ - -/* - * Notify structure used between transport and link management layers - */ -typedef struct { - int (*data_indication)(void *priv, void *sap, struct sk_buff *skb); - int (*udata_indication)(void *priv, void *sap, struct sk_buff *skb); - void (*connect_confirm)(void *instance, void *sap, - struct qos_info *qos, __u32 max_sdu_size, - __u8 max_header_size, struct sk_buff *skb); - void (*connect_indication)(void *instance, void *sap, - struct qos_info *qos, __u32 max_sdu_size, - __u8 max_header_size, struct sk_buff *skb); - void (*disconnect_indication)(void *instance, void *sap, - LM_REASON reason, struct sk_buff *); - void (*flow_indication)(void *instance, void *sap, LOCAL_FLOW flow); - void (*status_indication)(void *instance, - LINK_STATUS link, LOCK_STATUS lock); - void *instance; /* Layer instance pointer */ - char name[16]; /* Name of layer */ -} notify_t; - -#define NOTIFY_MAX_NAME 16 - -/* Zero the notify structure */ -void irda_notify_init(notify_t *notify); - -/* Locking wrapper - Note the inverted logic on irda_lock(). - * Those function basically return false if the lock is already in the - * position you want to set it. - Jean II */ -#define irda_lock(lock) (! test_and_set_bit(0, (void *) (lock))) -#define irda_unlock(lock) (test_and_clear_bit(0, (void *) (lock))) - -#endif /* IRMOD_H */ - - - - - - - - - diff --git a/include/net/irda/irqueue.h b/include/net/irda/irqueue.h deleted file mode 100644 index 37f512bd6733..000000000000 --- a/include/net/irda/irqueue.h +++ /dev/null @@ -1,96 +0,0 @@ -/********************************************************************* - * - * Filename: irqueue.h - * Version: 0.3 - * Description: General queue implementation - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Jun 9 13:26:50 1998 - * Modified at: Thu Oct 7 13:25:16 1999 - * Modified by: Dag Brattli - * - * Copyright (C) 1998-1999, Aage Kvalnes - * Copyright (c) 1998, Dag Brattli - * All Rights Reserved. - * - * This code is taken from the Vortex Operating System written by Aage - * Kvalnes and has been ported to Linux and Linux/IR by Dag Brattli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include -#include - -#ifndef IRDA_QUEUE_H -#define IRDA_QUEUE_H - -#define NAME_SIZE 32 - -/* - * Hash types (some flags can be xored) - * See comments in irqueue.c for which one to use... - */ -#define HB_NOLOCK 0 /* No concurent access prevention */ -#define HB_LOCK 1 /* Prevent concurent write with global lock */ - -/* - * Hash defines - */ -#define HASHBIN_SIZE 8 -#define HASHBIN_MASK 0x7 - -#ifndef IRDA_ALIGN -#define IRDA_ALIGN __attribute__((aligned)) -#endif - -#define Q_NULL { NULL, NULL, "", 0 } - -typedef void (*FREE_FUNC)(void *arg); - -struct irda_queue { - struct irda_queue *q_next; - struct irda_queue *q_prev; - - char q_name[NAME_SIZE]; - long q_hash; /* Must be able to cast a (void *) */ -}; -typedef struct irda_queue irda_queue_t; - -typedef struct hashbin_t { - __u32 magic; - int hb_type; - int hb_size; - spinlock_t hb_spinlock; /* HB_LOCK - Can be used by the user */ - - irda_queue_t* hb_queue[HASHBIN_SIZE] IRDA_ALIGN; - - irda_queue_t* hb_current; -} hashbin_t; - -hashbin_t *hashbin_new(int type); -int hashbin_delete(hashbin_t* hashbin, FREE_FUNC func); -int hashbin_clear(hashbin_t* hashbin, FREE_FUNC free_func); -void hashbin_insert(hashbin_t* hashbin, irda_queue_t* entry, long hashv, - const char* name); -void* hashbin_remove(hashbin_t* hashbin, long hashv, const char* name); -void* hashbin_remove_first(hashbin_t *hashbin); -void* hashbin_remove_this( hashbin_t* hashbin, irda_queue_t* entry); -void* hashbin_find(hashbin_t* hashbin, long hashv, const char* name); -void* hashbin_lock_find(hashbin_t* hashbin, long hashv, const char* name); -void* hashbin_find_next(hashbin_t* hashbin, long hashv, const char* name, - void ** pnext); -irda_queue_t *hashbin_get_first(hashbin_t *hashbin); -irda_queue_t *hashbin_get_next(hashbin_t *hashbin); - -#define HASHBIN_GET_SIZE(hashbin) hashbin->hb_size - -#endif diff --git a/include/net/irda/irttp.h b/include/net/irda/irttp.h deleted file mode 100644 index 98682d4bae8f..000000000000 --- a/include/net/irda/irttp.h +++ /dev/null @@ -1,210 +0,0 @@ -/********************************************************************* - * - * Filename: irttp.h - * Version: 1.0 - * Description: Tiny Transport Protocol (TTP) definitions - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:31 1997 - * Modified at: Sun Dec 12 13:09:07 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRTTP_H -#define IRTTP_H - -#include -#include -#include - -#include -#include /* struct lsap_cb */ -#include /* struct qos_info */ -#include - -#define TTP_MAX_CONNECTIONS LM_MAX_CONNECTIONS -#define TTP_HEADER 1 -#define TTP_MAX_HEADER (TTP_HEADER + LMP_MAX_HEADER) -#define TTP_SAR_HEADER 5 -#define TTP_PARAMETERS 0x80 -#define TTP_MORE 0x80 - -/* Transmission queue sizes */ -/* Worst case scenario, two window of data - Jean II */ -#define TTP_TX_MAX_QUEUE 14 -/* We need to keep at least 5 frames to make sure that we can refill - * appropriately the LAP layer. LAP keeps only two buffers, and we need - * to have 7 to make a full window - Jean II */ -#define TTP_TX_LOW_THRESHOLD 5 -/* Most clients are synchronous with respect to flow control, so we can - * keep a low number of Tx buffers in TTP - Jean II */ -#define TTP_TX_HIGH_THRESHOLD 7 - -/* Receive queue sizes */ -/* Minimum of credit that the peer should hold. - * If the peer has less credits than 9 frames, we will explicitly send - * him some credits (through irttp_give_credit() and a specific frame). - * Note that when we give credits it's likely that it won't be sent in - * this LAP window, but in the next one. So, we make sure that the peer - * has something to send while waiting for credits (one LAP window == 7 - * + 1 frames while he process the credits). - Jean II */ -#define TTP_RX_MIN_CREDIT 8 -/* This is the default maximum number of credits held by the peer, so the - * default maximum number of frames he can send us before needing flow - * control answer from us (this may be negociated differently at TSAP setup). - * We want to minimise the number of times we have to explicitly send some - * credit to the peer, hoping we can piggyback it on the return data. In - * particular, it doesn't make sense for us to send credit more than once - * per LAP window. - * Moreover, giving credits has some latency, so we need strictly more than - * a LAP window, otherwise we may already have credits in our Tx queue. - * But on the other hand, we don't want to keep too many Rx buffer here - * before starting to flow control the other end, so make it exactly one - * LAP window + 1 + MIN_CREDITS. - Jean II */ -#define TTP_RX_DEFAULT_CREDIT 16 -/* Maximum number of credits we can allow the peer to have, and therefore - * maximum Rx queue size. - * Note that we try to deliver packets to the higher layer every time we - * receive something, so in normal mode the Rx queue will never contains - * more than one or two packets. - Jean II */ -#define TTP_RX_MAX_CREDIT 21 - -/* What clients should use when calling ttp_open_tsap() */ -#define DEFAULT_INITIAL_CREDIT TTP_RX_DEFAULT_CREDIT - -/* Some priorities for disconnect requests */ -#define P_NORMAL 0 -#define P_HIGH 1 - -#define TTP_SAR_DISABLE 0 -#define TTP_SAR_UNBOUND 0xffffffff - -/* Parameters */ -#define TTP_MAX_SDU_SIZE 0x01 - -/* - * This structure contains all data associated with one instance of a TTP - * connection. - */ -struct tsap_cb { - irda_queue_t q; /* Must be first */ - magic_t magic; /* Just in case */ - - __u8 stsap_sel; /* Source TSAP */ - __u8 dtsap_sel; /* Destination TSAP */ - - struct lsap_cb *lsap; /* Corresponding LSAP to this TSAP */ - - __u8 connected; /* TSAP connected */ - - __u8 initial_credit; /* Initial credit to give peer */ - - int avail_credit; /* Available credit to return to peer */ - int remote_credit; /* Credit held by peer TTP entity */ - int send_credit; /* Credit held by local TTP entity */ - - struct sk_buff_head tx_queue; /* Frames to be transmitted */ - struct sk_buff_head rx_queue; /* Received frames */ - struct sk_buff_head rx_fragments; - int tx_queue_lock; - int rx_queue_lock; - spinlock_t lock; - - notify_t notify; /* Callbacks to client layer */ - - struct net_device_stats stats; - struct timer_list todo_timer; - - __u32 max_seg_size; /* Max data that fit into an IrLAP frame */ - __u8 max_header_size; - - int rx_sdu_busy; /* RxSdu.busy */ - __u32 rx_sdu_size; /* Current size of a partially received frame */ - __u32 rx_max_sdu_size; /* Max receive user data size */ - - int tx_sdu_busy; /* TxSdu.busy */ - __u32 tx_max_sdu_size; /* Max transmit user data size */ - - int close_pend; /* Close, but disconnect_pend */ - unsigned long disconnect_pend; /* Disconnect, but still data to send */ - struct sk_buff *disconnect_skb; -}; - -struct irttp_cb { - magic_t magic; - hashbin_t *tsaps; -}; - -int irttp_init(void); -void irttp_cleanup(void); - -struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify); -int irttp_close_tsap(struct tsap_cb *self); - -int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb); -int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb); - -int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel, - __u32 saddr, __u32 daddr, - struct qos_info *qos, __u32 max_sdu_size, - struct sk_buff *userdata); -int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, - struct sk_buff *userdata); -int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *skb, - int priority); -void irttp_flow_request(struct tsap_cb *self, LOCAL_FLOW flow); -struct tsap_cb *irttp_dup(struct tsap_cb *self, void *instance); - -static inline __u32 irttp_get_saddr(struct tsap_cb *self) -{ - return irlmp_get_saddr(self->lsap); -} - -static inline __u32 irttp_get_daddr(struct tsap_cb *self) -{ - return irlmp_get_daddr(self->lsap); -} - -static inline __u32 irttp_get_max_seg_size(struct tsap_cb *self) -{ - return self->max_seg_size; -} - -/* After doing a irttp_dup(), this get one of the two socket back into - * a state where it's waiting incoming connections. - * Note : this can be used *only* if the socket is not yet connected - * (i.e. NO irttp_connect_response() done on this socket). - * - Jean II */ -static inline void irttp_listen(struct tsap_cb *self) -{ - irlmp_listen(self->lsap); - self->dtsap_sel = LSAP_ANY; -} - -/* Return TRUE if the node is in primary mode (i.e. master) - * - Jean II */ -static inline int irttp_is_primary(struct tsap_cb *self) -{ - if ((self == NULL) || - (self->lsap == NULL) || - (self->lsap->lap == NULL) || - (self->lsap->lap->irlap == NULL)) - return -2; - return irlap_is_primary(self->lsap->lap->irlap); -} - -#endif /* IRTTP_H */ diff --git a/include/net/irda/parameters.h b/include/net/irda/parameters.h deleted file mode 100644 index 2d9cd0007cba..000000000000 --- a/include/net/irda/parameters.h +++ /dev/null @@ -1,100 +0,0 @@ -/********************************************************************* - * - * Filename: parameters.h - * Version: 1.0 - * Description: A more general way to handle (pi,pl,pv) parameters - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Jun 7 08:47:28 1999 - * Modified at: Sun Jan 30 14:05:14 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - * Michel Dänzer , 10/2001 - * - simplify irda_pv_t to avoid endianness issues - * - ********************************************************************/ - -#ifndef IRDA_PARAMS_H -#define IRDA_PARAMS_H - -/* - * The currently supported types. Beware not to change the sequence since - * it a good reason why the sized integers has a value equal to their size - */ -typedef enum { - PV_INTEGER, /* Integer of any (pl) length */ - PV_INT_8_BITS, /* Integer of 8 bits in length */ - PV_INT_16_BITS, /* Integer of 16 bits in length */ - PV_STRING, /* \0 terminated string */ - PV_INT_32_BITS, /* Integer of 32 bits in length */ - PV_OCT_SEQ, /* Octet sequence */ - PV_NO_VALUE /* Does not contain any value (pl=0) */ -} PV_TYPE; - -/* Bit 7 of type field */ -#define PV_BIG_ENDIAN 0x80 -#define PV_LITTLE_ENDIAN 0x00 -#define PV_MASK 0x7f /* To mask away endian bit */ - -#define PV_PUT 0 -#define PV_GET 1 - -typedef union { - char *c; - __u32 i; - __u32 *ip; -} irda_pv_t; - -typedef struct { - __u8 pi; - __u8 pl; - irda_pv_t pv; -} irda_param_t; - -typedef int (*PI_HANDLER)(void *self, irda_param_t *param, int get); -typedef int (*PV_HANDLER)(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func); - -typedef struct { - const PI_HANDLER func; /* Handler for this parameter identifier */ - PV_TYPE type; /* Data type for this parameter */ -} pi_minor_info_t; - -typedef struct { - const pi_minor_info_t *pi_minor_call_table; - int len; -} pi_major_info_t; - -typedef struct { - const pi_major_info_t *tables; - int len; - __u8 pi_mask; - int pi_major_offset; -} pi_param_info_t; - -int irda_param_pack(__u8 *buf, char *fmt, ...); - -int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len, - pi_param_info_t *info); -int irda_param_extract_all(void *self, __u8 *buf, int len, - pi_param_info_t *info); - -#define irda_param_insert_byte(buf,pi,pv) irda_param_pack(buf,"bbb",pi,1,pv) - -#endif /* IRDA_PARAMS_H */ - diff --git a/include/net/irda/qos.h b/include/net/irda/qos.h deleted file mode 100644 index 05a5a249956f..000000000000 --- a/include/net/irda/qos.h +++ /dev/null @@ -1,101 +0,0 @@ -/********************************************************************* - * - * Filename: qos.h - * Version: 1.0 - * Description: Quality of Service definitions - * Status: Experimental. - * Author: Dag Brattli - * Created at: Fri Sep 19 23:21:09 1997 - * Modified at: Thu Dec 2 13:51:54 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRDA_QOS_H -#define IRDA_QOS_H - -#include - -#include - -#define PI_BAUD_RATE 0x01 -#define PI_MAX_TURN_TIME 0x82 -#define PI_DATA_SIZE 0x83 -#define PI_WINDOW_SIZE 0x84 -#define PI_ADD_BOFS 0x85 -#define PI_MIN_TURN_TIME 0x86 -#define PI_LINK_DISC 0x08 - -#define IR_115200_MAX 0x3f - -/* Baud rates (first byte) */ -#define IR_2400 0x01 -#define IR_9600 0x02 -#define IR_19200 0x04 -#define IR_38400 0x08 -#define IR_57600 0x10 -#define IR_115200 0x20 -#define IR_576000 0x40 -#define IR_1152000 0x80 - -/* Baud rates (second byte) */ -#define IR_4000000 0x01 -#define IR_16000000 0x02 - -/* Quality of Service information */ -typedef struct { - __u32 value; - __u16 bits; /* LSB is first byte, MSB is second byte */ -} qos_value_t; - -struct qos_info { - magic_t magic; - - qos_value_t baud_rate; /* IR_11520O | ... */ - qos_value_t max_turn_time; - qos_value_t data_size; - qos_value_t window_size; - qos_value_t additional_bofs; - qos_value_t min_turn_time; - qos_value_t link_disc_time; - - qos_value_t power; -}; - -extern int sysctl_max_baud_rate; -extern int sysctl_max_inactive_time; - -void irda_init_max_qos_capabilies(struct qos_info *qos); -void irda_qos_compute_intersection(struct qos_info *, struct qos_info *); - -__u32 irlap_max_line_capacity(__u32 speed, __u32 max_turn_time); - -void irda_qos_bits_to_value(struct qos_info *qos); - -/* So simple, how could we not inline those two ? - * Note : one byte is 10 bits if you include start and stop bits - * Jean II */ -#define irlap_min_turn_time_in_bytes(speed, min_turn_time) ( \ - speed * min_turn_time / 10000000 \ -) -#define irlap_xbofs_in_usec(speed, xbofs) ( \ - xbofs * 10000000 / speed \ -) - -#endif - diff --git a/include/net/irda/timer.h b/include/net/irda/timer.h deleted file mode 100644 index d784f242cf7b..000000000000 --- a/include/net/irda/timer.h +++ /dev/null @@ -1,105 +0,0 @@ -/********************************************************************* - * - * Filename: timer.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sat Aug 16 00:59:29 1997 - * Modified at: Thu Oct 7 12:25:24 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997, 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef TIMER_H -#define TIMER_H - -#include -#include - -#include /* for HZ */ - -#include - -/* A few forward declarations (to make compiler happy) */ -struct irlmp_cb; -struct irlap_cb; -struct lsap_cb; -struct lap_cb; - -/* - * Timeout definitions, some defined in IrLAP 6.13.5 - p. 92 - */ -#define POLL_TIMEOUT (450*HZ/1000) /* Must never exceed 500 ms */ -#define FINAL_TIMEOUT (500*HZ/1000) /* Must never exceed 500 ms */ - -/* - * Normally twice of p-timer. Note 3, IrLAP 6.3.11.2 - p. 60 suggests - * at least twice duration of the P-timer. - */ -#define WD_TIMEOUT (POLL_TIMEOUT*2) - -#define MEDIABUSY_TIMEOUT (500*HZ/1000) /* 500 msec */ -#define SMALLBUSY_TIMEOUT (100*HZ/1000) /* 100 msec - IrLAP 6.13.4 */ - -/* - * Slot timer must never exceed 85 ms, and must always be at least 25 ms, - * suggested to 75-85 msec by IrDA lite. This doesn't work with a lot of - * devices, and other stackes uses a lot more, so it's best we do it as well - * (Note : this is the default value and sysctl overrides it - Jean II) - */ -#define SLOT_TIMEOUT (90*HZ/1000) - -/* - * The latest discovery frame (XID) is longer due to the extra discovery - * information (hints, device name...). This is its extra length. - * We use that when setting the query timeout. Jean II - */ -#define XIDEXTRA_TIMEOUT (34*HZ/1000) /* 34 msec */ - -#define WATCHDOG_TIMEOUT (20*HZ) /* 20 sec */ - -typedef void (*TIMER_CALLBACK)(void *); - -static inline void irda_start_timer(struct timer_list *ptimer, int timeout, - void* data, TIMER_CALLBACK callback) -{ - ptimer->function = (void (*)(unsigned long)) callback; - ptimer->data = (unsigned long) data; - - /* Set new value for timer (update or add timer). - * We use mod_timer() because it's more efficient and also - * safer with respect to race conditions - Jean II */ - mod_timer(ptimer, jiffies + timeout); -} - - -void irlap_start_slot_timer(struct irlap_cb *self, int timeout); -void irlap_start_query_timer(struct irlap_cb *self, int S, int s); -void irlap_start_final_timer(struct irlap_cb *self, int timeout); -void irlap_start_wd_timer(struct irlap_cb *self, int timeout); -void irlap_start_backoff_timer(struct irlap_cb *self, int timeout); - -void irlap_start_mbusy_timer(struct irlap_cb *self, int timeout); -void irlap_stop_mbusy_timer(struct irlap_cb *); - -void irlmp_start_watchdog_timer(struct lsap_cb *, int timeout); -void irlmp_start_discovery_timer(struct irlmp_cb *, int timeout); -void irlmp_start_idle_timer(struct lap_cb *, int timeout); -void irlmp_stop_idle_timer(struct lap_cb *self); - -#endif - diff --git a/include/net/irda/wrapper.h b/include/net/irda/wrapper.h deleted file mode 100644 index eef53ebe3d76..000000000000 --- a/include/net/irda/wrapper.h +++ /dev/null @@ -1,58 +0,0 @@ -/********************************************************************* - * - * Filename: wrapper.h - * Version: 1.2 - * Description: IrDA SIR async wrapper layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Tue Jan 11 12:37:29 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef WRAPPER_H -#define WRAPPER_H - -#include -#include -#include - -#include /* iobuff_t */ - -#define BOF 0xc0 /* Beginning of frame */ -#define XBOF 0xff -#define EOF 0xc1 /* End of frame */ -#define CE 0x7d /* Control escape */ - -#define STA BOF /* Start flag */ -#define STO EOF /* End flag */ - -#define IRDA_TRANS 0x20 /* Asynchronous transparency modifier */ - -/* States for receiving a frame in async mode */ -enum { - OUTSIDE_FRAME, - BEGIN_FRAME, - LINK_ESCAPE, - INSIDE_FRAME -}; - -/* Proto definitions */ -int async_wrap_skb(struct sk_buff *skb, __u8 *tx_buff, int buffsize); -void async_unwrap_char(struct net_device *dev, struct net_device_stats *stats, - iobuff_t *buf, __u8 byte); - -#endif -- cgit v1.2.3 From 21acf63013ed3d6fce3176cc34b74064052a31b4 Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Mon, 28 Aug 2017 16:18:42 +1000 Subject: net/ncsi: Configure VLAN tag filter Make use of the ndo_vlan_rx_{add,kill}_vid callbacks to have the NCSI stack process new VLAN tags and configure the channel VLAN filter appropriately. Several VLAN tags can be set and a "Set VLAN Filter" packet must be sent for each one, meaning the ncsi_dev_state_config_svf state must be repeated. An internal list of VLAN tags is maintained, and compared against the current channel's ncsi_channel_filter in order to keep track within the state. VLAN filters are removed in a similar manner, with the introduction of the ncsi_dev_state_config_clear_vids state. The maximum number of VLAN tag filters is determined by the "Get Capabilities" response from the channel. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- include/net/ncsi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 68680baac0fd..1f96af46df49 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -28,6 +28,8 @@ struct ncsi_dev { }; #ifdef CONFIG_NET_NCSI +int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid); +int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid); struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)); int ncsi_start_dev(struct ncsi_dev *nd); -- cgit v1.2.3 From e833251ad813168253fef9915aaf6a8c883337b0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2017 10:18:56 +0100 Subject: rxrpc: Add notification of end-of-Tx phase Add a callback to rxrpc_kernel_send_data() so that a kernel service can get a notification that the AF_RXRPC call has transitioned out the Tx phase and is now waiting for a reply or a final ACK. This is called from AF_RXRPC with the call state lock held so the notification is guaranteed to come before any reply is passed back. Further, modify the AFS filesystem to make use of this so that we don't have to change the afs_call state before sending the last bit of data. Signed-off-by: David Howells --- include/net/af_rxrpc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index c172709787af..07a47ee6f783 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -21,6 +21,8 @@ struct rxrpc_call; typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); +typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *, + unsigned long); typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *, unsigned long); typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long); @@ -37,7 +39,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, gfp_t, rxrpc_notify_rx_t); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, - struct msghdr *, size_t); + struct msghdr *, size_t, + rxrpc_notify_end_tx_t); int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, void *, size_t, size_t *, bool, u32 *); bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, -- cgit v1.2.3 From c038a58ccfd6704d4d7d60ed3d6a0fca13cf13a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2017 10:19:01 +0100 Subject: rxrpc: Allow failed client calls to be retried Allow a client call that failed on network error to be retried, provided that the Tx queue still holds DATA packet 1. This allows an operation to be submitted to another server or another address for the same server without having to repackage and re-encrypt the data so far processed. Two new functions are provided: (1) rxrpc_kernel_check_call() - This is used to find out the completion state of a call to guess whether it can be retried and whether it should be retried. (2) rxrpc_kernel_retry_call() - Disconnect the call from its current connection, reset the state and submit it as a new client call to a new address. The new address need not match the previous address. A call may be retried even if all the data hasn't been loaded into it yet; a partially constructed will be retained at the same point it was at when an error condition was detected. msg_data_left() can be used to find out how much data was packaged before the error occurred. Signed-off-by: David Howells --- include/net/af_rxrpc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/net') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 07a47ee6f783..3ac79150291f 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -19,6 +19,18 @@ struct sock; struct socket; struct rxrpc_call; +/* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ + RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ + RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ + RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ + NR__RXRPC_CALL_COMPLETIONS +}; + typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *, @@ -51,5 +63,9 @@ void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); +int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, + struct sockaddr_rxrpc *, struct key *); +int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, + enum rxrpc_call_completion *, u32 *); #endif /* _NET_RXRPC_H */ -- cgit v1.2.3 From fa20e0e32cb3dfc1760b6254b64977f2fb5bd851 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Mon, 28 Aug 2017 21:43:22 +0200 Subject: vxlan: factor out VXLAN-GPE next protocol The values are shared between VXLAN-GPE and NSH. Originally probably by coincidence but I notified both working groups about this last year and they seem to keep the values in sync since then. Hopefully they'll get a single IANA registry for the values, too. (I asked them for that.) Factor out the code to be shared by the NSH implementation. NSH and MPLS values are added in this patch, too. For MPLS, the drafts incorrectly assign only a single value, while we have two MPLS ethertypes. I raised the problem with both groups. For now, I assume the value is for unicast. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/tun_proto.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ include/net/vxlan.h | 6 ------ 2 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 include/net/tun_proto.h (limited to 'include/net') diff --git a/include/net/tun_proto.h b/include/net/tun_proto.h new file mode 100644 index 000000000000..2ea3deba4c99 --- /dev/null +++ b/include/net/tun_proto.h @@ -0,0 +1,49 @@ +#ifndef __NET_TUN_PROTO_H +#define __NET_TUN_PROTO_H + +#include + +/* One byte protocol values as defined by VXLAN-GPE and NSH. These will + * hopefully get a shared IANA registry. + */ +#define TUN_P_IPV4 0x01 +#define TUN_P_IPV6 0x02 +#define TUN_P_ETHERNET 0x03 +#define TUN_P_NSH 0x04 +#define TUN_P_MPLS_UC 0x05 + +static inline __be16 tun_p_to_eth_p(u8 proto) +{ + switch (proto) { + case TUN_P_IPV4: + return htons(ETH_P_IP); + case TUN_P_IPV6: + return htons(ETH_P_IPV6); + case TUN_P_ETHERNET: + return htons(ETH_P_TEB); + case TUN_P_NSH: + return htons(ETH_P_NSH); + case TUN_P_MPLS_UC: + return htons(ETH_P_MPLS_UC); + } + return 0; +} + +static inline u8 tun_p_from_eth_p(__be16 proto) +{ + switch (proto) { + case htons(ETH_P_IP): + return TUN_P_IPV4; + case htons(ETH_P_IPV6): + return TUN_P_IPV6; + case htons(ETH_P_TEB): + return TUN_P_ETHERNET; + case htons(ETH_P_NSH): + return TUN_P_NSH; + case htons(ETH_P_MPLS_UC): + return TUN_P_MPLS_UC; + } + return 0; +} + +#endif diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 3f430e38ab82..4e3876dde295 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -168,12 +168,6 @@ reserved_flags2:2; #define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \ cpu_to_be32(0xff)) -/* VXLAN-GPE header Next Protocol. */ -#define VXLAN_GPE_NP_IPV4 0x01 -#define VXLAN_GPE_NP_IPV6 0x02 -#define VXLAN_GPE_NP_ETHERNET 0x03 -#define VXLAN_GPE_NP_NSH 0x04 - struct vxlan_metadata { u32 gbp; }; -- cgit v1.2.3 From 1f0b7744c50573df464ca33d8e5275be509f852b Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Mon, 28 Aug 2017 21:43:23 +0200 Subject: net: add NSH header structures and helpers NSH (Network Service Header)[1] is a new protocol for service function chaining, it can be handled as a L3 protocol like IPv4 and IPv6, Eth + NSH + Inner packet or VxLAN-gpe + NSH + Inner packet are two typical use cases. This patch adds NSH header structures and helpers for NSH GSO support and Open vSwitch NSH support. [1] https://datatracker.ietf.org/doc/draft-ietf-sfc-nsh/ [Jiri: added nsh_hdr() helper and renamed the header struct to "struct nshhdr" to match the usual pattern. Removed packet type defines, these are now shared with VXLAN-GPE.] Signed-off-by: Yi Yang Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/nsh.h | 307 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 307 insertions(+) create mode 100644 include/net/nsh.h (limited to 'include/net') diff --git a/include/net/nsh.h b/include/net/nsh.h new file mode 100644 index 000000000000..a1eaea20be96 --- /dev/null +++ b/include/net/nsh.h @@ -0,0 +1,307 @@ +#ifndef __NET_NSH_H +#define __NET_NSH_H 1 + +#include + +/* + * Network Service Header: + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Ver|O|U| TTL | Length |U|U|U|U|MD Type| Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Service Path Identifier (SPI) | Service Index | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * ~ Mandatory/Optional Context Headers ~ + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Version: The version field is used to ensure backward compatibility + * going forward with future NSH specification updates. It MUST be set + * to 0x0 by the sender, in this first revision of NSH. Given the + * widespread implementation of existing hardware that uses the first + * nibble after an MPLS label stack for ECMP decision processing, this + * document reserves version 01b and this value MUST NOT be used in + * future versions of the protocol. Please see [RFC7325] for further + * discussion of MPLS-related forwarding requirements. + * + * O bit: Setting this bit indicates an Operations, Administration, and + * Maintenance (OAM) packet. The actual format and processing of SFC + * OAM packets is outside the scope of this specification (see for + * example [I-D.ietf-sfc-oam-framework] for one approach). + * + * The O bit MUST be set for OAM packets and MUST NOT be set for non-OAM + * packets. The O bit MUST NOT be modified along the SFP. + * + * SF/SFF/SFC Proxy/Classifier implementations that do not support SFC + * OAM procedures SHOULD discard packets with O bit set, but MAY support + * a configurable parameter to enable forwarding received SFC OAM + * packets unmodified to the next element in the chain. Forwarding OAM + * packets unmodified by SFC elements that do not support SFC OAM + * procedures may be acceptable for a subset of OAM functions, but can + * result in unexpected outcomes for others, thus it is recommended to + * analyze the impact of forwarding an OAM packet for all OAM functions + * prior to enabling this behavior. The configurable parameter MUST be + * disabled by default. + * + * TTL: Indicates the maximum SFF hops for an SFP. This field is used + * for service plane loop detection. The initial TTL value SHOULD be + * configurable via the control plane; the configured initial value can + * be specific to one or more SFPs. If no initial value is explicitly + * provided, the default initial TTL value of 63 MUST be used. Each SFF + * involved in forwarding an NSH packet MUST decrement the TTL value by + * 1 prior to NSH forwarding lookup. Decrementing by 1 from an incoming + * value of 0 shall result in a TTL value of 63. The packet MUST NOT be + * forwarded if TTL is, after decrement, 0. + * + * All other flag fields, marked U, are unassigned and available for + * future use, see Section 11.2.1. Unassigned bits MUST be set to zero + * upon origination, and MUST be ignored and preserved unmodified by + * other NSH supporting elements. Elements which do not understand the + * meaning of any of these bits MUST NOT modify their actions based on + * those unknown bits. + * + * Length: The total length, in 4-byte words, of NSH including the Base + * Header, the Service Path Header, the Fixed Length Context Header or + * Variable Length Context Header(s). The length MUST be 0x6 for MD + * Type equal to 0x1, and MUST be 0x2 or greater for MD Type equal to + * 0x2. The length of the NSH header MUST be an integer multiple of 4 + * bytes, thus variable length metadata is always padded out to a + * multiple of 4 bytes. + * + * MD Type: Indicates the format of NSH beyond the mandatory Base Header + * and the Service Path Header. MD Type defines the format of the + * metadata being carried. + * + * 0x0 - This is a reserved value. Implementations SHOULD silently + * discard packets with MD Type 0x0. + * + * 0x1 - This indicates that the format of the header includes a fixed + * length Context Header (see Figure 4 below). + * + * 0x2 - This does not mandate any headers beyond the Base Header and + * Service Path Header, but may contain optional variable length Context + * Header(s). The semantics of the variable length Context Header(s) + * are not defined in this document. The format of the optional + * variable length Context Headers is provided in Section 2.5.1. + * + * 0xF - This value is reserved for experimentation and testing, as per + * [RFC3692]. Implementations not explicitly configured to be part of + * an experiment SHOULD silently discard packets with MD Type 0xF. + * + * Next Protocol: indicates the protocol type of the encapsulated data. + * NSH does not alter the inner payload, and the semantics on the inner + * protocol remain unchanged due to NSH service function chaining. + * Please see the IANA Considerations section below, Section 11.2.5. + * + * This document defines the following Next Protocol values: + * + * 0x1: IPv4 + * 0x2: IPv6 + * 0x3: Ethernet + * 0x4: NSH + * 0x5: MPLS + * 0xFE: Experiment 1 + * 0xFF: Experiment 2 + * + * Packets with Next Protocol values not supported SHOULD be silently + * dropped by default, although an implementation MAY provide a + * configuration parameter to forward them. Additionally, an + * implementation not explicitly configured for a specific experiment + * [RFC3692] SHOULD silently drop packets with Next Protocol values 0xFE + * and 0xFF. + * + * Service Path Identifier (SPI): Identifies a service path. + * Participating nodes MUST use this identifier for Service Function + * Path selection. The initial classifier MUST set the appropriate SPI + * for a given classification result. + * + * Service Index (SI): Provides location within the SFP. The initial + * classifier for a given SFP SHOULD set the SI to 255, however the + * control plane MAY configure the initial value of SI as appropriate + * (i.e., taking into account the length of the service function path). + * The Service Index MUST be decremented by a value of 1 by Service + * Functions or by SFC Proxy nodes after performing required services + * and the new decremented SI value MUST be used in the egress packet's + * NSH. The initial Classifier MUST send the packet to the first SFF in + * the identified SFP for forwarding along an SFP. If re-classification + * occurs, and that re-classification results in a new SPI, the + * (re)classifier is, in effect, the initial classifier for the + * resultant SPI. + * + * The SI is used in conjunction the with Service Path Identifier for + * Service Function Path Selection and for determining the next SFF/SF + * in the path. The SI is also valuable when troubleshooting or + * reporting service paths. Additionally, while the TTL field is the + * main mechanism for service plane loop detection, the SI can also be + * used for detecting service plane loops. + * + * When the Base Header specifies MD Type = 0x1, a Fixed Length Context + * Header (16-bytes) MUST be present immediately following the Service + * Path Header. The value of a Fixed Length Context + * Header that carries no metadata MUST be set to zero. + * + * When the base header specifies MD Type = 0x2, zero or more Variable + * Length Context Headers MAY be added, immediately following the + * Service Path Header (see Figure 5). Therefore, Length = 0x2, + * indicates that only the Base Header followed by the Service Path + * Header are present. The optional Variable Length Context Headers + * MUST be of an integer number of 4-bytes. The base header Length + * field MUST be used to determine the offset to locate the original + * packet or frame for SFC nodes that require access to that + * information. + * + * The format of the optional variable length Context Headers + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Metadata Class | Type |U| Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Variable Metadata | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Metadata Class (MD Class): Defines the scope of the 'Type' field to + * provide a hierarchical namespace. The IANA Considerations + * Section 11.2.4 defines how the MD Class values can be allocated to + * standards bodies, vendors, and others. + * + * Type: Indicates the explicit type of metadata being carried. The + * definition of the Type is the responsibility of the MD Class owner. + * + * Unassigned bit: One unassigned bit is available for future use. This + * bit MUST NOT be set, and MUST be ignored on receipt. + * + * Length: Indicates the length of the variable metadata, in bytes. In + * case the metadata length is not an integer number of 4-byte words, + * the sender MUST add pad bytes immediately following the last metadata + * byte to extend the metadata to an integer number of 4-byte words. + * The receiver MUST round up the length field to the nearest 4-byte + * word boundary, to locate and process the next field in the packet. + * The receiver MUST access only those bytes in the metadata indicated + * by the length field (i.e., actual number of bytes) and MUST ignore + * the remaining bytes up to the nearest 4-byte word boundary. The + * Length may be 0 or greater. + * + * A value of 0 denotes a Context Header without a Variable Metadata + * field. + * + * [0] https://datatracker.ietf.org/doc/draft-ietf-sfc-nsh/ + */ + +/** + * struct nsh_md1_ctx - Keeps track of NSH context data + * @nshc<1-4>: NSH Contexts. + */ +struct nsh_md1_ctx { + __be32 context[4]; +}; + +struct nsh_md2_tlv { + __be16 md_class; + u8 type; + u8 length; + u8 md_value[]; +}; + +struct nshhdr { + __be16 ver_flags_ttl_len; + u8 mdtype; + u8 np; + __be32 path_hdr; + union { + struct nsh_md1_ctx md1; + struct nsh_md2_tlv md2; + }; +}; + +/* Masking NSH header fields. */ +#define NSH_VER_MASK 0xc000 +#define NSH_VER_SHIFT 14 +#define NSH_FLAGS_MASK 0x3000 +#define NSH_FLAGS_SHIFT 12 +#define NSH_TTL_MASK 0x0fc0 +#define NSH_TTL_SHIFT 6 +#define NSH_LEN_MASK 0x003f +#define NSH_LEN_SHIFT 0 + +#define NSH_MDTYPE_MASK 0x0f +#define NSH_MDTYPE_SHIFT 0 + +#define NSH_SPI_MASK 0xffffff00 +#define NSH_SPI_SHIFT 8 +#define NSH_SI_MASK 0x000000ff +#define NSH_SI_SHIFT 0 + +/* MD Type Registry. */ +#define NSH_M_TYPE1 0x01 +#define NSH_M_TYPE2 0x02 +#define NSH_M_EXP1 0xFE +#define NSH_M_EXP2 0xFF + +/* NSH Base Header Length */ +#define NSH_BASE_HDR_LEN 8 + +/* NSH MD Type 1 header Length. */ +#define NSH_M_TYPE1_LEN 24 + +/* NSH header maximum Length. */ +#define NSH_HDR_MAX_LEN 256 + +/* NSH context headers maximum Length. */ +#define NSH_CTX_HDRS_MAX_LEN 248 + +static inline struct nshhdr *nsh_hdr(struct sk_buff *skb) +{ + return (struct nshhdr *)skb_network_header(skb); +} + +static inline u16 nsh_hdr_len(const struct nshhdr *nsh) +{ + return ((ntohs(nsh->ver_flags_ttl_len) & NSH_LEN_MASK) + >> NSH_LEN_SHIFT) << 2; +} + +static inline u8 nsh_get_ver(const struct nshhdr *nsh) +{ + return (ntohs(nsh->ver_flags_ttl_len) & NSH_VER_MASK) + >> NSH_VER_SHIFT; +} + +static inline u8 nsh_get_flags(const struct nshhdr *nsh) +{ + return (ntohs(nsh->ver_flags_ttl_len) & NSH_FLAGS_MASK) + >> NSH_FLAGS_SHIFT; +} + +static inline u8 nsh_get_ttl(const struct nshhdr *nsh) +{ + return (ntohs(nsh->ver_flags_ttl_len) & NSH_TTL_MASK) + >> NSH_TTL_SHIFT; +} + +static inline void __nsh_set_xflag(struct nshhdr *nsh, u16 xflag, u16 xmask) +{ + nsh->ver_flags_ttl_len + = (nsh->ver_flags_ttl_len & ~htons(xmask)) | htons(xflag); +} + +static inline void nsh_set_flags_and_ttl(struct nshhdr *nsh, u8 flags, u8 ttl) +{ + __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) | + ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK), + NSH_FLAGS_MASK | NSH_TTL_MASK); +} + +static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags, + u8 ttl, u8 len) +{ + len = len >> 2; + __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) | + ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK) | + ((len << NSH_LEN_SHIFT) & NSH_LEN_MASK), + NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK); +} + +#endif /* __NET_NSH_H */ -- cgit v1.2.3 From 1b70d792cf6775fb5d0737524387893daeb5374a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 28 Aug 2017 13:53:34 -0700 Subject: ipv6: Use rt6i_idev index for echo replies to a local address Tariq repored local pings to linklocal address is failing: $ ifconfig ens8 ens8: flags=4163 mtu 1500 inet 11.141.16.6 netmask 255.255.0.0 broadcast 11.141.255.255 inet6 fe80::7efe:90ff:fecb:7502 prefixlen 64 scopeid 0x20 ether 7c:fe:90:cb:75:02 txqueuelen 1000 (Ethernet) RX packets 12 bytes 1164 (1.1 KiB) RX errors 0 dropped 0 overruns 0 frame 0 TX packets 30 bytes 2484 (2.4 KiB) TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0 $ /bin/ping6 -c 3 fe80::7efe:90ff:fecb:7502%ens8 PING fe80::7efe:90ff:fecb:7502%ens8(fe80::7efe:90ff:fecb:7502) 56 data bytes Signed-off-by: David S. Miller --- include/net/ip6_route.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 882bc3c7ccde..ee96f402cb75 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -164,6 +164,16 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); +static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) +{ + const struct dst_entry *dst = skb_dst(skb); + const struct rt6_info *rt6 = NULL; + + if (dst) + rt6 = container_of(dst, struct rt6_info, dst); + + return rt6; +} /* * Store a destination cache entry in a socket -- cgit v1.2.3 From eaa72dc47488d599439cd0fd0f8c4f1bcb3906bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Aug 2017 15:16:01 -0700 Subject: neigh: increase queue_len_bytes to match wmem_default Florian reported UDP xmit drops that could be root caused to the too small neigh limit. Current limit is 64 KB, meaning that even a single UDP socket would hit it, since its default sk_sndbuf comes from net.core.wmem_default (~212992 bytes on 64bit arches). Once ARP/ND resolution is in progress, we should allow a little more packets to be queued, at least for one producer. Once neigh arp_queue is filled, a rogue socket should hit its sk_sndbuf limit and either block in sendmsg() or return -EAGAIN. Signed-off-by: Eric Dumazet Reported-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/sock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 1c2912d433e8..03a362568357 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2368,6 +2368,16 @@ bool sk_net_capable(const struct sock *sk, int cap); void sk_get_meminfo(const struct sock *sk, u32 *meminfo); +/* Take into consideration the size of the struct sk_buff overhead in the + * determination of these values, since that is non-constant across + * platforms. This makes socket queueing behavior and performance + * not depend upon such differences. + */ +#define _SK_MEM_PACKETS 256 +#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) +#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) +#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) + extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; -- cgit v1.2.3 From c1d2b4c3e204e602c97680335d082b8d012d08cd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 30 Aug 2017 19:24:57 +0200 Subject: tcp: Revert "tcp: remove CA_ACK_SLOWPATH" This change was a followup to the header prediction removal, so first revert this as a prerequisite to back out hp removal. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index c614ff135b66..c546d13ffbca 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -910,8 +910,9 @@ enum tcp_ca_event { /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ enum tcp_ca_ack_event_flags { - CA_ACK_WIN_UPDATE = (1 << 0), /* ACK updated window */ - CA_ACK_ECE = (1 << 1), /* ECE bit is set on ack */ + CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */ + CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */ + CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */ }; /* -- cgit v1.2.3 From 31770e34e43d6c8dee129bfee77e56c34e61f0e5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 30 Aug 2017 19:24:58 +0200 Subject: tcp: Revert "tcp: remove header prediction" This reverts commit 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78. Eric Dumazet says: We found at Google a significant regression caused by 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78 tcp: remove header prediction In typical RPC (TCP_RR), when a TCP socket receives data, we now call tcp_ack() while we used to not call it. This touches enough cache lines to cause a slowdown. so problem does not seem to be HP removal itself but the tcp_ack() call. Therefore, it might be possible to remove HP after all, provided one finds a way to elide tcp_ack for most cases. Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index c546d13ffbca..9c3db054e47f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -634,6 +634,29 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } +static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) +{ + tp->pred_flags = htonl((tp->tcp_header_len << 26) | + ntohl(TCP_FLAG_ACK) | + snd_wnd); +} + +static inline void tcp_fast_path_on(struct tcp_sock *tp) +{ + __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); +} + +static inline void tcp_fast_path_check(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && + tp->rcv_wnd && + atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && + !tp->urg_data) + tcp_fast_path_on(tp); +} + /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(struct sock *sk) { -- cgit v1.2.3 From 65a206c01e8e7ffe971477a36419422099216eff Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 30 Aug 2017 02:31:59 -0400 Subject: net/sched: Change act_api and act_xxx modules to use IDR Typically, each TC filter has its own action. All the actions of the same type are saved in its hash table. But the hash buckets are too small that it degrades to a list. And the performance is greatly affected. For example, it takes about 0m11.914s to insert 64K rules. If we convert the hash table to IDR, it only takes about 0m1.500s. The improvement is huge. But please note that the test result is based on previous patch that cls_flower uses IDR. Signed-off-by: Chris Mi Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 76 ++++++++++++++++----------------------------------- 1 file changed, 24 insertions(+), 52 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 26ffd8333f50..8f3d5d8b5ae0 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -10,12 +10,9 @@ #include #include - -struct tcf_hashinfo { - struct hlist_head *htab; - unsigned int hmask; - spinlock_t lock; - u32 index; +struct tcf_idrinfo { + spinlock_t lock; + struct idr action_idr; }; struct tc_action_ops; @@ -25,9 +22,8 @@ struct tc_action { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ __u32 order; struct list_head list; - struct tcf_hashinfo *hinfo; + struct tcf_idrinfo *idrinfo; - struct hlist_node tcfa_head; u32 tcfa_index; int tcfa_refcnt; int tcfa_bindcnt; @@ -44,7 +40,6 @@ struct tc_action { struct tc_cookie *act_cookie; struct tcf_chain *goto_chain; }; -#define tcf_head common.tcfa_head #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt #define tcf_bindcnt common.tcfa_bindcnt @@ -57,27 +52,6 @@ struct tc_action { #define tcf_lock common.tcfa_lock #define tcf_rcu common.tcfa_rcu -static inline unsigned int tcf_hash(u32 index, unsigned int hmask) -{ - return index & hmask; -} - -static inline int tcf_hashinfo_init(struct tcf_hashinfo *hf, unsigned int mask) -{ - int i; - - spin_lock_init(&hf->lock); - hf->index = 0; - hf->hmask = mask; - hf->htab = kzalloc((mask + 1) * sizeof(struct hlist_head), - GFP_KERNEL); - if (!hf->htab) - return -ENOMEM; - for (i = 0; i < mask + 1; i++) - INIT_HLIST_HEAD(&hf->htab[i]); - return 0; -} - /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. */ @@ -126,53 +100,51 @@ struct tc_action_ops { }; struct tc_action_net { - struct tcf_hashinfo *hinfo; + struct tcf_idrinfo *idrinfo; const struct tc_action_ops *ops; }; static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops, unsigned int mask) + const struct tc_action_ops *ops) { int err = 0; - tn->hinfo = kmalloc(sizeof(*tn->hinfo), GFP_KERNEL); - if (!tn->hinfo) + tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL); + if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - err = tcf_hashinfo_init(tn->hinfo, mask); - if (err) - kfree(tn->hinfo); + spin_lock_init(&tn->idrinfo->lock); + idr_init(&tn->idrinfo->action_idr); return err; } -void tcf_hashinfo_destroy(const struct tc_action_ops *ops, - struct tcf_hashinfo *hinfo); +void tcf_idrinfo_destroy(const struct tc_action_ops *ops, + struct tcf_idrinfo *idrinfo); static inline void tc_action_net_exit(struct tc_action_net *tn) { - tcf_hashinfo_destroy(tn->ops, tn->hinfo); - kfree(tn->hinfo); + tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + kfree(tn->idrinfo); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops); -int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index); -u32 tcf_hash_new_index(struct tc_action_net *tn); -bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); +bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, int bind); -int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action **a, const struct tc_action_ops *ops, int bind, - bool cpustats); -void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); -void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a); +int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, + struct tc_action **a, const struct tc_action_ops *ops, + int bind, bool cpustats); +void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est); +void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); -int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); +int __tcf_idr_release(struct tc_action *a, bool bind, bool strict); -static inline int tcf_hash_release(struct tc_action *a, bool bind) +static inline int tcf_idr_release(struct tc_action *a, bool bind) { - return __tcf_hash_release(a, bind, false); + return __tcf_idr_release(a, bind, false); } int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); -- cgit v1.2.3 From 47ebcc0bb1d5eb7f1b1eeab675409ea7f67b4a5c Mon Sep 17 00:00:00 2001 From: Yossi Kuperman Date: Wed, 30 Aug 2017 11:30:39 +0300 Subject: xfrm: Add support for network devices capable of removing the ESP trailer In conjunction with crypto offload [1], removing the ESP trailer by hardware can potentially improve the performance by avoiding (1) a cache miss incurred by reading the nexthdr field and (2) the necessity to calculate the csum value of the trailer in order to keep skb->csum valid. This patch introduces the changes to the xfrm stack and merely serves as an infrastructure. Subsequent patch to mlx5 driver will put this to a good use. [1] https://www.mail-archive.com/netdev@vger.kernel.org/msg175733.html Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9c7b70cce6d6..f002a2c5e33c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1019,6 +1019,7 @@ struct xfrm_offload { #define CRYPTO_FALLBACK 8 #define XFRM_GSO_SEGMENT 16 #define XFRM_GRO 32 +#define XFRM_ESP_NO_TRAILER 64 __u32 status; #define CRYPTO_SUCCESS 1 -- cgit v1.2.3 From 07d79fc7d94e3f884b8b1c95aa615b202bb5e4c1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 30 Aug 2017 14:30:36 -0700 Subject: net_sched: add reverse binding for tc class TC filters when used as classifiers are bound to TC classes. However, there is a hidden difference when adding them in different orders: 1. If we add tc classes before its filters, everything is fine. Logically, the classes exist before we specify their ID's in filters, it is easy to bind them together, just as in the current code base. 2. If we add tc filters before the tc classes they bind, we have to do dynamic lookup in fast path. What's worse, this happens all the time not just once, because on fast path tcf_result is passed on stack, there is no way to propagate back to the one in tc filters. This hidden difference hurts performance silently if we have many tc classes in hierarchy. This patch intends to close this gap by doing the reverse binding when we create a new class, in this case we can actually search all the filters in its parent, match and fixup by classid. And because tcf_result is specific to each type of tc filter, we have to introduce a new ops for each filter to tell how to bind the class. Note, we still can NOT totally get rid of those class lookup in ->enqueue() because cgroup and flow filters have no way to determine the classid at setup time, they still have to go through dynamic lookup. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c30b634c5f82..d6247a3c40df 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -217,6 +217,7 @@ struct tcf_proto_ops { void **, bool); int (*delete)(struct tcf_proto*, void *, bool*); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); + void (*bind_class)(void *, u32, unsigned long); /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, -- cgit v1.2.3 From 1797f5b3cf0b3a73c42b89f7a8fd897417373730 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 31 Aug 2017 17:59:12 +0200 Subject: devlink: Add IPv6 header for dpipe This will be used by the IPv6 host table which will be introduced in the following patches. The fields in the header are added per-use. This header is global and can be reused by many drivers. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index aaf7178127a2..b9654e133599 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -330,6 +330,7 @@ int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_match *match); extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; +extern struct devlink_dpipe_header devlink_dpipe_header_ipv6; #else -- cgit v1.2.3 From 65bce46298d064dff9db1282e17bb26602715819 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 1 Sep 2017 13:41:17 -0700 Subject: Bluetooth: make baswap src const Signed-off-by: Loic Poulain Signed-off-by: Bjorn Andersson Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 01487192f628..020142bb9735 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -233,7 +233,7 @@ static inline void bacpy(bdaddr_t *dst, const bdaddr_t *src) memcpy(dst, src, sizeof(bdaddr_t)); } -void baswap(bdaddr_t *dst, bdaddr_t *src); +void baswap(bdaddr_t *dst, const bdaddr_t *src); /* Common socket structures and functions */ -- cgit v1.2.3 From 864150dfa31dceab6ec5ca4579a2d35ede985cb7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 1 Sep 2017 12:15:17 +0300 Subject: net: Add module reference to FIB notifiers When a listener registers to the FIB notification chain it receives a dump of the FIB entries and rules from existing address families by invoking their dump operations. While we call into these modules we need to make sure they aren't removed. Do that by increasing their reference count before invoking their dump operations and decrease it afterwards. Fixes: 04b1d4e50e82 ("net: core: Make the FIB notification chain generic") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/fib_notifier.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index 241475224f74..669b9716dc7a 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -2,6 +2,7 @@ #define __NET_FIB_NOTIFIER_H #include +#include #include #include @@ -26,6 +27,7 @@ struct fib_notifier_ops { struct list_head list; unsigned int (*fib_seq_read)(struct net *net); int (*fib_dump)(struct net *net, struct notifier_block *nb); + struct module *owner; struct rcu_head rcu; }; -- cgit v1.2.3 From 64327fc811268d4a24de03dac242ea29de6be75f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 31 Aug 2017 18:11:41 +0200 Subject: ipv4: Don't override return code from ip_route_input_noref() After ip_route_input() calls ip_route_input_noref(), another check on skb_dst() is done, but if this fails, we shouldn't override the return code from ip_route_input_noref(), as it could have been more specific (i.e. -EHOSTUNREACH). This also saves one call to skb_dst_force_safe() and one to skb_dst() in case the ip_route_input_noref() check fails. Reported-by: Sabrina Dubroca Fixes: 9df16efadd2a ("ipv4: call dst_hold_safe() properly") Signed-off-by: Stefano Brivio Acked-by: Wei Wang Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/route.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index cb0a76d9dde1..1b09a9368c68 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -189,10 +189,11 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, rcu_read_lock(); err = ip_route_input_noref(skb, dst, src, tos, devin); - if (!err) + if (!err) { skb_dst_force_safe(skb); - if (!skb_dst(skb)) - err = -EINVAL; + if (!skb_dst(skb)) + err = -EINVAL; + } rcu_read_unlock(); return err; -- cgit v1.2.3 From fb452a1aa3fd4034d7999e309c5466ff2d7005aa Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Sep 2017 11:26:08 +0200 Subject: Revert "net: use lib/percpu_counter API for fragmentation mem accounting" This reverts commit 6d7b857d541ecd1d9bd997c97242d4ef94b19de2. There is a bug in fragmentation codes use of the percpu_counter API, that can cause issues on systems with many CPUs. The frag_mem_limit() just reads the global counter (fbc->count), without considering other CPUs can have upto batch size (130K) that haven't been subtracted yet. Due to the 3MBytes lower thresh limit, this become dangerous at >=24 CPUs (3*1024*1024/130000=24). The correct API usage would be to use __percpu_counter_compare() which does the right thing, and takes into account the number of (online) CPUs and batch size, to account for this and call __percpu_counter_sum() when needed. We choose to revert the use of the lib/percpu_counter API for frag memory accounting for several reasons: 1) On systems with CPUs > 24, the heavier fully locked __percpu_counter_sum() is always invoked, which will be more expensive than the atomic_t that is reverted to. Given systems with more than 24 CPUs are becoming common this doesn't seem like a good option. To mitigate this, the batch size could be decreased and thresh be increased. 2) The add_frag_mem_limit+sub_frag_mem_limit pairs happen on the RX CPU, before SKBs are pushed into sockets on remote CPUs. Given NICs can only hash on L2 part of the IP-header, the NIC-RXq's will likely be limited. Thus, a fair chance that atomic add+dec happen on the same CPU. Revert note that commit 1d6119baf061 ("net: fix percpu memory leaks") removed init_frag_mem_limit() and instead use inet_frags_init_net(). After this revert, inet_frags_uninit_net() becomes empty. Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") Fixes: 1d6119baf061 ("net: fix percpu memory leaks") Signed-off-by: Jesper Dangaard Brouer Acked-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 6fdcd2427776..fa635aa6d0b9 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -1,14 +1,9 @@ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ -#include - struct netns_frags { - /* The percpu_counter "mem" need to be cacheline aligned. - * mem.count must not share cacheline with other writers - */ - struct percpu_counter mem ____cacheline_aligned_in_smp; - + /* Keep atomic mem on separate cachelines in structs that include it */ + atomic_t mem ____cacheline_aligned_in_smp; /* sysctls */ int timeout; int high_thresh; @@ -110,11 +105,11 @@ void inet_frags_fini(struct inet_frags *); static inline int inet_frags_init_net(struct netns_frags *nf) { - return percpu_counter_init(&nf->mem, 0, GFP_KERNEL); + atomic_set(&nf->mem, 0); + return 0; } static inline void inet_frags_uninit_net(struct netns_frags *nf) { - percpu_counter_destroy(&nf->mem); } void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); @@ -140,31 +135,24 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q) /* Memory Tracking Functions. */ -/* The default percpu_counter batch size is not big enough to scale to - * fragmentation mem acct sizes. - * The mem size of a 64K fragment is approx: - * (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes - */ -static unsigned int frag_percpu_counter_batch = 130000; - static inline int frag_mem_limit(struct netns_frags *nf) { - return percpu_counter_read(&nf->mem); + return atomic_read(&nf->mem); } static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch); + atomic_sub(i, &nf->mem); } static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch); + atomic_add(i, &nf->mem); } -static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) +static inline int sum_frag_mem_limit(struct netns_frags *nf) { - return percpu_counter_sum_positive(&nf->mem); + return atomic_read(&nf->mem); } /* RFC 3168 support : -- cgit v1.2.3 From 5a63643e583b6a9789d7a225ae076fb4e603991c Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Sep 2017 11:26:13 +0200 Subject: Revert "net: fix percpu memory leaks" This reverts commit 1d6119baf0610f813eb9d9580eb4fd16de5b4ceb. After reverting commit 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") then here is no need for this fix-up patch. As percpu_counter is no longer used, it cannot memory leak it any-longer. Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") Fixes: 1d6119baf061 ("net: fix percpu memory leaks") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/inet_frag.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index fa635aa6d0b9..fc59e0775e00 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -103,15 +103,10 @@ struct inet_frags { int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -static inline int inet_frags_init_net(struct netns_frags *nf) +static inline void inet_frags_init_net(struct netns_frags *nf) { atomic_set(&nf->mem, 0); - return 0; } -static inline void inet_frags_uninit_net(struct netns_frags *nf) -{ -} - void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); -- cgit v1.2.3 From dfc46034b54af3abf594de75a1ee43ef2ec2a60a Mon Sep 17 00:00:00 2001 From: "Pablo M. Bermudo Garay" Date: Wed, 23 Aug 2017 22:41:23 +0200 Subject: netfilter: nf_tables: add select_ops for stateful objects This patch adds support for overloading stateful objects operations through the select_ops() callback, just as it is implemented for expressions. This change is needed for upcoming additions to the stateful objects infrastructure. Signed-off-by: Pablo M. Bermudo Garay Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f9795fe394f3..0f5b12a4ad09 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1007,12 +1007,12 @@ int nft_verdict_dump(struct sk_buff *skb, int type, * * @list: table stateful object list node * @table: table this object belongs to - * @type: pointer to object type - * @data: pointer to object data * @name: name of this stateful object * @genmask: generation mask * @use: number of references to this stateful object * @data: object data, layout depends on type + * @ops: object operations + * @data: pointer to object data */ struct nft_object { struct list_head list; @@ -1021,7 +1021,7 @@ struct nft_object { u32 genmask:2, use:30; /* runtime data below here */ - const struct nft_object_type *type ____cacheline_aligned; + const struct nft_object_ops *ops ____cacheline_aligned; unsigned char data[] __attribute__((aligned(__alignof__(u64)))); }; @@ -1044,27 +1044,39 @@ void nft_obj_notify(struct net *net, struct nft_table *table, /** * struct nft_object_type - stateful object type * - * @eval: stateful object evaluation function + * @select_ops: function to select nft_object_ops + * @ops: default ops, used when no select_ops functions is present * @list: list node in list of object types * @type: stateful object numeric type - * @size: stateful object size * @owner: module owner * @maxattr: maximum netlink attribute * @policy: netlink attribute policy + */ +struct nft_object_type { + const struct nft_object_ops *(*select_ops)(const struct nft_ctx *, + const struct nlattr * const tb[]); + const struct nft_object_ops *ops; + struct list_head list; + u32 type; + unsigned int maxattr; + struct module *owner; + const struct nla_policy *policy; +}; + +/** + * struct nft_object_ops - stateful object operations + * + * @eval: stateful object evaluation function + * @size: stateful object size * @init: initialize object from netlink attributes * @destroy: release existing stateful object * @dump: netlink dump stateful object */ -struct nft_object_type { +struct nft_object_ops { void (*eval)(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt); - struct list_head list; - u32 type; unsigned int size; - unsigned int maxattr; - struct module *owner; - const struct nla_policy *policy; int (*init)(const struct nft_ctx *ctx, const struct nlattr *const tb[], struct nft_object *obj); @@ -1072,6 +1084,7 @@ struct nft_object_type { int (*dump)(struct sk_buff *skb, struct nft_object *obj, bool reset); + const struct nft_object_type *type; }; int nft_register_obj(struct nft_object_type *obj_type); -- cgit v1.2.3 From d1c1e39de8357d66163da39e893e38ea1410e8f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 29 Aug 2017 12:04:10 +0200 Subject: netfilter: remove unused hooknum arg from packet functions tested with allmodconfig build. Signed-off-by: Florian Westphal --- include/net/netfilter/nf_conntrack_l4proto.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index d4933d56809d..738a0307a96b 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -43,7 +43,6 @@ struct nf_conntrack_l4proto { unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum, unsigned int *timeouts); /* Called when a new connection for this protocol found; -- cgit v1.2.3 From 44d6e2f27328b254111dd716fde45b3b59b8a4f7 Mon Sep 17 00:00:00 2001 From: Varsha Rao Date: Wed, 30 Aug 2017 13:37:11 +0530 Subject: net: Replace NF_CT_ASSERT() with WARN_ON(). This patch removes NF_CT_ASSERT() and instead uses WARN_ON(). Signed-off-by: Varsha Rao --- include/net/netfilter/nf_conntrack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 6e6f678aaac7..0385cb08c478 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -159,7 +159,7 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { - NF_CT_ASSERT(ct); + WARN_ON(!ct); nf_conntrack_put(&ct->ct_general); } -- cgit v1.2.3 From 9efdb14f76f4d7591cd4d7a436ebd716b19703b6 Mon Sep 17 00:00:00 2001 From: Varsha Rao Date: Wed, 30 Aug 2017 13:37:12 +0530 Subject: net: Remove CONFIG_NETFILTER_DEBUG and _ASSERT() macros. This patch removes CONFIG_NETFILTER_DEBUG and _ASSERT() macros as they are no longer required. Replace _ASSERT() macros with WARN_ON(). Signed-off-by: Varsha Rao Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 0385cb08c478..fdc9c64a1c94 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -44,12 +44,6 @@ union nf_conntrack_expect_proto { #include #include -#ifdef CONFIG_NETFILTER_DEBUG -#define NF_CT_ASSERT(x) WARN_ON(!(x)) -#else -#define NF_CT_ASSERT(x) -#endif - #include #include -- cgit v1.2.3 From 53168215909281a09d3afc6fb51a9d4f81f74d39 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Jun 2017 12:20:30 +0200 Subject: mac80211: fix VLAN handling with TXQs With TXQs, the AP_VLAN interfaces are resolved to their owner AP interface when enqueuing the frame, which makes sense since the frame really goes out on that as far as the driver is concerned. However, this introduces a problem: frames to be encrypted with a VLAN-specific GTK will now be encrypted with the AP GTK, since the information about which virtual interface to use to select the key is taken from the TXQ. Fix this by preserving info->control.vif and using that in the dequeue function. This now requires doing the driver-mapping in the dequeue as well. Since there's no way to filter the frames that are sitting on a TXQ, drop all frames, which may affect other interfaces, when an AP_VLAN is removed. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f8149ca192b4..885690fa39c8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -919,21 +919,10 @@ struct ieee80211_tx_info { unsigned long jiffies; }; /* NB: vif can be NULL for injected frames */ - union { - /* NB: vif can be NULL for injected frames */ - struct ieee80211_vif *vif; - - /* When packets are enqueued on txq it's easy - * to re-construct the vif pointer. There's no - * more space in tx_info so it can be used to - * store the necessary enqueue time for packet - * sojourn time computation. - */ - codel_time_t enqueue_time; - }; + struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; u32 flags; - /* 4 bytes free */ + codel_time_t enqueue_time; } control; struct { u64 cookie; -- cgit v1.2.3 From fd0c88b70060e03a2215259ed29b4b31497ad205 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 10:05:47 +0200 Subject: net/ncsi: fix ncsi_vlan_rx_{add,kill}_vid references We get a new link error in allmodconfig kernels after ftgmac100 started using the ncsi helpers: ERROR: "ncsi_vlan_rx_kill_vid" [drivers/net/ethernet/faraday/ftgmac100.ko] undefined! ERROR: "ncsi_vlan_rx_add_vid" [drivers/net/ethernet/faraday/ftgmac100.ko] undefined! Related to that, we get another error when CONFIG_NET_NCSI is disabled: drivers/net/ethernet/faraday/ftgmac100.c:1626:25: error: 'ncsi_vlan_rx_add_vid' undeclared here (not in a function); did you mean 'ncsi_start_dev'? drivers/net/ethernet/faraday/ftgmac100.c:1627:26: error: 'ncsi_vlan_rx_kill_vid' undeclared here (not in a function); did you mean 'ncsi_vlan_rx_add_vid'? This fixes both problems at once, using a 'static inline' stub helper for the disabled case, and exporting the functions when they are present. Fixes: 51564585d8c6 ("ftgmac100: Support NCSI VLAN filtering when available") Fixes: 21acf63013ed ("net/ncsi: Configure VLAN tag filter") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/net/ncsi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 1f96af46df49..fdc60ff2511d 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -36,6 +36,16 @@ int ncsi_start_dev(struct ncsi_dev *nd); void ncsi_stop_dev(struct ncsi_dev *nd); void ncsi_unregister_dev(struct ncsi_dev *nd); #else /* !CONFIG_NET_NCSI */ +static inline int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + return -EINVAL; +} + +static inline int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + return -EINVAL; +} + static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)) { -- cgit v1.2.3 From 3a1214e8b06317b4e71cd3a36344df87b7858e19 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 1 Sep 2017 14:04:11 -0700 Subject: flow_dissector: Cleanup control flow __skb_flow_dissect is riddled with gotos that make discerning the flow, debugging, and extending the capability difficult. This patch reorganizes things so that we only perform goto's after the two main switch statements (no gotos within the cases now). It also eliminates several goto labels so that there are only two labels that can be target for goto. Reported-by: Alexander Popov Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index e2663e900b0a..fc3dce730a6b 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -19,6 +19,14 @@ struct flow_dissector_key_control { #define FLOW_DIS_FIRST_FRAG BIT(1) #define FLOW_DIS_ENCAPSULATION BIT(2) +enum flow_dissect_ret { + FLOW_DISSECT_RET_OUT_GOOD, + FLOW_DISSECT_RET_OUT_BAD, + FLOW_DISSECT_RET_PROTO_AGAIN, + FLOW_DISSECT_RET_IPPROTO_AGAIN, + FLOW_DISSECT_RET_CONTINUE, +}; + /** * struct flow_dissector_key_basic: * @thoff: Transport header offset -- cgit v1.2.3 From 55199df6d2af55be414f40856efcb527811001bb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 3 Sep 2017 20:27:00 -0700 Subject: net: dsa: Allow switch drivers to indicate number of TX queues Let switch drivers indicate how many TX queues they support. Some switches, such as Broadcom Starfighter 2 are designed with 8 egress queues. Future changes will allow us to leverage the queue mapping and direct the transmission towards a particular queue. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 398ca8d70ccd..dd44d6ce1097 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -243,6 +243,9 @@ struct dsa_switch { /* devlink used to represent this switch device */ struct devlink *devlink; + /* Number of switch port queues */ + unsigned int num_tx_queues; + /* Dynamically allocated ports, keep last */ size_t num_ports; struct dsa_port ports[]; -- cgit v1.2.3 From e1bf1687740ce1a3598a1c5e452b852ff2190682 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Sep 2017 14:39:51 +0200 Subject: netfilter: nat: Revert "netfilter: nat: convert nat bysrc hash to rhashtable" This reverts commit 870190a9ec9075205c0fa795a09fa931694a3ff1. It was not a good idea. The custom hash table was a much better fit for this purpose. A fast lookup is not essential, in fact for most cases there is no lookup at all because original tuple is not taken and can be used as-is. What needs to be fast is insertion and deletion. rhlist removal however requires a rhlist walk. We can have thousands of entries in such a list if source port/addresses are reused for multiple flows, if this happens removal requests are so expensive that deletions of a few thousand flows can take several seconds(!). The advantages that we got from rhashtable are: 1) table auto-sizing 2) multiple locks 1) would be nice to have, but it is not essential as we have at most one lookup per new flow, so even a million flows in the bysource table are not a problem compared to current deletion cost. 2) is easy to add to custom hash table. I tried to add hlist_node to rhlist to speed up rhltable_remove but this isn't doable without changing semantics. rhltable_remove_fast will check that the to-be-deleted object is part of the table and that requires a list walk that we want to avoid. Furthermore, using hlist_node increases size of struct rhlist_head, which in turn increases nf_conn size. Link: https://bugzilla.kernel.org/show_bug.cgi?id=196821 Reported-by: Ivan Babrou Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +-- include/net/netfilter/nf_nat.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index fdc9c64a1c94..8f3bd30511de 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -77,7 +76,7 @@ struct nf_conn { possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) - struct rhlist_head nat_bysource; + struct hlist_node nat_bysource; #endif /* all members below initialized via memset */ u8 __nfct_init_offset[0]; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 05c82a1a4267..b71701302e61 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -1,6 +1,5 @@ #ifndef _NF_NAT_H #define _NF_NAT_H -#include #include #include #include -- cgit v1.2.3 From d7fb60b9cafb982cb2e46a267646a8dfd4f2e5da Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 11 Sep 2017 16:33:30 -0700 Subject: net_sched: get rid of tcfa_rcu gen estimator has been rewritten in commit 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators"), the caller is no longer needed to wait for a grace period. So this patch gets rid of it. This also completely closes a race condition between action free path and filter chain add/remove path for the following patch. Because otherwise the nested RCU callback can't be caught by rcu_barrier(). Please see also the comments in code. Cc: Jiri Pirko Cc: Jamal Hadi Salim Cc: Eric Dumazet Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8f3d5d8b5ae0..b944e0eb93be 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -34,7 +34,6 @@ struct tc_action { struct gnet_stats_queue tcfa_qstats; struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; - struct rcu_head tcfa_rcu; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie *act_cookie; @@ -50,7 +49,6 @@ struct tc_action { #define tcf_qstats common.tcfa_qstats #define tcf_rate_est common.tcfa_rate_est #define tcf_lock common.tcfa_lock -#define tcf_rcu common.tcfa_rcu /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. -- cgit v1.2.3 From fa5f7b51fc3080c2b195fa87c7eca7c05e56f673 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 14 Sep 2017 02:00:54 +0300 Subject: sctp: potential read out of bounds in sctp_ulpevent_type_enabled() This code causes a static checker warning because Smatch doesn't trust anything that comes from skb->data. I've reviewed this code and I do think skb->data can be controlled by the user here. The sctp_event_subscribe struct has 13 __u8 fields and we want to see if ours is non-zero. sn_type can be any value in the 0-USHRT_MAX range. We're subtracting SCTP_SN_TYPE_BASE which is 1 << 15 so we could read either before the start of the struct or after the end. This is a very old bug and it's surprising that it would go undetected for so long but my theory is that it just doesn't have a big impact so it would be hard to notice. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- include/net/sctp/ulpevent.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 1060494ac230..b8c86ec1a8f5 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -153,8 +153,12 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event); static inline int sctp_ulpevent_type_enabled(__u16 sn_type, struct sctp_event_subscribe *mask) { + int offset = sn_type - SCTP_SN_TYPE_BASE; char *amask = (char *) mask; - return amask[sn_type - SCTP_SN_TYPE_BASE]; + + if (offset >= sizeof(struct sctp_event_subscribe)) + return 0; + return amask[offset]; } /* Given an event subscription, is this event enabled? */ -- cgit v1.2.3 From d25adbeb0cdb860fb39e09cdd025e9cfc954c5ab Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 15 Sep 2017 11:02:21 +0800 Subject: sctp: fix an use-after-free issue in sctp_sock_dump Commit 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the dump") tried to fix an use-after-free issue by checking !sctp_sk(sk)->ep with holding sock and sock lock. But Paolo noticed that endpoint could be destroyed in sctp_rcv without sock lock protection. It means the use-after-free issue still could be triggered when sctp_rcv put and destroy ep after sctp_sock_dump checks !ep, although it's pretty hard to reproduce. I could reproduce it by mdelay in sctp_rcv while msleep in sctp_close and sctp_sock_dump long time. This patch is to add another param cb_done to sctp_for_each_transport and dump ep->assocs with holding tsp after jumping out of transport's traversal in it to avoid this issue. It can also improve sctp diag dump to make it run faster, as no need to save sk into cb->args[5] and keep calling sctp_for_each_transport any more. This patch is also to use int * instead of int for the pos argument in sctp_for_each_transport, which could make postion increment only in sctp_for_each_transport and no need to keep changing cb->args[2] in sctp_sock_filter and sctp_sock_dump any more. Fixes: 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the dump") Reported-by: Paolo Abeni Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 06b4f515e157..d7d8cba01469 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -127,7 +127,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - struct net *net, int pos, void *p); + int (*cb_done)(struct sctp_transport *, void *), + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); -- cgit v1.2.3 From 7016e0627171878810798a842a416dddee4e3329 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 13 Sep 2017 13:58:15 -0700 Subject: net: Convert int functions to bool Global function ipv6_rcv_saddr_equal and static functions ipv6_rcv_saddr_equal and ipv4_rcv_saddr_equal currently return int. bool is slightly more descriptive for these functions so change their return type from int to bool. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/addrconf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f44ff2476758..87981cd63180 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -94,8 +94,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); -int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard); +bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); -- cgit v1.2.3 From 4c7124413aa759b8ea0b90cd39177e525396e662 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 18 Sep 2017 11:05:16 -0700 Subject: tcp: remove two unused functions remove tcp_may_send_now and tcp_snd_test that are no longer used Fixes: 840a3cbe8969 ("tcp: remove forward retransmit feature") Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index b510f284427a..3bc910a9bfc6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -544,7 +544,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); -bool tcp_may_send_now(struct sock *sk); int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); void tcp_retransmit_timer(struct sock *sk); -- cgit v1.2.3 From bffa72cf7f9df842f0016ba03586039296b4caaf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 05:14:24 -0700 Subject: net: sk_buff rbnode reorg skb->rbnode shares space with skb->next, skb->prev and skb->tstamp Current uses (TCP receive ofo queue and netem) need to save/restore tstamp, while skb->dev is either NULL (TCP) or a constant for a given queue (netem). Since we plan using an RB tree for TCP retransmit queue to speedup SACK processing with large BDP, this patch exchanges skb->dev and skb->tstamp. This saves some overhead in both TCP and netem. v2: removes the swtstamp field from struct tcp_skb_cb Signed-off-by: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: Wei Wang Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index b510f284427a..49a8a46466f3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -797,12 +797,6 @@ struct tcp_skb_cb { u16 tcp_gso_segs; u16 tcp_gso_size; }; - - /* Used to stash the receive timestamp while this skb is in the - * out of order queue, as skb->tstamp is overwritten by the - * rbnode. - */ - ktime_t swtstamp; }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ -- cgit v1.2.3 From f5619866592c65adc087364cc1a3ba709201ea26 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 19 Sep 2017 11:56:57 -0400 Subject: net: dsa: remove copy of master ethtool_ops There is no need to store a copy of the master ethtool ops, storing the original pointer in DSA and the new one in the master netdev itself is enough. In the meantime, set orig_ethtool_ops to NULL when restoring the master ethtool ops and check the presence of the master original ethtool ops as well as its needed functions before calling them. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index dd44d6ce1097..8dee216a5a9b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -188,7 +188,6 @@ struct dsa_port { /* * Original copy of the master netdev ethtool_ops */ - struct ethtool_ops ethtool_ops; const struct ethtool_ops *orig_ethtool_ops; }; -- cgit v1.2.3 From 752fbcc33405d6f8249465e4b2c4e420091bb825 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 19 Sep 2017 13:15:42 -0700 Subject: net_sched: no need to free qdisc in RCU callback gen estimator has been rewritten in commit 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators"), the caller no longer needs to wait for a grace period. So this patch gets rid of it. Cc: Jamal Hadi Salim Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 135f5a2dd931..684d8ed27eaa 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -93,7 +93,6 @@ struct Qdisc { unsigned long state; struct Qdisc *next_sched; struct sk_buff *skb_bad_txq; - struct rcu_head rcu_head; int padded; refcount_t refcnt; -- cgit v1.2.3 From a90c9347e90ed1e9323d71402ed18023bc910cd8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:06 -0700 Subject: ipv6: addrlabel: per netns list Having a global list of labels do not scale to thousands of netns in the cloud era. This causes quadratic behavior on netns creation and deletion. This is time having a per netns list of ~10 labels. Tested: $ time perf record (for f in `seq 1 3000` ; do ip netns add tast$f; done) [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 3.637 MB perf.data (~158898 samples) ] real 0m20.837s # instead of 0m24.227s user 0m0.328s sys 0m20.338s # instead of 0m23.753s 16.17% ip [kernel.kallsyms] [k] netlink_broadcast_filtered 12.30% ip [kernel.kallsyms] [k] netlink_has_listeners 6.76% ip [kernel.kallsyms] [k] _raw_spin_lock_irqsave 5.78% ip [kernel.kallsyms] [k] memset_erms 5.77% ip [kernel.kallsyms] [k] kobject_uevent_env 5.18% ip [kernel.kallsyms] [k] refcount_sub_and_test 4.96% ip [kernel.kallsyms] [k] _raw_read_lock 3.82% ip [kernel.kallsyms] [k] refcount_inc_not_zero 3.33% ip [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 2.11% ip [kernel.kallsyms] [k] unmap_page_range 1.77% ip [kernel.kallsyms] [k] __wake_up 1.69% ip [kernel.kallsyms] [k] strlen 1.17% ip [kernel.kallsyms] [k] __wake_up_common 1.09% ip [kernel.kallsyms] [k] insert_header 1.04% ip [kernel.kallsyms] [k] page_remove_rmap 1.01% ip [kernel.kallsyms] [k] consume_skb 0.98% ip [kernel.kallsyms] [k] netlink_trim 0.51% ip [kernel.kallsyms] [k] kernfs_link_sibling 0.51% ip [kernel.kallsyms] [k] filemap_map_pages 0.46% ip [kernel.kallsyms] [k] memcpy_erms Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 2544f9760a42..2ea1ed341ef8 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -89,6 +89,11 @@ struct netns_ipv6 { atomic_t fib6_sernum; struct seg6_pernet_data *seg6_data; struct fib_notifier_ops *notifier_ops; + struct { + struct hlist_head head; + spinlock_t lock; + u32 seq; + } ip6addrlbl_table; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) -- cgit v1.2.3 From 64bc17811b72758753e2b64cd8f2a63812c61fe1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:09 -0700 Subject: ipv4: speedup ipv6 tunnels dismantle Implement exit_batch() method to dismantle more devices per round. (rtnl_lock() ... unregister_netdevice_many() ... rtnl_unlock()) Tested: $ cat add_del_unshare.sh for i in `seq 1 40` do (for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) & done wait ; grep net_namespace /proc/slabinfo Before patch : $ time ./add_del_unshare.sh net_namespace 126 282 5504 1 2 : tunables 8 4 0 : slabdata 126 282 0 real 1m38.965s user 0m0.688s sys 0m37.017s After patch: $ time ./add_del_unshare.sh net_namespace 135 291 5504 1 2 : tunables 8 4 0 : slabdata 135 291 0 real 0m22.117s user 0m0.728s sys 0m35.328s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 992652856fe8..b41a1e057fce 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -258,7 +258,8 @@ int ip_tunnel_get_iflink(const struct net_device *dev); int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); -void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); +void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, + struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); -- cgit v1.2.3 From 2512b1b18d0748d867bb22387db7c86b903291ad Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Sat, 5 Aug 2017 11:44:31 +0300 Subject: mac80211: extend ieee80211_ie_split to support EXTENSION Current ieee80211_ie_split() implementation doesn't account for elements that are sub-elements of the EXTENSION IE. To extend support to these IEs as well, treat the WLAN_EID_EXTENSION ids in the %ids array as indicating that the next id in the array is a sub-element of the EXTENSION IE. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f12fa5245a45..aa9d993e519a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5934,7 +5934,8 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, * @ies: the IE buffer * @ielen: the length of the IE buffer * @ids: an array with element IDs that are allowed before - * the split + * the split. A WLAN_EID_EXTENSION value means that the next + * EID in the list is a sub-element of the EXTENSION IE. * @n_ids: the size of the element ID array * @after_ric: array IE types that come after the RIC element * @n_after_ric: size of the @after_ric array @@ -5965,7 +5966,8 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, * @ies: the IE buffer * @ielen: the length of the IE buffer * @ids: an array with element IDs that are allowed before - * the split + * the split. A WLAN_EID_EXTENSION value means that the next + * EID in the list is a sub-element of the EXTENSION IE. * @n_ids: the size of the element ID array * @offset: offset where to start splitting in the buffer * -- cgit v1.2.3 From 1272c5d89b597995cb10db87dd4a1adc91d36006 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 18 Aug 2017 15:33:56 +0300 Subject: mac80211: add documentation to ieee80211_rx_ba_offl() Add documentation to ieee80211_rx_ba_offl() function and, while at it, rename the bit argument to tid, for consistency. Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 885690fa39c8..cc9073e45be9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5441,8 +5441,14 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, */ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); +/** + * ieee80211_manage_rx_ba_offl - helper to queue an RX BA work + * @vif: &struct ieee80211_vif pointer from the add_interface callback + * @addr: station mac address + * @tid: the rx tid + */ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr, - unsigned int bit); + unsigned int tid); /** * ieee80211_start_rx_ba_session_offl - start a Rx BA session -- cgit v1.2.3 From a6bcda44843c6dfced0fb973e2607c2a98addfa9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Sep 2017 11:52:43 +0200 Subject: cfg80211: remove unused function ieee80211_data_from_8023() This function hasn't been used since the removal of iwmc3200wifi in 2012. It also appears to have a bug when qos=True, since then it'll copy uninitialized stack memory to the SKB. Just remove the function entirely. Reported-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index aa9d993e519a..cc1996081463 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4346,19 +4346,6 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype); } -/** - * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11 - * @skb: the 802.3 frame - * @addr: the device MAC address - * @iftype: the virtual interface type - * @bssid: the network bssid (used only for iftype STATION and ADHOC) - * @qos: build 802.11 QoS data frame - * Return: 0 on success, or a negative error code. - */ -int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, const u8 *bssid, - bool qos); - /** * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame * -- cgit v1.2.3 From 6e617de84e87d626d1e976fc30e1322239fd4d2d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Sep 2017 18:26:53 +0200 Subject: net: avoid a full fib lookup when rp_filter is disabled. Since commit 1dced6a85482 ("ipv4: Restore accept_local behaviour in fib_validate_source()") a full fib lookup is needed even if the rp_filter is disabled, if accept_local is false - which is the default. What we really need in the above scenario is just checking that the source IP address is not local, and in most case we can do that is a cheaper way looking up the ifaddr hash table. This commit adds a helper for such lookup, and uses it to validate the src address when rp_filter is disabled and no 'local' routes are created by the user space in the relevant namespace. A new ipv4 netns flag is added to account for such routes. We need that to preserve the same behavior we had before this patch. It also drops the checks to bail early from __fib_validate_source, added by the commit 1dced6a85482 ("ipv4: Restore accept_local behaviour in fib_validate_source()") they do not give any measurable performance improvement: if we do the lookup with are on a slower path. This improves UDP performances for unconnected sockets when rp_filter is disabled by 5% and also gives small but measurable performance improvement for TCP flood scenarios. v1 -> v2: - use the ifaddr lookup helper in __ip_dev_find(), as suggested by Eric - fall-back to full lookup if custom local routes are present Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 20d061c805e3..20720721da4b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -49,6 +49,7 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; + bool fib_has_custom_local_routes; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif -- cgit v1.2.3 From a1f3316dd7b5ce740c774697c664e2e60d095794 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 21 Sep 2017 18:18:23 -0700 Subject: ipv4: Move fib_has_custom_local_routes outside of IP_MULTIPLE_TABLES. > net/ipv4/fib_frontend.c: In function 'fib_validate_source': > net/ipv4/fib_frontend.c:411:16: error: 'struct netns_ipv4' has no member named 'fib_has_custom_local_routes' > if (net->ipv4.fib_has_custom_local_routes) > ^ > net/ipv4/fib_frontend.c: In function 'inet_rtm_newroute': > net/ipv4/fib_frontend.c:773:12: error: 'struct netns_ipv4' has no member named 'fib_has_custom_local_routes' > net->ipv4.fib_has_custom_local_routes = true; > ^ Fixes: 6e617de84e87 ("net: avoid a full fib lookup when rp_filter is disabled.") Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 20720721da4b..8387f099115e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -49,10 +49,10 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; - bool fib_has_custom_local_routes; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif + bool fib_has_custom_local_routes; #ifdef CONFIG_IP_ROUTE_CLASSID int fib_num_tclassid_users; #endif -- cgit v1.2.3 From 222d7dbd258dad4cd5241c43ef818141fad5a87a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2017 09:15:46 -0700 Subject: net: prevent dst uses after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In linux-4.13, Wei worked hard to convert dst to a traditional refcounted model, removing GC. We now want to make sure a dst refcount can not transition from 0 back to 1. The problem here is that input path attached a not refcounted dst to an skb. Then later, because packet is forwarded and hits skb_dst_force() before exiting RCU section, we might try to take a refcount on one dst that is about to be freed, if another cpu saw 1 -> 0 transition in dst_release() and queued the dst for freeing after one RCU grace period. Lets unify skb_dst_force() and skb_dst_force_safe(), since we should always perform the complete check against dst refcount, and not assume it is not zero. Bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=197005 [ 989.919496] skb_dst_force+0x32/0x34 [ 989.919498] __dev_queue_xmit+0x1ad/0x482 [ 989.919501] ? eth_header+0x28/0xc6 [ 989.919502] dev_queue_xmit+0xb/0xd [ 989.919504] neigh_connected_output+0x9b/0xb4 [ 989.919507] ip_finish_output2+0x234/0x294 [ 989.919509] ? ipt_do_table+0x369/0x388 [ 989.919510] ip_finish_output+0x12c/0x13f [ 989.919512] ip_output+0x53/0x87 [ 989.919513] ip_forward_finish+0x53/0x5a [ 989.919515] ip_forward+0x2cb/0x3e6 [ 989.919516] ? pskb_trim_rcsum.part.9+0x4b/0x4b [ 989.919518] ip_rcv_finish+0x2e2/0x321 [ 989.919519] ip_rcv+0x26f/0x2eb [ 989.919522] ? vlan_do_receive+0x4f/0x289 [ 989.919523] __netif_receive_skb_core+0x467/0x50b [ 989.919526] ? tcp_gro_receive+0x239/0x239 [ 989.919529] ? inet_gro_receive+0x226/0x238 [ 989.919530] __netif_receive_skb+0x4d/0x5f [ 989.919532] netif_receive_skb_internal+0x5c/0xaf [ 989.919533] napi_gro_receive+0x45/0x81 [ 989.919536] ixgbe_poll+0xc8a/0xf09 [ 989.919539] ? kmem_cache_free_bulk+0x1b6/0x1f7 [ 989.919540] net_rx_action+0xf4/0x266 [ 989.919543] __do_softirq+0xa8/0x19d [ 989.919545] irq_exit+0x5d/0x6b [ 989.919546] do_IRQ+0x9c/0xb5 [ 989.919548] common_interrupt+0x93/0x93 [ 989.919548] Similarly dst_clone() can use dst_hold() helper to have additional debugging, as a follow up to commit 44ebe79149ff ("net: add debug atomic_inc_not_zero() in dst_hold()") In net-next we will convert dst atomic_t to refcount_t for peace of mind. Fixes: a4c2fd7f7891 ("net: remove DST_NOCACHE flag") Signed-off-by: Eric Dumazet Cc: Wei Wang Reported-by: Paweł Staszewski Bisected-by: Paweł Staszewski Acked-by: Wei Wang Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/dst.h | 22 ++++------------------ include/net/route.h | 2 +- include/net/sock.h | 2 +- 3 files changed, 6 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 93568bd0a352..06a6765da074 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) static inline struct dst_entry *dst_clone(struct dst_entry *dst) { if (dst) - atomic_inc(&dst->__refcnt); + dst_hold(dst); return dst; } @@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb __skb_dst_copy(nskb, oskb->_skb_refdst); } -/** - * skb_dst_force - makes sure skb dst is refcounted - * @skb: buffer - * - * If dst is not yet refcounted, let's do it - */ -static inline void skb_dst_force(struct sk_buff *skb) -{ - if (skb_dst_is_noref(skb)) { - WARN_ON(!rcu_read_lock_held()); - skb->_skb_refdst &= ~SKB_DST_NOREF; - dst_clone(skb_dst(skb)); - } -} - /** * dst_hold_safe - Take a reference on a dst if possible * @dst: pointer to dst entry @@ -339,16 +324,17 @@ static inline bool dst_hold_safe(struct dst_entry *dst) } /** - * skb_dst_force_safe - makes sure skb dst is refcounted + * skb_dst_force - makes sure skb dst is refcounted * @skb: buffer * * If dst is not yet refcounted and not destroyed, grab a ref on it. */ -static inline void skb_dst_force_safe(struct sk_buff *skb) +static inline void skb_dst_force(struct sk_buff *skb) { if (skb_dst_is_noref(skb)) { struct dst_entry *dst = skb_dst(skb); + WARN_ON(!rcu_read_lock_held()); if (!dst_hold_safe(dst)) dst = NULL; diff --git a/include/net/route.h b/include/net/route.h index 1b09a9368c68..57dfc6850d37 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -190,7 +190,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, rcu_read_lock(); err = ip_route_input_noref(skb, dst, src, tos, devin); if (!err) { - skb_dst_force_safe(skb); + skb_dst_force(skb); if (!skb_dst(skb)) err = -EINVAL; } diff --git a/include/net/sock.h b/include/net/sock.h index 03a362568357..a6b9a8d1a6df 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -856,7 +856,7 @@ void sk_stream_write_space(struct sock *sk); static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { /* dont let skb dst not refcounted, we are going to leave rcu lock */ - skb_dst_force_safe(skb); + skb_dst_force(skb); if (!sk->sk_backlog.tail) sk->sk_backlog.head = skb; -- cgit v1.2.3 From 373b8eeb0c15d4ce58f62afb12f213b1b5bbc3d3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:45:43 +0300 Subject: xfrm: make aead_len() return unsigned int Key lengths can't be negative. Comparison with nla_len() is left signed just in case negative value can sneak in there. Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f002a2c5e33c..0be4c547e383 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1764,7 +1764,7 @@ static inline int xfrm_acquire_is_on(struct net *net) } #endif -static inline int aead_len(struct xfrm_algo_aead *alg) +static inline unsigned int aead_len(struct xfrm_algo_aead *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -- cgit v1.2.3 From 06cd22f830f28023b82455c82c7db65fc6cf9c16 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:46:30 +0300 Subject: xfrm: make xfrm_alg_len() return unsigned int Key lengths can't be negative. Comparison with nla_len() is left signed just in case negative value can sneak in there. Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0be4c547e383..2abc0e117f11 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1769,7 +1769,7 @@ static inline unsigned int aead_len(struct xfrm_algo_aead *alg) return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_alg_len(const struct xfrm_algo *alg) +static inline unsigned int xfrm_alg_len(const struct xfrm_algo *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -- cgit v1.2.3 From 1bd963a72e859d194d87a5a2a8839efee7e23102 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:47:09 +0300 Subject: xfrm: make xfrm_alg_auth_len() return unsigned int Key lengths can't be negative. Comparison with nla_len() is left signed just in case negative value can sneak in there. Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2abc0e117f11..5d5e11b653eb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1774,7 +1774,7 @@ static inline unsigned int xfrm_alg_len(const struct xfrm_algo *alg) return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg) +static inline unsigned int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -- cgit v1.2.3 From 5e708e47c44366453c33373940455a75fd33f635 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:47:50 +0300 Subject: xfrm: make xfrm_replay_state_esn_len() return unsigned int Replay detection bitmaps can't have negative length. Comparisons with nla_len() are left signed just in case negative value can sneak in there. Propagate unsignedness for code size savings: add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-38 (-38) function old new delta xfrm_state_construct 1802 1800 -2 xfrm_update_ae_params 295 289 -6 xfrm_state_migrate 1345 1339 -6 xfrm_replay_notify_esn 349 337 -12 xfrm_replay_notify_bmp 345 333 -12 Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5d5e11b653eb..3cb618bbcfa5 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1779,7 +1779,7 @@ static inline unsigned int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg) return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn) +static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn) { return sizeof(*replay_esn) + replay_esn->bmp_len * sizeof(__u32); } -- cgit v1.2.3 From b4391db42308c9940944b5d7be5ca4b78fb88dd0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Sep 2017 23:29:19 +0200 Subject: netlink: fix nla_put_{u8,u16,u32} for KASAN When CONFIG_KASAN is enabled, the "--param asan-stack=1" causes rather large stack frames in some functions. This goes unnoticed normally because CONFIG_FRAME_WARN is disabled with CONFIG_KASAN by default as of commit 3f181b4d8652 ("lib/Kconfig.debug: disable -Wframe-larger-than warnings with KASAN=y"). The kernelci.org build bot however has the warning enabled and that led me to investigate it a little further, as every build produces these warnings: net/wireless/nl80211.c:4389:1: warning: the frame size of 2240 bytes is larger than 2048 bytes [-Wframe-larger-than=] net/wireless/nl80211.c:1895:1: warning: the frame size of 3776 bytes is larger than 2048 bytes [-Wframe-larger-than=] net/wireless/nl80211.c:1410:1: warning: the frame size of 2208 bytes is larger than 2048 bytes [-Wframe-larger-than=] net/bridge/br_netlink.c:1282:1: warning: the frame size of 2544 bytes is larger than 2048 bytes [-Wframe-larger-than=] Most of this problem is now solved in gcc-8, which can consolidate the stack slots for the inline function arguments. On older compilers we can add a workaround by declaring a local variable in each function to pass the inline function argument. Cc: stable@vger.kernel.org Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/net/netlink.h | 73 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index e51cf5f81597..14c289393071 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -773,7 +773,10 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, */ static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) { - return nla_put(skb, attrtype, sizeof(u8), &value); + /* temporary variables to work around GCC PR81715 with asan-stack=1 */ + u8 tmp = value; + + return nla_put(skb, attrtype, sizeof(u8), &tmp); } /** @@ -784,7 +787,9 @@ static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) */ static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) { - return nla_put(skb, attrtype, sizeof(u16), &value); + u16 tmp = value; + + return nla_put(skb, attrtype, sizeof(u16), &tmp); } /** @@ -795,7 +800,9 @@ static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) */ static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value) { - return nla_put(skb, attrtype, sizeof(__be16), &value); + __be16 tmp = value; + + return nla_put(skb, attrtype, sizeof(__be16), &tmp); } /** @@ -806,7 +813,9 @@ static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value) */ static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value) { - return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, value); + __be16 tmp = value; + + return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** @@ -817,7 +826,9 @@ static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value) */ static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value) { - return nla_put(skb, attrtype, sizeof(__le16), &value); + __le16 tmp = value; + + return nla_put(skb, attrtype, sizeof(__le16), &tmp); } /** @@ -828,7 +839,9 @@ static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value) */ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) { - return nla_put(skb, attrtype, sizeof(u32), &value); + u32 tmp = value; + + return nla_put(skb, attrtype, sizeof(u32), &tmp); } /** @@ -839,7 +852,9 @@ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) */ static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value) { - return nla_put(skb, attrtype, sizeof(__be32), &value); + __be32 tmp = value; + + return nla_put(skb, attrtype, sizeof(__be32), &tmp); } /** @@ -850,7 +865,9 @@ static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value) */ static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value) { - return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, value); + __be32 tmp = value; + + return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** @@ -861,7 +878,9 @@ static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value) */ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value) { - return nla_put(skb, attrtype, sizeof(__le32), &value); + __le32 tmp = value; + + return nla_put(skb, attrtype, sizeof(__le32), &tmp); } /** @@ -874,7 +893,9 @@ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value) static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, u64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(u64), &value, padattr); + u64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr); } /** @@ -887,7 +908,9 @@ static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(__be64), &value, padattr); + __be64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(__be64), &tmp, padattr); } /** @@ -900,7 +923,9 @@ static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { - return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value, + __be64 tmp = value; + + return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, tmp, padattr); } @@ -914,7 +939,9 @@ static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(__le64), &value, padattr); + __le64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(__le64), &tmp, padattr); } /** @@ -925,7 +952,9 @@ static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, */ static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value) { - return nla_put(skb, attrtype, sizeof(s8), &value); + s8 tmp = value; + + return nla_put(skb, attrtype, sizeof(s8), &tmp); } /** @@ -936,7 +965,9 @@ static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value) */ static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value) { - return nla_put(skb, attrtype, sizeof(s16), &value); + s16 tmp = value; + + return nla_put(skb, attrtype, sizeof(s16), &tmp); } /** @@ -947,7 +978,9 @@ static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value) */ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) { - return nla_put(skb, attrtype, sizeof(s32), &value); + s32 tmp = value; + + return nla_put(skb, attrtype, sizeof(s32), &tmp); } /** @@ -960,7 +993,9 @@ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(s64), &value, padattr); + s64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(s64), &tmp, padattr); } /** @@ -1010,7 +1045,9 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, static inline int nla_put_in_addr(struct sk_buff *skb, int attrtype, __be32 addr) { - return nla_put_be32(skb, attrtype, addr); + __be32 tmp = addr; + + return nla_put_be32(skb, attrtype, tmp); } /** -- cgit v1.2.3 From e451ae8e4f6b3f6bd3b83a5595657b5421b3bf69 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 23 Sep 2017 23:01:06 +0300 Subject: neigh: make struct neigh_table::entry_size unsigned int Neigh entry size can't be negative. Space savings: add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-7 (-7) function old new delta lowpan_neigh_construct 25 24 -1 clip_seq_sub_iter 152 151 -1 clip_ioctl 1475 1474 -1 clip_constructor 93 92 -1 __neigh_create 2455 2452 -3 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/neighbour.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 9816df225af3..9a25512e0a6e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -190,7 +190,7 @@ struct neigh_hash_table { struct neigh_table { int family; - int entry_size; + unsigned int entry_size; int key_len; __be16 protocol; __u32 (*hash)(const void *pkey, -- cgit v1.2.3 From 01ccdf126ca5f9d4fe0889f65ee67afac910f19c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 23 Sep 2017 23:03:04 +0300 Subject: neigh: make strucrt neigh_table::entry_size unsigned int Key length can't be negative. Leave comparisons against nla_len() signed just in case truncated attribute can sneak in there. Space savings: add/remove: 0/0 grow/shrink: 0/7 up/down: 0/-7 (-7) function old new delta pneigh_delete 273 272 -1 mlx5e_rep_netevent_event 1415 1414 -1 mlx5e_create_encap_header_ipv6 1194 1193 -1 mlx5e_create_encap_header_ipv4 1071 1070 -1 cxgb4_l2t_get 1104 1103 -1 __pneigh_lookup 69 68 -1 __neigh_create 2452 2451 -1 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/neighbour.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 9a25512e0a6e..2492000e1035 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -191,7 +191,7 @@ struct neigh_hash_table { struct neigh_table { int family; unsigned int entry_size; - int key_len; + unsigned int key_len; __be16 protocol; __u32 (*hash)(const void *pkey, const struct net_device *dev, -- cgit v1.2.3 From 3b8e9238a8d194e82f0202a5fb68a63686ebe420 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Sep 2017 10:58:21 +0200 Subject: net: sched: introduce helper to identify gact pass action Introduce a helper called is_tcf_gact_pass which could be used to tell if the action is gact pass or not. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/tc_act/tc_gact.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 41afe1ce7b16..d979a0d48f9e 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -33,6 +33,11 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act, return false; } +static inline bool is_tcf_gact_ok(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_OK, false); +} + static inline bool is_tcf_gact_shot(const struct tc_action *a) { return __is_tcf_gact_act(a, TC_ACT_SHOT, false); -- cgit v1.2.3 From 85e482285bbbd508483cbe08de69c8fe00cdbbfe Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:11 +0200 Subject: fib: notifier: Add VIF add and delete event types In order for an interface to forward packets according to the kernel multicast routing table, it must be configured with a VIF index according to the mroute user API. The VIF index is then used to refer to that interface in the mroute user API, for example, to set the iif and oifs of an MFC entry. In order to allow drivers to be aware and offload multicast routes, they have to be aware of the VIF add and delete notifications. Due to the fact that a specific VIF can be deleted and re-added pointing to another netdevice, and the MFC routes that point to it will forward the matching packets to the new netdevice, a driver willing to offload MFC cache entries must be aware of the VIF add and delete events in addition to MFC routes notifications. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/fib_notifier.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index 669b9716dc7a..54cd6b839d2f 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -20,6 +20,8 @@ enum fib_event_type { FIB_EVENT_RULE_DEL, FIB_EVENT_NH_ADD, FIB_EVENT_NH_DEL, + FIB_EVENT_VIF_ADD, + FIB_EVENT_VIF_DEL, }; struct fib_notifier_ops { -- cgit v1.2.3 From 4d65b9487831170e699b2fc64a91b839d729bd78 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:13 +0200 Subject: ipmr: Add FIB notification access functions Make the ipmr module register as a FIB notifier. To do that, implement both the ipmr_seq_read and ipmr_dump ops. The ipmr_seq_read op returns a sequence counter that is incremented on every notification related operation done by the ipmr. To implement that, add a sequence counter in the netns_ipv4 struct and increment it whenever a new MFC route or VIF are added or deleted. The sequence operations are protected by the RTNL lock. The ipmr_dump iterates the list of MFC routes and the list of VIF entries and sends notifications about them. The entries dump is done under RCU where the VIF dump uses the mrt_lock too, as the vif->dev field can change under RCU. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8387f099115e..abc84d986da4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -163,6 +163,9 @@ struct netns_ipv4 { struct fib_notifier_ops *notifier_ops; unsigned int fib_seq; /* protected by rtnl_mutex */ + struct fib_notifier_ops *ipmr_notifier_ops; + unsigned int ipmr_seq; /* protected by rtnl_mutex */ + atomic_t rt_genid; }; #endif -- cgit v1.2.3 From c7c3e5913bf18eda3cf38932bebdce48351baac9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 27 Sep 2017 19:08:00 -0700 Subject: net: ipv4: remove fib_weight fib_weight in fib_info is set but not used. Remove it and the helpers for setting it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1a7f7e424320..f80524396c06 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -122,9 +122,6 @@ struct fib_info { #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - int fib_weight; -#endif struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev -- cgit v1.2.3 From 7487449c86c65202b3b725c4524cb48dd65e4e6f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Sep 2017 15:51:36 +0200 Subject: IPv4: early demux can return an error code Currently no error is emitted, but this infrastructure will used by the next patch to allow source address validation for mcast sockets. Since early demux can do a route lookup and an ipv4 route lookup can return an error code this is consistent with the current ipv4 route infrastructure. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/protocol.h | 4 ++-- include/net/tcp.h | 2 +- include/net/udp.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/protocol.h b/include/net/protocol.h index 65ba335b0e7e..4fc75f7ae23b 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -39,8 +39,8 @@ /* This is used to register protocols. */ struct net_protocol { - void (*early_demux)(struct sk_buff *skb); - void (*early_demux_handler)(struct sk_buff *skb); + int (*early_demux)(struct sk_buff *skb); + int (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); unsigned int no_policy:1, diff --git a/include/net/tcp.h b/include/net/tcp.h index 3bc910a9bfc6..89974c5286d8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -345,7 +345,7 @@ void tcp_v4_err(struct sk_buff *skb, u32); void tcp_shutdown(struct sock *sk, int how); -void tcp_v4_early_demux(struct sk_buff *skb); +int tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); diff --git a/include/net/udp.h b/include/net/udp.h index 12dfbfe2e2d7..6c759c8594e2 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -259,7 +259,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); } -void udp_v4_early_demux(struct sk_buff *skb); +int udp_v4_early_demux(struct sk_buff *skb); bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, -- cgit v1.2.3 From bc044e8db7962e727a75b591b9851ff2ac5cf846 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Sep 2017 15:51:37 +0200 Subject: udp: perform source validation for mcast early demux The UDP early demux can leverate the rx dst cache even for multicast unconnected sockets. In such scenario the ipv4 source address is validated only on the first packet in the given flow. After that, when we fetch the dst entry from the socket rx cache, we stop enforcing the rp_filter and we even start accepting any kind of martian addresses. Disabling the dst cache for unconnected multicast socket will cause large performace regression, nearly reducing by half the max ingress tput. Instead we factor out a route helper to completely validate an skb source address for multicast packets and we call it from the UDP early demux for mcast packets landing on unconnected sockets, after successful fetching the related cached dst entry. This still gives a measurable, but limited performance regression: rp_filter = 0 rp_filter = 1 edmux disabled: 1182 Kpps 1127 Kpps edmux before: 2238 Kpps 2238 Kpps edmux after: 2037 Kpps 2019 Kpps The above figures are on top of current net tree. Applying the net-next commit 6e617de84e87 ("net: avoid a full fib lookup when rp_filter is disabled.") the delta with rp_filter == 0 will decrease even more. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/route.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 57dfc6850d37..d538e6db1afe 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -175,7 +175,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 fl4->fl4_gre_key = gre_key; return ip_route_output_key(net, fl4); } - +int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, + struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin); int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src, -- cgit v1.2.3 From 152402483ed75b167d5628d414e876ffa7a6d4c4 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:18 -0400 Subject: net: dsa: add tagging ops to port The DSA tagging protocol operations are specific to each CPU port, thus the dsa_device_ops pointer belongs to the dsa_port structure. >From now on assign a slave's xmit copy from its CPU port tagging operations. This will ease the future support for multiple CPU ports. Also keep the tag_ops at the beginning of the dsa_port structure so that we ensure copies for hot path are in cacheline 1. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 8dee216a5a9b..4d1df2f086e8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -175,6 +175,9 @@ struct dsa_mall_tc_entry { struct dsa_port { + /* CPU port tagging operations used by master or slave devices */ + const struct dsa_device_ops *tag_ops; + struct dsa_switch *ds; unsigned int index; const char *name; -- cgit v1.2.3 From 3e41f93b358a8800194b87995ad076fc50919719 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:19 -0400 Subject: net: dsa: prepare master receive hot path In preparation to make DSA master devices point to their corresponding CPU port instead of the whole tree, add copies of dst and rcv in the dsa_port structure so that we keep fast access in the receive hot path. Also keep the copies at the beginning of the dsa_port structure in order to ensure they are available in cacheline 1. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 4d1df2f086e8..6bda01fa5747 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -178,6 +178,11 @@ struct dsa_port { /* CPU port tagging operations used by master or slave devices */ const struct dsa_device_ops *tag_ops; + /* Copies for faster access in master receive hot path */ + struct dsa_switch_tree *dst; + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt); + struct dsa_switch *ds; unsigned int index; const char *name; -- cgit v1.2.3 From aa193d9b1d7ea6893ce24a9d141f676950563987 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:21 -0400 Subject: net: dsa: remove tag ops from the switch tree Now that the dsa_ptr is a dsa_port instance, there is no need to keep the tag operations in the dsa_switch_tree structure. Remove it. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 6bda01fa5747..10dceccd9ce8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -130,11 +130,6 @@ struct dsa_switch_tree { */ struct dsa_platform_data *pd; - /* Copy of tag_ops->rcv for faster access in hot path */ - struct sk_buff * (*rcv)(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt); - /* * The switch port to which the CPU is attached. */ @@ -144,12 +139,6 @@ struct dsa_switch_tree { * Data for the individual switch chips. */ struct dsa_switch *ds[DSA_MAX_SWITCHES]; - - /* - * Tagging protocol operations for adding and removing an - * encapsulation tag. - */ - const struct dsa_device_ops *tag_ops; }; /* TC matchall action types, only mirroring for now */ -- cgit v1.2.3 From e1cfcbe82b4534bd0f99fef92a6d33843fd85e0e Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:40 +0800 Subject: ipv4: Namespaceify tcp_fastopen knob Different namespace application might require enable TCP Fast Open feature independently of the host. This patch series continues making more of the TCP Fast Open related sysctl knobs be per net-namespace. Reported-by: Luca BRUNO Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index abc84d986da4..16420ccaef15 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -128,6 +128,7 @@ struct netns_ipv4 { int sysctl_tcp_timestamps; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; + int sysctl_tcp_fastopen; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; diff --git a/include/net/tcp.h b/include/net/tcp.h index 770b608c8439..9e414a99034f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; -- cgit v1.2.3 From dd000598a39b6937fcefdf143720ec9fb5250e72 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:41 +0800 Subject: ipv4: Remove the 'publish' logic in tcp_fastopen_init_key_once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'publish' logic is not necessary after commit dfea2aa65424 ("tcp: Do not call tcp_fastopen_reset_cipher from interrupt context"), because in tcp_fastopen_cookie_gen,it wouldn't call tcp_fastopen_init_key_once. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 9e414a99034f..d9376e2458e9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1555,7 +1555,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc); -void tcp_fastopen_init_key_once(bool publish); +void tcp_fastopen_init_key_once(void); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); -- cgit v1.2.3 From 437138485656c41e32b8c63c0987cfa0348be0e6 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:42 +0800 Subject: ipv4: Namespaceify tcp_fastopen_key knob Different namespace application might require different tcp_fastopen_key independently of the host. David Miller pointed out there is a leak without releasing the context of tcp_fastopen_key during netns teardown. So add the release action in exit_batch path. Tested: 1. Container namespace: # cat /proc/sys/net/ipv4/tcp_fastopen_key: 2817fff2-f803cf97-eadfd1f3-78c0992b cookie key in tcp syn packets: Fast Open Cookie Kind: TCP Fast Open Cookie (34) Length: 10 Fast Open Cookie: 1e5dd82a8c492ca9 2. Host: # cat /proc/sys/net/ipv4/tcp_fastopen_key: 107d7c5f-68eb2ac7-02fb06e6-ed341702 cookie key in tcp syn packets: Fast Open Cookie Kind: TCP Fast Open Cookie (34) Length: 10 Fast Open Cookie: e213c02bf0afbc8a Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 4 ++++ include/net/tcp.h | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 16420ccaef15..7bb9603ff66c 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -36,6 +36,8 @@ struct inet_timewait_death_row { int sysctl_max_tw_buckets; }; +struct tcp_fastopen_context; + struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -129,6 +131,8 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; + struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; + spinlock_t tcp_fastopen_ctx_lock; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; diff --git a/include/net/tcp.h b/include/net/tcp.h index d9376e2458e9..6d25d8305054 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1549,13 +1549,13 @@ struct tcp_fastopen_request { }; void tcp_free_fastopen_req(struct tcp_sock *tp); -extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; -int tcp_fastopen_reset_cipher(void *key, unsigned int len); +void tcp_fastopen_ctx_destroy(struct net *net); +int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc); -void tcp_fastopen_init_key_once(void); +void tcp_fastopen_init_key_once(struct net *net); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); -- cgit v1.2.3 From 3733be14a32bae288b61ed28341e593baba983af Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:43 +0800 Subject: ipv4: Namespaceify tcp_fastopen_blackhole_timeout knob Different namespace application might require different time period in second to disable Fastopen on active TCP sockets. Tested: Simulate following similar situation that the server's data gets dropped after 3WHS. C ---- syn-data ---> S C <--- syn/ack ----- S C ---- ack --------> S S (accept & write) C? X <- data ------ S [retry and timeout] And then print netstat of TCPFastOpenBlackhole, the counter increased as expected when the firewall blackhole issue is detected and active TFO is disabled. # cat /proc/net/netstat | awk '{print $91}' TCPFastOpenBlackhole 1 Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7bb9603ff66c..2c4222a5d102 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -133,6 +133,9 @@ struct netns_ipv4 { int sysctl_tcp_fastopen; struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; spinlock_t tcp_fastopen_ctx_lock; + unsigned int sysctl_tcp_fastopen_blackhole_timeout; + atomic_t tfo_active_disable_times; + unsigned long tfo_active_disable_stamp; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; -- cgit v1.2.3 From b80ccfe9bbcac70e66fdfaef73f0988a27f9a68c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 26 Sep 2017 20:37:22 -0700 Subject: net-ipv6: remove unused IP6_ECN_clear() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is unused, and furthermore it is buggy since it suffers from the same issue that requires IP6_ECN_set_ce() to take a pointer to the skb so that it may (in case of CHECKSUM_COMPLETE) update skb->csum Instead of fixing it, let's just outright remove it. Tested: builds, and 'git grep IP6_ECN_clear' comes up empty Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/net/inet_ecn.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index dce2d586d9ce..f5ff16d72fe6 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -133,11 +133,6 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) return 1; } -static inline void IP6_ECN_clear(struct ipv6hdr *iph) -{ - *(__be32*)iph &= ~htonl(INET_ECN_MASK << 20); -} - static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) { dscp &= ~INET_ECN_MASK; -- cgit v1.2.3 From 503c1fb98ba3859c13863957c7c65c92371a9e50 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 29 Sep 2017 14:21:49 +0200 Subject: cfg80211/nl80211: add a port authorized event Add an event that indicates that a connection is authorized (i.e. the 4 way handshake was performed by the driver). This event should be sent by the driver after sending a connect/roamed event. This is useful for networks that require 802.1X authentication. In cases that the driver supports 4 way handshake offload, but the 802.1X authentication is managed by user space, the driver needs to inform user space right after the 802.11 association was completed so user space can initialize its 802.1X state machine etc. However, it is also possible that the AP will choose to skip the 802.1X authentication (e.g. when PMKSA caching is used) and proceed with the 4 way handshake immediately. In this case the driver needs to inform user space that 802.1X authentication is no longer required (e.g. to prevent user space from disconnecting since it did not get any EAPOLs from the AP). This is also useful for roaming, in which case it is possible that the driver used the Fast Transition protocol so 802.1X is not required. Since there will now be a dedicated notification indicating that the connection is authorized, the authorized flag can be removed from the roamed event. Drivers can send the new port authorized event right after sending the roamed event to indicate the new AP is already authorized. This therefore reserves the old PORT_AUTHORIZED attribute. Signed-off-by: Avraham Stern Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cc1996081463..8b8118a7fadb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5428,9 +5428,6 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length - * @authorized: true if the 802.1X authentication was done by the driver or is - * not needed (e.g., when Fast Transition protocol was used), false - * otherwise. Ignored for networks that don't use 802.1X authentication. */ struct cfg80211_roam_info { struct ieee80211_channel *channel; @@ -5440,7 +5437,6 @@ struct cfg80211_roam_info { size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; - bool authorized; }; /** @@ -5464,6 +5460,23 @@ struct cfg80211_roam_info { void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, gfp_t gfp); +/** + * cfg80211_port_authorized - notify cfg80211 of successful security association + * + * @dev: network device + * @bssid: the BSSID of the AP + * @gfp: allocation flags + * + * This function should be called by a driver that supports 4 way handshake + * offload after a security association was successfully established (i.e., + * the 4 way handshake was completed successfully). The call to this function + * should be preceded with a call to cfg80211_connect_result(), + * cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to + * indicate the 802.11 association. + */ +void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, + gfp_t gfp); + /** * cfg80211_disconnected - notify cfg80211 that connection was dropped * -- cgit v1.2.3 From 32f16369e59fcc505c5ed93a6a8cad3d5636b463 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 2 Oct 2017 10:41:15 +0200 Subject: net/dst: Make skb parameter of skb{metadata_dst, tunnel_info}() const Make the skb parameter of skb_metadata_dst() and skb_tunnel_info() const as they are not modified. This is in preparation for using them in call-sites where skb is const. Signed-off-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index a803129a4849..9fba2ebf6dda 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -24,7 +24,7 @@ struct metadata_dst { } u; }; -static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) +static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb) { struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); @@ -34,7 +34,8 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) return NULL; } -static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) +static inline struct ip_tunnel_info * +skb_tunnel_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; -- cgit v1.2.3 From f952be79cebd49d04154781d99408867a069d375 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:11 -0300 Subject: sctp: introduce struct sctp_stream_out_ext With the stream schedulers, sctp_stream_out will become too big to be allocated by kmalloc and as we need to allocate with BH disabled, we cannot use __vmalloc in sctp_stream_init(). This patch moves out the stats from sctp_stream_out to sctp_stream_out_ext, which will be allocated only when the application tries to sendmsg something on it. Just the introduction of sctp_stream_out_ext would already fix the issue described above by splitting the allocation in two. Moving the stats to it also reduces the pressure on the allocator as we will ask for less memory atomically when creating the socket and we will use GFP_KERNEL later. Then, for stream schedulers, we will just use sctp_stream_out_ext. Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0477945de1a3..9b2b30b3ba4d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -84,6 +84,7 @@ struct sctp_ulpq; struct sctp_ep_common; struct crypto_shash; struct sctp_stream; +struct sctp_stream_out; #include @@ -380,6 +381,7 @@ struct sctp_sender_hb_info { int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp); +int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid); void sctp_stream_free(struct sctp_stream *stream); void sctp_stream_clear(struct sctp_stream *stream); void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new); @@ -1315,11 +1317,15 @@ struct sctp_inithdr_host { __u32 initial_tsn; }; +struct sctp_stream_out_ext { + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; +}; + struct sctp_stream_out { __u16 ssn; __u8 state; - __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; - __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + struct sctp_stream_out_ext *ext; }; struct sctp_stream_in { -- cgit v1.2.3 From 2fc019f790312e703efa1a44204c586112a430dc Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:12 -0300 Subject: sctp: introduce sctp_chunk_stream_no Add a helper to fetch the stream number from a given chunk. Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 9b2b30b3ba4d..c48f7999fe9b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -642,6 +642,11 @@ void sctp_init_addrs(struct sctp_chunk *, union sctp_addr *, union sctp_addr *); const union sctp_addr *sctp_source(const struct sctp_chunk *chunk); +static inline __u16 sctp_chunk_stream_no(struct sctp_chunk *ch) +{ + return ntohs(ch->subh.data_hdr->stream); +} + enum { SCTP_ADDR_NEW, /* new address added to assoc/ep */ SCTP_ADDR_SRC, /* address can be used as source */ -- cgit v1.2.3 From 5bbbbe32a43199c2b9ea5ea66fab6241c64beb51 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:13 -0300 Subject: sctp: introduce stream scheduler foundations This patch introduces the hooks necessary to do stream scheduling, as per RFC Draft ndata. It also introduces the first scheduler, which is what we do today but now factored out: first come first served (FCFS). With stream scheduling now we have to track which chunk was enqueued on which stream and be able to select another other than the in front of the main outqueue. So we introduce a list on sctp_stream_out_ext structure for this purpose. We reuse sctp_chunk->transmitted_list space for the list above, as the chunk cannot belong to the two lists at the same time. By using the union in there, we can have distinct names for these moments. sctp_sched_ops are the operations expected to be implemented by each scheduler. The dequeueing is a bit particular to this implementation but it is to match how we dequeue packets today. We first dequeue and then check if it fits the packet and if not, we requeue it at head. Thus why we don't have a peek operation but have dequeue_done instead, which is called once the chunk can be safely considered as transmitted. The check removed from sctp_outq_flush is now performed by sctp_stream_outq_migrate, which is only called during assoc setup. (sctp_sendmsg() also checks for it) The only operation that is foreseen but not yet added here is a way to signalize that a new packet is starting or that the packet is done, for round robin scheduler per packet, but is intentionally left to the patch that actually implements it. Support for I-DATA chunks, also described in this RFC, with user message interleaving is straightforward as it just requires the schedulers to probe for the feature and ignore datamsg boundaries when dequeueing. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/stream_sched.h | 72 +++++++++++++++++++++++++++++++++++++++++ include/net/sctp/structs.h | 15 +++++++-- 2 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 include/net/sctp/stream_sched.h (limited to 'include/net') diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h new file mode 100644 index 000000000000..c676550a4c7d --- /dev/null +++ b/include/net/sctp/stream_sched.h @@ -0,0 +1,72 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * These are definitions used by the stream schedulers, defined in RFC + * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11) + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * . + * + * Please send any bug reports or fixes you make to the + * email addresses: + * lksctp developers + * + * Written or modified by: + * Marcelo Ricardo Leitner + */ + +#ifndef __sctp_stream_sched_h__ +#define __sctp_stream_sched_h__ + +struct sctp_sched_ops { + /* Property handling for a given stream */ + int (*set)(struct sctp_stream *stream, __u16 sid, __u16 value, + gfp_t gfp); + int (*get)(struct sctp_stream *stream, __u16 sid, __u16 *value); + + /* Init the specific scheduler */ + int (*init)(struct sctp_stream *stream); + /* Init a stream */ + int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); + /* Frees the entire thing */ + void (*free)(struct sctp_stream *stream); + + /* Enqueue a chunk */ + void (*enqueue)(struct sctp_outq *q, struct sctp_datamsg *msg); + /* Dequeue a chunk */ + struct sctp_chunk *(*dequeue)(struct sctp_outq *q); + /* Called only if the chunk fit the packet */ + void (*dequeue_done)(struct sctp_outq *q, struct sctp_chunk *chunk); + /* Sched all chunks already enqueued */ + void (*sched_all)(struct sctp_stream *steam); + /* Unched all chunks already enqueued */ + void (*unsched_all)(struct sctp_stream *steam); +}; + +int sctp_sched_set_sched(struct sctp_association *asoc, + enum sctp_sched_type sched); +int sctp_sched_get_sched(struct sctp_association *asoc); +int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid, + __u16 value, gfp_t gfp); +int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid, + __u16 *value); +void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch); + +void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch); +int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp); +struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream); + +#endif /* __sctp_stream_sched_h__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index c48f7999fe9b..3c22a30fd71b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -84,7 +84,6 @@ struct sctp_ulpq; struct sctp_ep_common; struct crypto_shash; struct sctp_stream; -struct sctp_stream_out; #include @@ -531,8 +530,12 @@ struct sctp_chunk { /* How many times this chunk have been sent, for prsctp RTX policy */ int sent_count; - /* This is our link to the per-transport transmitted list. */ - struct list_head transmitted_list; + union { + /* This is our link to the per-transport transmitted list. */ + struct list_head transmitted_list; + /* List in specific stream outq */ + struct list_head stream_list; + }; /* This field is used by chunks that hold fragmented data. * For the first fragment this is the list that holds the rest of @@ -1019,6 +1022,9 @@ struct sctp_outq { /* Data pending that has never been transmitted. */ struct list_head out_chunk_list; + /* Stream scheduler being used */ + struct sctp_sched_ops *sched; + unsigned int out_qlen; /* Total length of queued data chunks. */ /* Error of send failed, may used in SCTP_SEND_FAILED event. */ @@ -1325,6 +1331,7 @@ struct sctp_inithdr_host { struct sctp_stream_out_ext { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + struct list_head outq; /* chunks enqueued by this stream */ }; struct sctp_stream_out { @@ -1342,6 +1349,8 @@ struct sctp_stream { struct sctp_stream_in *in; __u16 outcnt; __u16 incnt; + /* Current stream being sent, if any */ + struct sctp_stream_out *out_curr; }; #define SCTP_STREAM_CLOSED 0x00 -- cgit v1.2.3 From 637784ade221a3c8a7ecd0f583eddd95d6276b9a Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:16 -0300 Subject: sctp: introduce priority based stream scheduler This patch introduces RFC Draft ndata section 3.4 Priority Based Scheduler (SCTP_SS_PRIO). It works by having a struct sctp_stream_priority for each priority configured. This struct is then enlisted on a queue ordered per priority if, and only if, there is a stream with data queued, so that dequeueing is very straightforward: either finish current datamsg or simply dequeue from the highest priority queued, which is the next stream pointed, and that's it. If there are multiple streams assigned with the same priority and with data queued, it will do round robin amongst them while respecting datamsgs boundaries (when not using idata chunks), to be reasonably fair. We intentionally don't maintain a list of priorities nor a list of all streams with the same priority to save memory. The first would mean at least 2 other pointers per priority (which, for 1000 priorities, that can mean 16kB) and the second would also mean 2 other pointers but per stream. As SCTP supports up to 65535 streams on a given asoc, that's 1MB. This impacts when giving a priority to some stream, as we have to find out if the new priority is already being used and if we can free the old one, and also when tearing down. The new fields in struct sctp_stream_out_ext and sctp_stream are added under a union because that memory is to be shared with other schedulers. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 3c22a30fd71b..40eb8d66a37c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1328,10 +1328,27 @@ struct sctp_inithdr_host { __u32 initial_tsn; }; +struct sctp_stream_priorities { + /* List of priorities scheduled */ + struct list_head prio_sched; + /* List of streams scheduled */ + struct list_head active; + /* The next stream stream in line */ + struct sctp_stream_out_ext *next; + __u16 prio; +}; + struct sctp_stream_out_ext { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; struct list_head outq; /* chunks enqueued by this stream */ + union { + struct { + /* Scheduled streams list */ + struct list_head prio_list; + struct sctp_stream_priorities *prio_head; + }; + }; }; struct sctp_stream_out { @@ -1351,6 +1368,13 @@ struct sctp_stream { __u16 incnt; /* Current stream being sent, if any */ struct sctp_stream_out *out_curr; + union { + /* Fields used by priority scheduler */ + struct { + /* List of priorities scheduled */ + struct list_head prio_list; + }; + }; }; #define SCTP_STREAM_CLOSED 0x00 -- cgit v1.2.3 From ac1ed8b82cd60ba8e7d84103ac1414b8c577c485 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:17 -0300 Subject: sctp: introduce round robin stream scheduler This patch introduces RFC Draft ndata section 3.2 Priority Based Scheduler (SCTP_SS_RR). Works by maintaining a list of enqueued streams and tracking the last one used to send data. When the datamsg is done, it switches to the next stream. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 40eb8d66a37c..16f949eef52f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1348,6 +1348,10 @@ struct sctp_stream_out_ext { struct list_head prio_list; struct sctp_stream_priorities *prio_head; }; + /* Fields used by RR scheduler */ + struct { + struct list_head rr_list; + }; }; }; @@ -1374,6 +1378,13 @@ struct sctp_stream { /* List of priorities scheduled */ struct list_head prio_list; }; + /* Fields used by RR scheduler */ + struct { + /* List of streams scheduled */ + struct list_head rr_list; + /* The next stream stream in line */ + struct sctp_stream_out_ext *rr_next; + }; }; }; -- cgit v1.2.3 From e774d96b7d2c3489bfb5bbdc2b65ed41cd68d3d5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Oct 2017 15:55:29 +0200 Subject: rtnetlink: remove slave_validate callback no users in the tree. Signed-off-by: Florian Westphal Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 21837ca68ecc..6520993ff449 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -93,9 +93,6 @@ struct rtnl_link_ops { int slave_maxtype; const struct nla_policy *slave_policy; - int (*slave_validate)(struct nlattr *tb[], - struct nlattr *data[], - struct netlink_ext_ack *extack); int (*slave_changelink)(struct net_device *dev, struct net_device *slave_dev, struct nlattr *tb[], -- cgit v1.2.3 From 5c45121dc39026ab2139910e57cf933fd57d30f2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Oct 2017 15:58:49 +0200 Subject: rtnetlink: remove __rtnl_af_unregister switch the only caller to rtnl_af_unregister. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 6520993ff449..e3ca8e2e3103 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -151,8 +151,6 @@ struct rtnl_af_ops { size_t (*get_stats_af_size)(const struct net_device *dev); }; -void __rtnl_af_unregister(struct rtnl_af_ops *ops); - void rtnl_af_register(struct rtnl_af_ops *ops); void rtnl_af_unregister(struct rtnl_af_ops *ops); -- cgit v1.2.3 From 33eaf2a6eb48ebf00374aaaf4b1b43f9950dcbe4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:46 -0700 Subject: net: Add extack to ndo_add_slave Pass extack to do_set_master and down to ndo_add_slave Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/bonding.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bonding.h b/include/net/bonding.h index b2e68657a216..2860cc66c2bb 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -596,7 +596,8 @@ void bond_destroy_sysfs(struct bond_net *net); void bond_prepare_sysfs_group(struct bonding *bond); int bond_sysfs_slave_add(struct slave *slave); void bond_sysfs_slave_del(struct slave *slave); -int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev); +int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, + struct netlink_ext_ack *extack); int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb); int bond_set_carrier(struct bonding *bond); -- cgit v1.2.3 From 44f209807ee87a5eddf6c0432f3fb63cec27bad8 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 5 Oct 2017 16:46:50 -0400 Subject: VSOCK: export socket tables for sock_diag interface The socket table symbols need to be exported from vsock.ko so that the vsock_diag.ko module will be able to traverse sockets. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/net/af_vsock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f9fb566e75cf..30cba806e344 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -27,6 +27,11 @@ #define LAST_RESERVED_PORT 1023 +#define VSOCK_HASH_SIZE 251 +extern struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +extern struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +extern spinlock_t vsock_table_lock; + #define vsock_sk(__sk) ((struct vsock_sock *)__sk) #define sk_vsock(__vsk) (&(__vsk)->sk) -- cgit v1.2.3 From bf359b8127719535f88494adb3c2b73c06667dcd Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 5 Oct 2017 16:46:51 -0400 Subject: VSOCK: move __vsock_in_bound/connected_table() to af_vsock.h The vsock_diag.ko module will need to check socket table membership. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/net/af_vsock.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 30cba806e344..3dd217718a2f 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -180,6 +180,18 @@ const struct vsock_transport *vsock_core_get_transport(void); /**** UTILS ****/ +/* vsock_table_lock must be held */ +static inline bool __vsock_in_bound_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->bound_table); +} + +/* vsock_table_lock must be held */ +static inline bool __vsock_in_connected_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->connected_table); +} + void vsock_release_pending(struct sock *pending); void vsock_add_pending(struct sock *listener, struct sock *pending); void vsock_remove_pending(struct sock *listener, struct sock *pending); -- cgit v1.2.3 From 3b4477d2dcf2709d0be89e2a8dced3d0f4a017f2 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 5 Oct 2017 16:46:52 -0400 Subject: VSOCK: use TCP state constants for sk_state There are two state fields: socket->state and sock->sk_state. The socket->state field uses SS_UNCONNECTED, SS_CONNECTED, etc while the sock->sk_state typically uses values that match TCP state constants (TCP_CLOSE, TCP_ESTABLISHED). AF_VSOCK does not follow this convention and instead uses SS_* constants for both fields. The sk_state field will be exposed to userspace through the vsock_diag interface for ss(8), netstat(8), and other programs. This patch switches sk_state to TCP state constants so that the meaning of this field is consistent with other address families. Not just AF_INET and AF_INET6 use the TCP constants, AF_UNIX and others do too. The following mapping was used to convert the code: SS_FREE -> TCP_CLOSE SS_UNCONNECTED -> TCP_CLOSE SS_CONNECTING -> TCP_SYN_SENT SS_CONNECTED -> TCP_ESTABLISHED SS_DISCONNECTING -> TCP_CLOSING VSOCK_SS_LISTEN -> TCP_LISTEN In __vsock_create() the sk_state initialization was dropped because sock_init_data() already initializes sk_state to TCP_CLOSE. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/net/af_vsock.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 3dd217718a2f..9324ac2d9ff2 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -22,9 +22,6 @@ #include "vsock_addr.h" -/* vsock-specific sock->sk_state constants */ -#define VSOCK_SS_LISTEN 255 - #define LAST_RESERVED_PORT 1023 #define VSOCK_HASH_SIZE 251 -- cgit v1.2.3 From 27204aaa9dc67b833b77179fdac556288ec3a4bf Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 4 Oct 2017 10:03:44 -0700 Subject: tcp: uniform the set up of sockets after successful connection Currently in the TCP code, the initialization sequence for cached metrics, congestion control, BPF, etc, after successful connection is very inconsistent. This introduces inconsistent bevhavior and is prone to bugs. The current call sequence is as follows: (1) for active case (tcp_finish_connect() case): tcp_mtup_init(sk); icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB); tcp_init_congestion_control(sk); tcp_init_buffer_space(sk); (2) for passive case (tcp_rcv_state_process() TCP_SYN_RECV case): icsk->icsk_af_ops->rebuild_header(sk); tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tcp_init_congestion_control(sk); tcp_mtup_init(sk); tcp_init_buffer_space(sk); tcp_init_metrics(sk); (3) for TFO passive case (tcp_fastopen_create_child()): inet_csk(child)->icsk_af_ops->rebuild_header(child); tcp_init_congestion_control(child); tcp_mtup_init(child); tcp_init_metrics(child); tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tcp_init_buffer_space(child); This commit uniforms the above functions to have the following sequence: tcp_mtup_init(sk); icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE/PASSIVE_ESTABLISHED_CB); tcp_init_congestion_control(sk); tcp_init_buffer_space(sk); This sequence is the same as the (1) active case. We pick this sequence because this order correctly allows BPF to override the settings including congestion control module and initial cwnd, etc from the route, and then allows the CC module to see those settings. Suggested-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: Wei Wang Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7a3a8af56fd6..426c2e986016 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -416,6 +416,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); void tcp_disable_fack(struct tcp_sock *tp); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); +void tcp_init_transfer(struct sock *sk, int bpf_op); unsigned int tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, -- cgit v1.2.3 From e2080072ed2d98a55ae69d95dea60ff7a17cddd5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Oct 2017 12:59:58 -0700 Subject: tcp: new list for sent but unacked skbs for RACK recovery This patch adds a new queue (list) that tracks the sent but not yet acked or SACKed skbs for a TCP connection. The list is chronologically ordered by skb->skb_mstamp (the head is the oldest sent skb). This list will be used to optimize TCP Rack recovery, which checks an skb's timestamp to judge if it has been lost and needs to be retransmitted. Since TCP write queue is ordered by sequence instead of sent time, RACK has to scan over the write queue to catch all eligible packets to detect lost retransmission, and iterates through SACKed skbs repeatedly. Special cares for rare events: 1. TCP repair fakes skb transmission so the send queue needs adjusted 2. SACK reneging would require re-inserting SACKed skbs into the send queue. For now I believe it's not worth the complexity to make RACK work perfectly on SACK reneging, so we do nothing here. 3. Fast Open: currently for non-TFO, send-queue correctly queues the pure SYN packet. For TFO which queues a pure SYN and then a data packet, send-queue only queues the data packet but not the pure SYN due to the structure of TFO code. This is okay because the SYN receiver would never respond with a SACK on a missing SYN (i.e. SYN is never fast-retransmitted by SACK/RACK). In order to not grow sk_buff, we use an union for the new list and _skb_refdst/destructor fields. This is a bit complicated because we need to make sure _skb_refdst and destructor are properly zeroed before skb is cloned/copied at transmit, and before being freed. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 426c2e986016..3b16f353b539 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1589,14 +1589,34 @@ enum tcp_chrono { void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type); void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type); +/* This helper is needed, because skb->tcp_tsorted_anchor uses + * the same memory storage than skb->destructor/_skb_refdst + */ +static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb) +{ + skb->destructor = NULL; + skb->_skb_refdst = 0UL; +} + +#define tcp_skb_tsorted_save(skb) { \ + unsigned long _save = skb->_skb_refdst; \ + skb->_skb_refdst = 0UL; + +#define tcp_skb_tsorted_restore(skb) \ + skb->_skb_refdst = _save; \ +} + /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { struct sk_buff *skb; tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) + while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + tcp_skb_tsorted_anchor_cleanup(skb); sk_wmem_free_skb(sk, skb); + } + INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); } @@ -1711,6 +1731,8 @@ static inline void tcp_insert_write_queue_before(struct sk_buff *new, static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) { + list_del(&skb->tcp_tsorted_anchor); + tcp_skb_tsorted_anchor_cleanup(skb); __skb_unlink(skb, &sk->sk_write_queue); } -- cgit v1.2.3 From 4e64b1ed15e25b8dcc2819c6d43dab72eb0bea26 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 5 Oct 2017 23:46:14 -0700 Subject: net/ipv6: Convert icmpv6_push_pending_frames to void commit cc71b7b07119 ("net/ipv6: remove unused err variable on icmpv6_push_pending_frames") exposed icmpv6_push_pending_frames return value not being used. Remove now unnecessary int err declarations and uses. Miscellanea: o Remove unnecessary goto and out: labels o Realign arguments Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 6eac5cf8f1e6..3cda3b521c36 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -300,8 +300,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl) void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); -int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, - struct icmp6hdr *thdr, int len); +void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len); int ip6_ra_control(struct sock *sk, int sel); -- cgit v1.2.3 From ac3f09ba3e496bd7cc780ead05b1d1bb5f33aedb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 22:21:22 -0700 Subject: tcp: uninline tcp_write_queue_purge() Since the upcoming rtx rbtree will add some extra code, it is time to not inline this fat function anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 3b16f353b539..744559b72784 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1606,20 +1606,7 @@ static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb) skb->_skb_refdst = _save; \ } -/* write queue abstraction */ -static inline void tcp_write_queue_purge(struct sock *sk) -{ - struct sk_buff *skb; - - tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { - tcp_skb_tsorted_anchor_cleanup(skb); - sk_wmem_free_skb(sk, skb); - } - INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); - sk_mem_reclaim(sk); - tcp_clear_all_retrans_hints(tcp_sk(sk)); -} +void tcp_write_queue_purge(struct sock *sk); static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) { -- cgit v1.2.3 From 75c119afe14f74b4dd967d75ed9f57ab6c0ef045 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 22:21:27 -0700 Subject: tcp: implement rb-tree based retransmit queue Using a linear list to store all skbs in write queue has been okay for quite a while : O(N) is not too bad when N < 500. Things get messy when N is the order of 100,000 : Modern TCP stacks want 10Gbit+ of throughput even with 200 ms RTT flows. 40 ns per cache line miss means a full scan can use 4 ms, blowing away CPU caches. SACK processing often can use various hints to avoid parsing whole retransmit queue. But with high packet losses and/or high reordering, hints no longer work. Sender has to process thousands of unfriendly SACK, accumulating a huge socket backlog, burning a cpu and massively dropping packets. Using an rb-tree for retransmit queue has been avoided for years because it added complexity and overhead, but now is the time to be more resistant and say no to quadratic behavior. 1) RTX queue is no longer part of the write queue : already sent skbs are stored in one rb-tree. 2) Since reaching the head of write queue no longer needs sk->sk_send_head, we added an union of sk_send_head and tcp_rtx_queue Tested: On receiver : netem on ingress : delay 150ms 200us loss 1 GRO disabled to force stress and SACK storms. for f in `seq 1 10` do ./netperf -H lpaa6 -l30 -- -K bbr -o THROUGHPUT|tail -1 done | awk '{print $0} {sum += $0} END {printf "%7u\n",sum}' Before patch : 323.87 351.48 339.59 338.62 306.72 204.07 304.93 291.88 202.47 176.88 2840 After patch: 1700.83 2207.98 2070.17 1544.26 2114.76 2124.89 1693.14 1080.91 2216.82 1299.94 18053 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 7 +++-- include/net/tcp.h | 89 ++++++++++++++++++++++++++++-------------------------- 2 files changed, 52 insertions(+), 44 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index a6b9a8d1a6df..4827094f1db4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -60,7 +60,7 @@ #include #include #include - +#include #include #include #include @@ -397,7 +397,10 @@ struct sock { int sk_wmem_queued; refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; - struct sk_buff *sk_send_head; + union { + struct sk_buff *sk_send_head; + struct rb_root tcp_rtx_queue; + }; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; diff --git a/include/net/tcp.h b/include/net/tcp.h index 744559b72784..5a95e5886b55 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -551,7 +551,13 @@ void tcp_xmit_retransmit_queue(struct sock *); void tcp_simple_retransmit(struct sock *); void tcp_enter_recovery(struct sock *sk, bool ece_ack); int tcp_trim_head(struct sock *, struct sk_buff *, u32); -int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); +enum tcp_queue { + TCP_FRAG_IN_WRITE_QUEUE, + TCP_FRAG_IN_RTX_QUEUE, +}; +int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, + struct sk_buff *skb, u32 len, + unsigned int mss_now, gfp_t gfp); void tcp_send_probe0(struct sock *); void tcp_send_partial(struct sock *); @@ -1608,6 +1614,11 @@ static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb) void tcp_write_queue_purge(struct sock *sk); +static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk) +{ + return skb_rb_first(&sk->tcp_rtx_queue); +} + static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) { return skb_peek(&sk->sk_write_queue); @@ -1630,18 +1641,12 @@ static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk, return skb_queue_prev(&sk->sk_write_queue, skb); } -#define tcp_for_write_queue(skb, sk) \ - skb_queue_walk(&(sk)->sk_write_queue, skb) - -#define tcp_for_write_queue_from(skb, sk) \ - skb_queue_walk_from(&(sk)->sk_write_queue, skb) - #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) static inline struct sk_buff *tcp_send_head(const struct sock *sk) { - return sk->sk_send_head; + return skb_peek(&sk->sk_write_queue); } static inline bool tcp_skb_is_last(const struct sock *sk, @@ -1650,29 +1655,30 @@ static inline bool tcp_skb_is_last(const struct sock *sk, return skb_queue_is_last(&sk->sk_write_queue, skb); } -static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb) +static inline bool tcp_write_queue_empty(const struct sock *sk) { - if (tcp_skb_is_last(sk, skb)) - sk->sk_send_head = NULL; - else - sk->sk_send_head = tcp_write_queue_next(sk, skb); + return skb_queue_empty(&sk->sk_write_queue); +} + +static inline bool tcp_rtx_queue_empty(const struct sock *sk) +{ + return RB_EMPTY_ROOT(&sk->tcp_rtx_queue); +} + +static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk) +{ + return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk); } static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) { - if (sk->sk_send_head == skb_unlinked) { - sk->sk_send_head = NULL; + if (tcp_write_queue_empty(sk)) tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - } + if (tcp_sk(sk)->highest_sack == skb_unlinked) tcp_sk(sk)->highest_sack = NULL; } -static inline void tcp_init_send_head(struct sock *sk) -{ - sk->sk_send_head = NULL; -} - static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb) { __skb_queue_tail(&sk->sk_write_queue, skb); @@ -1683,8 +1689,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb __tcp_add_write_queue_tail(sk, skb); /* Queue it, remembering where we must start sending. */ - if (sk->sk_send_head == NULL) { - sk->sk_send_head = skb; + if (sk->sk_write_queue.next == skb) { tcp_chrono_start(sk, TCP_CHRONO_BUSY); if (tcp_sk(sk)->highest_sack == NULL) @@ -1697,35 +1702,32 @@ static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *s __skb_queue_head(&sk->sk_write_queue, skb); } -/* Insert buff after skb on the write queue of sk. */ -static inline void tcp_insert_write_queue_after(struct sk_buff *skb, - struct sk_buff *buff, - struct sock *sk) -{ - __skb_queue_after(&sk->sk_write_queue, skb, buff); -} - /* Insert new before skb on the write queue of sk. */ static inline void tcp_insert_write_queue_before(struct sk_buff *new, struct sk_buff *skb, struct sock *sk) { __skb_queue_before(&sk->sk_write_queue, skb, new); - - if (sk->sk_send_head == skb) - sk->sk_send_head = new; } static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) { - list_del(&skb->tcp_tsorted_anchor); - tcp_skb_tsorted_anchor_cleanup(skb); __skb_unlink(skb, &sk->sk_write_queue); } -static inline bool tcp_write_queue_empty(struct sock *sk) +void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb); + +static inline void tcp_rtx_queue_unlink(struct sk_buff *skb, struct sock *sk) { - return skb_queue_empty(&sk->sk_write_queue); + tcp_skb_tsorted_anchor_cleanup(skb); + rb_erase(&skb->rbnode, &sk->tcp_rtx_queue); +} + +static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct sock *sk) +{ + list_del(&skb->tcp_tsorted_anchor); + tcp_rtx_queue_unlink(skb, sk); + sk_wmem_free_skb(sk, skb); } static inline void tcp_push_pending_frames(struct sock *sk) @@ -1754,8 +1756,9 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp) static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb) { - tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL : - tcp_write_queue_next(sk, skb); + struct sk_buff *next = skb_rb_next(skb); + + tcp_sk(sk)->highest_sack = next ?: tcp_send_head(sk); } static inline struct sk_buff *tcp_highest_sack(struct sock *sk) @@ -1765,7 +1768,9 @@ static inline struct sk_buff *tcp_highest_sack(struct sock *sk) static inline void tcp_highest_sack_reset(struct sock *sk) { - tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk); + struct sk_buff *skb = tcp_rtx_queue_head(sk); + + tcp_sk(sk)->highest_sack = skb ?: tcp_send_head(sk); } /* Called when old skb is about to be deleted (to be combined with new skb) */ @@ -1935,7 +1940,7 @@ extern void tcp_rack_reo_timeout(struct sock *sk); /* At how many usecs into the future should the RTO fire? */ static inline s64 tcp_rto_delta_us(const struct sock *sk) { - const struct sk_buff *skb = tcp_write_queue_head(sk); + const struct sk_buff *skb = tcp_rtx_queue_head(sk); u32 rto = inet_csk(sk)->icsk_rto; u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto); -- cgit v1.2.3 From 180ca444b985c42948fa26abd278e616b5ce7eb2 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:05:56 -0700 Subject: ipv6: introduce a new function fib6_update_sernum() This function takes a route as input and tries to update the sernum in the fib6_node this route is associated with. It will be used in later commit when adding a cached route into the exception table under that route. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index d060d711a624..152b7b14a5a5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -358,6 +358,8 @@ void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb); +void fib6_update_sernum(struct rt6_info *rt); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); -- cgit v1.2.3 From 35732d01fe311ec13c4e42936878b782b8e7ea85 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:05:57 -0700 Subject: ipv6: introduce a hash table to store dst cache Add a hash table into struct rt6_info in order to store dst caches created by pmtu discovery and ip redirect in ipv6 routing code. APIs to add dst cache, delete dst cache, find dst cache and update dst cache in the hash table are implemented and will be used in later commits. This is a preparation work to move all cache routes into the exception table instead of getting inserted into the fib6 tree. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 19 +++++++++++++++++++ include/net/ip6_route.h | 3 +++ 2 files changed, 22 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 152b7b14a5a5..c4864c1e8f13 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -98,6 +98,22 @@ struct rt6key { struct fib6_table; +struct rt6_exception_bucket { + struct hlist_head chain; + int depth; +}; + +struct rt6_exception { + struct hlist_node hlist; + struct rt6_info *rt6i; + unsigned long stamp; + struct rcu_head rcu; +}; + +#define FIB6_EXCEPTION_BUCKET_SIZE_SHIFT 10 +#define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) +#define FIB6_MAX_DEPTH 5 + struct rt6_info { struct dst_entry dst; @@ -134,12 +150,15 @@ struct rt6_info { struct inet6_dev *rt6i_idev; struct rt6_info * __percpu *rt6i_pcpu; + struct rt6_exception_bucket __rcu *rt6i_exception_bucket; u32 rt6i_metric; u32 rt6i_pmtu; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; u8 rt6i_protocol; + u8 exception_bucket_flushed:1, + unused:7; }; static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index ee96f402cb75..3315605f34c9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -95,6 +95,9 @@ int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack); int ip6_ins_rt(struct rt6_info *); int ip6_del_rt(struct rt6_info *); +void rt6_flush_exceptions(struct rt6_info *rt); +int rt6_remove_exception_rt(struct rt6_info *rt); + static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, const struct in6_addr *daddr, unsigned int prefs, -- cgit v1.2.3 From c757faa8bfa26a0dd24b41ff783e0da042156887 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:01 -0700 Subject: ipv6: prepare fib6_age() for exception table If all dst cache entries are stored in the exception table under the main route, we have to go through them during fib6_age() when doing garbage collecting. Introduce a new function rt6_age_exception() which goes through all dst entries in the exception table and remove those entries that are expired. This function is called in fib6_age() so that all dst caches are also garbage collected. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 13 +++++++++++++ include/net/ip6_route.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c4864c1e8f13..11a79ef87a28 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -29,6 +29,14 @@ #define FIB6_TABLE_HASHSZ 1 #endif +#define RT6_DEBUG 2 + +#if RT6_DEBUG >= 3 +#define RT6_TRACE(x...) pr_debug(x) +#else +#define RT6_TRACE(x...) do { ; } while (0) +#endif + struct rt6_info; struct fib6_config { @@ -75,6 +83,11 @@ struct fib6_node { struct rcu_head rcu; }; +struct fib6_gc_args { + int timeout; + int more; +}; + #ifndef CONFIG_IPV6_SUBTREES #define FIB6_SUBTREE(fn) NULL #else diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 3315605f34c9..a0087fb9864b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -97,6 +97,8 @@ int ip6_del_rt(struct rt6_info *); void rt6_flush_exceptions(struct rt6_info *rt); int rt6_remove_exception_rt(struct rt6_info *rt); +void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now); static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, const struct in6_addr *daddr, -- cgit v1.2.3 From 38fbeeeeccdb38d0635398e8e344d245f6d8dc52 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:02 -0700 Subject: ipv6: prepare fib6_locate() for exception table fib6_locate() is used to find the fib6_node according to the passed in prefix address key. It currently tries to find the fib6_node with the exact match of the passed in key. However, when we move cached routes into the exception table, fib6_locate() will fail to find the fib6_node for it as the cached routes will be stored in the exception table under the fib6_node with the longest prefix match of the cache's dst addr key. This commit adds a new parameter to let the caller specify if it needs exact match or longest prefix match. Right now, all callers still does exact match when calling fib6_locate(). It will be changed in later commit where exception table is hooked up to store cached routes. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 11a79ef87a28..4497a1eb4d41 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -357,7 +357,8 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, - const struct in6_addr *saddr, int src_len); + const struct in6_addr *saddr, int src_len, + bool exact_match); void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), void *arg); -- cgit v1.2.3 From 2b760fcf5cfb34e8610df56d83745b2b74ae1379 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:03 -0700 Subject: ipv6: hook up exception table to store dst cache This commit makes use of the exception hash table implementation to store dst caches created by pmtu discovery and ip redirect into the hash table under the rt_info and no longer inserts these routes into fib6 tree. This makes the fib6 tree only contain static configured routes and could now be protected by rcu instead of a rw lock. With this change, in the route lookup related functions, after finding the rt6_info with the longest prefix, we also need to search for the exception table before doing backtracking. In the route delete function, if the route being deleted is not a dst cache, deletion of this route also need to flush the whole hash table under it. If it is a dst cache, then only delete the cached dst in the hash table. Note: for fib6_walk_continue() function, w->root now is always pointing to a root node considering that fib6_prune_clones() is removed from the code. So we add a WARN_ON() msg to make sure w->root always points to a root node and also removed the update of w->root in fib6_repair_tree(). This is a prerequisite for later patch because we don't need to make w->root as rcu protected when replacing rwlock with RCU. Also, we remove all prune related variables as it is no longer used. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 4497a1eb4d41..d0b7283073e3 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -280,7 +280,6 @@ struct fib6_walker { struct fib6_node *root, *node; struct rt6_info *leaf; enum fib6_walk_state state; - bool prune; unsigned int skip; unsigned int count; int (*func)(struct fib6_walker *); -- cgit v1.2.3 From bbd63f06d114a52be33f6982fc89ca2768cdeb62 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:07 -0700 Subject: ipv6: update fn_sernum after route is inserted to tree fib6_add() logic currently calls fib6_add_1() to figure out what node should be used for the newly added route and then call fib6_add_rt2node() to insert the route to the node. And during the call of fib6_add_1(), fn_sernum is updated for all nodes that share the same prefix as the new route. This does not have issue in the current code because reader thread will not be able to access the tree while writer thread is inserting new route to it. However, it is not the case once we transition to use RCU. Reader thread could potentially see the new fn_sernum before the new route is inserted. As a result, reader thread's route lookup will return a stale route with the new fn_sernum. In order to solve this issue, we remove all the update of fn_sernum in fib6_add_1(), and instead, introduce a new function that updates fn_sernum for all related nodes and call this functions once the route is successfully inserted to the tree. Also, smp_wmb() is used after a route is successfully inserted into the fib tree and right before the updated of fn->sernum. And smp_rmb() is used right after fn->sernum is accessed in rt6_get_cookie_safe(). This is to guarantee that when the reader thread sees the new fn->sernum, the new route is already inserted in the tree in memory. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index d0b7283073e3..6bf929b50951 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -220,6 +220,8 @@ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, if (fn) { *cookie = fn->fn_sernum; + /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */ + smp_rmb(); status = true; } -- cgit v1.2.3 From 66f5d6ce53e665477d2a33e8f539d4fa4ca81c83 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:10 -0700 Subject: ipv6: replace rwlock with rcu and spinlock in fib6_table With all the preparation work before, we are now ready to replace rwlock with rcu and spinlock in fib6_table. That means now all fib6_node in fib6_table are protected by rcu. And when freeing fib6_node, call_rcu() is used to wait for the rcu grace period before releasing the memory. When accessing fib6_node, corresponding rcu APIs need to be used. And all previous sessions protected by the write lock will now be protected by the spin lock per table. All previous sessions protected by read lock will now be protected by rcu_read_lock(). A couple of things to note here: 1. As part of the work of replacing rwlock with rcu, the linked list of fn->leaf now has to be rcu protected as well. So both fn->leaf and rt->dst.rt6_next are now __rcu tagged and corresponding rcu APIs are used when manipulating them. 2. For fn->rr_ptr, first of all, it also needs to be rcu protected now and is tagged with __rcu and rcu APIs are used in corresponding places. Secondly, fn->rr_ptr is changed in rt6_select() which is a reader thread. This makes the issue a bit complicated. We think a valid solution for it is to let rt6_select() grab the tb6_lock if it decides to change it. As it is not in the normal operation and only happens when there is no valid neighbor cache for the route, we think the performance impact should be low. 3. fib6_walk_continue() has to be called with tb6_lock held even in the route dumping related functions, e.g. inet6_dump_fib(), fib6_tables_dump() and ipv6_route_seq_ops. It is because fib6_walk_continue() makes modifications to the walker structure, and so are fib6_repair_tree() and fib6_del_route(). In order to do proper syncing between them, we need to let fib6_walk_continue() hold the lock. We may be able to do further improvement on the way we do the tree walk to get rid of the need for holding the spin lock. But not for now. 4. When fib6_del_route() removes a route from the tree, we no longer mark rt->dst.rt6_next to NULL to make simultaneous reader be able to further traverse the list with rcu. However, rt->dst.rt6_next is only valid within this same rcu period. No one should access it later. 5. All the operation of atomic_inc(rt->rt6i_ref) is changed to be performed before we publish this route (either by linking it to fn->leaf or insert it in the list pointed by fn->leaf) just to be safe because as soon as we publish the route, some read thread will be able to access it. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- include/net/ip6_fib.h | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 06a6765da074..204c19e25456 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -101,7 +101,7 @@ struct dst_entry { union { struct dst_entry *next; struct rtable __rcu *rt_next; - struct rt6_info *rt6_next; + struct rt6_info __rcu *rt6_next; struct dn_route __rcu *dn_next; }; }; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6bf929b50951..0b438b9bcb10 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -68,18 +68,18 @@ struct fib6_config { }; struct fib6_node { - struct fib6_node *parent; - struct fib6_node *left; - struct fib6_node *right; + struct fib6_node __rcu *parent; + struct fib6_node __rcu *left; + struct fib6_node __rcu *right; #ifdef CONFIG_IPV6_SUBTREES - struct fib6_node *subtree; + struct fib6_node __rcu *subtree; #endif - struct rt6_info *leaf; + struct rt6_info __rcu *leaf; __u16 fn_bit; /* bit key */ __u16 fn_flags; int fn_sernum; - struct rt6_info *rr_ptr; + struct rt6_info __rcu *rr_ptr; struct rcu_head rcu; }; @@ -91,7 +91,7 @@ struct fib6_gc_args { #ifndef CONFIG_IPV6_SUBTREES #define FIB6_SUBTREE(fn) NULL #else -#define FIB6_SUBTREE(fn) ((fn)->subtree) +#define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif struct mx6_config { @@ -174,6 +174,14 @@ struct rt6_info { unused:7; }; +#define for_each_fib6_node_rt_rcu(fn) \ + for (rt = rcu_dereference((fn)->leaf); rt; \ + rt = rcu_dereference(rt->dst.rt6_next)) + +#define for_each_fib6_walker_rt(w) \ + for (rt = (w)->leaf; rt; \ + rt = rcu_dereference_protected(rt->dst.rt6_next, 1)) + static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; @@ -310,7 +318,7 @@ struct rt6_statistics { struct fib6_table { struct hlist_node tb6_hlist; u32 tb6_id; - rwlock_t tb6_lock; + spinlock_t tb6_lock; struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; -- cgit v1.2.3 From 81eb8447daae3b62247aa66bb17b82f8fef68249 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:11 -0700 Subject: ipv6: take care of rt6_stats Currently, most of the rt6_stats are not hooked up correctly. As the last part of this patch series, hook up all existing rt6_stats and add one new stat fib_rt_uncache to indicate the number of routes in the uncached list. For details of the stats, please refer to the comments added in include/net/ip6_fib.h. Note: fib_rt_alloc and fib_rt_uncache are not guaranteed to be modified under a lock. So atomic_t is used for them. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0b438b9bcb10..10c913816032 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -297,12 +297,15 @@ struct fib6_walker { }; struct rt6_statistics { - __u32 fib_nodes; - __u32 fib_route_nodes; - __u32 fib_rt_alloc; /* permanent routes */ - __u32 fib_rt_entries; /* rt entries in table */ - __u32 fib_rt_cache; /* cache routes */ - __u32 fib_discarded_routes; + __u32 fib_nodes; /* all fib6 nodes */ + __u32 fib_route_nodes; /* intermediate nodes */ + __u32 fib_rt_entries; /* rt entries in fib table */ + __u32 fib_rt_cache; /* cached rt entries in exception table */ + __u32 fib_discarded_routes; /* total number of routes delete */ + + /* The following stats are not protected by any lock */ + atomic_t fib_rt_alloc; /* total number of routes alloced */ + atomic_t fib_rt_uncache; /* rt entries in uncached list */ }; #define RTN_TL_ROOT 0x0001 -- cgit v1.2.3 From 548ec114705bb8f0879a0da12abec17f17a7cc26 Mon Sep 17 00:00:00 2001 From: Lin Zhang Date: Fri, 6 Oct 2017 01:40:35 +0800 Subject: net: phonet: mark phonet_protocol as const The phonet_protocol structs don't need to be written by anyone and so can be marked as const. Signed-off-by: Lin Zhang Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 039cc29cb4a8..51e1a2a45d02 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -108,8 +108,10 @@ struct phonet_protocol { int sock_type; }; -int phonet_proto_register(unsigned int protocol, struct phonet_protocol *pp); -void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp); +int phonet_proto_register(unsigned int protocol, + const struct phonet_protocol *pp); +void phonet_proto_unregister(unsigned int protocol, + const struct phonet_protocol *pp); int phonet_sysctl_init(void); void phonet_sysctl_exit(void); -- cgit v1.2.3 From 77041420751fe6d4acf2103b245dcc2b4b7b8360 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 9 Oct 2017 11:15:31 +0200 Subject: net: bridge: Notify on bridge device mrouter state changes Add the SWITCHDEV_ATTR_ID_BRIDGE_MROUTER switchdev notification type, used to indicate whether the bridge is or isn't mrouter. Notify when the bridge changes its state, similarly to the already existing bridged port mrouter notifications. The notification uses the switchdev_attr.u.mrouter boolean flag to indicate the current bridge mrouter status. Thus, it only indicates whether the bridge is currently used as an mrouter or not, and does not indicate the exact mrouter state of the bridge (learning, permanent, etc.). Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/switchdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d767b7991887..d756fbe46625 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -51,6 +51,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, + SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, }; struct switchdev_attr { -- cgit v1.2.3 From d66f2b91f95b56e31772b9faa0d036cd2e53cb02 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Oct 2017 10:30:14 -0700 Subject: bpf: don't rely on the verifier lock for metadata_dst allocation bpf_skb_set_tunnel_*() functions require allocation of per-cpu metadata_dst. The allocation happens upon verification of the first program using those helpers. In preparation for removing the verifier lock, use cmpxchg() to make sure we only allocate the metadata_dsts once. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 9fba2ebf6dda..87a0bb8d449f 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -87,6 +87,7 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, void metadata_dst_free(struct metadata_dst *); struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags); +void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst); struct metadata_dst __percpu * metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); -- cgit v1.2.3 From 8c418b5b15747eda05d086e80fa0a767982fbf37 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Oct 2017 11:53:32 +0200 Subject: fq: support filtering a given tin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add to the FQ API a way to filter a given tin, in order to remove frames that fulfil certain criteria according to a filter function. This will be used by mac80211 to remove frames belonging to an AP VLAN interface that's being removed. Signed-off-by: Johannes Berg Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- include/net/fq.h | 7 +++++ include/net/fq_impl.h | 72 ++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 69 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/fq.h b/include/net/fq.h index 6d8521a30c5c..ac944a686840 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -90,6 +90,13 @@ typedef void fq_skb_free_t(struct fq *, struct fq_flow *, struct sk_buff *); +/* Return %true to filter (drop) the frame. */ +typedef bool fq_skb_filter_t(struct fq *, + struct fq_tin *, + struct fq_flow *, + struct sk_buff *, + void *); + typedef struct fq_flow *fq_flow_get_default_t(struct fq *, struct fq_tin *, int idx, diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 4e6131cd3f43..8b237e4afee6 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -12,24 +12,22 @@ /* functions that are embedded into includer */ -static struct sk_buff *fq_flow_dequeue(struct fq *fq, - struct fq_flow *flow) +static void fq_adjust_removal(struct fq *fq, + struct fq_flow *flow, + struct sk_buff *skb) { struct fq_tin *tin = flow->tin; - struct fq_flow *i; - struct sk_buff *skb; - - lockdep_assert_held(&fq->lock); - - skb = __skb_dequeue(&flow->queue); - if (!skb) - return NULL; tin->backlog_bytes -= skb->len; tin->backlog_packets--; flow->backlog -= skb->len; fq->backlog--; fq->memory_usage -= skb->truesize; +} + +static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow) +{ + struct fq_flow *i; if (flow->backlog == 0) { list_del_init(&flow->backlogchain); @@ -43,6 +41,21 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq, list_move_tail(&flow->backlogchain, &i->backlogchain); } +} + +static struct sk_buff *fq_flow_dequeue(struct fq *fq, + struct fq_flow *flow) +{ + struct sk_buff *skb; + + lockdep_assert_held(&fq->lock); + + skb = __skb_dequeue(&flow->queue); + if (!skb) + return NULL; + + fq_adjust_removal(fq, flow, skb); + fq_rejigger_backlog(fq, flow); return skb; } @@ -188,6 +201,45 @@ static void fq_tin_enqueue(struct fq *fq, } } +static void fq_flow_filter(struct fq *fq, + struct fq_flow *flow, + fq_skb_filter_t filter_func, + void *filter_data, + fq_skb_free_t free_func) +{ + struct fq_tin *tin = flow->tin; + struct sk_buff *skb, *tmp; + + lockdep_assert_held(&fq->lock); + + skb_queue_walk_safe(&flow->queue, skb, tmp) { + if (!filter_func(fq, tin, flow, skb, filter_data)) + continue; + + __skb_unlink(skb, &flow->queue); + fq_adjust_removal(fq, flow, skb); + free_func(fq, tin, flow, skb); + } + + fq_rejigger_backlog(fq, flow); +} + +static void fq_tin_filter(struct fq *fq, + struct fq_tin *tin, + fq_skb_filter_t filter_func, + void *filter_data, + fq_skb_free_t free_func) +{ + struct fq_flow *flow; + + lockdep_assert_held(&fq->lock); + + list_for_each_entry(flow, &tin->new_flows, flowchain) + fq_flow_filter(fq, flow, filter_func, filter_data, free_func); + list_for_each_entry(flow, &tin->old_flows, flowchain) + fq_flow_filter(fq, flow, filter_func, filter_data, free_func); +} + static void fq_flow_reset(struct fq *fq, struct fq_flow *flow, fq_skb_free_t free_func) -- cgit v1.2.3 From 4a269818a7eb8577d32d8b2879099c689ddbd856 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Oct 2017 13:27:29 -0700 Subject: tcp: fix tcp_unlink_write_queue() Yury reported crash with this signature : [ 554.034021] [] 0xffff80003ccd5a58 [ 554.034156] [] skb_release_all+0x14/0x30 [ 554.034288] [] __kfree_skb+0x14/0x28 [ 554.034409] [] tcp_sendmsg_locked+0x4dc/0xcc8 [ 554.034541] [] tcp_sendmsg+0x34/0x58 [ 554.034659] [] inet_sendmsg+0x2c/0xf8 [ 554.034783] [] sock_sendmsg+0x18/0x30 [ 554.034928] [] SyS_sendto+0x84/0xf8 Problem is that skb->destructor contains garbage, and this is because I accidentally removed tcp_skb_tsorted_anchor_cleanup() from tcp_unlink_write_queue() This would trigger with a write(fd, , len) attempt, and we will add to packetdrill this capability to avoid future regressions. Fixes: 75c119afe14f ("tcp: implement rb-tree based retransmit queue") Reported-by: Yury Norov Tested-by: Yury Norov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 5a95e5886b55..15163454174b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1712,6 +1712,7 @@ static inline void tcp_insert_write_queue_before(struct sk_buff *new, static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) { + tcp_skb_tsorted_anchor_cleanup(skb); __skb_unlink(skb, &sk->sk_write_queue); } -- cgit v1.2.3 From 843e79d05addd8eb06992cd6dfafc7b9d53f2bc8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 11 Oct 2017 09:41:07 +0200 Subject: net: sched: make tc_action_ops->get_dev return dev and avoid passing net Return dev directly, NULL if not possible. That is enough. Makes no sense to pass struct net * to get_dev op, as there is only one net possible, the one the action was created in. So just store it in mirred priv and use directly. Rename the mirred op callback function. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/act_api.h | 3 +-- include/net/tc_act/tc_mirred.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index b944e0eb93be..900168a9901e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -93,8 +93,7 @@ struct tc_action_ops { int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, const struct tc_action_ops *); void (*stats_update)(struct tc_action *, u64, u32, u64); - int (*get_dev)(const struct tc_action *a, struct net *net, - struct net_device **mirred_dev); + struct net_device *(*get_dev)(const struct tc_action *a); }; struct tc_action_net { diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 604bc31e23ab..21a656569840 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -10,6 +10,7 @@ struct tcf_mirred { int tcfm_ifindex; bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; + struct net *net; struct list_head tcfm_list; }; #define to_mirred(a) ((struct tcf_mirred *)a) -- cgit v1.2.3 From b3f55bdda8df55a563005e00b1b71212d8546541 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 11 Oct 2017 09:41:08 +0200 Subject: net: sched: introduce per-egress action device callbacks Introduce infrastructure that allows drivers to register callbacks that are called whenever tc would offload inserted rule and specified device acts as tc action egress device. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/act_api.h | 34 ++++++++++++++++++++++++++++++++++ include/net/pkt_cls.h | 2 ++ 2 files changed, 36 insertions(+) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 900168a9901e..f5e8c9048fb0 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -174,4 +174,38 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, #endif } +typedef int tc_setup_cb_t(enum tc_setup_type type, + void *type_data, void *cb_priv); + +#ifdef CONFIG_NET_CLS_ACT +int tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv); +void tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv); +int tc_setup_cb_egdev_call(const struct net_device *dev, + enum tc_setup_type type, void *type_data, + bool err_stop); +#else +static inline +int tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + return 0; +} + +static inline +void tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ +} + +static inline +int tc_setup_cb_egdev_call(const struct net_device *dev, + enum tc_setup_type type, void *type_data, + bool err_stop) +{ + return 0; +} +#endif + #endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e80edd8879ef..6f8149c82571 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -206,6 +206,8 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, struct net_device **hw_dev); +int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type, + void *type_data, bool err_stop); /** * struct tcf_pkt_info - packet information -- cgit v1.2.3 From 717503b9cf57c0bb7ea4d3a9f5699c9a04adf988 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 11 Oct 2017 09:41:09 +0200 Subject: net: sched: convert cls_flower->egress_dev users to tc_setup_cb_egdev infra The only user of cls_flower->egress_dev is mlx5. So do the conversion there alongside with the code originating the call in cls_flower function fl_hw_replace_filter to the newly introduced egress device callback infrastucture. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 6f8149c82571..c0bdf5cad727 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -206,8 +206,6 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, struct net_device **hw_dev); -int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type, - void *type_data, bool err_stop); /** * struct tcf_pkt_info - packet information @@ -407,6 +405,9 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ +int tc_setup_cb_call(struct tcf_exts *exts, enum tc_setup_type type, + void *type_data, bool err_stop); + struct tc_cls_common_offload { u32 chain_index; __be16 protocol; -- cgit v1.2.3 From 7578d7b45ed870b13a8ace57e32feaed623c2a94 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 11 Oct 2017 09:41:10 +0200 Subject: net: sched: remove unused tcf_exts_get_dev helper and cls_flower->egress_dev The helper and the struct field ares no longer used by any code, so remove them. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index c0bdf5cad727..f5263743076b 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -204,8 +204,6 @@ void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); -int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, - struct net_device **hw_dev); /** * struct tcf_pkt_info - packet information @@ -517,7 +515,6 @@ struct tc_cls_flower_offload { struct fl_flow_key *mask; struct fl_flow_key *key; struct tcf_exts *exts; - bool egress_dev; }; enum tc_matchall_command { -- cgit v1.2.3 From 437d2762ba07f0fc639d5a09acb323fe4106a61f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Oct 2017 20:45:40 -0700 Subject: tcp: remove obsolete helpers Remove three inline helpers that are no longer needed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 15163454174b..3b3b9b968e2d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1629,18 +1629,6 @@ static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk) return skb_peek_tail(&sk->sk_write_queue); } -static inline struct sk_buff *tcp_write_queue_next(const struct sock *sk, - const struct sk_buff *skb) -{ - return skb_queue_next(&sk->sk_write_queue, skb); -} - -static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk, - const struct sk_buff *skb) -{ - return skb_queue_prev(&sk->sk_write_queue, skb); -} - #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) @@ -1697,11 +1685,6 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb } } -static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb) -{ - __skb_queue_head(&sk->sk_write_queue, skb); -} - /* Insert new before skb on the write queue of sk. */ static inline void tcp_insert_write_queue_before(struct sk_buff *new, struct sk_buff *skb, -- cgit v1.2.3 From 60724d4bae14cd295b27b1610cad9a2720eb0860 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Oct 2017 10:57:48 -0700 Subject: net: dsa: Add support for DSA specific notifiers In preparation for communicating a given DSA network device's port number and switch index, create a specialized DSA notifier and two events: DSA_PORT_REGISTER and DSA_PORT_UNREGISTER that communicate: the slave network device (slave_dev), port number and switch number in the tree. This will be later used for network device drivers like bcmsysport which needs to cooperate with its DSA network devices to set-up queue mapping and scheduling. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 10dceccd9ce8..40a709a0754d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -471,4 +471,49 @@ static inline int dsa_switch_resume(struct dsa_switch *ds) } #endif /* CONFIG_PM_SLEEP */ +enum dsa_notifier_type { + DSA_PORT_REGISTER, + DSA_PORT_UNREGISTER, +}; + +struct dsa_notifier_info { + struct net_device *dev; +}; + +struct dsa_notifier_register_info { + struct dsa_notifier_info info; /* must be first */ + struct net_device *master; + unsigned int port_number; + unsigned int switch_number; +}; + +static inline struct net_device * +dsa_notifier_info_to_dev(const struct dsa_notifier_info *info) +{ + return info->dev; +} + +#if IS_ENABLED(CONFIG_NET_DSA) +int register_dsa_notifier(struct notifier_block *nb); +int unregister_dsa_notifier(struct notifier_block *nb); +int call_dsa_notifiers(unsigned long val, struct net_device *dev, + struct dsa_notifier_info *info); +#else +static inline int register_dsa_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int unregister_dsa_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev, + struct dsa_notifier_info *info) +{ + return NOTIFY_DONE; +} +#endif + #endif -- cgit v1.2.3 From 0a5f14ce67a6e093e651d3cd75e6ac281123d93a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Oct 2017 10:57:49 -0700 Subject: net: dsa: tag_brcm: Indicate to master netdevice port + queue We need to tell the DSA master network device doing the actual transmission what the desired switch port and queue number is for it to resolve that to the internal transmit queue it is mapped to. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 40a709a0754d..ce1d622734d7 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -516,4 +516,9 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev, } #endif +/* Broadcom tag specific helpers to insert and extract queue/port number */ +#define BRCM_TAG_SET_PORT_QUEUE(p, q) ((p) << 8 | q) +#define BRCM_TAG_GET_PORT(v) ((v) >> 8) +#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff) + #endif -- cgit v1.2.3 From 8f04748016f3b583e675e0f649d42cfc10812a8b Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 11 Oct 2017 10:50:29 -0400 Subject: net sched actions: change IFE modules alias names Make style of module alias name consistent with other subsystems in kernel, for example net devices. Fixes: 084e2f6566d2 ("Support to encoding decoding skb mark on IFE action") Fixes: 200e10f46936 ("Support to encoding decoding skb prio on IFE action") Fixes: 408fbc22ef1e ("net sched ife action: Introduce skb tcindex metadata encap decap") Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- include/net/tc_act/tc_ife.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 30ba459ddd34..104578f16062 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -40,7 +40,7 @@ struct tcf_meta_ops { struct module *owner; }; -#define MODULE_ALIAS_IFE_META(metan) MODULE_ALIAS("ifemeta" __stringify_1(metan)) +#define MODULE_ALIAS_IFE_META(metan) MODULE_ALIAS("ife-meta-" metan) int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi); -- cgit v1.2.3 From aa9fd9a325d51fa0b11153b03b8fefff569fa955 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 11 Oct 2017 17:16:08 -0400 Subject: sched: act: ife: update parameters via rcu handling This patch changes the parameter updating via RCU and not protected by a spinlock anymore. This reduce the time that the spinlock is being held. Signed-off-by: Alexander Aring Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_ife.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 104578f16062..c7fb99c3f76c 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -6,12 +6,18 @@ #include #include -struct tcf_ife_info { - struct tc_action common; +struct tcf_ife_params { u8 eth_dst[ETH_ALEN]; u8 eth_src[ETH_ALEN]; u16 eth_type; u16 flags; + + struct rcu_head rcu; +}; + +struct tcf_ife_info { + struct tc_action common; + struct tcf_ife_params __rcu *params; /* list of metaids allowed */ struct list_head metalist; }; -- cgit v1.2.3 From 4e8b86c062695454df0b76f3fee4fab8dc4bb716 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Thu, 7 Sep 2017 04:00:06 -0700 Subject: mqprio: Introduce new hardware offload mode and shaper in mqprio The offload types currently supported in mqprio are 0 (no offload) and 1 (offload only TCs) by setting these values for the 'hw' option. If offloads are supported by setting the 'hw' option to 1, the default offload mode is 'dcb' where only the TC values are offloaded to the device. This patch introduces a new hardware offload mode called 'channel' with 'hw' set to 1 in mqprio which makes full use of the mqprio options, the TCs, the queue configurations and the QoS parameters for the TCs. This is achieved through a new netlink attribute for the 'mode' option which takes values such as 'dcb' (default) and 'channel'. The 'channel' mode also supports QoS attributes for traffic class such as minimum and maximum values for bandwidth rate limits. This patch enables configuring additional HW shaper attributes associated with a traffic class. Currently the shaper for bandwidth rate limiting is supported which takes options such as minimum and maximum bandwidth rates and are offloaded to the hardware in the 'channel' mode. The min and max limits for bandwidth rates are provided by the user along with the TCs and the queue configurations when creating the mqprio qdisc. The interface can be extended to support new HW shapers in future through the 'shaper' attribute. Introduces a new data structure 'tc_mqprio_qopt_offload' for offloading mqprio queue options and use this to be shared between the kernel and device driver. This contains a copy of the existing data structure for mqprio queue options. This new data structure can be extended when adding new attributes for traffic class such as mode, shaper, shaper parameters (bandwidth rate limits). The existing data structure for mqprio queue options will be shared between the kernel and userspace. Example: queues 4@0 4@4 hw 1 mode channel shaper bw_rlimit\ min_rate 1Gbit 2Gbit max_rate 4Gbit 5Gbit To dump the bandwidth rates: qdisc mqprio 804a: root tc 2 map 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 queues:(0:3) (4:7) mode:channel shaper:bw_rlimit min_rate:1Gbit 2Gbit max_rate:4Gbit 5Gbit Signed-off-by: Amritha Nambiar Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/net/pkt_cls.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f5263743076b..60d39789e4f0 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -546,6 +546,15 @@ struct tc_cls_bpf_offload { u32 gen_flags; }; +struct tc_mqprio_qopt_offload { + /* struct tc_mqprio_qopt must always be the first element */ + struct tc_mqprio_qopt qopt; + u16 mode; + u16 shaper; + u32 flags; + u64 min_rate[TC_QOPT_MAX_QUEUE]; + u64 max_rate[TC_QOPT_MAX_QUEUE]; +}; /* This structure holds cookie structure that is passed from user * to the kernel for actions and classifiers -- cgit v1.2.3 From 841f4f24053acad69240c6ab7427a1d24bc29491 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 13 Oct 2017 14:18:09 -0400 Subject: net: dsa: remove .set_addr Now that there is no user for the .set_addr function, remove it from DSA. If a switch supports this feature (like mv88e6xxx), the implementation can be done in the driver setup. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index ce1d622734d7..2746741f74cf 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -291,7 +291,6 @@ struct dsa_switch_ops { enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds); int (*setup)(struct dsa_switch *ds); - int (*set_addr)(struct dsa_switch *ds, u8 *addr); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); /* -- cgit v1.2.3 From 69d78ef25c7b0058674145500efb12255738ba8a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:00:57 +0200 Subject: net: sched: store Qdisc pointer in struct block Prepare for removal of tp->q and store Qdisc pointer in the block structure. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 ++-- include/net/sch_generic.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 60d39789e4f0..e6c9e1e4d711 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -22,7 +22,7 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); void tcf_chain_put(struct tcf_chain *chain); int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain); + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q); void tcf_block_put(struct tcf_block *block); int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); @@ -30,7 +30,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, #else static inline int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain) + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) { return 0; } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 684d8ed27eaa..df4032ca1b7f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -270,6 +270,7 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; + struct Qdisc *q; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) -- cgit v1.2.3 From 855319becbcffec6988a4e781a861b69a71c5b58 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:00:58 +0200 Subject: net: sched: store net pointer in block and introduce qdisc_net helper Store net pointer in the block structure. Along the way, introduce qdisc_net helper which allows to easily obtain net pointer for qdisc instance. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 7 +++++++ include/net/sch_generic.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 259bc191ba59..2d234af15f3e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -4,7 +4,9 @@ #include #include #include +#include #include +#include #include #define DEFAULT_TX_QUEUE_LEN 1000 @@ -146,4 +148,9 @@ static inline bool is_classid_clsact_egress(u32 classid) TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS); } +static inline struct net *qdisc_net(struct Qdisc *q) +{ + return dev_net(q->dev_queue->dev); +} + #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index df4032ca1b7f..9b2cb91dc0d9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -270,6 +270,7 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; + struct net *net; struct Qdisc *q; }; -- cgit v1.2.3 From 44186460c85a0121562db7cfef132d63c869118f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:00:59 +0200 Subject: net: sched: introduce tcf_block_q and tcf_block_dev helpers These helpers allows to get a q and netdev pointers for given block easily. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e6c9e1e4d711..7bed674ba29a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -24,6 +24,17 @@ void tcf_chain_put(struct tcf_chain *chain); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q); void tcf_block_put(struct tcf_block *block); + +static inline struct Qdisc *tcf_block_q(struct tcf_block *block) +{ + return block->q; +} + +static inline struct net_device *tcf_block_dev(struct tcf_block *block) +{ + return tcf_block_q(block)->dev_queue->dev; +} + int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); @@ -39,6 +50,16 @@ static inline void tcf_block_put(struct tcf_block *block) { } +static inline struct Qdisc *tcf_block_q(struct tcf_block *block) +{ + return NULL; +} + +static inline struct net_device *tcf_block_dev(struct tcf_block *block) +{ + return NULL; +} + static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { -- cgit v1.2.3 From 34e3759cf86a3e75463e34c1bb9691777406a175 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:01:00 +0200 Subject: net: sched: teach tcf_bind/unbind_filter to use block->q Whenever the block->q is set, it can be used instead of tp->q as it contains the same value. When it is not set, which can't happen now but it might happen with the follow-up shared blocks introduction, the class is not set in the result. That would lead to a class lookup instead of direct class pointer use for classful qdiscs. However, it is not planned to support classful qdisqs sharing filter blocks, so that may never happen. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 7bed674ba29a..49a143e0fe65 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -74,36 +74,43 @@ __cls_set_class(unsigned long *clp, unsigned long cl) } static inline unsigned long -cls_set_class(struct tcf_proto *tp, unsigned long *clp, - unsigned long cl) +cls_set_class(struct Qdisc *q, unsigned long *clp, unsigned long cl) { unsigned long old_cl; - - tcf_tree_lock(tp); + + sch_tree_lock(q); old_cl = __cls_set_class(clp, cl); - tcf_tree_unlock(tp); - + sch_tree_unlock(q); return old_cl; } static inline void tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base) { + struct Qdisc *q = tp->chain->block->q; unsigned long cl; - cl = tp->q->ops->cl_ops->bind_tcf(tp->q, base, r->classid); - cl = cls_set_class(tp, &r->class, cl); + /* Check q as it is not set for shared blocks. In that case, + * setting class is not supported. + */ + if (!q) + return; + cl = q->ops->cl_ops->bind_tcf(q, base, r->classid); + cl = cls_set_class(q, &r->class, cl); if (cl) - tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); + q->ops->cl_ops->unbind_tcf(q, cl); } static inline void tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) { + struct Qdisc *q = tp->chain->block->q; unsigned long cl; + if (!q) + return; if ((cl = __cls_set_class(&r->class, 0)) != 0) - tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); + q->ops->cl_ops->unbind_tcf(q, cl); } struct tcf_exts { -- cgit v1.2.3 From 74e3be6021d22df2ffcb691eae1affeb2bd0128e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:01:04 +0200 Subject: net: sched: use tcf_block_q helper to get q pointer for sch_tree_lock Use tcf_block_q helper to get q pointer to be used for direct call of sch_tree_lock/unlock instead of tcf_tree_lock/unlock. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9b2cb91dc0d9..0aea9e23e97a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -359,9 +359,6 @@ static inline void sch_tree_unlock(const struct Qdisc *q) spin_unlock_bh(qdisc_root_sleeping_lock(q)); } -#define tcf_tree_lock(tp) sch_tree_lock((tp)->q) -#define tcf_tree_unlock(tp) sch_tree_unlock((tp)->q) - extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; -- cgit v1.2.3 From 0da4af00b2ed3dbe46788623a696c4169447eadc Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 13 Oct 2017 15:08:07 -0700 Subject: ipv6: only update __use and lastusetime once per jiffy at most In order to not dirty the cacheline too often, we try to only update dst->__use and dst->lastusetime at most once per jiffy. As dst->lastusetime is only used by ipv6 garbage collector, it should be good enough time resolution. And __use is only used in ipv6_route_seq_show() to show how many times a dst has been used. And as __use is not atomic_t right now, it does not show the precise number of usage times anyway. So we think it should be OK to only update it at most once per jiffy. According to my latest syn flood test on a machine with intel Xeon 6th gen processor and 2 10G mlx nics bonded together, each with 8 rx queues on 2 NUMA nodes: With this patch, the packet process rate increases from ~3.49Mpps to ~3.75Mpps with a 7% increase rate. Note: dst_use() is being renamed to dst_hold_and_use() to better specify the purpose of the function. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 204c19e25456..5047e8053d6c 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -255,17 +255,18 @@ static inline void dst_hold(struct dst_entry *dst) WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); } -static inline void dst_use(struct dst_entry *dst, unsigned long time) +static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) { - dst_hold(dst); - dst->__use++; - dst->lastuse = time; + if (time != dst->lastuse) { + dst->__use++; + dst->lastuse = time; + } } -static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) +static inline void dst_hold_and_use(struct dst_entry *dst, unsigned long time) { - dst->__use++; - dst->lastuse = time; + dst_hold(dst); + dst_use_noref(dst, time); } static inline struct dst_entry *dst_clone(struct dst_entry *dst) -- cgit v1.2.3 From 0bfe649fbb1337400065fa47679b381b2ac845f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 16 Oct 2017 17:05:57 +0200 Subject: fq_impl: Properly enforce memory limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fq structure would fail to properly enforce the memory limit in the case where the packet being enqueued was bigger than the packet being removed to bring the memory usage down. So keep dropping packets until the memory usage is back below the limit. Also, fix the statistics for memory limit violations. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- include/net/fq_impl.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 4e6131cd3f43..ac1a2317941e 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -146,6 +146,7 @@ static void fq_tin_enqueue(struct fq *fq, fq_flow_get_default_t get_default_func) { struct fq_flow *flow; + bool oom; lockdep_assert_held(&fq->lock); @@ -167,8 +168,8 @@ static void fq_tin_enqueue(struct fq *fq, } __skb_queue_tail(&flow->queue, skb); - - if (fq->backlog > fq->limit || fq->memory_usage > fq->memory_limit) { + oom = (fq->memory_usage > fq->memory_limit); + while (fq->backlog > fq->limit || oom) { flow = list_first_entry_or_null(&fq->backlogs, struct fq_flow, backlogchain); @@ -183,8 +184,10 @@ static void fq_tin_enqueue(struct fq *fq, flow->tin->overlimit++; fq->overlimit++; - if (fq->memory_usage > fq->memory_limit) + if (oom) { fq->overmemory++; + oom = (fq->memory_usage > fq->memory_limit); + } } } -- cgit v1.2.3 From a68f4a27f55f1d54e35c270aff89383da4b1b656 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Oct 2017 11:36:39 +0100 Subject: rxrpc: Support service upgrade from a kernel service Provide support for a kernel service to make use of the service upgrade facility. This involves: (1) Pass an upgrade request flag to rxrpc_kernel_begin_call(). (2) Make rxrpc_kernel_recv_data() return the call's current service ID so that the caller can detect service upgrade and see what the service was upgraded to. Signed-off-by: David Howells --- include/net/af_rxrpc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 3ac79150291f..820dd365a08e 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -49,12 +49,13 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, unsigned long, s64, gfp_t, - rxrpc_notify_rx_t); + rxrpc_notify_rx_t, + bool); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t, rxrpc_notify_end_tx_t); int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, - void *, size_t, size_t *, bool, u32 *); + void *, size_t, size_t *, bool, u32 *, u16 *); bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32, int, const char *); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); -- cgit v1.2.3 From f4d15fb6f99af9b99f688bd87579137be44f85ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Oct 2017 11:07:31 +0100 Subject: rxrpc: Provide functions for allowing cleaner handling of signals Provide a couple of functions to allow cleaner handling of signals in a kernel service. They are: (1) rxrpc_kernel_get_rtt() This allows the kernel service to find out the RTT time for a call, so as to better judge how large a timeout to employ. Note, though, that whilst this returns a value in nanoseconds, the timeouts can only actually be in jiffies. (2) rxrpc_kernel_check_life() This returns a number that is updated when ACKs are received from the peer (notably including PING RESPONSE ACKs which we can elicit by sending PING ACKs to see if the call still exists on the server). The caller should compare the numbers of two calls to see if the call is still alive. These can be used to provide an extending timeout rather than returning immediately in the case that a signal occurs that would otherwise abort an RPC operation. The timeout would be extended if the server is still responsive and the call is still apparently alive on the server. For most operations this isn't that necessary - but for FS.StoreData it is: OpenAFS writes the data to storage as it comes in without making a backup, so if we immediately abort it when partially complete on a CTRL+C, say, we have no idea of the state of the file after the abort. Signed-off-by: David Howells --- include/net/af_rxrpc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 820dd365a08e..2b3a6eec4570 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -61,6 +61,7 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); +u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); @@ -68,5 +69,6 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *, struct key *); int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, enum rxrpc_call_completion *, u32 *); +u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *); #endif /* _NET_RXRPC_H */ -- cgit v1.2.3 From f8b8b1cd5aadd221742b45eb0ee3c8a80abf036a Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 16 Oct 2017 11:12:18 -0400 Subject: net: dsa: split dsa_port's netdev member The dsa_port structure has a "netdev" member, which can be used for either the master device, or the slave device, depending on its type. It is true that today, CPU port are not exposed to userspace, thus the port's netdev member can be used to point to its master interface. But it is still slightly confusing, so split it into more explicit "master" and "slave" members inside an anonymous union. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 2746741f74cf..6ed1a17ed1bd 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -164,6 +164,14 @@ struct dsa_mall_tc_entry { struct dsa_port { + /* A CPU port is physically connected to a master device. + * A user port exposed to userspace has a slave device. + */ + union { + struct net_device *master; + struct net_device *slave; + }; + /* CPU port tagging operations used by master or slave devices */ const struct dsa_device_ops *tag_ops; @@ -176,7 +184,6 @@ struct dsa_port { unsigned int index; const char *name; struct dsa_port *cpu_dp; - struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; u8 stp_state; -- cgit v1.2.3 From c8652c83bc84ac8db44060ced0036de7628aa5e5 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 16 Oct 2017 11:12:19 -0400 Subject: net: dsa: add dsa_to_port helper The dsa_port structure is part of DSA core data and must only be updated by the later. It is OK and sometimes necessary for the DSA drivers to access this data, but this has to be read only. For that purpose, add a dsa_to_port() helper which returns a const pointer to a dsa_port structure which must be used by DSA drivers from now on instead of digging into ds->ports[] themselves. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 6ed1a17ed1bd..38961ef91d3d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -269,6 +269,11 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); } +static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) +{ + return &ds->ports[p]; +} + static inline u8 dsa_upstream_port(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; -- cgit v1.2.3 From eb4ddaf474285a4c6986f4a1c3205bdb0bed2da9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:45 -0700 Subject: net/decnet: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Johannes Berg Cc: David Ahern Cc: linux-decnet-user@lists.sourceforge.net Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- include/net/dn.h | 7 ------- include/net/dn_nsp.h | 1 - 2 files changed, 8 deletions(-) (limited to 'include/net') diff --git a/include/net/dn.h b/include/net/dn.h index 913b73d239f5..4394f7d5cfe8 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -122,13 +122,6 @@ struct dn_scp /* Session Control Port */ unsigned long keepalive; void (*keepalive_fxn)(struct sock *sk); - /* - * This stuff is for the fast timer for delayed acks - */ - struct timer_list delack_timer; - int delack_pending; - void (*delack_fxn)(struct sock *sk); - }; static inline struct dn_scp *DN_SK(struct sock *sk) diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h index 3a3e33d18456..413a15e5339c 100644 --- a/include/net/dn_nsp.h +++ b/include/net/dn_nsp.h @@ -17,7 +17,6 @@ void dn_nsp_send_data_ack(struct sock *sk); void dn_nsp_send_oth_ack(struct sock *sk); -void dn_nsp_delayed_ack(struct sock *sk); void dn_send_conn_ack(struct sock *sk); void dn_send_conn_conf(struct sock *sk, gfp_t gfp); void dn_nsp_send_disc(struct sock *sk, unsigned char type, -- cgit v1.2.3 From 59f379f9046a9e0532ffd19b44e3c32fe79ec51b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:19 -0700 Subject: inet/connection_sock: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Gerrit Renker Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: netdev@vger.kernel.org Cc: dccp@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 13e4c89a8231..0358745ea059 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -169,9 +169,9 @@ enum inet_csk_ack_state_t { }; void inet_csk_init_xmit_timers(struct sock *sk, - void (*retransmit_handler)(unsigned long), - void (*delack_handler)(unsigned long), - void (*keepalive_handler)(unsigned long)); + void (*retransmit_handler)(struct timer_list *), + void (*delack_handler)(struct timer_list *), + void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) -- cgit v1.2.3 From 78802011fbe34331bdef6f2dfb1634011f0e4c32 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:20 -0700 Subject: inet: frags: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Alexander Aring Cc: Stefan Schmidt Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Cc: linux-wpan@vger.kernel.org Cc: netdev@vger.kernel.org Cc: netfilter-devel@vger.kernel.org Cc: coreteam@netfilter.org Signed-off-by: Kees Cook Acked-by: Stefan Schmidt # for ieee802154 Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index fc59e0775e00..c695807ca707 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -95,7 +95,7 @@ struct inet_frags { void (*constructor)(struct inet_frag_queue *q, const void *arg); void (*destructor)(struct inet_frag_queue *); - void (*frag_expire)(unsigned long data); + void (*frag_expire)(struct timer_list *t); struct kmem_cache *frags_cachep; const char *frags_cache_name; }; -- cgit v1.2.3 From 34f79502bbcfab659b8729da68b5e387f96eb4c1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:36 -0700 Subject: bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region SK_SKB BPF programs are run from the socket/tcp context but early in the stack before much of the TCP metadata is needed in tcp_skb_cb. So we can use some unused fields to place BPF metadata needed for SK_SKB programs when implementing the redirect function. This allows us to drop the preempt disable logic. It does however require an API change so sk_redirect_map() has been updated to additionally provide ctx_ptr to skb. Note, we do however continue to disable/enable preemption around actual BPF program running to account for map updates. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/tcp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 89974c5286d8..b1ef98ebce53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -840,6 +840,11 @@ struct tcp_skb_cb { struct inet6_skb_parm h6; #endif } header; /* For incoming skbs */ + struct { + __u32 key; + __u32 flags; + struct bpf_map *map; + } bpf; }; }; -- cgit v1.2.3 From de95e04791a03de5cb681980a3880db6919e3b4a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 18 Oct 2017 09:56:54 -0700 Subject: net: Add extack to validator_info structs used for address notifier Add extack to in_validator_info and in6_validator_info. Update the one user of each, ipvlan, to return an error message for failures. Only manual configuration of an address is plumbed in the IPv6 code path. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 87981cd63180..b8b16437c6d5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -55,6 +55,7 @@ struct prefix_info { struct in6_validator_info { struct in6_addr i6vi_addr; struct inet6_dev *i6vi_dev; + struct netlink_ext_ack *extack; }; #define IN6_ADDR_HSIZE_SHIFT 4 -- cgit v1.2.3 From 1fba70e5b6bed53496ba1f1f16127f5be01b5fb6 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Oct 2017 11:22:51 -0700 Subject: tcp: socket option to set TCP fast open key New socket option TCP_FASTOPEN_KEY to allow different keys per listener. The listener by default uses the global key until the socket option is set. The key is a 16 bytes long binary data. This option has no effect on regular non-listener TCP sockets. Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Christoph Paasch Signed-off-by: David S. Miller --- include/net/request_sock.h | 2 ++ include/net/tcp.h | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 23e22054aa60..347015515a7d 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -150,6 +150,8 @@ struct fastopen_queue { spinlock_t lock; int qlen; /* # of pending (TCP_SYN_RECV) reqs */ int max_qlen; /* != 0 iff TFO is currently enabled */ + + struct tcp_fastopen_context __rcu *ctx; /* cipher context for cookie */ }; /** struct request_sock_queue - queue of request_socks diff --git a/include/net/tcp.h b/include/net/tcp.h index 3b3b9b968e2d..1efe8365cb28 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1555,9 +1555,10 @@ struct tcp_fastopen_request { int copied; /* queued in tcp_connect() */ }; void tcp_free_fastopen_req(struct tcp_sock *tp); - +void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_ctx_destroy(struct net *net); -int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len); +int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, + void *key, unsigned int len); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, -- cgit v1.2.3 From c92e8c02fe664155ac4234516e32544bec0f113d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2017 09:04:13 -0700 Subject: tcp/dccp: fix ireq->opt races syzkaller found another bug in DCCP/TCP stacks [1] For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix ireq->pktopts race"), we need to make sure we do not access ireq->opt unless we own the request sock. Note the opt field is renamed to ireq_opt to ease grep games. [1] BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295 CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427 ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135 tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587 tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557 __tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072 tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline] tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071 tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816 tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:249 [inline] ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:464 [inline] ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:249 [inline] ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 netif_receive_skb+0xae/0x390 net/core/dev.c:4611 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 call_write_iter include/linux/fs.h:1770 [inline] new_sync_write fs/read_write.c:468 [inline] __vfs_write+0x68a/0x970 fs/read_write.c:481 vfs_write+0x18f/0x510 fs/read_write.c:543 SYSC_write fs/read_write.c:588 [inline] SyS_write+0xef/0x220 fs/read_write.c:580 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x40c341 RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341 RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1 R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000 Allocated by task 3295: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 __do_kmalloc mm/slab.c:3725 [inline] __kmalloc+0x162/0x760 mm/slab.c:3734 kmalloc include/linux/slab.h:498 [inline] tcp_v4_save_options include/net/tcp.h:1962 [inline] tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271 tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283 tcp_v4_conn_request+0x157/0x210 net/ipv4/tcp_ipv4.c:1313 tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857 tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482 tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:249 [inline] ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:464 [inline] ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:249 [inline] ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 netif_receive_skb+0xae/0x390 net/core/dev.c:4611 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 call_write_iter include/linux/fs.h:1770 [inline] new_sync_write fs/read_write.c:468 [inline] __vfs_write+0x68a/0x970 fs/read_write.c:481 vfs_write+0x18f/0x510 fs/read_write.c:543 SYSC_write fs/read_write.c:588 [inline] SyS_write+0xef/0x220 fs/read_write.c:580 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 3306: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kfree+0xca/0x250 mm/slab.c:3820 inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157 __sk_destruct+0xfd/0x910 net/core/sock.c:1560 sk_destruct+0x47/0x80 net/core/sock.c:1595 __sk_free+0x57/0x230 net/core/sock.c:1603 sk_free+0x2a/0x40 net/core/sock.c:1614 sock_put include/net/sock.h:1652 [inline] inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959 tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765 tcp_v4_rcv+0x17f6/0x2f80 net/ipv4/tcp_ipv4.c:1675 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:249 [inline] ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:464 [inline] ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:249 [inline] ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 netif_receive_skb+0xae/0x390 net/core/dev.c:4611 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 call_write_iter include/linux/fs.h:1770 [inline] new_sync_write fs/read_write.c:468 [inline] __vfs_write+0x68a/0x970 fs/read_write.c:481 vfs_write+0x18f/0x510 fs/read_write.c:543 SYSC_write fs/read_write.c:588 [inline] SyS_write+0xef/0x220 fs/read_write.c:580 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index aa95053dfc78..425752f768d2 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -96,7 +96,7 @@ struct inet_request_sock { kmemcheck_bitfield_end(flags); u32 ir_mark; union { - struct ip_options_rcu *opt; + struct ip_options_rcu __rcu *ireq_opt; #if IS_ENABLED(CONFIG_IPV6) struct { struct ipv6_txoptions *ipv6_opt; -- cgit v1.2.3 From 8c4083b30e56fc71b0e94c26374b32d95d5ea461 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:29 +0200 Subject: net: sched: add block bind/unbind notif. and extended block_get/put Introduce new type of ndo_setup_tc message to propage binding/unbinding of a block to driver. Call this ndo whenever qdisc gets/puts a block. Alongside with this, there's need to propagate binder type from qdisc code down to the notifier. So introduce extended variants of block_get/put in order to pass this info. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 49a143e0fe65..41bc7d774047 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -17,13 +17,27 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +enum tcf_block_binder_type { + TCF_BLOCK_BINDER_TYPE_UNSPEC, +}; + +struct tcf_block_ext_info { + enum tcf_block_binder_type binder_type; +}; + #ifdef CONFIG_NET_CLS struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); void tcf_chain_put(struct tcf_chain *chain); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q); +int tcf_block_get_ext(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct tcf_block_ext_info *ei); void tcf_block_put(struct tcf_block *block); +void tcf_block_put_ext(struct tcf_block *block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct tcf_block_ext_info *ei); static inline struct Qdisc *tcf_block_q(struct tcf_block *block) { @@ -46,10 +60,25 @@ int tcf_block_get(struct tcf_block **p_block, return 0; } +static inline +int tcf_block_get_ext(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ + return 0; +} + static inline void tcf_block_put(struct tcf_block *block) { } +static inline +void tcf_block_put_ext(struct tcf_block *block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ +} + static inline struct Qdisc *tcf_block_q(struct tcf_block *block) { return NULL; @@ -434,6 +463,17 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) int tc_setup_cb_call(struct tcf_exts *exts, enum tc_setup_type type, void *type_data, bool err_stop); +enum tc_block_command { + TC_BLOCK_BIND, + TC_BLOCK_UNBIND, +}; + +struct tc_block_offload { + enum tc_block_command command; + enum tcf_block_binder_type binder_type; + struct tcf_block *block; +}; + struct tc_cls_common_offload { u32 chain_index; __be16 protocol; -- cgit v1.2.3 From 6e40cf2d4dee9dc22ff398041ce876bef8172dea Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:30 +0200 Subject: net: sched: use extended variants of block_get/put in ingress and clsact qdiscs Use previously introduced extended variants of block get and put functions. This allows to specify a binder types specific to clsact ingress/egress which is useful for drivers to distinguish who actually got the block. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 41bc7d774047..5c50af8f7183 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -19,6 +19,8 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); enum tcf_block_binder_type { TCF_BLOCK_BINDER_TYPE_UNSPEC, + TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS, + TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS, }; struct tcf_block_ext_info { -- cgit v1.2.3 From acb674428c3d57bccbe3f4a1a7a009f6d73e9f41 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:31 +0200 Subject: net: sched: introduce per-block callbacks Introduce infrastructure that allows drivers to register callbacks that are called whenever tc would offload inserted rule for a specific block. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 81 +++++++++++++++++++++++++++++++++++++++++++++++ include/net/sch_generic.h | 1 + 2 files changed, 82 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 5c50af8f7183..4bc6b1cc245d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -27,6 +27,8 @@ struct tcf_block_ext_info { enum tcf_block_binder_type binder_type; }; +struct tcf_block_cb; + #ifdef CONFIG_NET_CLS struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); @@ -51,6 +53,21 @@ static inline struct net_device *tcf_block_dev(struct tcf_block *block) return tcf_block_q(block)->dev_queue->dev; } +void *tcf_block_cb_priv(struct tcf_block_cb *block_cb); +struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident); +void tcf_block_cb_incref(struct tcf_block_cb *block_cb); +unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb); +struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv); +int tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv); +void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb); +void tcf_block_cb_unregister(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident); + int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); @@ -91,6 +108,70 @@ static inline struct net_device *tcf_block_dev(struct tcf_block *block) return NULL; } +static inline +int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb, + void *cb_priv) +{ + return 0; +} + +static inline +void tc_setup_cb_block_unregister(struct tcf_block *block, tc_setup_cb_t *cb, + void *cb_priv) +{ +} + +static inline +void *tcf_block_cb_priv(struct tcf_block_cb *block_cb) +{ + return NULL; +} + +static inline +struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ + return NULL; +} + +static inline +void tcf_block_cb_incref(struct tcf_block_cb *block_cb) +{ +} + +static inline +unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb) +{ + return 0; +} + +static inline +struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + return NULL; +} + +static inline +int tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + return 0; +} + +static inline +void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb) +{ +} + +static inline +void tcf_block_cb_unregister(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ +} + static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0aea9e23e97a..031dffd5836c 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -272,6 +272,7 @@ struct tcf_block { struct list_head chain_list; struct net *net; struct Qdisc *q; + struct list_head cb_list; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) -- cgit v1.2.3 From 208c0f4b5237f1d6611b2c679a8022d6901577d6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:32 +0200 Subject: net: sched: use tc_setup_cb_call to call per-block callbacks Extend the tc_setup_cb_call entrypoint function originally used only for action egress devices callbacks to call per-block callbacks as well. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4bc6b1cc245d..fcca5a9d9880 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -543,8 +543,8 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ -int tc_setup_cb_call(struct tcf_exts *exts, enum tc_setup_type type, - void *type_data, bool err_stop); +int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, + enum tc_setup_type type, void *type_data, bool err_stop); enum tc_block_command { TC_BLOCK_BIND, -- cgit v1.2.3 From d58d31a118690b578897749feda48416ac10ca43 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:47 +0200 Subject: net: sched: remove unused classid field from tc_cls_common_offload It is no longer used by the drivers, so remove it. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index fcca5a9d9880..04caa246e747 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -561,7 +561,6 @@ struct tc_cls_common_offload { u32 chain_index; __be16 protocol; u32 prio; - u32 classid; }; static inline void @@ -571,7 +570,6 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; cls_common->prio = tp->prio; - cls_common->classid = tp->classid; } struct tc_cls_u32_knode { -- cgit v1.2.3 From fa71212e91811ac67014ad19d4fc3b3c3446ccf7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:48 +0200 Subject: net: sched: remove unused is_classid_clsact_ingress/egress helpers These helpers are no longer in use by drivers, so remove them. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2d234af15f3e..b8ecafce4ba1 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -135,19 +135,6 @@ static inline unsigned int psched_mtu(const struct net_device *dev) return dev->mtu + dev->hard_header_len; } -static inline bool is_classid_clsact_ingress(u32 classid) -{ - /* This also returns true for ingress qdisc */ - return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && - TC_H_MIN(classid) != TC_H_MIN(TC_H_MIN_EGRESS); -} - -static inline bool is_classid_clsact_egress(u32 classid) -{ - return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && - TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS); -} - static inline struct net *qdisc_net(struct Qdisc *q) { return dev_net(q->dev_queue->dev); -- cgit v1.2.3 From 3f27fb23219e75343b094366f2358bff34300493 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Oct 2017 16:17:47 -0700 Subject: ipv6: addrconf: add per netns perturbation in inet6_addr_hash() Bring IPv6 in par with IPv4 : - Use net_hash_mix() to spread addresses a bit more. - Use 256 slots hash table instead of 16 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index b8b16437c6d5..15b5ffd7253d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -58,7 +58,7 @@ struct in6_validator_info { struct netlink_ext_ack *extack; }; -#define IN6_ADDR_HSIZE_SHIFT 4 +#define IN6_ADDR_HSIZE_SHIFT 8 #define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) int addrconf_init(void); -- cgit v1.2.3 From b6f4f8484d88b69f700907200a9a9ec73806355f Mon Sep 17 00:00:00 2001 From: Tim Hansen Date: Mon, 23 Oct 2017 15:35:58 -0400 Subject: net/sock: Update sk rcu iterator macro. Mark hlist node in sk rcu iterator as protected by the rcu. hlist_next_rcu accomplishes this and silences the warnings sparse throws. Found with make C=1 net/ipv4/udp.o on linux-next tag next-20171009. Signed-off-by: Tim Hansen Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 4827094f1db4..6f1be9726e02 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -737,10 +737,10 @@ static inline void sk_add_bind_node(struct sock *sk, * */ #define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \ - for (pos = rcu_dereference((head)->first); \ + for (pos = rcu_dereference(hlist_first_rcu(head)); \ pos != NULL && \ ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ - pos = rcu_dereference(pos->next)) + pos = rcu_dereference(hlist_next_rcu(pos))) static inline struct user_namespace *sk_user_ns(struct sock *sk) { -- cgit v1.2.3 From 71c02379c762cb616c00fd5c4ed253fbf6bbe11b Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 23 Oct 2017 13:22:23 -0700 Subject: tcp: Configure TFO without cookie per socket and/or per route We already allow to enable TFO without a cookie by using the fastopen-sysctl and setting it to TFO_SERVER_COOKIE_NOT_REQD (or TFO_CLIENT_NO_COOKIE). This is safe to do in certain environments where we know that there isn't a malicous host (aka., data-centers) or when the application-protocol already provides an authentication mechanism in the first flight of data. A server however might be providing multiple services or talking to both sides (public Internet and data-center). So, this server would want to enable cookie-less TFO for certain services and/or for connections that go to the data-center. This patch exposes a socket-option and a per-route attribute to enable such fine-grained configurations. Signed-off-by: Christoph Paasch Reviewed-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/net/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 2c13484704cb..2392f74074e7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1567,7 +1567,8 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct tcp_fastopen_cookie *foc); + struct tcp_fastopen_cookie *foc, + const struct dst_entry *dst); void tcp_fastopen_init_key_once(struct net *net); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); -- cgit v1.2.3 From c4f3db15958277c03d1c324894255ea3ecbf86e1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2017 10:47:40 +0200 Subject: netfilter: conntrack: add and use nf_l4proto_log_invalid We currently pass down the l4 protocol to the conntrack ->packet() function, but the only user of this is the debug info decision. Same information can be derived from struct nf_conn. As a first step, add and use a new log function for this, similar to nf_ct_helper_log(). Add __cold annotation -- invalid packets should be infrequent so gcc can consider all call paths that lead to such a function as unlikely. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 738a0307a96b..6d79a061d360 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -152,8 +152,18 @@ extern const struct nla_policy nf_ct_port_nla_policy[]; #define LOG_INVALID(net, proto) \ ((net)->ct.sysctl_log_invalid == (proto) || \ (net)->ct.sysctl_log_invalid == IPPROTO_RAW) + +__printf(5, 6) __cold +void nf_l4proto_log_invalid(const struct sk_buff *skb, + struct net *net, + u16 pf, u8 protonum, + const char *fmt, ...); #else static inline int LOG_INVALID(struct net *net, int proto) { return 0; } + +static inline __printf(5, 6) __cold +void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, + u16 pf, u8 protonum, const char *fmt, ...) {} #endif /* CONFIG_SYSCTL */ #endif /*_NF_CONNTRACK_PROTOCOL_H*/ -- cgit v1.2.3 From 3d0b527bc9dc0e8c4428eb1a98d4cd27bd1114c7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2017 10:47:41 +0200 Subject: netfilter: conntrack: add and use nf_ct_l4proto_log_invalid We currently pass down the l4 protocol to the conntrack ->packet() function, but the only user of this is the debug info decision. Same information can be derived from struct nf_conn. Add a wrapper for the previous patch that extracs the information from nf_conn and passes it to nf_l4proto_log_invalid(). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 6d79a061d360..5d51255b5bfb 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -149,21 +149,23 @@ int nf_ct_port_nlattr_tuple_size(void); extern const struct nla_policy nf_ct_port_nla_policy[]; #ifdef CONFIG_SYSCTL -#define LOG_INVALID(net, proto) \ - ((net)->ct.sysctl_log_invalid == (proto) || \ - (net)->ct.sysctl_log_invalid == IPPROTO_RAW) - +__printf(3, 4) __cold +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const char *fmt, ...); __printf(5, 6) __cold void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, u16 pf, u8 protonum, const char *fmt, ...); #else -static inline int LOG_INVALID(struct net *net, int proto) { return 0; } - static inline __printf(5, 6) __cold void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, u16 pf, u8 protonum, const char *fmt, ...) {} +static inline __printf(3, 4) __cold +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const char *fmt, ...) { } #endif /* CONFIG_SYSCTL */ #endif /*_NF_CONNTRACK_PROTOCOL_H*/ -- cgit v1.2.3 From eb6fad5a4a328b85d3faa8b301b522e3f316b49d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2017 10:47:42 +0200 Subject: netfilter: conntrack: remove pf argument from l4 packet functions not needed/used anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 5d51255b5bfb..e06518874144 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -42,7 +42,6 @@ struct nf_conntrack_l4proto { const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts); /* Called when a new connection for this protocol found; -- cgit v1.2.3 From 28efb0046512e8a13ed9f9bdf0d68d10bbfbe9cf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 Oct 2017 09:38:30 +0200 Subject: netfilter: conntrack: make l3proto trackers const previous patches removed all writes to them. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 2 +- include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 919e4e8af327..5534ecca7a5d 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -10,7 +10,7 @@ #define _NF_CONNTRACK_IPV4_H -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; +const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index eaea968f8657..30dc57980866 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -1,7 +1,7 @@ #ifndef _NF_CONNTRACK_IPV6_H #define _NF_CONNTRACK_IPV6_H -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; +extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; -- cgit v1.2.3 From 829385f08ae99740276cbd46c9db29764c519211 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 20 Oct 2017 16:40:43 -0700 Subject: strparser: Use delayed work instead of timer for msg timeout Sock lock may be taken in the message timer function which is a problem since timers run in BH. Instead of timers use delayed_work. Reported-by: Eric Dumazet Fixes: bbb03029a899 ("strparser: Generalize strparser") Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/strparser.h b/include/net/strparser.h index 7dc131d62ad5..d96b59f45eba 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -74,10 +74,9 @@ struct strparser { u32 unrecov_intr : 1; struct sk_buff **skb_nextp; - struct timer_list msg_timer; struct sk_buff *skb_head; unsigned int need_bytes; - struct delayed_work delayed_work; + struct delayed_work msg_timer_work; struct work_struct work; struct strp_stats stats; struct strp_callbacks cb; -- cgit v1.2.3 From ef5201c83d1400570a3b6f004ad7a23d71934411 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 24 Oct 2017 13:54:20 +0800 Subject: bonding: remove rtmsg_ifinfo called after bond_lower_state_changed After the patch 'rtnetlink: bring NETDEV_CHANGELOWERSTATE event process back to rtnetlink_event', bond_lower_state_changed would generate NETDEV_CHANGEUPPER event which would send a notification to userspace in rtnetlink_event. There's no need to call rtmsg_ifinfo to send the notification any more. So this patch is to remove it from these places after bond_lower_state_changed. Besides, after this, rtmsg_ifinfo is not needed to be exported. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/bonding.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/bonding.h b/include/net/bonding.h index 2860cc66c2bb..f801fc940b29 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -330,7 +330,6 @@ static inline void bond_set_active_slave(struct slave *slave) slave->backup = 0; bond_queue_slave_event(slave); bond_lower_state_changed(slave); - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -340,7 +339,6 @@ static inline void bond_set_backup_slave(struct slave *slave) slave->backup = 1; bond_queue_slave_event(slave); bond_lower_state_changed(slave); - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -353,7 +351,6 @@ static inline void bond_set_slave_state(struct slave *slave, slave->backup = slave_state; if (notify) { bond_lower_state_changed(slave); - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); bond_queue_slave_event(slave); slave->should_notify = 0; } else { @@ -385,7 +382,6 @@ static inline void bond_slave_state_notify(struct bonding *bond) bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify) { bond_lower_state_changed(tmp); - rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_ATOMIC); tmp->should_notify = 0; } } -- cgit v1.2.3 From 9c3b57518363577d4e2ea1964ef4fa03e100beaa Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:45:31 -0700 Subject: net: sctp: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Vlad Yasevich Cc: Neil Horman Cc: "David S. Miller" Cc: linux-sctp@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 2db3d3a9ce1d..13cc4963e905 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -72,7 +72,7 @@ typedef enum sctp_disposition (sctp_state_fn_t) ( const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); -typedef void (sctp_timer_event_t) (unsigned long); +typedef void (sctp_timer_event_t) (struct timer_list *); struct sctp_sm_table_entry { sctp_state_fn_t *fn; const char *name; @@ -314,10 +314,10 @@ int sctp_do_sm(struct net *net, enum sctp_event event_type, void *event_arg, gfp_t gfp); /* 2nd level prototypes */ -void sctp_generate_t3_rtx_event(unsigned long peer); -void sctp_generate_heartbeat_event(unsigned long peer); -void sctp_generate_reconf_event(unsigned long peer); -void sctp_generate_proto_unreach_event(unsigned long peer); +void sctp_generate_t3_rtx_event(struct timer_list *t); +void sctp_generate_heartbeat_event(struct timer_list *t); +void sctp_generate_reconf_event(struct timer_list *t); +void sctp_generate_proto_unreach_event(struct timer_list *t); void sctp_ootb_pkt_free(struct sctp_packet *packet); -- cgit v1.2.3 From fc8bcaa05160528d56432e4612f522e3ceafc513 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:45:48 -0700 Subject: net: LLC: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Hans Liljestrand Cc: "Paul E. McKenney" Cc: "Reshetova, Elena" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- include/net/llc_c_ac.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/llc_c_ac.h b/include/net/llc_c_ac.h index f3be818e73c1..e766300b3e99 100644 --- a/include/net/llc_c_ac.h +++ b/include/net/llc_c_ac.h @@ -171,10 +171,10 @@ int llc_conn_ac_rst_sendack_flag(struct sock *sk, struct sk_buff *skb); int llc_conn_ac_send_i_rsp_as_ack(struct sock *sk, struct sk_buff *skb); int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb); -void llc_conn_busy_tmr_cb(unsigned long timeout_data); -void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data); -void llc_conn_ack_tmr_cb(unsigned long timeout_data); -void llc_conn_rej_tmr_cb(unsigned long timeout_data); +void llc_conn_busy_tmr_cb(struct timer_list *t); +void llc_conn_pf_cycle_tmr_cb(struct timer_list *t); +void llc_conn_ack_tmr_cb(struct timer_list *t); +void llc_conn_rej_tmr_cb(struct timer_list *t); void llc_conn_set_p_flag(struct sock *sk, u8 value); #endif /* LLC_C_AC_H */ -- cgit v1.2.3 From 14cd5d4a0125f643350e7fa12f5384f1fc2d3e9d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Oct 2017 14:07:17 -0700 Subject: locking/atomics, net/netlink/netfilter: Convert ACCESS_ONCE() to READ_ONCE()/WRITE_ONCE() For several reasons, it is desirable to use {READ,WRITE}_ONCE() in preference to ACCESS_ONCE(), and new code is expected to use one of the former. So far, there's been no reason to change most existing uses of ACCESS_ONCE(), as these aren't currently harmful. However, for some features it is necessary to instrument reads and writes separately, which is not possible with ACCESS_ONCE(). This distinction is critical to correct operation. It's possible to transform the bulk of kernel code using the Coccinelle script below. However, this doesn't handle comments, leaving references to ACCESS_ONCE() instances which have been removed. As a preparatory step, this patch converts netlink and netfilter code and comments to use {READ,WRITE}_ONCE() consistently. ---- virtual patch @ depends on patch @ expression E1, E2; @@ - ACCESS_ONCE(E1) = E2 + WRITE_ONCE(E1, E2) @ depends on patch @ expression E; @@ - ACCESS_ONCE(E) + READ_ONCE(E) ---- Signed-off-by: Mark Rutland Signed-off-by: Paul E. McKenney Cc: David S. Miller Cc: Florian Westphal Cc: Jozsef Kadlecsik Cc: Linus Torvalds Cc: Pablo Neira Ayuso Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: mpe@ellerman.id.au Cc: shuah@kernel.org Cc: snitzer@redhat.com Cc: thor.thayer@linux.intel.com Cc: tj@kernel.org Cc: viro@zeniv.linux.org.uk Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1508792849-3115-7-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/net/netfilter/nf_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0f5b12a4ad09..5c68e279eaea 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1164,8 +1164,8 @@ static inline u8 nft_genmask_next(const struct net *net) static inline u8 nft_genmask_cur(const struct net *net) { - /* Use ACCESS_ONCE() to prevent refetching the value for atomicity */ - return 1 << ACCESS_ONCE(net->nft.gencursor); + /* Use READ_ONCE() to prevent refetching the value for atomicity */ + return 1 << READ_ONCE(net->nft.gencursor); } #define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) -- cgit v1.2.3 From 6aa7de059173a986114ac43b8f50b297a86f09a8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Oct 2017 14:07:29 -0700 Subject: locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns to READ_ONCE()/WRITE_ONCE() Please do not apply this to mainline directly, instead please re-run the coccinelle script shown below and apply its output. For several reasons, it is desirable to use {READ,WRITE}_ONCE() in preference to ACCESS_ONCE(), and new code is expected to use one of the former. So far, there's been no reason to change most existing uses of ACCESS_ONCE(), as these aren't harmful, and changing them results in churn. However, for some features, the read/write distinction is critical to correct operation. To distinguish these cases, separate read/write accessors must be used. This patch migrates (most) remaining ACCESS_ONCE() instances to {READ,WRITE}_ONCE(), using the following coccinelle script: ---- // Convert trivial ACCESS_ONCE() uses to equivalent READ_ONCE() and // WRITE_ONCE() // $ make coccicheck COCCI=/home/mark/once.cocci SPFLAGS="--include-headers" MODE=patch virtual patch @ depends on patch @ expression E1, E2; @@ - ACCESS_ONCE(E1) = E2 + WRITE_ONCE(E1, E2) @ depends on patch @ expression E; @@ - ACCESS_ONCE(E) + READ_ONCE(E) ---- Signed-off-by: Mark Rutland Signed-off-by: Paul E. McKenney Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: davem@davemloft.net Cc: linux-arch@vger.kernel.org Cc: mpe@ellerman.id.au Cc: shuah@kernel.org Cc: snitzer@redhat.com Cc: thor.thayer@linux.intel.com Cc: tj@kernel.org Cc: viro@zeniv.linux.org.uk Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1508792849-3115-19-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/net/ip_vs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4f4f786255ef..3fadb6f9982b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -983,12 +983,12 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) static inline int sysctl_sync_period(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]); + return READ_ONCE(ipvs->sysctl_sync_threshold[1]); } static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period); + return READ_ONCE(ipvs->sysctl_sync_refresh_period); } static inline int sysctl_sync_retries(struct netns_ipvs *ipvs) @@ -1013,7 +1013,7 @@ static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs) static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_ports); + return READ_ONCE(ipvs->sysctl_sync_ports); } static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs) -- cgit v1.2.3 From 06f877d613be3621604c2520ec0351d9fbdca15f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Oct 2017 08:20:31 -0700 Subject: tcp/dccp: fix other lockdep splats accessing ireq_opt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In my first attempt to fix the lockdep splat, I forgot we could enter inet_csk_route_req() with a freshly allocated request socket, for which refcount has not yet been elevated, due to complex SLAB_TYPESAFE_BY_RCU rules. We either are in rcu_read_lock() section _or_ we own a refcount on the request. Correct RCU verb to use here is rcu_dereference_check(), although it is not possible to prove we actually own a reference on a shared refcount :/ In v2, I added ireq_opt_deref() helper and use in three places, to fix other possible splats. [ 49.844590] lockdep_rcu_suspicious+0xea/0xf3 [ 49.846487] inet_csk_route_req+0x53/0x14d [ 49.848334] tcp_v4_route_req+0xe/0x10 [ 49.850174] tcp_conn_request+0x31c/0x6a0 [ 49.851992] ? __lock_acquire+0x614/0x822 [ 49.854015] tcp_v4_conn_request+0x5a/0x79 [ 49.855957] ? tcp_v4_conn_request+0x5a/0x79 [ 49.858052] tcp_rcv_state_process+0x98/0xdcc [ 49.859990] ? sk_filter_trim_cap+0x2f6/0x307 [ 49.862085] tcp_v4_do_rcv+0xfc/0x145 [ 49.864055] ? tcp_v4_do_rcv+0xfc/0x145 [ 49.866173] tcp_v4_rcv+0x5ab/0xaf9 [ 49.868029] ip_local_deliver_finish+0x1af/0x2e7 [ 49.870064] ip_local_deliver+0x1b2/0x1c5 [ 49.871775] ? inet_del_offload+0x45/0x45 [ 49.873916] ip_rcv_finish+0x3f7/0x471 [ 49.875476] ip_rcv+0x3f1/0x42f [ 49.876991] ? ip_local_deliver_finish+0x2e7/0x2e7 [ 49.878791] __netif_receive_skb_core+0x6d3/0x950 [ 49.880701] ? process_backlog+0x7e/0x216 [ 49.882589] __netif_receive_skb+0x1d/0x5e [ 49.884122] process_backlog+0x10c/0x216 [ 49.885812] net_rx_action+0x147/0x3df Fixes: a6ca7abe53633 ("tcp/dccp: fix lockdep splat in inet_csk_route_req()") Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races") Signed-off-by: Eric Dumazet Reported-by: kernel test robot Reported-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/net/inet_sock.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 425752f768d2..db8162dd8c0b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -132,6 +132,12 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, return sk->sk_bound_dev_if; } +static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) +{ + return rcu_dereference_check(ireq->ireq_opt, + refcount_read(&ireq->req.rsk_refcnt) > 0); +} + struct inet_cork { unsigned int flags; __be32 addr; -- cgit v1.2.3 From 60e2a7780793bae0debc275a9ccd57f7da0cf195 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 25 Oct 2017 11:01:45 +0200 Subject: tcp: TCP experimental option for SMC The SMC protocol [1] relies on the use of a new TCP experimental option [2, 3]. With this option, SMC capabilities are exchanged between peers during the TCP three way handshake. This patch adds support for this experimental option to TCP. References: [1] SMC-R Informational RFC: http://www.rfc-editor.org/info/rfc7609 [2] Shared Use of TCP Experimental Options RFC 6994: https://tools.ietf.org/rfc/rfc6994.txt [3] IANA ExID SMCR: http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml#tcp-exids Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/net/inet_sock.h | 3 ++- include/net/tcp.h | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 425752f768d2..c49938d1481a 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -92,7 +92,8 @@ struct inet_request_sock { wscale_ok : 1, ecn_ok : 1, acked : 1, - no_srccheck: 1; + no_srccheck: 1, + smc_ok : 1; kmemcheck_bitfield_end(flags); u32 ir_mark; union { diff --git a/include/net/tcp.h b/include/net/tcp.h index 2392f74074e7..285bc82dea41 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -191,6 +191,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); * experimental options. See draft-ietf-tcpm-experimental-options-00.txt */ #define TCPOPT_FASTOPEN_MAGIC 0xF989 +#define TCPOPT_SMC_MAGIC 0xE2D4C3D9 /* * TCP option lengths @@ -203,6 +204,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_MD5SIG 18 #define TCPOLEN_FASTOPEN_BASE 2 #define TCPOLEN_EXP_FASTOPEN_BASE 4 +#define TCPOLEN_EXP_SMC_BASE 6 /* But this is what stacks really send out. */ #define TCPOLEN_TSTAMP_ALIGNED 12 @@ -213,6 +215,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_SACK_PERBLOCK 8 #define TCPOLEN_MD5SIG_ALIGNED 20 #define TCPOLEN_MSS_ALIGNED 4 +#define TCPOLEN_EXP_SMC_BASE_ALIGNED 8 /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ @@ -2108,4 +2111,8 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) { return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1); } + +#if IS_ENABLED(CONFIG_SMC) +extern struct static_key_false tcp_have_smc; +#endif #endif /* _TCP_H */ -- cgit v1.2.3 From 32d18ab1d44166cbb1dcaf8b359183636734ddb1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 24 Oct 2017 12:41:01 +0200 Subject: net: updating dst lastusage is an unlikely event. Since commit 0da4af00b2ed ("ipv6: only update __use and lastusetime once per jiffy at most"), updating the dst lastuse field is an unlikely action: it happens at most once per jiffy, out of potentially millions of calls per second. Mark explicitly the code as such, and let the compiler generate better code. Note: gcc 7.2 and several older versions do actually generate different - better - code when the unlikely() hint is in place, avoid jump in the fast path and keeping better code locality. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 5047e8053d6c..2f53ecc2c296 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -257,7 +257,7 @@ static inline void dst_hold(struct dst_entry *dst) static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) { - if (time != dst->lastuse) { + if (unlikely(time != dst->lastuse)) { dst->__use++; dst->lastuse = time; } -- cgit v1.2.3 From 2ae21cf527da0e5cf9d7ee14bd5b0909bb9d1a75 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:54:56 -0700 Subject: tcp: Namespace-ify sysctl_tcp_early_retrans Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2c4222a5d102..a7f39e3ea666 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -128,6 +128,7 @@ struct netns_ipv4 { int sysctl_tcp_sack; int sysctl_tcp_window_scaling; int sysctl_tcp_timestamps; + int sysctl_tcp_early_retrans; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 285bc82dea41..a12b71d4118b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -265,7 +265,6 @@ extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; -extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_recovery; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -- cgit v1.2.3 From e20223f1962831d1b1c416d59d259879d0639d68 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:54:57 -0700 Subject: tcp: Namespace-ify sysctl_tcp_recovery Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a7f39e3ea666..d6ed718075d4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -129,6 +129,7 @@ struct netns_ipv4 { int sysctl_tcp_window_scaling; int sysctl_tcp_timestamps; int sysctl_tcp_early_retrans; + int sysctl_tcp_recovery; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index a12b71d4118b..c7f51534fc44 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -265,7 +265,7 @@ extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; -extern int sysctl_tcp_recovery; + #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ extern int sysctl_tcp_limit_output_bytes; -- cgit v1.2.3 From 2c04ac8ae0b61e0780a30b7069a11bb202b21f26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:54:58 -0700 Subject: tcp: Namespace-ify sysctl_tcp_thin_linear_timeouts Note that sysctl_tcp_thin_dupack was not used, I deleted it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d6ed718075d4..2a9f37b39c45 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -130,6 +130,7 @@ struct netns_ipv4 { int sysctl_tcp_timestamps; int sysctl_tcp_early_retrans; int sysctl_tcp_recovery; + int sysctl_tcp_thin_linear_timeouts; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index c7f51534fc44..063a7a48b7fe 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -263,8 +263,6 @@ extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; -extern int sysctl_tcp_thin_linear_timeouts; -extern int sysctl_tcp_thin_dupack; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -- cgit v1.2.3 From b510f0d23a47c3d1f074fe583e7867dc4918fe02 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:54:59 -0700 Subject: tcp: Namespace-ify sysctl_tcp_slow_start_after_idle Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2a9f37b39c45..8662692686b3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -131,6 +131,7 @@ struct netns_ipv4 { int sysctl_tcp_early_retrans; int sysctl_tcp_recovery; int sysctl_tcp_thin_linear_timeouts; + int sysctl_tcp_slow_start_after_idle; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 063a7a48b7fe..cc2ab522eb5c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -262,7 +262,6 @@ extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; -extern int sysctl_tcp_slow_start_after_idle; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ @@ -1308,7 +1307,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sysctl_tcp_slow_start_after_idle || tp->packets_out || + if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; -- cgit v1.2.3 From e0a1e5b519236dc1662ff25e42560dd1be9e3776 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:00 -0700 Subject: tcp: Namespace-ify sysctl_tcp_retrans_collapse Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8662692686b3..b28c172b10e4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -132,6 +132,7 @@ struct netns_ipv4 { int sysctl_tcp_recovery; int sysctl_tcp_thin_linear_timeouts; int sysctl_tcp_slow_start_after_idle; + int sysctl_tcp_retrans_collapse; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index cc2ab522eb5c..33cc86355b8f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; -- cgit v1.2.3 From 3f4c7c6f6a9053493ce7dd8a0f17ed8eafc53893 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:01 -0700 Subject: tcp: Namespace-ify sysctl_tcp_stdurg Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b28c172b10e4..ffa2cf3dc747 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -133,6 +133,7 @@ struct netns_ipv4 { int sysctl_tcp_thin_linear_timeouts; int sysctl_tcp_slow_start_after_idle; int sysctl_tcp_retrans_collapse; + int sysctl_tcp_stdurg; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 33cc86355b8f..cf3fac7008d7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; -- cgit v1.2.3 From 625357aa175c688d219da43c8cfaa2e1629e0e1a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:02 -0700 Subject: tcp: Namespace-ify sysctl_tcp_rfc1337 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ffa2cf3dc747..968edce38eb5 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -134,6 +134,7 @@ struct netns_ipv4 { int sysctl_tcp_slow_start_after_idle; int sysctl_tcp_retrans_collapse; int sysctl_tcp_stdurg; + int sysctl_tcp_rfc1337; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index cf3fac7008d7..2aea2b3373b3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_fack; -- cgit v1.2.3 From 65c9410cf55ecf32da1b720f563365d565d6289a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:03 -0700 Subject: tcp: Namespace-ify sysctl_tcp_abort_on_overflow Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 968edce38eb5..3875fdf6b186 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -135,6 +135,7 @@ struct netns_ipv4 { int sysctl_tcp_retrans_collapse; int sysctl_tcp_stdurg; int sysctl_tcp_rfc1337; + int sysctl_tcp_abort_on_overflow; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 2aea2b3373b3..7331281a2292 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; -- cgit v1.2.3 From 0bc65a28ae2aeb14aab7f4a930e0d8cf4cad9dc4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:04 -0700 Subject: tcp: Namespace-ify sysctl_tcp_fack Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3875fdf6b186..f0e792beeea9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -136,6 +136,7 @@ struct netns_ipv4 { int sysctl_tcp_stdurg; int sysctl_tcp_rfc1337; int sysctl_tcp_abort_on_overflow; + int sysctl_tcp_fack; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 7331281a2292..e7b15e9f6e28 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; -- cgit v1.2.3 From 773d4bb96ceca6829ae9928f6b002b93e2e62cdc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:05 -0700 Subject: tcp: remove stale sysctl_tcp_reordering This extern is no longer used. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index e7b15e9f6e28..fc134ba74c7d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_reordering; extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; -- cgit v1.2.3 From c6e218035913e14952b04ceecf1a543205106fdb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:06 -0700 Subject: tcp: Namespace-ify sysctl_tcp_max_reordering Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f0e792beeea9..3f6844665a2f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -137,6 +137,7 @@ struct netns_ipv4 { int sysctl_tcp_rfc1337; int sysctl_tcp_abort_on_overflow; int sysctl_tcp_fack; + int sysctl_tcp_max_reordering; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index fc134ba74c7d..8cd286226a1e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; -- cgit v1.2.3 From 6496f6bde0c323fba5e8c5b5cbf3a7bf28dad7ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:07 -0700 Subject: tcp: Namespace-ify sysctl_tcp_dsack Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3f6844665a2f..956957a77db9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -138,6 +138,7 @@ struct netns_ipv4 { int sysctl_tcp_abort_on_overflow; int sysctl_tcp_fack; int sysctl_tcp_max_reordering; + int sysctl_tcp_dsack; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 8cd286226a1e..8b2ae3e8d79f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -- cgit v1.2.3 From 0c12654ac6d9004b9538b2a969b2b59e9a5ed831 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:08 -0700 Subject: tcp: Namespace-ify sysctl_tcp_app_win Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 956957a77db9..63f91d52cbc0 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -139,6 +139,7 @@ struct netns_ipv4 { int sysctl_tcp_fack; int sysctl_tcp_max_reordering; int sysctl_tcp_dsack; + int sysctl_tcp_app_win; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 8b2ae3e8d79f..7aa3d65062a1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; -- cgit v1.2.3 From 94f0893e0c27219f4a726932618505aab6795973 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:09 -0700 Subject: tcp: Namespace-ify sysctl_tcp_adv_win_scale Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 63f91d52cbc0..9dbb07d4eff4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -140,6 +140,7 @@ struct netns_ipv4 { int sysctl_tcp_max_reordering; int sysctl_tcp_dsack; int sysctl_tcp_app_win; + int sysctl_tcp_adv_win_scale; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 7aa3d65062a1..0dc27cd24899 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; @@ -1311,9 +1310,9 @@ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd); -static inline int tcp_win_from_space(int space) +static inline int tcp_win_from_space(const struct sock *sk, int space) { - int tcp_adv_win_scale = sysctl_tcp_adv_win_scale; + int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; return tcp_adv_win_scale <= 0 ? (space>>(-tcp_adv_win_scale)) : @@ -1323,13 +1322,13 @@ static inline int tcp_win_from_space(int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk->sk_rcvbuf - + return tcp_win_from_space(sk, sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)); } static inline int tcp_full_space(const struct sock *sk) { - return tcp_win_from_space(sk->sk_rcvbuf); + return tcp_win_from_space(sk, sk->sk_rcvbuf); } extern void tcp_openreq_init_rwin(struct request_sock *req, -- cgit v1.2.3 From af9b69a7a6ca6b817e8d6f416e7aa5b2a5bf1d91 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:10 -0700 Subject: tcp: Namespace-ify sysctl_tcp_frto Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9dbb07d4eff4..f4622e28db3a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -141,6 +141,7 @@ struct netns_ipv4 { int sysctl_tcp_dsack; int sysctl_tcp_app_win; int sysctl_tcp_adv_win_scale; + int sysctl_tcp_frto; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0dc27cd24899..18f047501f53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; -- cgit v1.2.3 From bff7b688d5b10a8cb8cecefdea5e255408b78f2f Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:51 -0400 Subject: net: dsa: add dsa_is_unused_port helper As the comment above the chunk states, the b53 driver attempts to disable the unused ports. But using ds->enabled_port_mask is misleading, because this mask reports in fact the user ports. To avoid confusion and fix this, this patch introduces an explicit dsa_is_unused_port helper which ensures the corresponding bit is not masked in any of the switch port masks. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 38961ef91d3d..6b1bc1c8f7e2 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -254,6 +254,13 @@ struct dsa_switch { struct dsa_port ports[]; }; +static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) +{ + u32 m = ds->enabled_port_mask | ds->dsa_port_mask | ds->cpu_port_mask; + + return !(m & BIT(p)); +} + static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { return !!(ds->cpu_port_mask & (1 << p)); -- cgit v1.2.3 From deb8ee0b51204273c120a3b3848efbb5695ad658 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:53 -0400 Subject: net: dsa: fix dsa_is_normal_port helper In order to know if a port is of type user, dsa_is_normal_port checks that the given port is not of type DSA nor CPU. This is not enough because a port can be unused. Without the previous fix, this caused the unused mv88e6xxx ports to be configured in normal mode. The ds->enabled_port_mask reports the user ports, so check this instead. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 6b1bc1c8f7e2..4ad432ad2d40 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -273,7 +273,7 @@ static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) { - return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); + return !!(ds->enabled_port_mask & BIT(p)); } static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) -- cgit v1.2.3 From 2b3e9891cb607f7c7d5a4b11fb5a6e775e7f3ef4 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:54 -0400 Subject: net: dsa: rename dsa_is_normal_port helper This patch renames dsa_is_normal_port to dsa_is_user_port because "user" is the correct term in the DSA terminology, not "normal". Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 4ad432ad2d40..49701d958663 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -271,7 +271,7 @@ static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) return !!((ds->dsa_port_mask) & (1 << p)); } -static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) +static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) { return !!(ds->enabled_port_mask & BIT(p)); } -- cgit v1.2.3 From 02bc6e546e858b209c3ebe380a13a73b333b1b3f Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:56 -0400 Subject: net: dsa: introduce dsa_user_ports helper Introduce a dsa_user_ports() helper to return the ds->enabled_port_mask mask which is more explicit. This will also minimize diffs when touching this internal mask. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 49701d958663..dc7728062396 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -276,6 +276,11 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) return !!(ds->enabled_port_mask & BIT(p)); } +static inline u32 dsa_user_ports(struct dsa_switch *ds) +{ + return ds->enabled_port_mask; +} + static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) { return &ds->ports[p]; -- cgit v1.2.3 From 057cad2c59d73b0c4a6638546f3099d6fb444094 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:57 -0400 Subject: net: dsa: define port types Introduce an enumerated type for ports, which will be way more explicit to identify a port type instead of digging into switch port masks. A port can be of type CPU, DSA, user, or unused by default. This is a static parsed information that cannot be changed at runtime. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index dc7728062396..8da20c4a6552 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -180,6 +180,13 @@ struct dsa_port { struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); + enum { + DSA_PORT_TYPE_UNUSED = 0, + DSA_PORT_TYPE_CPU, + DSA_PORT_TYPE_DSA, + DSA_PORT_TYPE_USER, + } type; + struct dsa_switch *ds; unsigned int index; const char *name; -- cgit v1.2.3 From c38c5a66506e4e8223fd03e950b1bde99190701e Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:58 -0400 Subject: net: dsa: use new port type in helpers Now that DSA exposes an enumerated type for the ports, we can use them directly instead of checking bitmaps, which is more consistent. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 8da20c4a6552..07dfbd7f4fd5 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -261,36 +261,41 @@ struct dsa_switch { struct dsa_port ports[]; }; -static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) +static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) { - u32 m = ds->enabled_port_mask | ds->dsa_port_mask | ds->cpu_port_mask; + return &ds->ports[p]; +} - return !(m & BIT(p)); +static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) +{ + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; } static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { - return !!(ds->cpu_port_mask & (1 << p)); + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_CPU; } static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) { - return !!((ds->dsa_port_mask) & (1 << p)); + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_DSA; } static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) { - return !!(ds->enabled_port_mask & BIT(p)); + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_USER; } static inline u32 dsa_user_ports(struct dsa_switch *ds) { - return ds->enabled_port_mask; -} + u32 mask = 0; + int p; -static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) -{ - return &ds->ports[p]; + for (p = 0; p < ds->num_ports; p++) + if (dsa_is_user_port(ds, p)) + mask |= BIT(p); + + return mask; } static inline u8 dsa_upstream_port(struct dsa_switch *ds) -- cgit v1.2.3 From 5749f0f3772b9d98f37e3a92539f49fafaa64eca Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:59 -0400 Subject: net: dsa: remove port masks Now that DSA core provides port types, there is no need to keep this information at the switch level. This is a static information that is part of a DSA core dsa_port structure. Remove them. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 07dfbd7f4fd5..50e276dc4c01 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -240,9 +240,6 @@ struct dsa_switch { /* * Slave mii_bus and devices for the individual ports. */ - u32 dsa_port_mask; - u32 cpu_port_mask; - u32 enabled_port_mask; u32 phys_mii_mask; struct mii_bus *slave_mii_bus; -- cgit v1.2.3 From 3d0bd028ffb4a4915cb64cfa0d2cee1578cc0321 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 16 Oct 2017 18:01:27 -0700 Subject: net/sched: Add support for HW offloading for CBS This adds support for offloading the CBS algorithm to the controller, if supported. Drivers wanting to support CBS offload must implement the .ndo_setup_tc callback and handle the TC_SETUP_CBS (introduced here) type. Signed-off-by: Vinicius Costa Gomes Tested-by: Henrik Austad Signed-off-by: Jeff Kirsher --- include/net/pkt_sched.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index b8ecafce4ba1..02f2db26e30c 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -140,4 +140,13 @@ static inline struct net *qdisc_net(struct Qdisc *q) return dev_net(q->dev_queue->dev); } +struct tc_cbs_qopt_offload { + u8 enable; + s32 queue; + s32 hicredit; + s32 locredit; + s32 idleslope; + s32 sendslope; +}; + #endif -- cgit v1.2.3 From ec36e416f06f6a8659281053fdc46ce484ad2211 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:21 -0700 Subject: tcp: Namespace-ify sysctl_tcp_nometrics_save Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f4622e28db3a..9606e2ea1f14 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -142,6 +142,7 @@ struct netns_ipv4 { int sysctl_tcp_app_win; int sysctl_tcp_adv_win_scale; int sysctl_tcp_frto; + int sysctl_tcp_nometrics_save; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 18f047501f53..6ab7fa4154b2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; -- cgit v1.2.3 From 4540c0cf98b8892a642d2453eec20ae3eb5696fb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:22 -0700 Subject: tcp: Namespace-ify sysctl_tcp_moderate_rcvbuf Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9606e2ea1f14..4458a54fe3f4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -143,6 +143,7 @@ struct netns_ipv4 { int sysctl_tcp_adv_win_scale; int sysctl_tcp_frto; int sysctl_tcp_nometrics_save; + int sysctl_tcp_moderate_rcvbuf; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 6ab7fa4154b2..f954e74578ff 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; -- cgit v1.2.3 From d06a99045837d3f4d5431793c4c390b0daf2a08d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:23 -0700 Subject: tcp: Namespace-ify sysctl_tcp_tso_win_divisor Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 4458a54fe3f4..60bccda046db 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -144,6 +144,7 @@ struct netns_ipv4 { int sysctl_tcp_frto; int sysctl_tcp_nometrics_save; int sysctl_tcp_moderate_rcvbuf; + int sysctl_tcp_tso_win_divisor; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index f954e74578ff..ed0828dc82f1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -- cgit v1.2.3 From ceef9ab6be7234f9e49f79769e0da88d1dccfcc7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:24 -0700 Subject: tcp: Namespace-ify sysctl_tcp_workaround_signed_windows Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 60bccda046db..e74c7c1b0d18 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -145,6 +145,7 @@ struct netns_ipv4 { int sysctl_tcp_nometrics_save; int sysctl_tcp_moderate_rcvbuf; int sysctl_tcp_tso_win_divisor; + int sysctl_tcp_workaround_signed_windows; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index ed0828dc82f1..e338e16178dd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_workaround_signed_windows; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ @@ -1302,7 +1301,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) } /* Determine a window scaling and initial window to offer. */ -void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, +void tcp_select_initial_window(const struct sock *sk, int __space, + __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd); -- cgit v1.2.3 From 9184d8bb448a3d2c2d9f90f1e2f5de625292e769 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:25 -0700 Subject: tcp: Namespace-ify sysctl_tcp_limit_output_bytes Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e74c7c1b0d18..e98f473bab13 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -146,6 +146,7 @@ struct netns_ipv4 { int sysctl_tcp_moderate_rcvbuf; int sysctl_tcp_tso_win_divisor; int sysctl_tcp_workaround_signed_windows; + int sysctl_tcp_limit_output_bytes; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index e338e16178dd..33f9d30a6905 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_min_rtt_wlen; -- cgit v1.2.3 From b530b68148301d73775cd27cc136ce4dd5738ae8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:26 -0700 Subject: tcp: Namespace-ify sysctl_tcp_challenge_ack_limit Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e98f473bab13..e9895d40868e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -147,6 +147,7 @@ struct netns_ipv4 { int sysctl_tcp_tso_win_divisor; int sysctl_tcp_workaround_signed_windows; int sysctl_tcp_limit_output_bytes; + int sysctl_tcp_challenge_ack_limit; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 33f9d30a6905..afc23596e9aa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_challenge_ack_limit; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; -- cgit v1.2.3 From 26e9596e5b8f11025b57b12e7265df649129ab00 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:27 -0700 Subject: tcp: Namespace-ify sysctl_tcp_min_tso_segs Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e9895d40868e..a2da3e19a977 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -148,6 +148,7 @@ struct netns_ipv4 { int sysctl_tcp_workaround_signed_windows; int sysctl_tcp_limit_output_bytes; int sysctl_tcp_challenge_ack_limit; + int sysctl_tcp_min_tso_segs; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index afc23596e9aa..0735303a6575 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; -- cgit v1.2.3 From bd239704295c66196e6b77c5717ec4aec076ddd5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:28 -0700 Subject: tcp: Namespace-ify sysctl_tcp_min_rtt_wlen Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a2da3e19a977..1a66af8a0d32 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -149,6 +149,7 @@ struct netns_ipv4 { int sysctl_tcp_limit_output_bytes; int sysctl_tcp_challenge_ack_limit; int sysctl_tcp_min_tso_segs; + int sysctl_tcp_min_rtt_wlen; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0735303a6575..56f50c9a3e6a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; extern int sysctl_tcp_pacing_ss_ratio; -- cgit v1.2.3 From 790f00e19f65673c3c169dfc137c09a9236847d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:29 -0700 Subject: tcp: Namespace-ify sysctl_tcp_autocorking Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 1a66af8a0d32..537830882149 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -150,6 +150,7 @@ struct netns_ipv4 { int sysctl_tcp_challenge_ack_limit; int sysctl_tcp_min_tso_segs; int sysctl_tcp_min_rtt_wlen; + int sysctl_tcp_autocorking; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 56f50c9a3e6a..0268f1025d9d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; extern int sysctl_tcp_pacing_ss_ratio; extern int sysctl_tcp_pacing_ca_ratio; -- cgit v1.2.3 From 4170ba6b589ced82da56c7e4f71cc84b2be036d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:30 -0700 Subject: tcp: Namespace-ify sysctl_tcp_invalid_ratelimit Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 537830882149..e52c2124b32e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -151,6 +151,7 @@ struct netns_ipv4 { int sysctl_tcp_min_tso_segs; int sysctl_tcp_min_rtt_wlen; int sysctl_tcp_autocorking; + int sysctl_tcp_invalid_ratelimit; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0268f1025d9d..5869a822ecb1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_invalid_ratelimit; extern int sysctl_tcp_pacing_ss_ratio; extern int sysctl_tcp_pacing_ca_ratio; -- cgit v1.2.3 From 23a7102a2d1068508fa2a0ce593a0df7f8fdc0ac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:31 -0700 Subject: tcp: Namespace-ify sysctl_tcp_pacing_ss_ratio Also remove an obsolete comment about TCP pacing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e52c2124b32e..eb2dcf1cbe61 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -152,6 +152,7 @@ struct netns_ipv4 { int sysctl_tcp_min_rtt_wlen; int sysctl_tcp_autocorking; int sysctl_tcp_invalid_ratelimit; + int sysctl_tcp_pacing_ss_ratio; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5869a822ecb1..2a5f8261ca03 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_pacing_ss_ratio; extern int sysctl_tcp_pacing_ca_ratio; extern atomic_long_t tcp_memory_allocated; -- cgit v1.2.3 From c26e91f8b9b8e1fd252e07c1f60e50220cd7ebab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:32 -0700 Subject: tcp: Namespace-ify sysctl_tcp_pacing_ca_ratio Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index eb2dcf1cbe61..141ba82b5efb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -153,6 +153,7 @@ struct netns_ipv4 { int sysctl_tcp_autocorking; int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; + int sysctl_tcp_pacing_ca_ratio; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 2a5f8261ca03..092d606fcc16 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,8 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_pacing_ca_ratio; - extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern unsigned long tcp_memory_pressure; -- cgit v1.2.3 From 5b52a4c3acf5f4b4854d1c3ddc8be8770330a79c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 27 Oct 2017 10:01:39 -0700 Subject: tcp: remove unnecessary include two extra #include are not necessary in tcp.h Remove them. Fixes: 40304b2a1567 ("bpf: BPF support for sock_ops") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/tcp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 092d606fcc16..aa1cc90fdc02 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -45,9 +45,6 @@ #include #include - -#include -#include #include extern struct inet_hashinfo tcp_hashinfo; -- cgit v1.2.3 From 8108a77515126f6db4374e8593956e20430307c0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 27 Oct 2017 09:45:34 -0700 Subject: bpf: bpf_compute_data uses incorrect cb structure SK_SKB program types use bpf_compute_data to store the end of the packet data. However, bpf_compute_data assumes the cb is stored in the qdisc layer format. But, for SK_SKB this is the wrong layer of the stack for this type. It happens to work (sort of!) because in most cases nothing happens to be overwritten today. This is very fragile and error prone. Fortunately, we have another hole in tcp_skb_cb we can use so lets put the data_end value there. Note, SK_SKB program types do not use data_meta, they are failed by sk_skb_is_valid_access(). Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index b1ef98ebce53..33599d17522d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -844,6 +844,7 @@ struct tcp_skb_cb { __u32 key; __u32 flags; struct bpf_map *map; + void *data_end; } bpf; }; }; -- cgit v1.2.3 From 1da4fc97cbf89514e417a3df46eaec864a9b8a48 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:54 +0800 Subject: sctp: fix some type cast warnings introduced by stream reconf These warnings were found by running 'make C=2 M=net/sctp/'. They are introduced by not aware of Endian when coding stream reconf patches. Since commit c0d8bab6ae51 ("sctp: add get and set sockopt for reconf_enable") enabled stream reconf feature for users, the Fixes tag below would use it. Fixes: c0d8bab6ae51 ("sctp: add get and set sockopt for reconf_enable") Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 2 +- include/net/sctp/ulpevent.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 2db3d3a9ce1d..88233cf8b8d4 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -261,7 +261,7 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, struct sctp_fwdtsn_skip *skiplist); struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc); struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc, - __u16 stream_num, __u16 *stream_list, + __u16 stream_num, __be16 *stream_list, bool out, bool in); struct sctp_chunk *sctp_make_strreset_tsnreq( const struct sctp_association *asoc); diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index b8c86ec1a8f5..231dc42f1da6 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -130,7 +130,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event( struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( const struct sctp_association *asoc, __u16 flags, - __u16 stream_num, __u16 *stream_list, gfp_t gfp); + __u16 stream_num, __be16 *stream_list, gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( const struct sctp_association *asoc, __u16 flags, -- cgit v1.2.3 From 1f01d8be0e6a04bd682a55f6d50c14c1679e7571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Konrad=20Zapa=C5=82owicz?= Date: Tue, 17 Oct 2017 15:53:49 +0200 Subject: Bluetooth: increase timeout for le auto connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch increases the connection timeout for LE connections that are triggered by the advertising report to 4 seconds. It has been observed that devices equipped with wifi+bt combo SoC fail to create a connection with BLE devices due to their coexistence issues. Increasing this timeout gives them enough time to complete the connection with success. Signed-off-by: Konrad Zapałowicz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fe98f0a5bef0..1668211297a9 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -273,7 +273,7 @@ enum { #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ #define HCI_POWER_OFF_TIMEOUT msecs_to_jiffies(5000) /* 5 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ -#define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ +#define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ /* HCI data types */ #define HCI_COMMAND_PKT 0x01 -- cgit v1.2.3 From 7aa0045dadb6ef37485ea9f2a7d28278ca588b51 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:28 -0700 Subject: net_sched: introduce a workqueue for RCU callbacks of tc filter This patch introduces a dedicated workqueue for tc filters so that each tc filter's RCU callback could defer their action destroy work to this workqueue. The helper tcf_queue_work() is introduced for them to use. Because we hold RTNL lock when calling tcf_block_put(), we can not simply flush works inside it, therefore we have to defer it again to this workqueue and make sure all flying RCU callbacks have already queued their work before this one, in other words, to ensure this is the last one to execute to prevent any use-after-free. On the other hand, this makes tcf_block_put() ugly and harder to understand. Since David and Eric strongly dislike adding synchronize_rcu(), this is probably the only solution that could make everyone happy. Please also see the code comments below. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 3 +++ include/net/sch_generic.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e80edd8879ef..3009547f3c66 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -2,6 +2,7 @@ #define __NET_PKT_CLS_H #include +#include #include #include @@ -17,6 +18,8 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +bool tcf_queue_work(struct work_struct *work); + #ifdef CONFIG_NET_CLS struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 135f5a2dd931..0dec8a23be57 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -271,6 +272,7 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; + struct work_struct work; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) -- cgit v1.2.3 From 2064ee332e4c1b7495cf68b84355c213d8fe71fd Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 30 Oct 2017 10:42:59 +0100 Subject: Bluetooth: Use bt_dev_err and bt_dev_info when possible In case of using BT_ERR and BT_INFO, convert to bt_dev_err and bt_dev_info when possible. This allows for controller specific reporting. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 020142bb9735..e89cff0c4c23 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -147,6 +147,9 @@ void bt_err_ratelimited(const char *fmt, ...); #define bt_dev_dbg(hdev, fmt, ...) \ BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__) +#define bt_dev_err_ratelimited(hdev, fmt, ...) \ + BT_ERR_RATELIMITED("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + /* Connection and socket states */ enum { BT_CONNECTED = 1, /* Equal to TCP_ESTABLISHED to make net code happy */ -- cgit v1.2.3 From e4dca7b7aa08b22893c45485d222b5807c1375ae Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Oct 2017 19:04:42 -0700 Subject: treewide: Fix function prototypes for module_param_call() Several function prototypes for the set/get functions defined by module_param_call() have a slightly wrong argument types. This fixes those in an effort to clean up the calls when running under type-enforced compiler instrumentation for CFI. This is the result of running the following semantic patch: @match_module_param_call_function@ declarer name module_param_call; identifier _name, _set_func, _get_func; expression _arg, _mode; @@ module_param_call(_name, _set_func, _get_func, _arg, _mode); @fix_set_prototype depends on match_module_param_call_function@ identifier match_module_param_call_function._set_func; identifier _val, _param; type _val_type, _param_type; @@ int _set_func( -_val_type _val +const char * _val , -_param_type _param +const struct kernel_param * _param ) { ... } @fix_get_prototype depends on match_module_param_call_function@ identifier match_module_param_call_function._get_func; identifier _val, _param; type _val_type, _param_type; @@ int _get_func( -_val_type _val +char * _val , -_param_type _param +const struct kernel_param * _param ) { ... } Two additional by-hand changes are included for places where the above Coccinelle script didn't notice them: drivers/platform/x86/thinkpad_acpi.c fs/lockd/svc.c Signed-off-by: Kees Cook Signed-off-by: Jessica Yu --- include/net/netfilter/nf_conntrack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8f3bd30511de..fd5241ce1fc9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -284,7 +284,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) struct kernel_param; -int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); +int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp); int nf_conntrack_hash_resize(unsigned int hashsize); extern struct hlist_nulls_head *nf_conntrack_hash; -- cgit v1.2.3 From 384c181e3780ddc45e70483e29d84495b484730d Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 27 Oct 2017 02:35:34 -0700 Subject: net: sched: Identify hardware traffic classes using classid This patch offloads the classid to hardware and uses the classid reserved in the range :ffe0 - :ffef to identify hardware traffic classes reported via dev->num_tc. tcf_result structure contains the class ID of the class to which the packet belongs and is offloaded to hardware via flower filter. A new helper function is introduced to represent HW traffic classes 0 through 15 using the reserved classid values :ffe0 - :ffef. Signed-off-by: Amritha Nambiar Acked-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/net/pkt_cls.h | 1 + include/net/sch_generic.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bf73e1675519..37c5ef766655 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -666,6 +666,7 @@ struct tc_cls_flower_offload { struct fl_flow_key *mask; struct fl_flow_key *key; struct tcf_exts *exts; + u32 classid; }; enum tc_matchall_command { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 07c179dab478..c23e938f5b19 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -411,6 +411,13 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id) return NULL; } +static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid) +{ + u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY; + + return (hwtc < netdev_get_num_tc(dev)) ? hwtc : -EINVAL; +} + int qdisc_class_hash_init(struct Qdisc_class_hash *); void qdisc_class_hash_insert(struct Qdisc_class_hash *, struct Qdisc_class_common *); -- cgit v1.2.3 From 6c31e5a91fde2e718e59c8a627c56451f88be54c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 27 Oct 2017 17:37:13 -0700 Subject: net: Add extack to fib_notifier_info Add extack to fib_notifier_info and plumb through stack to call_fib_rule_notifiers, call_fib_entry_notifiers and call_fib6_entry_notifiers. This allows notifer handlers to return messages to user. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/fib_notifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index 54cd6b839d2f..c91ec732afd6 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -9,6 +9,7 @@ struct fib_notifier_info { struct net *net; int family; + struct netlink_ext_ack *extack; }; enum fib_event_type { -- cgit v1.2.3 From da13c59b9936dfedcf9f2203bd29fbf83ad672bf Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Mon, 30 Oct 2017 19:38:52 -0400 Subject: net: display hw address of source machine during ipv6 DAD failure This patch updates the error messages displayed in kernel log to include hwaddress of the source machine that caused ipv6 duplicate address detection failures. Examples: a) When we receive a NA packet from another machine advertising our address: ICMPv6: NA: 34:ab:cd:56:11:e8 advertised our address 2001:db8:: on eth0! b) When we detect DAD failure during address assignment to an interface: IPv6: eth0: IPv6 duplicate address 2001:db8:: used by 34:ab:cd:56:11:e8 detected! v2: Changed %pI6 to %pI6c in ndisc_recv_na() Chaged the v6 address in the commit message to 2001:db8:: Suggested-by: Igor Lubashev Signed-off-by: Vishwanath Pai Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 15b5ffd7253d..2a616ea53956 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -208,7 +208,7 @@ void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed); -void addrconf_dad_failure(struct inet6_ifaddr *ifp); +void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, const struct in6_addr *src_addr); -- cgit v1.2.3 From 2b7cda9c35d3b940eb9ce74b30bbd5eb30db493d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Oct 2017 23:08:20 -0700 Subject: tcp: fix tcp_mtu_probe() vs highest_sack Based on SNMP values provided by Roman, Yuchung made the observation that some crashes in tcp_sacktag_walk() might be caused by MTU probing. Looking at tcp_mtu_probe(), I found that when a new skb was placed in front of the write queue, we were not updating tcp highest sack. If one skb is freed because all its content was copied to the new skb (for MTU probing), then tp->highest_sack could point to a now freed skb. Bad things would then happen, including infinite loops. This patch renames tcp_highest_sack_combine() and uses it from tcp_mtu_probe() to fix the bug. Note that I also removed one test against tp->sacked_out, since we want to replace tp->highest_sack regardless of whatever condition, since keeping a stale pointer to freed skb is a recipe for disaster. Fixes: a47e5a988a57 ("[TCP]: Convert highest_sack to sk_buff to allow direct access") Signed-off-by: Eric Dumazet Reported-by: Alexei Starovoitov Reported-by: Roman Gushchin Reported-by: Oleksandr Natalenko Acked-by: Alexei Starovoitov Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/net/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 33599d17522d..e6d0002a1b0b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1771,12 +1771,12 @@ static inline void tcp_highest_sack_reset(struct sock *sk) tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk); } -/* Called when old skb is about to be deleted (to be combined with new skb) */ -static inline void tcp_highest_sack_combine(struct sock *sk, +/* Called when old skb is about to be deleted and replaced by new skb */ +static inline void tcp_highest_sack_replace(struct sock *sk, struct sk_buff *old, struct sk_buff *new) { - if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack)) + if (old == tcp_highest_sack(sk)) tcp_sk(sk)->highest_sack = new; } -- cgit v1.2.3 From 0b5a89caee5c9958c18cd933c7f8891e35b21781 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 1 Nov 2017 11:47:38 +0100 Subject: net: sched: remove unused tc_should_offload helper tc_should_offload is no longer used, remove it. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 37c5ef766655..108dcdd96421 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -623,13 +623,6 @@ static inline bool tc_skip_hw(u32 flags) return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false; } -static inline bool tc_should_offload(const struct net_device *dev, u32 flags) -{ - if (tc_skip_hw(flags)) - return false; - return tc_can_offload(dev); -} - static inline bool tc_skip_sw(u32 flags) { return (flags & TCA_CLS_FLAGS_SKIP_SW) ? true : false; -- cgit v1.2.3 From 70b5aee46782208c14d93b715e9f62f7fec844f1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 1 Nov 2017 11:47:41 +0100 Subject: net: sched: remove ndo_setup_tc check from tc_can_offload Since tc_can_offload is always called from block callback or egdev callback, no need to check if ndo_setup_tc exists. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 108dcdd96421..d15c40c7bde7 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -611,11 +611,7 @@ struct tc_cls_u32_offload { static inline bool tc_can_offload(const struct net_device *dev) { - if (!(dev->features & NETIF_F_HW_TC)) - return false; - if (!dev->netdev_ops->ndo_setup_tc) - return false; - return true; + return dev->features & NETIF_F_HW_TC; } static inline bool tc_skip_hw(u32 flags) -- cgit v1.2.3 From b24413180f5600bcb3bb70fbed5cf186b60864bd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Nov 2017 15:07:57 +0100 Subject: License cleanup: add SPDX GPL-2.0 license identifier to files with no license Many source files in the tree are missing licensing information, which makes it harder for compliance tools to determine the correct license. By default all files without license information are under the default license of the kernel, which is GPL version 2. Update the files which contain no license information with the 'GPL-2.0' SPDX license identifier. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This patch is based on work done by Thomas Gleixner and Kate Stewart and Philippe Ombredanne. How this work was done: Patches were generated and checked against linux-4.14-rc6 for a subset of the use cases: - file had no licensing information it it. - file was a */uapi/* one with no licensing information in it, - file was a */uapi/* one with existing licensing information, Further patches will be generated in subsequent months to fix up cases where non-standard license headers were used, and references to license had to be inferred by heuristics based on keywords. The analysis to determine which SPDX License Identifier to be applied to a file was done in a spreadsheet of side by side results from of the output of two independent scanners (ScanCode & Windriver) producing SPDX tag:value files created by Philippe Ombredanne. Philippe prepared the base worksheet, and did an initial spot review of a few 1000 files. The 4.13 kernel was the starting point of the analysis with 60,537 files assessed. Kate Stewart did a file by file comparison of the scanner results in the spreadsheet to determine which SPDX license identifier(s) to be applied to the file. She confirmed any determination that was not immediately clear with lawyers working with the Linux Foundation. Criteria used to select files for SPDX license identifier tagging was: - Files considered eligible had to be source code files. - Make and config files were included as candidates if they contained >5 lines of source - File already had some variant of a license header in it (even if <5 lines). All documentation files were explicitly excluded. The following heuristics were used to determine which SPDX license identifiers to apply. - when both scanners couldn't find any license traces, file was considered to have no license information in it, and the top level COPYING file license applied. For non */uapi/* files that summary was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 11139 and resulted in the first patch in this series. If that file was a */uapi/* path one, it was "GPL-2.0 WITH Linux-syscall-note" otherwise it was "GPL-2.0". Results of that was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 WITH Linux-syscall-note 930 and resulted in the second patch in this series. - if a file had some form of licensing information in it, and was one of the */uapi/* ones, it was denoted with the Linux-syscall-note if any GPL family license was found in the file or had no licensing in it (per prior point). Results summary: SPDX license identifier # files ---------------------------------------------------|------ GPL-2.0 WITH Linux-syscall-note 270 GPL-2.0+ WITH Linux-syscall-note 169 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) 21 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) 17 LGPL-2.1+ WITH Linux-syscall-note 15 GPL-1.0+ WITH Linux-syscall-note 14 ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) 5 LGPL-2.0+ WITH Linux-syscall-note 4 LGPL-2.1 WITH Linux-syscall-note 3 ((GPL-2.0 WITH Linux-syscall-note) OR MIT) 3 ((GPL-2.0 WITH Linux-syscall-note) AND MIT) 1 and that resulted in the third patch in this series. - when the two scanners agreed on the detected license(s), that became the concluded license(s). - when there was disagreement between the two scanners (one detected a license but the other didn't, or they both detected different licenses) a manual inspection of the file occurred. - In most cases a manual inspection of the information in the file resulted in a clear resolution of the license that should apply (and which scanner probably needed to revisit its heuristics). - When it was not immediately clear, the license identifier was confirmed with lawyers working with the Linux Foundation. - If there was any question as to the appropriate license identifier, the file was flagged for further research and to be revisited later in time. In total, over 70 hours of logged manual review was done on the spreadsheet to determine the SPDX license identifiers to apply to the source files by Kate, Philippe, Thomas and, in some cases, confirmation by lawyers working with the Linux Foundation. Kate also obtained a third independent scan of the 4.13 code base from FOSSology, and compared selected files where the other two scanners disagreed against that SPDX file, to see if there was new insights. The Windriver scanner is based on an older version of FOSSology in part, so they are related. Thomas did random spot checks in about 500 files from the spreadsheets for the uapi headers and agreed with SPDX license identifier in the files he inspected. For the non-uapi files Thomas did random spot checks in about 15000 files. In initial set of patches against 4.14-rc6, 3 files were found to have copy/paste license identifier errors, and have been fixed to reflect the correct identifier. Additionally Philippe spent 10 hours this week doing a detailed manual inspection and review of the 12,461 patched files from the initial patch version early this week with: - a full scancode scan run, collecting the matched texts, detected license ids and scores - reviewing anything where there was a license detected (about 500+ files) to ensure that the applied SPDX license was correct - reviewing anything where there was no detection but the patch license was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied SPDX license was correct This produced a worksheet with 20 files needing minor correction. This worksheet was then exported into 3 different .csv files for the different types of files to be modified. These .csv files were then reviewed by Greg. Thomas wrote a script to parse the csv files and add the proper SPDX tag to the file, in the format that the file expected. This script was further refined by Greg based on the output to detect more types of files automatically and to distinguish between header and source .c files (which need different comment types.) Finally Greg ran the script using the .csv files to generate the patches. Reviewed-by: Kate Stewart Reviewed-by: Philippe Ombredanne Reviewed-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/net/Space.h | 1 + include/net/act_api.h | 1 + include/net/addrconf.h | 1 + include/net/af_unix.h | 1 + include/net/ah.h | 1 + include/net/arp.h | 1 + include/net/atmclip.h | 1 + include/net/ax25.h | 1 + include/net/compat.h | 1 + include/net/datalink.h | 1 + include/net/dn.h | 1 + include/net/dn_dev.h | 1 + include/net/dn_fib.h | 1 + include/net/dn_neigh.h | 1 + include/net/dsfield.h | 1 + include/net/dst.h | 1 + include/net/dst_cache.h | 1 + include/net/dst_metadata.h | 1 + include/net/dst_ops.h | 1 + include/net/esp.h | 1 + include/net/fib_rules.h | 1 + include/net/firewire.h | 1 + include/net/flow.h | 1 + include/net/flow_dissector.h | 1 + include/net/fou.h | 1 + include/net/garp.h | 1 + include/net/gen_stats.h | 1 + include/net/genetlink.h | 1 + include/net/geneve.h | 1 + include/net/gre.h | 1 + include/net/gro_cells.h | 1 + include/net/gtp.h | 1 + include/net/gue.h | 1 + include/net/hwbm.h | 1 + include/net/ife.h | 1 + include/net/inet_common.h | 1 + include/net/inet_ecn.h | 1 + include/net/inet_frag.h | 1 + include/net/inetpeer.h | 1 + include/net/ip6_route.h | 1 + include/net/ip6_tunnel.h | 1 + include/net/ip_tunnels.h | 1 + include/net/ip_vs.h | 1 + include/net/ipcomp.h | 1 + include/net/ipconfig.h | 1 + include/net/ipx.h | 1 + include/net/iucv/af_iucv.h | 1 + include/net/iucv/iucv.h | 1 + include/net/iw_handler.h | 1 + include/net/lapb.h | 1 + include/net/lib80211.h | 1 + include/net/lwtunnel.h | 1 + include/net/mld.h | 1 + include/net/mrp.h | 1 + include/net/ncsi.h | 1 + include/net/ndisc.h | 1 + include/net/neighbour.h | 1 + include/net/net_namespace.h | 1 + include/net/net_ratelimit.h | 1 + include/net/netevent.h | 1 + include/net/netfilter/br_netfilter.h | 1 + include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 1 + include/net/netfilter/ipv4/nf_defrag_ipv4.h | 1 + include/net/netfilter/ipv4/nf_dup_ipv4.h | 1 + include/net/netfilter/ipv4/nf_nat_masquerade.h | 1 + include/net/netfilter/ipv4/nf_reject.h | 1 + include/net/netfilter/ipv6/nf_conntrack_icmpv6.h | 1 + include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 1 + include/net/netfilter/ipv6/nf_defrag_ipv6.h | 1 + include/net/netfilter/ipv6/nf_dup_ipv6.h | 1 + include/net/netfilter/ipv6/nf_nat_masquerade.h | 1 + include/net/netfilter/ipv6/nf_reject.h | 1 + include/net/netfilter/nf_conntrack.h | 1 + include/net/netfilter/nf_conntrack_core.h | 1 + include/net/netfilter/nf_conntrack_ecache.h | 1 + include/net/netfilter/nf_conntrack_expect.h | 1 + include/net/netfilter/nf_conntrack_extend.h | 1 + include/net/netfilter/nf_conntrack_helper.h | 1 + include/net/netfilter/nf_conntrack_l3proto.h | 1 + include/net/netfilter/nf_conntrack_l4proto.h | 1 + include/net/netfilter/nf_conntrack_labels.h | 1 + include/net/netfilter/nf_conntrack_seqadj.h | 1 + include/net/netfilter/nf_conntrack_synproxy.h | 1 + include/net/netfilter/nf_conntrack_timeout.h | 1 + include/net/netfilter/nf_conntrack_timestamp.h | 1 + include/net/netfilter/nf_conntrack_tuple.h | 1 + include/net/netfilter/nf_conntrack_zones.h | 1 + include/net/netfilter/nf_dup_netdev.h | 1 + include/net/netfilter/nf_log.h | 1 + include/net/netfilter/nf_nat.h | 1 + include/net/netfilter/nf_nat_core.h | 1 + include/net/netfilter/nf_nat_helper.h | 1 + include/net/netfilter/nf_nat_l3proto.h | 1 + include/net/netfilter/nf_nat_l4proto.h | 1 + include/net/netfilter/nf_nat_redirect.h | 1 + include/net/netfilter/nf_queue.h | 1 + include/net/netfilter/nf_socket.h | 1 + include/net/netfilter/nf_tables.h | 1 + include/net/netfilter/nf_tables_core.h | 1 + include/net/netfilter/nf_tables_ipv4.h | 1 + include/net/netfilter/nf_tables_ipv6.h | 1 + include/net/netfilter/nfnetlink_log.h | 1 + include/net/netfilter/nft_dup.h | 1 + include/net/netfilter/nft_fib.h | 1 + include/net/netfilter/nft_masq.h | 1 + include/net/netfilter/nft_meta.h | 1 + include/net/netfilter/nft_redir.h | 1 + include/net/netfilter/nft_reject.h | 1 + include/net/netfilter/xt_rateest.h | 1 + include/net/netlink.h | 1 + include/net/netns/can.h | 1 + include/net/netns/conntrack.h | 1 + include/net/netns/core.h | 1 + include/net/netns/dccp.h | 1 + include/net/netns/generic.h | 1 + include/net/netns/hash.h | 1 + include/net/netns/ieee802154_6lowpan.h | 1 + include/net/netns/ipv4.h | 1 + include/net/netns/ipv6.h | 1 + include/net/netns/mib.h | 1 + include/net/netns/mpls.h | 1 + include/net/netns/netfilter.h | 1 + include/net/netns/nftables.h | 1 + include/net/netns/packet.h | 1 + include/net/netns/sctp.h | 1 + include/net/netns/unix.h | 1 + include/net/netns/x_tables.h | 1 + include/net/netns/xfrm.h | 1 + include/net/netrom.h | 1 + include/net/nexthop.h | 1 + include/net/p8022.h | 1 + include/net/pkt_cls.h | 1 + include/net/pkt_sched.h | 1 + include/net/pptp.h | 1 + include/net/psample.h | 1 + include/net/psnap.h | 1 + include/net/rawv6.h | 1 + include/net/red.h | 1 + include/net/rose.h | 1 + include/net/rtnetlink.h | 1 + include/net/sch_generic.h | 1 + include/net/scm.h | 1 + include/net/secure_seq.h | 1 + include/net/smc.h | 1 + include/net/sock_reuseport.h | 1 + include/net/stp.h | 1 + include/net/tc_act/tc_connmark.h | 1 + include/net/tc_act/tc_csum.h | 1 + include/net/tc_act/tc_defact.h | 1 + include/net/tc_act/tc_gact.h | 1 + include/net/tc_act/tc_ife.h | 1 + include/net/tc_act/tc_ipt.h | 1 + include/net/tc_act/tc_mirred.h | 1 + include/net/tc_act/tc_nat.h | 1 + include/net/tc_act/tc_pedit.h | 1 + include/net/tc_act/tc_sample.h | 1 + include/net/transp_v6.h | 1 + include/net/tso.h | 1 + include/net/udp_tunnel.h | 1 + include/net/udplite.h | 1 + include/net/vxlan.h | 1 + include/net/wext.h | 1 + include/net/x25.h | 1 + include/net/x25device.h | 1 + include/net/xfrm.h | 1 + 165 files changed, 165 insertions(+) (limited to 'include/net') diff --git a/include/net/Space.h b/include/net/Space.h index 8a32771e4215..27fb5c937c4f 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* A unified ethernet device probe. This is the easiest way to have every * ethernet adaptor have the name "eth[0123...]". */ diff --git a/include/net/act_api.h b/include/net/act_api.h index b944e0eb93be..97a908ac424d 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_ACT_API_H #define __NET_ACT_API_H diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f44ff2476758..35f5aabd432f 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ADDRCONF_H #define _ADDRCONF_H diff --git a/include/net/af_unix.h b/include/net/af_unix.h index afb37f835449..a5ba41b3b867 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NET_AFUNIX_H #define __LINUX_NET_AFUNIX_H diff --git a/include/net/ah.h b/include/net/ah.h index 4e2dfa474a7e..2d2dea521169 100644 --- a/include/net/ah.h +++ b/include/net/ah.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_AH_H #define _NET_AH_H diff --git a/include/net/arp.h b/include/net/arp.h index 17d90e4e8dc5..dc8cd47f883b 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* linux/net/inet/arp.h */ #ifndef _ARP_H #define _ARP_H diff --git a/include/net/atmclip.h b/include/net/atmclip.h index 5865924d4aac..70e350e0db3d 100644 --- a/include/net/atmclip.h +++ b/include/net/atmclip.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* net/atm/atmarp.h - RFC1577 ATM ARP */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ diff --git a/include/net/ax25.h b/include/net/ax25.h index c4a0cf6f0810..76fb39c272a7 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Declarations of AX.25 type objects. * diff --git a/include/net/compat.h b/include/net/compat.h index 13de0ccaa059..a91bea80b9fc 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef NET_COMPAT_H #define NET_COMPAT_H diff --git a/include/net/datalink.h b/include/net/datalink.h index 93cb18f729b5..a9663229b913 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_INET_DATALINK_H_ #define _NET_INET_DATALINK_H_ diff --git a/include/net/dn.h b/include/net/dn.h index 913b73d239f5..fc0036228d20 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_DN_H #define _NET_DN_H diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h index 197886cd7bdd..595b4f6c1eb1 100644 --- a/include/net/dn_dev.h +++ b/include/net/dn_dev.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_DN_DEV_H #define _NET_DN_DEV_H diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index 81210a8b8d7c..6dd2213c5eb2 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_DN_FIB_H #define _NET_DN_FIB_H diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h index 5e902fc3f4eb..2e3e7793973a 100644 --- a/include/net/dn_neigh.h +++ b/include/net/dn_neigh.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_DN_NEIGH_H #define _NET_DN_NEIGH_H diff --git a/include/net/dsfield.h b/include/net/dsfield.h index e1ad903a8d6a..1a245ee10c95 100644 --- a/include/net/dsfield.h +++ b/include/net/dsfield.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* include/net/dsfield.h - Manipulation of the Differentiated Services field */ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ diff --git a/include/net/dst.h b/include/net/dst.h index 06a6765da074..694c2e6ae618 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * net/dst.h Protocol independent destination cache definitions. * diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index 151accae708b..72fd5067c353 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_DST_CACHE_H #define _NET_DST_CACHE_H diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index a803129a4849..91bc7bdf6bf5 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_DST_METADATA_H #define __NET_DST_METADATA_H 1 diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index c84b3287e38b..5ec645f27ee3 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_DST_OPS_H #define _NET_DST_OPS_H #include diff --git a/include/net/esp.h b/include/net/esp.h index c41994d1bfef..117652eb6ea3 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_ESP_H #define _NET_ESP_H diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 3d7f1cefc6f5..648caf90ec07 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_FIB_RULES_H #define __NET_FIB_RULES_H diff --git a/include/net/firewire.h b/include/net/firewire.h index 31bcbfe7a220..299e5df38552 100644 --- a/include/net/firewire.h +++ b/include/net/firewire.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_FIREWIRE_H #define _NET_FIREWIRE_H diff --git a/include/net/flow.h b/include/net/flow.h index eb60cee30b44..f1624fd5b1d0 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * * Generic internet FLOW. diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index fc3dce730a6b..22aba321282d 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_FLOW_DISSECTOR_H #define _NET_FLOW_DISSECTOR_H diff --git a/include/net/fou.h b/include/net/fou.h index f5cc6910a27e..80f56e275b08 100644 --- a/include/net/fou.h +++ b/include/net/fou.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_FOU_H #define __NET_FOU_H diff --git a/include/net/garp.h b/include/net/garp.h index abf33bbd2e6a..c41833bd4590 100644 --- a/include/net/garp.h +++ b/include/net/garp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_GARP_H #define _NET_GARP_H diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 8b7aa370e7a4..304f7aa9cc01 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_GEN_STATS_H #define __NET_GEN_STATS_H diff --git a/include/net/genetlink.h b/include/net/genetlink.h index c59a098221db..5ac169a735f4 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_GENERIC_NETLINK_H #define __NET_GENERIC_NETLINK_H diff --git a/include/net/geneve.h b/include/net/geneve.h index ec0327d4331b..a7600ed55ea3 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_GENEVE_H #define __NET_GENEVE_H 1 diff --git a/include/net/gre.h b/include/net/gre.h index d25d836c129b..f90585decbce 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_GRE_H #define __LINUX_GRE_H diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index fcaf8f479130..596688b67a2a 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_GRO_CELLS_H #define _NET_GRO_CELLS_H diff --git a/include/net/gtp.h b/include/net/gtp.h index 6398891b99ba..0e16ebb2a82d 100644 --- a/include/net/gtp.h +++ b/include/net/gtp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _GTP_H_ #define _GTP_H_ diff --git a/include/net/gue.h b/include/net/gue.h index 3f28ec7f1c7f..2fdb29ca74c2 100644 --- a/include/net/gue.h +++ b/include/net/gue.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_GUE_H #define __NET_GUE_H diff --git a/include/net/hwbm.h b/include/net/hwbm.h index 47d08662501b..89085e2e2da5 100644 --- a/include/net/hwbm.h +++ b/include/net/hwbm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _HWBM_H #define _HWBM_H diff --git a/include/net/ife.h b/include/net/ife.h index 2d87d6898b0a..44b9c00f7223 100644 --- a/include/net/ife.h +++ b/include/net/ife.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_IFE_H #define __NET_IFE_H diff --git a/include/net/inet_common.h b/include/net/inet_common.h index f39ae697347f..5a54c9570977 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _INET_COMMON_H #define _INET_COMMON_H diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index dce2d586d9ce..d30e4c869438 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _INET_ECN_H_ #define _INET_ECN_H_ diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index fc59e0775e00..a6e4edd8d4a2 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 950ed182f62f..00b5e7825508 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * INETPEER - A storage for permanent information about peers * diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index ee96f402cb75..bee528135cf1 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_IP6_ROUTE_H #define _NET_IP6_ROUTE_H diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 08fbc7f7d8d7..d66f70f63734 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_IP6_TUNNEL_H #define _NET_IP6_TUNNEL_H diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 992652856fe8..eb2321a13506 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_IP_TUNNELS_H #define __NET_IP_TUNNELS_H 1 diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4f4f786255ef..5d08c1950e7d 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* IP Virtual Server * data structure and functionality definitions */ diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index cc4f30cd7315..fee6fc451597 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_IPCOMP_H #define _NET_IPCOMP_H diff --git a/include/net/ipconfig.h b/include/net/ipconfig.h index c74cc1bd5a02..e3534299bd2a 100644 --- a/include/net/ipconfig.h +++ b/include/net/ipconfig.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 1997 Martin Mares * diff --git a/include/net/ipx.h b/include/net/ipx.h index af32b97b5ddd..baf090390998 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_INET_IPX_H_ #define _NET_INET_IPX_H_ /* diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index 714cc9a54a4c..070e93a17c59 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright 2006 IBM Corporation * IUCV protocol stack for Linux on zSeries diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h index b867b0cf79e8..f9e88401d7da 100644 --- a/include/net/iucv/iucv.h +++ b/include/net/iucv/iucv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * drivers/s390/net/iucv.h * IUCV base support. diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 2509728650bd..725282095840 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This file define the new driver API for Wireless Extensions * diff --git a/include/net/lapb.h b/include/net/lapb.h index 85e773742f4e..ccc3d1f020b0 100644 --- a/include/net/lapb.h +++ b/include/net/lapb.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LAPB_H #define _LAPB_H #include diff --git a/include/net/lib80211.h b/include/net/lib80211.h index aab0f427edb5..8b47d3a51cf8 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * lib80211.h -- common bits for IEEE802.11 wireless drivers * diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 7c26863b8cf4..d747ef975cd8 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_LWTUNNEL_H #define __NET_LWTUNNEL_H 1 diff --git a/include/net/mld.h b/include/net/mld.h index 01d751303498..b0f5b3105ef0 100644 --- a/include/net/mld.h +++ b/include/net/mld.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef LINUX_MLD_H #define LINUX_MLD_H diff --git a/include/net/mrp.h b/include/net/mrp.h index 31912c3be772..ef58b4a07190 100644 --- a/include/net/mrp.h +++ b/include/net/mrp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_MRP_H #define _NET_MRP_H diff --git a/include/net/ncsi.h b/include/net/ncsi.h index fdc60ff2511d..fbefe80361ee 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_NCSI_H #define __NET_NCSI_H diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 31b1bb11ba3f..ddfbb591e2c5 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NDISC_H #define _NDISC_H diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 9816df225af3..a964366a7ef5 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_NEIGHBOUR_H #define _NET_NEIGHBOUR_H diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 57faa375eab9..10f99dafd5ac 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Operations on the network namespace */ diff --git a/include/net/net_ratelimit.h b/include/net/net_ratelimit.h index 7727b4247daf..93c1bd5133c2 100644 --- a/include/net/net_ratelimit.h +++ b/include/net/net_ratelimit.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NET_RATELIMIT_H #define _LINUX_NET_RATELIMIT_H diff --git a/include/net/netevent.h b/include/net/netevent.h index f440df172b56..f728d9cad170 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_EVENT_H #define _NET_EVENT_H diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 925524ede6c8..74af19c3a8f7 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BR_NETFILTER_H_ #define _BR_NETFILTER_H_ diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 919e4e8af327..2cc728ef8cd0 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * IPv4 support for nf_conntrack. * diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h index db405f70e538..bcbd724cc048 100644 --- a/include/net/netfilter/ipv4/nf_defrag_ipv4.h +++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_DEFRAG_IPV4_H #define _NF_DEFRAG_IPV4_H diff --git a/include/net/netfilter/ipv4/nf_dup_ipv4.h b/include/net/netfilter/ipv4/nf_dup_ipv4.h index 0a14733e8b82..c962e0be3549 100644 --- a/include/net/netfilter/ipv4/nf_dup_ipv4.h +++ b/include/net/netfilter/ipv4/nf_dup_ipv4.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_DUP_IPV4_H_ #define _NF_DUP_IPV4_H_ diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h index a9c001c646da..ebd869473603 100644 --- a/include/net/netfilter/ipv4/nf_nat_masquerade.h +++ b/include/net/netfilter/ipv4/nf_nat_masquerade.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_NAT_MASQUERADE_IPV4_H_ #define _NF_NAT_MASQUERADE_IPV4_H_ diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index df7ecd806aba..2eb43fcefc50 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _IPV4_NF_REJECT_H #define _IPV4_NF_REJECT_H diff --git a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h b/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h index 67edd50a398a..c86895bc5eb6 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * ICMPv6 tracking. * diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index eaea968f8657..79a335c0d8b8 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_IPV6_H #define _NF_CONNTRACK_IPV6_H diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index 7664efe37974..9d7e28736da9 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_DEFRAG_IPV6_H #define _NF_DEFRAG_IPV6_H diff --git a/include/net/netfilter/ipv6/nf_dup_ipv6.h b/include/net/netfilter/ipv6/nf_dup_ipv6.h index fa6237b382a3..caf0c2dd8ee7 100644 --- a/include/net/netfilter/ipv6/nf_dup_ipv6.h +++ b/include/net/netfilter/ipv6/nf_dup_ipv6.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_DUP_IPV6_H_ #define _NF_DUP_IPV6_H_ diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h index 0a13396cd390..1ed4f2631ed6 100644 --- a/include/net/netfilter/ipv6/nf_nat_masquerade.h +++ b/include/net/netfilter/ipv6/nf_nat_masquerade.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_NAT_MASQUERADE_IPV6_H_ #define _NF_NAT_MASQUERADE_IPV6_H_ diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h index 0ea4fa37db16..3a5a9a36a0b2 100644 --- a/include/net/netfilter/ipv6/nf_reject.h +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _IPV6_NF_REJECT_H #define _IPV6_NF_REJECT_H diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8f3bd30511de..792c3f6d30ce 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Connection state tracking for netfilter. This is separated from, * but required by, the (future) NAT layer; it can also be used by an iptables diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 81d7f8a30945..9b5e7634713e 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This header is used to share core functionality between the * standalone connection tracking module, and the compatibility layer's use diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 2a10c6570fcc..3f1ce9a8776e 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * connection tracking event cache. */ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 818def011110..006e430d1cdf 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * connection tracking expectations. */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 4944bc9153cf..21f887c5058c 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_EXTEND_H #define _NF_CONNTRACK_EXTEND_H diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index c519bb5b5bb8..fc39bbaf107c 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * connection tracking helpers. * diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 6269deecbee7..d5808f3e2715 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C)2003,2004 USAGI/WIDE Project * diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 738a0307a96b..510192eb7e9d 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Header for use in defining a given L4 protocol for connection tracking. * diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 1723a67c0b0a..4eacce6f3bcc 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #include #include #include diff --git a/include/net/netfilter/nf_conntrack_seqadj.h b/include/net/netfilter/nf_conntrack_seqadj.h index 4b3362991a25..0a10b50537ae 100644 --- a/include/net/netfilter/nf_conntrack_seqadj.h +++ b/include/net/netfilter/nf_conntrack_seqadj.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_SEQADJ_H #define _NF_CONNTRACK_SEQADJ_H diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index a2fcb5271726..2c7559a54092 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_SYNPROXY_H #define _NF_CONNTRACK_SYNPROXY_H diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 483d104fa254..9468ab4ad12d 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_TIMEOUT_H #define _NF_CONNTRACK_TIMEOUT_H diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h index 300ae2209f25..3b661986be8f 100644 --- a/include/net/netfilter/nf_conntrack_timestamp.h +++ b/include/net/netfilter/nf_conntrack_timestamp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_TSTAMP_H #define _NF_CONNTRACK_TSTAMP_H diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index aea3f8221be0..bf0444e111a6 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Definitions and Declarations for tuple. * diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 64a718b60839..52950baa3ab5 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_ZONES_H #define _NF_CONNTRACK_ZONES_H diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h index 3e919356bedf..2a6f6dcad3d9 100644 --- a/include/net/netfilter/nf_dup_netdev.h +++ b/include/net/netfilter/nf_dup_netdev.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_DUP_NETDEV_H_ #define _NF_DUP_NETDEV_H_ diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 42e0696f38d8..e811ac07ea94 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_LOG_H #define _NF_LOG_H diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index b71701302e61..207a467e7ca6 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_NAT_H #define _NF_NAT_H #include diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h index 186c54138f35..235bd0e9a5aa 100644 --- a/include/net/netfilter/nf_nat_core.h +++ b/include/net/netfilter/nf_nat_core.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_NAT_CORE_H #define _NF_NAT_CORE_H #include diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index fbfa5acf4f14..97d7033e93a4 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_NAT_HELPER_H #define _NF_NAT_HELPER_H /* NAT protocol helper routines. */ diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h index aef3e5fc9fd9..ce7c2b4e64bb 100644 --- a/include/net/netfilter/nf_nat_l3proto.h +++ b/include/net/netfilter/nf_nat_l3proto.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_NAT_L3PROTO_H #define _NF_NAT_L3PROTO_H diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index 3923150f2a1e..67835ff8a2d9 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Header for use in defining a given protocol. */ #ifndef _NF_NAT_L4PROTO_H #define _NF_NAT_L4PROTO_H diff --git a/include/net/netfilter/nf_nat_redirect.h b/include/net/netfilter/nf_nat_redirect.h index 73b729543309..5ddabb08c472 100644 --- a/include/net/netfilter/nf_nat_redirect.h +++ b/include/net/netfilter/nf_nat_redirect.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_NAT_REDIRECT_H_ #define _NF_NAT_REDIRECT_H_ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 39468720fc19..814058d0f167 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_QUEUE_H #define _NF_QUEUE_H diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h index f2fc39c97d43..8230fefff9f5 100644 --- a/include/net/netfilter/nf_socket.h +++ b/include/net/netfilter/nf_socket.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_SOCK_H_ #define _NF_SOCK_H_ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0f5b12a4ad09..079c69cae2f6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_NF_TABLES_H #define _NET_NF_TABLES_H diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 424684c33771..ea5aab568be8 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_NF_TABLES_CORE_H #define _NET_NF_TABLES_CORE_H diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index 25e33aee91e7..f0896ba456c4 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_TABLES_IPV4_H_ #define _NF_TABLES_IPV4_H_ diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 97983d1c05e4..b8065b72f56e 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_TABLES_IPV6_H_ #define _NF_TABLES_IPV6_H_ diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h index 5ca3f14f0998..612cfb63ac68 100644 --- a/include/net/netfilter/nfnetlink_log.h +++ b/include/net/netfilter/nfnetlink_log.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _KER_NFNETLINK_LOG_H #define _KER_NFNETLINK_LOG_H diff --git a/include/net/netfilter/nft_dup.h b/include/net/netfilter/nft_dup.h index 6b84cf6491a2..4d9d512984b2 100644 --- a/include/net/netfilter/nft_dup.h +++ b/include/net/netfilter/nft_dup.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NFT_DUP_H_ #define _NFT_DUP_H_ diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 381af9469e6a..a88f92737308 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NFT_FIB_H_ #define _NFT_FIB_H_ diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h index a3f3c11b2526..e51ab3815797 100644 --- a/include/net/netfilter/nft_masq.h +++ b/include/net/netfilter/nft_masq.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NFT_MASQ_H_ #define _NFT_MASQ_H_ diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 1139cde0fdc5..5c69e9b09388 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NFT_META_H_ #define _NFT_META_H_ diff --git a/include/net/netfilter/nft_redir.h b/include/net/netfilter/nft_redir.h index a2d67546afab..4a970737c03c 100644 --- a/include/net/netfilter/nft_redir.h +++ b/include/net/netfilter/nft_redir.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NFT_REDIR_H_ #define _NFT_REDIR_H_ diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h index 02e28c529b29..de80c50761f0 100644 --- a/include/net/netfilter/nft_reject.h +++ b/include/net/netfilter/nft_reject.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NFT_REJECT_H_ #define _NFT_REJECT_H_ diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index 130e58361f99..b1db13772554 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _XT_RATEEST_H #define _XT_RATEEST_H diff --git a/include/net/netlink.h b/include/net/netlink.h index 14c289393071..0c154f98e987 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_NETLINK_H #define __NET_NETLINK_H diff --git a/include/net/netns/can.h b/include/net/netns/can.h index b106e6ae2e5b..ecf238b8862c 100644 --- a/include/net/netns/can.h +++ b/include/net/netns/can.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * can in net namespaces */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 17724c62de97..9795d628a127 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETNS_CONNTRACK_H #define __NETNS_CONNTRACK_H diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 78eb1ff75475..0ad4d0c71228 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETNS_CORE_H__ #define __NETNS_CORE_H__ diff --git a/include/net/netns/dccp.h b/include/net/netns/dccp.h index 98d2a7ce1f71..cdbc4f5b8390 100644 --- a/include/net/netns/dccp.h +++ b/include/net/netns/dccp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETNS_DCCP_H__ #define __NETNS_DCCP_H__ diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index f15daaa89385..8a1ab47c3fb3 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * generic net pointers */ diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h index 69a6715d9f3f..24c78183a4c2 100644 --- a/include/net/netns/hash.h +++ b/include/net/netns/hash.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_NS_HASH_H__ #define __NET_NS_HASH_H__ diff --git a/include/net/netns/ieee802154_6lowpan.h b/include/net/netns/ieee802154_6lowpan.h index 8170f8d7052b..736aeac52f56 100644 --- a/include/net/netns/ieee802154_6lowpan.h +++ b/include/net/netns/ieee802154_6lowpan.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * ieee802154 6lowpan in net namespaces */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 20d061c805e3..8fcff2837484 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * ipv4 in net namespaces */ diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 2544f9760a42..dc825a5ddd7f 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * ipv6 in net namespaces */ diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index d542a4b28cca..830bdf345b17 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETNS_MIB_H__ #define __NETNS_MIB_H__ diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index 6608b3693385..a7bdcfbb0b28 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * mpls in net namespaces */ diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 72d66c8763d0..cc00af2ac2d7 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETNS_NETFILTER_H #define __NETNS_NETFILTER_H diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index c80781146019..4109b5f3010f 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NETNS_NFTABLES_H_ #define _NETNS_NFTABLES_H_ diff --git a/include/net/netns/packet.h b/include/net/netns/packet.h index 17ec2b95c062..aae69bb43cde 100644 --- a/include/net/netns/packet.h +++ b/include/net/netns/packet.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Packet network namespace */ diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index b7871d018354..ebc813277662 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETNS_SCTP_H__ #define __NETNS_SCTP_H__ diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h index 284649d4dfb4..91a3d7e39198 100644 --- a/include/net/netns/unix.h +++ b/include/net/netns/unix.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Unix network namespace */ diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index c8a7681efa6a..9bc5a12fdbb0 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETNS_X_TABLES_H #define __NETNS_X_TABLES_H diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 611521646dd4..9991e5ef52cc 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETNS_XFRM_H #define __NETNS_XFRM_H diff --git a/include/net/netrom.h b/include/net/netrom.h index 443a4ffca7aa..0dad2dd5f9d7 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Declarations of NET/ROM type objects. * diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 3334dbfa5aa4..36bb794f5cd6 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_NEXTHOP_H #define __NET_NEXTHOP_H diff --git a/include/net/p8022.h b/include/net/p8022.h index 05e41383856b..c2bacc66bfbc 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_P8022_H #define _NET_P8022_H struct datalink_proto * diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e80edd8879ef..13b23f3ed69a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_PKT_CLS_H #define __NET_PKT_CLS_H diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 259bc191ba59..b3869f97d37d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_PKT_SCHED_H #define __NET_PKT_SCHED_H diff --git a/include/net/pptp.h b/include/net/pptp.h index 92e9f1fe2628..383e25ca53a7 100644 --- a/include/net/pptp.h +++ b/include/net/pptp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_PPTP_H #define _NET_PPTP_H diff --git a/include/net/psample.h b/include/net/psample.h index 8888b0e1a82e..9b80f814ab04 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_PSAMPLE_H #define __NET_PSAMPLE_H diff --git a/include/net/psnap.h b/include/net/psnap.h index 78db4cc1306a..7cb0c8ab4171 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_PSNAP_H #define _NET_PSNAP_H diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 4addc5c988e0..53d86b6055e8 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_RAWV6_H #define _NET_RAWV6_H diff --git a/include/net/red.h b/include/net/red.h index 208e718e16b9..9a9347710701 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_SCHED_RED_H #define __NET_SCHED_RED_H diff --git a/include/net/rose.h b/include/net/rose.h index 50811fe2c585..04b72681f2ab 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Declarations of Rose type objects. * diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 21837ca68ecc..7b938fbeebc1 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_RTNETLINK_H #define __NET_RTNETLINK_H diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 135f5a2dd931..1b33a6c8b477 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_SCHED_GENERIC_H #define __NET_SCHED_GENERIC_H diff --git a/include/net/scm.h b/include/net/scm.h index 142ea9e7a6d0..903771c8d4e3 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NET_SCM_H #define __LINUX_NET_SCM_H diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index 031bf16d1521..d7d2495f83c2 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_SECURE_SEQ #define _NET_SECURE_SEQ diff --git a/include/net/smc.h b/include/net/smc.h index 12d26358ad9f..8381d163fefa 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index aecd30308d50..0054b3a9b923 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _SOCK_REUSEPORT_H #define _SOCK_REUSEPORT_H diff --git a/include/net/stp.h b/include/net/stp.h index 3af174d70d9e..2914e6d53490 100644 --- a/include/net/stp.h +++ b/include/net/stp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_STP_H #define _NET_STP_H diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h index 59b515d32bb4..1f4cb477bb5d 100644 --- a/include/net/tc_act/tc_connmark.h +++ b/include/net/tc_act/tc_connmark.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_CONNMARK_H #define __NET_TC_CONNMARK_H diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index 3248beaf16b0..781f3433a0be 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_CSUM_H #define __NET_TC_CSUM_H diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h index d47f040a3bdf..d7ba0402a732 100644 --- a/include/net/tc_act/tc_defact.h +++ b/include/net/tc_act/tc_defact.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_DEF_H #define __NET_TC_DEF_H diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 41afe1ce7b16..e82d93346b63 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_GACT_H #define __NET_TC_GACT_H diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 30ba459ddd34..ba6667125bcd 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_IFE_H #define __NET_TC_IFE_H diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h index 31309766e379..4225fcb1c6ba 100644 --- a/include/net/tc_act/tc_ipt.h +++ b/include/net/tc_act/tc_ipt.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_IPT_H #define __NET_TC_IPT_H diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 604bc31e23ab..b2dbbfaefd22 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_MIR_H #define __NET_TC_MIR_H diff --git a/include/net/tc_act/tc_nat.h b/include/net/tc_act/tc_nat.h index 56681a320612..c14407160812 100644 --- a/include/net/tc_act/tc_nat.h +++ b/include/net/tc_act/tc_nat.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_NAT_H #define __NET_TC_NAT_H diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index a46c3f2ace70..227a6f1d02f4 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_PED_H #define __NET_TC_PED_H diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h index 89e9305be880..524cee4f4c81 100644 --- a/include/net/tc_act/tc_sample.h +++ b/include/net/tc_act/tc_sample.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_SAMPLE_H #define __NET_TC_SAMPLE_H diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 276f9760ab56..c4f5caaf3778 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TRANSP_V6_H #define _TRANSP_V6_H diff --git a/include/net/tso.h b/include/net/tso.h index 9a56c39e6d0a..7e166a570349 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TSO_H #define _TSO_H diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 10cce0dd4450..b95a6927c718 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_UDP_TUNNEL_H #define __NET_UDP_TUNNEL_H diff --git a/include/net/udplite.h b/include/net/udplite.h index b7a18f63d86d..81bdbf97319b 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Definitions for the UDP-Lite (RFC 3828) code. */ diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 4e3876dde295..13223396dc64 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_VXLAN_H #define __NET_VXLAN_H 1 diff --git a/include/net/wext.h b/include/net/wext.h index 454ff763eeba..e51f067fdb3a 100644 --- a/include/net/wext.h +++ b/include/net/wext.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_WEXT_H #define __NET_WEXT_H diff --git a/include/net/x25.h b/include/net/x25.h index 2609b57bd459..ed1acc3044ac 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Declarations of X.25 Packet Layer type objects. * diff --git a/include/net/x25device.h b/include/net/x25device.h index 1fa08b49f1c2..cf749efca24d 100644 --- a/include/net/x25device.h +++ b/include/net/x25device.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _X25DEVICE_H #define _X25DEVICE_H diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f002a2c5e33c..e015e164bac0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_XFRM_H #define _NET_XFRM_H -- cgit v1.2.3 From 47d3d7ac656a1ffb9d0f0d3c845663ed6fd7e78d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 30 Oct 2017 14:16:00 -0700 Subject: ipv6: Implement limits on Hop-by-Hop and Destination options RFC 8200 (IPv6) defines Hop-by-Hop options and Destination options extension headers. Both of these carry a list of TLVs which is only limited by the maximum length of the extension header (2048 bytes). By the spec a host must process all the TLVs in these options, however these could be used as a fairly obvious denial of service attack. I think this could in fact be a significant DOS vector on the Internet, one mitigating factor might be that many FWs drop all packets with EH (and obviously this is only IPv6) so an Internet wide attack might not be so effective (yet!). By my calculation, the worse case packet with TLVs in a standard 1500 byte MTU packet that would be processed by the stack contains 1282 invidual TLVs (including pad TLVS) or 724 two byte TLVs. I wrote a quick test program that floods a whole bunch of these packets to a host and sure enough there is substantial time spent in ip6_parse_tlv. These packets contain nothing but unknown TLVS (that are ignored), TLV padding, and bogus UDP header with zero payload length. 25.38% [kernel] [k] __fib6_clean_all 21.63% [kernel] [k] ip6_parse_tlv 4.21% [kernel] [k] __local_bh_enable_ip 2.18% [kernel] [k] ip6_pol_route.isra.39 1.98% [kernel] [k] fib6_walk_continue 1.88% [kernel] [k] _raw_write_lock_bh 1.65% [kernel] [k] dst_release This patch adds configurable limits to Destination and Hop-by-Hop options. There are three limits that may be set: - Limit the number of options in a Hop-by-Hop or Destination options extension header. - Limit the byte length of a Hop-by-Hop or Destination options extension header. - Disallow unrecognized options in a Hop-by-Hop or Destination options extension header. The limits are set in corresponding sysctls: ipv6.sysctl.max_dst_opts_cnt ipv6.sysctl.max_hbh_opts_cnt ipv6.sysctl.max_dst_opts_len ipv6.sysctl.max_hbh_opts_len If a max_*_opts_cnt is less than zero then unknown TLVs are disallowed. The number of known TLVs that are allowed is the absolute value of this number. If a limit is exceeded when processing an extension header the packet is dropped. Default values are set to 8 for options counts, and set to INT_MAX for maximum length. Note the choice to limit options to 8 is an arbitrary guess (roughly based on the fact that the stack supports three HBH options and just one destination option). These limits have being proposed in draft-ietf-6man-rfc6434-bis. Tested (by Martin Lau) I tested out 1 thread (i.e. one raw_udp process). I changed the net.ipv6.max_dst_(opts|hbh)_number between 8 to 2048. With sysctls setting to 2048, the softirq% is packed to 100%. With 8, the softirq% is almost unnoticable from mpstat. v2; - Code and documention cleanup. - Change references of RFC2460 to be RFC8200. - Add reference to RFC6434-bis where the limits will be in standard. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ipv6.h | 40 ++++++++++++++++++++++++++++++++++++++++ include/net/netns/ipv6.h | 4 ++++ 2 files changed, 44 insertions(+) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3cda3b521c36..fb6d67012de6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -51,6 +51,46 @@ #define IPV6_DEFAULT_HOPLIMIT 64 #define IPV6_DEFAULT_MCASTHOPS 1 +/* Limits on Hop-by-Hop and Destination options. + * + * Per RFC8200 there is no limit on the maximum number or lengths of options in + * Hop-by-Hop or Destination options other then the packet must fit in an MTU. + * We allow configurable limits in order to mitigate potential denial of + * service attacks. + * + * There are three limits that may be set: + * - Limit the number of options in a Hop-by-Hop or Destination options + * extension header + * - Limit the byte length of a Hop-by-Hop or Destination options extension + * header + * - Disallow unknown options + * + * The limits are expressed in corresponding sysctls: + * + * ipv6.sysctl.max_dst_opts_cnt + * ipv6.sysctl.max_hbh_opts_cnt + * ipv6.sysctl.max_dst_opts_len + * ipv6.sysctl.max_hbh_opts_len + * + * max_*_opts_cnt is the number of TLVs that are allowed for Destination + * options or Hop-by-Hop options. If the number is less than zero then unknown + * TLVs are disallowed and the number of known options that are allowed is the + * absolute value. Setting the value to INT_MAX indicates no limit. + * + * max_*_opts_len is the length limit in bytes of a Destination or + * Hop-by-Hop options extension header. Setting the value to INT_MAX + * indicates no length limit. + * + * If a limit is exceeded when processing an extension header the packet is + * silently discarded. + */ + +/* Default limits for Hop-by-Hop and Destination options */ +#define IP6_DEFAULT_MAX_DST_OPTS_CNT 8 +#define IP6_DEFAULT_MAX_HBH_OPTS_CNT 8 +#define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */ +#define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */ + /* * Addr type * diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 2ea1ed341ef8..600ba1c1befc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -37,6 +37,10 @@ struct netns_sysctl_ipv6 { int idgen_delay; int flowlabel_state_ranges; int flowlabel_reflect; + int max_dst_opts_cnt; + int max_hbh_opts_cnt; + int max_dst_opts_len; + int max_hbh_opts_len; }; struct netns_ipv6 { -- cgit v1.2.3 From a159d3c4b8291998c018f0dbddd4678315264a1e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 1 Nov 2017 10:23:49 -0700 Subject: net_sched: acquire RTNL in tc_action_net_exit() I forgot to acquire RTNL in tc_action_net_exit() which leads that action ops->cleanup() is not always called with RTNL. This usually is not a big deal because this function is called after all netns refcnt are gone, but given RTNL protects more than just actions, add it for safety and consistency. Also add an assertion to catch other potential bugs. Fixes: ddf97ccdd7cb ("net_sched: add network namespace support for tc actions") Reported-by: Lucas Bates Tested-by: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index b944e0eb93be..5072446d5f06 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -122,7 +122,9 @@ void tcf_idrinfo_destroy(const struct tc_action_ops *ops, static inline void tc_action_net_exit(struct tc_action_net *tn) { + rtnl_lock(); tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + rtnl_unlock(); kfree(tn->idrinfo); } -- cgit v1.2.3 From ceffcc5e254b450e6159f173e4538215cebf1b59 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 1 Nov 2017 10:23:50 -0700 Subject: net_sched: hold netns refcnt for each action TC actions have been destroyed asynchronously for a long time, previously in a RCU callback and now in a workqueue. If we don't hold a refcnt for its netns, we could use the per netns data structure, struct tcf_idrinfo, after it has been freed by netns workqueue. Hold refcnt to ensure netns destroy happens after all actions are gone. Fixes: ddf97ccdd7cb ("net_sched: add network namespace support for tc actions") Reported-by: Lucas Bates Tested-by: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 5072446d5f06..c68551255032 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -13,6 +13,7 @@ struct tcf_idrinfo { spinlock_t lock; struct idr action_idr; + struct net *net; }; struct tc_action_ops; @@ -104,7 +105,7 @@ struct tc_action_net { static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, struct net *net) { int err = 0; @@ -112,6 +113,7 @@ int tc_action_net_init(struct tc_action_net *tn, if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; + tn->idrinfo->net = net; spin_lock_init(&tn->idrinfo->lock); idr_init(&tn->idrinfo->action_idr); return err; -- cgit v1.2.3 From 3ae6ec08292f01c6782d1a80be0b2cc675e0ecfc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 2 Nov 2017 17:14:05 +0100 Subject: ipv4: Send a netevent whenever multipath hash policy is changed Devices performing IPv4 forwarding need to update their multipath hash policy whenever it is changed. Inform these devices by generating a netevent. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/netevent.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netevent.h b/include/net/netevent.h index f440df172b56..e3f0e8f2f6e8 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -25,6 +25,7 @@ enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ + NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */ }; int register_netevent_notifier(struct notifier_block *nb); -- cgit v1.2.3 From c7eb7d7230509ec862d4144f7a831f995bc5d028 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 3 Nov 2017 11:46:24 +0100 Subject: net: sched: introduce chain_head_change callback Add a callback that is to be called whenever head of the chain changes. Also provide a callback for the default case when the caller gets a block using non-extended getter. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 14 ++++++-------- include/net/sch_generic.h | 5 ++++- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index d15c40c7bde7..505d4b71975f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -26,6 +26,8 @@ enum tcf_block_binder_type { struct tcf_block_ext_info { enum tcf_block_binder_type binder_type; + tcf_chain_head_change_t *chain_head_change; + void *chain_head_change_priv; }; struct tcf_block_cb; @@ -37,12 +39,10 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, void tcf_chain_put(struct tcf_chain *chain); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q); -int tcf_block_get_ext(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei); void tcf_block_put(struct tcf_block *block); -void tcf_block_put_ext(struct tcf_block *block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei); static inline struct Qdisc *tcf_block_q(struct tcf_block *block) @@ -82,8 +82,7 @@ int tcf_block_get(struct tcf_block **p_block, } static inline -int tcf_block_get_ext(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei) { return 0; @@ -94,8 +93,7 @@ static inline void tcf_block_put(struct tcf_block *block) } static inline -void tcf_block_put_ext(struct tcf_block *block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c23e938f5b19..f230269e0bfb 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -260,9 +260,12 @@ struct qdisc_skb_cb { unsigned char data[QDISC_CB_PRIV_LEN]; }; +typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); + struct tcf_chain { struct tcf_proto __rcu *filter_chain; - struct tcf_proto __rcu **p_filter_chain; + tcf_chain_head_change_t *chain_head_change; + void *chain_head_change_priv; struct list_head list; struct tcf_block *block; u32 index; /* chain index */ -- cgit v1.2.3 From 46209401f8f6116bd0b2c2d14a63958e83ffca0b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 3 Nov 2017 11:46:25 +0100 Subject: net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath In sch_handle_egress and sch_handle_ingress tp->q is used only in order to update stats. So stats and filter list are the only things that are needed in clsact qdisc fastpath processing. Introduce new mini_Qdisc struct to hold those items. Also, introduce a helper to swap the mini_Qdisc structures in case filter list head changes. This removes need for tp->q usage without added overhead. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f230269e0bfb..c64e62c9450a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -904,4 +904,36 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } +/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc. + * The fast path only needs to access filter list and to update stats + */ +struct mini_Qdisc { + struct tcf_proto *filter_list; + struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_queue __percpu *cpu_qstats; + struct rcu_head rcu; +}; + +static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq, + const struct sk_buff *skb) +{ + bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb); +} + +static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq) +{ + this_cpu_inc(miniq->cpu_qstats->drops); +} + +struct mini_Qdisc_pair { + struct mini_Qdisc miniq1; + struct mini_Qdisc miniq2; + struct mini_Qdisc __rcu **p_miniq; +}; + +void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, + struct tcf_proto *tp_head); +void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, + struct mini_Qdisc __rcu **p_miniq); + #endif -- cgit v1.2.3 From 27c565ae9d554fa1c00c799754cff43476c8d3b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Nov 2017 08:53:27 -0700 Subject: ipv6: remove IN6_ADDR_HSIZE from addrconf.h IN6_ADDR_HSIZE is private to addrconf.c, move it here to avoid confusion. Signed-off-by: Eric Dumazet Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/addrconf.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 3357332ea375..b623b65a79d1 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -59,9 +59,6 @@ struct in6_validator_info { struct netlink_ext_ack *extack; }; -#define IN6_ADDR_HSIZE_SHIFT 8 -#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) - int addrconf_init(void); void addrconf_cleanup(void); -- cgit v1.2.3 From 99feaafcdb566e8f032e7acc2a303713ad6bf196 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:20 -0400 Subject: net: dsa: make switch index unsigned Define the DSA switch index as an unsigned int, because it will never be less than 0. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 50e276dc4c01..fa1c21ab8092 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -209,7 +209,7 @@ struct dsa_switch { * Parent switch tree, and switch index. */ struct dsa_switch_tree *dst; - int index; + unsigned int index; /* Listener for switch fabric events */ struct notifier_block nb; -- cgit v1.2.3 From 49463b7f2da1a115404b02c5533bc2c2125833a3 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:21 -0400 Subject: net: dsa: make tree index unsigned Similarly to a DSA switch and port, rename the tree index from "tree" to "index" and make it an unsigned int because it isn't supposed to be less than 0. u32 is an OF specific data used to retrieve the value and has no need to be propagated up to the tree index. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index fa1c21ab8092..e54332968417 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -116,7 +116,7 @@ struct dsa_switch_tree { struct raw_notifier_head nh; /* Tree identifier */ - u32 tree; + unsigned int index; /* Number of switches attached to this tree */ struct kref refcount; -- cgit v1.2.3 From 1f2556916d974cfb62b6af51660186b5f58bd869 Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Fri, 3 Nov 2017 16:38:48 -0700 Subject: tcp: higher throughput under reordering with adaptive RACK reordering wnd Currently TCP RACK loss detection does not work well if packets are being reordered beyond its static reordering window (min_rtt/4).Under such reordering it may falsely trigger loss recoveries and reduce TCP throughput significantly. This patch improves that by increasing and reducing the reordering window based on DSACK, which is now supported in major TCP implementations. It makes RACK's reo_wnd adaptive based on DSACK and no. of recoveries. - If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded by srtt), since there is possibility that spurious retransmission was due to reordering delay longer than reo_wnd. - Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) no. of successful recoveries (accounts for full DSACK-based loss recovery undo). After that, reset it to default (min_rtt/4). - At max, reo_wnd is incremented only once per rtt. So that the new DSACK on which we are reacting, is due to the spurious retx (approx) after the reo_wnd has been updated last time. - reo_wnd is tracked in terms of steps (of min_rtt/4), rather than absolute value to account for change in rtt. In our internal testing, we observed significant increase in throughput, in scenarios where reordering exceeds min_rtt/4 (previous static value). Signed-off-by: Priyaranjan Jha Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index c2bf2a822b10..babfd4da1515 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -246,6 +246,7 @@ extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ +#define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -1901,6 +1902,7 @@ extern void tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); +extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs); /* At how many usecs into the future should the RTO fire? */ static inline s64 tcp_rto_delta_us(const struct sock *sk) -- cgit v1.2.3 From 5caaed151a68ae36aca2981cc245f5960a0a7603 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Nov 2017 19:41:09 +0100 Subject: netfilter: conntrack: don't cache nlattr_tuple_size result in nla_size We currently call ->nlattr_tuple_size() once at register time and cache result in l4proto->nla_size. nla_size is the only member that is written to, avoiding this would allow to make l4proto trackers const. We can use ->nlattr_tuple_size() at run time, and cache result in the individual trackers instead. This is an intermediate step, next patch removes nlattr_size() callback and computes size at compile time, then removes nla_size. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index e06518874144..46e786ffcf2f 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -74,7 +74,7 @@ struct nf_conntrack_l4proto { int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); /* Calculate tuple nlattr size */ - int (*nlattr_tuple_size)(void); + unsigned int (*nlattr_tuple_size)(void); int (*nlattr_to_tuple)(struct nlattr *tb[], struct nf_conntrack_tuple *t); const struct nla_policy *nla_policy; @@ -144,7 +144,7 @@ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple); int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t); -int nf_ct_port_nlattr_tuple_size(void); +unsigned int nf_ct_port_nlattr_tuple_size(void); extern const struct nla_policy nf_ct_port_nla_policy[]; #ifdef CONFIG_SYSCTL -- cgit v1.2.3 From ba0e4d9917b43dfa746cbbcb4477da59aae73bd6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 9 Oct 2017 19:52:28 +0200 Subject: netfilter: nf_tables: get set elements via netlink This patch adds a new get operation to look up for specific elements in a set via netlink interface. You can also use it to check if an interval already exists. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0f5b12a4ad09..d011e56cc7a9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -311,6 +311,7 @@ struct nft_expr; * @flush: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts + * @get: get set elements * @privsize: function to return size of set private data * @init: initialize private data of new set instance * @destroy: destroy private data of set instance @@ -350,6 +351,10 @@ struct nft_set_ops { void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter); + void * (*get)(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem, + unsigned int flags); unsigned int (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); -- cgit v1.2.3 From 602f3baf22188aad24b9a58be3209ab774b97d74 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:41 +0100 Subject: net_sch: red: Add offload ability to RED qdisc Add the ability to offload RED qdisc by using ndo_setup_tc. There are four commands for RED offloading: * TC_RED_SET: handles set and change. * TC_RED_DESTROY: handle qdisc destroy. * TC_RED_STATS: update the qdiscs counters (given as reference) * TC_RED_XSTAT: returns red xstats. Whether RED is being offloaded is being determined every time dump action is being called because parent change of this qdisc could change its offload state but doesn't require any RED function to be called. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 98fef3221227..03c208d3c922 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -703,4 +703,34 @@ struct tc_cookie { u8 *data; u32 len; }; + +enum tc_red_command { + TC_RED_REPLACE, + TC_RED_DESTROY, + TC_RED_STATS, + TC_RED_XSTATS, +}; + +struct tc_red_qopt_offload_params { + u32 min; + u32 max; + u32 probability; + bool is_ecn; +}; +struct tc_red_qopt_offload_stats { + struct gnet_stats_basic_packed *bstats; + struct gnet_stats_queue *qstats; +}; + +struct tc_red_qopt_offload { + enum tc_red_command command; + u32 handle; + u32 parent; + union { + struct tc_red_qopt_offload_params set; + struct tc_red_qopt_offload_stats stats; + struct red_stats *xstats; + }; +}; + #endif -- cgit v1.2.3 From b2d0f5d5dc53532e6f07bc546a476a55ebdfe0f3 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Tue, 7 Nov 2017 21:07:02 +0800 Subject: openvswitch: enable NSH support v16->17 - Fixed disputed check code: keep them in nsh_push and nsh_pop but also add them in __ovs_nla_copy_actions v15->v16 - Add csum recalculation for nsh_push, nsh_pop and set_nsh pointed out by Pravin - Move nsh key into the union with ipv4 and ipv6 and add check for nsh key in match_validate pointed out by Pravin - Add nsh check in validate_set and __ovs_nla_copy_actions v14->v15 - Check size in nsh_hdr_from_nlattr - Fixed four small issues pointed out By Jiri and Eric v13->v14 - Rename skb_push_nsh to nsh_push per Dave's comment - Rename skb_pop_nsh to nsh_pop per Dave's comment v12->v13 - Fix NSH header length check in set_nsh v11->v12 - Fix missing changes old comments pointed out - Fix new comments for v11 v10->v11 - Fix the left three disputable comments for v9 but not fixed in v10. v9->v10 - Change struct ovs_key_nsh to struct ovs_nsh_key_base base; __be32 context[NSH_MD1_CONTEXT_SIZE]; - Fix new comments for v9 v8->v9 - Fix build error reported by daily intel build because nsh module isn't selected by openvswitch v7->v8 - Rework nested value and mask for OVS_KEY_ATTR_NSH - Change pop_nsh to adapt to nsh kernel module - Fix many issues per comments from Jiri Benc v6->v7 - Remove NSH GSO patches in v6 because Jiri Benc reworked it as another patch series and they have been merged. - Change it to adapt to nsh kernel module added by NSH GSO patch series v5->v6 - Fix the rest comments for v4. - Add NSH GSO support for VxLAN-gpe + NSH and Eth + NSH. v4->v5 - Fix many comments by Jiri Benc and Eric Garver for v4. v3->v4 - Add new NSH match field ttl - Update NSH header to the latest format which will be final format and won't change per its author's confirmation. - Fix comments for v3. v2->v3 - Change OVS_KEY_ATTR_NSH to nested key to handle length-fixed attributes and length-variable attriubte more flexibly. - Remove struct ovs_action_push_nsh completely - Add code to handle nested attribute for SET_MASKED - Change PUSH_NSH to use the nested OVS_KEY_ATTR_NSH to transfer NSH header data. - Fix comments and coding style issues by Jiri and Eric v1->v2 - Change encap_nsh and decap_nsh to push_nsh and pop_nsh - Dynamically allocate struct ovs_action_push_nsh for length-variable metadata. OVS master and 2.8 branch has merged NSH userspace patch series, this patch is to enable NSH support in kernel data path in order that OVS can support NSH in compat mode by porting this. Signed-off-by: Yi Yang Acked-by: Jiri Benc Acked-by: Eric Garver Acked-by: Pravin Shelar Signed-off-by: David S. Miller --- include/net/nsh.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/nsh.h b/include/net/nsh.h index a1eaea20be96..350b1ad11c7f 100644 --- a/include/net/nsh.h +++ b/include/net/nsh.h @@ -304,4 +304,7 @@ static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags, NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK); } +int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh); +int nsh_pop(struct sk_buff *skb); + #endif /* __NET_NSH_H */ -- cgit v1.2.3 From 24a9332a58b7f41a0d36c35a2c6897242bffdbc0 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:43 -0500 Subject: net: dsa: constify cpu_dp member of dsa_port A DSA port has a dedicated CPU port assigned to it, stored in the cpu_dp member. It is not meant to be modified by a port, thus make it const. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index e54332968417..2a8613b5a23d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -190,7 +190,7 @@ struct dsa_port { struct dsa_switch *ds; unsigned int index; const char *name; - struct dsa_port *cpu_dp; + const struct dsa_port *cpu_dp; struct device_node *dn; unsigned int ageing_time; u8 stp_state; -- cgit v1.2.3 From ec15dd4269d0cbf947c9a2dfdcf08a917098fab1 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:46 -0500 Subject: net: dsa: setup and teardown tree This commit provides better scope for the DSA tree setup and teardown functions. It renames the "applied" bool to "setup" and print a message when the tree is setup, as it is done during teardown. At the same time, check dst->setup in dsa_tree_setup, where it is set to true. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 2a8613b5a23d..6c239257309b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -122,7 +122,7 @@ struct dsa_switch_tree { struct kref refcount; /* Has this tree been applied to the hardware? */ - bool applied; + bool setup; /* * Configuration data for the platform device that owns -- cgit v1.2.3 From c7e460ce55724d4e4e22d3126e5c47273819c53a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:18 -0800 Subject: Revert "net_sched: hold netns refcnt for each action" This reverts commit ceffcc5e254b450e6159f173e4538215cebf1b59. If we hold that refcnt, the netns can never be destroyed until all actions are destroyed by user, this breaks our netns design which we expect all actions are destroyed when we destroy the whole netns. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 1e6df0eb058f..a10a3b1813f3 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -14,7 +14,6 @@ struct tcf_idrinfo { spinlock_t lock; struct idr action_idr; - struct net *net; }; struct tc_action_ops; @@ -106,7 +105,7 @@ struct tc_action_net { static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops, struct net *net) + const struct tc_action_ops *ops) { int err = 0; @@ -114,7 +113,6 @@ int tc_action_net_init(struct tc_action_net *tn, if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - tn->idrinfo->net = net; spin_lock_init(&tn->idrinfo->lock); idr_init(&tn->idrinfo->action_idr); return err; -- cgit v1.2.3 From e4b95c41df36befcfd117210900cd790bc2cd048 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:19 -0800 Subject: net_sched: introduce tcf_exts_get_net() and tcf_exts_put_net() Instead of holding netns refcnt in tc actions, we can minimize the holding time by saving it in struct tcf_exts instead. This means we can just hold netns refcnt right before call_rcu() and release it after tcf_exts_destroy() is done. However, because on netns cleanup path we call tcf_proto_destroy() too, obviously we can not hold netns for a zero refcnt, in this case we have to do cleanup synchronously. It is fine for RCU too, the caller cleanup_net() already waits for a grace period. For other cases, refcnt is non-zero and we can safely grab it as normal and release it after we are done. This patch provides two new API for each filter to use: tcf_exts_get_net() and tcf_exts_put_net(). And all filters now can use the following pattern: void __destroy_filter() { tcf_exts_destroy(); tcf_exts_put_net(); // <== release netns refcnt kfree(); } void some_work() { rtnl_lock(); __destroy_filter(); rtnl_unlock(); } void some_rcu_callback() { tcf_queue_work(some_work); } if (tcf_exts_get_net()) // <== hold netns refcnt call_rcu(some_rcu_callback); else __destroy_filter(); Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 70ca2437740e..8826747ef83e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -94,6 +94,7 @@ struct tcf_exts { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ int nr_actions; struct tc_action **actions; + struct net *net; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -107,6 +108,7 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; + exts->net = NULL; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); if (!exts->actions) @@ -117,6 +119,28 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) return 0; } +/* Return false if the netns is being destroyed in cleanup_net(). Callers + * need to do cleanup synchronously in this case, otherwise may race with + * tc_action_net_exit(). Return true for other cases. + */ +static inline bool tcf_exts_get_net(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + exts->net = maybe_get_net(exts->net); + return exts->net != NULL; +#else + return true; +#endif +} + +static inline void tcf_exts_put_net(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + if (exts->net) + put_net(exts->net); +#endif +} + static inline void tcf_exts_to_list(const struct tcf_exts *exts, struct list_head *actions) { -- cgit v1.2.3 From 47d5b6db2afa766d7af85db684d0b5f092e4fc46 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 23:10:59 +0100 Subject: net: bridge: Add/del switchdev object on host join/leave When the host joins or leaves a multicast group, use switchdev to add an object to the hardware to forward traffic for the group to the host. Signed-off-by: Andrew Lunn Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/switchdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d756fbe46625..39bc855d7fee 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -76,6 +76,7 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, SWITCHDEV_OBJ_ID_PORT_MDB, + SWITCHDEV_OBJ_ID_HOST_MDB, }; struct switchdev_obj { -- cgit v1.2.3 From a3dcaf17ee54f1d01d22cc2b22cab0b4f60d78cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Nov 2017 00:29:27 -0800 Subject: net: allow per netns sysctl_rmem and sysctl_wmem for protos As we want to gradually implement per netns sysctl_rmem and sysctl_wmem on per protocol basis, add two new fields in struct proto, and two new helpers : sk_get_wmem0() and sk_get_rmem0() First user will be TCP. Then UDP and SCTP can be easily converted, while DECNET probably wont get this support. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 6f1be9726e02..688a823dccc3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1101,8 +1101,12 @@ struct proto { */ unsigned long *memory_pressure; long *sysctl_mem; + int *sysctl_wmem; int *sysctl_rmem; + u32 sysctl_wmem_offset; + u32 sysctl_rmem_offset; + int max_header; bool no_autobind; @@ -2390,4 +2394,22 @@ extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; +static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) +{ + /* Does this proto have per netns sysctl_wmem ? */ + if (proto->sysctl_wmem_offset) + return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset); + + return *proto->sysctl_wmem; +} + +static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) +{ + /* Does this proto have per netns sysctl_rmem ? */ + if (proto->sysctl_rmem_offset) + return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset); + + return *proto->sysctl_rmem; +} + #endif /* _SOCK_H */ -- cgit v1.2.3 From 356d1833b638bd465672aefeb71def3ab93fc17d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Nov 2017 00:29:28 -0800 Subject: tcp: Namespace-ify sysctl_tcp_rmem and sysctl_tcp_wmem Note that when a new netns is created, it inherits its sysctl_tcp_rmem and sysctl_tcp_wmem from initial netns. This change is needed so that we can refine TCP rcvbuf autotuning, to take RTT into consideration. Signed-off-by: Eric Dumazet Cc: Wei Wang Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 379550f8124a..5e12975fc658 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -155,6 +155,8 @@ struct netns_ipv4 { int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; + int sysctl_tcp_wmem[3]; + int sysctl_tcp_rmem[3]; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index babfd4da1515..2f2c69ad31b2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -242,8 +242,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; -extern int sysctl_tcp_wmem[3]; -extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ #define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ -- cgit v1.2.3 From 4c5b9d9642c859f7369338fc42c0f62f4151bef3 Mon Sep 17 00:00:00 2001 From: Manish Kurup Date: Tue, 7 Nov 2017 15:49:05 -0500 Subject: act_vlan: VLAN action rewrite to use RCU lock/unlock and update Using a spinlock in the VLAN action causes performance issues when the VLAN action is used on multiple cores. Rewrote the VLAN action to use RCU read locking for reads and updates instead. All functions now use an RCU dereferenced pointer to access the VLAN action context. Modified helper functions used by other modules, to use the RCU as opposed to directly accessing the structure. Acked-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: Manish Kurup Signed-off-by: David S. Miller --- include/net/tc_act/tc_vlan.h | 46 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index c2090df944ff..22ae260d6869 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -13,12 +13,17 @@ #include #include +struct tcf_vlan_params { + int tcfv_action; + u16 tcfv_push_vid; + __be16 tcfv_push_proto; + u8 tcfv_push_prio; + struct rcu_head rcu; +}; + struct tcf_vlan { struct tc_action common; - int tcfv_action; - u16 tcfv_push_vid; - __be16 tcfv_push_proto; - u8 tcfv_push_prio; + struct tcf_vlan_params __rcu *vlan_p; }; #define to_vlan(a) ((struct tcf_vlan *)a) @@ -33,22 +38,45 @@ static inline bool is_tcf_vlan(const struct tc_action *a) static inline u32 tcf_vlan_action(const struct tc_action *a) { - return to_vlan(a)->tcfv_action; + u32 tcfv_action; + + rcu_read_lock(); + tcfv_action = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_action; + rcu_read_unlock(); + + return tcfv_action; } static inline u16 tcf_vlan_push_vid(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_vid; + u16 tcfv_push_vid; + + rcu_read_lock(); + tcfv_push_vid = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_vid; + rcu_read_unlock(); + + return tcfv_push_vid; } static inline __be16 tcf_vlan_push_proto(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_proto; + __be16 tcfv_push_proto; + + rcu_read_lock(); + tcfv_push_proto = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_proto; + rcu_read_unlock(); + + return tcfv_push_proto; } static inline u8 tcf_vlan_push_prio(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_prio; -} + u8 tcfv_push_prio; + rcu_read_lock(); + tcfv_push_prio = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_prio; + rcu_read_unlock(); + + return tcfv_push_prio; +} #endif /* __NET_TC_VLAN_H */ -- cgit v1.2.3 From 8d6e79d3ce13e34957de87f7584cbf1bcde74c57 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Wed, 8 Nov 2017 09:59:26 +0100 Subject: tipc: improve link resiliency when rps is activated Currently, the TIPC RPS dissector is based only on the incoming packets' source node address, hence steering all traffic from a node to the same core. We have seen that this makes the links vulnerable to starvation and unnecessary resets when we turn down the link tolerance to very low values. To reduce the risk of this happening, we exempt probe and probe replies packets from the convergence to one core per source node. Instead, we do the opposite, - we try to diverge those packets across as many cores as possible, by randomizing the flow selector key. To make such packets identifiable to the dissector, we add a new 'is_keepalive' bit to word 0 of the LINK_PROTOCOL header. This bit is set both for PROBE and PROBE_REPLY messages, and only for those. It should be noted that these packets are not part of any flow anyway, and only constitute a minuscule fraction of all packets sent across a link. Hence, there is no risk that this will affect overall performance. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 12 ++++----- include/net/tipc.h | 62 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 6 deletions(-) create mode 100644 include/net/tipc.h (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 22aba321282d..9a074776f70b 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -84,11 +84,11 @@ struct flow_dissector_key_ipv6_addrs { }; /** - * struct flow_dissector_key_tipc_addrs: - * @srcnode: source node address + * struct flow_dissector_key_tipc: + * @key: source node address combined with selector */ -struct flow_dissector_key_tipc_addrs { - __be32 srcnode; +struct flow_dissector_key_tipc { + __be32 key; }; /** @@ -100,7 +100,7 @@ struct flow_dissector_key_addrs { union { struct flow_dissector_key_ipv4_addrs v4addrs; struct flow_dissector_key_ipv6_addrs v6addrs; - struct flow_dissector_key_tipc_addrs tipcaddrs; + struct flow_dissector_key_tipc tipckey; }; }; @@ -192,7 +192,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ - FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ + FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */ FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ diff --git a/include/net/tipc.h b/include/net/tipc.h new file mode 100644 index 000000000000..07670ec022a7 --- /dev/null +++ b/include/net/tipc.h @@ -0,0 +1,62 @@ +/* + * include/net/tipc.h: Include file for TIPC message header routines + * + * Copyright (c) 2017 Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_HDR_H +#define _TIPC_HDR_H + +#include + +#define KEEPALIVE_MSG_MASK 0x0e080000 /* LINK_PROTOCOL + MSG_IS_KEEPALIVE */ + +struct tipc_basic_hdr { + __be32 w[4]; +}; + +static inline u32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr) +{ + u32 w0 = ntohl(hdr->w[0]); + bool keepalive_msg = (w0 & KEEPALIVE_MSG_MASK) == KEEPALIVE_MSG_MASK; + int key; + + /* Return source node identity as key */ + if (likely(!keepalive_msg)) + return hdr->w[3]; + + /* Spread PROBE/PROBE_REPLY messages across the cores */ + get_random_bytes(&key, sizeof(key)); + return key; +} + +#endif -- cgit v1.2.3 From 713bafea92920103cd3d361657406cf04d0e22dd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 8 Nov 2017 13:01:26 -0800 Subject: tcp: retire FACK loss detection FACK loss detection has been disabled by default and the successor RACK subsumed FACK and can handle reordering better. This patch removes FACK to simplify TCP loss recovery. Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Soheil Hassas Yeganeh Reviewed-by: Priyaranjan Jha Signed-off-by: David S. Miller --- include/net/tcp.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 2f2c69ad31b2..ed71511e67a6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -384,7 +384,6 @@ void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); -void tcp_disable_fack(struct tcp_sock *tp); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op); @@ -776,7 +775,7 @@ struct tcp_skb_cb { }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ - __u8 sacked; /* State flags for SACK/FACK. */ + __u8 sacked; /* State flags for SACK. */ #define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */ #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ #define TCPCB_LOST 0x04 /* SKB is lost */ @@ -1066,7 +1065,6 @@ void tcp_rate_check_app_limited(struct sock *sk); * * tcp_is_sack - SACK enabled * tcp_is_reno - No SACK - * tcp_is_fack - FACK enabled, implies SACK enabled */ static inline int tcp_is_sack(const struct tcp_sock *tp) { @@ -1078,16 +1076,6 @@ static inline bool tcp_is_reno(const struct tcp_sock *tp) return !tcp_is_sack(tp); } -static inline bool tcp_is_fack(const struct tcp_sock *tp) -{ - return tp->rx_opt.sack_ok & TCP_FACK_ENABLED; -} - -static inline void tcp_enable_fack(struct tcp_sock *tp) -{ - tp->rx_opt.sack_ok |= TCP_FACK_ENABLED; -} - static inline unsigned int tcp_left_out(const struct tcp_sock *tp) { return tp->sacked_out + tp->lost_out; -- cgit v1.2.3 From 39b175211053c7a6a4d794c42e225994f1c069c2 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Fri, 10 Nov 2017 14:03:51 -0800 Subject: net: Remove unused skb_shared_info member ip6_frag_id was only used by UFO, which has been removed. ipv6_proxy_select_ident() only existed to set ip6_frag_id and has no in-tree callers. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/ipv6.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index fb6d67012de6..ec14f0d5a3a1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -767,7 +767,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr); -void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); -- cgit v1.2.3 From 5ed4e3eb021762fee584ce65620bc822131c7aa0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 10 Nov 2017 15:22:52 -0800 Subject: net: dsa: Pass a port to get_tag_protocol() A number of drivers want to check whether the configured CPU port is a possible configuration for enabling tagging, pass down the CPU port number so they verify that. Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 6c239257309b..68e232fd4b0f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -321,7 +321,8 @@ struct dsa_switch_ops { struct device *host_dev, int sw_addr, void **priv); - enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds); + enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, + int port); int (*setup)(struct dsa_switch *ds); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); -- cgit v1.2.3 From b74b70c44986dee87881fbed3d912e02c5dcf78c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 10 Nov 2017 15:22:54 -0800 Subject: net: dsa: Support prepended Broadcom tag Add a new type: DSA_TAG_PROTO_PREPEND which allows us to support for the 4-bytes Broadcom tag that we already support, but in a format where it is pre-pended to the packet instead of located between the MAC SA and the Ethertyper (DSA_TAG_PROTO_BRCM). Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 68e232fd4b0f..2a05738570d8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -29,6 +29,7 @@ struct fixed_phy_status; enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = 0, DSA_TAG_PROTO_BRCM, + DSA_TAG_PROTO_BRCM_PREPEND, DSA_TAG_PROTO_DSA, DSA_TAG_PROTO_EDSA, DSA_TAG_PROTO_KSZ, -- cgit v1.2.3 From 3a9b76fd0db9f0d426533f96a68a62a58753a51e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 11 Nov 2017 15:54:12 -0800 Subject: tcp: allow drivers to tweak TSQ logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I had many reports that TSQ logic breaks wifi aggregation. Current logic is to allow up to 1 ms of bytes to be queued into qdisc and drivers queues. But Wifi aggregation needs a bigger budget to allow bigger rates to be discovered by various TCP Congestion Controls algorithms. This patch adds an extra socket field, allowing wifi drivers to select another log scale to derive TCP Small Queue credit from current pacing rate. Initial value is 10, meaning that this patch does not change current behavior. We expect wifi drivers to set this field to smaller values (tests have been done with values from 6 to 9) They would have to use following template : if (skb->sk && skb->sk->sk_pacing_shift != MY_PACING_SHIFT) skb->sk->sk_pacing_shift = MY_PACING_SHIFT; Ref: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1670041 Signed-off-by: Eric Dumazet Cc: Johannes Berg Cc: Toke Høiland-Jørgensen Cc: Kir Kolyshkin Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 688a823dccc3..f8715c5af37d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -267,6 +267,7 @@ struct sock_common { * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments + * @sk_pacing_shift: scaling factor for TCP Small Queues * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct @@ -451,6 +452,7 @@ struct sock { kmemcheck_bitfield_end(flags); u16 sk_gso_max_segs; + u8 sk_pacing_shift; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; -- cgit v1.2.3 From 6d88207fcfddc002afe3e2e4a455e5201089d5d9 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:45 +0200 Subject: tls: Add function to update the TLS socket configuration The tx configuration is now stored in ctx->tx_conf. And sk->sk_prot is updated trough a function This will simplify things when we add rx and support for different possible tx and rx cross configurations. Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- include/net/tls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index b89d397dd62f..f058a6e08eaa 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -83,6 +83,8 @@ struct tls_context { void *priv_ctx; + u8 tx_conf:2; + u16 prepend_size; u16 tag_size; u16 overhead_size; -- cgit v1.2.3 From ff45d820a2df163957ad8ab459b6eb6976144c18 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:46 +0200 Subject: tls: Fix TLS ulp context leak, when TLS_TX setsockopt is not used. Previously the TLS ulp context would leak if we attached a TLS ulp to a socket but did not use the TLS_TX setsockopt, or did use it but it failed. This patch solves the issue by overriding prot[TLS_BASE_TX].close and fixing tls_sk_proto_close to work properly when its called with ctx->tx_conf == TLS_BASE_TX. This patch also removes ctx->free_resources as we can use ctx->tx_conf to obtain the relevant information. Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- include/net/tls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index f058a6e08eaa..7cb58a6b8fd0 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -99,7 +99,6 @@ struct tls_context { u16 pending_open_record_frags; int (*push_pending_record)(struct sock *sk, int flags); - void (*free_resources)(struct sock *sk); void (*sk_write_space)(struct sock *sk); void (*sk_proto_close)(struct sock *sk, long timeout); @@ -124,6 +123,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); +void tls_sw_free_tx_resources(struct sock *sk); void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); void tls_icsk_clean_acked(struct sock *sk); -- cgit v1.2.3 From 213ef6e7c9c063c482d77f12cc438872628d48ec Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:47 +0200 Subject: tls: Move tls_make_aad to header to allow sharing move tls_make_aad as it is going to be reused by the device offload code and rx path. Remove unused recv parameter. Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- include/net/tls.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index 7cb58a6b8fd0..70becd0a9299 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -214,6 +214,21 @@ static inline void tls_fill_prepend(struct tls_context *ctx, ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size); } +static inline void tls_make_aad(char *buf, + size_t size, + char *record_sequence, + int record_sequence_size, + unsigned char record_type) +{ + memcpy(buf, record_sequence, record_sequence_size); + + buf[8] = record_type; + buf[9] = TLS_1_2_VERSION_MAJOR; + buf[10] = TLS_1_2_VERSION_MINOR; + buf[11] = size >> 8; + buf[12] = size & 0xFF; +} + static inline struct tls_context *tls_get_ctx(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); -- cgit v1.2.3 From b9f3eb499d84f8d4adcb2f9212ec655700b28228 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 14 Nov 2017 06:30:11 +0300 Subject: uapi: fix linux/tls.h userspace compilation error Move inclusion of a private kernel header from uapi/linux/tls.h to its only user - net/tls.h, to fix the following linux/tls.h userspace compilation error: /usr/include/linux/tls.h:41:21: fatal error: net/tcp.h: No such file or directory As to this point uapi/linux/tls.h was totaly unusuable for userspace, cleanup this header file further by moving other redundant includes to net/tls.h. Fixes: 3c4d7559159b ("tls: kernel TLS support") Cc: # v4.13+ Signed-off-by: Dmitry V. Levin Signed-off-by: David S. Miller --- include/net/tls.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index 70becd0a9299..936cfc5cab7d 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -35,6 +35,10 @@ #define _TLS_OFFLOAD_H #include +#include +#include +#include +#include #include -- cgit v1.2.3 From 6670e152447732ba90626f36dfc015a13fbf150e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 14 Nov 2017 08:25:49 -0800 Subject: tcp: Namespace-ify sysctl_tcp_default_congestion_control Make default TCP default congestion control to a per namespace value. This changes default congestion control to a pointer to congestion ops (rather than implicit as first element of available lsit). The congestion control setting of new namespaces is inherited from the current setting of the root namespace. Signed-off-by: Stephen Hemminger Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 5e12975fc658..44668c29701a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -160,6 +160,7 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; + const struct tcp_congestion_ops __rcu *tcp_congestion_control; struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; spinlock_t tcp_fastopen_ctx_lock; unsigned int sysctl_tcp_fastopen_blackhole_timeout; diff --git a/include/net/tcp.h b/include/net/tcp.h index ed71511e67a6..35cc7d0d3d47 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1002,8 +1002,8 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); void tcp_assign_congestion_control(struct sock *sk); void tcp_init_congestion_control(struct sock *sk); void tcp_cleanup_congestion_control(struct sock *sk); -int tcp_set_default_congestion_control(const char *name); -void tcp_get_default_congestion_control(char *name); +int tcp_set_default_congestion_control(struct net *net, const char *name); +void tcp_get_default_congestion_control(struct net *net, char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); @@ -1017,7 +1017,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find_key(u32 key); -u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); +u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca); #ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); #else -- cgit v1.2.3 From 50895b9de1d3e0258e015e8e55128d835d9a9f19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Nov 2017 21:02:19 -0800 Subject: tcp: highest_sack fix syzbot easily found a regression added in our latest patches [1] No longer set tp->highest_sack to the head of the send queue since this is not logical and error prone. Only sack processing should maintain the pointer to an skb from rtx queue. We might in the future only remember the sequence instead of a pointer to skb, since rb-tree should allow a fast lookup. [1] BUG: KASAN: use-after-free in tcp_highest_sack_seq include/net/tcp.h:1706 [inline] BUG: KASAN: use-after-free in tcp_ack+0x42bb/0x4fd0 net/ipv4/tcp_input.c:3537 Read of size 4 at addr ffff8801c154faa8 by task syz-executor4/12860 CPU: 0 PID: 12860 Comm: syz-executor4 Not tainted 4.14.0-next-20171113+ #41 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429 tcp_highest_sack_seq include/net/tcp.h:1706 [inline] tcp_ack+0x42bb/0x4fd0 net/ipv4/tcp_input.c:3537 tcp_rcv_established+0x672/0x18a0 net/ipv4/tcp_input.c:5439 tcp_v4_do_rcv+0x2ab/0x7d0 net/ipv4/tcp_ipv4.c:1468 sk_backlog_rcv include/net/sock.h:909 [inline] __release_sock+0x124/0x360 net/core/sock.c:2264 release_sock+0xa4/0x2a0 net/core/sock.c:2778 tcp_sendmsg+0x3a/0x50 net/ipv4/tcp.c:1462 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:763 sock_sendmsg_nosec net/socket.c:632 [inline] sock_sendmsg+0xca/0x110 net/socket.c:642 ___sys_sendmsg+0x75b/0x8a0 net/socket.c:2048 __sys_sendmsg+0xe5/0x210 net/socket.c:2082 SYSC_sendmsg net/socket.c:2093 [inline] SyS_sendmsg+0x2d/0x50 net/socket.c:2089 entry_SYSCALL_64_fastpath+0x1f/0x96 RIP: 0033:0x452879 RSP: 002b:00007fc9761bfbe8 EFLAGS: 00000212 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000758020 RCX: 0000000000452879 RDX: 0000000000000000 RSI: 0000000020917fc8 RDI: 0000000000000015 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000212 R12: 00000000006ee3a0 R13: 00000000ffffffff R14: 00007fc9761c06d4 R15: 0000000000000000 Allocated by task 12860: save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489 kmem_cache_alloc_node+0x144/0x760 mm/slab.c:3638 __alloc_skb+0xf1/0x780 net/core/skbuff.c:193 alloc_skb_fclone include/linux/skbuff.h:1023 [inline] sk_stream_alloc_skb+0x11d/0x900 net/ipv4/tcp.c:870 tcp_sendmsg_locked+0x1341/0x3b80 net/ipv4/tcp.c:1299 tcp_sendmsg+0x2f/0x50 net/ipv4/tcp.c:1461 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:763 sock_sendmsg_nosec net/socket.c:632 [inline] sock_sendmsg+0xca/0x110 net/socket.c:642 SYSC_sendto+0x358/0x5a0 net/socket.c:1749 SyS_sendto+0x40/0x50 net/socket.c:1717 entry_SYSCALL_64_fastpath+0x1f/0x96 Freed by task 12860: save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3492 [inline] kmem_cache_free+0x77/0x280 mm/slab.c:3750 kfree_skbmem+0xdd/0x1d0 net/core/skbuff.c:603 __kfree_skb+0x1d/0x20 net/core/skbuff.c:642 sk_wmem_free_skb include/net/sock.h:1419 [inline] tcp_rtx_queue_unlink_and_free include/net/tcp.h:1682 [inline] tcp_clean_rtx_queue net/ipv4/tcp_input.c:3111 [inline] tcp_ack+0x1b17/0x4fd0 net/ipv4/tcp_input.c:3593 tcp_rcv_established+0x672/0x18a0 net/ipv4/tcp_input.c:5439 tcp_v4_do_rcv+0x2ab/0x7d0 net/ipv4/tcp_ipv4.c:1468 sk_backlog_rcv include/net/sock.h:909 [inline] __release_sock+0x124/0x360 net/core/sock.c:2264 release_sock+0xa4/0x2a0 net/core/sock.c:2778 tcp_sendmsg+0x3a/0x50 net/ipv4/tcp.c:1462 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:763 sock_sendmsg_nosec net/socket.c:632 [inline] sock_sendmsg+0xca/0x110 net/socket.c:642 ___sys_sendmsg+0x75b/0x8a0 net/socket.c:2048 __sys_sendmsg+0xe5/0x210 net/socket.c:2082 SYSC_sendmsg net/socket.c:2093 [inline] SyS_sendmsg+0x2d/0x50 net/socket.c:2089 entry_SYSCALL_64_fastpath+0x1f/0x96 The buggy address belongs to the object at ffff8801c154fa80 which belongs to the cache skbuff_fclone_cache of size 456 The buggy address is located 40 bytes inside of 456-byte region [ffff8801c154fa80, ffff8801c154fc48) The buggy address belongs to the page: page:ffffea00070553c0 count:1 mapcount:0 mapping:ffff8801c154f080 index:0x0 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffff8801c154f080 0000000000000000 0000000100000006 raw: ffffea00070a5a20 ffffea0006a18360 ffff8801d9ca0500 0000000000000000 page dumped because: kasan: bad access detected Fixes: 737ff314563c ("tcp: use sequence distance to detect reordering") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/tcp.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 35cc7d0d3d47..85ea578195d4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1630,9 +1630,6 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli { if (tcp_write_queue_empty(sk)) tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - - if (tcp_sk(sk)->highest_sack == skb_unlinked) - tcp_sk(sk)->highest_sack = NULL; } static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb) @@ -1645,12 +1642,8 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb __tcp_add_write_queue_tail(sk, skb); /* Queue it, remembering where we must start sending. */ - if (sk->sk_write_queue.next == skb) { + if (sk->sk_write_queue.next == skb) tcp_chrono_start(sk, TCP_CHRONO_BUSY); - - if (tcp_sk(sk)->highest_sack == NULL) - tcp_sk(sk)->highest_sack = skb; - } } /* Insert new before skb on the write queue of sk. */ @@ -1708,9 +1701,7 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp) static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb) { - struct sk_buff *next = skb_rb_next(skb); - - tcp_sk(sk)->highest_sack = next ?: tcp_send_head(sk); + tcp_sk(sk)->highest_sack = skb_rb_next(skb); } static inline struct sk_buff *tcp_highest_sack(struct sock *sk) @@ -1720,9 +1711,7 @@ static inline struct sk_buff *tcp_highest_sack(struct sock *sk) static inline void tcp_highest_sack_reset(struct sock *sk) { - struct sk_buff *skb = tcp_rtx_queue_head(sk); - - tcp_sk(sk)->highest_sack = skb ?: tcp_send_head(sk); + tcp_sk(sk)->highest_sack = tcp_rtx_queue_head(sk); } /* Called when old skb is about to be deleted and replaced by new skb */ -- cgit v1.2.3 From 0a833c29d89656025443cb9f0ebff7052dd95ce0 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Wed, 15 Nov 2017 13:09:32 +0100 Subject: genetlink: fix genlmsg_nlhdr() According to the description, first argument of genlmsg_nlhdr() points to what genlmsg_put() returns, i.e. beginning of user header. Therefore we should only subtract size of genetlink header and netlink message header, not user header. This also means we don't need to pass the pointer to genetlink family and the same is true for genl_dump_check_consistent() which is the only caller of genlmsg_nlhdr(). (Note that at the moment, these functions are only used for families which do not have user header so that they are not affected.) Fixes: 670dc2833d14 ("netlink: advertise incomplete dumps") Signed-off-by: Michal Kubecek Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/genetlink.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 5ac169a735f4..decf6012a401 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -154,15 +154,12 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, /** * genlmsg_nlhdr - Obtain netlink header from user specified header * @user_hdr: user header as returned from genlmsg_put() - * @family: generic netlink family * * Returns pointer to netlink header. */ -static inline struct nlmsghdr * -genlmsg_nlhdr(void *user_hdr, const struct genl_family *family) +static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr) { return (struct nlmsghdr *)((char *)user_hdr - - family->hdrsize - GENL_HDRLEN - NLMSG_HDRLEN); } @@ -190,16 +187,14 @@ static inline int genlmsg_parse(const struct nlmsghdr *nlh, * genl_dump_check_consistent - check if sequence is consistent and advertise if not * @cb: netlink callback structure that stores the sequence number * @user_hdr: user header as returned from genlmsg_put() - * @family: generic netlink family * * Cf. nl_dump_check_consistent(), this just provides a wrapper to make it * simpler to use with generic netlink. */ static inline void genl_dump_check_consistent(struct netlink_callback *cb, - void *user_hdr, - const struct genl_family *family) + void *user_hdr) { - nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr, family)); + nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr)); } /** -- cgit v1.2.3 From d50112edde1d0c621520e53747044009f11c656b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 15 Nov 2017 17:32:18 -0800 Subject: slab, slub, slob: add slab_flags_t Add sparse-checked slab_flags_t for struct kmem_cache::flags (SLAB_POISON, etc). SLAB is bloated temporarily by switching to "unsigned long", but only temporarily. Link: http://lkml.kernel.org/r/20171021100225.GA22428@avx2 Signed-off-by: Alexey Dobriyan Acked-by: Pekka Enberg Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index a6b9a8d1a6df..c577286dbffb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1105,7 +1105,7 @@ struct proto { struct kmem_cache *slab; unsigned int obj_size; - int slab_flags; + slab_flags_t slab_flags; struct percpu_counter *orphan_count; -- cgit v1.2.3 From 4950276672fce5c241857540f8561c440663673d Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Wed, 15 Nov 2017 17:35:51 -0800 Subject: kmemcheck: remove annotations Patch series "kmemcheck: kill kmemcheck", v2. As discussed at LSF/MM, kill kmemcheck. KASan is a replacement that is able to work without the limitation of kmemcheck (single CPU, slow). KASan is already upstream. We are also not aware of any users of kmemcheck (or users who don't consider KASan as a suitable replacement). The only objection was that since KASAN wasn't supported by all GCC versions provided by distros at that time we should hold off for 2 years, and try again. Now that 2 years have passed, and all distros provide gcc that supports KASAN, kill kmemcheck again for the very same reasons. This patch (of 4): Remove kmemcheck annotations, and calls to kmemcheck from the kernel. [alexander.levin@verizon.com: correctly remove kmemcheck call from dma_map_sg_attrs] Link: http://lkml.kernel.org/r/20171012192151.26531-1-alexander.levin@verizon.com Link: http://lkml.kernel.org/r/20171007030159.22241-2-alexander.levin@verizon.com Signed-off-by: Sasha Levin Cc: Alexander Potapenko Cc: Eric W. Biederman Cc: Michal Hocko Cc: Pekka Enberg Cc: Steven Rostedt Cc: Tim Hansen Cc: Vegard Nossum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/inet_sock.h | 3 --- include/net/inet_timewait_sock.h | 4 ---- include/net/sock.h | 3 --- 3 files changed, 10 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index db8162dd8c0b..8e51b4a69088 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -17,7 +17,6 @@ #define _INET_SOCK_H #include -#include #include #include #include @@ -84,7 +83,6 @@ struct inet_request_sock { #define ireq_state req.__req_common.skc_state #define ireq_family req.__req_common.skc_family - kmemcheck_bitfield_begin(flags); u16 snd_wscale : 4, rcv_wscale : 4, tstamp_ok : 1, @@ -93,7 +91,6 @@ struct inet_request_sock { ecn_ok : 1, acked : 1, no_srccheck: 1; - kmemcheck_bitfield_end(flags); u32 ir_mark; union { struct ip_options_rcu __rcu *ireq_opt; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 6a75d67a30fd..1356fa6a7566 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -15,8 +15,6 @@ #ifndef _INET_TIMEWAIT_SOCK_ #define _INET_TIMEWAIT_SOCK_ - -#include #include #include #include @@ -69,14 +67,12 @@ struct inet_timewait_sock { /* Socket demultiplex comparisons on incoming packets. */ /* these three are in inet_sock */ __be16 tw_sport; - kmemcheck_bitfield_begin(flags); /* And these are ours. */ unsigned int tw_kill : 1, tw_transparent : 1, tw_flowlabel : 20, tw_pad : 2, /* 2 bits hole */ tw_tos : 8; - kmemcheck_bitfield_end(flags); struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; }; diff --git a/include/net/sock.h b/include/net/sock.h index c577286dbffb..a63e6a8bb7e0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -436,7 +436,6 @@ struct sock { #define SK_FL_TYPE_MASK 0xffff0000 #endif - kmemcheck_bitfield_begin(flags); unsigned int sk_padding : 1, sk_kern_sock : 1, sk_no_check_tx : 1, @@ -445,8 +444,6 @@ struct sock { sk_protocol : 8, sk_type : 16; #define SK_PROTOCOL_MAX U8_MAX - kmemcheck_bitfield_end(flags); - u16 sk_gso_max_segs; unsigned long sk_lingertime; struct proto *sk_prot_creator; -- cgit v1.2.3 From ecca8f88da5c4260cc2bccfefd2a24976704c366 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 17 Nov 2017 14:11:11 +0800 Subject: sctp: set frag_point in sctp_setsockopt_maxseg correctly Now in sctp_setsockopt_maxseg user_frag or frag_point can be set with val >= 8 and val <= SCTP_MAX_CHUNK_LEN. But both checks are incorrect. val >= 8 means frag_point can even be less than SCTP_DEFAULT_MINSEGMENT. Then in sctp_datamsg_from_user(), when it's value is greater than cookie echo len and trying to bundle with cookie echo chunk, the first_len will overflow. The worse case is when it's value is equal as cookie echo len, first_len becomes 0, it will go into a dead loop for fragment later on. In Hangbin syzkaller testing env, oom was even triggered due to consecutive memory allocation in that loop. Besides, SCTP_MAX_CHUNK_LEN is the max size of the whole chunk, it should deduct the data header for frag_point or user_frag check. This patch does a proper check with SCTP_DEFAULT_MINSEGMENT subtracting the sctphdr and datahdr, SCTP_MAX_CHUNK_LEN subtracting datahdr when setting frag_point via sockopt. It also improves sctp_setsockopt_maxseg codes. Suggested-by: Marcelo Ricardo Leitner Reported-by: Hangbin Liu Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d7d8cba01469..749a42882437 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -444,7 +444,8 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); + frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN - + sizeof(struct sctp_data_chunk))); return frag; } -- cgit v1.2.3 From ed66dfaf236c04d414de1d218441296e57fb2bd2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 17 Nov 2017 21:06:14 -0500 Subject: tcp: when scheduling TLP, time of RTO should account for current ACK Fix the TLP scheduling logic so that when scheduling a TLP probe, we ensure that the estimated time at which an RTO would fire accounts for the fact that ACKs indicating forward progress should push back RTO times. After the following fix: df92c8394e6e ("tcp: fix xmit timer to only be reset if data ACKed/SACKed") we had an unintentional behavior change in the following kind of scenario: suppose the RTT variance has been very low recently. Then suppose we send out a flight of N packets and our RTT is 100ms: t=0: send a flight of N packets t=100ms: receive an ACK for N-1 packets The response before df92c8394e6e that was: -> schedule a TLP for now + RTO_interval The response after df92c8394e6e is: -> schedule a TLP for t=0 + RTO_interval Since RTO_interval = srtt + RTT_variance, this means that we have scheduled a TLP timer at a point in the future that only accounts for RTT_variance. If the RTT_variance term is small, this means that the timer fires soon. Before df92c8394e6e this would not happen, because in that code, when we receive an ACK for a prefix of flight, we did: 1) Near the top of tcp_ack(), switch from TLP timer to RTO at write_queue_head->paket_tx_time + RTO_interval: if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); 2) In tcp_clean_rtx_queue(), update the RTO to now + RTO_interval: if (flag & FLAG_ACKED) { tcp_rearm_rto(sk); 3) In tcp_ack() after tcp_fastretrans_alert() switch from RTO to TLP at now + RTO_interval: if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); In df92c8394e6e we removed that 3-phase dance, and instead directly set the TLP timer once: we set the TLP timer in cases like this to write_queue_head->packet_tx_time + RTO_interval. So if the RTT variance is small, then this means that this is setting the TLP timer to fire quite soon. This means if the ACK for the tail of the flight takes longer than an RTT to arrive (often due to delayed ACKs), then the TLP timer fires too quickly. Fixes: df92c8394e6e ("tcp: fix xmit timer to only be reset if data ACKed/SACKed") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 85ea578195d4..4e09398009c1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -539,7 +539,7 @@ void tcp_push_one(struct sock *, unsigned int mss_now); void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); -bool tcp_schedule_loss_probe(struct sock *sk); +bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto); void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb); -- cgit v1.2.3 From 0c19f846d582af919db66a5914a0189f9f92c936 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 21 Nov 2017 10:22:25 -0500 Subject: net: accept UFO datagrams from tuntap and packet Tuntap and similar devices can inject GSO packets. Accept type VIRTIO_NET_HDR_GSO_UDP, even though not generating UFO natively. Processes are expected to use feature negotiation such as TUNSETOFFLOAD to detect supported offload types and refrain from injecting other packets. This process breaks down with live migration: guest kernels do not renegotiate flags, so destination hosts need to expose all features that the source host does. Partially revert the UFO removal from 182e0b6b5846~1..d9d30adf5677. This patch introduces nearly(*) no new code to simplify verification. It brings back verbatim tuntap UFO negotiation, VIRTIO_NET_HDR_GSO_UDP insertion and software UFO segmentation. It does not reinstate protocol stack support, hardware offload (NETIF_F_UFO), SKB_GSO_UDP tunneling in SKB_GSO_SOFTWARE or reception of VIRTIO_NET_HDR_GSO_UDP packets in tuntap. To support SKB_GSO_UDP reappearing in the stack, also reinstate logic in act_csum and openvswitch. Achieve equivalence with v4.13 HEAD by squashing in commit 939912216fa8 ("net: skb_needs_check() removes CHECKSUM_UNNECESSARY check for tx.") and reverting commit 8d63bee643f1 ("net: avoid skb_warn_bad_offload false positives on UFO"). (*) To avoid having to bring back skb_shinfo(skb)->ip6_frag_id, ipv6_proxy_select_ident is changed to return a __be32 and this is assigned directly to the frag_hdr. Also, SKB_GSO_UDP is inserted at the end of the enum to minimize code churn. Tested Booted a v4.13 guest kernel with QEMU. On a host kernel before this patch `ethtool -k eth0` shows UFO disabled. After the patch, it is enabled, same as on a v4.13 host kernel. A UFO packet sent from the guest appears on the tap device: host: nc -l -p -u 8000 & tcpdump -n -i tap0 guest: dd if=/dev/zero of=payload.txt bs=1 count=2000 nc -u 192.16.1.1 8000 < payload.txt Direct tap to tap transmission of VIRTIO_NET_HDR_GSO_UDP succeeds, packets arriving fragmented: ./with_tap_pair.sh ./tap_send_ufo tap0 tap1 (from https://github.com/wdebruij/kerneltools/tree/master/tests) Changes v1 -> v2 - simplified set_offload change (review comment) - documented test procedure Link: http://lkml.kernel.org/r/ Fixes: fb652fdfe837 ("macvlan/macvtap: Remove NETIF_F_UFO advertisement.") Reported-by: Michal Kubecek Signed-off-by: Willem de Bruijn Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/net/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ec14f0d5a3a1..f73797e2fa60 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -767,6 +767,7 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr); +__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); -- cgit v1.2.3