From ccdb2d17df9f07c291d43b0aeea7c90e4c020489 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 15 Jul 2017 19:40:20 +0200 Subject: ip6: fix PMTU discovery when using /127 subnets The definition of an "anycast destination address" has been tweaked as a side-effect of commit 2647a9b07032 ("ipv6: Remove external dependency on rt6i_gateway and RTF_ANYCAST"). The first address of a point-to-point /127 subnet is now considered as an anycast address. This prevents ICMPv6 errors to be returned to a sender of such a subnet and breaks PMTU discovery. This can be reproduced with: ip link add name out6 type veth peer name in6 ip link add name out7 type veth peer name in7 ip link set mtu 1400 dev out7 ip link set mtu 1400 dev in7 ip netns add next-hop ip netns add next-next-hop ip link set netns next-hop dev in6 ip link set netns next-hop dev out7 ip link set netns next-next-hop dev in7 ip link set up dev out6 ip addr add 2001:db8:1::12/127 dev out6 ip netns exec next-hop ip link set up dev in6 ip netns exec next-hop ip link set up dev out7 ip netns exec next-hop ip addr add 2001:db8:1::13/127 dev in6 ip netns exec next-hop ip addr add 2001:db8:1::14/127 dev out7 ip netns exec next-hop ip route add default via 2001:db8:1::15 ip netns exec next-hop sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec next-next-hop ip link set up dev in7 ip netns exec next-next-hop ip addr add 2001:db8:1::15/127 dev in7 ip netns exec next-next-hop ip addr add 2001:db8:1::50/128 dev in7 ip netns exec next-next-hop ip route add default via 2001:db8:1::14 ip netns exec next-next-hop sysctl -qw net.ipv6.conf.all.forwarding=1 ip route add 2001:db8:1::48/123 via 2001:db8:1::13 sleep 4 ping -M do -s 1452 -c 3 2001:db8:1::50 || true ip route get 2001:db8:1::50 Before the patch, we get: 2001:db8:1::50 from :: via 2001:db8:1::13 dev out6 src 2001:db8:1::12 metric 1024 pref medium After the patch, we get: 2001:db8:1::50 via 2001:db8:1::13 dev out6 src 2001:db8:1::12 metric 0 cache expires 578sec mtu 1400 pref medium Fixes: 2647a9b07032 ("ipv6: Remove external dependency on rt6i_gateway and RTF_ANYCAST") Signed-off-by: Vincent Bernat Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 199056933dcb..907d39a42f6b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -194,7 +194,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, struct rt6_info *rt = (struct rt6_info *)dst; return rt->rt6i_flags & RTF_ANYCAST || - (rt->rt6i_dst.plen != 128 && + (rt->rt6i_dst.plen < 127 && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } -- cgit v1.2.3 From a38905e6aa1a758af003b80f3318196eadb86dfe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:49 +0800 Subject: sctp: remove the typedef sctp_ipv4addr_param_t This patch is to remove the typedef sctp_ipv4addr_param_t, and replace with struct sctp_ipv4addr_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 99e866487e2f..e42095d7ce57 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -273,10 +273,10 @@ struct sctp_init_chunk { /* Section 3.3.2.1. IPv4 Address Parameter (5) */ -typedef struct sctp_ipv4addr_param { +struct sctp_ipv4addr_param { struct sctp_paramhdr param_hdr; - struct in_addr addr; -} sctp_ipv4addr_param_t; + struct in_addr addr; +}; /* Section 3.3.2.1. IPv6 Address Parameter (6) */ typedef struct sctp_ipv6addr_param { -- cgit v1.2.3 From 00987cc07e3f0f01699800cd89adf13a908cdee5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:50 +0800 Subject: sctp: remove the typedef sctp_ipv6addr_param_t This patch is to remove the typedef sctp_ipv6addr_param_t, and replace with struct sctp_ipv6addr_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index e42095d7ce57..6b45c8a38642 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -279,10 +279,10 @@ struct sctp_ipv4addr_param { }; /* Section 3.3.2.1. IPv6 Address Parameter (6) */ -typedef struct sctp_ipv6addr_param { +struct sctp_ipv6addr_param { struct sctp_paramhdr param_hdr; struct in6_addr addr; -} sctp_ipv6addr_param_t; +}; /* Section 3.3.2.1 Cookie Preservative (9) */ typedef struct sctp_cookie_preserve_param { -- cgit v1.2.3 From 365ddb65e77f6b99d4aba09e0d8a096aada57815 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:51 +0800 Subject: sctp: remove the typedef sctp_cookie_preserve_param_t This patch is to remove the typedef sctp_cookie_preserve_param_t, and replace with struct sctp_cookie_preserve_param in the places where it's using this typedef. It is also to fix some indents in sctp_sf_do_5_2_6_stale(). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 6b45c8a38642..d8f9d8f8649b 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -285,10 +285,10 @@ struct sctp_ipv6addr_param { }; /* Section 3.3.2.1 Cookie Preservative (9) */ -typedef struct sctp_cookie_preserve_param { +struct sctp_cookie_preserve_param { struct sctp_paramhdr param_hdr; - __be32 lifespan_increment; -} sctp_cookie_preserve_param_t; + __be32 lifespan_increment; +}; /* Section 3.3.2.1 Host Name Address (11) */ typedef struct sctp_hostname_param { -- cgit v1.2.3 From df9af0063f154c1a4f22a5570749d185d080cf56 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:52 +0800 Subject: sctp: remove the typedef sctp_hostname_param_t Remove this typedef, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index d8f9d8f8649b..c43e9067d41a 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -291,10 +291,10 @@ struct sctp_cookie_preserve_param { }; /* Section 3.3.2.1 Host Name Address (11) */ -typedef struct sctp_hostname_param { +struct sctp_hostname_param { struct sctp_paramhdr param_hdr; uint8_t hostname[0]; -} sctp_hostname_param_t; +}; /* Section 3.3.2.1 Supported Address Types (12) */ typedef struct sctp_supported_addrs_param { -- cgit v1.2.3 From e925d506f1a21f7fd24a8fdd3e73e0810c655de4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:53 +0800 Subject: sctp: remove the typedef sctp_supported_addrs_param_t This patch is to remove the typedef sctp_supported_addrs_param_t, and replace with struct sctp_supported_addrs_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index c43e9067d41a..3ca3ab7302a6 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -297,10 +297,10 @@ struct sctp_hostname_param { }; /* Section 3.3.2.1 Supported Address Types (12) */ -typedef struct sctp_supported_addrs_param { +struct sctp_supported_addrs_param { struct sctp_paramhdr param_hdr; __be16 types[0]; -} sctp_supported_addrs_param_t; +}; /* Appendix A. ECN Capable (32768) */ typedef struct sctp_ecn_capable_param { -- cgit v1.2.3 From c1dd5df39be5a98c843b9352c22c5569f84bec44 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:54 +0800 Subject: sctp: remove struct sctp_ecn_capable_param Remove it, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 3ca3ab7302a6..75524829aa81 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -302,11 +302,6 @@ struct sctp_supported_addrs_param { __be16 types[0]; }; -/* Appendix A. ECN Capable (32768) */ -typedef struct sctp_ecn_capable_param { - struct sctp_paramhdr param_hdr; -} sctp_ecn_capable_param_t; - /* ADDIP Section 3.2.6 Adaptation Layer Indication */ typedef struct sctp_adaptation_ind_param { struct sctp_paramhdr param_hdr; -- cgit v1.2.3 From 85f6bd24ac579ef0926eb4c564ba1f3c8a7f8563 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:55 +0800 Subject: sctp: remove the typedef sctp_adaptation_ind_param_t This patch is to remove the typedef sctp_adaptation_ind_param_t, and replace with struct sctp_adaptation_ind_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 75524829aa81..72b87874ea76 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -303,10 +303,10 @@ struct sctp_supported_addrs_param { }; /* ADDIP Section 3.2.6 Adaptation Layer Indication */ -typedef struct sctp_adaptation_ind_param { +struct sctp_adaptation_ind_param { struct sctp_paramhdr param_hdr; __be32 adaptation_ind; -} sctp_adaptation_ind_param_t; +}; /* ADDIP Section 4.2.7 Supported Extensions Parameter */ typedef struct sctp_supported_ext_param { -- cgit v1.2.3 From 15328d9feede450d64ff77cac5d25bc734ec8b27 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:56 +0800 Subject: sctp: remove the typedef sctp_supported_ext_param_t This patch is to remove the typedef sctp_supported_ext_param_t, and replace with struct sctp_supported_ext_param in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 72b87874ea76..76245685f923 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -309,10 +309,10 @@ struct sctp_adaptation_ind_param { }; /* ADDIP Section 4.2.7 Supported Extensions Parameter */ -typedef struct sctp_supported_ext_param { +struct sctp_supported_ext_param { struct sctp_paramhdr param_hdr; __u8 chunks[0]; -} sctp_supported_ext_param_t; +}; /* AUTH Section 3.1 Random */ typedef struct sctp_random_param { -- cgit v1.2.3 From b02db702face3791889a4fcf06691c086648ee89 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:57 +0800 Subject: sctp: remove the typedef sctp_random_param_t This patch is to remove the typedef sctp_random_param_t, and replace with struct sctp_random_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- include/net/sctp/structs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 76245685f923..9b1aa3907c9e 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -315,10 +315,10 @@ struct sctp_supported_ext_param { }; /* AUTH Section 3.1 Random */ -typedef struct sctp_random_param { +struct sctp_random_param { struct sctp_paramhdr param_hdr; __u8 random_val[0]; -} sctp_random_param_t; +}; /* AUTH Section 3.2 Chunk List */ typedef struct sctp_chunks_param { diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5ab29af8ca8a..f22c079fd1f1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1556,7 +1556,7 @@ struct sctp_association { * and authenticated chunk list. All that is part of the * cookie and these are just pointers to those locations */ - sctp_random_param_t *peer_random; + struct sctp_random_param *peer_random; sctp_chunks_param_t *peer_chunks; sctp_hmac_algo_param_t *peer_hmacs; } peer; -- cgit v1.2.3 From a762a9d94d44980e3690f9de87b918376daa6428 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:58 +0800 Subject: sctp: remove the typedef sctp_chunks_param_t This patch is to remove the typedef sctp_chunks_param_t, and replace with struct sctp_chunks_param in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- include/net/sctp/structs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 9b1aa3907c9e..b52def9bcfa1 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -321,10 +321,10 @@ struct sctp_random_param { }; /* AUTH Section 3.2 Chunk List */ -typedef struct sctp_chunks_param { +struct sctp_chunks_param { struct sctp_paramhdr param_hdr; __u8 chunks[0]; -} sctp_chunks_param_t; +}; /* AUTH Section 3.3 HMAC Algorithm */ typedef struct sctp_hmac_algo_param { diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f22c079fd1f1..8042e6380b0e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1557,7 +1557,7 @@ struct sctp_association { * cookie and these are just pointers to those locations */ struct sctp_random_param *peer_random; - sctp_chunks_param_t *peer_chunks; + struct sctp_chunks_param *peer_chunks; sctp_hmac_algo_param_t *peer_hmacs; } peer; -- cgit v1.2.3 From 1474774a7f0daf9878fd9537a24714f419e744ed Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:59 +0800 Subject: sctp: remove the typedef sctp_hmac_algo_param_t This patch is to remove the typedef sctp_hmac_algo_param_t, and replace with struct sctp_hmac_algo_param in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- include/net/sctp/structs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index b52def9bcfa1..913474dfc96c 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -327,10 +327,10 @@ struct sctp_chunks_param { }; /* AUTH Section 3.3 HMAC Algorithm */ -typedef struct sctp_hmac_algo_param { +struct sctp_hmac_algo_param { struct sctp_paramhdr param_hdr; __be16 hmac_ids[0]; -} sctp_hmac_algo_param_t; +}; /* RFC 2960. Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2): * The INIT ACK chunk is used to acknowledge the initiation of an SCTP diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8042e6380b0e..66cd7639b912 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1558,7 +1558,7 @@ struct sctp_association { */ struct sctp_random_param *peer_random; struct sctp_chunks_param *peer_chunks; - sctp_hmac_algo_param_t *peer_hmacs; + struct sctp_hmac_algo_param *peer_hmacs; } peer; /* State : A state variable indicating what state the -- cgit v1.2.3 From 27eac47b00789522ba00501b0838026e1ecb6f05 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 17 Jul 2017 11:35:54 +0200 Subject: net/unix: drop obsolete fd-recursion limits All unix sockets now account inflight FDs to the respective sender. This was introduced in: commit 712f4aad406bb1ed67f3f98d04c044191f0ff593 Author: willy tarreau Date: Sun Jan 10 07:54:56 2016 +0100 unix: properly account for FDs passed over unix sockets and further refined in: commit 415e3d3e90ce9e18727e8843ae343eda5a58fad6 Author: Hannes Frederic Sowa Date: Wed Feb 3 02:11:03 2016 +0100 unix: correctly track in-flight fds in sending process user_struct Hence, regardless of the stacking depth of FDs, the total number of inflight FDs is limited, and accounted. There is no known way for a local user to exceed those limits or exploit the accounting. Furthermore, the GC logic is independent of the recursion/stacking depth as well. It solely depends on the total number of inflight FDs, regardless of their layout. Lastly, the current `recursion_level' suffers a TOCTOU race, since it checks and inherits depths only at queue time. If we consider `A<-B' to mean `queue-B-on-A', the following sequence circumvents the recursion level easily: A<-B B<-C C<-D ... Y<-Z resulting in: A<-B<-C<-...<-Z With all of this in mind, lets drop the recursion limit. It has no additional security value, anymore. On the contrary, it randomly confuses message brokers that try to forward file-descriptors, since any sendmsg(2) call can fail spuriously with ETOOMANYREFS if a client maliciously modifies the FD while inflight. Cc: Alban Crequy Cc: Simon McVittie Signed-off-by: David Herrmann Reviewed-by: Tom Gundersen Signed-off-by: David S. Miller --- include/net/af_unix.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 678e4d6fa317..3b3194b2fc65 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -58,7 +58,6 @@ struct unix_sock { struct list_head link; atomic_long_t inflight; spinlock_t lock; - unsigned char recursion_level; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 -- cgit v1.2.3 From b145425f269a17ed344d737f746b844dfac60c82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Jul 2017 02:56:10 -0700 Subject: inetpeer: remove AVL implementation in favor of RB tree As discussed in Faro during Netfilter Workshop 2017, RB trees can be used with RCU, using a seqlock. Note that net/rxrpc/conn_service.c is already using this. This patch converts inetpeer from AVL tree to RB tree, since it allows to remove private AVL implementation in favor of shared RB code. $ size net/ipv4/inetpeer.before net/ipv4/inetpeer.after text data bss dec hex filename 3195 40 128 3363 d23 net/ipv4/inetpeer.before 1562 24 0 1586 632 net/ipv4/inetpeer.after The same technique can be used to speed up net/netfilter/nft_set_rbtree.c (removing rwlock contention in fast path) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index f2a215fc78e4..950ed182f62f 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -33,18 +33,12 @@ struct inetpeer_addr { }; struct inet_peer { - /* group together avl_left,avl_right,v4daddr to speedup lookups */ - struct inet_peer __rcu *avl_left, *avl_right; + struct rb_node rb_node; struct inetpeer_addr daddr; - __u32 avl_height; u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ unsigned long rate_last; - union { - struct list_head gc_list; - struct rcu_head gc_rcu; - }; /* * Once inet_peer is queued for deletion (refcnt == 0), following field * is not available: rid @@ -55,7 +49,6 @@ struct inet_peer { atomic_t rid; /* Frag reception counter */ }; struct rcu_head rcu; - struct inet_peer *gc_next; }; /* following fields might be frequently dirtied */ @@ -64,7 +57,7 @@ struct inet_peer { }; struct inet_peer_base { - struct inet_peer __rcu *root; + struct rb_root rb_root; seqlock_t lock; int total; }; -- cgit v1.2.3 From 814abfabef3ceed390c10d06a0cc69a86454b6cf Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:27:07 -0700 Subject: xdp: add bpf_redirect helper function This adds support for a bpf_redirect helper function to the XDP infrastructure. For now this only supports redirecting to the egress path of a port. In order to support drivers handling a xdp_buff natively this patches uses a new ndo operation ndo_xdp_xmit() that takes pushes a xdp_buff to the specified device. If the program specifies either (a) an unknown device or (b) a device that does not support the operation a BPF warning is thrown and the XDP_ABORTED error code is returned. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/filter.h | 4 ++++ include/linux/netdevice.h | 6 ++++++ include/uapi/linux/bpf.h | 1 + 3 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index bfef1e5734f8..64cae7a08148 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -711,7 +711,11 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); + +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp); + void bpf_warn_invalid_xdp_action(u32 act); +void bpf_warn_invalid_xdp_redirect(u32 ifindex); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 779b23595596..77f5376005e6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -66,6 +66,7 @@ struct mpls_dev; /* UDP Tunnel offloads */ struct udp_tunnel_info; struct bpf_prog; +struct xdp_buff; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1138,6 +1139,9 @@ struct xfrmdev_ops { * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); * This function is used to set or query state related to XDP on the * netdevice. See definition of enum xdp_netdev_command for details. + * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); + * This function is used to submit a XDP packet for transmit on a + * netdevice. * */ struct net_device_ops { @@ -1323,6 +1327,8 @@ struct net_device_ops { int needed_headroom); int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + int (*ndo_xdp_xmit)(struct net_device *dev, + struct xdp_buff *xdp); }; /** diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e99e3e6f8b37..4dbb7a3f4677 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -717,6 +717,7 @@ enum xdp_action { XDP_DROP, XDP_PASS, XDP_TX, + XDP_REDIRECT, }; /* user accessible metadata for XDP packet hook -- cgit v1.2.3 From 6103aa96ec077c976e851e0b89cc2446cb76573d Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:27:50 -0700 Subject: net: implement XDP_REDIRECT for xdp generic Add support for redirect to xdp generic creating a fall back for devices that do not yet have support and allowing test infrastructure using veth pairs to be built. Signed-off-by: John Fastabend Tested-by: Andy Gospodarek Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 64cae7a08148..10df7daf5ec6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -712,6 +712,7 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp); void bpf_warn_invalid_xdp_action(u32 act); -- cgit v1.2.3 From 5acaee0a8964c9bab7775ab8bedcd1f66a2a1011 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:28:35 -0700 Subject: xdp: add trace event for xdp redirect This adds a trace event for xdp redirect which may help when debugging XDP programs that use redirect bpf commands. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/filter.h | 4 +++- include/trace/events/xdp.h | 31 ++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 10df7daf5ec6..ce8211fa91c7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -713,7 +713,9 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); -int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp); +int xdp_do_redirect(struct net_device *dev, + struct xdp_buff *xdp, + struct bpf_prog *prog); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 1b61357d3f57..7b1eb7b4be41 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -12,7 +12,8 @@ FN(ABORTED) \ FN(DROP) \ FN(PASS) \ - FN(TX) + FN(TX) \ + FN(REDIRECT) #define __XDP_ACT_TP_FN(x) \ TRACE_DEFINE_ENUM(XDP_##x); @@ -48,6 +49,34 @@ TRACE_EVENT(xdp_exception, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) ); +TRACE_EVENT(xdp_redirect, + + TP_PROTO(const struct net_device *from, + const struct net_device *to, + const struct bpf_prog *xdp, u32 act), + + TP_ARGS(from, to, xdp, act), + + TP_STRUCT__entry( + __string(name_from, from->name) + __string(name_to, to->name) + __array(u8, prog_tag, 8) + __field(u32, act) + ), + + TP_fast_assign( + BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); + memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); + __assign_str(name_from, from->name); + __assign_str(name_to, to->name); + __entry->act = act; + ), + + TP_printk("prog=%s from=%s to=%s action=%s", + __print_hex_str(__entry->prog_tag, 8), + __get_str(name_from), __get_str(name_to), + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) +); #endif /* _TRACE_XDP_H */ #include -- cgit v1.2.3 From 546ac1ffb70d25b56c1126940e5ec639c4dd7413 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:28:56 -0700 Subject: bpf: add devmap, a map for storing net device references Device map (devmap) is a BPF map, primarily useful for networking applications, that uses a key to lookup a reference to a netdevice. The map provides a clean way for BPF programs to build virtual port to physical port maps. Additionally, it provides a scoping function for the redirect action itself allowing multiple optimizations. Future patches will leverage the map to provide batching at the XDP layer. Another optimization/feature, that is not yet implemented, would be to support multiple netdevices per key to support efficient multicast and broadcast support. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/bpf_types.h | 3 +++ include/uapi/linux/bpf.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 3d137c33d664..b1e1035ca24b 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -35,3 +35,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) +#ifdef CONFIG_NET +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +#endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4dbb7a3f4677..ecbb0e7e15bc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -104,6 +104,7 @@ enum bpf_map_type { BPF_MAP_TYPE_LPM_TRIE, BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, + BPF_MAP_TYPE_DEVMAP, }; enum bpf_prog_type { -- cgit v1.2.3 From 97f91a7cf04ff605845c20948b8a80e54cbd3376 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:29:18 -0700 Subject: bpf: add bpf_redirect_map helper routine BPF programs can use the devmap with a bpf_redirect_map() helper routine to forward packets to netdevice in map. Signed-off-by: John Fastabend Signed-off-by: Jesper Dangaard Brouer Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b69e7a5869ff..d0d3281ac678 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -379,4 +379,7 @@ extern const struct bpf_func_proto bpf_get_stackid_proto; void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +/* Map specifics */ +struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); + #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ecbb0e7e15bc..1106a8c4cd36 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -348,6 +348,11 @@ union bpf_attr { * @flags: bit 0 - if set, redirect to ingress instead of egress * other bits - reserved * Return: TC_ACT_REDIRECT + * int bpf_redirect_map(key, map, flags) + * redirect to endpoint in map + * @key: index in map to lookup + * @map: fd of map to do lookup in + * @flags: -- * * u32 bpf_get_route_realm(skb) * retrieve a dst's tclassid @@ -592,7 +597,8 @@ union bpf_attr { FN(get_socket_uid), \ FN(set_hash), \ FN(setsockopt), \ - FN(skb_adjust_room), + FN(skb_adjust_room), \ + FN(redirect_map), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 11393cc9b9be2a1f61559e6fb9c27bc8fa20b1ff Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:29:40 -0700 Subject: xdp: Add batching support to redirect map For performance reasons we want to avoid updating the tail pointer in the driver tx ring as much as possible. To accomplish this we add batching support to the redirect path in XDP. This adds another ndo op "xdp_flush" that is used to inform the driver that it should bump the tail pointer on the TX ring. Signed-off-by: John Fastabend Signed-off-by: Jesper Dangaard Brouer Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ include/linux/filter.h | 7 +++++++ include/linux/netdevice.h | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d0d3281ac678..6850a760dc94 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -381,5 +381,7 @@ u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); /* Map specifics */ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +void __dev_map_insert_ctx(struct bpf_map *map, u32 index); +void __dev_map_flush(struct bpf_map *map); #endif /* _LINUX_BPF_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index ce8211fa91c7..3323ee91c172 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -712,10 +712,17 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the + * same cpu context. Further for best results no more than a single map + * for the do_redirect/do_flush pair should be used. This limitation is + * because we only track one map and force a flush when the map changes. + * This does not appear to be a real limiation for existing software. + */ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); +void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 77f5376005e6..03b104908235 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1142,7 +1142,9 @@ struct xfrmdev_ops { * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); * This function is used to submit a XDP packet for transmit on a * netdevice. - * + * void (*ndo_xdp_flush)(struct net_device *dev); + * This function is used to inform the driver to flush a paticular + * xpd tx queue. Must be called on same CPU as xdp_xmit. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1329,6 +1331,7 @@ struct net_device_ops { struct netdev_xdp *xdp); int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); + void (*ndo_xdp_flush)(struct net_device *dev); }; /** -- cgit v1.2.3 From 2ddf71e23cc246e95af72a6deed67b4a50a7b81c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:30:02 -0700 Subject: net: add notifier hooks for devmap bpf map The BPF map devmap holds a refcnt on the net_device structure when it is in the map. We need to do this to ensure on driver unload we don't lose a dev reference. However, its not very convenient to have to manually unload the map when destroying a net device so add notifier handlers to do the cleanup automatically. But this creates a race between update/destroy BPF syscall and programs and the unregister netdev hook. Unfortunately, the best I could come up with is either to live with requiring manual removal of net devices from the map before removing the net device OR to add a mutex in devmap to ensure the map is not modified while we are removing a device. The fallout also requires that BPF programs no longer update/delete the map from the BPF program side because the mutex may sleep and this can not be done from inside an rcu critical section. This is not a real problem though because I have not come up with any use cases where this is actually useful in practice. If/when we come up with a compelling user for this we may need to revisit this. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 3323ee91c172..d19ed3c15e1e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -716,7 +716,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * same cpu context. Further for best results no more than a single map * for the do_redirect/do_flush pair should be used. This limitation is * because we only track one map and force a flush when the map changes. - * This does not appear to be a real limiation for existing software. + * This does not appear to be a real limitation for existing software. */ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); int xdp_do_redirect(struct net_device *dev, -- cgit v1.2.3 From 880388aa3c07fdea4f9b85e35641753017b1852f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 Jul 2017 07:29:12 -0700 Subject: net: Remove all references to SKB_GSO_UDP. Such packets are no longer possible. Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 5209b5ed2a64..32fb046f2173 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -18,9 +18,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, case VIRTIO_NET_HDR_GSO_TCPV6: gso_type = SKB_GSO_TCPV6; break; - case VIRTIO_NET_HDR_GSO_UDP: - gso_type = SKB_GSO_UDP; - break; default: return -EINVAL; } @@ -73,8 +70,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - else if (sinfo->gso_type & SKB_GSO_UDP) - hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; else return -EINVAL; if (sinfo->gso_type & SKB_GSO_TCP_ECN) -- cgit v1.2.3 From d9d30adf56777c402c0027c0e6ae21f17cc0a365 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 Jul 2017 07:31:57 -0700 Subject: net: Kill NETIF_F_UFO and SKB_GSO_UDP. No longer used. Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 4 +--- include/linux/netdevice.h | 1 - include/linux/skbuff.h | 31 +++++++++++++++---------------- 3 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 1d4737cffc71..ebd273627334 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -36,7 +36,6 @@ enum { /**/NETIF_F_GSO_SHIFT, /* keep the order of SKB_GSO_* bits */ NETIF_F_TSO_BIT /* ... TCPv4 segmentation */ = NETIF_F_GSO_SHIFT, - NETIF_F_UFO_BIT, /* ... UDPv4 fragmentation */ NETIF_F_GSO_ROBUST_BIT, /* ... ->SKB_GSO_DODGY */ NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ NETIF_F_TSO_MANGLEID_BIT, /* ... IPV4 ID mangling allowed */ @@ -118,7 +117,6 @@ enum { #define NETIF_F_TSO6 __NETIF_F(TSO6) #define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN) #define NETIF_F_TSO __NETIF_F(TSO) -#define NETIF_F_UFO __NETIF_F(UFO) #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) @@ -172,7 +170,7 @@ enum { NETIF_F_FSO) /* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \ NETIF_F_GSO_SCTP) /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 03b104908235..c60351b84323 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4098,7 +4098,6 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dbe29b6c9bd6..4d7a284ba3ee 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -463,39 +463,38 @@ enum { enum { SKB_GSO_TCPV4 = 1 << 0, - SKB_GSO_UDP = 1 << 1, /* This indicates the skb is from an untrusted source. */ - SKB_GSO_DODGY = 1 << 2, + SKB_GSO_DODGY = 1 << 1, /* This indicates the tcp segment has CWR set. */ - SKB_GSO_TCP_ECN = 1 << 3, + SKB_GSO_TCP_ECN = 1 << 2, - SKB_GSO_TCP_FIXEDID = 1 << 4, + SKB_GSO_TCP_FIXEDID = 1 << 3, - SKB_GSO_TCPV6 = 1 << 5, + SKB_GSO_TCPV6 = 1 << 4, - SKB_GSO_FCOE = 1 << 6, + SKB_GSO_FCOE = 1 << 5, - SKB_GSO_GRE = 1 << 7, + SKB_GSO_GRE = 1 << 6, - SKB_GSO_GRE_CSUM = 1 << 8, + SKB_GSO_GRE_CSUM = 1 << 7, - SKB_GSO_IPXIP4 = 1 << 9, + SKB_GSO_IPXIP4 = 1 << 8, - SKB_GSO_IPXIP6 = 1 << 10, + SKB_GSO_IPXIP6 = 1 << 9, - SKB_GSO_UDP_TUNNEL = 1 << 11, + SKB_GSO_UDP_TUNNEL = 1 << 10, - SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, - SKB_GSO_PARTIAL = 1 << 13, + SKB_GSO_PARTIAL = 1 << 12, - SKB_GSO_TUNNEL_REMCSUM = 1 << 14, + SKB_GSO_TUNNEL_REMCSUM = 1 << 13, - SKB_GSO_SCTP = 1 << 15, + SKB_GSO_SCTP = 1 << 14, - SKB_GSO_ESP = 1 << 16, + SKB_GSO_ESP = 1 << 15, }; #if BITS_PER_LONG > 32 -- cgit v1.2.3 From 06dc75ab06943fcc126a951a0680980ad5cb75c6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 18:56:54 +0200 Subject: net: Revert "net: add function to allocate sk_buff head without data area" It was added for netlink mmap tx, there are no callers in the tree. The commit also added a check for skb->head != NULL in kfree_skb path, remove that too -- all skbs ought to have skb->head set. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4d7a284ba3ee..4093552be1de 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -944,12 +944,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } -struct sk_buff *__alloc_skb_head(gfp_t priority, int node); -static inline struct sk_buff *alloc_skb_head(gfp_t priority) -{ - return __alloc_skb_head(priority, -1); -} - struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); -- cgit v1.2.3 From 46f55cffa47330b99537985a50d92945d4b34658 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 21:56:48 -0700 Subject: net: fix build error in devmap helper calls Initial patches missed case with CONFIG_BPF_SYSCALL not set. Fixes: 11393cc9b9be ("xdp: Add batching support to redirect map") Fixes: 97f91a7cf04f ("bpf: add bpf_redirect_map helper routine") Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/bpf.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6850a760dc94..6353c7474dba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -318,6 +318,12 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); + +/* Map specifics */ +struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +void __dev_map_insert_ctx(struct bpf_map *map, u32 index); +void __dev_map_flush(struct bpf_map *map); + #else static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -356,6 +362,20 @@ static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } + +static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, + u32 key) +{ + return NULL; +} + +static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index) +{ +} + +static inline void __dev_map_flush(struct bpf_map *map) +{ +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ @@ -379,9 +399,4 @@ extern const struct bpf_func_proto bpf_get_stackid_proto; void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -/* Map specifics */ -struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); -void __dev_map_insert_ctx(struct bpf_map *map, u32 index); -void __dev_map_flush(struct bpf_map *map); - #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From 09c7570480f7544ffbf8e6db365208b0b0c154c6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:26 +0200 Subject: xfrm: remove flow cache After rcu conversions performance degradation in forward tests isn't that noticeable anymore. See next patch for some numbers. A followup patcg could then also remove genid from the policies as we do not cache bundles anymore. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/flow.h | 34 ---------------------------------- include/net/flowcache.h | 25 ------------------------- include/net/netns/xfrm.h | 11 ----------- include/net/xfrm.h | 8 -------- 4 files changed, 78 deletions(-) delete mode 100644 include/net/flowcache.h (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index bae198b3039e..f3dc61b29bb5 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family) return 0; } -#define FLOW_DIR_IN 0 -#define FLOW_DIR_OUT 1 -#define FLOW_DIR_FWD 2 - -struct net; -struct sock; -struct flow_cache_ops; - -struct flow_cache_object { - const struct flow_cache_ops *ops; -}; - -struct flow_cache_ops { - struct flow_cache_object *(*get)(struct flow_cache_object *); - int (*check)(struct flow_cache_object *); - void (*delete)(struct flow_cache_object *); -}; - -typedef struct flow_cache_object *(*flow_resolve_t)( - struct net *net, const struct flowi *key, u16 family, - u8 dir, struct flow_cache_object *oldobj, void *ctx); - -struct flow_cache_object *flow_cache_lookup(struct net *net, - const struct flowi *key, u16 family, - u8 dir, flow_resolve_t resolver, - void *ctx); -int flow_cache_init(struct net *net); -void flow_cache_fini(struct net *net); -void flow_cache_hp_init(void); - -void flow_cache_flush(struct net *net); -void flow_cache_flush_deferred(struct net *net); -extern atomic_t flow_cache_genid; - __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) diff --git a/include/net/flowcache.h b/include/net/flowcache.h deleted file mode 100644 index 51eb971e8973..000000000000 --- a/include/net/flowcache.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _NET_FLOWCACHE_H -#define _NET_FLOWCACHE_H - -#include -#include -#include -#include - -struct flow_cache_percpu { - struct hlist_head *hash_table; - unsigned int hash_count; - u32 hash_rnd; - int hash_rnd_recalc; - struct tasklet_struct flush_tasklet; -}; - -struct flow_cache { - u32 hash_shift; - struct flow_cache_percpu __percpu *percpu; - struct hlist_node node; - unsigned int low_watermark; - unsigned int high_watermark; - struct timer_list rnd_timer; -}; -#endif /* _NET_FLOWCACHE_H */ diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 27bb9633c69d..611521646dd4 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,7 +6,6 @@ #include #include #include -#include struct ctl_table_header; @@ -73,16 +72,6 @@ struct netns_xfrm { spinlock_t xfrm_state_lock; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; - - /* flow cache part */ - struct flow_cache flow_cache_global; - atomic_t flow_cache_genid; - struct list_head flow_cache_gc_list; - atomic_t flow_cache_gc_count; - spinlock_t flow_cache_gc_lock; - struct work_struct flow_cache_gc_work; - struct work_struct flow_cache_flush_work; - struct mutex flow_flush_sem; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c0916ab18d32..e0feba2ce76a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -563,7 +563,6 @@ struct xfrm_policy { refcount_t refcnt; struct timer_list timer; - struct flow_cache_object flo; atomic_t genid; u32 priority; u32 index; @@ -978,7 +977,6 @@ struct xfrm_dst { struct rt6_info rt6; } u; struct dst_entry *route; - struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; u32 xfrm_genid; @@ -1226,9 +1224,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk) } } -void xfrm_garbage_collect(struct net *net); -void xfrm_garbage_collect_deferred(struct net *net); - #else static inline void xfrm_sk_free_policy(struct sock *sk) {} @@ -1263,9 +1258,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, { return 1; } -static inline void xfrm_garbage_collect(struct net *net) -{ -} #endif static __inline__ -- cgit v1.2.3 From ec30d78c14a813db39a647b6a348b4286ba4abf5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:27 +0200 Subject: xfrm: add xdst pcpu cache retain last used xfrm_dst in a pcpu cache. On next request, reuse this dst if the policies are the same. The cache will not help with strict RR workloads as there is no hit. The cache packet-path part is reasonably small, the notifier part is needed so we do not add long hangs when a device is dismantled but some pcpu xdst still holds a reference, there are also calls to the flush operation when userspace deletes SAs so modules can be removed (there is no hit. We need to run the dst_release on the correct cpu to avoid races with packet path. This is done by adding a work_struct for each cpu and then doing the actual test/release on each affected cpu via schedule_work_on(). Test results using 4 network namespaces and null encryption: ns1 ns2 -> ns3 -> ns4 netperf -> xfrm/null enc -> xfrm/null dec -> netserver what TCP_STREAM UDP_STREAM UDP_RR Flow cache: 14644.61 294.35 327231.64 No flow cache: 14349.81 242.64 202301.72 Pcpu cache: 14629.70 292.21 205595.22 UDP tests used 64byte packets, tests ran for one minute each, value is average over ten iterations. 'Flow cache' is 'net-next', 'No flow cache' is net-next plus this series but without this patch. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e0feba2ce76a..afb4929d7232 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c); +void xfrm_policy_cache_flush(void); void km_state_notify(struct xfrm_state *x, const struct km_event *c); struct xfrm_tmpl; -- cgit v1.2.3 From bb4d991a28cc86a2dfbeefeff32911ca9f779c18 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 19 Jul 2017 15:41:26 -0700 Subject: tcp: adjust tail loss probe timeout This patch adjusts the timeout formula to schedule the TCP loss probe (TLP). The previous formula uses 2*SRTT or 1.5*RTT + DelayACKMax if only one packet is in flight. It keeps a lower bound of 10 msec which is too large for short RTT connections (e.g. within a data-center). The new formula = 2*RTT + (inflight == 1 ? 200ms : 2ticks) which performs better for short and fast connections. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 70483296157f..4f056ea79df2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #endif #define TCP_RTO_MAX ((unsigned)(120*HZ)) #define TCP_RTO_MIN ((unsigned)(HZ/5)) +#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now * used as a fallback RTO for the @@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes * for local resources. */ -#define TCP_REO_TIMEOUT_MIN (2000) /* Min RACK reordering timeout in usec */ - #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ #define TCP_KEEPALIVE_INTVL (75*HZ) -- cgit v1.2.3 From e7d53ad3239de636ea478fc003d3652b49b8e593 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 18 Jul 2017 16:23:56 -0400 Subject: net: dsa: unexport dsa_is_port_initialized The dsa_is_port_initialized helper is only used by dsa_switch_resume and dsa_switch_suspend, if CONFIG_PM_SLEEP is enabled. Make it static to dsa.c. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 58969b9a090c..88da272d20d0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -256,11 +256,6 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); } -static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) -{ - return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; -} - static inline u8 dsa_upstream_port(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; -- cgit v1.2.3 From 7051b88a35c7dde5705923833117e14f9cc17d92 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 18 Jul 2017 15:59:27 -0700 Subject: net: make dev_close and related functions void There is no useful return value from dev_close. All paths return 0. Change dev_close and helper functions to void. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c60351b84323..614642eb7eb7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2432,8 +2432,8 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev); -int dev_close(struct net_device *dev); -int dev_close_many(struct list_head *head, bool unlink); +void dev_close(struct net_device *dev); +void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); -- cgit v1.2.3 From 727f8914477e4642c7d1ff381667cdc4178b40c6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 21 Jul 2017 10:39:26 +0100 Subject: rxrpc: Expose UAPI definitions to userspace Move UAPI definitions from the internal header and place them in a UAPI header file so that userspace can make use of them. Signed-off-by: David Howells --- include/linux/rxrpc.h | 79 --------------------------------------------- include/uapi/linux/rxrpc.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 79 deletions(-) delete mode 100644 include/linux/rxrpc.h create mode 100644 include/uapi/linux/rxrpc.h (limited to 'include') diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h deleted file mode 100644 index 7343f71783dc..000000000000 --- a/include/linux/rxrpc.h +++ /dev/null @@ -1,79 +0,0 @@ -/* AF_RXRPC parameters - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _LINUX_RXRPC_H -#define _LINUX_RXRPC_H - -#include -#include - -/* - * RxRPC socket address - */ -struct sockaddr_rxrpc { - sa_family_t srx_family; /* address family */ - u16 srx_service; /* service desired */ - u16 transport_type; /* type of transport socket (SOCK_DGRAM) */ - u16 transport_len; /* length of transport address */ - union { - sa_family_t family; /* transport address family */ - struct sockaddr_in sin; /* IPv4 transport address */ - struct sockaddr_in6 sin6; /* IPv6 transport address */ - } transport; -}; - -/* - * RxRPC socket options - */ -#define RXRPC_SECURITY_KEY 1 /* [clnt] set client security key */ -#define RXRPC_SECURITY_KEYRING 2 /* [srvr] set ring of server security keys */ -#define RXRPC_EXCLUSIVE_CONNECTION 3 /* Deprecated; use RXRPC_EXCLUSIVE_CALL instead */ -#define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */ -#define RXRPC_UPGRADEABLE_SERVICE 5 /* Upgrade service[0] -> service[1] */ -#define RXRPC_SUPPORTED_CMSG 6 /* Get highest supported control message type */ - -/* - * RxRPC control messages - * - If neither abort or accept are specified, the message is a data message. - * - terminal messages mean that a user call ID tag can be recycled - * - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg() - */ -enum rxrpc_cmsg_type { - RXRPC_USER_CALL_ID = 1, /* sr: user call ID specifier */ - RXRPC_ABORT = 2, /* sr: abort request / notification [terminal] */ - RXRPC_ACK = 3, /* -r: [Service] RPC op final ACK received [terminal] */ - RXRPC_NET_ERROR = 5, /* -r: network error received [terminal] */ - RXRPC_BUSY = 6, /* -r: server busy received [terminal] */ - RXRPC_LOCAL_ERROR = 7, /* -r: local error generated [terminal] */ - RXRPC_NEW_CALL = 8, /* -r: [Service] new incoming call notification */ - RXRPC_ACCEPT = 9, /* s-: [Service] accept request */ - RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ - RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ - RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */ - RXRPC__SUPPORTED -}; - -/* - * RxRPC security levels - */ -#define RXRPC_SECURITY_PLAIN 0 /* plain secure-checksummed packets only */ -#define RXRPC_SECURITY_AUTH 1 /* authenticated packets */ -#define RXRPC_SECURITY_ENCRYPT 2 /* encrypted packets */ - -/* - * RxRPC security indices - */ -#define RXRPC_SECURITY_NONE 0 /* no security protocol */ -#define RXRPC_SECURITY_RXKAD 2 /* kaserver or kerberos 4 */ -#define RXRPC_SECURITY_RXGK 4 /* gssapi-based */ -#define RXRPC_SECURITY_RXK5 5 /* kerberos 5 */ - -#endif /* _LINUX_RXRPC_H */ diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h new file mode 100644 index 000000000000..08e2fb9c70ae --- /dev/null +++ b/include/uapi/linux/rxrpc.h @@ -0,0 +1,80 @@ +/* Types and definitions for AF_RXRPC. + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_RXRPC_H +#define _UAPI_LINUX_RXRPC_H + +#include +#include +#include + +/* + * RxRPC socket address + */ +struct sockaddr_rxrpc { + sa_family_t srx_family; /* address family */ + u16 srx_service; /* service desired */ + u16 transport_type; /* type of transport socket (SOCK_DGRAM) */ + u16 transport_len; /* length of transport address */ + union { + sa_family_t family; /* transport address family */ + struct sockaddr_in sin; /* IPv4 transport address */ + struct sockaddr_in6 sin6; /* IPv6 transport address */ + } transport; +}; + +/* + * RxRPC socket options + */ +#define RXRPC_SECURITY_KEY 1 /* [clnt] set client security key */ +#define RXRPC_SECURITY_KEYRING 2 /* [srvr] set ring of server security keys */ +#define RXRPC_EXCLUSIVE_CONNECTION 3 /* Deprecated; use RXRPC_EXCLUSIVE_CALL instead */ +#define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */ +#define RXRPC_UPGRADEABLE_SERVICE 5 /* Upgrade service[0] -> service[1] */ +#define RXRPC_SUPPORTED_CMSG 6 /* Get highest supported control message type */ + +/* + * RxRPC control messages + * - If neither abort or accept are specified, the message is a data message. + * - terminal messages mean that a user call ID tag can be recycled + * - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg() + */ +enum rxrpc_cmsg_type { + RXRPC_USER_CALL_ID = 1, /* sr: user call ID specifier */ + RXRPC_ABORT = 2, /* sr: abort request / notification [terminal] */ + RXRPC_ACK = 3, /* -r: [Service] RPC op final ACK received [terminal] */ + RXRPC_NET_ERROR = 5, /* -r: network error received [terminal] */ + RXRPC_BUSY = 6, /* -r: server busy received [terminal] */ + RXRPC_LOCAL_ERROR = 7, /* -r: local error generated [terminal] */ + RXRPC_NEW_CALL = 8, /* -r: [Service] new incoming call notification */ + RXRPC_ACCEPT = 9, /* s-: [Service] accept request */ + RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ + RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ + RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */ + RXRPC__SUPPORTED +}; + +/* + * RxRPC security levels + */ +#define RXRPC_SECURITY_PLAIN 0 /* plain secure-checksummed packets only */ +#define RXRPC_SECURITY_AUTH 1 /* authenticated packets */ +#define RXRPC_SECURITY_ENCRYPT 2 /* encrypted packets */ + +/* + * RxRPC security indices + */ +#define RXRPC_SECURITY_NONE 0 /* no security protocol */ +#define RXRPC_SECURITY_RXKAD 2 /* kaserver or kerberos 4 */ +#define RXRPC_SECURITY_RXGK 4 /* gssapi-based */ +#define RXRPC_SECURITY_RXK5 5 /* kerberos 5 */ + +#endif /* _UAPI_LINUX_RXRPC_H */ -- cgit v1.2.3 From ddc6c70f07bb1f6dd39a2c6c430f7b4fa95199c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 21 Jul 2017 10:07:10 +0100 Subject: rxrpc: Move the packet.h include file into net/rxrpc/ Move the protocol description header file into net/rxrpc/ and rename it to protocol.h. It's no longer necessary to expose it as packets are no longer exposed to kernel services (such as AFS) that use the facility. The abort codes are transferred to the UAPI header instead as we pass these back to userspace and also to kernel services. Signed-off-by: David Howells --- include/rxrpc/packet.h | 235 --------------------------------------------- include/uapi/linux/rxrpc.h | 44 +++++++++ 2 files changed, 44 insertions(+), 235 deletions(-) delete mode 100644 include/rxrpc/packet.h (limited to 'include') diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h deleted file mode 100644 index a2dcfb850b9f..000000000000 --- a/include/rxrpc/packet.h +++ /dev/null @@ -1,235 +0,0 @@ -/* packet.h: Rx packet layout and definitions - * - * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _LINUX_RXRPC_PACKET_H -#define _LINUX_RXRPC_PACKET_H - -typedef u32 rxrpc_seq_t; /* Rx message sequence number */ -typedef u32 rxrpc_serial_t; /* Rx message serial number */ -typedef __be32 rxrpc_seq_net_t; /* on-the-wire Rx message sequence number */ -typedef __be32 rxrpc_serial_net_t; /* on-the-wire Rx message serial number */ - -/*****************************************************************************/ -/* - * on-the-wire Rx packet header - * - all multibyte fields should be in network byte order - */ -struct rxrpc_wire_header { - __be32 epoch; /* client boot timestamp */ -#define RXRPC_RANDOM_EPOCH 0x80000000 /* Random if set, date-based if not */ - - __be32 cid; /* connection and channel ID */ -#define RXRPC_MAXCALLS 4 /* max active calls per conn */ -#define RXRPC_CHANNELMASK (RXRPC_MAXCALLS-1) /* mask for channel ID */ -#define RXRPC_CIDMASK (~RXRPC_CHANNELMASK) /* mask for connection ID */ -#define RXRPC_CIDSHIFT ilog2(RXRPC_MAXCALLS) /* shift for connection ID */ -#define RXRPC_CID_INC (1 << RXRPC_CIDSHIFT) /* connection ID increment */ - - __be32 callNumber; /* call ID (0 for connection-level packets) */ - __be32 seq; /* sequence number of pkt in call stream */ - __be32 serial; /* serial number of pkt sent to network */ - - uint8_t type; /* packet type */ -#define RXRPC_PACKET_TYPE_DATA 1 /* data */ -#define RXRPC_PACKET_TYPE_ACK 2 /* ACK */ -#define RXRPC_PACKET_TYPE_BUSY 3 /* call reject */ -#define RXRPC_PACKET_TYPE_ABORT 4 /* call/connection abort */ -#define RXRPC_PACKET_TYPE_ACKALL 5 /* ACK all outstanding packets on call */ -#define RXRPC_PACKET_TYPE_CHALLENGE 6 /* connection security challenge (SRVR->CLNT) */ -#define RXRPC_PACKET_TYPE_RESPONSE 7 /* connection secutity response (CLNT->SRVR) */ -#define RXRPC_PACKET_TYPE_DEBUG 8 /* debug info request */ -#define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */ -#define RXRPC_N_PACKET_TYPES 14 /* number of packet types (incl type 0) */ - - uint8_t flags; /* packet flags */ -#define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */ -#define RXRPC_REQUEST_ACK 0x02 /* request an unconditional ACK of this packet */ -#define RXRPC_LAST_PACKET 0x04 /* the last packet from this side for this call */ -#define RXRPC_MORE_PACKETS 0x08 /* more packets to come */ -#define RXRPC_JUMBO_PACKET 0x20 /* [DATA] this is a jumbo packet */ -#define RXRPC_SLOW_START_OK 0x20 /* [ACK] slow start supported */ - - uint8_t userStatus; /* app-layer defined status */ -#define RXRPC_USERSTATUS_SERVICE_UPGRADE 0x01 /* AuriStor service upgrade request */ - - uint8_t securityIndex; /* security protocol ID */ - union { - __be16 _rsvd; /* reserved */ - __be16 cksum; /* kerberos security checksum */ - }; - __be16 serviceId; /* service ID */ - -} __packed; - -#define RXRPC_SUPPORTED_PACKET_TYPES ( \ - (1 << RXRPC_PACKET_TYPE_DATA) | \ - (1 << RXRPC_PACKET_TYPE_ACK) | \ - (1 << RXRPC_PACKET_TYPE_BUSY) | \ - (1 << RXRPC_PACKET_TYPE_ABORT) | \ - (1 << RXRPC_PACKET_TYPE_ACKALL) | \ - (1 << RXRPC_PACKET_TYPE_CHALLENGE) | \ - (1 << RXRPC_PACKET_TYPE_RESPONSE) | \ - /*(1 << RXRPC_PACKET_TYPE_DEBUG) | */ \ - (1 << RXRPC_PACKET_TYPE_VERSION)) - -/*****************************************************************************/ -/* - * jumbo packet secondary header - * - can be mapped to read header by: - * - new_serial = serial + 1 - * - new_seq = seq + 1 - * - new_flags = j_flags - * - new__rsvd = j__rsvd - * - duplicating all other fields - */ -struct rxrpc_jumbo_header { - uint8_t flags; /* packet flags (as per rxrpc_header) */ - uint8_t pad; - union { - __be16 _rsvd; /* reserved */ - __be16 cksum; /* kerberos security checksum */ - }; -}; - -#define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */ -#define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header)) - -/*****************************************************************************/ -/* - * on-the-wire Rx ACK packet data payload - * - all multibyte fields should be in network byte order - */ -struct rxrpc_ackpacket { - __be16 bufferSpace; /* number of packet buffers available */ - __be16 maxSkew; /* diff between serno being ACK'd and highest serial no - * received */ - __be32 firstPacket; /* sequence no of first ACK'd packet in attached list */ - __be32 previousPacket; /* sequence no of previous packet received */ - __be32 serial; /* serial no of packet that prompted this ACK */ - - uint8_t reason; /* reason for ACK */ -#define RXRPC_ACK_REQUESTED 1 /* ACK was requested on packet */ -#define RXRPC_ACK_DUPLICATE 2 /* duplicate packet received */ -#define RXRPC_ACK_OUT_OF_SEQUENCE 3 /* out of sequence packet received */ -#define RXRPC_ACK_EXCEEDS_WINDOW 4 /* packet received beyond end of ACK window */ -#define RXRPC_ACK_NOSPACE 5 /* packet discarded due to lack of buffer space */ -#define RXRPC_ACK_PING 6 /* keep alive ACK */ -#define RXRPC_ACK_PING_RESPONSE 7 /* response to RXRPC_ACK_PING */ -#define RXRPC_ACK_DELAY 8 /* nothing happened since received packet */ -#define RXRPC_ACK_IDLE 9 /* ACK due to fully received ACK window */ -#define RXRPC_ACK__INVALID 10 /* Representation of invalid ACK reason */ - - uint8_t nAcks; /* number of ACKs */ -#define RXRPC_MAXACKS 255 - - uint8_t acks[0]; /* list of ACK/NAKs */ -#define RXRPC_ACK_TYPE_NACK 0 -#define RXRPC_ACK_TYPE_ACK 1 - -} __packed; - -/* Some ACKs refer to specific packets and some are general and can be updated. */ -#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \ - (1 << RXRPC_ACK_PING_RESPONSE) | \ - (1 << RXRPC_ACK_DELAY) | \ - (1 << RXRPC_ACK_IDLE)) - - -/* - * ACK packets can have a further piece of information tagged on the end - */ -struct rxrpc_ackinfo { - __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */ - __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */ - __be32 rwind; /* Rx window size (packets) [AFS 3.4] */ - __be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */ -}; - -/*****************************************************************************/ -/* - * Kerberos security type-2 challenge packet - */ -struct rxkad_challenge { - __be32 version; /* version of this challenge type */ - __be32 nonce; /* encrypted random number */ - __be32 min_level; /* minimum security level */ - __be32 __padding; /* padding to 8-byte boundary */ -} __packed; - -/*****************************************************************************/ -/* - * Kerberos security type-2 response packet - */ -struct rxkad_response { - __be32 version; /* version of this response type */ - __be32 __pad; - - /* encrypted bit of the response */ - struct { - __be32 epoch; /* current epoch */ - __be32 cid; /* parent connection ID */ - __be32 checksum; /* checksum */ - __be32 securityIndex; /* security type */ - __be32 call_id[4]; /* encrypted call IDs */ - __be32 inc_nonce; /* challenge nonce + 1 */ - __be32 level; /* desired level */ - } encrypted; - - __be32 kvno; /* Kerberos key version number */ - __be32 ticket_len; /* Kerberos ticket length */ -} __packed; - -/*****************************************************************************/ -/* - * RxRPC-level abort codes - */ -#define RX_CALL_DEAD -1 /* call/conn has been inactive and is shut down */ -#define RX_INVALID_OPERATION -2 /* invalid operation requested / attempted */ -#define RX_CALL_TIMEOUT -3 /* call timeout exceeded */ -#define RX_EOF -4 /* unexpected end of data on read op */ -#define RX_PROTOCOL_ERROR -5 /* low-level protocol error */ -#define RX_USER_ABORT -6 /* generic user abort */ -#define RX_ADDRINUSE -7 /* UDP port in use */ -#define RX_DEBUGI_BADTYPE -8 /* bad debugging packet type */ - -/* - * (un)marshalling abort codes (rxgen) - */ -#define RXGEN_CC_MARSHAL -450 -#define RXGEN_CC_UNMARSHAL -451 -#define RXGEN_SS_MARSHAL -452 -#define RXGEN_SS_UNMARSHAL -453 -#define RXGEN_DECODE -454 -#define RXGEN_OPCODE -455 -#define RXGEN_SS_XDRFREE -456 -#define RXGEN_CC_XDRFREE -457 - -/* - * Rx kerberos security abort codes - * - unfortunately we have no generalised security abort codes to say things - * like "unsupported security", so we have to use these instead and hope the - * other side understands - */ -#define RXKADINCONSISTENCY 19270400 /* security module structure inconsistent */ -#define RXKADPACKETSHORT 19270401 /* packet too short for security challenge */ -#define RXKADLEVELFAIL 19270402 /* security level negotiation failed */ -#define RXKADTICKETLEN 19270403 /* ticket length too short or too long */ -#define RXKADOUTOFSEQUENCE 19270404 /* packet had bad sequence number */ -#define RXKADNOAUTH 19270405 /* caller not authorised */ -#define RXKADBADKEY 19270406 /* illegal key: bad parity or weak */ -#define RXKADBADTICKET 19270407 /* security object was passed a bad ticket */ -#define RXKADUNKNOWNKEY 19270408 /* ticket contained unknown key version number */ -#define RXKADEXPIRED 19270409 /* authentication expired */ -#define RXKADSEALEDINCON 19270410 /* sealed data inconsistent */ -#define RXKADDATALEN 19270411 /* user data too long */ -#define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */ - -#endif /* _LINUX_RXRPC_PACKET_H */ diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h index 08e2fb9c70ae..9656aad8f8f7 100644 --- a/include/uapi/linux/rxrpc.h +++ b/include/uapi/linux/rxrpc.h @@ -77,4 +77,48 @@ enum rxrpc_cmsg_type { #define RXRPC_SECURITY_RXGK 4 /* gssapi-based */ #define RXRPC_SECURITY_RXK5 5 /* kerberos 5 */ +/* + * RxRPC-level abort codes + */ +#define RX_CALL_DEAD -1 /* call/conn has been inactive and is shut down */ +#define RX_INVALID_OPERATION -2 /* invalid operation requested / attempted */ +#define RX_CALL_TIMEOUT -3 /* call timeout exceeded */ +#define RX_EOF -4 /* unexpected end of data on read op */ +#define RX_PROTOCOL_ERROR -5 /* low-level protocol error */ +#define RX_USER_ABORT -6 /* generic user abort */ +#define RX_ADDRINUSE -7 /* UDP port in use */ +#define RX_DEBUGI_BADTYPE -8 /* bad debugging packet type */ + +/* + * (un)marshalling abort codes (rxgen) + */ +#define RXGEN_CC_MARSHAL -450 +#define RXGEN_CC_UNMARSHAL -451 +#define RXGEN_SS_MARSHAL -452 +#define RXGEN_SS_UNMARSHAL -453 +#define RXGEN_DECODE -454 +#define RXGEN_OPCODE -455 +#define RXGEN_SS_XDRFREE -456 +#define RXGEN_CC_XDRFREE -457 + +/* + * Rx kerberos security abort codes + * - unfortunately we have no generalised security abort codes to say things + * like "unsupported security", so we have to use these instead and hope the + * other side understands + */ +#define RXKADINCONSISTENCY 19270400 /* security module structure inconsistent */ +#define RXKADPACKETSHORT 19270401 /* packet too short for security challenge */ +#define RXKADLEVELFAIL 19270402 /* security level negotiation failed */ +#define RXKADTICKETLEN 19270403 /* ticket length too short or too long */ +#define RXKADOUTOFSEQUENCE 19270404 /* packet had bad sequence number */ +#define RXKADNOAUTH 19270405 /* caller not authorised */ +#define RXKADBADKEY 19270406 /* illegal key: bad parity or weak */ +#define RXKADBADTICKET 19270407 /* security object was passed a bad ticket */ +#define RXKADUNKNOWNKEY 19270408 /* ticket contained unknown key version number */ +#define RXKADEXPIRED 19270409 /* authentication expired */ +#define RXKADSEALEDINCON 19270410 /* sealed data inconsistent */ +#define RXKADDATALEN 19270411 /* user data too long */ +#define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */ + #endif /* _UAPI_LINUX_RXRPC_H */ -- cgit v1.2.3 From 9f08ea848117ab521efcfd3e004d8e1a0edc640c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 18 Jul 2017 20:18:09 +0200 Subject: netfilter: nf_tables: keep chain counters away from hot path These chain counters are only used by the iptables-compat tool, that allow users to use the x_tables extensions from the existing nf_tables framework. This patch makes nf_tables by ~5% for the general usecase, ie. native nft users, where no chain counters are used at all. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 8f690effec37..424684c33771 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -49,6 +49,8 @@ struct nft_payload_set { }; extern const struct nft_expr_ops nft_payload_fast_ops; + +extern struct static_key_false nft_counters_enabled; extern struct static_key_false nft_trace_enabled; #endif /* _NET_NF_TABLES_CORE_H */ -- cgit v1.2.3 From 784b4e612d42a2b7578d7fab2ed78940e10536bc Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 19 Jul 2017 16:32:23 +0200 Subject: netfilter: nf_tables: Attach process info to NFT_MSG_NEWGEN notifications This is helpful for 'nft monitor' to track which process caused a given change to the ruleset. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 683f6f88fcac..6f0a950e21c3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1221,6 +1221,8 @@ enum nft_objref_attributes { enum nft_gen_attributes { NFTA_GEN_UNSPEC, NFTA_GEN_ID, + NFTA_GEN_PROC_PID, + NFTA_GEN_PROC_NAME, __NFTA_GEN_MAX }; #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) -- cgit v1.2.3 From d764a122cc7af7ab1c40c08745f0fcd33cc2f7db Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 21 Jul 2017 12:49:28 +0200 Subject: net: add new netdevice feature for offload of RX port for UDP tunnels This adds a new netdevice feature, so that the offloading of RX port for UDP tunnels can be disabled by the administrator on some netdevices, using the "rx-udp_tunnel-port-offload" feature in ethtool. This feature is set for all devices that provide ndo_udp_tunnel_add. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index ebd273627334..dc8b4896b77b 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -75,6 +75,7 @@ enum { NETIF_F_HW_TC_BIT, /* Offload TC infrastructure */ NETIF_F_HW_ESP_BIT, /* Hardware ESP transformation offload */ NETIF_F_HW_ESP_TX_CSUM_BIT, /* ESP with TX checksum offload */ + NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */ /* * Add your fresh new feature above and remember to update @@ -138,6 +139,7 @@ enum { #define NETIF_F_HW_TC __NETIF_F(HW_TC) #define NETIF_F_HW_ESP __NETIF_F(HW_ESP) #define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM) +#define NETIF_F_RX_UDP_TUNNEL_PORT __NETIF_F(RX_UDP_TUNNEL_PORT) #define for_each_netdev_feature(mask_addr, bit) \ for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) -- cgit v1.2.3 From 296d8ee37c50f139d934bdefbab85509b2e4a525 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 21 Jul 2017 12:49:30 +0200 Subject: net: add infrastructure to un-offload UDP tunnel port This adds a new NETDEV_UDP_TUNNEL_DROP_INFO event, similar to NETDEV_UDP_TUNNEL_PUSH_INFO, to signal to un-offload ports. This also adds udp_tunnel_drop_rx_port(), which calls ndo_udp_tunnel_del. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/udp_tunnel.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 614642eb7eb7..3a3cdc1b1f31 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2317,6 +2317,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B #define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C +#define NETDEV_UDP_TUNNEL_DROP_INFO 0x001D #define NETDEV_CHANGE_TX_QUEUE_LEN 0x001E int register_netdevice_notifier(struct notifier_block *nb); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 02c5be037451..10cce0dd4450 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -115,6 +115,8 @@ struct udp_tunnel_info { /* Notify network devices of offloadable types */ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); +void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type); void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); @@ -124,6 +126,12 @@ static inline void udp_tunnel_get_rx_info(struct net_device *dev) call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); } +static inline void udp_tunnel_drop_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); +} + /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, -- cgit v1.2.3 From cb1844c47279fb59129f8a021a0b09bcf2011ad7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:26 +0800 Subject: sctp: remove the typedef sctp_initack_chunk_t This patch is to remove the typedef sctp_initack_chunk_t, and replace with struct sctp_initack_chunk in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 913474dfc96c..05c2099be537 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -336,7 +336,10 @@ struct sctp_hmac_algo_param { * The INIT ACK chunk is used to acknowledge the initiation of an SCTP * association. */ -typedef struct sctp_init_chunk sctp_initack_chunk_t; +struct sctp_initack_chunk { + struct sctp_chunkhdr chunk_hdr; + struct sctp_inithdr init_hdr; +}; /* Section 3.3.3.1 State Cookie (7) */ typedef struct sctp_cookie_param { -- cgit v1.2.3 From f48ef4c7f7979e8e658b7e038a82f096ab292d70 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:27 +0800 Subject: sctp: remove the typedef sctp_cookie_param_t This patch is to remove the typedef sctp_cookie_param_t, and replace with struct sctp_cookie_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 05c2099be537..9e77abda2111 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -342,10 +342,10 @@ struct sctp_initack_chunk { }; /* Section 3.3.3.1 State Cookie (7) */ -typedef struct sctp_cookie_param { +struct sctp_cookie_param { struct sctp_paramhdr p; __u8 body[0]; -} sctp_cookie_param_t; +}; /* Section 3.3.3.1 Unrecognized Parameters (8) */ typedef struct sctp_unrecognized_param { -- cgit v1.2.3 From 62e6b7e4ee244f8043c169c049d3b2c2c798cd60 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:28 +0800 Subject: sctp: remove the typedef sctp_unrecognized_param_t This patch is to remove the typedef sctp_unrecognized_param_t, and replace with struct sctp_unrecognized_param in the places where it's using this typedef. It is also to fix some indents in sctp_sf_do_unexpected_init() and sctp_sf_do_5_1B_init(). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 9e77abda2111..c323b3e3ecdb 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -348,10 +348,10 @@ struct sctp_cookie_param { }; /* Section 3.3.3.1 Unrecognized Parameters (8) */ -typedef struct sctp_unrecognized_param { +struct sctp_unrecognized_param { struct sctp_paramhdr param_hdr; struct sctp_paramhdr unrecognized; -} sctp_unrecognized_param_t; +}; -- cgit v1.2.3 From fe9a0fe7210d803adb3d5817da029fe39b8a4133 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:29 +0800 Subject: sctp: remove the typedef sctp_gap_ack_block_t This patch is to remove the typedef sctp_gap_ack_block_t, and replace with struct sctp_gap_ack_block in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index c323b3e3ecdb..b84b8e8340da 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -363,16 +363,16 @@ struct sctp_unrecognized_param { * subsequences of DATA chunks as represented by their TSNs. */ -typedef struct sctp_gap_ack_block { +struct sctp_gap_ack_block { __be16 start; __be16 end; -} sctp_gap_ack_block_t; +}; typedef __be32 sctp_dup_tsn_t; typedef union { - sctp_gap_ack_block_t gab; - sctp_dup_tsn_t dup; + struct sctp_gap_ack_block gab; + sctp_dup_tsn_t dup; } sctp_sack_variable_t; typedef struct sctp_sackhdr { -- cgit v1.2.3 From 9b41515636563ae76e730dbcb97fd303b94ed7d9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:30 +0800 Subject: sctp: remove the typedef sctp_dup_tsn_t This patch is to remove the typedef sctp_dup_tsn_t, and replace with __be32 in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index b84b8e8340da..8faf74eff63d 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -368,11 +368,9 @@ struct sctp_gap_ack_block { __be16 end; }; -typedef __be32 sctp_dup_tsn_t; - typedef union { struct sctp_gap_ack_block gab; - sctp_dup_tsn_t dup; + __be32 dup; } sctp_sack_variable_t; typedef struct sctp_sackhdr { -- cgit v1.2.3 From afd93b7be6e24731d82d9fd84b8a5ea73a68214b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:31 +0800 Subject: sctp: remove the typedef sctp_sack_variable_t This patch is to remove the typedef sctp_sack_variable_t, and replace with union sctp_sack_variable in the places where it's using this typedef. It is also to fix some indents in sctp_acked(). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 8faf74eff63d..8df6ac53f05b 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -368,17 +368,17 @@ struct sctp_gap_ack_block { __be16 end; }; -typedef union { +union sctp_sack_variable { struct sctp_gap_ack_block gab; __be32 dup; -} sctp_sack_variable_t; +}; typedef struct sctp_sackhdr { __be32 cum_tsn_ack; __be32 a_rwnd; __be16 num_gap_ack_blocks; __be16 num_dup_tsns; - sctp_sack_variable_t variable[0]; + union sctp_sack_variable variable[0]; } sctp_sackhdr_t; typedef struct sctp_sack_chunk { -- cgit v1.2.3 From 787310859d8d1a72545db2343fb3ac8f765b0f35 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:32 +0800 Subject: sctp: remove the typedef sctp_sackhdr_t This patch is to remove the typedef sctp_sackhdr_t, and replace with struct sctp_sackhdr in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- include/net/sctp/command.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 8df6ac53f05b..a2e43129d11a 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -373,17 +373,17 @@ union sctp_sack_variable { __be32 dup; }; -typedef struct sctp_sackhdr { +struct sctp_sackhdr { __be32 cum_tsn_ack; __be32 a_rwnd; __be16 num_gap_ack_blocks; __be16 num_dup_tsns; union sctp_sack_variable variable[0]; -} sctp_sackhdr_t; +}; typedef struct sctp_sack_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_sackhdr_t sack_hdr; + struct sctp_sackhdr sack_hdr; } sctp_sack_chunk_t; diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index d4679e7a5ed5..1d5f6ff3f440 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -135,7 +135,7 @@ typedef union { struct sctp_init_chunk *init; struct sctp_ulpevent *ulpevent; struct sctp_packet *packet; - sctp_sackhdr_t *sackh; + struct sctp_sackhdr *sackh; struct sctp_datamsg *msg; } sctp_arg_t; @@ -176,7 +176,7 @@ SCTP_ARG_CONSTRUCTOR(BA, struct sctp_bind_addr *, bp) SCTP_ARG_CONSTRUCTOR(PEER_INIT, struct sctp_init_chunk *, init) SCTP_ARG_CONSTRUCTOR(ULPEVENT, struct sctp_ulpevent *, ulpevent) SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) -SCTP_ARG_CONSTRUCTOR(SACKH, sctp_sackhdr_t *, sackh) +SCTP_ARG_CONSTRUCTOR(SACKH, struct sctp_sackhdr *, sackh) SCTP_ARG_CONSTRUCTOR(DATAMSG, struct sctp_datamsg *, msg) static inline sctp_arg_t SCTP_FORCE(void) -- cgit v1.2.3 From d4d6c61489e7e4a8944360312e572988889558a8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:33 +0800 Subject: sctp: remove the typedef sctp_sack_chunk_t This patch is to remove the typedef sctp_sack_chunk_t, and replace with struct sctp_sack_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index a2e43129d11a..48f6560dd880 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -381,10 +381,10 @@ struct sctp_sackhdr { union sctp_sack_variable variable[0]; }; -typedef struct sctp_sack_chunk { +struct sctp_sack_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_sackhdr sack_hdr; -} sctp_sack_chunk_t; +}; /* RFC 2960. Section 3.3.5 Heartbeat Request (HEARTBEAT) (4): -- cgit v1.2.3 From 4d2dcdf4e04c938b266f06f271000e4b0f3a288f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:34 +0800 Subject: sctp: remove the typedef sctp_heartbeathdr_t This patch is to remove the typedef sctp_heartbeathdr_t, and replace with struct sctp_heartbeathdr in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 48f6560dd880..6e26b86770f1 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -394,13 +394,13 @@ struct sctp_sack_chunk { * the present association. */ -typedef struct sctp_heartbeathdr { +struct sctp_heartbeathdr { struct sctp_paramhdr info; -} sctp_heartbeathdr_t; +}; typedef struct sctp_heartbeat_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_heartbeathdr_t hb_hdr; + struct sctp_heartbeathdr hb_hdr; } sctp_heartbeat_chunk_t; -- cgit v1.2.3 From 38c00f7482281801d6f7fe410c7a4b61ae25218e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:35 +0800 Subject: sctp: remove the typedef sctp_heartbeat_chunk_t This patch is to remove the typedef sctp_heartbeat_chunk_t, and replace with struct sctp_heartbeat_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 6e26b86770f1..bfda7c66960c 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -398,10 +398,10 @@ struct sctp_heartbeathdr { struct sctp_paramhdr info; }; -typedef struct sctp_heartbeat_chunk { +struct sctp_heartbeat_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_heartbeathdr hb_hdr; -} sctp_heartbeat_chunk_t; +}; /* For the abort and shutdown ACK we must carry the init tag in the -- cgit v1.2.3 From 441ae65ae000dd325a609306bd0cd850df50cfc4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:36 +0800 Subject: sctp: remove the typedef sctp_abort_chunk_t This patch is to remove the typedef sctp_abort_chunk_t, and replace with struct sctp_abort_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index bfda7c66960c..ba007163acfd 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -408,9 +408,9 @@ struct sctp_heartbeat_chunk { * common header. Just the common header is all that is needed with a * chunk descriptor. */ -typedef struct sctp_abort_chunk { +struct sctp_abort_chunk { struct sctp_chunkhdr uh; -} sctp_abort_chunk_t; +}; /* For the graceful shutdown we must carry the tag (in common header) -- cgit v1.2.3 From e42e24c3cc072088756d84ef07b492ac2a3ae2e5 Mon Sep 17 00:00:00 2001 From: Matvejchikov Ilya Date: Mon, 24 Jul 2017 16:02:12 +0400 Subject: tcp: remove redundant argument from tcp_rcv_established() The last (4th) argument of tcp_rcv_established() is redundant as it always equals to skb->len and the skb itself is always passed as 2th agrument. There is no reason to have it. Signed-off-by: Ilya V. Matveychikov Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4f056ea79df2..12d68335acd4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -362,7 +362,7 @@ void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len); + const struct tcphdr *th); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); -- cgit v1.2.3 From 645874e5807ae5ffa09fba2ccd23f01e4eb16d58 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 26 Jul 2017 06:07:11 -0700 Subject: qed: Add support for Energy efficient ethernet. The patch adds required driver support for reading/configuring the Energy Efficient Ethernet (EEE) parameters. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index ef39c7f40ae6..9f3276271b02 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -161,6 +161,18 @@ enum qed_nvm_images { QED_NVM_IMAGE_FCOE_CFG, }; +struct qed_link_eee_params { + u32 tx_lpi_timer; +#define QED_EEE_1G_ADV BIT(0) +#define QED_EEE_10G_ADV BIT(1) + + /* Capabilities are represented using QED_EEE_*_ADV values */ + u8 adv_caps; + u8 lp_adv_caps; + bool enable; + bool tx_lpi_enable; +}; + enum qed_led_mode { QED_LED_MODE_OFF, QED_LED_MODE_ON, @@ -408,6 +420,7 @@ struct qed_link_params { #define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED BIT(2) #define QED_LINK_OVERRIDE_PAUSE_CONFIG BIT(3) #define QED_LINK_OVERRIDE_LOOPBACK_MODE BIT(4) +#define QED_LINK_OVERRIDE_EEE_CONFIG BIT(5) u32 override_flags; bool autoneg; u32 adv_speeds; @@ -422,6 +435,7 @@ struct qed_link_params { #define QED_LINK_LOOPBACK_EXT BIT(3) #define QED_LINK_LOOPBACK_MAC BIT(4) u32 loopback_mode; + struct qed_link_eee_params eee; }; struct qed_link_output { @@ -437,6 +451,12 @@ struct qed_link_output { u8 port; /* In PORT defs */ bool autoneg; u32 pause_config; + + /* EEE - capability & param */ + bool eee_supported; + bool eee_active; + u8 sup_caps; + struct qed_link_eee_params eee; }; struct qed_probe_params { -- cgit v1.2.3 From 477f2d1460a636abd08f03eafabe0c51366fa5de Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Wed, 26 Jul 2017 06:07:13 -0700 Subject: qed: Add support for vf coalesce configuration. This patch add the ethtool support to set RX/Tx coalesce value to the VF associated Rx/Tx queues. Signed-off-by: Rahul Verma Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 9f3276271b02..4d59ca16134c 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -694,8 +694,8 @@ struct qed_common_ops { * * @return 0 on success, error otherwise. */ - int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, - u16 qid, u16 sb_id); + int (*set_coalesce)(struct qed_dev *cdev, + u16 rx_coal, u16 tx_coal, void *handle); /** * @brief set_led - Configure LED mode -- cgit v1.2.3 From bf5a94bfe26a9fcd4af91ae6bccd4f3d600d2262 Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Wed, 26 Jul 2017 06:07:14 -0700 Subject: qed: Read per queue coalesce from hardware Retrieve the actual coalesce value from hardware for every Rx/Tx queue, instead of Rx/Tx coalesce value cached during set coalesce. Signed-off-by: Rahul Verma Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 1 + include/linux/qed/qed_if.h | 11 +---------- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 0eef0a2b1901..d60de4a39810 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -323,6 +323,7 @@ struct qed_eth_ops { int (*configure_arfs_searcher)(struct qed_dev *cdev, bool en_searcher); + int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle); }; const struct qed_eth_ops *qed_get_eth_ops(void); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 4d59ca16134c..2b4720bb8b40 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -186,6 +186,7 @@ enum qed_led_mode { #define QED_COALESCE_MAX 0xFF #define QED_DEFAULT_RX_USECS 12 +#define QED_DEFAULT_TX_USECS 48 /* forward */ struct qed_dev; @@ -673,16 +674,6 @@ struct qed_common_ops { int (*nvm_get_image)(struct qed_dev *cdev, enum qed_nvm_images type, u8 *buf, u16 len); -/** - * @brief get_coalesce - Get coalesce parameters in usec - * - * @param cdev - * @param rx_coal - Rx coalesce value in usec - * @param tx_coal - Tx coalesce value in usec - * - */ - void (*get_coalesce)(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal); - /** * @brief set_coalesce - Configure Rx coalesce value in usec * -- cgit v1.2.3 From 41822878b2ff40a1f57be81f1a1f0f040847b912 Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Wed, 26 Jul 2017 06:07:15 -0700 Subject: qed: enhanced per queue max coalesce value. Maximum coalesce per Rx/Tx queue is extended from 255 to 511. Signed-off-by: Rahul Verma Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 2b4720bb8b40..cc646ca97974 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -184,7 +184,7 @@ enum qed_led_mode { #define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr)) -#define QED_COALESCE_MAX 0xFF +#define QED_COALESCE_MAX 0x1FF #define QED_DEFAULT_RX_USECS 12 #define QED_DEFAULT_TX_USECS 48 -- cgit v1.2.3 From 1a5f3da20bd966220931239fbd31e6ac6ff42251 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Ravipati Date: Thu, 27 Jul 2017 16:47:26 -0700 Subject: net: ethtool: add support for forward error correction modes Forward Error Correction (FEC) modes i.e Base-R and Reed-Solomon modes are introduced in 25G/40G/100G standards for providing good BER at high speeds. Various networking devices which support 25G/40G/100G provides ability to manage supported FEC modes and the lack of FEC encoding control and reporting today is a source for interoperability issues for many vendors. FEC capability as well as specific FEC mode i.e. Base-R or RS modes can be requested or advertised through bits D44:47 of base link codeword. This patch set intends to provide option under ethtool to manage and report FEC encoding settings for networking devices as per IEEE 802.3 bj, bm and by specs. set-fec/show-fec option(s) are designed to provide control and report the FEC encoding on the link. SET FEC option: root@tor: ethtool --set-fec swp1 encoding [off | RS | BaseR | auto] Encoding: Types of encoding Off : Turning off any encoding RS : enforcing RS-FEC encoding on supported speeds BaseR : enforcing Base R encoding on supported speeds Auto : IEEE defaults for the speed/medium combination Here are a few examples of what we would expect if encoding=auto: - if autoneg is on, we are expecting FEC to be negotiated as on or off as long as protocol supports it - if the hardware is capable of detecting the FEC encoding on it's receiver it will reconfigure its encoder to match - in absence of the above, the configuration would be set to IEEE defaults. >From our understanding , this is essentially what most hardware/driver combinations are doing today in the absence of a way for users to control the behavior. SHOW FEC option: root@tor: ethtool --show-fec swp1 FEC parameters for swp1: Active FEC encodings: RS Configured FEC encodings: RS | BaseR ETHTOOL DEVNAME output modification: ethtool devname output: root@tor:~# ethtool swp1 Settings for swp1: root@hpe-7712-03:~# ethtool swp18 Settings for swp18: Supported ports: [ FIBRE ] Supported link modes: 40000baseCR4/Full 40000baseSR4/Full 40000baseLR4/Full 100000baseSR4/Full 100000baseCR4/Full 100000baseLR4_ER4/Full Supported pause frame use: No Supports auto-negotiation: Yes Supported FEC modes: [RS | BaseR | None | Not reported] Advertised link modes: Not reported Advertised pause frame use: No Advertised auto-negotiation: No Advertised FEC modes: [RS | BaseR | None | Not reported] <<<< One or more FEC modes Speed: 100000Mb/s Duplex: Full Port: FIBRE PHYAD: 106 Transceiver: internal Auto-negotiation: off Link detected: yes This patch includes following changes a) New ETHTOOL_SFECPARAM/SFECPARAM API, handled by the new get_fecparam/set_fecparam callbacks, provides support for configuration of forward error correction modes. b) Link mode bits for FEC modes i.e. None (No FEC mode), RS, BaseR/FC are defined so that users can configure these fec modes for supported and advertising fields as part of link autonegotiation. Signed-off-by: Vidya Sagar Ravipati Signed-off-by: Dustin Byford Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 ++++ include/uapi/linux/ethtool.h | 48 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 83cc9863444b..afdbb701fdb4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -374,5 +374,9 @@ struct ethtool_ops { struct ethtool_link_ksettings *); int (*set_link_ksettings)(struct net_device *, const struct ethtool_link_ksettings *); + int (*get_fecparam)(struct net_device *, + struct ethtool_fecparam *); + int (*set_fecparam)(struct net_device *, + struct ethtool_fecparam *); }; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 7d4a594d5d58..9c041dae8e2c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1238,6 +1238,47 @@ struct ethtool_per_queue_op { char data[]; }; +/** + * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters + * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM + * @active_fec: FEC mode which is active on porte + * @fec: Bitmask of supported/configured FEC modes + * @rsvd: Reserved for future extensions. i.e FEC bypass feature. + * + * Drivers should reject a non-zero setting of @autoneg when + * autoneogotiation is disabled (or not supported) for the link. + * + */ +struct ethtool_fecparam { + __u32 cmd; + /* bitmask of FEC modes */ + __u32 active_fec; + __u32 fec; + __u32 reserved; +}; + +/** + * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration + * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported + * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver + * @ETHTOOL_FEC_OFF: No FEC Mode + * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode + * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode + */ +enum ethtool_fec_config_bits { + ETHTOOL_FEC_NONE_BIT, + ETHTOOL_FEC_AUTO_BIT, + ETHTOOL_FEC_OFF_BIT, + ETHTOOL_FEC_RS_BIT, + ETHTOOL_FEC_BASER_BIT, +}; + +#define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT) +#define ETHTOOL_FEC_AUTO (1 << ETHTOOL_FEC_AUTO_BIT) +#define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT) +#define ETHTOOL_FEC_RS (1 << ETHTOOL_FEC_RS_BIT) +#define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT) + /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings. * Please use ETHTOOL_GLINKSETTINGS @@ -1330,6 +1371,8 @@ struct ethtool_per_queue_op { #define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */ #define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */ #define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */ +#define ETHTOOL_GFECPARAM 0x00000050 /* Get FEC settings */ +#define ETHTOOL_SFECPARAM 0x00000051 /* Set FEC settings */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET @@ -1387,6 +1430,9 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47, ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48, + ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49, + ETHTOOL_LINK_MODE_FEC_RS_BIT = 50, + ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51, /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1395,7 +1441,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + = ETHTOOL_LINK_MODE_FEC_BASER_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ -- cgit v1.2.3 From 64c83d837329531252a1a0f0dfdd4fd607e1d8e9 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 30 Jul 2017 13:24:49 -0400 Subject: net netlink: Add new type NLA_BITFIELD32 Generic bitflags attribute content sent to the kernel by user. With this netlink attr type the user can either set or unset a flag in the kernel. The value is a bitmap that defines the bit values being set The selector is a bitmask that defines which value bit is to be considered. A check is made to ensure the rules that a kernel subsystem always conforms to bitflags the kernel already knows about. i.e if the user tries to set a bit flag that is not understood then the _it will be rejected_. In the most basic form, the user specifies the attribute policy as: [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags }, where myvalidflags is the bit mask of the flags the kernel understands. If the user _does not_ provide myvalidflags then the attribute will also be rejected. Examples: value = 0x0, and selector = 0x1 implies we are selecting bit 1 and we want to set its value to 0. value = 0x2, and selector = 0x2 implies we are selecting bit 2 and we want to set its value to 1. Suggested-by: Jiri Pirko Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/netlink.h | 16 ++++++++++++++++ include/uapi/linux/netlink.h | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index ef8e6c3a80a6..82dd298b40c7 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -178,6 +178,7 @@ enum { NLA_S16, NLA_S32, NLA_S64, + NLA_BITFIELD32, __NLA_TYPE_MAX, }; @@ -206,6 +207,7 @@ enum { * NLA_MSECS Leaving the length field zero will verify the * given type fits, using it verifies minimum length * just like "All other" + * NLA_BITFIELD32 A 32-bit bitmap/bitselector attribute * All other Minimum length of attribute payload * * Example: @@ -213,11 +215,13 @@ enum { * [ATTR_FOO] = { .type = NLA_U16 }, * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, + * [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags }, * }; */ struct nla_policy { u16 type; u16 len; + void *validation_data; }; /** @@ -1202,6 +1206,18 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla) return tmp; } +/** + * nla_get_bitfield32 - return payload of 32 bitfield attribute + * @nla: nla_bitfield32 attribute + */ +static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla) +{ + struct nla_bitfield32 tmp; + + nla_memcpy(&tmp, nla, sizeof(tmp)); + return tmp; +} + /** * nla_memdup - duplicate attribute memory (kmemdup) * @src: netlink attribute to duplicate from diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index f86127a46cfc..f4fc9c9e123d 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -226,5 +226,22 @@ struct nlattr { #define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) #define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) +/* Generic 32 bitflags attribute content sent to the kernel. + * + * The value is a bitmap that defines the values being set + * The selector is a bitmask that defines which value is legit + * + * Examples: + * value = 0x0, and selector = 0x1 + * implies we are selecting bit 1 and we want to set its value to 0. + * + * value = 0x2, and selector = 0x2 + * implies we are selecting bit 2 and we want to set its value to 1. + * + */ +struct nla_bitfield32 { + __u32 value; + __u32 selector; +}; #endif /* _UAPI__LINUX_NETLINK_H */ -- cgit v1.2.3 From 90825b23a887f06f6c05bdde77b200c5fe9b6217 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 30 Jul 2017 13:24:51 -0400 Subject: net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch When you dump hundreds of thousands of actions, getting only 32 per dump batch even when the socket buffer and memory allocations allow is inefficient. With this change, the user will get as many as possibly fitting within the given constraints available to the kernel. The top level action TLV space is extended. An attribute TCA_ROOT_FLAGS is used to carry flags; flag TCA_FLAG_LARGE_DUMP_ON is set by the user indicating the user is capable of processing these large dumps. Older user space which doesnt set this flag doesnt get the large (than 32) batches. The kernel uses the TCA_ROOT_COUNT attribute to tell the user how many actions are put in a single batch. As such user space app knows how long to iterate (independent of the type of action being dumped) instead of hardcoded maximum of 32 thus maintaining backward compat. Some results dumping 1.5M actions below: first an unpatched tc which doesnt understand these features... prompt$ time -p tc actions ls action gact | grep index | wc -l 1500000 real 1388.43 user 2.07 sys 1386.79 Now lets see a patched tc which sets the correct flags when requesting a dump: prompt$ time -p updatedtc actions ls action gact | grep index | wc -l 1500000 real 178.13 user 2.02 sys 176.96 That is about 8x performance improvement for tc app which sets its receive buffer to about 32K. Signed-off-by: Jamal Hadi Salim Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index d148505010a7..bfa80a6164d9 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -683,10 +683,28 @@ struct tcamsg { unsigned char tca__pad1; unsigned short tca__pad2; }; + +enum { + TCA_ROOT_UNSPEC, + TCA_ROOT_TAB, +#define TCA_ACT_TAB TCA_ROOT_TAB +#define TCAA_MAX TCA_ROOT_TAB + TCA_ROOT_FLAGS, + TCA_ROOT_COUNT, + __TCA_ROOT_MAX, +#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) +}; + #define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) -#define TCA_ACT_TAB 1 /* attr type must be >=1 */ -#define TCAA_MAX 1 +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS + * + * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO + * actions in a dump. All dump responses will contain the number of actions + * being dumped stored in for user app's consumption in TCA_ROOT_COUNT + * + */ +#define TCA_FLAG_LARGE_DUMP_ON (1 << 0) /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) -- cgit v1.2.3 From e62e484df04964ac947c679ef4f00c54ae5395aa Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 30 Jul 2017 13:24:52 -0400 Subject: net sched actions: add time filter for action dumping This patch adds support for filtering based on time since last used. When we are dumping a large number of actions it is useful to have the option of filtering based on when the action was last used to reduce the amount of data crossing to user space. With this patch the user space app sets the TCA_ROOT_TIME_DELTA attribute with the value in milliseconds with "time of interest since now". The kernel converts this to jiffies and does the filtering comparison matching entries that have seen activity since then and returns them to user space. Old kernels and old tc continue to work in legacy mode since they dont specify this attribute. Some example (we have 400 actions bound to 400 filters); at installation time. Using updated when tc setting the time of interest to 120 seconds earlier (we see 400 actions): prompt$ hackedtc actions ls action gact since 120000| grep index | wc -l 400 go get some coffee and wait for > 120 seconds and try again: prompt$ hackedtc actions ls action gact since 120000 | grep index | wc -l 0 Lets see a filter bound to one of these actions: .... filter pref 10 u32 filter pref 10 u32 fh 800: ht divisor 1 filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10 (rule hit 2 success 1) match 7f000002/ffffffff at 12 (success 1 ) action order 1: gact action pass random type none pass val 0 index 23 ref 2 bind 1 installed 1145 sec used 802 sec Action statistics: Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 .... that coffee took long, no? It was good. Now lets ping -c 1 127.0.0.2, then run the actions again: prompt$ hackedtc actions ls action gact since 120 | grep index | wc -l 1 More details please: prompt$ hackedtc -s actions ls action gact since 120000 action order 0: gact action pass random type none pass val 0 index 23 ref 2 bind 1 installed 1270 sec used 30 sec Action statistics: Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 And the filter? filter pref 10 u32 filter pref 10 u32 fh 800: ht divisor 1 filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10 (rule hit 4 success 2) match 7f000002/ffffffff at 12 (success 2 ) action order 1: gact action pass random type none pass val 0 index 23 ref 2 bind 1 installed 1324 sec used 84 sec Action statistics: Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 Signed-off-by: Jamal Hadi Salim Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index bfa80a6164d9..dab7dad9e01a 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -691,6 +691,7 @@ enum { #define TCAA_MAX TCA_ROOT_TAB TCA_ROOT_FLAGS, TCA_ROOT_COUNT, + TCA_ROOT_TIME_DELTA, /* in msecs */ __TCA_ROOT_MAX, #define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) }; -- cgit v1.2.3 From ac7b848390036dadd4351899d2a23748075916bd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jul 2017 00:02:31 +0200 Subject: netfilter: expect: add and use nf_ct_expect_iterate helpers We have several spots that open-code a expect walk, add a helper that is similar to nf_ct_iterate_destroy/nf_ct_iterate_cleanup. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_expect.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 2ba54feaccd8..818def011110 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -107,6 +107,11 @@ void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); bool nf_ct_remove_expect(struct nf_conntrack_expect *exp); +void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data), void *data); +void nf_ct_expect_iterate_net(struct net *net, + bool (*iter)(struct nf_conntrack_expect *e, void *data), + void *data, u32 portid, int report); + /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me); -- cgit v1.2.3 From 84657984c26fd0b64743a397f3a1a587fa4b575a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jul 2017 00:02:32 +0200 Subject: netfilter: add and use nf_ct_unconfirmed_destroy This also removes __nf_ct_unconfirmed_destroy() call from nf_ct_iterate_cleanup_net, so that function can be used only when missing conntracks from unconfirmed list isn't a problem. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 48407569585d..6e6f678aaac7 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -224,6 +224,9 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); +/* Set all unconfirmed conntrack as dying */ +void nf_ct_unconfirmed_destroy(struct net *); + /* Iterate over all conntracks: if iter returns true, it's deleted. */ void nf_ct_iterate_cleanup_net(struct net *net, int (*iter)(struct nf_conn *i, void *data), -- cgit v1.2.3 From 2cf0c8b3e6942ecafe6ebb1a6d0328a81641bf39 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:40 +0200 Subject: netlink: Introduce nla_strdup() This is similar to strdup() for netlink string attributes. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index ef8e6c3a80a6..c8c2eb5ae55e 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -247,6 +247,7 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); +char *nla_strdup(const struct nlattr *nla, gfp_t flags); int nla_memcpy(void *dest, const struct nlattr *src, int count); int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); int nla_strcmp(const struct nlattr *nla, const char *str); -- cgit v1.2.3 From e46abbcc05aa8a16b0e7f5c94e86d11af9aa2770 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:41 +0200 Subject: netfilter: nf_tables: Allow table names of up to 255 chars Allocate all table names dynamically to allow for arbitrary lengths but introduce NFT_NAME_MAXLEN as an upper sanity boundary. It's value was chosen to allow using a domain name as per RFC 1035. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/uapi/linux/netfilter/nf_tables.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index bd5be0d691d5..05ecf78ec078 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -957,7 +957,7 @@ struct nft_table { u32 use; u16 flags:14, genmask:2; - char name[NFT_TABLE_MAXNAMELEN]; + char *name; }; enum nft_af_flags { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 6f0a950e21c3..0b94e572ef16 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1,7 +1,8 @@ #ifndef _LINUX_NF_TABLES_H #define _LINUX_NF_TABLES_H -#define NFT_TABLE_MAXNAMELEN 32 +#define NFT_NAME_MAXLEN 256 +#define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_CHAIN_MAXNAMELEN 32 #define NFT_SET_MAXNAMELEN 32 #define NFT_OBJ_MAXNAMELEN 32 -- cgit v1.2.3 From b7263e071aba736cea9e71cdf2e76dfa7aebd039 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:42 +0200 Subject: netfilter: nf_tables: Allow chain name of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- include/uapi/linux/netfilter/nf_tables.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 05ecf78ec078..be1610162ee0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -859,7 +859,7 @@ struct nft_chain { u16 level; u8 flags:6, genmask:2; - char name[NFT_CHAIN_MAXNAMELEN]; + char *name; }; enum nft_chain_type { @@ -1272,7 +1272,7 @@ struct nft_trans_set { struct nft_trans_chain { bool update; - char name[NFT_CHAIN_MAXNAMELEN]; + char *name; struct nft_stats __percpu *stats; u8 policy; }; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 0b94e572ef16..d9c03a8608ee 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -3,7 +3,7 @@ #define NFT_NAME_MAXLEN 256 #define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN -#define NFT_CHAIN_MAXNAMELEN 32 +#define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_SET_MAXNAMELEN 32 #define NFT_OBJ_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 -- cgit v1.2.3 From 387454901bd62022ac1b04e15bd8d4fcc60bbed4 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:43 +0200 Subject: netfilter: nf_tables: Allow set names of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/uapi/linux/netfilter/nf_tables.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index be1610162ee0..66ba62fa7d90 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -396,7 +396,7 @@ void nft_unregister_set(struct nft_set_type *type); struct nft_set { struct list_head list; struct list_head bindings; - char name[NFT_SET_MAXNAMELEN]; + char *name; u32 ktype; u32 dtype; u32 objtype; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index d9c03a8608ee..b5e73e80b7b6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -4,7 +4,7 @@ #define NFT_NAME_MAXLEN 256 #define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN -#define NFT_SET_MAXNAMELEN 32 +#define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_OBJ_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 -- cgit v1.2.3 From 615095752100748e221028fc96163c2b78185ae4 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:44 +0200 Subject: netfilter: nf_tables: Allow object names of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/uapi/linux/netfilter/nf_tables.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 66ba62fa7d90..f9795fe394f3 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1016,7 +1016,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type, */ struct nft_object { struct list_head list; - char name[NFT_OBJ_MAXNAMELEN]; + char *name; struct nft_table *table; u32 genmask:2, use:30; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b5e73e80b7b6..be25cf69295b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -5,7 +5,7 @@ #define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN -#define NFT_OBJ_MAXNAMELEN 32 +#define NFT_OBJ_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_USERDATA_MAXLEN 256 /** -- cgit v1.2.3 From 4d3a57f23dec59f0a2362e63540b2d01b37afe0a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Jul 2017 11:22:04 +0200 Subject: netfilter: conntrack: do not enable connection tracking unless needed Discussion during NFWS 2017 in Faro has shown that the current conntrack behaviour is unreasonable. Even if conntrack module is loaded on behalf of a single net namespace, its turned on for all namespaces, which is expensive. Commit 481fa373476 ("netfilter: conntrack: add nf_conntrack_default_on sysctl") attempted to provide an alternative to the 'default on' behaviour by adding a sysctl to change it. However, as Eric points out, the sysctl only becomes available once the module is loaded, and then its too late. So we either have to move the sysctl to the core, or, alternatively, change conntrack to become active only once the rule set requires this. This does the latter, conntrack is only enabled when a rule needs it. Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 6d14b36e3a49..1b8de164d744 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -73,21 +73,6 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; -#ifdef CONFIG_SYSCTL -/* Protocol pernet registration. */ -int nf_ct_l3proto_pernet_register(struct net *net, - struct nf_conntrack_l3proto *proto); -#else -static inline int nf_ct_l3proto_pernet_register(struct net *n, - struct nf_conntrack_l3proto *p) -{ - return 0; -} -#endif - -void nf_ct_l3proto_pernet_unregister(struct net *net, - struct nf_conntrack_l3proto *proto); - /* Protocol global registration. */ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto); void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto); -- cgit v1.2.3 From e7942d0633c47c791ece6afa038be9cf977226de Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:18 +0200 Subject: tcp: remove prequeue support prequeue is a tcp receive optimization that moves part of rx processing from bh to process context. This only works if the socket being processed belongs to a process that is blocked in recv on that socket. In practice, this doesn't happen anymore that often because nowadays servers tend to use an event driven (epoll) model. Even normal client applications (web browsers) commonly use many tcp connections in parallel. This has measureable impact only in netperf (which uses plain recv and thus allows prequeue use) from host to locally running vm (~4%), however, there were no changes when using netperf between two physical hosts with ixgbe interfaces. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/tcp.h | 9 --------- include/net/tcp.h | 11 ----------- 2 files changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 542ca1ae02c4..32fb37cfb0d1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -192,15 +192,6 @@ struct tcp_sock { struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - /* Data for direct copy to user */ - struct { - struct sk_buff_head prequeue; - struct task_struct *task; - struct msghdr *msg; - int memory; - int len; - } ucopy; - u32 snd_wl1; /* Sequence for window update */ u32 snd_wnd; /* The window we expect to receive */ u32 max_window; /* Maximal window ever seen from peer */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 12d68335acd4..93f115cfc8f8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1244,17 +1244,6 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) __tcp_checksum_complete(skb); } -/* Prequeue for VJ style copy to user, combined with checksumming. */ - -static inline void tcp_prequeue_init(struct tcp_sock *tp) -{ - tp->ucopy.task = NULL; - tp->ucopy.len = 0; - tp->ucopy.memory = 0; - skb_queue_head_init(&tp->ucopy.prequeue); -} - -bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); int tcp_filter(struct sock *sk, struct sk_buff *skb); -- cgit v1.2.3 From b6690b14386698ce2c19309abad3f17656bdfaea Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:20 +0200 Subject: tcp: remove low_latency sysctl Was only checked by the removed prequeue code. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 93f115cfc8f8..8507c81fb0e9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -256,7 +256,6 @@ extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; -extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; -- cgit v1.2.3 From 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:21 +0200 Subject: tcp: remove header prediction Like prequeue, I am not sure this is overly useful nowadays. If we receive a train of packets, GRO will aggregate them if the headers are the same (HP predates GRO by several years) so we don't get a per-packet benefit, only a per-aggregated-packet one. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ------ include/net/tcp.h | 23 ----------------------- 2 files changed, 29 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 32fb37cfb0d1..d7389ea36e10 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -147,12 +147,6 @@ struct tcp_sock { u16 tcp_header_len; /* Bytes of tcp header to send */ u16 gso_segs; /* Max number of segs per GSO packet */ -/* - * Header prediction flags - * 0x5?10 << 16 + snd_wnd in net byte order - */ - __be32 pred_flags; - /* * RFC793 variables by their proper names. This means you can * read the code and the spec side by side (and laugh ...) diff --git a/include/net/tcp.h b/include/net/tcp.h index 8507c81fb0e9..8f11b82b5b5a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -631,29 +631,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } -static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) -{ - tp->pred_flags = htonl((tp->tcp_header_len << 26) | - ntohl(TCP_FLAG_ACK) | - snd_wnd); -} - -static inline void tcp_fast_path_on(struct tcp_sock *tp) -{ - __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); -} - -static inline void tcp_fast_path_check(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && - tp->rcv_wnd && - atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && - !tp->urg_data) - tcp_fast_path_on(tp); -} - /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(struct sock *sk) { -- cgit v1.2.3 From 573aeb0492be3d0e5be9796a0c91abde794c1e36 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:22 +0200 Subject: tcp: remove CA_ACK_SLOWPATH re-indent tcp_ack, and remove CA_ACK_SLOWPATH; it is always set now. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8f11b82b5b5a..3ecb62811004 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -880,9 +880,8 @@ enum tcp_ca_event { /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ enum tcp_ca_ack_event_flags { - CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */ - CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */ - CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */ + CA_ACK_WIN_UPDATE = (1 << 0), /* ACK updated window */ + CA_ACK_ECE = (1 << 1), /* ECE bit is set on ack */ }; /* -- cgit v1.2.3 From 3282e65558b3651e230ee985c174c35cb2fedaf1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:23 +0200 Subject: tcp: remove unused mib counters was used by tcp prequeue and header prediction. TCPFORWARDRETRANS use was removed in january. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index d85693295798..b3f346fb9fe3 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -184,14 +184,7 @@ enum LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */ LINUX_MIB_LISTENOVERFLOWS, /* ListenOverflows */ LINUX_MIB_LISTENDROPS, /* ListenDrops */ - LINUX_MIB_TCPPREQUEUED, /* TCPPrequeued */ - LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, /* TCPDirectCopyFromBacklog */ - LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, /* TCPDirectCopyFromPrequeue */ - LINUX_MIB_TCPPREQUEUEDROPPED, /* TCPPrequeueDropped */ - LINUX_MIB_TCPHPHITS, /* TCPHPHits */ - LINUX_MIB_TCPHPHITSTOUSER, /* TCPHPHitsToUser */ LINUX_MIB_TCPPUREACKS, /* TCPPureAcks */ - LINUX_MIB_TCPHPACKS, /* TCPHPAcks */ LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */ LINUX_MIB_TCPSACKRECOVERY, /* TCPSackRecovery */ LINUX_MIB_TCPSACKRENEGING, /* TCPSACKReneging */ @@ -208,14 +201,12 @@ enum LINUX_MIB_TCPSACKFAILURES, /* TCPSackFailures */ LINUX_MIB_TCPLOSSFAILURES, /* TCPLossFailures */ LINUX_MIB_TCPFASTRETRANS, /* TCPFastRetrans */ - LINUX_MIB_TCPFORWARDRETRANS, /* TCPForwardRetrans */ LINUX_MIB_TCPSLOWSTARTRETRANS, /* TCPSlowStartRetrans */ LINUX_MIB_TCPTIMEOUTS, /* TCPTimeouts */ LINUX_MIB_TCPLOSSPROBES, /* TCPLossProbes */ LINUX_MIB_TCPLOSSPROBERECOVERY, /* TCPLossProbeRecovery */ LINUX_MIB_TCPRENORECOVERYFAIL, /* TCPRenoRecoveryFail */ LINUX_MIB_TCPSACKRECOVERYFAIL, /* TCPSackRecoveryFail */ - LINUX_MIB_TCPSCHEDULERFAILED, /* TCPSchedulerFailed */ LINUX_MIB_TCPRCVCOLLAPSED, /* TCPRcvCollapsed */ LINUX_MIB_TCPDSACKOLDSENT, /* TCPDSACKOldSent */ LINUX_MIB_TCPDSACKOFOSENT, /* TCPDSACKOfoSent */ -- cgit v1.2.3 From f248aff86d1fd6e60b656c2af278f1723c3b84c2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 31 Jul 2017 12:04:25 -0700 Subject: net: phy: mdio-bcm-unimac: Allow specifying platform data In preparation for having the bcmgenet driver migrate over the mdio-bcm-unimac driver, add a platform data structure which allows passing integrating specific details like bus name, wait function to complete MDIO operations and PHY mask. We also define what the platform device name contract is by defining UNIMAC_MDIO_DRV_NAME and moving it to the platform_data header. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/platform_data/mdio-bcm-unimac.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/platform_data/mdio-bcm-unimac.h (limited to 'include') diff --git a/include/linux/platform_data/mdio-bcm-unimac.h b/include/linux/platform_data/mdio-bcm-unimac.h new file mode 100644 index 000000000000..8a5f9f0b2c52 --- /dev/null +++ b/include/linux/platform_data/mdio-bcm-unimac.h @@ -0,0 +1,13 @@ +#ifndef __MDIO_BCM_UNIMAC_PDATA_H +#define __MDIO_BCM_UNIMAC_PDATA_H + +struct unimac_mdio_pdata { + u32 phy_mask; + int (*wait_func)(void *data); + void *wait_func_data; + const char *bus_name; +}; + +#define UNIMAC_MDIO_DRV_NAME "unimac-mdio" + +#endif /* __MDIO_BCM_UNIMAC_PDATA_H */ -- cgit v1.2.3 From bb7c19f96012720b895111300b9d9f3f858c3a69 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 28 Jul 2017 10:28:21 -0700 Subject: tcp: add related fields into SCM_TIMESTAMPING_OPT_STATS Add the following stats into SCM_TIMESTAMPING_OPT_STATS control msg: TCP_NLA_PACING_RATE TCP_NLA_DELIVERY_RATE TCP_NLA_SND_CWND TCP_NLA_REORDERING TCP_NLA_MIN_RTT TCP_NLA_RECUR_RETRANS TCP_NLA_DELIVERY_RATE_APP_LMT Signed-off-by: Wei Wang Acked-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index a5507c977497..030e594bab45 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -231,6 +231,14 @@ enum { TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */ TCP_NLA_DATA_SEGS_OUT, /* Data pkts sent including retransmission */ TCP_NLA_TOTAL_RETRANS, /* Data pkts retransmitted */ + TCP_NLA_PACING_RATE, /* Pacing rate in bytes per second */ + TCP_NLA_DELIVERY_RATE, /* Delivery rate in bytes per second */ + TCP_NLA_SND_CWND, /* Sending congestion window */ + TCP_NLA_REORDERING, /* Reordering metric */ + TCP_NLA_MIN_RTT, /* minimum RTT */ + TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */ + TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */ + }; /* for TCP_MD5SIG socket option */ -- cgit v1.2.3 From 306b13eb3cf9515a8214bbf5d69d811371d05792 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 28 Jul 2017 16:22:41 -0700 Subject: proto_ops: Add locked held versions of sendmsg and sendpage Add new proto_ops sendmsg_locked and sendpage_locked that can be called when the socket lock is already held. Correspondingly, add kernel_sendmsg_locked and kernel_sendpage_locked as front end functions. These functions will be used in zero proxy so that we can take the socket lock in a ULP sendmsg/sendpage and then directly call the backend transport proto_ops functions. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/net.h | 12 ++++++++++++ include/net/sock.h | 3 +++ include/net/tcp.h | 3 +++ 3 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index dda2cc939a53..b5c15b31709b 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -190,8 +190,16 @@ struct proto_ops { struct pipe_inode_info *pipe, size_t len, unsigned int flags); int (*set_peek_off)(struct sock *sk, int val); int (*peek_len)(struct socket *sock); + + /* The following functions are called internally by kernel with + * sock lock already held. + */ int (*read_sock)(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); + int (*sendpage_locked)(struct sock *sk, struct page *page, + int offset, size_t size, int flags); + int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, + size_t size); }; #define DECLARE_SOCKADDR(type, dst, src) \ @@ -279,6 +287,8 @@ do { \ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); +int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, + struct kvec *vec, size_t num, size_t len); int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len, int flags); @@ -297,6 +307,8 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); +int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, + size_t size, int flags); int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how); diff --git a/include/net/sock.h b/include/net/sock.h index 7c0632c7e870..393c38e9f6aa 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1582,11 +1582,14 @@ int sock_no_shutdown(struct socket *, int); int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *); int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int); int sock_no_sendmsg(struct socket *, struct msghdr *, size_t); +int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len); int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); +ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page, + int offset, size_t size, int flags); /* * Functions to fill in entries in struct proto_ops when a protocol diff --git a/include/net/tcp.h b/include/net/tcp.h index 3ecb62811004..bb1881b4ce48 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -350,8 +350,11 @@ int tcp_v4_rcv(struct sk_buff *skb); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, + size_t size, int flags); ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tcp_release_cb(struct sock *sk); -- cgit v1.2.3 From 20bf50de3028cb15fa81e1d1e63ab6e0c85257fc Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 28 Jul 2017 16:22:42 -0700 Subject: skbuff: Function to send an skbuf on a socket Add skb_send_sock to send an skbuff on a socket within the kernel. Arguments include an offset so that an skbuf might be sent in mulitple calls (e.g. send buffer limit is hit). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4093552be1de..18e76bf9574e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3113,6 +3113,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); +int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, + int len); +int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, -- cgit v1.2.3 From bbb03029a899679d73e62d7e6ae80348cc5d0054 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 28 Jul 2017 16:22:43 -0700 Subject: strparser: Generalize strparser Generalize strparser from more than just being used in conjunction with read_sock. strparser will also be used in the send path with zero proxy. The primary change is to create strp_process function that performs the critical processing on skbs. The documentation is also updated to reflect the new uses. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 119 +++++++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/include/net/strparser.h b/include/net/strparser.h index 0c28ad97c52f..4fe966a0ad92 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -18,26 +18,26 @@ #define STRP_STATS_INCR(stat) ((stat)++) struct strp_stats { - unsigned long long rx_msgs; - unsigned long long rx_bytes; - unsigned int rx_mem_fail; - unsigned int rx_need_more_hdr; - unsigned int rx_msg_too_big; - unsigned int rx_msg_timeouts; - unsigned int rx_bad_hdr_len; + unsigned long long msgs; + unsigned long long bytes; + unsigned int mem_fail; + unsigned int need_more_hdr; + unsigned int msg_too_big; + unsigned int msg_timeouts; + unsigned int bad_hdr_len; }; struct strp_aggr_stats { - unsigned long long rx_msgs; - unsigned long long rx_bytes; - unsigned int rx_mem_fail; - unsigned int rx_need_more_hdr; - unsigned int rx_msg_too_big; - unsigned int rx_msg_timeouts; - unsigned int rx_bad_hdr_len; - unsigned int rx_aborts; - unsigned int rx_interrupted; - unsigned int rx_unrecov_intr; + unsigned long long msgs; + unsigned long long bytes; + unsigned int mem_fail; + unsigned int need_more_hdr; + unsigned int msg_too_big; + unsigned int msg_timeouts; + unsigned int bad_hdr_len; + unsigned int aborts; + unsigned int interrupted; + unsigned int unrecov_intr; }; struct strparser; @@ -48,16 +48,18 @@ struct strp_callbacks { void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); int (*read_sock_done)(struct strparser *strp, int err); void (*abort_parser)(struct strparser *strp, int err); + void (*lock)(struct strparser *strp); + void (*unlock)(struct strparser *strp); }; -struct strp_rx_msg { +struct strp_msg { int full_len; int offset; }; -static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb) +static inline struct strp_msg *strp_msg(struct sk_buff *skb) { - return (struct strp_rx_msg *)((void *)skb->cb + + return (struct strp_msg *)((void *)skb->cb + offsetof(struct qdisc_skb_cb, data)); } @@ -65,18 +67,18 @@ static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb) struct strparser { struct sock *sk; - u32 rx_stopped : 1; - u32 rx_paused : 1; - u32 rx_aborted : 1; - u32 rx_interrupted : 1; - u32 rx_unrecov_intr : 1; - - struct sk_buff **rx_skb_nextp; - struct timer_list rx_msg_timer; - struct sk_buff *rx_skb_head; - unsigned int rx_need_bytes; - struct delayed_work rx_delayed_work; - struct work_struct rx_work; + u32 stopped : 1; + u32 paused : 1; + u32 aborted : 1; + u32 interrupted : 1; + u32 unrecov_intr : 1; + + struct sk_buff **skb_nextp; + struct timer_list msg_timer; + struct sk_buff *skb_head; + unsigned int need_bytes; + struct delayed_work delayed_work; + struct work_struct work; struct strp_stats stats; struct strp_callbacks cb; }; @@ -84,7 +86,7 @@ struct strparser { /* Must be called with lock held for attached socket */ static inline void strp_pause(struct strparser *strp) { - strp->rx_paused = 1; + strp->paused = 1; } /* May be called without holding lock for attached socket */ @@ -97,37 +99,37 @@ static inline void save_strp_stats(struct strparser *strp, #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += \ strp->stats._stat) - SAVE_PSOCK_STATS(rx_msgs); - SAVE_PSOCK_STATS(rx_bytes); - SAVE_PSOCK_STATS(rx_mem_fail); - SAVE_PSOCK_STATS(rx_need_more_hdr); - SAVE_PSOCK_STATS(rx_msg_too_big); - SAVE_PSOCK_STATS(rx_msg_timeouts); - SAVE_PSOCK_STATS(rx_bad_hdr_len); + SAVE_PSOCK_STATS(msgs); + SAVE_PSOCK_STATS(bytes); + SAVE_PSOCK_STATS(mem_fail); + SAVE_PSOCK_STATS(need_more_hdr); + SAVE_PSOCK_STATS(msg_too_big); + SAVE_PSOCK_STATS(msg_timeouts); + SAVE_PSOCK_STATS(bad_hdr_len); #undef SAVE_PSOCK_STATS - if (strp->rx_aborted) - agg_stats->rx_aborts++; - if (strp->rx_interrupted) - agg_stats->rx_interrupted++; - if (strp->rx_unrecov_intr) - agg_stats->rx_unrecov_intr++; + if (strp->aborted) + agg_stats->aborts++; + if (strp->interrupted) + agg_stats->interrupted++; + if (strp->unrecov_intr) + agg_stats->unrecov_intr++; } static inline void aggregate_strp_stats(struct strp_aggr_stats *stats, struct strp_aggr_stats *agg_stats) { #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat) - SAVE_PSOCK_STATS(rx_msgs); - SAVE_PSOCK_STATS(rx_bytes); - SAVE_PSOCK_STATS(rx_mem_fail); - SAVE_PSOCK_STATS(rx_need_more_hdr); - SAVE_PSOCK_STATS(rx_msg_too_big); - SAVE_PSOCK_STATS(rx_msg_timeouts); - SAVE_PSOCK_STATS(rx_bad_hdr_len); - SAVE_PSOCK_STATS(rx_aborts); - SAVE_PSOCK_STATS(rx_interrupted); - SAVE_PSOCK_STATS(rx_unrecov_intr); + SAVE_PSOCK_STATS(msgs); + SAVE_PSOCK_STATS(bytes); + SAVE_PSOCK_STATS(mem_fail); + SAVE_PSOCK_STATS(need_more_hdr); + SAVE_PSOCK_STATS(msg_too_big); + SAVE_PSOCK_STATS(msg_timeouts); + SAVE_PSOCK_STATS(bad_hdr_len); + SAVE_PSOCK_STATS(aborts); + SAVE_PSOCK_STATS(interrupted); + SAVE_PSOCK_STATS(unrecov_intr); #undef SAVE_PSOCK_STATS } @@ -135,8 +137,11 @@ static inline void aggregate_strp_stats(struct strp_aggr_stats *stats, void strp_done(struct strparser *strp); void strp_stop(struct strparser *strp); void strp_check_rcv(struct strparser *strp); -int strp_init(struct strparser *strp, struct sock *csk, +int strp_init(struct strparser *strp, struct sock *sk, struct strp_callbacks *cb); void strp_data_ready(struct strparser *strp); +int strp_process(struct strparser *strp, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len, + size_t max_msg_size, long timeo); #endif /* __NET_STRPARSER_H_ */ -- cgit v1.2.3 From c613c209c3f351d47158f728271d0c73b6dd24c6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 31 Jul 2017 08:15:47 -0400 Subject: net: add skb_frag_foreach_page and use with kmap_atomic Skb frags may contain compound pages. Various operations map frags temporarily using kmap_atomic, but this function works on single pages, not whole compound pages. The distinction is only relevant for high mem pages that require temporary mappings. Introduce a looping mechanism that for compound highmem pages maps one page at a time, does not change behavior on other pages. Use the loop in the kmap_atomic callers in net/core/skbuff.c. Verified by triggering skb_copy_bits with tcpdump -n -c 100 -i ${DEV} -w /dev/null & netperf -t TCP_STREAM -H ${HOST} and by triggering __skb_checksum with ethtool -K ${DEV} tx off repeated the tests with looping on a non-highmem platform (x86_64) by making skb_frag_must_loop always return true. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 18e76bf9574e..6f9f1b2715ec 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -345,6 +345,42 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) frag->size -= delta; } +static inline bool skb_frag_must_loop(struct page *p) +{ +#if defined(CONFIG_HIGHMEM) + if (PageHighMem(p)) + return true; +#endif + return false; +} + +/** + * skb_frag_foreach_page - loop over pages in a fragment + * + * @f: skb frag to operate on + * @f_off: offset from start of f->page.p + * @f_len: length from f_off to loop over + * @p: (temp var) current page + * @p_off: (temp var) offset from start of current page, + * non-zero only on first page. + * @p_len: (temp var) length in current page, + * < PAGE_SIZE only on first and last page. + * @copied: (temp var) length so far, excluding current p_len. + * + * A fragment can hold a compound page, in which case per-page + * operations, notably kmap_atomic, must be called for each + * regular page. + */ +#define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied) \ + for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT), \ + p_off = (f_off) & (PAGE_SIZE - 1), \ + p_len = skb_frag_must_loop(p) ? \ + min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \ + copied = 0; \ + copied < f_len; \ + copied += p_len, p++, p_off = 0, \ + p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \ + #define HAVE_HW_TIME_STAMP /** -- cgit v1.2.3 From 46587e4a312780a63132483bc8678c397eca6aff Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 1 Aug 2017 16:32:39 -0400 Subject: net: dsa: remove PHY device argument from .set_eee The DSA switch operations for EEE are only meant to configure a port's MAC EEE settings. The port's PHY EEE settings are accessed by the DSA layer and must be made available via a proper PHY driver. In order to reduce this confusion, remove the phy_device argument from the .set_eee operation. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 88da272d20d0..ce46db323394 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -335,7 +335,6 @@ struct dsa_switch_ops { * EEE setttings */ int (*set_eee)(struct dsa_switch *ds, int port, - struct phy_device *phydev, struct ethtool_eee *e); int (*get_eee)(struct dsa_switch *ds, int port, struct ethtool_eee *e); -- cgit v1.2.3 From 08f500610f39809c107f206cba1f799c98c38054 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 1 Aug 2017 16:32:41 -0400 Subject: net: dsa: rename switch EEE ops To avoid confusion with the PHY EEE settings, rename the .set_eee and .get_eee ops to respectively .set_mac_eee and .get_mac_eee. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index ce46db323394..0b1a0622b33c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -332,12 +332,12 @@ struct dsa_switch_ops { struct phy_device *phy); /* - * EEE setttings + * Port's MAC EEE settings */ - int (*set_eee)(struct dsa_switch *ds, int port, - struct ethtool_eee *e); - int (*get_eee)(struct dsa_switch *ds, int port, - struct ethtool_eee *e); + int (*set_mac_eee)(struct dsa_switch *ds, int port, + struct ethtool_eee *e); + int (*get_mac_eee)(struct dsa_switch *ds, int port, + struct ethtool_eee *e); /* EEPROM access */ int (*get_eeprom_len)(struct dsa_switch *ds); -- cgit v1.2.3 From ffdb5211da1c20354f1b40c204b6cf6c29c68161 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Tue, 1 Aug 2017 12:49:08 +0300 Subject: xfrm: Auto-load xfrm offload modules IPSec crypto offload depends on the protocol-specific offload module (such as esp_offload.ko). When the user installs an SA with crypto-offload, load the offload module automatically, in the same way that the protocol module is loaded (such as esp.ko) Signed-off-by: Ilan Tayari Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index afb4929d7232..5a360100136c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -43,6 +43,8 @@ MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap)) #define MODULE_ALIAS_XFRM_TYPE(family, proto) \ MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto)) +#define MODULE_ALIAS_XFRM_OFFLOAD_TYPE(family, proto) \ + MODULE_ALIAS("xfrm-offload-" __stringify(family) "-" __stringify(proto)) #ifdef CONFIG_XFRM_STATISTICS #define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) @@ -1558,7 +1560,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x); int xfrm_state_mtu(struct xfrm_state *x, int mtu); -int __xfrm_init_state(struct xfrm_state *x, bool init_replay); +int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); -- cgit v1.2.3 From f70f250a77313b542531e1ff7a449cd0ccd83ec0 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 1 Aug 2017 12:49:10 +0300 Subject: net: Allow IPsec GSO for local sockets This patch allows local sockets to make use of XFRM GSO code path. Signed-off-by: Steffen Klassert Signed-off-by: Ilan Tayari --- include/net/xfrm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5a360100136c..18d7de34a5c3 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1858,6 +1858,20 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); +static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) +{ + struct xfrm_state *x = dst->xfrm; + + if (!x || !x->type_offload) + return false; + + if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) && + !dst->child->xfrm) + return true; + + return false; +} + static inline void xfrm_dev_state_delete(struct xfrm_state *x) { struct xfrm_state_offload *xso = &x->xso; @@ -1900,6 +1914,11 @@ static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x { return false; } + +static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) +{ + return false; +} #endif static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) -- cgit v1.2.3 From 2a04aabf5c96c9e25df488949b21223bcc623815 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 1 Aug 2017 12:25:01 +0200 Subject: netfilter: constify nf_conntrack_l3/4proto parameters When a nf_conntrack_l3/4proto parameter is not on the left hand side of an assignment, its address is not taken, and it is not passed to a function that may modify its fields, then it can be declared as const. This change is useful from a documentation point of view, and can possibly facilitate making some nf_conntrack_l3/4proto structures const subsequently. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 14 +++++++------- include/net/netfilter/nf_conntrack_timeout.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7032e044bbe2..b6e27cafb1d9 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -125,23 +125,23 @@ struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto); -void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); +void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); /* Protocol pernet registration. */ int nf_ct_l4proto_pernet_register_one(struct net *net, - struct nf_conntrack_l4proto *proto); + const struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_pernet_unregister_one(struct net *net, - struct nf_conntrack_l4proto *proto); + const struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *proto[], + struct nf_conntrack_l4proto *const proto[], unsigned int num_proto); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *proto[], - unsigned int num_proto); + struct nf_conntrack_l4proto *const proto[], + unsigned int num_proto); /* Protocol global registration. */ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[], unsigned int num_proto); void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[], diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index d40b89355fdd..b222957062b5 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -68,7 +68,7 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct, static inline unsigned int * nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_conn_timeout *timeout_ext; -- cgit v1.2.3 From b2f9d432deebab5096aad5942c2f2b1ec2865f5a Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 1 Aug 2017 13:18:09 -0700 Subject: flow_dissector: remove unused functions They are introduced by commit f70ea018da06 ("net: Add functions to get skb->hash based on flow structures") but never gets used in tree. Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f9f1b2715ec..be76082f48aa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1158,8 +1158,6 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->hash; } -__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6); - static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6) { if (!skb->l4_hash && !skb->sw_hash) { @@ -1172,20 +1170,6 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 return skb->hash; } -__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl); - -static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4) -{ - if (!skb->l4_hash && !skb->sw_hash) { - struct flow_keys keys; - __u32 hash = __get_hash_from_flowi4(fl4, &keys); - - __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); - } - - return skb->hash; -} - __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) -- cgit v1.2.3 From 2202e35d47fff379fc744e6bd3c111b018cc77df Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 2 Aug 2017 09:56:06 +0200 Subject: ipv4: fib: Remove unused functions Previous patches converted users of these functions to provide offload indication using the nexthop's flags instead of the FIB info's. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 41d580c6185f..ef8992d49bc3 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -124,7 +124,6 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; #endif - unsigned int fib_offload_cnt; struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev @@ -177,18 +176,6 @@ struct fib_result_nl { __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); -static inline void fib_info_offload_inc(struct fib_info *fi) -{ - fi->fib_offload_cnt++; - fi->fib_flags |= RTNH_F_OFFLOAD; -} - -static inline void fib_info_offload_dec(struct fib_info *fi) -{ - if (--fi->fib_offload_cnt == 0) - fi->fib_flags &= ~RTNH_F_OFFLOAD; -} - #define FIB_RES_SADDR(net, res) \ ((FIB_RES_NH(res).nh_saddr_genid == \ atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ -- cgit v1.2.3 From e61e4055b165f4c645ce2a85890b313abf841f67 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:09 +0800 Subject: sctp: remove the typedef sctp_shutdownhdr_t This patch is to remove the typedef sctp_shutdownhdr_t, and replace with struct sctp_shutdownhdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index ba007163acfd..7a586ba7dcd4 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -416,13 +416,13 @@ struct sctp_abort_chunk { /* For the graceful shutdown we must carry the tag (in common header) * and the highest consecutive acking value. */ -typedef struct sctp_shutdownhdr { +struct sctp_shutdownhdr { __be32 cum_tsn_ack; -} sctp_shutdownhdr_t; +}; struct sctp_shutdown_chunk_t { struct sctp_chunkhdr chunk_hdr; - sctp_shutdownhdr_t shutdown_hdr; + struct sctp_shutdownhdr shutdown_hdr; }; /* RFC 2960. Section 3.3.10 Operation Error (ERROR) (9) */ -- cgit v1.2.3 From ac23e68133f4570f40ec0910286ced08ced2d378 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:10 +0800 Subject: sctp: fix the name of struct sctp_shutdown_chunk_t This patch is to fix the name of struct sctp_shutdown_chunk_t , replace with struct sctp_initack_chunk in the places where it's using it. It is also to fix some indent problem. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 7a586ba7dcd4..553020cbf6f7 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -420,7 +420,7 @@ struct sctp_shutdownhdr { __be32 cum_tsn_ack; }; -struct sctp_shutdown_chunk_t { +struct sctp_shutdown_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_shutdownhdr shutdown_hdr; }; -- cgit v1.2.3 From d8238d9dab8fbea22dd04f4e77639c7f7b83eef7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:11 +0800 Subject: sctp: remove the typedef sctp_errhdr_t This patch is to remove the typedef sctp_errhdr_t, and replace with struct sctp_errhdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- include/net/sctp/sctp.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 553020cbf6f7..d35bdd30fa0f 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -427,15 +427,15 @@ struct sctp_shutdown_chunk { /* RFC 2960. Section 3.3.10 Operation Error (ERROR) (9) */ -typedef struct sctp_errhdr { +struct sctp_errhdr { __be16 cause; __be16 length; __u8 variable[0]; -} sctp_errhdr_t; +}; typedef struct sctp_operr_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_errhdr_t err_hdr; + struct sctp_errhdr err_hdr; } sctp_operr_chunk_t; /* RFC 2960 3.3.10 - Operation Error diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 45fd4c6056b5..84650fed1e6a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -479,13 +479,13 @@ for (pos.v = chunk->member;\ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ -for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ +for (err = (struct sctp_errhdr *)((void *)chunk_hdr + \ sizeof(struct sctp_chunkhdr));\ - ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\ + ((void *)err + offsetof(struct sctp_errhdr, length) + sizeof(err->length) <=\ (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ - ntohs(err->length) >= sizeof(sctp_errhdr_t); \ - err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) + ntohs(err->length) >= sizeof(struct sctp_errhdr); \ + err = (struct sctp_errhdr *)((void *)err + SCTP_PAD4(ntohs(err->length)))) #define sctp_walk_fwdtsn(pos, chunk)\ _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk)) -- cgit v1.2.3 From 87caeba7914979ef6a5765dfa3163b51a30133ab Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:12 +0800 Subject: sctp: remove the typedef sctp_operr_chunk_t This patch is to remove the typedef sctp_operr_chunk_t, and replace with struct sctp_operr_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index d35bdd30fa0f..07c12e98fa85 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -433,10 +433,10 @@ struct sctp_errhdr { __u8 variable[0]; }; -typedef struct sctp_operr_chunk { +struct sctp_operr_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_errhdr err_hdr; -} sctp_operr_chunk_t; +}; /* RFC 2960 3.3.10 - Operation Error * -- cgit v1.2.3 From 2a4932167772874c5bc4b3dfebf61cfadb5554b9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:13 +0800 Subject: sctp: remove the typedef sctp_error_t This patch is to remove the typedef sctp_error_t, and replace with enum sctp_error in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 07c12e98fa85..c74ea93e36eb 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -457,7 +457,7 @@ struct sctp_operr_chunk { * 9 No User Data * 10 Cookie Received While Shutting Down */ -typedef enum { +enum sctp_error { SCTP_ERROR_NO_ERROR = cpu_to_be16(0x00), SCTP_ERROR_INV_STRM = cpu_to_be16(0x01), @@ -512,7 +512,7 @@ typedef enum { * 0x0105 Unsupported HMAC Identifier */ SCTP_ERROR_UNSUP_HMAC = cpu_to_be16(0x0105) -} sctp_error_t; +}; -- cgit v1.2.3 From 1fb6d83bd37dc7d0949b8a8005f7c24dddc3ee1e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:14 +0800 Subject: sctp: remove the typedef sctp_ecnehdr_t This patch is to remove the typedef sctp_ecnehdr_t, and replace with struct sctp_ecnehdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index c74ea93e36eb..5ea739b99f8d 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -519,13 +519,13 @@ enum sctp_error { /* RFC 2960. Appendix A. Explicit Congestion Notification. * Explicit Congestion Notification Echo (ECNE) (12) */ -typedef struct sctp_ecnehdr { +struct sctp_ecnehdr { __be32 lowest_tsn; -} sctp_ecnehdr_t; +}; typedef struct sctp_ecne_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_ecnehdr_t ence_hdr; + struct sctp_ecnehdr ence_hdr; } sctp_ecne_chunk_t; /* RFC 2960. Appendix A. Explicit Congestion Notification. -- cgit v1.2.3 From b515fd27591dea06bc9f0178fba361b43b76ab6b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:15 +0800 Subject: sctp: remove the typedef sctp_ecne_chunk_t This patch is to remove the typedef sctp_ecne_chunk_t, and replace with struct sctp_ecne_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 5ea739b99f8d..026bbdfcaf44 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -523,10 +523,10 @@ struct sctp_ecnehdr { __be32 lowest_tsn; }; -typedef struct sctp_ecne_chunk { +struct sctp_ecne_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_ecnehdr ence_hdr; -} sctp_ecne_chunk_t; +}; /* RFC 2960. Appendix A. Explicit Congestion Notification. * Congestion Window Reduced (CWR) (13) -- cgit v1.2.3 From 65f77105438a7793836d7ba2d9a05fa585b8caf9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:16 +0800 Subject: sctp: remove the typedef sctp_cwrhdr_t This patch is to remove the typedef sctp_cwrhdr_t, and replace with struct sctp_cwrhdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 026bbdfcaf44..3c8c418425e6 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -531,13 +531,13 @@ struct sctp_ecne_chunk { /* RFC 2960. Appendix A. Explicit Congestion Notification. * Congestion Window Reduced (CWR) (13) */ -typedef struct sctp_cwrhdr { +struct sctp_cwrhdr { __be32 lowest_tsn; -} sctp_cwrhdr_t; +}; typedef struct sctp_cwr_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_cwrhdr_t cwr_hdr; + struct sctp_cwrhdr cwr_hdr; } sctp_cwr_chunk_t; /* PR-SCTP -- cgit v1.2.3 From 05b25d0ba68d5b6d5d06f7c6cc3fec49ffa2df38 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:17 +0800 Subject: sctp: remove the typedef sctp_cwr_chunk_t Remove this typedef including the struct, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 3c8c418425e6..2a97639eb035 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -535,11 +535,6 @@ struct sctp_cwrhdr { __be32 lowest_tsn; }; -typedef struct sctp_cwr_chunk { - struct sctp_chunkhdr chunk_hdr; - struct sctp_cwrhdr cwr_hdr; -} sctp_cwr_chunk_t; - /* PR-SCTP * 3.2 Forward Cumulative TSN Chunk Definition (FORWARD TSN) * -- cgit v1.2.3 From 8b32f2348a0441ef202562618b13d1bb494f3a47 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:18 +0800 Subject: sctp: remove the typedef sctp_addip_param_t This patch is to remove the typedef sctp_addip_param_t, and replace with struct sctp_addip_param in the places where it's using this typedef. It is to use sizeof(variable) instead of sizeof(type), and also fix some indent problems. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 2a97639eb035..5b7d6d9d3fc5 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -629,10 +629,10 @@ struct sctp_fwdtsn_chunk { * The ASCONF Parameter Response is used in the ASCONF-ACK to * report status of ASCONF processing. */ -typedef struct sctp_addip_param { - struct sctp_paramhdr param_hdr; - __be32 crr_id; -} sctp_addip_param_t; +struct sctp_addip_param { + struct sctp_paramhdr param_hdr; + __be32 crr_id; +}; typedef struct sctp_addiphdr { __be32 serial; -- cgit v1.2.3 From 65205cc465e9b37abbdbb3d595c46081b97e35bc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:19 +0800 Subject: sctp: remove the typedef sctp_addiphdr_t This patch is to remove the typedef sctp_addiphdr_t, and replace with struct sctp_addiphdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 5b7d6d9d3fc5..83dac9b0b4bd 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -634,14 +634,14 @@ struct sctp_addip_param { __be32 crr_id; }; -typedef struct sctp_addiphdr { +struct sctp_addiphdr { __be32 serial; __u8 params[0]; -} sctp_addiphdr_t; +}; typedef struct sctp_addip_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_addiphdr_t addip_hdr; + struct sctp_addiphdr addip_hdr; } sctp_addip_chunk_t; /* AUTH -- cgit v1.2.3 From 68d75469468620c37cd58dc352f1dcec8c3896b6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:20 +0800 Subject: sctp: remove the typedef sctp_addip_chunk_t This patch is to remove the typedef sctp_addip_chunk_t, and replace with struct sctp_addip_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 83dac9b0b4bd..e7b439ed6371 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -639,10 +639,10 @@ struct sctp_addiphdr { __u8 params[0]; }; -typedef struct sctp_addip_chunk { +struct sctp_addip_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_addiphdr addip_hdr; -} sctp_addip_chunk_t; +}; /* AUTH * Section 4.1 Authentication Chunk (AUTH) -- cgit v1.2.3 From 96f7ef4d58100d7168e0c4d01a59fbd8589f5756 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:21 +0800 Subject: sctp: remove the typedef sctp_authhdr_t This patch is to remove the typedef sctp_authhdr_t, and replace with struct sctp_authhdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index e7b439ed6371..b7603f5efebe 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -693,15 +693,15 @@ struct sctp_addip_chunk { * HMAC: n bytes (unsigned integer) This hold the result of the HMAC * calculation. */ -typedef struct sctp_authhdr { +struct sctp_authhdr { __be16 shkey_id; __be16 hmac_id; __u8 hmac[0]; -} sctp_authhdr_t; +}; typedef struct sctp_auth_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_authhdr_t auth_hdr; + struct sctp_authhdr auth_hdr; } sctp_auth_chunk_t; struct sctp_infox { -- cgit v1.2.3 From bb96dec74543bb3ceb4ac5caf39341dadb4cb559 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:22 +0800 Subject: sctp: remove the typedef sctp_auth_chunk_t This patch is to remove the typedef sctp_auth_chunk_t, and replace with struct sctp_auth_chunk in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index b7603f5efebe..82b171e1aa0b 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -699,10 +699,10 @@ struct sctp_authhdr { __u8 hmac[0]; }; -typedef struct sctp_auth_chunk { +struct sctp_auth_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_authhdr auth_hdr; -} sctp_auth_chunk_t; +}; struct sctp_infox { struct sctp_info *sctpinfo; -- cgit v1.2.3 From 04b1d4e50e82536c12da00ee04a77510c459c844 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:11 +0200 Subject: net: core: Make the FIB notification chain generic The FIB notification chain is currently soley used by IPv4 code. However, we're going to introduce IPv6 FIB offload support, which requires these notification as well. As explained in commit c3852ef7f2f8 ("ipv4: fib: Replay events when registering FIB notifier"), upon registration to the chain, the callee receives a full dump of the FIB tables and rules by traversing all the net namespaces. The integrity of the dump is ensured by a per-namespace sequence counter that is incremented whenever a change to the tables or rules occurs. In order to allow more address families to use the chain, each family is expected to register its fib_notifier_ops in its pernet init. These operations allow the common code to read the family's sequence counter as well as dump its tables and rules in the given net namespace. Additionally, a 'family' parameter is added to sent notifications, so that listeners could distinguish between the different families. Implement the common code that allows listeners to register to the chain and for address families to register their fib_notifier_ops. Subsequent patches will implement these operations in IPv6. In the future, ipmr and ip6mr will be extended to provide these notifications as well. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/fib_notifier.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/net/ip_fib.h | 30 ++++++++---------------------- include/net/net_namespace.h | 1 + include/net/netns/ipv4.h | 1 + 4 files changed, 54 insertions(+), 22 deletions(-) create mode 100644 include/net/fib_notifier.h (limited to 'include') diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h new file mode 100644 index 000000000000..241475224f74 --- /dev/null +++ b/include/net/fib_notifier.h @@ -0,0 +1,44 @@ +#ifndef __NET_FIB_NOTIFIER_H +#define __NET_FIB_NOTIFIER_H + +#include +#include +#include + +struct fib_notifier_info { + struct net *net; + int family; +}; + +enum fib_event_type { + FIB_EVENT_ENTRY_REPLACE, + FIB_EVENT_ENTRY_APPEND, + FIB_EVENT_ENTRY_ADD, + FIB_EVENT_ENTRY_DEL, + FIB_EVENT_RULE_ADD, + FIB_EVENT_RULE_DEL, + FIB_EVENT_NH_ADD, + FIB_EVENT_NH_DEL, +}; + +struct fib_notifier_ops { + int family; + struct list_head list; + unsigned int (*fib_seq_read)(struct net *net); + int (*fib_dump)(struct net *net, struct notifier_block *nb); + struct rcu_head rcu; +}; + +int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info); +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)); +int unregister_fib_notifier(struct notifier_block *nb); +struct fib_notifier_ops * +fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net); +void fib_notifier_ops_unregister(struct fib_notifier_ops *ops); + +#endif diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ef8992d49bc3..c0295c3ec5f3 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -188,10 +189,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); #define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \ FIB_RES_SADDR(net, res)) -struct fib_notifier_info { - struct net *net; -}; - struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ u32 dst; @@ -212,25 +209,14 @@ struct fib_nh_notifier_info { struct fib_nh *fib_nh; }; -enum fib_event_type { - FIB_EVENT_ENTRY_REPLACE, - FIB_EVENT_ENTRY_APPEND, - FIB_EVENT_ENTRY_ADD, - FIB_EVENT_ENTRY_DEL, - FIB_EVENT_RULE_ADD, - FIB_EVENT_RULE_DEL, - FIB_EVENT_NH_ADD, - FIB_EVENT_NH_DEL, -}; - -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)); -int unregister_fib_notifier(struct notifier_block *nb); -int call_fib_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_notifier_info *info); -int call_fib_notifiers(struct net *net, enum fib_event_type event_type, +int call_fib4_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, struct fib_notifier_info *info); +int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + +int __net_init fib4_notifier_init(struct net *net); +void __net_exit fib4_notifier_exit(struct net *net); void fib_notify(struct net *net, struct notifier_block *nb); #ifdef CONFIG_IP_MULTIPLE_TABLES diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1c401bd4c2e0..57faa375eab9 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -88,6 +88,7 @@ struct net { /* core fib_rules */ struct list_head rules_ops; + struct list_head fib_notifier_ops; /* protected by net_mutex */ struct net_device *loopback_dev; /* The loopback */ struct netns_core core; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9a14a0850b0e..20d061c805e3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -159,6 +159,7 @@ struct netns_ipv4 { int sysctl_fib_multipath_hash_policy; #endif + struct fib_notifier_ops *notifier_ops; unsigned int fib_seq; /* protected by rtnl_mutex */ atomic_t rt_genid; -- cgit v1.2.3 From 1b2a4440858857f2f93bb2ec5bb3a60f4fcc25be Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:14 +0200 Subject: net: fib_rules: Implement notification logic in core Unlike the routing tables, the FIB rules share a common core, so instead of replicating the same logic for each address family we can simply dump the rules and send notifications from the core itself. To protect the integrity of the dump, a rules-specific sequence counter is added for each address family and incremented whenever a rule is added or deleted (under RTNL). Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/fib_rules.h | 9 +++++++++ include/net/ip_fib.h | 24 ++++++++++++------------ 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index c487bfa2f479..3d7f1cefc6f5 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,7 @@ #include #include #include +#include struct fib_kuid_range { kuid_t start; @@ -57,6 +58,7 @@ struct fib_rules_ops { int addr_size; int unresolved_rules; int nr_goto_rules; + unsigned int fib_rules_seq; int (*action)(struct fib_rule *, struct flowi *, int, @@ -89,6 +91,11 @@ struct fib_rules_ops { struct rcu_head rcu; }; +struct fib_rule_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct fib_rule *rule; +}; + #define FRA_GENERIC_POLICY \ [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ @@ -143,6 +150,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); bool fib_rule_matchall(const struct fib_rule *rule); +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family); +unsigned int fib_rules_seq_read(struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c0295c3ec5f3..1a7f7e424320 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -199,11 +199,6 @@ struct fib_entry_notifier_info { u32 tb_id; }; -struct fib_rule_notifier_info { - struct fib_notifier_info info; /* must be first */ - struct fib_rule *rule; -}; - struct fib_nh_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_nh *fib_nh; @@ -219,13 +214,6 @@ int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); void fib_notify(struct net *net, struct notifier_block *nb); -#ifdef CONFIG_IP_MULTIPLE_TABLES -void fib_rules_notify(struct net *net, struct notifier_block *nb); -#else -static inline void fib_rules_notify(struct net *net, struct notifier_block *nb) -{ -} -#endif struct fib_table { struct hlist_node tb_hlist; @@ -298,6 +286,16 @@ static inline bool fib4_rule_default(const struct fib_rule *rule) return true; } +static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} + +static inline unsigned int fib4_rules_seq_read(struct net *net) +{ + return 0; +} + #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); @@ -343,6 +341,8 @@ out: } bool fib4_rule_default(const struct fib_rule *rule); +int fib4_rules_dump(struct net *net, struct notifier_block *nb); +unsigned int fib4_rules_seq_read(struct net *net); #endif /* CONFIG_IP_MULTIPLE_TABLES */ -- cgit v1.2.3 From e3ea973159d53559c5ae9a9dbc824da9aba6cac0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:15 +0200 Subject: ipv6: fib_rules: Check if rule is a default rule As explained in commit 3c71006d15fd ("ipv4: fib_rules: Check if rule is a default rule"), drivers supporting IPv6 FIB offload need to be able to sanitize the rules they don't support and potentially flush their tables. Add an IPv6 helper to check if a FIB rule is a default rule. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1a88008cc6f5..6000b0dc51ee 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -295,6 +295,7 @@ int ipv6_route_open(struct inode *inode, struct file *file); #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); +bool fib6_rule_default(const struct fib_rule *rule); #else static inline int fib6_rules_init(void) { @@ -304,5 +305,9 @@ static inline void fib6_rules_cleanup(void) { return ; } +static inline bool fib6_rule_default(const struct fib_rule *rule) +{ + return true; +} #endif #endif -- cgit v1.2.3 From 16ab6d7d4d8cc037bb4be12c2b849ac92787e1ff Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:16 +0200 Subject: ipv6: fib: Add FIB notifiers callbacks We're about to add IPv6 FIB offload support, so implement the necessary callbacks in IPv6 code, which will later allow us to add routes and rules notifications. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 11 +++++++++++ include/net/netns/ipv6.h | 1 + 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6000b0dc51ee..be8ddf3253dc 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -16,10 +16,12 @@ #include #include #include +#include #include #include #include #include +#include #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 @@ -292,6 +294,15 @@ int fib6_init(void); int ipv6_route_open(struct inode *inode, struct file *file); +int call_fib6_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info); +int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + +int __net_init fib6_notifier_init(struct net *net); +void __net_exit fib6_notifier_exit(struct net *net); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index de7745e2edcc..abdf3b40303b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -86,6 +86,7 @@ struct netns_ipv6 { atomic_t dev_addr_genid; atomic_t fib6_sernum; struct seg6_pernet_data *seg6_data; + struct fib_notifier_ops *notifier_ops; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) -- cgit v1.2.3 From df77fe4d9865c6354372876632bcbceeda84f6c8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:17 +0200 Subject: ipv6: fib: Add in-kernel notifications for route add / delete As with IPv4, allow listeners of the FIB notification chain to receive notifications whenever a route is added, replaced or deleted. This is done by placing calls to the FIB notification chain in the two lowest level functions that end up performing these operations - namely, fib6_add_rt2node() and fib6_del_route(). Unlike IPv4, APPEND notifications aren't sent as the kernel doesn't distinguish between "append" (NLM_F_CREATE|NLM_F_APPEND) and "prepend" (NLM_F_CREATE). If NLM_F_EXCL isn't set, duplicate routes are always added after the existing duplicate routes. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index be8ddf3253dc..e2b292b79e99 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -258,6 +258,11 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_table *, struct flowi6 *, int); +struct fib6_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct rt6_info *rt; +}; + /* * exported functions */ -- cgit v1.2.3 From dcb18f762f6ac83a6dc9cdc26dd694dcc167beb7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:18 +0200 Subject: ipv6: fib_rules: Dump rules during registration to FIB chain Allow users of the FIB notification chain to receive a complete view of the IPv6 FIB rules upon registration to the chain. The integrity of the dump is ensured by a per-family sequence counter that is incremented (under RTNL) whenever a rule is added or deleted. All the sequence counters are read (under RTNL) and summed, prior and after the dump. In case the counters differ, then the dump is either restarted or the registration fails. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index e2b292b79e99..dbe5537809f5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -312,6 +312,8 @@ void __net_exit fib6_notifier_exit(struct net *net); int fib6_rules_init(void); void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); +int fib6_rules_dump(struct net *net, struct notifier_block *nb); +unsigned int fib6_rules_seq_read(struct net *net); #else static inline int fib6_rules_init(void) { @@ -325,5 +327,13 @@ static inline bool fib6_rule_default(const struct fib_rule *rule) { return true; } +static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} +static inline unsigned int fib6_rules_seq_read(struct net *net) +{ + return 0; +} #endif #endif -- cgit v1.2.3 From e1ee0a5ba35d999caef94d659b4cb842e63aeb68 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:19 +0200 Subject: ipv6: fib: Dump tables during registration to FIB chain Dump all the FIB tables in each net namespace upon registration to the FIB notification chain so that the callee will have a complete view of the tables. The integrity of the dump is ensured by a per-table sequence counter that is incremented (under write lock) whenever a route is added or deleted from the table. All the sequence counters are read (under each table's read lock) and summed, prior and after the dump. In case the counters differ, then the dump is either restarted or the registration fails. While it's possible for a table to be modified after its counter has been read, this isn't really a problem. In case it happened before it was read the second time, then the comparison at the end will fail. If it happened afterwards, then we're guaranteed to be notified about the change, as the notification block is registered prior to the second read. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index dbe5537809f5..0b3052157e6b 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -235,6 +235,7 @@ struct fib6_table { struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; + unsigned int fib_seq; #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -308,6 +309,9 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); +unsigned int fib6_tables_seq_read(struct net *net); +int fib6_tables_dump(struct net *net, struct notifier_block *nb); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); -- cgit v1.2.3 From 61e4d01e16acddadb9723143637a20417fa67ac9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:20 +0200 Subject: ipv6: fib: Add offload indication to routes Allow user space applications to see which routes are offloaded and which aren't by setting the RTNH_F_OFFLOAD flag when dumping them. To be consistent with IPv4, offload indication is provided on a per-nexthop basis. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/ipv6_route.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index d496c02e14bc..33e2a5732bd1 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -35,6 +35,7 @@ #define RTF_PREF(pref) ((pref) << 27) #define RTF_PREF_MASK 0x18000000 +#define RTF_OFFLOAD 0x20000000 /* offloaded route */ #define RTF_PCPU 0x40000000 /* read-only: can not be set by user */ #define RTF_LOCAL 0x80000000 -- cgit v1.2.3 From a460aa83963b185a32a6377eb486b6e613ac8e38 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:25 +0200 Subject: ipv6: fib: Add helpers to hold / drop a reference on rt6_info Similar to commit 1c677b3d2828 ("ipv4: fib: Add fib_info_hold() helper") and commit b423cb10807b ("ipv4: fib: Export free_fib_info()") add an helper to hold a reference on rt6_info and export rt6_release() to drop it and potentially release the route. This is needed so that drivers capable of FIB offload could hold a reference on the route before queueing it for offload and drop it after the route has been programmed to the device's tables. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0b3052157e6b..1d790ea40ea7 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -187,6 +187,22 @@ static inline void ip6_rt_put(struct rt6_info *rt) dst_release(&rt->dst); } +void rt6_free_pcpu(struct rt6_info *non_pcpu_rt); + +static inline void rt6_hold(struct rt6_info *rt) +{ + atomic_inc(&rt->rt6i_ref); +} + +static inline void rt6_release(struct rt6_info *rt) +{ + if (atomic_dec_and_test(&rt->rt6i_ref)) { + rt6_free_pcpu(rt); + dst_dev_put(&rt->dst); + dst_release(&rt->dst); + } +} + enum fib6_walk_state { #ifdef CONFIG_IPV6_SUBTREES FWS_S, -- cgit v1.2.3 From 98ba0bd5505dcbb90322a4be07bcfe6b8a18c73f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:37 -0400 Subject: sock: allocate skbs from optmem Add sock_omalloc and sock_ofree to be able to allocate control skbs, for instance for looping errors onto sk_error_queue. The transmit budget (sk_wmem_alloc) is involved in transmit skb shaping, most notably in TCP Small Queues. Using this budget for control packets would impact transmission. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 393c38e9f6aa..0f778d3c4300 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1531,6 +1531,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); void __sock_wfree(struct sk_buff *skb); void sock_wfree(struct sk_buff *skb); +struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, + gfp_t priority); void skb_orphan_partial(struct sk_buff *skb); void sock_rfree(struct sk_buff *skb); void sock_efree(struct sk_buff *skb); -- cgit v1.2.3 From 3ece782693c4b64d588dd217868558ab9a19bfe7 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:38 -0400 Subject: sock: skb_copy_ubufs support for compound pages Refine skb_copy_ubufs to support compound pages. With upcoming TCP zerocopy sendmsg, such fragments may appear. The existing code replaces each page one for one. Splitting each compound page into an independent number of regular pages can result in exceeding limit MAX_SKB_FRAGS if data is not exactly page aligned. Instead, fill all destination pages but the last to PAGE_SIZE. Split the existing alloc + copy loop into separate stages: 1. compute bytelength and minimum number of pages to store this. 2. allocate 3. copy, filling each page except the last to PAGE_SIZE bytes 4. update skb frag array Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index be76082f48aa..2f64e2bbb592 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1796,13 +1796,18 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb) return skb->len - skb->data_len; } -static inline unsigned int skb_pagelen(const struct sk_buff *skb) +static inline unsigned int __skb_pagelen(const struct sk_buff *skb) { unsigned int i, len = 0; for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--) len += skb_frag_size(&skb_shinfo(skb)->frags[i]); - return len + skb_headlen(skb); + return len; +} + +static inline unsigned int skb_pagelen(const struct sk_buff *skb) +{ + return skb_headlen(skb) + __skb_pagelen(skb); } /** -- cgit v1.2.3 From 52267790ef52d7513879238ca9fac22c1733e0e3 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:39 -0400 Subject: sock: add MSG_ZEROCOPY The kernel supports zerocopy sendmsg in virtio and tap. Expand the infrastructure to support other socket types. Introduce a completion notification channel over the socket error queue. Notifications are returned with ee_origin SO_EE_ORIGIN_ZEROCOPY. ee_errno is 0 to avoid blocking the send/recv path on receiving notifications. Add reference counting, to support the skb split, merge, resize and clone operations possible with SOCK_STREAM and other socket types. The patch does not yet modify any datapaths. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 60 +++++++++++++++++++++++++++++++++++++++++++ include/linux/socket.h | 1 + include/net/sock.h | 2 ++ include/uapi/linux/errqueue.h | 3 +++ 4 files changed, 66 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2f64e2bbb592..59cff7aa494e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -429,6 +429,7 @@ enum { SKBTX_SCHED_TSTAMP = 1 << 6, }; +#define SKBTX_ZEROCOPY_FRAG (SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG) #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) @@ -445,8 +446,28 @@ struct ubuf_info { void (*callback)(struct ubuf_info *, bool zerocopy_success); void *ctx; unsigned long desc; + u16 zerocopy:1; + atomic_t refcnt; }; +#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) + +struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size); + +static inline void sock_zerocopy_get(struct ubuf_info *uarg) +{ + atomic_inc(&uarg->refcnt); +} + +void sock_zerocopy_put(struct ubuf_info *uarg); +void sock_zerocopy_put_abort(struct ubuf_info *uarg); + +void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); + +int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, + struct msghdr *msg, int len, + struct ubuf_info *uarg); + /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -1214,6 +1235,45 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) return &skb_shinfo(skb)->hwtstamps; } +static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) +{ + bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY; + + return is_zcopy ? skb_uarg(skb) : NULL; +} + +static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg) +{ + if (skb && uarg && !skb_zcopy(skb)) { + sock_zerocopy_get(uarg); + skb_shinfo(skb)->destructor_arg = uarg; + skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; + } +} + +/* Release a reference on a zerocopy structure */ +static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) +{ + struct ubuf_info *uarg = skb_zcopy(skb); + + if (uarg) { + uarg->zerocopy = uarg->zerocopy && zerocopy; + sock_zerocopy_put(uarg); + skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; + } +} + +/* Abort a zerocopy operation and revert zckey on error in send syscall */ +static inline void skb_zcopy_abort(struct sk_buff *skb) +{ + struct ubuf_info *uarg = skb_zcopy(skb); + + if (uarg) { + sock_zerocopy_put_abort(uarg); + skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; + } +} + /** * skb_queue_empty - check if a queue is empty * @list: queue head diff --git a/include/linux/socket.h b/include/linux/socket.h index 8b13db5163cc..8ad963cdc88c 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -287,6 +287,7 @@ struct ucred { #define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ #define MSG_EOF MSG_FIN +#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file descriptor received through diff --git a/include/net/sock.h b/include/net/sock.h index 0f778d3c4300..fe1a0bc25cd3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -294,6 +294,7 @@ struct sock_common { * @sk_stamp: time stamp of last packet received * @sk_tsflags: SO_TIMESTAMPING socket options * @sk_tskey: counter to disambiguate concurrent tstamp requests + * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals * @sk_user_data: RPC layer private data * @sk_frag: cached page frag @@ -462,6 +463,7 @@ struct sock { u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; + atomic_t sk_zckey; struct socket *sk_socket; void *sk_user_data; #ifdef CONFIG_SECURITY diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index 07bdce1f444a..78fdf52d6b2f 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -18,10 +18,13 @@ struct sock_extended_err { #define SO_EE_ORIGIN_ICMP 2 #define SO_EE_ORIGIN_ICMP6 3 #define SO_EE_ORIGIN_TXSTATUS 4 +#define SO_EE_ORIGIN_ZEROCOPY 5 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) +#define SO_EE_CODE_ZEROCOPY_COPIED 1 + /** * struct scm_timestamping - timestamps exposed through cmsg * -- cgit v1.2.3 From 76851d1212c11365362525e1e2c0a18c97478e6b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:40 -0400 Subject: sock: add SOCK_ZEROCOPY sockopt The send call ignores unknown flags. Legacy applications may already unwittingly pass MSG_ZEROCOPY. Continue to ignore this flag unless a socket opts in to zerocopy. Introduce socket option SO_ZEROCOPY to enable MSG_ZEROCOPY processing. Processes can also query this socket option to detect kernel support for the feature. Older kernels will return ENOPROTOOPT. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/asm-generic/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 9861be8da65e..e47c9e436221 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -104,4 +104,6 @@ #define SO_PEERGROUPS 59 +#define SO_ZEROCOPY 60 + #endif /* __ASM_GENERIC_SOCKET_H */ -- cgit v1.2.3 From 1f8b977ab32dc5d148f103326e80d9097f1cefb5 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:41 -0400 Subject: sock: enable MSG_ZEROCOPY Prepare the datapath for refcounted ubuf_info. Clone ubuf_info with skb_zerocopy_clone() wherever needed due to skb split, merge, resize or clone. Split skb_orphan_frags into two variants. The split, merge, .. paths support reference counted zerocopy buffers, so do not do a deep copy. Add skb_orphan_frags_rx for paths that may loop packets to receive sockets. That is not allowed, as it may cause unbounded latency. Deep copy all zerocopy copy buffers, ref-counted or not, in this path. The exact locations to modify were chosen by exhaustively searching through all code that might modify skb_frag references and/or the the SKBTX_DEV_ZEROCOPY tx_flags bit. The changes err on the safe side, in two ways. (1) legacy ubuf_info paths virtio and tap are not modified. They keep a 1:1 ubuf_info to sk_buff relationship. Calls to skb_orphan_frags still call skb_copy_ubufs and thus copy frags in this case. (2) not all copies deep in the stack are addressed yet. skb_shift, skb_split and skb_try_coalesce can be refined to avoid copying. These are not in the hot path and this patch is hairy enough as is, so that is left for future refinement. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 59cff7aa494e..e5387932c266 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2512,7 +2512,17 @@ static inline void skb_orphan(struct sk_buff *skb) */ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) { - if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY))) + if (likely(!skb_zcopy(skb))) + return 0; + if (skb_uarg(skb)->callback == sock_zerocopy_callback) + return 0; + return skb_copy_ubufs(skb, gfp_mask); +} + +/* Frags must be orphaned, even if refcounted, if skb might loop to rx path */ +static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask) +{ + if (likely(!skb_zcopy(skb))) return 0; return skb_copy_ubufs(skb, gfp_mask); } @@ -2944,6 +2954,8 @@ static inline int skb_add_data(struct sk_buff *skb, static inline bool skb_can_coalesce(struct sk_buff *skb, int i, const struct page *page, int off) { + if (skb_zcopy(skb)) + return false; if (i) { const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; -- cgit v1.2.3 From 4ab6c99d99bb1bf0fbba8ff4e52114c66109992f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:42 -0400 Subject: sock: MSG_ZEROCOPY notification coalescing In the simple case, each sendmsg() call generates data and eventually a zerocopy ready notification N, where N indicates the Nth successful invocation of sendmsg() with the MSG_ZEROCOPY flag on this socket. TCP and corked sockets can cause send() calls to append new data to an existing sk_buff and, thus, ubuf_info. In that case the notification must hold a range. odify ubuf_info to store a inclusive range [N..N+m] and add skb_zerocopy_realloc() to optionally extend an existing range. Also coalesce notifications in this common case: if a notification [1, 1] is about to be queued while [0, 0] is the queue tail, just modify the head of the queue to read [0, 1]. Coalescing is limited to a few TSO frames worth of data to bound notification latency. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e5387932c266..f5bdd93a87da 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -444,15 +444,26 @@ enum { */ struct ubuf_info { void (*callback)(struct ubuf_info *, bool zerocopy_success); - void *ctx; - unsigned long desc; - u16 zerocopy:1; + union { + struct { + unsigned long desc; + void *ctx; + }; + struct { + u32 id; + u16 len; + u16 zerocopy:1; + u32 bytelen; + }; + }; atomic_t refcnt; }; #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size); +struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, + struct ubuf_info *uarg); static inline void sock_zerocopy_get(struct ubuf_info *uarg) { -- cgit v1.2.3 From a91dbff551a6f1865b68fa82b654591490b59901 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:43 -0400 Subject: sock: ulimit on MSG_ZEROCOPY pages Bound the number of pages that a user may pin. Follow the lead of perf tools to maintain a per-user bound on memory locked pages commit 789f90fcf6b0 ("perf_counter: per user mlock gift") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/sched/user.h | 3 ++- include/linux/skbuff.h | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 5d5415e129d4..3c07e4135127 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -36,7 +36,8 @@ struct user_struct { struct hlist_node uidhash_node; kuid_t uid; -#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) +#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \ + defined(CONFIG_NET) atomic_long_t locked_vm; #endif }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f5bdd93a87da..8c0708d2e5e6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -457,6 +457,11 @@ struct ubuf_info { }; }; atomic_t refcnt; + + struct mmpin { + struct user_struct *user; + unsigned int num_pg; + } mmp; }; #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) -- cgit v1.2.3 From 4ebc1e3cfcd8778e2150bdb799b19e85348b8efa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:28:57 +0200 Subject: net: sched: remove unneeded tcf_em_tree_change Since tcf_em_tree_validate could be always called on a newly created filter, there is no need for this change function. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 537d0a0ad4c4..f4462ec8b2f4 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -332,26 +332,6 @@ int tcf_em_tree_dump(struct sk_buff *, struct tcf_ematch_tree *, int); int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *, struct tcf_pkt_info *); -/** - * tcf_em_tree_change - replace ematch tree of a running classifier - * - * @tp: classifier kind handle - * @dst: destination ematch tree variable - * @src: source ematch tree (temporary tree from tcf_em_tree_validate) - * - * This functions replaces the ematch tree in @dst with the ematch - * tree in @src. The classifier in charge of the ematch tree may be - * running. - */ -static inline void tcf_em_tree_change(struct tcf_proto *tp, - struct tcf_ematch_tree *dst, - struct tcf_ematch_tree *src) -{ - tcf_tree_lock(tp); - memcpy(dst, src, sizeof(*dst)); - tcf_tree_unlock(tp); -} - /** * tcf_em_tree_match - evaulate an ematch tree * @@ -386,7 +366,6 @@ struct tcf_ematch_tree { #define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0) #define tcf_em_tree_destroy(t) do { (void)(t); } while(0) #define tcf_em_tree_dump(skb, t, tlv) (0) -#define tcf_em_tree_change(tp, dst, src) do { } while(0) #define tcf_em_tree_match(skb, t, info) ((void)(info), 1) #endif /* CONFIG_NET_EMATCH */ -- cgit v1.2.3 From 3bcc0cec818fa969fe555b44443347211ed787a3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:28:58 +0200 Subject: net: sched: change names of action number helpers to be aligned with the rest The rest of the helpers are named tcf_exts_*, so change the name of the action number helpers to be aligned. While at it, change to inline functions. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f4462ec8b2f4..7f2563636df0 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -199,17 +199,35 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, return 0; } +/** + * tcf_exts_has_actions - check if at least one action is present + * @exts: tc filter extensions handle + * + * Returns true if at least one action is present. + */ +static inline bool tcf_exts_has_actions(struct tcf_exts *exts) +{ #ifdef CONFIG_NET_CLS_ACT + return exts->nr_actions; +#else + return false; +#endif +} -#define tc_no_actions(_exts) ((_exts)->nr_actions == 0) -#define tc_single_action(_exts) ((_exts)->nr_actions == 1) - -#else /* CONFIG_NET_CLS_ACT */ - -#define tc_no_actions(_exts) true -#define tc_single_action(_exts) false - -#endif /* CONFIG_NET_CLS_ACT */ +/** + * tcf_exts_has_one_action - check if exactly one action is present + * @exts: tc filter extensions handle + * + * Returns true if exactly one action is present. + */ +static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + return exts->nr_actions == 1; +#else + return false; +#endif +} int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, -- cgit v1.2.3 From af69afc551eb9f9b1f2cc3295e2dfcdaa1dc948d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:28:59 +0200 Subject: net: sched: use tcf_exts_has_actions in tcf_exts_exec Use the tcf_exts_has_actions helper instead or directly testing exts->nr_actions in tcf_exts_exec. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 7f2563636df0..322a2823cc6a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -176,29 +176,6 @@ tcf_exts_stats_update(const struct tcf_exts *exts, #endif } -/** - * tcf_exts_exec - execute tc filter extensions - * @skb: socket buffer - * @exts: tc filter extensions handle - * @res: desired result - * - * Executes all configured extensions. Returns 0 on a normal execution, - * a negative number if the filter must be considered unmatched or - * a positive action code (TC_ACT_*) which must be returned to the - * underlying layer. - */ -static inline int -tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, - struct tcf_result *res) -{ -#ifdef CONFIG_NET_CLS_ACT - if (exts->nr_actions) - return tcf_action_exec(skb, exts->actions, exts->nr_actions, - res); -#endif - return 0; -} - /** * tcf_exts_has_actions - check if at least one action is present * @exts: tc filter extensions handle @@ -229,6 +206,29 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) #endif } +/** + * tcf_exts_exec - execute tc filter extensions + * @skb: socket buffer + * @exts: tc filter extensions handle + * @res: desired result + * + * Executes all configured extensions. Returns 0 on a normal execution, + * a negative number if the filter must be considered unmatched or + * a positive action code (TC_ACT_*) which must be returned to the + * underlying layer. + */ +static inline int +tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, + struct tcf_result *res) +{ +#ifdef CONFIG_NET_CLS_ACT + if (tcf_exts_has_actions(exts)) + return tcf_action_exec(skb, exts->actions, exts->nr_actions, + res); +#endif + return 0; +} + int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr); -- cgit v1.2.3 From 6fc6d06e5371507e68c6904a3423622b0e465b64 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:00 +0200 Subject: net: sched: remove redundant helpers tcf_exts_is_predicative and tcf_exts_is_available These two helpers are doing the same as tcf_exts_has_actions, so remove them and use tcf_exts_has_actions instead. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 322a2823cc6a..817badf733b5 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -113,36 +113,6 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) return 0; } -/** - * tcf_exts_is_predicative - check if a predicative extension is present - * @exts: tc filter extensions handle - * - * Returns 1 if a predicative extension is present, i.e. an extension which - * might cause further actions and thus overrule the regular tcf_result. - */ -static inline int -tcf_exts_is_predicative(struct tcf_exts *exts) -{ -#ifdef CONFIG_NET_CLS_ACT - return exts->nr_actions; -#else - return 0; -#endif -} - -/** - * tcf_exts_is_available - check if at least one extension is present - * @exts: tc filter extensions handle - * - * Returns 1 if at least one extension is present. - */ -static inline int -tcf_exts_is_available(struct tcf_exts *exts) -{ - /* All non-predicative extensions must be added here. */ - return tcf_exts_is_predicative(exts); -} - static inline void tcf_exts_to_list(const struct tcf_exts *exts, struct list_head *actions) { -- cgit v1.2.3 From af089e701adfb5898fee00a56ea4bb421edc308d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:01 +0200 Subject: net: sched: fix return value of tcf_exts_exec Return the defined TC_ACT_OK instead of 0. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 817badf733b5..61ce521688b2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -182,7 +182,7 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) * @exts: tc filter extensions handle * @res: desired result * - * Executes all configured extensions. Returns 0 on a normal execution, + * Executes all configured extensions. Returns TC_ACT_OK on a normal execution, * a negative number if the filter must be considered unmatched or * a positive action code (TC_ACT_*) which must be returned to the * underlying layer. @@ -196,7 +196,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, return tcf_action_exec(skb, exts->actions, exts->nr_actions, res); #endif - return 0; + return TC_ACT_OK; } int tcf_exts_validate(struct net *net, struct tcf_proto *tp, -- cgit v1.2.3 From ec1a9cca0e13391167567964fd04e61a39d6a4ae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:02 +0200 Subject: net: sched: remove check for number of actions in tcf_exts_exec Leave it to tcf_action_exec to return TC_ACT_OK in case there is no action present. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 61ce521688b2..b8959c9a190d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -192,9 +192,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, struct tcf_result *res) { #ifdef CONFIG_NET_CLS_ACT - if (tcf_exts_has_actions(exts)) - return tcf_action_exec(skb, exts->actions, exts->nr_actions, - res); + return tcf_action_exec(skb, exts->actions, exts->nr_actions, res); #endif return TC_ACT_OK; } -- cgit v1.2.3 From 9b0d4446b56904b59ae3809913b0ac760fa941a6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:15 +0200 Subject: net: sched: avoid atomic swap in tcf_exts_change tcf_exts_change is always called on newly created exts, which are not used on fastpath. Therefore, simple struct copy is enough. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index b8959c9a190d..e0c54f111467 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -201,8 +201,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr); void tcf_exts_destroy(struct tcf_exts *exts); -void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, - struct tcf_exts *src); +void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, -- cgit v1.2.3 From 56ce097c1caede1f9c191a7c9699b950e7c36ad9 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 4 Aug 2017 08:24:05 -0700 Subject: net: comment fixes against BPF devmap helper calls Update BPF comments to accurately reflect XDP usage. Fixes: 97f91a7cf04ff ("bpf: add bpf_redirect_map helper routine") Reported-by: Alexei Starovoitov Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1106a8c4cd36..1d06be1569b1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -345,14 +345,20 @@ union bpf_attr { * int bpf_redirect(ifindex, flags) * redirect to another netdev * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT - * int bpf_redirect_map(key, map, flags) + * @flags: + * cls_bpf: + * bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * xdp_bpf: + * all bits - reserved + * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error + * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error + * int bpf_redirect_map(map, key, flags) * redirect to endpoint in map + * @map: pointer to dev map * @key: index in map to lookup - * @map: fd of map to do lookup in * @flags: -- + * Return: XDP_REDIRECT on success or XDP_ABORT on error * * u32 bpf_get_route_realm(skb) * retrieve a dst's tclassid -- cgit v1.2.3 From 91ed1e666a4ea2e260452a7d7d311ac5ae852cba Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 3 Aug 2017 18:07:06 +0200 Subject: ip/options: explicitly provide net ns to __ip_options_echo() __ip_options_echo() uses the current network namespace, and currently retrives it via skb->dst->dev. This commit adds an explicit 'net' argument to __ip_options_echo() and update all the call sites to provide it, usually via a simpler sock_net(). After this change, __ip_options_echo() no more needs to access skb->dst and we can drop a couple of hack to preserve such info in the rx path. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/ip.h | 9 +++++---- include/net/tcp.h | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 821cedcc8e73..9e59dcf1787a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -567,11 +567,12 @@ int ip_forward(struct sk_buff *skb); void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag); -int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, - const struct ip_options *sopt); -static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) +int __ip_options_echo(struct net *net, struct ip_options *dopt, + struct sk_buff *skb, const struct ip_options *sopt); +static inline int ip_options_echo(struct net *net, struct ip_options *dopt, + struct sk_buff *skb) { - return __ip_options_echo(dopt, skb, &IPCB(skb)->opt); + return __ip_options_echo(net, dopt, skb, &IPCB(skb)->opt); } void ip_options_fragment(struct sk_buff *skb); diff --git a/include/net/tcp.h b/include/net/tcp.h index bb1881b4ce48..5173fecde495 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1885,7 +1885,8 @@ extern void tcp_rack_reo_timeout(struct sock *sk); /* * Save and compile IPv4 options, return a pointer to it */ -static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) +static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net, + struct sk_buff *skb) { const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; struct ip_options_rcu *dopt = NULL; @@ -1894,7 +1895,7 @@ static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) int opt_size = sizeof(*dopt) + opt->optlen; dopt = kmalloc(opt_size, GFP_ATOMIC); - if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) { + if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) { kfree(dopt); dopt = NULL; } -- cgit v1.2.3 From da4625ac2637e4e5249dc08a10f8dce7643603d2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:02:42 +0100 Subject: net: phy: split out PHY speed and duplex string generation Other code would like to make use of this, so make the speed and duplex string generation visible, and place it in a separate file. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 0bb5b212ab42..e8264c78b75b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -667,6 +667,9 @@ struct phy_fixup { int (*run)(struct phy_device *phydev); }; +const char *phy_speed_to_str(int speed); +const char *phy_duplex_to_str(unsigned int duplex); + /** * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. -- cgit v1.2.3 From 0ccb4fc65d2799a315d5ee8732d75f35a114379c Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:02:47 +0100 Subject: net: phy: move phy_lookup_setting() and guts of phy_supported_speeds() to phy-core phy_lookup_setting() provides useful functionality in ethtool code outside phylib. Move it to phy-core and allow it to be re-used (eg, in phylink) rather than duplicated elsewhere. Note that this supports the larger linkmode space. As we move the phy settings table, we also need to move the guts of phy_supported_speeds() as well. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index e8264c78b75b..8a280257778c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -670,6 +670,21 @@ struct phy_fixup { const char *phy_speed_to_str(int speed); const char *phy_duplex_to_str(unsigned int duplex); +/* A structure for mapping a particular speed and duplex + * combination to a particular SUPPORTED and ADVERTISED value + */ +struct phy_setting { + u32 speed; + u8 duplex; + u8 bit; +}; + +const struct phy_setting * +phy_lookup_setting(int speed, int duplex, const unsigned long *mask, + size_t maxbit, bool exact); +size_t phy_speeds(unsigned int *speeds, size_t size, + unsigned long *mask, size_t maxbit); + /** * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. -- cgit v1.2.3 From a81497bee70eb15039594b3116913133aa9c9b29 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:02:58 +0100 Subject: net: phy: provide a hook for link up/link down events Sometimes, we need to do additional work between the PHY coming up and marking the carrier present - for example, we may need to wait for the PHY to MAC link to finish negotiation. This changes phylib to provide a notification function pointer which avoids the built-in netif_carrier_on() and netif_carrier_off() functions. Standard ->adjust_link functionality is provided by hooking a helper into the new ->phy_link_change method. Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 8a280257778c..0a5e8e62c9e0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -474,6 +474,7 @@ struct phy_device { u8 mdix; u8 mdix_ctrl; + void (*phy_link_change)(struct phy_device *, bool up, bool do_carrier); void (*adjust_link)(struct net_device *dev); }; #define to_phy_device(d) container_of(to_mdio_device(d), \ -- cgit v1.2.3 From 9525ae83959b60c6061fe2f2caabdc8f69a48bc6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:03:13 +0100 Subject: phylink: add phylink infrastructure The link between the ethernet MAC and its PHY has become more complex as the interface evolves. This is especially true with serdes links, where the part of the PHY is effectively integrated into the MAC. Serdes links can be connected to a variety of devices, including SFF modules soldered down onto the board with the MAC, a SFP cage with a hotpluggable SFP module which may contain a PHY or directly modulate the serdes signals onto optical media with or without a PHY, or even a classical PHY connection. Moreover, the negotiation information on serdes links comes in two varieties - SGMII mode, where the PHY provides its speed/duplex/flow control information to the MAC, and 1000base-X mode where both ends exchange their abilities and each resolve the link capabilities. This means we need a more flexible means to support these arrangements, particularly with the hotpluggable nature of SFP, where the PHY can be attached or detached after the network device has been brought up. Ethtool information can come from multiple sources: - we may have a PHY operating in either SGMII or 1000base-X mode, in which case we take ethtool/mii data directly from the PHY. - we may have a optical SFP module without a PHY, with the MAC operating in 1000base-X mode - the ethtool/mii data needs to come from the MAC. - we may have a copper SFP module with a PHY whic can't be accessed, which means we need to take ethtool/mii data from the MAC. Phylink aims to solve this by providing an intermediary between the MAC and PHY, providing a safe way for PHYs to be hotplugged, and allowing a SFP driver to reconfigure the serdes connection. Phylink also takes over support of fixed link connections, where the speed/duplex/flow control are fixed, but link status may be controlled by a GPIO signal. By avoiding the fixed-phy implementation, phylink can provide a faster response to link events: fixed-phy has to wait for phylib to operate its state machine, which can take several seconds. In comparison, phylink takes milliseconds. Signed-off-by: Russell King - remove sync status - rework supported and advertisment handling - add 1000base-x speed for fixed links - use functionality exported from phy-core, reworking __phylink_ethtool_ksettings_set for it Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 2 + include/linux/phylink.h | 145 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 include/linux/phylink.h (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 0a5e8e62c9e0..d78cd01ea513 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -182,6 +182,7 @@ static inline const char *phy_modes(phy_interface_t interface) #define MII_ADDR_C45 (1<<30) struct device; +struct phylink; struct sk_buff; /* @@ -469,6 +470,7 @@ struct phy_device { struct mutex lock; + struct phylink *phylink; struct net_device *attached_dev; u8 mdix; diff --git a/include/linux/phylink.h b/include/linux/phylink.h new file mode 100644 index 000000000000..76f054f39684 --- /dev/null +++ b/include/linux/phylink.h @@ -0,0 +1,145 @@ +#ifndef NETDEV_PCS_H +#define NETDEV_PCS_H + +#include +#include +#include + +struct device_node; +struct ethtool_cmd; +struct net_device; + +enum { + MLO_PAUSE_NONE, + MLO_PAUSE_ASYM = BIT(0), + MLO_PAUSE_SYM = BIT(1), + MLO_PAUSE_RX = BIT(2), + MLO_PAUSE_TX = BIT(3), + MLO_PAUSE_TXRX_MASK = MLO_PAUSE_TX | MLO_PAUSE_RX, + MLO_PAUSE_AN = BIT(4), + + MLO_AN_PHY = 0, /* Conventional PHY */ + MLO_AN_FIXED, /* Fixed-link mode */ + MLO_AN_SGMII, /* Cisco SGMII protocol */ + MLO_AN_8023Z, /* 1000base-X protocol */ +}; + +static inline bool phylink_autoneg_inband(unsigned int mode) +{ + return mode == MLO_AN_SGMII || mode == MLO_AN_8023Z; +} + +struct phylink_link_state { + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); + phy_interface_t interface; /* PHY_INTERFACE_xxx */ + int speed; + int duplex; + int pause; + unsigned int link:1; + unsigned int an_enabled:1; + unsigned int an_complete:1; +}; + +struct phylink_mac_ops { + /** + * validate: validate and update the link configuration + * @ndev: net_device structure associated with MAC + * @config: configuration to validate + * + * Update the %config->supported and %config->advertised masks + * clearing bits that can not be supported. + * + * Note: the PHY may be able to transform from one connection + * technology to another, so, eg, don't clear 1000BaseX just + * because the MAC is unable to support it. This is more about + * clearing unsupported speeds and duplex settings. + * + * If the %config->interface mode is %PHY_INTERFACE_MODE_1000BASEX + * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode + * based on %config->advertised and/or %config->speed. + */ + void (*validate)(struct net_device *ndev, unsigned long *supported, + struct phylink_link_state *state); + + /* Read the current link state from the hardware */ + int (*mac_link_state)(struct net_device *, struct phylink_link_state *); + + /* Configure the MAC */ + /** + * mac_config: configure the MAC for the selected mode and state + * @ndev: net_device structure for the MAC + * @mode: one of MLO_AN_FIXED, MLO_AN_PHY, MLO_AN_8023Z, MLO_AN_SGMII + * @state: state structure + * + * The action performed depends on the currently selected mode: + * + * %MLO_AN_FIXED, %MLO_AN_PHY: + * set the specified speed, duplex, pause mode, and phy interface + * mode in the provided @state. + * %MLO_AN_8023Z: + * place the link in 1000base-X mode, advertising the parameters + * given in advertising in @state. + * %MLO_AN_SGMII: + * place the link in Cisco SGMII mode - there is no advertisment + * to make as the PHY communicates the speed and duplex to the + * MAC over the in-band control word. Configuration of the pause + * mode is as per MLO_AN_PHY since this is not included. + */ + void (*mac_config)(struct net_device *ndev, unsigned int mode, + const struct phylink_link_state *state); + + /** + * mac_an_restart: restart 802.3z BaseX autonegotiation + * @ndev: net_device structure for the MAC + */ + void (*mac_an_restart)(struct net_device *ndev); + + void (*mac_link_down)(struct net_device *, unsigned int mode); + void (*mac_link_up)(struct net_device *, unsigned int mode, + struct phy_device *); +}; + +struct phylink *phylink_create(struct net_device *, struct device_node *, + phy_interface_t iface, const struct phylink_mac_ops *ops); +void phylink_destroy(struct phylink *); + +int phylink_connect_phy(struct phylink *, struct phy_device *); +int phylink_of_phy_connect(struct phylink *, struct device_node *); +void phylink_disconnect_phy(struct phylink *); + +void phylink_mac_change(struct phylink *, bool up); + +void phylink_start(struct phylink *); +void phylink_stop(struct phylink *); + +void phylink_ethtool_get_wol(struct phylink *, struct ethtool_wolinfo *); +int phylink_ethtool_set_wol(struct phylink *, struct ethtool_wolinfo *); + +int phylink_ethtool_ksettings_get(struct phylink *, + struct ethtool_link_ksettings *); +int phylink_ethtool_ksettings_set(struct phylink *, + const struct ethtool_link_ksettings *); +int phylink_ethtool_nway_reset(struct phylink *); +void phylink_ethtool_get_pauseparam(struct phylink *, + struct ethtool_pauseparam *); +int phylink_ethtool_set_pauseparam(struct phylink *, + struct ethtool_pauseparam *); +int phylink_init_eee(struct phylink *, bool); +int phylink_get_eee_err(struct phylink *); +int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); +int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); +int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); + +#define phylink_zero(bm) \ + bitmap_zero(bm, __ETHTOOL_LINK_MODE_MASK_NBITS) +#define __phylink_do_bit(op, bm, mode) \ + op(ETHTOOL_LINK_MODE_ ## mode ## _BIT, bm) + +#define phylink_set(bm, mode) __phylink_do_bit(__set_bit, bm, mode) +#define phylink_clear(bm, mode) __phylink_do_bit(__clear_bit, bm, mode) +#define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) + +void phylink_set_port_modes(unsigned long *bits); + +#endif -- cgit v1.2.3 From ce0aa27ff3f68ed4ea1631d33797e573b3508bfa Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:03:18 +0100 Subject: sfp: add sfp-bus to bridge between network devices and sfp cages Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/sfp.h | 434 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 434 insertions(+) create mode 100644 include/linux/sfp.h (limited to 'include') diff --git a/include/linux/sfp.h b/include/linux/sfp.h new file mode 100644 index 000000000000..4a906f560817 --- /dev/null +++ b/include/linux/sfp.h @@ -0,0 +1,434 @@ +#ifndef LINUX_SFP_H +#define LINUX_SFP_H + +#include + +struct __packed sfp_eeprom_base { + u8 phys_id; + u8 phys_ext_id; + u8 connector; +#if defined __BIG_ENDIAN_BITFIELD + u8 e10g_base_er:1; + u8 e10g_base_lrm:1; + u8 e10g_base_lr:1; + u8 e10g_base_sr:1; + u8 if_1x_sx:1; + u8 if_1x_lx:1; + u8 if_1x_copper_active:1; + u8 if_1x_copper_passive:1; + + u8 escon_mmf_1310_led:1; + u8 escon_smf_1310_laser:1; + u8 sonet_oc192_short_reach:1; + u8 sonet_reach_bit1:1; + u8 sonet_reach_bit2:1; + u8 sonet_oc48_long_reach:1; + u8 sonet_oc48_intermediate_reach:1; + u8 sonet_oc48_short_reach:1; + + u8 unallocated_5_7:1; + u8 sonet_oc12_smf_long_reach:1; + u8 sonet_oc12_smf_intermediate_reach:1; + u8 sonet_oc12_short_reach:1; + u8 unallocated_5_3:1; + u8 sonet_oc3_smf_long_reach:1; + u8 sonet_oc3_smf_intermediate_reach:1; + u8 sonet_oc3_short_reach:1; + + u8 e_base_px:1; + u8 e_base_bx10:1; + u8 e100_base_fx:1; + u8 e100_base_lx:1; + u8 e1000_base_t:1; + u8 e1000_base_cx:1; + u8 e1000_base_lx:1; + u8 e1000_base_sx:1; + + u8 fc_ll_v:1; + u8 fc_ll_s:1; + u8 fc_ll_i:1; + u8 fc_ll_l:1; + u8 fc_ll_m:1; + u8 fc_tech_sa:1; + u8 fc_tech_lc:1; + u8 fc_tech_electrical_inter_enclosure:1; + + u8 fc_tech_electrical_intra_enclosure:1; + u8 fc_tech_sn:1; + u8 fc_tech_sl:1; + u8 fc_tech_ll:1; + u8 sfp_ct_active:1; + u8 sfp_ct_passive:1; + u8 unallocated_8_1:1; + u8 unallocated_8_0:1; + + u8 fc_media_tw:1; + u8 fc_media_tp:1; + u8 fc_media_mi:1; + u8 fc_media_tv:1; + u8 fc_media_m6:1; + u8 fc_media_m5:1; + u8 unallocated_9_1:1; + u8 fc_media_sm:1; + + u8 fc_speed_1200:1; + u8 fc_speed_800:1; + u8 fc_speed_1600:1; + u8 fc_speed_400:1; + u8 fc_speed_3200:1; + u8 fc_speed_200:1; + u8 unallocated_10_1:1; + u8 fc_speed_100:1; +#elif defined __LITTLE_ENDIAN_BITFIELD + u8 if_1x_copper_passive:1; + u8 if_1x_copper_active:1; + u8 if_1x_lx:1; + u8 if_1x_sx:1; + u8 e10g_base_sr:1; + u8 e10g_base_lr:1; + u8 e10g_base_lrm:1; + u8 e10g_base_er:1; + + u8 sonet_oc3_short_reach:1; + u8 sonet_oc3_smf_intermediate_reach:1; + u8 sonet_oc3_smf_long_reach:1; + u8 unallocated_5_3:1; + u8 sonet_oc12_short_reach:1; + u8 sonet_oc12_smf_intermediate_reach:1; + u8 sonet_oc12_smf_long_reach:1; + u8 unallocated_5_7:1; + + u8 sonet_oc48_short_reach:1; + u8 sonet_oc48_intermediate_reach:1; + u8 sonet_oc48_long_reach:1; + u8 sonet_reach_bit2:1; + u8 sonet_reach_bit1:1; + u8 sonet_oc192_short_reach:1; + u8 escon_smf_1310_laser:1; + u8 escon_mmf_1310_led:1; + + u8 e1000_base_sx:1; + u8 e1000_base_lx:1; + u8 e1000_base_cx:1; + u8 e1000_base_t:1; + u8 e100_base_lx:1; + u8 e100_base_fx:1; + u8 e_base_bx10:1; + u8 e_base_px:1; + + u8 fc_tech_electrical_inter_enclosure:1; + u8 fc_tech_lc:1; + u8 fc_tech_sa:1; + u8 fc_ll_m:1; + u8 fc_ll_l:1; + u8 fc_ll_i:1; + u8 fc_ll_s:1; + u8 fc_ll_v:1; + + u8 unallocated_8_0:1; + u8 unallocated_8_1:1; + u8 sfp_ct_passive:1; + u8 sfp_ct_active:1; + u8 fc_tech_ll:1; + u8 fc_tech_sl:1; + u8 fc_tech_sn:1; + u8 fc_tech_electrical_intra_enclosure:1; + + u8 fc_media_sm:1; + u8 unallocated_9_1:1; + u8 fc_media_m5:1; + u8 fc_media_m6:1; + u8 fc_media_tv:1; + u8 fc_media_mi:1; + u8 fc_media_tp:1; + u8 fc_media_tw:1; + + u8 fc_speed_100:1; + u8 unallocated_10_1:1; + u8 fc_speed_200:1; + u8 fc_speed_3200:1; + u8 fc_speed_400:1; + u8 fc_speed_1600:1; + u8 fc_speed_800:1; + u8 fc_speed_1200:1; +#else +#error Unknown Endian +#endif + u8 encoding; + u8 br_nominal; + u8 rate_id; + u8 link_len[6]; + char vendor_name[16]; + u8 extended_cc; + char vendor_oui[3]; + char vendor_pn[16]; + char vendor_rev[4]; + union { + __be16 optical_wavelength; + u8 cable_spec; + }; + u8 reserved62; + u8 cc_base; +}; + +struct __packed sfp_eeprom_ext { + __be16 options; + u8 br_max; + u8 br_min; + char vendor_sn[16]; + char datecode[8]; + u8 diagmon; + u8 enhopts; + u8 sff8472_compliance; + u8 cc_ext; +}; + +struct __packed sfp_eeprom_id { + struct sfp_eeprom_base base; + struct sfp_eeprom_ext ext; +}; + +/* SFP EEPROM registers */ +enum { + SFP_PHYS_ID = 0x00, + SFP_PHYS_EXT_ID = 0x01, + SFP_CONNECTOR = 0x02, + SFP_COMPLIANCE = 0x03, + SFP_ENCODING = 0x0b, + SFP_BR_NOMINAL = 0x0c, + SFP_RATE_ID = 0x0d, + SFP_LINK_LEN_SM_KM = 0x0e, + SFP_LINK_LEN_SM_100M = 0x0f, + SFP_LINK_LEN_50UM_OM2_10M = 0x10, + SFP_LINK_LEN_62_5UM_OM1_10M = 0x11, + SFP_LINK_LEN_COPPER_1M = 0x12, + SFP_LINK_LEN_50UM_OM4_10M = 0x12, + SFP_LINK_LEN_50UM_OM3_10M = 0x13, + SFP_VENDOR_NAME = 0x14, + SFP_VENDOR_OUI = 0x25, + SFP_VENDOR_PN = 0x28, + SFP_VENDOR_REV = 0x38, + SFP_OPTICAL_WAVELENGTH_MSB = 0x3c, + SFP_OPTICAL_WAVELENGTH_LSB = 0x3d, + SFP_CABLE_SPEC = 0x3c, + SFP_CC_BASE = 0x3f, + SFP_OPTIONS = 0x40, /* 2 bytes, MSB, LSB */ + SFP_BR_MAX = 0x42, + SFP_BR_MIN = 0x43, + SFP_VENDOR_SN = 0x44, + SFP_DATECODE = 0x54, + SFP_DIAGMON = 0x5c, + SFP_ENHOPTS = 0x5d, + SFP_SFF8472_COMPLIANCE = 0x5e, + SFP_CC_EXT = 0x5f, + + SFP_PHYS_ID_SFP = 0x03, + SFP_PHYS_EXT_ID_SFP = 0x04, + SFP_CONNECTOR_UNSPEC = 0x00, + /* codes 01-05 not supportable on SFP, but some modules have single SC */ + SFP_CONNECTOR_SC = 0x01, + SFP_CONNECTOR_FIBERJACK = 0x06, + SFP_CONNECTOR_LC = 0x07, + SFP_CONNECTOR_MT_RJ = 0x08, + SFP_CONNECTOR_MU = 0x09, + SFP_CONNECTOR_SG = 0x0a, + SFP_CONNECTOR_OPTICAL_PIGTAIL = 0x0b, + SFP_CONNECTOR_MPO_1X12 = 0x0c, + SFP_CONNECTOR_MPO_2X16 = 0x0d, + SFP_CONNECTOR_HSSDC_II = 0x20, + SFP_CONNECTOR_COPPER_PIGTAIL = 0x21, + SFP_CONNECTOR_RJ45 = 0x22, + SFP_CONNECTOR_NOSEPARATE = 0x23, + SFP_CONNECTOR_MXC_2X16 = 0x24, + SFP_ENCODING_UNSPEC = 0x00, + SFP_ENCODING_8B10B = 0x01, + SFP_ENCODING_4B5B = 0x02, + SFP_ENCODING_NRZ = 0x03, + SFP_ENCODING_8472_MANCHESTER = 0x04, + SFP_ENCODING_8472_SONET = 0x05, + SFP_ENCODING_8472_64B66B = 0x06, + SFP_ENCODING_256B257B = 0x07, + SFP_ENCODING_PAM4 = 0x08, + SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13), + SFP_OPTIONS_PAGING_A2 = BIT(12), + SFP_OPTIONS_RETIMER = BIT(11), + SFP_OPTIONS_COOLED_XCVR = BIT(10), + SFP_OPTIONS_POWER_DECL = BIT(9), + SFP_OPTIONS_RX_LINEAR_OUT = BIT(8), + SFP_OPTIONS_RX_DECISION_THRESH = BIT(7), + SFP_OPTIONS_TUNABLE_TX = BIT(6), + SFP_OPTIONS_RATE_SELECT = BIT(5), + SFP_OPTIONS_TX_DISABLE = BIT(4), + SFP_OPTIONS_TX_FAULT = BIT(3), + SFP_OPTIONS_LOS_INVERTED = BIT(2), + SFP_OPTIONS_LOS_NORMAL = BIT(1), + SFP_DIAGMON_DDM = BIT(6), + SFP_DIAGMON_INT_CAL = BIT(5), + SFP_DIAGMON_EXT_CAL = BIT(4), + SFP_DIAGMON_RXPWR_AVG = BIT(3), + SFP_DIAGMON_ADDRMODE = BIT(2), + SFP_ENHOPTS_ALARMWARN = BIT(7), + SFP_ENHOPTS_SOFT_TX_DISABLE = BIT(6), + SFP_ENHOPTS_SOFT_TX_FAULT = BIT(5), + SFP_ENHOPTS_SOFT_RX_LOS = BIT(4), + SFP_ENHOPTS_SOFT_RATE_SELECT = BIT(3), + SFP_ENHOPTS_APP_SELECT_SFF8079 = BIT(2), + SFP_ENHOPTS_SOFT_RATE_SFF8431 = BIT(1), + SFP_SFF8472_COMPLIANCE_NONE = 0x00, + SFP_SFF8472_COMPLIANCE_REV9_3 = 0x01, + SFP_SFF8472_COMPLIANCE_REV9_5 = 0x02, + SFP_SFF8472_COMPLIANCE_REV10_2 = 0x03, + SFP_SFF8472_COMPLIANCE_REV10_4 = 0x04, + SFP_SFF8472_COMPLIANCE_REV11_0 = 0x05, + SFP_SFF8472_COMPLIANCE_REV11_3 = 0x06, + SFP_SFF8472_COMPLIANCE_REV11_4 = 0x07, + SFP_SFF8472_COMPLIANCE_REV12_0 = 0x08, +}; + +/* SFP Diagnostics */ +enum { + /* Alarm and warnings stored MSB at lower address then LSB */ + SFP_TEMP_HIGH_ALARM = 0x00, + SFP_TEMP_LOW_ALARM = 0x02, + SFP_TEMP_HIGH_WARN = 0x04, + SFP_TEMP_LOW_WARN = 0x06, + SFP_VOLT_HIGH_ALARM = 0x08, + SFP_VOLT_LOW_ALARM = 0x0a, + SFP_VOLT_HIGH_WARN = 0x0c, + SFP_VOLT_LOW_WARN = 0x0e, + SFP_BIAS_HIGH_ALARM = 0x10, + SFP_BIAS_LOW_ALARM = 0x12, + SFP_BIAS_HIGH_WARN = 0x14, + SFP_BIAS_LOW_WARN = 0x16, + SFP_TXPWR_HIGH_ALARM = 0x18, + SFP_TXPWR_LOW_ALARM = 0x1a, + SFP_TXPWR_HIGH_WARN = 0x1c, + SFP_TXPWR_LOW_WARN = 0x1e, + SFP_RXPWR_HIGH_ALARM = 0x20, + SFP_RXPWR_LOW_ALARM = 0x22, + SFP_RXPWR_HIGH_WARN = 0x24, + SFP_RXPWR_LOW_WARN = 0x26, + SFP_LASER_TEMP_HIGH_ALARM = 0x28, + SFP_LASER_TEMP_LOW_ALARM = 0x2a, + SFP_LASER_TEMP_HIGH_WARN = 0x2c, + SFP_LASER_TEMP_LOW_WARN = 0x2e, + SFP_TEC_CUR_HIGH_ALARM = 0x30, + SFP_TEC_CUR_LOW_ALARM = 0x32, + SFP_TEC_CUR_HIGH_WARN = 0x34, + SFP_TEC_CUR_LOW_WARN = 0x36, + SFP_CAL_RXPWR4 = 0x38, + SFP_CAL_RXPWR3 = 0x3c, + SFP_CAL_RXPWR2 = 0x40, + SFP_CAL_RXPWR1 = 0x44, + SFP_CAL_RXPWR0 = 0x48, + SFP_CAL_TXI_SLOPE = 0x4c, + SFP_CAL_TXI_OFFSET = 0x4e, + SFP_CAL_TXPWR_SLOPE = 0x50, + SFP_CAL_TXPWR_OFFSET = 0x52, + SFP_CAL_T_SLOPE = 0x54, + SFP_CAL_T_OFFSET = 0x56, + SFP_CAL_V_SLOPE = 0x58, + SFP_CAL_V_OFFSET = 0x5a, + SFP_CHKSUM = 0x5f, + + SFP_TEMP = 0x60, + SFP_VCC = 0x62, + SFP_TX_BIAS = 0x64, + SFP_TX_POWER = 0x66, + SFP_RX_POWER = 0x68, + SFP_LASER_TEMP = 0x6a, + SFP_TEC_CUR = 0x6c, + + SFP_STATUS = 0x6e, + SFP_ALARM = 0x70, + + SFP_EXT_STATUS = 0x76, + SFP_VSL = 0x78, + SFP_PAGE = 0x7f, +}; + +struct device_node; +struct ethtool_eeprom; +struct ethtool_modinfo; +struct net_device; +struct sfp_bus; + +struct sfp_upstream_ops { + int (*module_insert)(void *, const struct sfp_eeprom_id *id); + void (*module_remove)(void *); + void (*link_down)(void *); + void (*link_up)(void *); + int (*connect_phy)(void *, struct phy_device *); + void (*disconnect_phy)(void *); +}; + +#if IS_ENABLED(CONFIG_SFP) +int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, + unsigned long *support); +phy_interface_t sfp_parse_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id); +void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, + unsigned long *support); + +int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo); +int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, + u8 *data); +void sfp_upstream_start(struct sfp_bus *bus); +void sfp_upstream_stop(struct sfp_bus *bus); +struct sfp_bus *sfp_register_upstream(struct device_node *np, + struct net_device *ndev, void *upstream, + const struct sfp_upstream_ops *ops); +void sfp_unregister_upstream(struct sfp_bus *bus); +#else +static inline int sfp_parse_port(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *support) +{ + return PORT_OTHER; +} + +static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id) +{ + return PHY_INTERFACE_MODE_NA; +} + +static inline void sfp_parse_support(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *support) +{ +} + +static inline int sfp_get_module_info(struct sfp_bus *bus, + struct ethtool_modinfo *modinfo) +{ + return -EOPNOTSUPP; +} + +static inline int sfp_get_module_eeprom(struct sfp_bus *bus, + struct ethtool_eeprom *ee, u8 *data) +{ + return -EOPNOTSUPP; +} + +static inline void sfp_upstream_start(struct sfp_bus *bus) +{ +} + +static inline void sfp_upstream_stop(struct sfp_bus *bus) +{ +} + +static inline struct sfp_bus *sfp_register_upstream(struct device_node *np, + struct net_device *ndev, void *upstream, + const struct sfp_upstream_ops *ops) +{ + return (struct sfp_bus *)-1; +} + +static inline void sfp_unregister_upstream(struct sfp_bus *bus) +{ +} +#endif + +#endif -- cgit v1.2.3 From 770a1ad55763a8a783cb71078e0b33a6b91ad92b Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:03:23 +0100 Subject: phylink: add module EEPROM support Add support for reading module EEPROMs through phylink. Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 76f054f39684..af67edd4ae38 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -125,6 +125,9 @@ void phylink_ethtool_get_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); +int phylink_ethtool_get_module_info(struct phylink *, struct ethtool_modinfo *); +int phylink_ethtool_get_module_eeprom(struct phylink *, + struct ethtool_eeprom *, u8 *); int phylink_init_eee(struct phylink *, bool); int phylink_get_eee_err(struct phylink *); int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); -- cgit v1.2.3 From 4faf783998b8cb88294e9df89032f473f8771b78 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 3 Aug 2017 20:38:51 -0700 Subject: tcp: fix cwnd undo in Reno and HTCP congestion controls Using ssthresh to revert cwnd is less reliable when ssthresh is bounded to 2 packets. This patch uses an existing variable in TCP "prior_cwnd" that snapshots the cwnd right before entering fast recovery and RTO recovery in Reno. This fixes the issue discussed in netdev thread: "A buggy behavior for Linux TCP Reno and HTCP" https://www.spinics.net/lists/netdev/msg444955.html Suggested-by: Neal Cardwell Reported-by: Wei Sun Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d7389ea36e10..267164a1d559 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,7 +258,7 @@ struct tcp_sock { u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; - u32 prior_cwnd; /* Congestion window at start of Recovery. */ + u32 prior_cwnd; /* cwnd right before starting loss recovery */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ -- cgit v1.2.3 From 233e7936c84b7d7dd90c10e2ba27abb5ab42956f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:51 +0800 Subject: sctp: remove the typedef sctp_lower_cwnd_t This patch is to remove the typedef sctp_lower_cwnd_t, and replace with enum sctp_lower_cwnd in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 9b18044c551e..761064ee3ac5 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -378,12 +378,12 @@ typedef enum { } sctp_retransmit_reason_t; /* Reasons to lower cwnd. */ -typedef enum { +enum sctp_lower_cwnd { SCTP_LOWER_CWND_T3_RTX, SCTP_LOWER_CWND_FAST_RTX, SCTP_LOWER_CWND_ECNE, SCTP_LOWER_CWND_INACTIVE, -} sctp_lower_cwnd_t; +}; /* SCTP-AUTH Necessary constants */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 66cd7639b912..53802d8872d7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -950,7 +950,8 @@ int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32); -void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); +void sctp_transport_lower_cwnd(struct sctp_transport *t, + enum sctp_lower_cwnd reason); void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); -- cgit v1.2.3 From 125c29820252bfa5bf8081e75618e4ee7e9487da Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:52 +0800 Subject: sctp: remove the typedef sctp_retransmit_reason_t This patch is to remove the typedef sctp_retransmit_reason_t, and replace with enum sctp_retransmit_reason in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 761064ee3ac5..922fba5880d6 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -370,12 +370,12 @@ typedef enum { peer */ /* Reasons to retransmit. */ -typedef enum { +enum sctp_retransmit_reason { SCTP_RTXR_T3_RTX, SCTP_RTXR_FAST_RTX, SCTP_RTXR_PMTUD, SCTP_RTXR_T1_RTX, -} sctp_retransmit_reason_t; +}; /* Reasons to lower cwnd. */ enum sctp_lower_cwnd { diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 53802d8872d7..5e872acee6e3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1054,8 +1054,8 @@ int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *); int sctp_outq_is_empty(const struct sctp_outq *); void sctp_outq_restart(struct sctp_outq *); -void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, - sctp_retransmit_reason_t); +void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, + enum sctp_retransmit_reason reason); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); void sctp_prsctp_prune(struct sctp_association *asoc, -- cgit v1.2.3 From 701ef3e6c74be771a76be39817941e68e7228644 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:53 +0800 Subject: sctp: remove the typedef sctp_scope_policy_t This patch is to remove the typedef sctp_scope_policy_t and keep it's members as an anonymous enum. It is also to define SCTP_SCOPE_POLICY_MAX to replace the num 3 in sysctl.c to make codes clear. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 922fba5880d6..acb03eb64842 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -341,12 +341,14 @@ typedef enum { SCTP_SCOPE_UNUSABLE, /* IPv4 unusable addresses */ } sctp_scope_t; -typedef enum { +enum { SCTP_SCOPE_POLICY_DISABLE, /* Disable IPv4 address scoping */ SCTP_SCOPE_POLICY_ENABLE, /* Enable IPv4 address scoping */ SCTP_SCOPE_POLICY_PRIVATE, /* Follow draft but allow IPv4 private addresses */ SCTP_SCOPE_POLICY_LINK, /* Follow draft but allow IPv4 link local addresses */ -} sctp_scope_policy_t; +}; + +#define SCTP_SCOPE_POLICY_MAX SCTP_SCOPE_POLICY_LINK /* Based on IPv4 scoping , * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24, -- cgit v1.2.3 From 1c662018d2d41ecc5550cbd74d29d2d32c164ed3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:54 +0800 Subject: sctp: remove the typedef sctp_scope_t This patch is to remove the typedef sctp_scope_t, and replace with enum sctp_scope in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/sctp.h | 4 ++-- include/net/sctp/structs.h | 17 +++++++++-------- 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index acb03eb64842..0503bb70e5d9 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -333,13 +333,13 @@ typedef enum { * At this point, the IPv6 scopes will be mapped to these internal scopes * as much as possible. */ -typedef enum { +enum sctp_scope { SCTP_SCOPE_GLOBAL, /* IPv4 global addresses */ SCTP_SCOPE_PRIVATE, /* IPv4 private addresses */ SCTP_SCOPE_LINK, /* IPv4 link local address */ SCTP_SCOPE_LOOPBACK, /* IPv4 loopback address */ SCTP_SCOPE_UNUSABLE, /* IPv4 unusable addresses */ -} sctp_scope_t; +}; enum { SCTP_SCOPE_POLICY_DISABLE, /* Disable IPv4 address scoping */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 84650fed1e6a..ca66b033ec38 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -94,8 +94,8 @@ /* * sctp/protocol.c */ -int sctp_copy_local_addr_list(struct net *, struct sctp_bind_addr *, - sctp_scope_t, gfp_t gfp, int flags); +int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *addr, + enum sctp_scope, gfp_t gfp, int flags); struct sctp_pf *sctp_get_pf_specific(sa_family_t family); int sctp_register_pf(struct sctp_pf *, sa_family_t); void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5e872acee6e3..d771d418481f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -449,7 +449,7 @@ struct sctp_af { int (*addr_valid) (union sctp_addr *, struct sctp_sock *, const struct sk_buff *); - sctp_scope_t (*scope) (union sctp_addr *); + enum sctp_scope (*scope)(union sctp_addr *); void (*inaddr_any) (union sctp_addr *, __be16); int (*is_any) (const union sctp_addr *); int (*available) (union sctp_addr *, @@ -1111,7 +1111,7 @@ void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port); void sctp_bind_addr_free(struct sctp_bind_addr *); int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, - sctp_scope_t scope, gfp_t gfp, + enum sctp_scope scope, gfp_t gfp, int flags); int sctp_bind_addr_dup(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, @@ -1135,8 +1135,9 @@ union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len, __u16 port, gfp_t gfp); -sctp_scope_t sctp_scope(const union sctp_addr *); -int sctp_in_scope(struct net *net, const union sctp_addr *addr, const sctp_scope_t scope); +enum sctp_scope sctp_scope(const union sctp_addr *addr); +int sctp_in_scope(struct net *net, const union sctp_addr *addr, + const enum sctp_scope scope); int sctp_is_any(struct sock *sk, const union sctp_addr *addr); int sctp_is_ep_boundall(struct sock *sk); @@ -1925,8 +1926,8 @@ static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base) struct sctp_association * -sctp_association_new(const struct sctp_endpoint *, const struct sock *, - sctp_scope_t scope, gfp_t gfp); +sctp_association_new(const struct sctp_endpoint *ep, const struct sock *sk, + enum sctp_scope scope, gfp_t gfp); void sctp_association_free(struct sctp_association *); void sctp_association_put(struct sctp_association *); void sctp_association_hold(struct sctp_association *); @@ -1967,8 +1968,8 @@ void sctp_assoc_set_primary(struct sctp_association *, struct sctp_transport *); void sctp_assoc_del_nonprimary_peers(struct sctp_association *, struct sctp_transport *); -int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *, - sctp_scope_t, gfp_t); +int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, + enum sctp_scope scope, gfp_t gfp); int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *, struct sctp_cookie*, gfp_t gfp); -- cgit v1.2.3 From 0ceaeebe28d46e49c865f88a3e3ca75f6cf13e1f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:55 +0800 Subject: sctp: remove the typedef sctp_transport_cmd_t This patch is to remove the typedef sctp_transport_cmd_t, and replace with enum sctp_transport_cmd in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 0503bb70e5d9..8ce6d3263e41 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -320,11 +320,11 @@ typedef enum { } sctp_xmit_t; /* These are the commands for manipulating transports. */ -typedef enum { +enum sctp_transport_cmd { SCTP_TRANSPORT_UP, SCTP_TRANSPORT_DOWN, SCTP_TRANSPORT_PF, -} sctp_transport_cmd_t; +}; /* These are the address scopes defined mainly for IPv4 addresses * based on draft of SCTP IPv4 scoping . diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d771d418481f..d098d1c4ed74 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1947,9 +1947,10 @@ void sctp_assoc_del_peer(struct sctp_association *asoc, const union sctp_addr *addr); void sctp_assoc_rm_peer(struct sctp_association *asoc, struct sctp_transport *peer); -void sctp_assoc_control_transport(struct sctp_association *, - struct sctp_transport *, - sctp_transport_cmd_t, sctp_sn_error_t); +void sctp_assoc_control_transport(struct sctp_association *asoc, + struct sctp_transport *transport, + enum sctp_transport_cmd command, + sctp_sn_error_t error); struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32); struct sctp_transport *sctp_assoc_is_match(struct sctp_association *, struct net *, -- cgit v1.2.3 From 8496561430df6e2c21b4ed37bd93604d3acaf5d6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:56 +0800 Subject: sctp: remove the typedef sctp_sock_state_t This patch is to remove the typedef sctp_sock_state_t, and replace with enum sctp_sock_state in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 6 +++--- include/net/sctp/sctp.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 8ce6d3263e41..049868ea7ae9 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -42,7 +42,7 @@ #include #include /* For ipv6hdr. */ -#include /* For TCP states used in sctp_sock_state_t */ +#include /* For TCP states used in enum sctp_sock_state */ /* Value used for stream negotiation. */ enum { SCTP_MAX_STREAM = 0xffff }; @@ -214,13 +214,13 @@ typedef enum { * - A socket in SCTP_SS_ESTABLISHED state indicates that it has a single * association. */ -typedef enum { +enum sctp_sock_state { SCTP_SS_CLOSED = TCP_CLOSE, SCTP_SS_LISTENING = TCP_LISTEN, SCTP_SS_ESTABLISHING = TCP_SYN_SENT, SCTP_SS_ESTABLISHED = TCP_ESTABLISHED, SCTP_SS_CLOSING = TCP_CLOSE_WAIT, -} sctp_sock_state_t; +}; /* These functions map various type to printable names. */ const char *sctp_cname(const sctp_subtype_t); /* chunk types */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ca66b033ec38..0022bc713434 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -565,7 +565,8 @@ static inline int __sctp_state(const struct sctp_association *asoc, /* Is the socket in this state? */ #define sctp_sstate(sk, state) __sctp_sstate((sk), (SCTP_SS_##state)) -static inline int __sctp_sstate(const struct sock *sk, sctp_sock_state_t state) +static inline int __sctp_sstate(const struct sock *sk, + enum sctp_sock_state state) { return sk->sk_state == state; } -- cgit v1.2.3 From 86b36f2a9b9ea58dd2100b0e6f1f45a1f67ee95e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:57 +0800 Subject: sctp: remove the typedef sctp_xmit_t This patch is to remove the typedef sctp_xmit_t, and replace with enum sctp_xmit in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 049868ea7ae9..311b2e3773e8 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -312,12 +312,12 @@ enum { SCTP_MAX_GABS = 16 }; /* These return values describe the success or failure of a number of * routines which form the lower interface to SCTP_outqueue. */ -typedef enum { +enum sctp_xmit { SCTP_XMIT_OK, SCTP_XMIT_PMTU_FULL, SCTP_XMIT_RWND_FULL, SCTP_XMIT_DELAY, -} sctp_xmit_t; +}; /* These are the commands for manipulating transports. */ enum sctp_transport_cmd { diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d098d1c4ed74..6fab67e7f1e5 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -697,10 +697,11 @@ struct sctp_packet { void sctp_packet_init(struct sctp_packet *, struct sctp_transport *, __u16 sport, __u16 dport); void sctp_packet_config(struct sctp_packet *, __u32 vtag, int); -sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *, - struct sctp_chunk *, int, gfp_t); -sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *, - struct sctp_chunk *); +enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk, + int one_packet, gfp_t gfp); +enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk); int sctp_packet_transmit(struct sctp_packet *, gfp_t); void sctp_packet_free(struct sctp_packet *); -- cgit v1.2.3 From 4785c7ae1848244da3435ba5269f6288c15975c7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:58 +0800 Subject: sctp: remove the typedef sctp_ierror_t This patch is to remove the typedef sctp_ierror_t, and replace with enum sctp_ierror in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 311b2e3773e8..9a694653b708 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -155,8 +155,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) - sizeof(struct sctp_data_chunk))) /* Internal error codes */ -typedef enum { - +enum sctp_ierror { SCTP_IERROR_NO_ERROR = 0, SCTP_IERROR_BASE = 1000, SCTP_IERROR_NO_COOKIE, @@ -177,7 +176,7 @@ typedef enum { SCTP_IERROR_PROTO_VIOLATION, SCTP_IERROR_ERROR, SCTP_IERROR_ABORT, -} sctp_ierror_t; +}; -- cgit v1.2.3 From 5210601945f5aedaf2d7f13a88436e27a39c6a8a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:59 +0800 Subject: sctp: remove the typedef sctp_state_t This patch is to remove the typedef sctp_state_t, and replace with enum sctp_state in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 4 ++-- include/net/sctp/constants.h | 4 ++-- include/net/sctp/sctp.h | 2 +- include/net/sctp/sm.h | 10 +++++----- include/net/sctp/structs.h | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 1d5f6ff3f440..be12ec946628 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -126,7 +126,7 @@ typedef union { __u8 u8; int error; __be16 err; - sctp_state_t state; + enum sctp_state state; sctp_event_timeout_t to; struct sctp_chunk *chunk; struct sctp_association *asoc; @@ -167,7 +167,7 @@ SCTP_ARG_CONSTRUCTOR(U16, __u16, u16) SCTP_ARG_CONSTRUCTOR(U8, __u8, u8) SCTP_ARG_CONSTRUCTOR(ERROR, int, error) SCTP_ARG_CONSTRUCTOR(PERR, __be16, err) /* protocol error */ -SCTP_ARG_CONSTRUCTOR(STATE, sctp_state_t, state) +SCTP_ARG_CONSTRUCTOR(STATE, enum sctp_state, state) SCTP_ARG_CONSTRUCTOR(TO, sctp_event_timeout_t, to) SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 9a694653b708..db9f40b657ba 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -181,7 +181,7 @@ enum sctp_ierror { /* SCTP state defines for internal state machine */ -typedef enum { +enum sctp_state { SCTP_STATE_CLOSED = 0, SCTP_STATE_COOKIE_WAIT = 1, @@ -192,7 +192,7 @@ typedef enum { SCTP_STATE_SHUTDOWN_RECEIVED = 6, SCTP_STATE_SHUTDOWN_ACK_SENT = 7, -} sctp_state_t; +}; #define SCTP_STATE_MAX SCTP_STATE_SHUTDOWN_ACK_SENT #define SCTP_STATE_NUM_STATES (SCTP_STATE_MAX + 1) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 0022bc713434..24ff7931d38c 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -558,7 +558,7 @@ static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style) /* Is the association in this state? */ #define sctp_state(asoc, state) __sctp_state((asoc), (SCTP_STATE_##state)) static inline int __sctp_state(const struct sctp_association *asoc, - sctp_state_t state) + enum sctp_state state) { return asoc->state == state; } diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 860f378333b5..281e8d1e0752 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -175,10 +175,10 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); -const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *, - sctp_event_t, - sctp_state_t, - sctp_subtype_t); +const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, + sctp_event_t event_type, + enum sctp_state state, + sctp_subtype_t event_subtype); int sctp_chunk_iif(const struct sctp_chunk *); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, @@ -313,7 +313,7 @@ struct sctp_chunk *sctp_process_strreset_resp( /* Prototypes for statetable processing. */ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, - sctp_state_t state, + enum sctp_state state, struct sctp_endpoint *, struct sctp_association *asoc, void *event_arg, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6fab67e7f1e5..fbe6e81b889b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1577,7 +1577,7 @@ struct sctp_association { * * State takes values from SCTP_STATE_*. */ - sctp_state_t state; + enum sctp_state state; /* Overall : The overall association error count. * Error Count : [Clear this any time I get something.] -- cgit v1.2.3 From dc1e0e6eb8b2e2c6b5c6cdfa4f0cc789e9516de5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:00 +0800 Subject: sctp: remove the typedef sctp_event_primitive_t This patch is to remove the typedef sctp_event_primitive_t, and replace with enum sctp_event_primitive in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index db9f40b657ba..162ee954a6af 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -110,7 +110,7 @@ typedef enum { #define SCTP_NUM_OTHER_TYPES (SCTP_EVENT_OTHER_MAX + 1) /* These are primitive requests from the ULP. */ -typedef enum { +enum sctp_event_primitive { SCTP_PRIMITIVE_ASSOCIATE = 0, SCTP_PRIMITIVE_SHUTDOWN, SCTP_PRIMITIVE_ABORT, @@ -118,7 +118,7 @@ typedef enum { SCTP_PRIMITIVE_REQUESTHEARTBEAT, SCTP_PRIMITIVE_ASCONF, SCTP_PRIMITIVE_RECONF, -} sctp_event_primitive_t; +}; #define SCTP_EVENT_PRIMITIVE_MAX SCTP_PRIMITIVE_RECONF #define SCTP_NUM_PRIMITIVE_TYPES (SCTP_EVENT_PRIMITIVE_MAX + 1) @@ -133,7 +133,7 @@ typedef union { enum sctp_cid chunk; sctp_event_timeout_t timeout; sctp_event_other_t other; - sctp_event_primitive_t primitive; + enum sctp_event_primitive primitive; } sctp_subtype_t; #define SCTP_SUBTYPE_CONSTRUCTOR(_name, _type, _elt) \ @@ -144,7 +144,7 @@ SCTP_ST_## _name (_type _arg) \ SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other) -SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) +SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) #define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA) -- cgit v1.2.3 From a0f098d0385a40f9c4c8a2ce48d075f77ea7edd8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:01 +0800 Subject: sctp: remove the typedef sctp_event_other_t This patch is to remove the typedef sctp_event_other_t, and replace with enum sctp_event_other in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 162ee954a6af..fd8a80ec573f 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -101,10 +101,10 @@ typedef enum { #define SCTP_EVENT_TIMEOUT_MAX SCTP_EVENT_TIMEOUT_AUTOCLOSE #define SCTP_NUM_TIMEOUT_TYPES (SCTP_EVENT_TIMEOUT_MAX + 1) -typedef enum { +enum sctp_event_other { SCTP_EVENT_NO_PENDING_TSN = 0, SCTP_EVENT_ICMP_PROTO_UNREACH, -} sctp_event_other_t; +}; #define SCTP_EVENT_OTHER_MAX SCTP_EVENT_ICMP_PROTO_UNREACH #define SCTP_NUM_OTHER_TYPES (SCTP_EVENT_OTHER_MAX + 1) @@ -132,7 +132,7 @@ enum sctp_event_primitive { typedef union { enum sctp_cid chunk; sctp_event_timeout_t timeout; - sctp_event_other_t other; + enum sctp_event_other other; enum sctp_event_primitive primitive; } sctp_subtype_t; @@ -143,7 +143,7 @@ SCTP_ST_## _name (_type _arg) \ SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) -SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other) +SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) -- cgit v1.2.3 From 19cd1592a24754e16d48398812d5f69b63f674dd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:02 +0800 Subject: sctp: remove the typedef sctp_event_timeout_t This patch is to remove the typedef sctp_event_timeout_t, and replace with enum sctp_event_timeout in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 4 ++-- include/net/sctp/constants.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index be12ec946628..376cb78b6247 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -127,7 +127,7 @@ typedef union { int error; __be16 err; enum sctp_state state; - sctp_event_timeout_t to; + enum sctp_event_timeout to; struct sctp_chunk *chunk; struct sctp_association *asoc; struct sctp_transport *transport; @@ -168,7 +168,7 @@ SCTP_ARG_CONSTRUCTOR(U8, __u8, u8) SCTP_ARG_CONSTRUCTOR(ERROR, int, error) SCTP_ARG_CONSTRUCTOR(PERR, __be16, err) /* protocol error */ SCTP_ARG_CONSTRUCTOR(STATE, enum sctp_state, state) -SCTP_ARG_CONSTRUCTOR(TO, sctp_event_timeout_t, to) +SCTP_ARG_CONSTRUCTOR(TO, enum sctp_event_timeout, to) SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc) SCTP_ARG_CONSTRUCTOR(TRANSPORT, struct sctp_transport *, transport) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index fd8a80ec573f..fb931f07e83a 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -84,7 +84,7 @@ typedef enum { * SCTP_ULP_* to the list of possible chunks. */ -typedef enum { +enum sctp_event_timeout { SCTP_EVENT_TIMEOUT_NONE = 0, SCTP_EVENT_TIMEOUT_T1_COOKIE, SCTP_EVENT_TIMEOUT_T1_INIT, @@ -96,7 +96,7 @@ typedef enum { SCTP_EVENT_TIMEOUT_RECONF, SCTP_EVENT_TIMEOUT_SACK, SCTP_EVENT_TIMEOUT_AUTOCLOSE, -} sctp_event_timeout_t; +}; #define SCTP_EVENT_TIMEOUT_MAX SCTP_EVENT_TIMEOUT_AUTOCLOSE #define SCTP_NUM_TIMEOUT_TYPES (SCTP_EVENT_TIMEOUT_MAX + 1) @@ -131,7 +131,7 @@ enum sctp_event_primitive { typedef union { enum sctp_cid chunk; - sctp_event_timeout_t timeout; + enum sctp_event_timeout timeout; enum sctp_event_other other; enum sctp_event_primitive primitive; } sctp_subtype_t; @@ -142,7 +142,7 @@ SCTP_ST_## _name (_type _arg) \ { sctp_subtype_t _retval; _retval._elt = _arg; return _retval; } SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) -SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) +SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, enum sctp_event_timeout, timeout) SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) -- cgit v1.2.3 From 61f0eb072294a148f707335d4d7f858b2af73770 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:03 +0800 Subject: sctp: remove the typedef sctp_event_t This patch is to remove the typedef sctp_event_t, and replace with enum sctp_event in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 6 ++---- include/net/sctp/sm.h | 12 +++++------- 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index fb931f07e83a..3181e0f95b60 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -71,14 +71,12 @@ enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM }; SCTP_NUM_AUTH_CHUNK_TYPES) /* These are the different flavours of event. */ -typedef enum { - +enum sctp_event { SCTP_EVENT_T_CHUNK = 1, SCTP_EVENT_T_TIMEOUT, SCTP_EVENT_T_OTHER, SCTP_EVENT_T_PRIMITIVE - -} sctp_event_t; +}; /* As a convenience for the state machine, we append SCTP_EVENT_* and * SCTP_ULP_* to the list of possible chunks. diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 281e8d1e0752..96f54cff7964 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -176,7 +176,7 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, - sctp_event_t event_type, + enum sctp_event event_type, enum sctp_state state, sctp_subtype_t event_subtype); int sctp_chunk_iif(const struct sctp_chunk *); @@ -312,12 +312,10 @@ struct sctp_chunk *sctp_process_strreset_resp( /* Prototypes for statetable processing. */ -int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, - enum sctp_state state, - struct sctp_endpoint *, - struct sctp_association *asoc, - void *event_arg, - gfp_t gfp); +int sctp_do_sm(struct net *net, enum sctp_event event_type, + sctp_subtype_t subtype, enum sctp_state state, + struct sctp_endpoint *ep, struct sctp_association *asoc, + void *event_arg, gfp_t gfp); /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(unsigned long peer); -- cgit v1.2.3 From bfc6f8270fefb323662d1d7713f940149f27b7f1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:04 +0800 Subject: sctp: remove the typedef sctp_subtype_t This patch is to remove the typedef sctp_subtype_t, and replace with union sctp_subtype in the places where it's using this typedef. Note that it doesn't fix many indents although it should, as sctp_disposition_t's removal would mess them up again. So better to fix them when removing sctp_disposition_t in later patch. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 18 +++++++++--------- include/net/sctp/sm.h | 13 +++++++------ 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 3181e0f95b60..deaafa9b09cb 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -124,20 +124,20 @@ enum sctp_event_primitive { /* We define here a utility type for manipulating subtypes. * The subtype constructors all work like this: * - * sctp_subtype_t foo = SCTP_ST_CHUNK(SCTP_CID_INIT); + * union sctp_subtype foo = SCTP_ST_CHUNK(SCTP_CID_INIT); */ -typedef union { +union sctp_subtype { enum sctp_cid chunk; enum sctp_event_timeout timeout; enum sctp_event_other other; enum sctp_event_primitive primitive; -} sctp_subtype_t; +}; #define SCTP_SUBTYPE_CONSTRUCTOR(_name, _type, _elt) \ -static inline sctp_subtype_t \ +static inline union sctp_subtype \ SCTP_ST_## _name (_type _arg) \ -{ sctp_subtype_t _retval; _retval._elt = _arg; return _retval; } +{ union sctp_subtype _retval; _retval._elt = _arg; return _retval; } SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, enum sctp_event_timeout, timeout) @@ -220,10 +220,10 @@ enum sctp_sock_state { }; /* These functions map various type to printable names. */ -const char *sctp_cname(const sctp_subtype_t); /* chunk types */ -const char *sctp_oname(const sctp_subtype_t); /* other events */ -const char *sctp_tname(const sctp_subtype_t); /* timeouts */ -const char *sctp_pname(const sctp_subtype_t); /* primitives */ +const char *sctp_cname(const union sctp_subtype id); /* chunk types */ +const char *sctp_oname(const union sctp_subtype id); /* other events */ +const char *sctp_tname(const union sctp_subtype id); /* timeouts */ +const char *sctp_pname(const union sctp_subtype id); /* primitives */ /* This is a table of printable names of sctp_state_t's. */ extern const char *const sctp_state_tbl[]; diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 96f54cff7964..1e7651c3b158 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -73,7 +73,7 @@ typedef struct { typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, const struct sctp_endpoint *, const struct sctp_association *, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *); typedef void (sctp_timer_event_t) (unsigned long); @@ -175,10 +175,11 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); -const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, - enum sctp_event event_type, - enum sctp_state state, - sctp_subtype_t event_subtype); +const sctp_sm_table_entry_t *sctp_sm_lookup_event( + struct net *net, + enum sctp_event event_type, + enum sctp_state state, + union sctp_subtype event_subtype); int sctp_chunk_iif(const struct sctp_chunk *); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, @@ -313,7 +314,7 @@ struct sctp_chunk *sctp_process_strreset_resp( /* Prototypes for statetable processing. */ int sctp_do_sm(struct net *net, enum sctp_event event_type, - sctp_subtype_t subtype, enum sctp_state state, + union sctp_subtype subtype, enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, gfp_t gfp); -- cgit v1.2.3 From eeb66cdb682678bfd1f02a4547e3649b38ffea7e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 4 Jun 2017 23:11:55 +0300 Subject: net/mlx5: Separate between E-Switch and MPFS Multi-Physical Function Switch (MPFs) is required for when multi-PF configuration is enabled to allow passing user configured unicast MAC addresses to the requesting PF. Before this patch eswitch.c used to manage the HW MPFS l2 table, E-Switch always (regardless of sriov) enabled vport(0) (NIC PF) vport's contexts update on unicast mac address list changes, to populate the PF's MPFS L2 table accordingly. In downstream patch we would like to allow compiling the driver without E-Switch functionalities, for that we move MPFS l2 table logic out of eswitch.c into its own file, and provide Kconfig flag (MLX5_MPFS) to allow compiling out MPFS for those who don't want Multi-PF support. NIC PF netdevice will now directly update MPFS l2 table via the new MPFS API. VF netdevice has no access to MPFS L2 table, so E-Switch will remain responsible of updating its MPFS l2 table on behalf of its VFs. Due to this change we also don't require enabling vport(0) (PF vport) unicast mac changes events anymore, for when SRIOV is not enabled. Which means E-Switch is now activated only on SRIOV activation, and not required otherwise. Signed-off-by: Saeed Mahameed Cc: Jes Sorensen Cc: kernel-team@fb.com --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index df6ce59a1f95..88d6eb5b3a76 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -550,6 +550,7 @@ struct mlx5_fc_stats { unsigned long sampling_interval; /* jiffies */ }; +struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; struct mlx5_pagefault; @@ -647,6 +648,7 @@ struct mlx5_priv { spinlock_t ctx_lock; struct mlx5_flow_steering *steering; + struct mlx5_mpfs *mpfs; struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; struct mlx5_lag *lag; -- cgit v1.2.3 From 97834eba7c194659a72c5bb0f8c19c7055bb69ea Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Wed, 7 Jun 2017 12:14:24 +0300 Subject: net/mlx5: Delay events till ib registration ends When mlx5_ib registers itself to mlx5_core as an interface, it will call mlx5_add_device which will call mlx5_ib interface add callback, in case the latter successfully returns, only then mlx5_core will add it to the interface list and async events will be forwarded to mlx5_ib. Between mlx5_ib interface add callback and mlx5_core adding the mlx5_ib interface to its devices list, arriving mlx5_core events can be missed by the new mlx5_ib registering interface. In other words: thread 1: mlx5_ib: mlx5_register_interface(dev) thread 1: mlx5_core: mlx5_add_device(dev) thread 1: mlx5_core: ctx = dev->add => (mlx5_ib)->mlx5_ib_add thread 2: mlx5_core_event: **new event arrives, forward to dev_list thread 1: mlx5_core: add_ctx_to_dev_list(ctx) /* previous event was missed by the new interface.*/ It is ok to miss events before dev->add (mlx5_ib)->mlx5_ib_add_device but not after. We fix this race by accumulating the events that come between the ib_register_device (inside mlx5_add_device->(dev->add)) till the adding to the list completes and fire them to the new registering interface after that. Fixes: f1ee87fe55c8 ("net/mlx5: Organize device list API in one place") Signed-off-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 88d6eb5b3a76..d26f18b39c4a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -647,6 +647,9 @@ struct mlx5_priv { struct list_head ctx_list; spinlock_t ctx_lock; + struct list_head waiting_events_list; + bool is_accum_events; + struct mlx5_flow_steering *steering; struct mlx5_mpfs *mpfs; struct mlx5_eswitch *eswitch; -- cgit v1.2.3 From 61690e09c3b4a40401bdaa89ee11522cc0dc4b11 Mon Sep 17 00:00:00 2001 From: Rabie Loulou Date: Mon, 10 Jul 2017 14:35:10 +0300 Subject: net/mlx5: Fix counter list hardware structure The counter list hardware structure doesn't contain a clear and num_of_counters fields, remove them. These wrong fields were never used by the driver hence no other driver changes. Fixes: a351a1b03bf1 ("net/mlx5: Introduce bulk reading of flow counters") Signed-off-by: Rabie Loulou Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3030121b4746..f847a3a57913 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1071,8 +1071,7 @@ struct mlx5_ifc_dest_format_struct_bits { }; struct mlx5_ifc_flow_counter_list_bits { - u8 clear[0x1]; - u8 num_of_counters[0xf]; + u8 reserved_at_0[0x10]; u8 flow_counter_id[0x10]; u8 reserved_at_20[0x20]; -- cgit v1.2.3 From a8ffcc741acb3c7f3dcf4c7d001209aa0995a5f1 Mon Sep 17 00:00:00 2001 From: Rabie Loulou Date: Sun, 9 Jul 2017 13:39:30 +0300 Subject: net/mlx5: Increase the maximum flow counters supported Read new NIC capability field which represnts 16 MSBs of the max flow counters number supported (max_flow_counter_31_16). Backward compatibility with older firmware is preserved, the modified driver reads max_flow_counter_31_16 as 0 from the older firmware and uses up to 64K counters. Changed flow counter id from 16 bits to 32 bits. Backward compatibility with older firmware is preserved as we kept the 16 LSBs of the counter id in place and added 16 MSBs from reserved field. Changed the background bulk reading of flow counters to work in chunks of at most 32K counters, to make sure we don't attempt to allocate very large buffers. Signed-off-by: Rabie Loulou Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f847a3a57913..c99daffc3c3c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -963,7 +963,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_2a0[0x10]; u8 max_wqe_sz_rq[0x10]; - u8 reserved_at_2c0[0x10]; + u8 max_flow_counter_31_16[0x10]; u8 max_wqe_sz_sq_dc[0x10]; u8 reserved_at_2e0[0x7]; @@ -981,7 +981,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_340[0x8]; u8 log_max_flow_counter_bulk[0x8]; - u8 max_flow_counter[0x10]; + u8 max_flow_counter_15_0[0x10]; u8 reserved_at_360[0x3]; @@ -1071,8 +1071,7 @@ struct mlx5_ifc_dest_format_struct_bits { }; struct mlx5_ifc_flow_counter_list_bits { - u8 reserved_at_0[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; u8 reserved_at_20[0x20]; }; @@ -4402,8 +4401,7 @@ struct mlx5_ifc_query_flow_counter_in_bits { u8 reserved_at_c1[0xf]; u8 num_of_counters[0x10]; - u8 reserved_at_e0[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; }; struct mlx5_ifc_query_esw_vport_context_out_bits { @@ -6271,8 +6269,7 @@ struct mlx5_ifc_dealloc_flow_counter_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; u8 reserved_at_60[0x20]; }; @@ -7097,8 +7094,7 @@ struct mlx5_ifc_alloc_flow_counter_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; u8 reserved_at_60[0x20]; }; -- cgit v1.2.3 From 2572ac53c46f58e500b9d8d0f99785666038c590 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:17 +0200 Subject: net: sched: make type an argument for ndo_setup_tc Since the type is always present, push it to be a separate argument to ndo_setup_tc. On the way, name the type enum and use it for arg type. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a3cdc1b1f31..e4238e540544 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -774,7 +774,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, /* These structures hold the attributes of qdisc and classifiers * that are being passed to the netdevice through the setup_tc op. */ -enum { +enum tc_setup_type { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, @@ -785,7 +785,6 @@ enum { struct tc_cls_u32_offload; struct tc_to_netdev { - unsigned int type; union { struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; @@ -978,8 +977,9 @@ struct xfrmdev_ops { * with PF and querying it may introduce a theoretical security risk. * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); - * int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u32 chain_index, - * __be16 protocol, struct tc_to_netdev *tc); + * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, + * u32 handle, u32 chain_index, __be16 protocol, + * struct tc_to_netdev *tc); * Called to setup any 'tc' scheduler, classifier or action on @dev. * This is always called from the stack with the rtnl lock held and netif * tx queues stopped. This allows the netdevice to perform queue @@ -1227,6 +1227,7 @@ struct net_device_ops { struct net_device *dev, int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, + enum tc_setup_type type, u32 handle, u32 chain_index, __be16 protocol, struct tc_to_netdev *tc); -- cgit v1.2.3 From ade9b6588420b335851951702ab975c975b0c1b2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:18 +0200 Subject: net: sched: rename TC_SETUP_MATCHALL to TC_SETUP_CLSMATCHALL In order to be aligned with the rest of the types, rename TC_SETUP_MATCHALL to TC_SETUP_CLSMATCHALL. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e4238e540544..f8051a36f900 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -778,7 +778,7 @@ enum tc_setup_type { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, - TC_SETUP_MATCHALL, + TC_SETUP_CLSMATCHALL, TC_SETUP_CLSBPF, }; -- cgit v1.2.3 From 3e0e82664322290a59189f7c2bcb39b0de932505 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:19 +0200 Subject: net: sched: make egress_dev flag part of flower offload struct Since this is specific to flower now, make it part of the flower offload struct. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - include/net/pkt_cls.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f8051a36f900..bd49dbaee84e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -792,7 +792,6 @@ struct tc_to_netdev { struct tc_cls_bpf_offload *cls_bpf; struct tc_mqprio_qopt *mqprio; }; - bool egress_dev; }; /* These structures hold the attributes of xdp state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e0c54f111467..8213acdfdf5a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -504,6 +504,7 @@ struct tc_cls_flower_offload { struct fl_flow_key *mask; struct fl_flow_key *key; struct tcf_exts *exts; + bool egress_dev; }; enum tc_matchall_command { -- cgit v1.2.3 From 5fd9fc4e207dba0c05cafe78417952b4c4ca02dc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:29 +0200 Subject: net: sched: push cls related args into cls_common structure As ndo_setup_tc is generic offload op for whole tc subsystem, does not really make sense to have cls-specific args. So move them under cls_common structurure which is embedded in all cls structs. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 --- include/net/pkt_cls.h | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bd49dbaee84e..6e2f7e38cf8e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -977,7 +977,6 @@ struct xfrmdev_ops { * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, - * u32 handle, u32 chain_index, __be16 protocol, * struct tc_to_netdev *tc); * Called to setup any 'tc' scheduler, classifier or action on @dev. * This is always called from the stack with the rtnl lock held and netif @@ -1227,8 +1226,6 @@ struct net_device_ops { int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, - u32 handle, u32 chain_index, - __be16 protocol, struct tc_to_netdev *tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 8213acdfdf5a..ffaddf72108e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -405,6 +405,21 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ +struct tc_cls_common_offload { + u32 handle; + u32 chain_index; + __be16 protocol; +}; + +static inline void +tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, + const struct tcf_proto *tp) +{ + cls_common->handle = tp->q->handle; + cls_common->chain_index = tp->chain->index; + cls_common->protocol = tp->protocol; +} + struct tc_cls_u32_knode { struct tcf_exts *exts; struct tc_u32_sel *sel; @@ -431,6 +446,7 @@ enum tc_clsu32_command { }; struct tc_cls_u32_offload { + struct tc_cls_common_offload common; /* knode values */ enum tc_clsu32_command command; union { @@ -497,6 +513,7 @@ enum tc_fl_command { }; struct tc_cls_flower_offload { + struct tc_cls_common_offload common; enum tc_fl_command command; u32 prio; unsigned long cookie; @@ -513,6 +530,7 @@ enum tc_matchall_command { }; struct tc_cls_matchall_offload { + struct tc_cls_common_offload common; enum tc_matchall_command command; struct tcf_exts *exts; unsigned long cookie; @@ -526,6 +544,7 @@ enum tc_clsbpf_command { }; struct tc_cls_bpf_offload { + struct tc_cls_common_offload common; enum tc_clsbpf_command command; struct tcf_exts *exts; struct bpf_prog *prog; -- cgit v1.2.3 From d7c1c8d2e53be974b5c72e31d7d35f6d9737fe84 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:30 +0200 Subject: net: sched: move prio into cls_common prio is not cls_flower specific, but it is meaningful for all classifiers. Seems that only mlxsw cares about the value. Obviously, cls offload in other drivers is broken. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ffaddf72108e..572083af02ac 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -409,6 +409,7 @@ struct tc_cls_common_offload { u32 handle; u32 chain_index; __be16 protocol; + u32 prio; }; static inline void @@ -418,6 +419,7 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, cls_common->handle = tp->q->handle; cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; + cls_common->prio = tp->prio; } struct tc_cls_u32_knode { @@ -515,7 +517,6 @@ enum tc_fl_command { struct tc_cls_flower_offload { struct tc_cls_common_offload common; enum tc_fl_command command; - u32 prio; unsigned long cookie; struct flow_dissector *dissector; struct fl_flow_key *mask; -- cgit v1.2.3 From de4784ca030fed17d527dbb2bb4e21328b12de94 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:32 +0200 Subject: net: sched: get rid of struct tc_to_netdev Get rid of struct tc_to_netdev which is now just unnecessary container and rather pass per-type structures down to drivers directly. Along with that, consolidate the naming of per-type structure variables in cls_*. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6e2f7e38cf8e..1d238d54c484 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -771,9 +771,6 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); -/* These structures hold the attributes of qdisc and classifiers - * that are being passed to the netdevice through the setup_tc op. - */ enum tc_setup_type { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, @@ -782,18 +779,6 @@ enum tc_setup_type { TC_SETUP_CLSBPF, }; -struct tc_cls_u32_offload; - -struct tc_to_netdev { - union { - struct tc_cls_u32_offload *cls_u32; - struct tc_cls_flower_offload *cls_flower; - struct tc_cls_matchall_offload *cls_mall; - struct tc_cls_bpf_offload *cls_bpf; - struct tc_mqprio_qopt *mqprio; - }; -}; - /* These structures hold the attributes of xdp state that are being passed * to the netdevice through the xdp op. */ @@ -977,7 +962,7 @@ struct xfrmdev_ops { * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, - * struct tc_to_netdev *tc); + * void *type_data); * Called to setup any 'tc' scheduler, classifier or action on @dev. * This is always called from the stack with the rtnl lock held and netif * tx queues stopped. This allows the netdevice to perform queue @@ -1226,7 +1211,7 @@ struct net_device_ops { int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, - struct tc_to_netdev *tc); + void *type_data); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); -- cgit v1.2.3 From fb74c27735f0a34e76dbf1972084e984ad2ea145 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:16 -0700 Subject: net: ipv4: add second dif to udp socket lookups Add a second device index, sdif, to udp socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Early demux lookups are handled in the next patch as part of INET_MATCH changes. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip.h | 10 ++++++++++ include/net/udp.h | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 9e59dcf1787a..39db596eb89f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -78,6 +78,16 @@ struct ipcm_cookie { #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb)) +/* return enslaved device index if relevant */ +static inline int inet_sdif(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) + return IPCB(skb)->iif; +#endif + return 0; +} + struct ip_ra_chain { struct ip_ra_chain __rcu *next; struct sock *sk; diff --git a/include/net/udp.h b/include/net/udp.h index cc8036987dcb..826c713d5a48 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -287,7 +287,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif, + __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport); -- cgit v1.2.3 From 3fa6f616a7a4d0bdf4d877d530456d8a5c3b109b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:17 -0700 Subject: net: ipv4: add second dif to inet socket lookups Add a second device index, sdif, to inet socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. TCP moves the data in the cb. Prior to tcp_v4_rcv (e.g., early demux) the ingress index is obtained from IPCB using inet_sdif and after the cb move in tcp_v4_rcv the tcp_v4_sdif helper is used. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 31 +++++++++++++++++-------------- include/net/tcp.h | 10 ++++++++++ 2 files changed, 27 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 5026b1f08bb8..2dbbbff5e1e3 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -221,16 +221,16 @@ struct sock *__inet_lookup_listener(struct net *net, const __be32 saddr, const __be16 sport, const __be32 daddr, const unsigned short hnum, - const int dif); + const int dif, const int sdif); static inline struct sock *inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif) + __be32 daddr, __be16 dport, int dif, int sdif) { return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport, - daddr, ntohs(dport), dif); + daddr, ntohs(dport), dif, sdif); } /* Socket demux engine toys. */ @@ -262,22 +262,24 @@ static inline struct sock *inet_lookup_listener(struct net *net, (((__force __u64)(__be32)(__daddr)) << 32) | \ ((__force __u64)(__be32)(__saddr))) #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \ +#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_addrpair == (__cookie)) && \ (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const int __name __deprecated __attribute__((unused)) -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \ +#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_daddr == (__saddr)) && \ ((__sk)->sk_rcv_saddr == (__daddr)) && \ (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* 64-bit arch */ @@ -288,7 +290,7 @@ struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, - const int dif); + const int dif, const int sdif); static inline struct sock * inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, @@ -297,7 +299,7 @@ static inline struct sock * const int dif) { return __inet_lookup_established(net, hashinfo, saddr, sport, daddr, - ntohs(dport), dif); + ntohs(dport), dif, 0); } static inline struct sock *__inet_lookup(struct net *net, @@ -305,20 +307,20 @@ static inline struct sock *__inet_lookup(struct net *net, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif, + const int dif, const int sdif, bool *refcounted) { u16 hnum = ntohs(dport); struct sock *sk; sk = __inet_lookup_established(net, hashinfo, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, - sport, daddr, hnum, dif); + sport, daddr, hnum, dif, sdif); } static inline struct sock *inet_lookup(struct net *net, @@ -332,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net, bool refcounted; sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, - dport, dif, &refcounted); + dport, dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -344,6 +346,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, int doff, const __be16 sport, const __be16 dport, + const int sdif, bool *refcounted) { struct sock *sk = skb_steal_sock(skb); @@ -355,7 +358,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, doff, iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), + iph->daddr, dport, inet_iif(skb), sdif, refcounted); } diff --git a/include/net/tcp.h b/include/net/tcp.h index 5173fecde495..2b89f1ab8552 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -840,6 +840,16 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) return false; } +/* TCP_SKB_CB reference means this can not be used from early demux */ +static inline int tcp_v4_sdif(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + return TCP_SKB_CB(skb)->header.h4.iif; +#endif + return 0; +} + /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. */ -- cgit v1.2.3 From 67359930e185c491b47cb958d5f1d6c1af4598a2 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:18 -0700 Subject: net: ipv4: add second dif to raw socket lookups Add a second device index, sdif, to raw socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/raw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index 57c33dd22ec4..99d26d0c4a19 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -26,7 +26,7 @@ extern struct proto raw_prot; extern struct raw_hashinfo raw_v4_hashinfo; struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, - __be32 laddr, int dif); + __be32 laddr, int dif, int sdif); int raw_abort(struct sock *sk, int err); void raw_icmp_error(struct sk_buff *, int, u32); -- cgit v1.2.3 From 60d9b03141243589dacd3136f3fcb4e6976df954 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:19 -0700 Subject: net: ipv4: add second dif to multicast source filter Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/igmp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 97caf1821de8..f8231854b5d6 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -118,7 +118,8 @@ extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct ip_msfilter __user *optval, int __user *optlen); extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, struct group_filter __user *optval, int __user *optlen); -extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif); +extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, + int dif, int sdif); extern void ip_mc_init_dev(struct in_device *); extern void ip_mc_destroy_dev(struct in_device *); extern void ip_mc_up(struct in_device *); -- cgit v1.2.3 From 1801b570dd2ae50b90231f283e79a9a94fbe7875 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:20 -0700 Subject: net: ipv6: add second dif to udp socket lookups Add a second device index, sdif, to udp socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Early demux lookups are handled in the next patch as part of INET_MATCH changes. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 10 ++++++++++ include/net/udp.h | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 474d6bbc158c..ac2da4e11d5e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -158,6 +158,16 @@ static inline bool inet6_is_jumbogram(const struct sk_buff *skb) return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM); } +/* can not be used in TCP layer after tcp_v6_fill_cb */ +static inline int inet6_sdif(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) + return IP6CB(skb)->iif; +#endif + return 0; +} + /* can not be used in TCP layer after tcp_v6_fill_cb */ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) { diff --git a/include/net/udp.h b/include/net/udp.h index 826c713d5a48..20dcdca4e85c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -298,7 +298,7 @@ struct sock *udp6_lib_lookup(struct net *net, struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *tbl, + int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport); -- cgit v1.2.3 From 4297a0ef085729af98adab9131d128c576ed3044 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:21 -0700 Subject: net: ipv6: add second dif to inet6 socket lookups Add a second device index, sdif, to inet6 socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. TCP moves the data in the cb. Prior to tcp_v4_rcv (e.g., early demux) the ingress index is obtained from IPCB using inet_sdif and after tcp_v4_rcv tcp_v4_sdif is used. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 22 +++++++++++++--------- include/net/tcp.h | 10 ++++++++++ 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index b87becacd9d3..6e91e38a31da 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -49,7 +49,8 @@ struct sock *__inet6_lookup_established(struct net *net, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, - const u16 hnum, const int dif); + const u16 hnum, const int dif, + const int sdif); struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, @@ -57,7 +58,8 @@ struct sock *inet6_lookup_listener(struct net *net, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, - const unsigned short hnum, const int dif); + const unsigned short hnum, + const int dif, const int sdif); static inline struct sock *__inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, @@ -66,24 +68,25 @@ static inline struct sock *__inet6_lookup(struct net *net, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif, + const int dif, const int sdif, bool *refcounted) { struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, - sport, daddr, hnum, dif); + sport, daddr, hnum, + dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, sdif); } static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, - int iif, + int iif, int sdif, bool *refcounted) { struct sock *sk = skb_steal_sock(skb); @@ -95,7 +98,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, doff, &ipv6_hdr(skb)->saddr, sport, &ipv6_hdr(skb)->daddr, ntohs(dport), - iif, refcounted); + iif, sdif, refcounted); } struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, @@ -107,13 +110,14 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, int inet6_hash(struct sock *sk); #endif /* IS_ENABLED(CONFIG_IPV6) */ -#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ +#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* _INET6_HASHTABLES_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 2b89f1ab8552..999f3efe572b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -827,6 +827,16 @@ static inline int tcp_v6_iif(const struct sk_buff *skb) return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; } + +/* TCP_SKB_CB reference means this can not be used from early demux */ +static inline int tcp_v6_sdif(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags)) + return TCP_SKB_CB(skb)->header.h6.iif; +#endif + return 0; +} #endif /* TCP_SKB_CB reference means this can not be used from early demux */ -- cgit v1.2.3 From 5108ab4bf446fa9ad2c71f5fc1d839067b72636f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:22 -0700 Subject: net: ipv6: add second dif to raw socket lookups Add a second device index, sdif, to raw socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/rawv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/rawv6.h b/include/net/rawv6.h index cbe4e9de1894..4addc5c988e0 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -6,7 +6,7 @@ extern struct raw_hashinfo raw_v6_hashinfo; struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, - const struct in6_addr *rmt_addr, int dif); + const struct in6_addr *rmt_addr, int dif, int sdif); int raw_abort(struct sock *sk, int err); -- cgit v1.2.3 From cf5f5cea270655dd49370760576c64b228583b79 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 4 Aug 2017 16:00:09 -0700 Subject: bpf: add support for sys_enter_* and sys_exit_* tracepoints Currently, bpf programs cannot be attached to sys_enter_* and sys_exit_* style tracepoints. The iovisor/bcc issue #748 (https://github.com/iovisor/bcc/issues/748) documents this issue. For example, if you try to attach a bpf program to tracepoints syscalls/sys_enter_newfstat, you will get the following error: # ./tools/trace.py t:syscalls:sys_enter_newfstat Ioctl(PERF_EVENT_IOC_SET_BPF): Invalid argument Failed to attach BPF to tracepoint The main reason is that syscalls/sys_enter_* and syscalls/sys_exit_* tracepoints are treated differently from other tracepoints and there is no bpf hook to it. This patch adds bpf support for these syscalls tracepoints by . permitting bpf attachment in ioctl PERF_EVENT_IOC_SET_BPF . calling bpf programs in perf_syscall_enter and perf_syscall_exit The legality of bpf program ctx access is also checked. Function trace_event_get_offsets returns correct max offset for each specific syscall tracepoint, which is compared against the maximum offset access in bpf program. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/syscalls.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3cb15ea48aee..c9170218e9e6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -172,8 +172,20 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata __used \ __attribute__((section("__syscalls_metadata"))) \ *__p_syscall_meta_##sname = &__syscall_meta_##sname; + +static inline int is_syscall_trace_event(struct trace_event_call *tp_event) +{ + return tp_event->class == &event_class_syscall_enter || + tp_event->class == &event_class_syscall_exit; +} + #else #define SYSCALL_METADATA(sname, nb, ...) + +static inline int is_syscall_trace_event(struct trace_event_call *tp_event) +{ + return 0; +} #endif #define SYSCALL_DEFINE0(sname) \ -- cgit v1.2.3 From 8113c095672f6504b23eba6edf4a57b5f7f744af Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 4 Aug 2017 21:31:43 -0700 Subject: net_sched: use void pointer for filter handle Now we use 'unsigned long fh' as a pointer in every place, it is safe to convert it to a void pointer now. This gets rid of many casts to pointer. Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- include/net/sch_generic.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 572083af02ac..0f78e6560b2d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -11,7 +11,7 @@ struct tcf_walker { int stop; int skip; int count; - int (*fn)(struct tcf_proto *, unsigned long node, struct tcf_walker *); + int (*fn)(struct tcf_proto *, void *node, struct tcf_walker *); }; int register_tcf_proto_ops(struct tcf_proto_ops *ops); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1c123e2b2415..e79f5ad1c5f3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -213,16 +213,16 @@ struct tcf_proto_ops { int (*init)(struct tcf_proto*); void (*destroy)(struct tcf_proto*); - unsigned long (*get)(struct tcf_proto*, u32 handle); + void* (*get)(struct tcf_proto*, u32 handle); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - unsigned long *, bool); - int (*delete)(struct tcf_proto*, unsigned long, bool*); + void **, bool); + int (*delete)(struct tcf_proto*, void *, bool*); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); /* rtnetlink specific */ - int (*dump)(struct net*, struct tcf_proto*, unsigned long, + int (*dump)(struct net*, struct tcf_proto*, void *, struct sk_buff *skb, struct tcmsg*); struct module *owner; -- cgit v1.2.3 From b04c80d3a7e228cfb832cdb1c9ce8151f174669c Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Sat, 5 Aug 2017 12:38:25 +0200 Subject: ipv6: sr: export SRH insertion functions This patch exports the seg6_do_srh_encap() and seg6_do_srh_inline() functions. It also removes the CONFIG_IPV6_SEG6_INLINE knob that enabled the compilation of seg6_do_srh_inline(). This function is now built-in. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/net/seg6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/seg6.h b/include/net/seg6.h index 4e0357517d79..a32abb040e1d 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -58,5 +58,7 @@ extern int seg6_iptunnel_init(void); extern void seg6_iptunnel_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); +extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); +extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); #endif -- cgit v1.2.3 From d1df6fd8a1d22d37cffa0075ab8ad423ce656777 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Sat, 5 Aug 2017 12:38:26 +0200 Subject: ipv6: sr: define core operations for seg6local lightweight tunnel This patch implements a new type of lightweight tunnel named seg6local. A seg6local lwt is defined by a type of action and a set of parameters. The action represents the operation to perform on the packets matching the lwt's route, and is not necessarily an encapsulation. The set of parameters are arguments for the processing function. Each action is defined in a struct seg6_action_desc within seg6_action_table[]. This structure contains the action, mandatory attributes, the processing function, and a static headroom size required by the action. The mandatory attributes are encoded as a bitmask field. The static headroom is set to a non-zero value when the processing function always add a constant number of bytes to the skb (e.g. the header size for encapsulations). To facilitate rtnetlink-related operations such as parsing, fill_encap, and cmp_encap, each type of action parameter is associated to three function pointers, in seg6_action_params[]. All actions defined in seg6_local.h are detailed in [1]. [1] https://tools.ietf.org/html/draft-filsfils-spring-srv6-network-programming-01 Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/linux/seg6_local.h | 6 ++++ include/net/seg6.h | 2 ++ include/uapi/linux/lwtunnel.h | 1 + include/uapi/linux/seg6_local.h | 68 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) create mode 100644 include/linux/seg6_local.h create mode 100644 include/uapi/linux/seg6_local.h (limited to 'include') diff --git a/include/linux/seg6_local.h b/include/linux/seg6_local.h new file mode 100644 index 000000000000..ee63e76fe0c7 --- /dev/null +++ b/include/linux/seg6_local.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_LOCAL_H +#define _LINUX_SEG6_LOCAL_H + +#include + +#endif diff --git a/include/net/seg6.h b/include/net/seg6.h index a32abb040e1d..5379f550f521 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -56,6 +56,8 @@ extern int seg6_init(void); extern void seg6_exit(void); extern int seg6_iptunnel_init(void); extern void seg6_iptunnel_exit(void); +extern int seg6_local_init(void); +extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 92724cba1eba..7fdd19ca7511 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -11,6 +11,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_IP6, LWTUNNEL_ENCAP_SEG6, LWTUNNEL_ENCAP_BPF, + LWTUNNEL_ENCAP_SEG6_LOCAL, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h new file mode 100644 index 000000000000..ef2d8c3e76c1 --- /dev/null +++ b/include/uapi/linux/seg6_local.h @@ -0,0 +1,68 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_SEG6_LOCAL_H +#define _UAPI_LINUX_SEG6_LOCAL_H + +#include + +enum { + SEG6_LOCAL_UNSPEC, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_SRH, + SEG6_LOCAL_TABLE, + SEG6_LOCAL_NH4, + SEG6_LOCAL_NH6, + SEG6_LOCAL_IIF, + SEG6_LOCAL_OIF, + __SEG6_LOCAL_MAX, +}; +#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1) + +enum { + SEG6_LOCAL_ACTION_UNSPEC = 0, + /* node segment */ + SEG6_LOCAL_ACTION_END = 1, + /* adjacency segment (IPv6 cross-connect) */ + SEG6_LOCAL_ACTION_END_X = 2, + /* lookup of next seg NH in table */ + SEG6_LOCAL_ACTION_END_T = 3, + /* decap and L2 cross-connect */ + SEG6_LOCAL_ACTION_END_DX2 = 4, + /* decap and IPv6 cross-connect */ + SEG6_LOCAL_ACTION_END_DX6 = 5, + /* decap and IPv4 cross-connect */ + SEG6_LOCAL_ACTION_END_DX4 = 6, + /* decap and lookup of DA in v6 table */ + SEG6_LOCAL_ACTION_END_DT6 = 7, + /* decap and lookup of DA in v4 table */ + SEG6_LOCAL_ACTION_END_DT4 = 8, + /* binding segment with insertion */ + SEG6_LOCAL_ACTION_END_B6 = 9, + /* binding segment with encapsulation */ + SEG6_LOCAL_ACTION_END_B6_ENCAP = 10, + /* binding segment with MPLS encap */ + SEG6_LOCAL_ACTION_END_BM = 11, + /* lookup last seg in table */ + SEG6_LOCAL_ACTION_END_S = 12, + /* forward to SR-unaware VNF with static proxy */ + SEG6_LOCAL_ACTION_END_AS = 13, + /* forward to SR-unaware VNF with masquerading */ + SEG6_LOCAL_ACTION_END_AM = 14, + + __SEG6_LOCAL_ACTION_MAX, +}; + +#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1) + +#endif -- cgit v1.2.3 From 6c2c1dcb185f1e44e1c895781dbaba40195234f9 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:39 +0300 Subject: net: dsa: Change DSA slave FDB API to be switchdev independent In order to support FDB add/del to be on a notifier chain the slave API need to be changed to be switchdev independent. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 0b1a0622b33c..ba11005dac8c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -386,13 +386,11 @@ struct dsa_switch_ops { * Forwarding database */ int (*port_fdb_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans); + const unsigned char *addr, u16 vid); void (*port_fdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans); + const unsigned char *addr, u16 vid); int (*port_fdb_del)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb); + const unsigned char *addr, u16 vid); int (*port_fdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, switchdev_obj_dump_cb_t *cb); -- cgit v1.2.3 From 1b6dd556c3045ca5fa31cc1e98a4a43afa680e1e Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:40 +0300 Subject: net: dsa: Remove prepare phase for FDB The prepare phase for FDB add is unneeded because most of DSA devices can have failures during bus transactions (SPI, I2C, etc.), thus, the prepare phase cannot guarantee success of the commit stage. The support for learning FDB through notification chain, which will be introduced in the following patches, will provide the ability to notify back the bridge about successful offload. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index ba11005dac8c..446fc438cb80 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -385,9 +385,7 @@ struct dsa_switch_ops { /* * Forwarding database */ - int (*port_fdb_prepare)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - void (*port_fdb_add)(struct dsa_switch *ds, int port, + int (*port_fdb_add)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); int (*port_fdb_del)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); -- cgit v1.2.3 From c069fcd82c571953b8aaf68769afe9ccb1aa7a9f Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:46 +0300 Subject: net: dsa: Remove support for bypass bridge port attributes/vlan set The bridge port attributes/vlan for DSA devices should be set only from bridge code. Furthermore, The vlans are synced totally with the bridge so there is no need for special dump support. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 446fc438cb80..d7b9bdd0e768 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -378,10 +378,6 @@ struct dsa_switch_ops { struct switchdev_trans *trans); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); - int (*port_vlan_dump)(struct dsa_switch *ds, int port, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb); - /* * Forwarding database */ -- cgit v1.2.3 From dc0cbff3ff9fe331160c2be2b3f47564e247137d Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:48 +0300 Subject: net: dsa: Remove redundant MDB dump support Currently the MDB HW database is synced with the bridge's one, thus, There is no need to support special dump functionality. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index d7b9bdd0e768..4ef185997b6f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -400,10 +400,6 @@ struct dsa_switch_ops { struct switchdev_trans *trans); int (*port_mdb_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); - int (*port_mdb_dump)(struct dsa_switch *ds, int port, - struct switchdev_obj_port_mdb *mdb, - switchdev_obj_dump_cb_t *cb); - /* * RXNFC */ -- cgit v1.2.3 From 2bedde1abbef5eec211308f0293dd7681b0513ec Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:49 +0300 Subject: net: dsa: Move FDB dump implementation inside DSA >From all switchdev devices only DSA requires special FDB dump. This is due to lack of ability for syncing the hardware learned FDBs with the bridge. Due to this it is removed from switchdev and moved inside DSA. Signed-off-by: Arkadi Sharshevsky Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++-- include/net/switchdev.h | 12 ------------ 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 4ef185997b6f..a4f66dbb4b7c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -272,6 +272,8 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) return ds->rtable[dst->cpu_dp->ds->index]; } +typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, + bool is_static, void *data); struct dsa_switch_ops { /* * Legacy probing. @@ -386,8 +388,7 @@ struct dsa_switch_ops { int (*port_fdb_del)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); int (*port_fdb_dump)(struct dsa_switch *ds, int port, - struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb); + dsa_fdb_dump_cb_t *cb, void *data); /* * Multicast database diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8ae9e3b6392e..d2637a685ca6 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -208,9 +208,6 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); -int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, - struct net_device *filter_dev, int *idx); void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); @@ -309,15 +306,6 @@ static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], return -EOPNOTSUPP; } -static inline int switchdev_port_fdb_dump(struct sk_buff *skb, - struct netlink_callback *cb, - struct net_device *dev, - struct net_device *filter_dev, - int *idx) -{ - return *idx; -} - static inline bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { -- cgit v1.2.3 From 29ab586c3d83f81c435e269cace9a1619afb5bbd Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:51 +0300 Subject: net: switchdev: Remove bridge bypass support from switchdev Currently the bridge port flags, vlans, FDBs and MDBs can be offloaded through the bridge code, making the switchdev's SELF bridge bypass implementation to be redundant. This implies several changes: - No need for dump infra in switchdev, DSA's special case is handled privately. - Remove obj_dump from switchdev_ops. - FDBs are removed from obj_add/del routines, due to the fact that they are offloaded through the bridge notification chain. - The switchdev_port_bridge_xx() and switchdev_port_fdb_xx() functions can be removed. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Vivien Didelot Acked-by: Jiri Pirko Reviewed-by: Ivan Vecera Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/switchdev.h | 75 ------------------------------------------------- 1 file changed, 75 deletions(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d2637a685ca6..d767b7991887 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -74,7 +74,6 @@ struct switchdev_attr { enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, - SWITCHDEV_OBJ_ID_PORT_FDB, SWITCHDEV_OBJ_ID_PORT_MDB, }; @@ -97,17 +96,6 @@ struct switchdev_obj_port_vlan { #define SWITCHDEV_OBJ_PORT_VLAN(obj) \ container_of(obj, struct switchdev_obj_port_vlan, obj) -/* SWITCHDEV_OBJ_ID_PORT_FDB */ -struct switchdev_obj_port_fdb { - struct switchdev_obj obj; - unsigned char addr[ETH_ALEN]; - u16 vid; - u16 ndm_state; -}; - -#define SWITCHDEV_OBJ_PORT_FDB(obj) \ - container_of(obj, struct switchdev_obj_port_fdb, obj) - /* SWITCHDEV_OBJ_ID_PORT_MDB */ struct switchdev_obj_port_mdb { struct switchdev_obj obj; @@ -135,8 +123,6 @@ typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj); * @switchdev_port_obj_add: Add an object to port (see switchdev_obj_*). * * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj_*). - * - * @switchdev_port_obj_dump: Dump port objects (see switchdev_obj_*). */ struct switchdev_ops { int (*switchdev_port_attr_get)(struct net_device *dev, @@ -149,9 +135,6 @@ struct switchdev_ops { struct switchdev_trans *trans); int (*switchdev_port_obj_del)(struct net_device *dev, const struct switchdev_obj *obj); - int (*switchdev_port_obj_dump)(struct net_device *dev, - struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb); }; enum switchdev_notifier_type { @@ -189,25 +172,10 @@ int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj); int switchdev_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj); -int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info); -int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask, - int nlflags); -int switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, - u16 vid, u16 nlm_flags); -int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, - u16 vid); void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); @@ -246,13 +214,6 @@ static inline int switchdev_port_obj_del(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_port_obj_dump(struct net_device *dev, - const struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb) -{ - return -EOPNOTSUPP; -} - static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; @@ -270,42 +231,6 @@ static inline int call_switchdev_notifiers(unsigned long val, return NOTIFY_DONE; } -static inline int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, - u32 seq, struct net_device *dev, - u32 filter_mask, int nlflags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, - u16 vid, u16 nlm_flags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - return -EOPNOTSUPP; -} - static inline bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { -- cgit v1.2.3 From 0ec9eb90feec4933637fbde9d5bfbc3b62aea218 Mon Sep 17 00:00:00 2001 From: Chi-Hsien Lin Date: Thu, 3 Aug 2017 17:37:58 +0800 Subject: brcmfmac: Add support for CYW4373 SDIO/USB chipset Add support for CYW4373 SDIO/USB chipset. CYW4373 is a 1x1 dual-band 11ac chipset with 20/40/80Mhz channel support. It's a WiFi/BT combo device. Signed-off-by: Chi-Hsien Lin Reviewed-by: Arend van Spriel Signed-off-by: Kalle Valo --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index b733eb404ffc..abacd5484bc0 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -39,6 +39,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 #define SDIO_DEVICE_ID_BROADCOM_4356 0x4356 +#define SDIO_DEVICE_ID_CYPRESS_4373 0x4373 #define SDIO_VENDOR_ID_INTEL 0x0089 #define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX 0x1402 -- cgit v1.2.3 From f1174f77b50c94eecaa658fdc56fa69b421de4b8 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 7 Aug 2017 15:26:19 +0100 Subject: bpf/verifier: rework value tracking Unifies adjusted and unadjusted register value types (e.g. FRAME_POINTER is now just a PTR_TO_STACK with zero offset). Tracks value alignment by means of tracking known & unknown bits. This also replaces the 'reg->imm' (leading zero bits) calculations for (what were) UNKNOWN_VALUEs. If pointer leaks are allowed, and adjust_ptr_min_max_vals returns -EACCES, treat the pointer as an unknown scalar and try again, because we might be able to conclude something about the result (e.g. pointer & 0x40 is either 0 or 0x40). Verifier hooks in the netronome/nfp driver were changed to match the new data structures. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- include/linux/bpf.h | 34 +++++++------------ include/linux/bpf_verifier.h | 34 +++++++++++-------- include/linux/tnum.h | 79 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+), 35 deletions(-) create mode 100644 include/linux/tnum.h (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6353c7474dba..39229c455cba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -117,35 +117,25 @@ enum bpf_access_type { }; /* types of values stored in eBPF registers */ +/* Pointer types represent: + * pointer + * pointer + imm + * pointer + (u16) var + * pointer + (u16) var + imm + * if (range > 0) then [ptr, ptr + range - off) is safe to access + * if (id > 0) means that some 'var' was added + * if (off > 0) means that 'imm' was added + */ enum bpf_reg_type { NOT_INIT = 0, /* nothing was written into register */ - UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ + SCALAR_VALUE, /* reg doesn't contain a valid pointer */ PTR_TO_CTX, /* reg points to bpf_context */ CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ PTR_TO_MAP_VALUE, /* reg points to map element value */ PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ - FRAME_PTR, /* reg == frame_pointer */ - PTR_TO_STACK, /* reg == frame_pointer + imm */ - CONST_IMM, /* constant integer value */ - - /* PTR_TO_PACKET represents: - * skb->data - * skb->data + imm - * skb->data + (u16) var - * skb->data + (u16) var + imm - * if (range > 0) then [ptr, ptr + range - off) is safe to access - * if (id > 0) means that some 'var' was added - * if (off > 0) menas that 'imm' was added - */ - PTR_TO_PACKET, + PTR_TO_STACK, /* reg == frame_pointer + offset */ + PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ - - /* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map - * elem value. We only allow this if we can statically verify that - * access from this register are going to fall within the size of the - * map element. - */ - PTR_TO_MAP_VALUE_ADJ, }; struct bpf_prog; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 8e5d31f6faef..85936fa92d12 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -9,6 +9,7 @@ #include /* for enum bpf_reg_type */ #include /* for MAX_BPF_STACK */ +#include /* Just some arbitrary values so we can safely do math without overflowing and * are obviously wrong for any sort of memory access. @@ -19,30 +20,37 @@ struct bpf_reg_state { enum bpf_reg_type type; union { - /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ - s64 imm; - - /* valid when type == PTR_TO_PACKET* */ - struct { - u16 off; - u16 range; - }; + /* valid when type == PTR_TO_PACKET */ + u16 range; /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL */ struct bpf_map *map_ptr; }; + /* Fixed part of pointer offset, pointer types only */ + s32 off; + /* For PTR_TO_PACKET, used to find other pointers with the same variable + * offset, so they can share range knowledge. + * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we + * came from, when one is tested for != NULL. + */ u32 id; + /* These three fields must be last. See states_equal() */ + /* For scalar types (SCALAR_VALUE), this represents our knowledge of + * the actual value. + * For pointer types, this represents the variable part of the offset + * from the pointed-to object, and is shared with all bpf_reg_states + * with the same id as us. + */ + struct tnum var_off; /* Used to determine if any memory access using this register will - * result in a bad access. These two fields must be last. - * See states_equal() + * result in a bad access. + * These refer to the same value as var_off, not necessarily the actual + * contents of the register. */ s64 min_value; u64 max_value; - u32 min_align; - u32 aux_off; - u32 aux_off_align; bool value_from_signed; }; diff --git a/include/linux/tnum.h b/include/linux/tnum.h new file mode 100644 index 000000000000..a0b07bf1842b --- /dev/null +++ b/include/linux/tnum.h @@ -0,0 +1,79 @@ +/* tnum: tracked (or tristate) numbers + * + * A tnum tracks knowledge about the bits of a value. Each bit can be either + * known (0 or 1), or unknown (x). Arithmetic operations on tnums will + * propagate the unknown bits such that the tnum result represents all the + * possible results for possible values of the operands. + */ +#include + +struct tnum { + u64 value; + u64 mask; +}; + +/* Constructors */ +/* Represent a known constant as a tnum. */ +struct tnum tnum_const(u64 value); +/* A completely unknown value */ +extern const struct tnum tnum_unknown; + +/* Arithmetic and logical ops */ +/* Shift a tnum left (by a fixed shift) */ +struct tnum tnum_lshift(struct tnum a, u8 shift); +/* Shift a tnum right (by a fixed shift) */ +struct tnum tnum_rshift(struct tnum a, u8 shift); +/* Add two tnums, return @a + @b */ +struct tnum tnum_add(struct tnum a, struct tnum b); +/* Subtract two tnums, return @a - @b */ +struct tnum tnum_sub(struct tnum a, struct tnum b); +/* Bitwise-AND, return @a & @b */ +struct tnum tnum_and(struct tnum a, struct tnum b); +/* Bitwise-OR, return @a | @b */ +struct tnum tnum_or(struct tnum a, struct tnum b); +/* Bitwise-XOR, return @a ^ @b */ +struct tnum tnum_xor(struct tnum a, struct tnum b); +/* Multiply two tnums, return @a * @b */ +struct tnum tnum_mul(struct tnum a, struct tnum b); + +/* Return a tnum representing numbers satisfying both @a and @b */ +struct tnum tnum_intersect(struct tnum a, struct tnum b); + +/* Return @a with all but the lowest @size bytes cleared */ +struct tnum tnum_cast(struct tnum a, u8 size); + +/* Returns true if @a is a known constant */ +static inline bool tnum_is_const(struct tnum a) +{ + return !a.mask; +} + +/* Returns true if @a == tnum_const(@b) */ +static inline bool tnum_equals_const(struct tnum a, u64 b) +{ + return tnum_is_const(a) && a.value == b; +} + +/* Returns true if @a is completely unknown */ +static inline bool tnum_is_unknown(struct tnum a) +{ + return !~a.mask; +} + +/* Returns true if @a is known to be a multiple of @size. + * @size must be a power of two. + */ +bool tnum_is_aligned(struct tnum a, u64 size); + +/* Returns true if @b represents a subset of @a. */ +bool tnum_in(struct tnum a, struct tnum b); + +/* Formatting functions. These have snprintf-like semantics: they will write + * up to @size bytes (including the terminating NUL byte), and return the number + * of bytes (excluding the terminating NUL) which would have been written had + * sufficient space been available. (Thus tnum_sbin always returns 64.) + */ +/* Format a tnum as a pair of hex numbers (value; mask) */ +int tnum_strn(char *str, size_t size, struct tnum a); +/* Format a tnum as tristate binary expansion */ +int tnum_sbin(char *str, size_t size, struct tnum a); -- cgit v1.2.3 From b03c9f9fdc37dab81ea04d5dacdc5995d4c224c2 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 7 Aug 2017 15:26:36 +0100 Subject: bpf/verifier: track signed and unsigned min/max values Allows us to, sometimes, combine information from a signed check of one bound and an unsigned check of the other. We now track the full range of possible values, rather than restricting ourselves to [0, 1<<30) and considering anything beyond that as unknown. While this is probably not necessary, it makes the code more straightforward and symmetrical between signed and unsigned bounds. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 23 ++++++++++++++--------- include/linux/tnum.h | 2 ++ 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 85936fa92d12..c61c3033522e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -11,11 +11,15 @@ #include /* for MAX_BPF_STACK */ #include - /* Just some arbitrary values so we can safely do math without overflowing and - * are obviously wrong for any sort of memory access. - */ -#define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) -#define BPF_REGISTER_MIN_RANGE -1 +/* Maximum variable offset umax_value permitted when resolving memory accesses. + * In practice this is far bigger than any realistic pointer offset; this limit + * ensures that umax_value + (int)off + (int)size cannot overflow a u64. + */ +#define BPF_MAX_VAR_OFF (1ULL << 31) +/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures + * that converting umax_value to int cannot overflow. + */ +#define BPF_MAX_VAR_SIZ INT_MAX struct bpf_reg_state { enum bpf_reg_type type; @@ -36,7 +40,7 @@ struct bpf_reg_state { * came from, when one is tested for != NULL. */ u32 id; - /* These three fields must be last. See states_equal() */ + /* These five fields must be last. See states_equal() */ /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset @@ -49,9 +53,10 @@ struct bpf_reg_state { * These refer to the same value as var_off, not necessarily the actual * contents of the register. */ - s64 min_value; - u64 max_value; - bool value_from_signed; + s64 smin_value; /* minimum possible (s64)value */ + s64 smax_value; /* maximum possible (s64)value */ + u64 umin_value; /* minimum possible (u64)value */ + u64 umax_value; /* maximum possible (u64)value */ }; enum bpf_stack_slot_type { diff --git a/include/linux/tnum.h b/include/linux/tnum.h index a0b07bf1842b..0d2d3da46139 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -17,6 +17,8 @@ struct tnum { struct tnum tnum_const(u64 value); /* A completely unknown value */ extern const struct tnum tnum_unknown; +/* A value that's unknown except that @min <= value <= @max */ +struct tnum tnum_range(u64 min, u64 max); /* Arithmetic and logical ops */ /* Shift a tnum left (by a fixed shift) */ -- cgit v1.2.3 From feca7d8c135bc1527b244fe817b8b6498066ccec Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Tue, 8 Aug 2017 20:23:49 +0200 Subject: net: ipv6: avoid overhead when no custom FIB rules are installed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user hasn't installed any custom rules, don't go through the whole FIB rules layer. This is pretty similar to f4530fa574df (ipv4: Avoid overhead when no custom FIB rules are installed). Using a micro-benchmark module [1], timing ip6_route_output() with get_cycles(), with 40,000 routes in the main routing table, before this patch: min=606 max=12911 count=627 average=1959 95th=4903 90th=3747 50th=1602 mad=821 table=254 avgdepth=21.8 maxdepth=39 value │ ┊ count 600 │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ 199 880 │▒▒▒░░░░░░░░░░░░░░░░ 43 1160 │▒▒▒░░░░░░░░░░░░░░░░░░░░ 48 1440 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░ 43 1720 │▒▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░ 59 2000 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 50 2280 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 26 2560 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 31 2840 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 28 3120 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 17 3400 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 17 3680 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 3960 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 11 4240 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 6 4520 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 6 4800 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 9 After: min=544 max=11687 count=627 average=1776 95th=4546 90th=3585 50th=1227 mad=565 table=254 avgdepth=21.8 maxdepth=39 value │ ┊ count 540 │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ 201 800 │▒▒▒▒▒░░░░░░░░░░░░░░░░ 63 1060 │▒▒▒▒▒░░░░░░░░░░░░░░░░░░░░░ 68 1320 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░ 39 1580 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 32 1840 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 32 2100 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 34 2360 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 33 2620 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 26 2880 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 22 3140 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 9 3400 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 3660 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 9 3920 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 4180 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 4440 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 At the frequency of the host during the bench (~ 3.7 GHz), this is about a 100 ns difference on the median value. A next step would be to collapse local and main tables, as in 0ddcf43d5d4a (ipv4: FIB Local/MAIN table collapse). [1]: https://github.com/vincentbernat/network-lab/blob/master/lab-routes-ipv6/kbench_mod.c Signed-off-by: Vincent Bernat Reviewed-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index abdf3b40303b..0e50bf3ed097 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -65,6 +65,7 @@ struct netns_ipv6 { unsigned int ip6_rt_gc_expire; unsigned long ip6_rt_last_gc; #ifdef CONFIG_IPV6_MULTIPLE_TABLES + bool fib6_has_custom_rules; struct rt6_info *ip6_prohibit_entry; struct rt6_info *ip6_blk_hole_entry; struct fib6_table *fib6_local_tbl; -- cgit v1.2.3 From 0a4a060bb204c47825eb4f7c27f66fc7ee85d508 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 9 Aug 2017 19:09:44 -0400 Subject: sock: fix zerocopy_success regression with msg_zerocopy Do not use uarg->zerocopy outside msg_zerocopy. In other paths the field is not explicitly initialized and aliases another field. Those paths have only one reference so do not need this intermediate variable. Call uarg->callback directly. Fixes: 1f8b977ab32d ("sock: enable MSG_ZEROCOPY") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8c0708d2e5e6..7594e19bce62 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1273,8 +1273,13 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) struct ubuf_info *uarg = skb_zcopy(skb); if (uarg) { - uarg->zerocopy = uarg->zerocopy && zerocopy; - sock_zerocopy_put(uarg); + if (uarg->callback == sock_zerocopy_callback) { + uarg->zerocopy = uarg->zerocopy && zerocopy; + sock_zerocopy_put(uarg); + } else { + uarg->callback(uarg, zerocopy); + } + skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; } } -- cgit v1.2.3 From 92b31a9af73b3a3fc801899335d6c47966351830 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 10 Aug 2017 01:39:55 +0200 Subject: bpf: add BPF_J{LT,LE,SLT,SLE} instructions Currently, eBPF only understands BPF_JGT (>), BPF_JGE (>=), BPF_JSGT (s>), BPF_JSGE (s>=) instructions, this means that particularly *JLT/*JLE counterparts involving immediates need to be rewritten from e.g. X < [IMM] by swapping arguments into [IMM] > X, meaning the immediate first is required to be loaded into a register Y := [IMM], such that then we can compare with Y > X. Note that the destination operand is always required to be a register. This has the downside of having unnecessarily increased register pressure, meaning complex program would need to spill other registers temporarily to stack in order to obtain an unused register for the [IMM]. Loading to registers will thus also affect state pruning since we need to account for that register use and potentially those registers that had to be spilled/filled again. As a consequence slightly more stack space might have been used due to spilling, and BPF programs are a bit longer due to extra code involving the register load and potentially required spill/fills. Thus, add BPF_JLT (<), BPF_JLE (<=), BPF_JSLT (s<), BPF_JSLE (s<=) counterparts to the eBPF instruction set. Modifying LLVM to remove the NegateCC() workaround in a PoC patch at [1] and allowing it to also emit the new instructions resulted in cilium's BPF programs that are injected into the fast-path to have a reduced program length in the range of 2-3% (e.g. accumulated main and tail call sections from one of the object file reduced from 4864 to 4729 insns), reduced complexity in the range of 10-30% (e.g. accumulated sections reduced in one of the cases from 116432 to 88428 insns), and reduced stack usage in the range of 1-5% (e.g. accumulated sections from one of the object files reduced from 824 to 784b). The modification for LLVM will be incorporated in a backwards compatible way. Plan is for LLVM to have i) a target specific option to offer a possibility to explicitly enable the extension by the user (as we have with -m target specific extensions today for various CPU insns), and ii) have the kernel checked for presence of the extensions and enable them transparently when the user is selecting more aggressive options such as -march=native in a bpf target context. (Other frontends generating BPF byte code, e.g. ply can probe the kernel directly for its code generation.) [1] https://github.com/borkmann/llvm/tree/bpf-insns Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1d06be1569b1..91da8371a2d0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -30,9 +30,14 @@ #define BPF_FROM_LE BPF_TO_LE #define BPF_FROM_BE BPF_TO_BE +/* jmp encodings */ #define BPF_JNE 0x50 /* jump != */ +#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ +#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ +#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ -- cgit v1.2.3 From b97bac64a589d0158cf866e8995e831030f68f4f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Aug 2017 20:41:48 +0200 Subject: rtnetlink: make rtnl_register accept a flags parameter This change allows us to later indicate to rtnetlink core that certain doit functions should be called without acquiring rtnl_mutex. This change should have no effect, we simply replace the last (now unused) calcit argument with the new flag. Signed-off-by: Florian Westphal Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index abe6b733d473..ac32460a0adb 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -7,12 +7,11 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); -typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, rtnl_calcit_func); + rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); void rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, rtnl_calcit_func); + rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); -- cgit v1.2.3 From 62256f98f244fbb1c7a10465e1ee412f209d8978 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Aug 2017 20:41:52 +0200 Subject: rtnetlink: add RTNL_FLAG_DOIT_UNLOCKED Allow callers to tell rtnetlink core that its doit callback should be invoked without holding rtnl mutex. Signed-off-by: Florian Westphal Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index ac32460a0adb..21837ca68ecc 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -8,6 +8,10 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); +enum rtnl_link_flags { + RTNL_FLAG_DOIT_UNLOCKED = 1, +}; + int __rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); void rtnl_register(int protocol, int msgtype, -- cgit v1.2.3 From 68277a2c9d32fd9090247a5c08aaf1353049c0b1 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 9 Aug 2017 14:41:16 +0200 Subject: net-next: dsa: move struct dsa_device_ops to the global header file We need to access this struct from within the flow_dissector to fix dissection for packets coming in on DSA devices. Signed-off-by: Muciri Gatimu Signed-off-by: Shashidhar Lakkavalli Signed-off-by: John Crispin Signed-off-by: David S. Miller --- include/net/dsa.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index a4f66dbb4b7c..65d7804c6f69 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -101,6 +101,13 @@ struct dsa_platform_data { struct packet_type; +struct dsa_device_ops { + struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev); +}; + struct dsa_switch_tree { struct list_head list; -- cgit v1.2.3 From 598a968011ffc4d624934995fe46d06bd450cdf4 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 9 Aug 2017 14:41:17 +0200 Subject: net-next: dsa: add flow_dissect callback to struct dsa_device_ops When the flow dissector first sees packets coming in on a DSA devices the 802.3 header wont be located where the code expects it to be as the tag is still present. Adding this new callback allows a DSA device to provide a new function that the flow_dissector can use to get the correct protocol and offset of the network header. Signed-off-by: Muciri Gatimu Signed-off-by: Shashidhar Lakkavalli Signed-off-by: John Crispin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 65d7804c6f69..7f46b521313e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -106,6 +106,8 @@ struct dsa_device_ops { struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); + int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, + int *offset); }; struct dsa_switch_tree { -- cgit v1.2.3 From 077fbac405bfc6d41419ad6c1725804ad4e9887c Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 11 Aug 2017 02:11:33 +0900 Subject: net: xfrm: support setting an output mark. On systems that use mark-based routing it may be necessary for routing lookups to use marks in order for packets to be routed correctly. An example of such a system is Android, which uses socket marks to route packets via different networks. Currently, routing lookups in tunnel mode always use a mark of zero, making routing incorrect on such systems. This patch adds a new output_mark element to the xfrm state and a corresponding XFRMA_OUTPUT_MARK netlink attribute. The output mark differs from the existing xfrm mark in two ways: 1. The xfrm mark is used to match xfrm policies and states, while the xfrm output mark is used to set the mark (and influence the routing) of the packets emitted by those states. 2. The existing mark is constrained to be a subset of the bits of the originating socket or transformed packet, but the output mark is arbitrary and depends only on the state. The use of a separate mark provides additional flexibility. For example: - A packet subject to two transforms (e.g., transport mode inside tunnel mode) can have two different output marks applied to it, one for the transport mode SA and one for the tunnel mode SA. - On a system where socket marks determine routing, the packets emitted by an IPsec tunnel can be routed based on a mark that is determined by the tunnel, not by the marks of the unencrypted packets. - Support for setting the output marks can be introduced without breaking any existing setups that employ both mark-based routing and xfrm tunnel mode. Simply changing the code to use the xfrm mark for routing output packets could xfrm mark could change behaviour in a way that breaks these setups. If the output mark is unspecified or set to zero, the mark is not set or changed. Tested: make allyesconfig; make -j64 Tested: https://android-review.googlesource.com/452776 Signed-off-by: Lorenzo Colitti Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 9 ++++++--- include/uapi/linux/xfrm.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 18d7de34a5c3..9c7b70cce6d6 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -165,6 +165,7 @@ struct xfrm_state { int header_len; int trailer_len; u32 extra_flags; + u32 output_mark; } props; struct xfrm_lifetime_cfg lft; @@ -298,10 +299,12 @@ struct xfrm_policy_afinfo { struct dst_entry *(*dst_lookup)(struct net *net, int tos, int oif, const xfrm_address_t *saddr, - const xfrm_address_t *daddr); + const xfrm_address_t *daddr, + u32 mark); int (*get_saddr)(struct net *net, int oif, xfrm_address_t *saddr, - xfrm_address_t *daddr); + xfrm_address_t *daddr, + u32 mark); void (*decode_session)(struct sk_buff *skb, struct flowi *fl, int reverse); @@ -1640,7 +1643,7 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, - int family); + int family, u32 mark); struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 2b384ff09fa0..5fe7370a2bef 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -304,6 +304,7 @@ enum xfrm_attr_type_t { XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ XFRMA_PAD, XFRMA_OFFLOAD_DEV, /* struct xfrm_state_offload */ + XFRMA_OUTPUT_MARK, /* __u32 */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) -- cgit v1.2.3 From afa6c45429f6e5ddd1eb6b77a36358f9c4b789da Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:45 +0800 Subject: sctp: remove the unused typedef sctp_packet_phandler_t Remove this function typedef, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index fbe6e81b889b..73e9509c057e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -657,8 +657,6 @@ struct sctp_sockaddr_entry { #define SCTP_ADDRESS_TICK_DELAY 500 -typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *); - /* This structure holds lists of chunks as we are assembling for * transmission. */ -- cgit v1.2.3 From edf903f83ebca988e04a39f515ab6eacb92055df Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:46 +0800 Subject: sctp: remove the typedef sctp_sender_hb_info_t This patch is to remove the typedef sctp_sender_hb_info_t, and replace with struct sctp_sender_hb_info in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 73e9509c057e..60033a263193 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -371,12 +371,12 @@ union sctp_params { * chunk is sent and the destination transport address to which this * HEARTBEAT is sent (see Section 8.3). */ -typedef struct sctp_sender_hb_info { +struct sctp_sender_hb_info { struct sctp_paramhdr param_hdr; union sctp_addr daddr; unsigned long sent_at; __u64 hb_nonce; -} sctp_sender_hb_info_t; +}; int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp); -- cgit v1.2.3 From 74439f344b1becd57ec822bc0e2c1a4cbf240a53 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:47 +0800 Subject: sctp: remove the typedef sctp_endpoint_type_t This patch is to remove the typedef sctp_endpoint_type_t, and replace with enum sctp_endpoint_type in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 60033a263193..937187f3bffc 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1142,10 +1142,10 @@ int sctp_is_ep_boundall(struct sock *sk); /* What type of endpoint? */ -typedef enum { +enum sctp_endpoint_type { SCTP_EP_TYPE_SOCKET, SCTP_EP_TYPE_ASSOCIATION, -} sctp_endpoint_type_t; +}; /* * A common base class to bridge the implmentation view of a @@ -1169,7 +1169,7 @@ struct sctp_ep_common { int hashent; /* Runtime type information. What kind of endpoint is this? */ - sctp_endpoint_type_t type; + enum sctp_endpoint_type type; /* Some fields to help us manage this object. * refcnt - Reference count access to this object. -- cgit v1.2.3 From a05437ac5deb100f94e290ad4c5eef3e78f4b6bb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:48 +0800 Subject: sctp: remove the typedef sctp_cmsgs_t This patch is to remove the typedef sctp_cmsgs_t, and replace with struct sctp_cmsgs in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 937187f3bffc..e171d3a3d2b4 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1985,11 +1985,11 @@ int sctp_cmp_addr_exact(const union sctp_addr *ss1, struct sctp_chunk *sctp_get_ecne_prepend(struct sctp_association *asoc); /* A convenience structure to parse out SCTP specific CMSGs. */ -typedef struct sctp_cmsgs { +struct sctp_cmsgs { struct sctp_initmsg *init; struct sctp_sndrcvinfo *srinfo; struct sctp_sndinfo *sinfo; -} sctp_cmsgs_t; +}; /* Structure for tracking memory objects */ typedef struct { -- cgit v1.2.3 From d38ef5ae35b0960f3219f1cf0203e19819e757c7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:49 +0800 Subject: sctp: remove the typedef sctp_dbg_objcnt_entry_t This patch is to remove the typedef sctp_dbg_objcnt_entry_t, and replace with struct sctp_dbg_objcnt_entry in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e171d3a3d2b4..b6d75b37f84d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1992,9 +1992,9 @@ struct sctp_cmsgs { }; /* Structure for tracking memory objects */ -typedef struct { +struct sctp_dbg_objcnt_entry { char *label; atomic_t *counter; -} sctp_dbg_objcnt_entry_t; +}; #endif /* __sctp_structs_h__ */ -- cgit v1.2.3 From b7ef2618a0bf75c1e480b05739b0c5f2a42081cd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:50 +0800 Subject: sctp: remove the typedef sctp_socket_type_t This patch is to remove the typedef sctp_socket_type_t, and replace with enum sctp_socket_type in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 3 ++- include/net/sctp/structs.h | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 24ff7931d38c..06b4f515e157 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -550,7 +550,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport) /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) -static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style) +static inline int __sctp_style(const struct sock *sk, + enum sctp_socket_type style) { return sctp_sk(sk)->type == style; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index b6d75b37f84d..0477945de1a3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -150,18 +150,18 @@ extern struct sctp_globals { #define sctp_checksum_disable (sctp_globals.checksum_disable) /* SCTP Socket type: UDP or TCP style. */ -typedef enum { +enum sctp_socket_type { SCTP_SOCKET_UDP = 0, SCTP_SOCKET_UDP_HIGH_BANDWIDTH, SCTP_SOCKET_TCP -} sctp_socket_type_t; +}; /* Per socket SCTP information. */ struct sctp_sock { /* inet_sock has to be the first member of sctp_sock */ struct inet_sock inet; /* What kind of a socket is this? */ - sctp_socket_type_t type; + enum sctp_socket_type type; /* PF_ family specific functions. */ struct sctp_pf *pf; -- cgit v1.2.3 From e2c3108ab25b4dbab3821e8b6084bfb73afb655c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:51 +0800 Subject: sctp: remove the typedef sctp_cmd_t This patch is to remove the typedef sctp_cmd_t, and replace with enum sctp_cmd in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 376cb78b6247..4e9e589b8f18 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -196,15 +196,15 @@ static inline sctp_arg_t SCTP_NULL(void) return retval; } -typedef struct { +struct sctp_cmd { sctp_arg_t obj; sctp_verb_t verb; -} sctp_cmd_t; +}; typedef struct { - sctp_cmd_t cmds[SCTP_MAX_NUM_COMMANDS]; - sctp_cmd_t *last_used_slot; - sctp_cmd_t *next_cmd; + struct sctp_cmd cmds[SCTP_MAX_NUM_COMMANDS]; + struct sctp_cmd *last_used_slot; + struct sctp_cmd *next_cmd; } sctp_cmd_seq_t; @@ -228,7 +228,7 @@ static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq) static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, sctp_arg_t obj) { - sctp_cmd_t *cmd = seq->last_used_slot - 1; + struct sctp_cmd *cmd = seq->last_used_slot - 1; BUG_ON(cmd < seq->cmds); @@ -240,7 +240,7 @@ static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, /* Return the next command structure in an sctp_cmd_seq. * Return NULL at the end of the sequence. */ -static inline sctp_cmd_t *sctp_next_cmd(sctp_cmd_seq_t *seq) +static inline struct sctp_cmd *sctp_next_cmd(sctp_cmd_seq_t *seq) { if (seq->next_cmd <= seq->last_used_slot) return NULL; -- cgit v1.2.3 From a85bbeb221d860097859f110ba1321f2b0653f07 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:52 +0800 Subject: sctp: remove the typedef sctp_cmd_seq_t This patch is to remove the typedef sctp_cmd_seq_t, and replace with struct sctp_cmd_seq in the places where it's using this typedef. Note that it doesn't fix many indents although it should, as sctp_disposition_t's removal would mess them up again. So better to fix them when removing sctp_disposition_t in the later patch. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 14 +++++++------- include/net/sctp/sm.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 4e9e589b8f18..cbf6798866ef 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -110,7 +110,7 @@ typedef enum { SCTP_CMD_LAST } sctp_verb_t; -/* How many commands can you put in an sctp_cmd_seq_t? +/* How many commands can you put in an struct sctp_cmd_seq? * This is a rather arbitrary number, ideally derived from a careful * analysis of the state functions, but in reality just taken from * thin air in the hopes othat we don't trigger a kernel panic. @@ -201,17 +201,17 @@ struct sctp_cmd { sctp_verb_t verb; }; -typedef struct { +struct sctp_cmd_seq { struct sctp_cmd cmds[SCTP_MAX_NUM_COMMANDS]; struct sctp_cmd *last_used_slot; struct sctp_cmd *next_cmd; -} sctp_cmd_seq_t; +}; /* Initialize a block of memory as a command sequence. * Return 0 if the initialization fails. */ -static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq) +static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq) { /* cmds[] is filled backwards to simplify the overflow BUG() check */ seq->last_used_slot = seq->cmds + SCTP_MAX_NUM_COMMANDS; @@ -220,12 +220,12 @@ static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq) } -/* Add a command to an sctp_cmd_seq_t. +/* Add a command to an struct sctp_cmd_seq. * * Use the SCTP_* constructors defined by SCTP_ARG_CONSTRUCTOR() above * to wrap data which goes in the obj argument. */ -static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, +static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb, sctp_arg_t obj) { struct sctp_cmd *cmd = seq->last_used_slot - 1; @@ -240,7 +240,7 @@ static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, /* Return the next command structure in an sctp_cmd_seq. * Return NULL at the end of the sequence. */ -static inline struct sctp_cmd *sctp_next_cmd(sctp_cmd_seq_t *seq) +static inline struct sctp_cmd *sctp_next_cmd(struct sctp_cmd_seq *seq) { if (seq->next_cmd <= seq->last_used_slot) return NULL; diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 1e7651c3b158..9af64b98d96b 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -75,7 +75,7 @@ typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, const struct sctp_association *, const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *); + struct sctp_cmd_seq *); typedef void (sctp_timer_event_t) (unsigned long); typedef struct { sctp_state_fn_t *fn; -- cgit v1.2.3 From c488b7704ed0eed18e11f9b685931558735f2a68 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:53 +0800 Subject: sctp: remove the typedef sctp_arg_t This patch is to remove the typedef sctp_arg_t, and replace with union sctp_arg in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index cbf6798866ef..f5fc425b5a4f 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -117,7 +117,7 @@ typedef enum { */ #define SCTP_MAX_NUM_COMMANDS 20 -typedef union { +union sctp_arg { void *zero_all; /* Set to NULL to clear the entire union */ __s32 i32; __u32 u32; @@ -137,24 +137,24 @@ typedef union { struct sctp_packet *packet; struct sctp_sackhdr *sackh; struct sctp_datamsg *msg; -} sctp_arg_t; +}; /* We are simulating ML type constructors here. * * SCTP_ARG_CONSTRUCTOR(NAME, TYPE, ELT) builds a function called * SCTP_NAME() which takes an argument of type TYPE and returns an - * sctp_arg_t. It does this by inserting the sole argument into the - * ELT union element of a local sctp_arg_t. + * union sctp_arg. It does this by inserting the sole argument into + * the ELT union element of a local union sctp_arg. * * E.g., SCTP_ARG_CONSTRUCTOR(I32, __s32, i32) builds SCTP_I32(arg), - * which takes an __s32 and returns a sctp_arg_t containing the + * which takes an __s32 and returns a union sctp_arg containing the * __s32. So, after foo = SCTP_I32(arg), foo.i32 == arg. */ #define SCTP_ARG_CONSTRUCTOR(name, type, elt) \ -static inline sctp_arg_t \ +static inline union sctp_arg \ SCTP_## name (type arg) \ -{ sctp_arg_t retval;\ +{ union sctp_arg retval;\ retval.zero_all = NULL;\ retval.elt = arg;\ return retval;\ @@ -179,25 +179,25 @@ SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) SCTP_ARG_CONSTRUCTOR(SACKH, struct sctp_sackhdr *, sackh) SCTP_ARG_CONSTRUCTOR(DATAMSG, struct sctp_datamsg *, msg) -static inline sctp_arg_t SCTP_FORCE(void) +static inline union sctp_arg SCTP_FORCE(void) { return SCTP_I32(1); } -static inline sctp_arg_t SCTP_NOFORCE(void) +static inline union sctp_arg SCTP_NOFORCE(void) { return SCTP_I32(0); } -static inline sctp_arg_t SCTP_NULL(void) +static inline union sctp_arg SCTP_NULL(void) { - sctp_arg_t retval; + union sctp_arg retval; retval.zero_all = NULL; return retval; } struct sctp_cmd { - sctp_arg_t obj; + union sctp_arg obj; sctp_verb_t verb; }; @@ -226,7 +226,7 @@ static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq) * to wrap data which goes in the obj argument. */ static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb, - sctp_arg_t obj) + union sctp_arg obj) { struct sctp_cmd *cmd = seq->last_used_slot - 1; -- cgit v1.2.3 From e08af95df1130883762b388a19bb150ae5d16c09 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:54 +0800 Subject: sctp: remove the typedef sctp_verb_t This patch is to remove the typedef sctp_verb_t, and replace with enum sctp_verb in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index f5fc425b5a4f..b55c6a48a206 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -40,7 +40,7 @@ #include -typedef enum { +enum sctp_verb { SCTP_CMD_NOP = 0, /* Do nothing. */ SCTP_CMD_NEW_ASOC, /* Register a new association. */ SCTP_CMD_DELETE_TCB, /* Delete the current association. */ @@ -108,7 +108,7 @@ typedef enum { SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ SCTP_CMD_SET_ASOC, /* Restore association context */ SCTP_CMD_LAST -} sctp_verb_t; +}; /* How many commands can you put in an struct sctp_cmd_seq? * This is a rather arbitrary number, ideally derived from a careful @@ -198,7 +198,7 @@ static inline union sctp_arg SCTP_NULL(void) struct sctp_cmd { union sctp_arg obj; - sctp_verb_t verb; + enum sctp_verb verb; }; struct sctp_cmd_seq { @@ -225,8 +225,8 @@ static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq) * Use the SCTP_* constructors defined by SCTP_ARG_CONSTRUCTOR() above * to wrap data which goes in the obj argument. */ -static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb, - union sctp_arg obj) +static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, + enum sctp_verb verb, union sctp_arg obj) { struct sctp_cmd *cmd = seq->last_used_slot - 1; -- cgit v1.2.3 From eb662a6a9b2a4ee3c76bbfc4c23f4dc56859b0f3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:55 +0800 Subject: sctp: remove the unused typedef sctp_sm_command_t Remove this typedef including the struct, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 9af64b98d96b..3ca75a6c4b5e 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -65,11 +65,6 @@ typedef enum { SCTP_DISPOSITION_BUG, /* This is a bug. */ } sctp_disposition_t; -typedef struct { - int name; - int action; -} sctp_sm_command_t; - typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, const struct sctp_endpoint *, const struct sctp_association *, -- cgit v1.2.3 From 8ee821aea39c6bf4142c9319adecea6d3e1af4a2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:56 +0800 Subject: sctp: remove the typedef sctp_sm_table_entry_t This patch is to remove the typedef sctp_sm_table_entry_t, and replace with struct sctp_sm_table_entry in the places where it's using this typedef. It is also to fix some indents. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 3ca75a6c4b5e..7ad240228a0f 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -72,10 +72,10 @@ typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, void *arg, struct sctp_cmd_seq *); typedef void (sctp_timer_event_t) (unsigned long); -typedef struct { +struct sctp_sm_table_entry { sctp_state_fn_t *fn; const char *name; -} sctp_sm_table_entry_t; +}; /* A naming convention of "sctp_sf_xxx" applies to all the state functions * currently in use. @@ -170,7 +170,7 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); -const sctp_sm_table_entry_t *sctp_sm_lookup_event( +const struct sctp_sm_table_entry *sctp_sm_lookup_event( struct net *net, enum sctp_event event_type, enum sctp_state state, -- cgit v1.2.3 From 172a1599ba88df7147f6503a75686fb89c8a1f3f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:57 +0800 Subject: sctp: remove the typedef sctp_disposition_t This patch is to remove the typedef sctp_disposition_t, and replace with enum sctp_disposition in the places where it's using this typedef. It's also to fix the indent for many functions' defination. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 7ad240228a0f..33077f317995 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -53,7 +53,7 @@ /* * Possible values for the disposition are: */ -typedef enum { +enum sctp_disposition { SCTP_DISPOSITION_DISCARD, /* No further processing. */ SCTP_DISPOSITION_CONSUME, /* Process return values normally. */ SCTP_DISPOSITION_NOMEM, /* We ran out of memory--recover. */ @@ -63,14 +63,15 @@ typedef enum { SCTP_DISPOSITION_NOT_IMPL, /* This entry is not implemented. */ SCTP_DISPOSITION_ERROR, /* This is plain old user error. */ SCTP_DISPOSITION_BUG, /* This is a bug. */ -} sctp_disposition_t; - -typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, - const struct sctp_endpoint *, - const struct sctp_association *, - const union sctp_subtype type, - void *arg, - struct sctp_cmd_seq *); +}; + +typedef enum sctp_disposition (sctp_state_fn_t) ( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands); typedef void (sctp_timer_event_t) (unsigned long); struct sctp_sm_table_entry { sctp_state_fn_t *fn; -- cgit v1.2.3 From 327c0dab8d1301cd866816de8c7f32eb872cabac Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:58 +0800 Subject: sctp: fix some indents in sm_make_chunk.c There are some bad indents of functions' defination in sm_make_chunk.c. They have been there since beginning, it was probably caused by that the typedef sctp_chunk_t was replaced with struct sctp_chunk. So it's the best time to fix them in this patchset, it's also to fix some bad indents in other functions' defination in sm_make_chunk.c. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 158 +++++++++++++++++++++++++------------------------- 1 file changed, 79 insertions(+), 79 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 33077f317995..2db3d3a9ce1d 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -184,68 +184,69 @@ __u32 sctp_generate_verification_tag(void); void sctp_populate_tie_tags(__u8 *cookie, __u32 curTag, __u32 hisTag); /* Prototypes for chunk-building functions. */ -struct sctp_chunk *sctp_make_init(const struct sctp_association *, - const struct sctp_bind_addr *, - gfp_t gfp, int vparam_len); -struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *, - const struct sctp_chunk *, - const gfp_t gfp, - const int unkparam_len); -struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_cwr(const struct sctp_association *, +struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, + const struct sctp_bind_addr *bp, + gfp_t gfp, int vparam_len); +struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const gfp_t gfp, const int unkparam_len); +struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc, const __u32 lowest_tsn, - const struct sctp_chunk *); -struct sctp_chunk * sctp_make_datafrag_empty(struct sctp_association *, - const struct sctp_sndrcvinfo *sinfo, - int len, const __u8 flags, - __u16 ssn, gfp_t gfp); -struct sctp_chunk *sctp_make_ecne(const struct sctp_association *, - const __u32); -struct sctp_chunk *sctp_make_sack(const struct sctp_association *); + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, + const struct sctp_sndrcvinfo *sinfo, + int len, const __u8 flags, + __u16 ssn, gfp_t gfp); +struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, + const __u32 lowest_tsn); +struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc); struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_shutdown_complete(const struct sctp_association *, - const struct sctp_chunk *); -void sctp_init_cause(struct sctp_chunk *, __be16 cause, size_t); -struct sctp_chunk *sctp_make_abort(const struct sctp_association *, - const struct sctp_chunk *, - const size_t hint); -struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *, - const struct sctp_chunk *, - __u32 tsn); -struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *, - struct msghdr *, size_t msg_len); -struct sctp_chunk *sctp_make_abort_violation(const struct sctp_association *, - const struct sctp_chunk *, - const __u8 *, - const size_t ); -struct sctp_chunk *sctp_make_violation_paramlen(const struct sctp_association *, - const struct sctp_chunk *, - struct sctp_paramhdr *); -struct sctp_chunk *sctp_make_violation_max_retrans(const struct sctp_association *, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *, - const struct sctp_transport *); -struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *, - const struct sctp_chunk *, - const void *payload, - const size_t paylen); -struct sctp_chunk *sctp_make_op_error(const struct sctp_association *, - const struct sctp_chunk *chunk, - __be16 cause_code, - const void *payload, - size_t paylen, - size_t reserve_tail); - -struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *, - union sctp_addr *, - struct sockaddr *, - int, __be16); + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_shutdown_complete( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen); +struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const size_t hint); +struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + __u32 tsn); +struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, + struct msghdr *msg, size_t msg_len); +struct sctp_chunk *sctp_make_abort_violation( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const __u8 *payload, + const size_t paylen); +struct sctp_chunk *sctp_make_violation_paramlen( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + struct sctp_paramhdr *param); +struct sctp_chunk *sctp_make_violation_max_retrans( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, + const struct sctp_transport *transport); +struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const void *payload, + const size_t paylen); +struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + __be16 cause_code, const void *payload, + size_t paylen, size_t reserve_tail); + +struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, + union sctp_addr *laddr, + struct sockaddr *addrs, + int addrcnt, __be16 flags); struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, union sctp_addr *addr); bool sctp_verify_asconf(const struct sctp_association *asoc, @@ -259,27 +260,25 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, __u32 new_cum_tsn, size_t nstreams, struct sctp_fwdtsn_skip *skiplist); struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc); -struct sctp_chunk *sctp_make_strreset_req( - const struct sctp_association *asoc, - __u16 stream_num, __u16 *stream_list, - bool out, bool in); +struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc, + __u16 stream_num, __u16 *stream_list, + bool out, bool in); struct sctp_chunk *sctp_make_strreset_tsnreq( - const struct sctp_association *asoc); + const struct sctp_association *asoc); struct sctp_chunk *sctp_make_strreset_addstrm( - const struct sctp_association *asoc, - __u16 out, __u16 in); -struct sctp_chunk *sctp_make_strreset_resp( - const struct sctp_association *asoc, - __u32 result, __u32 sn); -struct sctp_chunk *sctp_make_strreset_tsnresp( - struct sctp_association *asoc, - __u32 result, __u32 sn, - __u32 sender_tsn, __u32 receiver_tsn); + const struct sctp_association *asoc, + __u16 out, __u16 in); +struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc, + __u32 result, __u32 sn); +struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc, + __u32 result, __u32 sn, + __u32 sender_tsn, + __u32 receiver_tsn); bool sctp_verify_reconf(const struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_paramhdr **errp); -void sctp_chunk_assign_tsn(struct sctp_chunk *); -void sctp_chunk_assign_ssn(struct sctp_chunk *); +void sctp_chunk_assign_tsn(struct sctp_chunk *chunk); +void sctp_chunk_assign_ssn(struct sctp_chunk *chunk); /* Prototypes for stream-processing functions. */ struct sctp_chunk *sctp_process_strreset_outreq( @@ -322,11 +321,12 @@ void sctp_generate_proto_unreach_event(unsigned long peer); void sctp_ootb_pkt_free(struct sctp_packet *packet); -struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - struct sctp_chunk *chunk, - gfp_t gfp, int *err, - struct sctp_chunk **err_chk_p); +struct sctp_association *sctp_unpack_cookie( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + struct sctp_chunk *chunk, + gfp_t gfp, int *err, + struct sctp_chunk **err_chk_p); /* 3rd level prototypes */ __u32 sctp_generate_tag(const struct sctp_endpoint *ep); -- cgit v1.2.3 From 861932ecc36063196c80ca3e240502b0f6d0e977 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:31 +0200 Subject: net: sched: Add helpers to identify classids Offloading drivers need to understand what qdisc class a filter is added to. Currently they only need to identify ingress, clsact->ingress and clsact->egress. So provide these helpers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2579c209ea51..259bc191ba59 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -5,6 +5,7 @@ #include #include #include +#include #define DEFAULT_TX_QUEUE_LEN 1000 @@ -132,4 +133,17 @@ static inline unsigned int psched_mtu(const struct net_device *dev) return dev->mtu + dev->hard_header_len; } +static inline bool is_classid_clsact_ingress(u32 classid) +{ + /* This also returns true for ingress qdisc */ + return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && + TC_H_MIN(classid) != TC_H_MIN(TC_H_MIN_EGRESS); +} + +static inline bool is_classid_clsact_egress(u32 classid) +{ + return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && + TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS); +} + #endif -- cgit v1.2.3 From 7690f2a51d8afe51ac97e7fae66b081f192a7158 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:32 +0200 Subject: net: sched: propagate classid down to offload drivers Drivers need classid to decide they support this specific qdisc+class or not. So propagate it down via the tc_cls_common_offload struct. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f78e6560b2d..1f1de20e584f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -410,6 +410,7 @@ struct tc_cls_common_offload { u32 chain_index; __be16 protocol; u32 prio; + u32 classid; }; static inline void @@ -420,6 +421,7 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; cls_common->prio = tp->prio; + cls_common->classid = tp->classid; } struct tc_cls_u32_knode { -- cgit v1.2.3 From 237f79d24ebe1eb9b5651b7342ba5cc9d9b8f222 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:34 +0200 Subject: net: sched: remove handle propagation down to the drivers There is no longer need to use handle in drivers, so remove it from tc_cls_common_offload struct. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 1f1de20e584f..bd9dd79357fe 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -406,7 +406,6 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) #endif /* CONFIG_NET_CLS_IND */ struct tc_cls_common_offload { - u32 handle; u32 chain_index; __be16 protocol; u32 prio; @@ -417,7 +416,6 @@ static inline void tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, const struct tcf_proto *tp) { - cls_common->handle = tp->q->handle; cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; cls_common->prio = tp->prio; -- cgit v1.2.3 From 7b06e8aed283081010596c98a67f06c595affe51 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:35 +0200 Subject: net: sched: remove cops->tcf_cl_offload cops->tcf_cl_offload is no longer needed, as the drivers check what they can and cannot offload using the classid identify helpers. So remove this. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 14 +++----------- include/net/sch_generic.h | 1 - 2 files changed, 3 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bd9dd79357fe..e80edd8879ef 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -457,19 +457,12 @@ struct tc_cls_u32_offload { }; }; -static inline bool tc_can_offload(const struct net_device *dev, - const struct tcf_proto *tp) +static inline bool tc_can_offload(const struct net_device *dev) { - const struct Qdisc *sch = tp->q; - const struct Qdisc_class_ops *cops = sch->ops->cl_ops; - if (!(dev->features & NETIF_F_HW_TC)) return false; if (!dev->netdev_ops->ndo_setup_tc) return false; - if (cops && cops->tcf_cl_offload) - return cops->tcf_cl_offload(tp->classid); - return true; } @@ -478,12 +471,11 @@ static inline bool tc_skip_hw(u32 flags) return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false; } -static inline bool tc_should_offload(const struct net_device *dev, - const struct tcf_proto *tp, u32 flags) +static inline bool tc_should_offload(const struct net_device *dev, u32 flags) { if (tc_skip_hw(flags)) return false; - return tc_can_offload(dev, tp); + return tc_can_offload(dev); } static inline bool tc_skip_sw(u32 flags) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e79f5ad1c5f3..5865db91976b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -156,7 +156,6 @@ struct Qdisc_class_ops { /* Filter manipulation */ struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long); - bool (*tcf_cl_offload)(u32 classid); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); -- cgit v1.2.3 From 7c4974786f4794178f04e96318fc3b2f2850cbc6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 11 Aug 2017 19:41:17 +0800 Subject: net: export some generic xdp helpers This patch tries to export some generic xdp helpers to drivers. This can let driver to do XDP for a specific skb. This is useful for the case when the packet is hard to be processed at page level directly (e.g jumbo/GSO frame). With this patch, there's no need for driver to forbid the XDP set when configuration is not suitable. Instead, it can defer the XDP for packets that is hard to be processed directly after skb is created. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1d238d54c484..0f1c4cb2441e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3243,6 +3243,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED); } +void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); int netif_receive_skb(struct sk_buff *skb); -- cgit v1.2.3 From dc503a8ad98474ea0073a1c5c4d9f18cb8dd0dbf Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 15 Aug 2017 20:34:35 +0100 Subject: bpf/verifier: track liveness for pruning State of a register doesn't matter if it wasn't read in reaching an exit; a write screens off all reads downstream of it from all explored_states upstream of it. This allows us to prune many more branches; here are some processed insn counts for some Cilium programs: Program before after bpf_lb_opt_-DLB_L3.o 6515 3361 bpf_lb_opt_-DLB_L4.o 8976 5176 bpf_lb_opt_-DUNKNOWN.o 2960 1137 bpf_lxc_opt_-DDROP_ALL.o 95412 48537 bpf_lxc_opt_-DUNKNOWN.o 141706 78718 bpf_netdev.o 24251 17995 bpf_overlay.o 10999 9385 The runtime is also improved; here are 'time' results in ms: Program before after bpf_lb_opt_-DLB_L3.o 24 6 bpf_lb_opt_-DLB_L4.o 26 11 bpf_lb_opt_-DUNKNOWN.o 11 2 bpf_lxc_opt_-DDROP_ALL.o 1288 139 bpf_lxc_opt_-DUNKNOWN.o 1768 234 bpf_netdev.o 62 31 bpf_overlay.o 15 13 Signed-off-by: Edward Cree Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c61c3033522e..91d07efed2ba 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -21,6 +21,12 @@ */ #define BPF_MAX_VAR_SIZ INT_MAX +enum bpf_reg_liveness { + REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */ + REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */ + REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */ +}; + struct bpf_reg_state { enum bpf_reg_type type; union { @@ -40,7 +46,7 @@ struct bpf_reg_state { * came from, when one is tested for != NULL. */ u32 id; - /* These five fields must be last. See states_equal() */ + /* Ordering of fields matters. See states_equal() */ /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset @@ -57,6 +63,8 @@ struct bpf_reg_state { s64 smax_value; /* maximum possible (s64)value */ u64 umin_value; /* minimum possible (u64)value */ u64 umax_value; /* maximum possible (u64)value */ + /* This field must be last, for states_equal() reasons. */ + enum bpf_reg_liveness live; }; enum bpf_stack_slot_type { @@ -74,6 +82,7 @@ struct bpf_verifier_state { struct bpf_reg_state regs[MAX_BPF_REG]; u8 stack_slot_type[MAX_BPF_STACK]; struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; + struct bpf_verifier_state *parent; }; /* linked list of verifier states used to prune search */ -- cgit v1.2.3 From fe4007999599c02598c17b643e8de43e487d48e8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Aug 2017 09:09:49 +0200 Subject: ipv6: fib: Provide offload indication using nexthop flags IPv6 routes currently lack nexthop flags as in IPv4. This has several implications. In the forwarding path, it requires us to check the carrier state of the nexthop device and potentially ignore a linkdown route, instead of checking for RTNH_F_LINKDOWN. It also requires capable drivers to use the user facing IPv6-specific route flags to provide offload indication, instead of using the nexthop flags as in IPv4. Add nexthop flags to IPv6 routes in the 40 bytes hole and use it to provide offload indication instead of the RTF_OFFLOAD flag, which is removed while it's still not part of any official kernel release. In the near future we would like to use the field for the RTNH_F_{LINKDOWN,DEAD} flags, but this change is more involved and might not be ready in time for the current cycle. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ include/uapi/linux/ipv6_route.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1d790ea40ea7..71c1646298ae 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -120,6 +120,8 @@ struct rt6_info { atomic_t rt6i_ref; + unsigned int rt6i_nh_flags; + /* These are in a separate cache line. */ struct rt6key rt6i_dst ____cacheline_aligned_in_smp; u32 rt6i_flags; diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index 33e2a5732bd1..d496c02e14bc 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -35,7 +35,6 @@ #define RTF_PREF(pref) ((pref) << 27) #define RTF_PREF_MASK 0x18000000 -#define RTF_OFFLOAD 0x20000000 /* offloaded route */ #define RTF_PCPU 0x40000000 /* read-only: can not be set by user */ #define RTF_LOCAL 0x80000000 -- cgit v1.2.3 From b005fd189cec9407b700599e1e80e0552446ee79 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 15 Aug 2017 22:31:58 -0700 Subject: bpf: introduce new program type for skbs on sockets A class of programs, run from strparser and soon from a new map type called sock map, are used with skb as the context but on established sockets. By creating a specific program type for these we can use bpf helpers that expect full sockets and get the verifier to ensure these helpers are not used out of context. The new type is BPF_PROG_TYPE_SK_SKB. This patch introduces the infrastructure and type. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index b1e1035ca24b..4b72db30dacf 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -11,6 +11,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb_prog_ops) #endif #ifdef CONFIG_BPF_EVENTS BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 91da8371a2d0..2e796e384aeb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -127,6 +127,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_OUT, BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, + BPF_PROG_TYPE_SK_SKB, }; enum bpf_attach_type { -- cgit v1.2.3 From a6f6df69c48b86cd84f36c70593eb4968fceb34a Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 15 Aug 2017 22:32:22 -0700 Subject: bpf: export bpf_prog_inc_not_zero bpf_prog_inc_not_zero will be used by upcoming sockmap patches this patch simply exports it so we can pull it in. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 39229c455cba..d6e1de8ce0fc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -252,6 +252,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); +struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); int __bpf_prog_charge(struct user_struct *user, u32 pages); void __bpf_prog_uncharge(struct user_struct *user, u32 pages); @@ -344,6 +345,12 @@ static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog) return ERR_PTR(-EOPNOTSUPP); } +static inline struct bpf_prog *__must_check +bpf_prog_inc_not_zero(struct bpf_prog *prog) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) { return 0; -- cgit v1.2.3 From 174a79ff9515f400b9a6115643dafd62a635b7e6 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 15 Aug 2017 22:32:47 -0700 Subject: bpf: sockmap with sk redirect support Recently we added a new map type called dev map used to forward XDP packets between ports (6093ec2dc313). This patches introduces a similar notion for sockets. A sockmap allows users to add participating sockets to a map. When sockets are added to the map enough context is stored with the map entry to use the entry with a new helper bpf_sk_redirect_map(map, key, flags) This helper (analogous to bpf_redirect_map in XDP) is given the map and an entry in the map. When called from a sockmap program, discussed below, the skb will be sent on the socket using skb_send_sock(). With the above we need a bpf program to call the helper from that will then implement the send logic. The initial site implemented in this series is the recv_sock hook. For this to work we implemented a map attach command to add attributes to a map. In sockmap we add two programs a parse program and a verdict program. The parse program uses strparser to build messages and pass them to the verdict program. The parse programs use the normal strparser semantics. The verdict program is of type SK_SKB. The verdict program returns a verdict SK_DROP, or SK_REDIRECT for now. Additional actions may be added later. When SK_REDIRECT is returned, expected when bpf program uses bpf_sk_redirect_map(), the sockmap logic will consult per cpu variables set by the helper routine and pull the sock entry out of the sock map. This pattern follows the existing redirect logic in cls and xdp programs. This gives the flow, recv_sock -> str_parser (parse_prog) -> verdict_prog -> skb_send_sock \ -> kfree_skb As an example use case a message based load balancer may use specific logic in the verdict program to select the sock to send on. Sample programs are provided in future patches that hopefully illustrate the user interfaces. Also selftests are in follow-on patches. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 +++++-- include/linux/bpf_types.h | 1 + include/linux/filter.h | 2 ++ include/uapi/linux/bpf.h | 33 ++++++++++++++++++++++++++++++++- 4 files changed, 40 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d6e1de8ce0fc..a4145e9c74b5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -16,6 +16,7 @@ #include struct perf_event; +struct bpf_prog; struct bpf_map; /* map is generic key/value storage optionally accesible by eBPF programs */ @@ -37,6 +38,8 @@ struct bpf_map_ops { void (*map_fd_put_ptr)(void *ptr); u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); + int (*map_attach)(struct bpf_map *map, + struct bpf_prog *p1, struct bpf_prog *p2); }; struct bpf_map { @@ -138,8 +141,6 @@ enum bpf_reg_type { PTR_TO_PACKET_END, /* skb->data + headlen */ }; -struct bpf_prog; - /* The information passed from prog-specific *_is_valid_access * back to the verifier. */ @@ -312,6 +313,7 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); /* Map specifics */ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); @@ -391,6 +393,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; +extern const struct bpf_func_proto bpf_sock_map_update_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 4b72db30dacf..fa805074d168 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -38,4 +38,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) #ifdef CONFIG_NET BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index d19ed3c15e1e..7015116331af 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -727,6 +727,8 @@ void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); +struct sock *do_sk_redirect_map(void); + #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2e796e384aeb..7f774769e3f5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -110,6 +110,7 @@ enum bpf_map_type { BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_DEVMAP, + BPF_MAP_TYPE_SOCKMAP, }; enum bpf_prog_type { @@ -135,11 +136,15 @@ enum bpf_attach_type { BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, BPF_CGROUP_SOCK_OPS, + BPF_CGROUP_SMAP_INGRESS, __MAX_BPF_ATTACH_TYPE }; #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */ +#define BPF_SOCKMAP_STRPARSER (1U << 0) + /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command * to the given target_fd cgroup the descendent cgroup will be able to * override effective bpf program that was inherited from this cgroup @@ -211,6 +216,7 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; + __u32 attach_bpf_fd2; }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ @@ -557,6 +563,23 @@ union bpf_attr { * @mode: operation mode (enum bpf_adj_room_mode) * @flags: reserved for future use * Return: 0 on success or negative error code + * + * int bpf_sk_redirect_map(map, key, flags) + * Redirect skb to a sock in map using key as a lookup key for the + * sock in map. + * @map: pointer to sockmap + * @key: key to lookup sock in map + * @flags: reserved for future use + * Return: SK_REDIRECT + * + * int bpf_sock_map_update(skops, map, key, flags, map_flags) + * @skops: pointer to bpf_sock_ops + * @map: pointer to sockmap to update + * @key: key to insert/update sock in map + * @flags: same flags as map update elem + * @map_flags: sock map specific flags + * bit 1: Enable strparser + * other bits: reserved */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -610,7 +633,9 @@ union bpf_attr { FN(set_hash), \ FN(setsockopt), \ FN(skb_adjust_room), \ - FN(redirect_map), + FN(redirect_map), \ + FN(sk_redirect_map), \ + FN(sock_map_update), \ /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -747,6 +772,12 @@ struct xdp_md { __u32 data_end; }; +enum sk_action { + SK_ABORTED = 0, + SK_DROP, + SK_REDIRECT, +}; + #define BPF_TAG_SIZE 8 struct bpf_prog_info { -- cgit v1.2.3 From 8a31db5615667956c513d205cfb06885c3ec6d0b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 15 Aug 2017 22:33:09 -0700 Subject: bpf: add access to sock fields and pkt data from sk_skb programs Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7f774769e3f5..5ecbe812a2cc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -712,6 +712,15 @@ struct __sk_buff { __u32 data; __u32 data_end; __u32 napi_id; + + /* accessed by BPF_PROG_TYPE_sk_skb types */ + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ }; struct bpf_tunnel_key { -- cgit v1.2.3 From e543002f77f463501d47fab43acf7ba881e9dcaf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 15 Aug 2017 21:11:03 +0200 Subject: qdisc: add tracepoint qdisc:qdisc_dequeue for dequeued SKBs The main purpose of this tracepoint is to monitor bulk dequeue in the network qdisc layer, as it cannot be deducted from the existing qdisc stats. The txq_state can be used for determining the reason for zero packet dequeues, see enum netdev_queue_state_t. Notice all packets doesn't necessary activate this tracepoint. As qdiscs with flag TCQ_F_CAN_BYPASS, can directly invoke sch_direct_xmit() when qdisc_qlen is zero. Remember that perf record supports filters like: perf record -e qdisc:qdisc_dequeue \ --filter 'ifindex == 4 && (packets > 1 || txq_state > 0)' Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/qdisc.h | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 include/trace/events/qdisc.h (limited to 'include') diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h new file mode 100644 index 000000000000..60d0d8bd336d --- /dev/null +++ b/include/trace/events/qdisc.h @@ -0,0 +1,50 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM qdisc + +#if !defined(_TRACE_QDISC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_QDISC_H_ + +#include +#include +#include +#include + +TRACE_EVENT(qdisc_dequeue, + + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, + int packets, struct sk_buff *skb), + + TP_ARGS(qdisc, txq, packets, skb), + + TP_STRUCT__entry( + __field( struct Qdisc *, qdisc ) + __field(const struct netdev_queue *, txq ) + __field( int, packets ) + __field( void *, skbaddr ) + __field( int, ifindex ) + __field( u32, handle ) + __field( u32, parent ) + __field( unsigned long, txq_state) + ), + + /* skb==NULL indicate packets dequeued was 0, even when packets==1 */ + TP_fast_assign( + __entry->qdisc = qdisc; + __entry->txq = txq; + __entry->packets = skb ? packets : 0; + __entry->skbaddr = skb; + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; + __entry->handle = qdisc->handle; + __entry->parent = qdisc->parent; + __entry->txq_state = txq->state; + ), + + TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", + __entry->ifindex, __entry->handle, __entry->parent, + __entry->txq_state, __entry->packets, __entry->skbaddr ) +); + +#endif /* _TRACE_QDISC_H_ */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 6bdc9c4c31c81688e19cb186d49be01bbb6a1618 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 16 Aug 2017 15:02:32 -0700 Subject: bpf: sock_map fixes for !CONFIG_BPF_SYSCALL and !STREAM_PARSER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve issues with !CONFIG_BPF_SYSCALL and !STREAM_PARSER net/core/filter.c: In function ‘do_sk_redirect_map’: net/core/filter.c:1881:3: error: implicit declaration of function ‘__sock_map_lookup_elem’ [-Werror=implicit-function-declaration] sk = __sock_map_lookup_elem(ri->map, ri->ifindex); ^ net/core/filter.c:1881:6: warning: assignment makes pointer from integer without a cast [enabled by default] sk = __sock_map_lookup_elem(ri->map, ri->ifindex); Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") Reported-by: Eric Dumazet Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 10 +++++++++- include/linux/bpf_types.h | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a4145e9c74b5..1cc6c5ff61ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -313,7 +313,6 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); /* Map specifics */ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); -struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); @@ -377,6 +376,15 @@ static inline void __dev_map_flush(struct bpf_map *map) } #endif /* CONFIG_BPF_SYSCALL */ +#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) +struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); +#else +static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) +{ + return NULL; +} +#endif + /* verifier prototypes for helper functions called from eBPF programs */ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fa805074d168..6f1a567667b8 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -38,5 +38,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) #ifdef CONFIG_NET BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +#ifdef CONFIG_STREAM_PARSER BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) #endif +#endif -- cgit v1.2.3 From 9a603b8e1136f2b55f780fefbcbf84d31844ff2b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 16 Aug 2017 08:56:24 -0700 Subject: vmbus: remove unused vmbus_sendpacket_multipagebuffer This function is not used anywhere in current code. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hyperv.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b7d7bbec74e0..39a080ce17da 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1052,12 +1052,6 @@ extern int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, u64 requestid, u32 flags); -extern int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, - struct hv_multipage_buffer *mpb, - void *buffer, - u32 bufferlen, - u64 requestid); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, -- cgit v1.2.3 From 5a668d8cddbe8bf14379ce110c49ca088a1e9fae Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 16 Aug 2017 08:56:25 -0700 Subject: vmbus: remove unused vmubs_sendpacket_pagebuffer_ctl The function vmbus_sendpacket_pagebuffer_ctl was never used directly. Just have vmbus_send_pagebuffer Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hyperv.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 39a080ce17da..9692592d43a3 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1044,14 +1044,6 @@ extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, u32 bufferlen, u64 requestid); -extern int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, - void *buffer, - u32 bufferlen, - u64 requestid, - u32 flags); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, -- cgit v1.2.3 From 5dd0fb9b9ffc0ef9b312d05604f4ad0fffc50505 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 16 Aug 2017 08:56:26 -0700 Subject: vmbus: remove unused vmbus_sendpacket_ctl The only usage of vmbus_sendpacket_ctl was by vmbus_sendpacket. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hyperv.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 9692592d43a3..a5f961c4149e 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1030,13 +1030,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel, enum vmbus_packet_type type, u32 flags); -extern int vmbus_sendpacket_ctl(struct vmbus_channel *channel, - void *buffer, - u32 bufferLen, - u64 requestid, - enum vmbus_packet_type type, - u32 flags); - extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, struct hv_page_buffer pagebuffers[], u32 pagecount, -- cgit v1.2.3 From 9620fef27ed2cdb37bf6fd028f32bea2ef5119a8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Aug 2017 12:08:07 -0700 Subject: ipv4: convert dst_metrics.refcnt from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index f73611ec4017..93568bd0a352 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -107,7 +108,7 @@ struct dst_entry { struct dst_metrics { u32 metrics[RTAX_MAX]; - atomic_t refcnt; + refcount_t refcnt; }; extern const struct dst_metrics dst_default_metrics; -- cgit v1.2.3 From 0888e372c37fa31882c8ed89fb2f8188b08b6718 Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Thu, 17 Aug 2017 00:35:11 +0000 Subject: net: inet: diag: expose sockets cgroup classid This is useful for directly looking up a task based on class id rather than having to scan through all open file descriptors. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index bbe201047df6..678496897a68 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -142,6 +142,7 @@ enum { INET_DIAG_PAD, INET_DIAG_MARK, INET_DIAG_BBRINFO, + INET_DIAG_CLASS_ID, __INET_DIAG_MAX, }; -- cgit v1.2.3 From 4c03bdd7b5c084c3c6973cb2419edac5363c051f Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 17 Aug 2017 18:22:37 +0200 Subject: xdp: adjust xdp redirect tracepoint to include return error code The return error code need to be included in the tracepoint xdp:xdp_redirect, else its not possible to distinguish successful or failed XDP_REDIRECT transmits. XDP have no queuing mechanism. Thus, it is fairly easily to overrun a NIC transmit queue. The eBPF program invoking helpers (bpf_redirect or bpf_redirect_map) to redirect a packet doesn't get any feedback whether the packet was actually transmitted. Info on failed transmits in the tracepoint xdp:xdp_redirect, is interesting as this opens for providing a feedback-loop to the receiving XDP program. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 7b1eb7b4be41..0e42e69f773b 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -53,15 +53,16 @@ TRACE_EVENT(xdp_redirect, TP_PROTO(const struct net_device *from, const struct net_device *to, - const struct bpf_prog *xdp, u32 act), + const struct bpf_prog *xdp, u32 act, int err), - TP_ARGS(from, to, xdp, act), + TP_ARGS(from, to, xdp, act, err), TP_STRUCT__entry( __string(name_from, from->name) __string(name_to, to->name) __array(u8, prog_tag, 8) __field(u32, act) + __field(int, err) ), TP_fast_assign( @@ -70,12 +71,14 @@ TRACE_EVENT(xdp_redirect, __assign_str(name_from, from->name); __assign_str(name_to, to->name); __entry->act = act; + __entry->err = err; ), - TP_printk("prog=%s from=%s to=%s action=%s", + TP_printk("prog=%s from=%s to=%s action=%s err=%d", __print_hex_str(__entry->prog_tag, 8), __get_str(name_from), __get_str(name_to), - __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->err) ); #endif /* _TRACE_XDP_H */ -- cgit v1.2.3 From 606c07f3086017a8c2d7ce0843807e81b541edcc Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Fri, 18 Aug 2017 09:03:44 -0500 Subject: net: ethtool: Add macro to clear a link mode setting There are currently macros to set and test an ETHTOOL_LINK_MODE_ setting, but not to clear one. Add a macro to clear an ETHTOOL_LINK_MODE_ setting. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- include/linux/ethtool.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index afdbb701fdb4..4587a4c36923 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -136,6 +136,17 @@ struct ethtool_link_ksettings { #define ethtool_link_ksettings_add_link_mode(ptr, name, mode) \ __set_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name) +/** + * ethtool_link_ksettings_del_link_mode - clear bit in link_ksettings + * link mode mask + * @ptr : pointer to struct ethtool_link_ksettings + * @name : one of supported/advertising/lp_advertising + * @mode : one of the ETHTOOL_LINK_MODE_*_BIT + * (not atomic, no bound checking) + */ +#define ethtool_link_ksettings_del_link_mode(ptr, name, mode) \ + __clear_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name) + /** * ethtool_link_ksettings_test_link_mode - test bit in ksettings link mode mask * @ptr : pointer to struct ethtool_link_ksettings -- cgit v1.2.3 From b793dc5c6edfb106fd57d12ad6aca64bf160b403 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 18 Aug 2017 13:46:20 -0700 Subject: net: constify netdev_class_file These functions are wrapper arount class_create_file which can take a const attribute. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0f1c4cb2441e..eaa77bd9cb80 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4013,17 +4013,17 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi return rc; } -int netdev_class_create_file_ns(struct class_attribute *class_attr, +int netdev_class_create_file_ns(const struct class_attribute *class_attr, const void *ns); -void netdev_class_remove_file_ns(struct class_attribute *class_attr, +void netdev_class_remove_file_ns(const struct class_attribute *class_attr, const void *ns); -static inline int netdev_class_create_file(struct class_attribute *class_attr) +static inline int netdev_class_create_file(const struct class_attribute *class_attr) { return netdev_class_create_file_ns(class_attr, NULL); } -static inline void netdev_class_remove_file(struct class_attribute *class_attr) +static inline void netdev_class_remove_file(const struct class_attribute *class_attr) { netdev_class_remove_file_ns(class_attr, NULL); } -- cgit v1.2.3 From 737aec57c672c1308d602afecd841455c39561e5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 18 Aug 2017 13:46:22 -0700 Subject: net: constify net_ns_type_operations This can be const. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eaa77bd9cb80..b0c928598dab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4028,7 +4028,7 @@ static inline void netdev_class_remove_file(const struct class_attribute *class_ netdev_class_remove_file_ns(class_attr, NULL); } -extern struct kobj_ns_type_operations net_ns_type_operations; +extern const struct kobj_ns_type_operations net_ns_type_operations; const char *netdev_drivername(const struct net_device *dev); -- cgit v1.2.3 From 718ad681eff47d3d04879ff5f5290bdab0b8bad6 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 18 Aug 2017 13:46:24 -0700 Subject: net: drop unused attribute argument from sysfs queue funcs The show and store functions don't need/use the attribute. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b0c928598dab..c5475b37a631 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -694,10 +694,9 @@ struct netdev_rx_queue { */ struct rx_queue_attribute { struct attribute attr; - ssize_t (*show)(struct netdev_rx_queue *queue, - struct rx_queue_attribute *attr, char *buf); + ssize_t (*show)(struct netdev_rx_queue *queue, char *buf); ssize_t (*store)(struct netdev_rx_queue *queue, - struct rx_queue_attribute *attr, const char *buf, size_t len); + const char *buf, size_t len); }; #ifdef CONFIG_XPS -- cgit v1.2.3 From 99d1712bc41c7c9a5a473c104a4ad15427757b22 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Aug 2017 15:15:29 +0200 Subject: netfilter: exthdr: tcp option set support This allows setting 2 and 4 byte quantities in the tcp option space. Main purpose is to allow native replacement for xt_TCPMSS to work around pmtu blackholes. Writes to kind and len are now allowed at the moment, it does not seem useful to do this as it causes corruption of the tcp option space. We can always lift this restriction later if a use-case appears. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index be25cf69295b..40fd199f7531 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -732,7 +732,8 @@ enum nft_exthdr_op { * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32) * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) * @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32) - * @NFTA_EXTHDR_OP: option match type (NLA_U8) + * @NFTA_EXTHDR_OP: option match type (NLA_U32) + * @NFTA_EXTHDR_SREG: option match type (NLA_U32) */ enum nft_exthdr_attributes { NFTA_EXTHDR_UNSPEC, @@ -742,6 +743,7 @@ enum nft_exthdr_attributes { NFTA_EXTHDR_LEN, NFTA_EXTHDR_FLAGS, NFTA_EXTHDR_OP, + NFTA_EXTHDR_SREG, __NFTA_EXTHDR_MAX }; #define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1) -- cgit v1.2.3 From 6b5dc98e8fac041a3decfc3186e08c1c570ea691 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Aug 2017 15:48:04 +0200 Subject: netfilter: rt: add support to fetch path mss to be used in combination with tcp option set support to mimic iptables TCPMSS --clamp-mss-to-pmtu. v2: Eric Dumazet points out dst must be initialized. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 40fd199f7531..b49da72efa68 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -811,11 +811,13 @@ enum nft_meta_keys { * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid) * @NFT_RT_NEXTHOP4: routing nexthop for IPv4 * @NFT_RT_NEXTHOP6: routing nexthop for IPv6 + * @NFT_RT_TCPMSS: fetch current path tcp mss */ enum nft_rt_keys { NFT_RT_CLASSID, NFT_RT_NEXTHOP4, NFT_RT_NEXTHOP6, + NFT_RT_TCPMSS, }; /** -- cgit v1.2.3 From 96eabe7a40aa17e613cf3db2c742ee8b1fc764d0 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 18 Aug 2017 11:28:00 -0700 Subject: bpf: Allow selecting numa node during map creation The current map creation API does not allow to provide the numa-node preference. The memory usually comes from where the map-creation-process is running. The performance is not ideal if the bpf_prog is known to always run in a numa node different from the map-creation-process. One of the use case is sharding on CPU to different LRU maps (i.e. an array of LRU maps). Here is the test result of map_perf_test on the INNER_LRU_HASH_PREALLOC test if we force the lru map used by CPU0 to be allocated from a remote numa node: [ The machine has 20 cores. CPU0-9 at node 0. CPU10-19 at node 1 ] ># taskset -c 10 ./map_perf_test 512 8 1260000 8000000 5:inner_lru_hash_map_perf pre-alloc 1628380 events per sec 4:inner_lru_hash_map_perf pre-alloc 1626396 events per sec 3:inner_lru_hash_map_perf pre-alloc 1626144 events per sec 6:inner_lru_hash_map_perf pre-alloc 1621657 events per sec 2:inner_lru_hash_map_perf pre-alloc 1621534 events per sec 1:inner_lru_hash_map_perf pre-alloc 1620292 events per sec 7:inner_lru_hash_map_perf pre-alloc 1613305 events per sec 0:inner_lru_hash_map_perf pre-alloc 1239150 events per sec #<<< After specifying numa node: ># taskset -c 10 ./map_perf_test 512 8 1260000 8000000 5:inner_lru_hash_map_perf pre-alloc 1629627 events per sec 3:inner_lru_hash_map_perf pre-alloc 1628057 events per sec 1:inner_lru_hash_map_perf pre-alloc 1623054 events per sec 6:inner_lru_hash_map_perf pre-alloc 1616033 events per sec 2:inner_lru_hash_map_perf pre-alloc 1614630 events per sec 4:inner_lru_hash_map_perf pre-alloc 1612651 events per sec 7:inner_lru_hash_map_perf pre-alloc 1609337 events per sec 0:inner_lru_hash_map_perf pre-alloc 1619340 events per sec #<<< This patch adds one field, numa_node, to the bpf_attr. Since numa node 0 is a valid node, a new flag BPF_F_NUMA_NODE is also added. The numa_node field is honored if and only if the BPF_F_NUMA_NODE flag is set. Numa node selection is not supported for percpu map. This patch does not change all the kmalloc. F.e. 'htab = kzalloc()' is not changed since the object is small enough to stay in the cache. Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 10 +++++++++- include/uapi/linux/bpf.h | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1cc6c5ff61ec..55b88e329804 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -51,6 +51,7 @@ struct bpf_map { u32 map_flags; u32 pages; u32 id; + int numa_node; struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; @@ -264,7 +265,7 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); -void *bpf_map_area_alloc(size_t size); +void *bpf_map_area_alloc(size_t size, int numa_node); void bpf_map_area_free(void *base); extern int sysctl_unprivileged_bpf_disabled; @@ -316,6 +317,13 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); +/* Return map's numa specified by userspace */ +static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) +{ + return (attr->map_flags & BPF_F_NUMA_NODE) ? + attr->numa_node : NUMA_NO_NODE; +} + #else static inline struct bpf_prog *bpf_prog_get(u32 ufd) { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5ecbe812a2cc..843818dff96d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -165,6 +165,7 @@ enum bpf_attach_type { #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +/* flags for BPF_MAP_CREATE command */ #define BPF_F_NO_PREALLOC (1U << 0) /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list @@ -173,6 +174,8 @@ enum bpf_attach_type { * across different LRU lists. */ #define BPF_F_NO_COMMON_LRU (1U << 1) +/* Specify numa node during map creation */ +#define BPF_F_NUMA_NODE (1U << 2) union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ @@ -180,8 +183,13 @@ union bpf_attr { __u32 key_size; /* size of key in bytes */ __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ - __u32 map_flags; /* prealloc or not */ + __u32 map_flags; /* BPF_MAP_CREATE related + * flags defined above. + */ __u32 inner_map_fd; /* fd pointing to the inner map */ + __u32 numa_node; /* numa node (effective only if + * BPF_F_NUMA_NODE is set). + */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ -- cgit v1.2.3 From d6e1e46f69fbe956e877cdd00dbfb002baddf577 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 19 Aug 2017 23:34:03 -0700 Subject: bpf: linux/bpf.h needs linux/numa.h Reported-by: kbuild test robot Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 55b88e329804..830f472d8df5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -14,6 +14,7 @@ #include #include #include +#include struct perf_event; struct bpf_prog; -- cgit v1.2.3 From 5405fa26c25e18ab735daddc46c8a7a78f138f70 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 15 Jun 2017 18:29:23 +0300 Subject: net/mlx5: Add PCIe outbound stalls counters infrastructure Add capability bit in MCAM register and counters to MPCNT register. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c99daffc3c3c..ba533b39c885 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1854,7 +1854,17 @@ struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits { u8 crc_error_tlp[0x20]; - u8 reserved_at_140[0x680]; + u8 reserved_at_140[0x40]; + + u8 outbound_stalled_reads[0x20]; + + u8 outbound_stalled_writes[0x20]; + + u8 outbound_stalled_reads_events[0x20]; + + u8 outbound_stalled_writes_events[0x20]; + + u8 reserved_at_200[0x5c0]; }; struct mlx5_ifc_cmd_inter_comp_event_bits { @@ -7744,8 +7754,9 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x7d]; - + u8 reserved_at_0[0x7b]; + u8 pcie_outbound_stalled[0x1]; + u8 reserved_at_7c[0x1]; u8 mtpps_enh_out_per_adj[0x1]; u8 mtpps_fs[0x1]; u8 pcie_performance_group[0x1]; -- cgit v1.2.3 From 2dba07975acbc30c77a22d38030ee5a86ab8a748 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 18 Jun 2017 14:56:45 +0300 Subject: net/mlx5: Add RX buffer fullness counters infrastructure Add capability bit in PCAM register and counters to PPCNT register. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ba533b39c885..cf7ff52c594e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1538,7 +1538,17 @@ struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits { u8 port_transmit_wait_low[0x20]; - u8 reserved_at_40[0x780]; + u8 reserved_at_40[0x100]; + + u8 rx_buffer_almost_full_high[0x20]; + + u8 rx_buffer_almost_full_low[0x20]; + + u8 rx_buffer_full_high[0x20]; + + u8 rx_buffer_full_low[0x20]; + + u8 reserved_at_1c0[0x600]; }; struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits { @@ -7723,8 +7733,9 @@ struct mlx5_ifc_peir_reg_bits { }; struct mlx5_ifc_pcam_enhanced_features_bits { - u8 reserved_at_0[0x7c]; + u8 reserved_at_0[0x7b]; + u8 rx_buffer_fullness_counters[0x1]; u8 ptys_connector_type[0x1]; u8 reserved_at_7d[0x1]; u8 ppcnt_discard_group[0x1]; -- cgit v1.2.3 From efae7f78c45ba37bdc23a95d219b59ac85bdd0a7 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Fri, 12 May 2017 02:47:02 +0300 Subject: net/mlx5e: Add outbound PCI buffer overflow counter Add outbound_pci_buffer_overflow to ethtool output for monitoring the number of packets that were dropped due to lack of PCIe buffers on receive path from NIC port toward the host(s). This counter is valid only in case that tx_overflow_buffer_pkt is supported in MCAM enhanced features. Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cf7ff52c594e..ae7d09b9c52f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1864,7 +1864,9 @@ struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits { u8 crc_error_tlp[0x20]; - u8 reserved_at_140[0x40]; + u8 tx_overflow_buffer_pkt_high[0x20]; + + u8 tx_overflow_buffer_pkt_low[0x20]; u8 outbound_stalled_reads[0x20]; @@ -7767,7 +7769,7 @@ struct mlx5_ifc_pcam_reg_bits { struct mlx5_ifc_mcam_enhanced_features_bits { u8 reserved_at_0[0x7b]; u8 pcie_outbound_stalled[0x1]; - u8 reserved_at_7c[0x1]; + u8 tx_overflow_buffer_pkt[0x1]; u8 mtpps_enh_out_per_adj[0x1]; u8 mtpps_fs[0x1]; u8 pcie_performance_group[0x1]; -- cgit v1.2.3 From 89e49506bc62520f93e64a278293444319a6aebb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 17 Aug 2017 16:47:00 +0200 Subject: dsa: remove unused net_device arg from handlers compile tested only, but saw no warnings/errors with allmodconfig build. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/dsa.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 7f46b521313e..398ca8d70ccd 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -104,8 +104,7 @@ struct packet_type; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev); + struct packet_type *pt); int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); }; @@ -134,8 +133,7 @@ struct dsa_switch_tree { /* Copy of tag_ops->rcv for faster access in hot path */ struct sk_buff * (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev); + struct packet_type *pt); /* * The switch port to which the CPU is attached. -- cgit v1.2.3 From 7d3f0cd43feea1636dd7746f22fe8249b34d1b79 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 18 Aug 2017 15:23:24 +0800 Subject: net: sched: Add the invalid handle check in qdisc_class_find Add the invalid handle "0" check to avoid unnecessary search, because the qdisc uses the skb->priority as the handle value to look up, and it is "0" usually. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 5865db91976b..107c52432245 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -393,6 +393,9 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id) struct Qdisc_class_common *cl; unsigned int h; + if (!id) + return NULL; + h = qdisc_class_hash(id, hash->hashmask); hlist_for_each_entry(cl, &hash->hash[h], hnode) { if (cl->classid == id) -- cgit v1.2.3 From 111993692741a7044e6c01b428cecf1071de3d0b Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 21 Aug 2017 23:33:49 -0700 Subject: tcp: Remove the unused parameter for tcp_try_fastopen. Signed-off-by: Tonghao Zhang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index afdab3781425..a995004ae946 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1533,8 +1533,7 @@ int tcp_fastopen_reset_cipher(void *key, unsigned int len); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct dst_entry *dst); + struct tcp_fastopen_cookie *foc); void tcp_fastopen_init_key_once(bool publish); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); -- cgit v1.2.3 From 84e54fe0a5eaed696dee4019c396f8396f5a908b Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 22 Aug 2017 09:40:28 -0700 Subject: gre: introduce native tunnel support for ERSPAN The patch adds ERSPAN type II tunnel support. The implementation is based on the draft at [1]. One of the purposes is for Linux box to be able to receive ERSPAN monitoring traffic sent from the Cisco switch, by creating a ERSPAN tunnel device. In addition, the patch also adds ERSPAN TX, so Linux virtual switch can redirect monitored traffic to the ERSPAN tunnel device. The traffic will be encapsulated into ERSPAN and sent out. The implementation reuses tunnel key as ERSPAN session ID, and field 'erspan' as ERSPAN Index fields: ./ip link add dev ers11 type erspan seq key 100 erspan 123 \ local 172.16.1.200 remote 172.16.1.100 To use the above device as ERSPAN receiver, configure Nexus 5000 switch as below: monitor session 100 type erspan-source erspan-id 123 vrf default destination ip 172.16.1.200 source interface Ethernet1/11 both source interface Ethernet1/12 both no shut monitor erspan origin ip-address 172.16.1.100 global [1] https://tools.ietf.org/html/draft-foschiano-erspan-01 [2] iproute2 patch: http://marc.info/?l=linux-netdev&m=150306086924951&w=2 [3] test script: http://marc.info/?l=linux-netdev&m=150231021807304&w=2 Signed-off-by: William Tu Signed-off-by: Meenakshi Vohra Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Signed-off-by: David S. Miller --- include/net/erspan.h | 61 ++++++++++++++++++++++++++++++++++++++++++ include/net/ip_tunnels.h | 3 +++ include/uapi/linux/if_ether.h | 1 + include/uapi/linux/if_tunnel.h | 1 + 4 files changed, 66 insertions(+) create mode 100644 include/net/erspan.h (limited to 'include') diff --git a/include/net/erspan.h b/include/net/erspan.h new file mode 100644 index 000000000000..ca94fc86865e --- /dev/null +++ b/include/net/erspan.h @@ -0,0 +1,61 @@ +#ifndef __LINUX_ERSPAN_H +#define __LINUX_ERSPAN_H + +/* + * GRE header for ERSPAN encapsulation (8 octets [34:41]) -- 8 bytes + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |0|0|0|1|0|00000|000000000|00000| Protocol Type for ERSPAN | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Sequence Number (increments per packet per session) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Note that in the above GRE header [RFC1701] out of the C, R, K, S, + * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The + * other fields are set to zero, so only a sequence number follows. + * + * ERSPAN Type II header (8 octets [42:49]) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Ver | VLAN | COS | En|T| Session ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Index | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB + */ + +#define ERSPAN_VERSION 0x1 + +#define VER_MASK 0xf000 +#define VLAN_MASK 0x0fff +#define COS_MASK 0xe000 +#define EN_MASK 0x1800 +#define T_MASK 0x0400 +#define ID_MASK 0x03ff +#define INDEX_MASK 0xfffff + +enum erspan_encap_type { + ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ + ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ + ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */ + ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */ +}; + +struct erspan_metadata { + __be32 index; /* type II */ +}; + +struct erspanhdr { + __be16 ver_vlan; +#define VER_OFFSET 12 + __be16 session_id; +#define COS_OFFSET 13 +#define EN_OFFSET 11 +#define T_OFFSET 10 + struct erspan_metadata md; +}; + +#endif diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 520809912f03..625c29329372 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -115,6 +115,9 @@ struct ip_tunnel { u32 o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ + /* This field used only by ERSPAN */ + u32 index; /* ERSPAN type II index */ + struct dst_cache dst_cache; struct ip_tunnel_parm parms; diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 5bc9bfd816b7..efeb1190c2ca 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -66,6 +66,7 @@ #define ETH_P_ATALK 0x809B /* Appletalk DDP */ #define ETH_P_AARP 0x80F3 /* Appletalk AARP */ #define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ +#define ETH_P_ERSPAN 0x88BE /* ERSPAN type II */ #define ETH_P_IPX 0x8137 /* IPX over DIX */ #define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */ #define ETH_P_PAUSE 0x8808 /* IEEE Pause frames. See 802.3 31B */ diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 6792d1967d31..2e520883c054 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -134,6 +134,7 @@ enum { IFLA_GRE_COLLECT_METADATA, IFLA_GRE_IGNORE_DF, IFLA_GRE_FWMARK, + IFLA_GRE_ERSPAN_INDEX, __IFLA_GRE_MAX, }; -- cgit v1.2.3 From 98aaa913b4ed250324429f0a9e6d5f77a3b5276c Mon Sep 17 00:00:00 2001 From: Mike Maloney Date: Tue, 22 Aug 2017 17:08:48 -0400 Subject: tcp: Extend SOF_TIMESTAMPING_RX_SOFTWARE to TCP recvmsg When SOF_TIMESTAMPING_RX_SOFTWARE is enabled for tcp sockets, return the timestamp corresponding to the highest sequence number data returned. Previously the skb->tstamp is overwritten when a TCP packet is placed in the out of order queue. While the packet is in the ooo queue, save the timestamp in the TCB_SKB_CB. This space is shared with the gso_* options which are only used on the tx path, and a previously unused 4 byte hole. When skbs are coalesced either in the sk_receive_queue or the out_of_order_queue always choose the timestamp of the appended skb to maintain the invariant of returning the timestamp of the last byte in the recvmsg buffer. Signed-off-by: Mike Maloney Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/tcp.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a995004ae946..c614ff135b66 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -774,6 +774,12 @@ struct tcp_skb_cb { u16 tcp_gso_segs; u16 tcp_gso_size; }; + + /* Used to stash the receive timestamp while this skb is in the + * out of order queue, as skb->tstamp is overwritten by the + * rbnode. + */ + ktime_t swtstamp; }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ @@ -790,7 +796,8 @@ struct tcp_skb_cb { __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ __u8 txstamp_ack:1, /* Record TX timestamp for ack? */ eor:1, /* Is skb MSG_EOR marked? */ - unused:6; + has_rxtstamp:1, /* SKB has a RX timestamp */ + unused:5; __u32 ack_seq; /* Sequence number ACK'd */ union { struct { -- cgit v1.2.3 From f9cbe9a556afca9e82df9aebe4412d93769566b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20T=C3=A9nart?= Date: Wed, 23 Aug 2017 09:46:54 +0200 Subject: net: define the TSO header size in net/tso.h The TSO header size was defined in many drivers. Factorize the code and define its size in net/tso.h. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/net/tso.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/tso.h b/include/net/tso.h index b7be852bfe9d..9a56c39e6d0a 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -3,6 +3,8 @@ #include +#define TSO_HEADER_SIZE 128 + struct tso_t { int next_frag_idx; void *data; -- cgit v1.2.3 From e457d86ada27cbd2f46ded75d4b4bc06e26d0e2e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 23 Aug 2017 10:08:19 +0200 Subject: net: sched: add couple of goto_chain helpers Add helpers to find out if a gact instance is goto_chain termination action and to get chain index. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/tc_act/tc_gact.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index d576374c4d6f..41afe1ce7b16 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -15,7 +15,8 @@ struct tcf_gact { }; #define to_gact(a) ((struct tcf_gact *)a) -static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) +static inline bool __is_tcf_gact_act(const struct tc_action *a, int act, + bool is_ext) { #ifdef CONFIG_NET_CLS_ACT struct tcf_gact *gact; @@ -24,7 +25,8 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) return false; gact = to_gact(a); - if (gact->tcf_action == act) + if ((!is_ext && gact->tcf_action == act) || + (is_ext && TC_ACT_EXT_CMP(gact->tcf_action, act))) return true; #endif @@ -33,12 +35,22 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) static inline bool is_tcf_gact_shot(const struct tc_action *a) { - return __is_tcf_gact_act(a, TC_ACT_SHOT); + return __is_tcf_gact_act(a, TC_ACT_SHOT, false); } static inline bool is_tcf_gact_trap(const struct tc_action *a) { - return __is_tcf_gact_act(a, TC_ACT_TRAP); + return __is_tcf_gact_act(a, TC_ACT_TRAP, false); +} + +static inline bool is_tcf_gact_goto_chain(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_GOTO_CHAIN, true); +} + +static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a) +{ + return a->goto_chain->index; } #endif /* __NET_TC_GACT_H */ -- cgit v1.2.3 From 1b688a19a92223cf2d1892c9d05d64dc397b33e3 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 23 Aug 2017 15:10:50 +0100 Subject: bpf/verifier: remove varlen_map_value_access flag The optimisation it does is broken when the 'new' register value has a variable offset and the 'old' was constant. I broke it with my pointer types unification (see Fixes tag below), before which the 'new' value would have type PTR_TO_MAP_VALUE_ADJ and would thus not compare equal; other changes in that patch mean that its original behaviour (ignore min/max values) cannot be restored. Tests on a sample set of cilium programs show no change in count of processed instructions. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Edward Cree Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 91d07efed2ba..d8f131a36fd0 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -125,7 +125,6 @@ struct bpf_verifier_env { u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; bool seen_direct_write; - bool varlen_map_value_access; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; -- cgit v1.2.3 From 8e9cd9ce90d48369b2c5ddd79fe3d4a4cb1ccb56 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 23 Aug 2017 15:11:21 +0100 Subject: bpf/verifier: document liveness analysis The liveness tracking algorithm is quite subtle; add comments to explain it. Signed-off-by: Edward Cree Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d8f131a36fd0..b8d200f60a40 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -21,6 +21,19 @@ */ #define BPF_MAX_VAR_SIZ INT_MAX +/* Liveness marks, used for registers and spilled-regs (in stack slots). + * Read marks propagate upwards until they find a write mark; they record that + * "one of this state's descendants read this reg" (and therefore the reg is + * relevant for states_equal() checks). + * Write marks collect downwards and do not propagate; they record that "the + * straight-line code that reached this state (from its parent) wrote this reg" + * (and therefore that reads propagated from this state or its descendants + * should not propagate to its parent). + * A state with a write mark can receive read marks; it just won't propagate + * them to its parent, since the write mark is a property, not of the state, + * but of the link between it and its parent. See mark_reg_read() and + * mark_stack_slot_read() in kernel/bpf/verifier.c. + */ enum bpf_reg_liveness { REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */ REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */ -- cgit v1.2.3 From 07533c6765de05199417ec73f9c2a495ddd29473 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 21 Aug 2017 17:54:21 +0300 Subject: net/mlx5: Remove a leftover unused variable mlx5_core_wq is no longer being used and should be removed from the code. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d26f18b39c4a..d5b6f6a9fcc5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -890,8 +890,6 @@ static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) return buf->direct.buf + offset; } -extern struct workqueue_struct *mlx5_core_wq; - #define STRUCT_FIELD(header, field) \ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field -- cgit v1.2.3 From 667cb65ae5ad01523505d48d6cfd92bd1d3c9785 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 7 Aug 2017 11:14:11 +0300 Subject: net/mlx5: Don't store reserved part in FTEs and FGs The current code stores fte_match_param in the software representation of FTEs and FGs. fte_match_param contains a large reserved area at the bottom of the struct. Since downstream patches are going to hash this part, we would like to avoid doing so on a reserved part. Signed-off-by: Matan Barak Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f31a0b5377e1..3c7442b56460 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -48,7 +48,7 @@ /* helper macros */ #define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0) #define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld) -#define __mlx5_bit_off(typ, fld) ((unsigned)(unsigned long)(&(__mlx5_nullp(typ)->fld))) +#define __mlx5_bit_off(typ, fld) (offsetof(struct mlx5_ifc_##typ##_bits, fld)) #define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32) #define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64) #define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0x1f)) -- cgit v1.2.3 From 1177009131bee310421f5c04c43d3777cbacbdc8 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:39:59 +0200 Subject: devlink: Add Ethernet header for dpipe This will be used by the IPv4 host table which will be introduced in the following patches. This header is global and can be reused by many drivers. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 1 + include/uapi/linux/devlink.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index ed7687bbf5d0..ddb8b5227580 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -328,6 +328,7 @@ int devlink_dpipe_action_put(struct sk_buff *skb, struct devlink_dpipe_action *action); int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_match *match); +extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; #else diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b0e807ac53bb..a855f8dcc8ee 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -226,4 +226,12 @@ enum devlink_dpipe_action_type { DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY, }; +enum devlink_dpipe_field_ethernet_id { + DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC, +}; + +enum devlink_dpipe_header_id { + DEVLINK_DPIPE_HEADER_ETHERNET, +}; + #endif /* _UAPI_LINUX_DEVLINK_H_ */ -- cgit v1.2.3 From 3fb886ecea93605a8ea14e258ff3158b8966781e Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:40:00 +0200 Subject: devlink: Add IPv4 header for dpipe This will be used by the IPv4 host table which will be introduced in the following patches. This header is global and can be reused by many drivers. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 1 + include/uapi/linux/devlink.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index ddb8b5227580..8ff8a6f77f29 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -329,6 +329,7 @@ int devlink_dpipe_action_put(struct sk_buff *skb, int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_match *match); extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; +extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; #else diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index a855f8dcc8ee..6c172548589d 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -230,8 +230,13 @@ enum devlink_dpipe_field_ethernet_id { DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC, }; +enum devlink_dpipe_field_ipv4_id { + DEVLINK_DPIPE_FIELD_IPV4_DST_IP, +}; + enum devlink_dpipe_header_id { DEVLINK_DPIPE_HEADER_ETHERNET, + DEVLINK_DPIPE_HEADER_IPV4, }; #endif /* _UAPI_LINUX_DEVLINK_H_ */ -- cgit v1.2.3 From ffd3cdccf214cf0df08856a6738544076c4cd548 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:40:02 +0200 Subject: devlink: Add support for dynamic table size Up until now the dpipe table's size was static and known at registration time. The host table does not have constant size and it is resized in dynamic manner. In order to support this behavior the size is changed to be obtained dynamically via an op. This patch also adjust the current dpipe table for the new API. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 8ff8a6f77f29..e96272b2cfec 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -178,7 +178,6 @@ struct devlink_dpipe_table_ops; * struct devlink_dpipe_table - table object * @priv: private * @name: table name - * @size: maximum number of entries * @counters_enabled: indicates if counters are active * @counter_control_extern: indicates if counter control is in dpipe or * external tool @@ -189,7 +188,6 @@ struct devlink_dpipe_table { void *priv; struct list_head list; const char *name; - u64 size; bool counters_enabled; bool counter_control_extern; struct devlink_dpipe_table_ops *table_ops; @@ -204,6 +202,7 @@ struct devlink_dpipe_table { * @counters_set_update - when changing the counter status hardware sync * maybe needed to allocate/free counter related * resources + * @size_get - get size */ struct devlink_dpipe_table_ops { int (*actions_dump)(void *priv, struct sk_buff *skb); @@ -211,6 +210,7 @@ struct devlink_dpipe_table_ops { int (*entries_dump)(void *priv, bool counters_enabled, struct devlink_dpipe_dump_ctx *dump_ctx); int (*counters_set_update)(void *priv, bool enable); + u64 (*size_get)(void *priv); }; /** @@ -311,8 +311,7 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); int devlink_dpipe_table_register(struct devlink *devlink, const char *table_name, struct devlink_dpipe_table_ops *table_ops, - void *priv, u64 size, - bool counter_control_extern); + void *priv, bool counter_control_extern); void devlink_dpipe_table_unregister(struct devlink *devlink, const char *table_name); int devlink_dpipe_headers_register(struct devlink *devlink, -- cgit v1.2.3 From 3580732448f128c39e7325912bc4368ade5dce7d Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:40:03 +0200 Subject: devlink: Move dpipe entry clear function into devlink The entry clear routine can be shared between the drivers, thus it is moved inside devlink. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index e96272b2cfec..047a0c54f652 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -323,6 +323,7 @@ int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx); int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, struct devlink_dpipe_entry *entry); int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx); +void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry); int devlink_dpipe_action_put(struct sk_buff *skb, struct devlink_dpipe_action *action); int devlink_dpipe_match_put(struct sk_buff *skb, @@ -448,6 +449,11 @@ devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) return 0; } +static inline void +devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry) +{ +} + static inline int devlink_dpipe_action_put(struct sk_buff *skb, struct devlink_dpipe_action *action) -- cgit v1.2.3 From 0d03510038bda70b5a4a252e8216822e6ce0cbdb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:02 +0200 Subject: netfilter: conntrack: compute l3proto nla size at compile time avoids a pointer and allows struct to be const later on. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 1b8de164d744..6a27ffea7480 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -20,6 +20,9 @@ struct nf_conntrack_l3proto { /* L3 Protocol Family number. ex) PF_INET */ u_int16_t l3proto; + /* size of tuple nlattr, fills a hole */ + u16 nla_size; + /* Protocol name */ const char *name; @@ -49,23 +52,17 @@ struct nf_conntrack_l3proto { int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum); +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); - - /* Called when netns wants to use connection tracking */ - int (*net_ns_get)(struct net *); - void (*net_ns_put)(struct net *); - - /* - * Calculate size of tuple nlattr - */ - int (*nlattr_tuple_size)(void); - int (*nlattr_to_tuple)(struct nlattr *tb[], struct nf_conntrack_tuple *t); const struct nla_policy *nla_policy; +#endif - size_t nla_size; + /* Called when netns wants to use connection tracking */ + int (*net_ns_get)(struct net *); + void (*net_ns_put)(struct net *); /* Module (if any) which this is connected to. */ struct module *me; -- cgit v1.2.3 From a3134d537f8209f5b149d7ed9f287047158845f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:03 +0200 Subject: netfilter: conntrack: remove protocol name from l3proto struct no need to waste storage for something that is only needed in one place and can be deduced from protocol number. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 6a27ffea7480..e31861e4fa6a 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -23,9 +23,6 @@ struct nf_conntrack_l3proto { /* size of tuple nlattr, fills a hole */ u16 nla_size; - /* Protocol name */ - const char *name; - /* * Try to fill in the third arg: nhoff is offset of l3 proto * hdr. Return true if possible. -- cgit v1.2.3 From 09ec82f5af99d1e35614eb0844b920fc335a313d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:04 +0200 Subject: netfilter: conntrack: remove protocol name from l4proto struct no need to waste storage for something that is only needed in one place and can be deduced from protocol number. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index b6e27cafb1d9..47c16bae5e00 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -108,9 +108,6 @@ struct nf_conntrack_l4proto { /* Return the per-net protocol part. */ struct nf_proto_net *(*get_net_proto)(struct net *net); - /* Protocol name */ - const char *name; - /* Module (if any) which this is connected to. */ struct module *me; }; -- cgit v1.2.3 From 036c69400a34cf8f6d39c88325dd510462809e7b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:05 +0200 Subject: netfilter: conntrack: reduce size of l4protocol trackers can use u16 for both, shrinks size by another 8 bytes. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 47c16bae5e00..15c58dd3f701 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -92,12 +92,12 @@ struct nf_conntrack_l4proto { #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) struct { - size_t obj_size; int (*nlattr_to_obj)(struct nlattr *tb[], struct net *net, void *data); int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); - unsigned int nlattr_max; + u16 obj_size; + u16 nlattr_max; const struct nla_policy *nla_policy; } ctnl_timeout; #endif -- cgit v1.2.3 From 91950833dd5a34ac6336aa88da6d43aaeb56ac6d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:06 +0200 Subject: netfilter: conntrack: place print_tuple in procfs part CONFIG_NF_CONNTRACK_PROCFS is deprecated, no need to use a function pointer in the trackers for this. Place the printf formatting in the one place that uses it. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 4 ---- include/net/netfilter/nf_conntrack_l4proto.h | 4 ---- 2 files changed, 8 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e31861e4fa6a..dabb53b0913c 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -37,10 +37,6 @@ struct nf_conntrack_l3proto { bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig); - /* Print out the per-protocol part of the tuple. */ - void (*print_tuple)(struct seq_file *s, - const struct nf_conntrack_tuple *); - /* * Called before tracking. * *dataoff: offset of protocol header (TCP, UDP,...) in skb diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 15c58dd3f701..7e8da04a5eb6 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -61,10 +61,6 @@ struct nf_conntrack_l4proto { /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); - /* Print out the per-protocol part of the tuple. Return like seq_* */ - void (*print_tuple)(struct seq_file *s, - const struct nf_conntrack_tuple *); - /* Print out the private part of the conntrack. */ void (*print_conntrack)(struct seq_file *s, struct nf_conn *); -- cgit v1.2.3 From ea48cc83cf612fddb4e8868369348b9f936cfedb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:07 +0200 Subject: netfilter: conntrack: print_conntrack only needed if CONFIG_NF_CONNTRACK_PROCFS Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7e8da04a5eb6..4976ef92dc78 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -61,9 +61,6 @@ struct nf_conntrack_l4proto { /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); - /* Print out the private part of the conntrack. */ - void (*print_conntrack)(struct seq_file *s, struct nf_conn *); - /* Return the array of timeouts for this protocol. */ unsigned int *(*get_timeouts)(struct net *net); @@ -96,6 +93,10 @@ struct nf_conntrack_l4proto { u16 nlattr_max; const struct nla_policy *nla_policy; } ctnl_timeout; +#endif +#ifdef CONFIG_NF_CONNTRACK_PROCFS + /* Print out the private part of the conntrack. */ + void (*print_conntrack)(struct seq_file *s, struct nf_conn *); #endif unsigned int *net_id; /* Init l4proto pernet data */ -- cgit v1.2.3 From b3480fe059ac9121b5714205b4ddae14b59ef4be Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:08 +0200 Subject: netfilter: conntrack: make protocol tracker pointers const Doesn't change generated code, but will make it easier to eventually make the actual trackers themselvers const. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 6 +++--- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- include/net/netfilter/nf_conntrack_timeout.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index dabb53b0913c..6269deecbee7 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -64,10 +64,10 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; /* Protocol global registration. */ -int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto); -void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto); +int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto); +void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto); -struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); +const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); /* Existing built-in protocols */ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 4976ef92dc78..d4933d56809d 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -114,10 +114,10 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO 256 -struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, +const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto); -struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, +const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto); void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index b222957062b5..483d104fa254 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -16,7 +16,7 @@ struct ctnl_timeout { refcount_t refcnt; char name[CTNL_TIMEOUT_NAME_MAX]; __u16 l3num; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; char data[0]; }; -- cgit v1.2.3 From 2facaad6000f2322eb40ca379aced31c957f0a41 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 24 Aug 2017 12:33:08 +0200 Subject: xdp: make generic xdp redirect use tracepoint trace_xdp_redirect If the xdp_do_generic_redirect() call fails, it trigger the trace_xdp_exception tracepoint. It seems better to use the same tracepoint trace_xdp_redirect, as the native xdp_do_redirect{,_map} does. Signed-off-by: Jesper Dangaard Brouer Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 7015116331af..d29e58fde364 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -718,7 +718,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * because we only track one map and force a flush when the map changes. * This does not appear to be a real limitation for existing software. */ -int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, + struct bpf_prog *prog); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); -- cgit v1.2.3 From a873585587205750e7accfb2c93c29239ffa6e09 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 24 Aug 2017 12:33:18 +0200 Subject: xdp: remove net_device names from xdp_redirect tracepoint There is too much overhead in the current trace_xdp_redirect tracepoint as it does strcpy and strlen on the net_device names. Besides, exposing the ifindex/index is actually the information that is needed in the tracepoint to diagnose issues. When a lookup fails (either ifindex or devmap index) then there is a need for saying which to_index that have issues. V2: Adjust args to be aligned with trace_xdp_exception. Signed-off-by: Jesper Dangaard Brouer Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 0e42e69f773b..cd37706c6f55 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -51,33 +51,33 @@ TRACE_EVENT(xdp_exception, TRACE_EVENT(xdp_redirect, - TP_PROTO(const struct net_device *from, - const struct net_device *to, - const struct bpf_prog *xdp, u32 act, int err), + TP_PROTO(const struct net_device *dev, + const struct bpf_prog *xdp, u32 act, + int to_index, int err), - TP_ARGS(from, to, xdp, act, err), + TP_ARGS(dev, xdp, act, to_index, err), TP_STRUCT__entry( - __string(name_from, from->name) - __string(name_to, to->name) __array(u8, prog_tag, 8) __field(u32, act) + __field(int, ifindex) + __field(int, to_index) __field(int, err) ), TP_fast_assign( BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); - __assign_str(name_from, from->name); - __assign_str(name_to, to->name); - __entry->act = act; - __entry->err = err; + __entry->act = act; + __entry->ifindex = dev->ifindex; + __entry->to_index = to_index; + __entry->err = err; ), - TP_printk("prog=%s from=%s to=%s action=%s err=%d", + TP_printk("prog=%s action=%s ifindex=%d to_index=%d err=%d", __print_hex_str(__entry->prog_tag, 8), - __get_str(name_from), __get_str(name_to), __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->ifindex, __entry->to_index, __entry->err) ); #endif /* _TRACE_XDP_H */ -- cgit v1.2.3 From 315ec3990efd71f87e556cf7827a1ac2d565d5e8 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 24 Aug 2017 12:33:23 +0200 Subject: xdp: get tracepoints xdp_exception and xdp_redirect in sync Remove the net_device string name from the xdp_exception tracepoint, like the xdp_redirect tracepoint. Align the TP_STRUCT to have common entries between these two tracepoint. Signed-off-by: Jesper Dangaard Brouer Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index cd37706c6f55..27cf2ef35f5f 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -31,22 +31,22 @@ TRACE_EVENT(xdp_exception, TP_ARGS(dev, xdp, act), TP_STRUCT__entry( - __string(name, dev->name) __array(u8, prog_tag, 8) __field(u32, act) + __field(int, ifindex) ), TP_fast_assign( BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); - __assign_str(name, dev->name); - __entry->act = act; + __entry->act = act; + __entry->ifindex = dev->ifindex; ), - TP_printk("prog=%s device=%s action=%s", + TP_printk("prog=%s action=%s ifindex=%d", __print_hex_str(__entry->prog_tag, 8), - __get_str(name), - __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->ifindex) ); TRACE_EVENT(xdp_redirect, -- cgit v1.2.3 From 22b6722bfa591ba03d6a0c5521b600d4ab2d9a27 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 23 Aug 2017 09:55:41 +0200 Subject: ipv6: Add sysctl for per namespace flow label reflection Reflecting IPv6 Flow Label at server nodes is useful in environments that employ multipath routing to load balance the requests. As "IPv6 Flow Label Reflection" standard draft [1] points out - ICMPv6 PTB error messages generated in response to a downstream packets from the server can be routed by a load balancer back to the original server without looking at transport headers, if the server applies the flow label reflection. This enables the Path MTU Discovery past the ECMP router in load-balance or anycast environments where each server node is reachable by only one path. Introduce a sysctl to enable flow label reflection per net namespace for all newly created sockets. Same could be earlier achieved only per socket by setting the IPV6_FL_F_REFLECT flag for the IPV6_FLOWLABEL_MGR socket option. [1] https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01 Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 0e50bf3ed097..2544f9760a42 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -36,6 +36,7 @@ struct netns_sysctl_ipv6 { int idgen_retries; int idgen_delay; int flowlabel_state_ranges; + int flowlabel_reflect; }; struct netns_ipv6 { -- cgit v1.2.3 From 790c6056686cc4dd5b149b330bbd5ae208d4d721 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2017 18:10:46 -0700 Subject: devlink: Fix devlink_dpipe_table_register() stub signature. One too many arguments compared to the non-stub version. Reported-by: kbuild test robot Fixes: ffd3cdccf214 ("devlink: Add support for dynamic table size") Signed-off-by: David S. Miller --- include/net/devlink.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 047a0c54f652..aaf7178127a2 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -402,8 +402,7 @@ static inline int devlink_dpipe_table_register(struct devlink *devlink, const char *table_name, struct devlink_dpipe_table_ops *table_ops, - void *priv, u64 size, - bool counter_control_extern) + void *priv, bool counter_control_extern) { return 0; } -- cgit v1.2.3 From 29825717123fb9cfb9e709327d565c2f2fa89903 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 23 Aug 2017 09:58:28 +0200 Subject: net: Extend struct flowi6 with multipath hash Allow for functions that fill out the IPv6 flow info to also pass a hash computed over the skb contents. The hash value will drive the multipath routing decisions. This is intended for special treatment of ICMPv6 errors, where we would like to make a routing decision based on the flow identifying the offending IPv6 datagram that triggered the error, rather than the flow of the ICMP error itself. Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/flow.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index f3dc61b29bb5..eb60cee30b44 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -149,6 +149,7 @@ struct flowi6 { #define fl6_ipsec_spi uli.spi #define fl6_mh_type uli.mht.type #define fl6_gre_key uli.gre_key + __u32 mp_hash; } __attribute__((__aligned__(BITS_PER_LONG/8))); struct flowidn { -- cgit v1.2.3 From 23aebdacb05dab9efdf22b9e0413491cbd5f128f Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 23 Aug 2017 09:58:29 +0200 Subject: ipv6: Compute multipath hash for ICMP errors from offending packet When forwarding or sending out an ICMPv6 error, look at the embedded packet that triggered the error and compute a flow hash over its headers. This let's us route the ICMP error together with the flow it belongs to when multipath (ECMP) routing is in use, which in turn makes Path MTU Discovery work in ECMP load-balanced or anycast setups (RFC 7690). Granted, end-hosts behind the ECMP router (aka servers) need to reflect the IPv6 Flow Label for PMTUD to work. The code is organized to be in parallel with ipv4 stack: ip_multipath_l3_keys -> ip6_multipath_l3_keys fib_multipath_hash -> rt6_multipath_hash Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 907d39a42f6b..882bc3c7ccde 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -115,6 +115,7 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); +u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); -- cgit v1.2.3 From 3fd87127073292538047adf1c9c757e9cab0dd56 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Aug 2017 14:38:51 -0700 Subject: strparser: initialize all callbacks commit bbb03029a899 ("strparser: Generalize strparser") added more function pointers to 'struct strp_callbacks'; however, kcm_attach() was not updated to initialize them. This could cause the ->lock() and/or ->unlock() function pointers to be set to garbage values, causing a crash in strp_work(). Fix the bug by moving the callback structs into static memory, so unspecified members are zeroed. Also constify them while we're at it. This bug was found by syzkaller, which encountered the following splat: IP: 0x55 PGD 3b1ca067 P4D 3b1ca067 PUD 3b12f067 PMD 0 Oops: 0010 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 2 PID: 1194 Comm: kworker/u8:1 Not tainted 4.13.0-rc4-next-20170811 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: kstrp strp_work task: ffff88006bb0e480 task.stack: ffff88006bb10000 RIP: 0010:0x55 RSP: 0018:ffff88006bb17540 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff88006ce4bd60 RCX: 0000000000000000 RDX: 1ffff1000d9c97bd RSI: 0000000000000000 RDI: ffff88006ce4bc48 RBP: ffff88006bb17558 R08: ffffffff81467ab2 R09: 0000000000000000 R10: ffff88006bb17438 R11: ffff88006bb17940 R12: ffff88006ce4bc48 R13: ffff88003c683018 R14: ffff88006bb17980 R15: ffff88003c683000 FS: 0000000000000000(0000) GS:ffff88006de00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000055 CR3: 000000003c145000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: process_one_work+0xbf3/0x1bc0 kernel/workqueue.c:2098 worker_thread+0x223/0x1860 kernel/workqueue.c:2233 kthread+0x35e/0x430 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Code: Bad RIP value. RIP: 0x55 RSP: ffff88006bb17540 CR2: 0000000000000055 ---[ end trace f0e4920047069cee ]--- Here is a C reproducer (requires CONFIG_BPF_SYSCALL=y and CONFIG_AF_KCM=y): #include #include #include #include #include #include #include #include static const struct bpf_insn bpf_insns[3] = { { .code = 0xb7 }, /* BPF_MOV64_IMM(0, 0) */ { .code = 0x95 }, /* BPF_EXIT_INSN() */ }; static const union bpf_attr bpf_attr = { .prog_type = 1, .insn_cnt = 2, .insns = (uintptr_t)&bpf_insns, .license = (uintptr_t)"", }; int main(void) { int bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &bpf_attr, sizeof(bpf_attr)); int inet_fd = socket(AF_INET, SOCK_STREAM, 0); int kcm_fd = socket(AF_KCM, SOCK_DGRAM, 0); ioctl(kcm_fd, SIOCKCMATTACH, &(struct kcm_attach) { .fd = inet_fd, .bpf_fd = bpf_fd }); } Fixes: bbb03029a899 ("strparser: Generalize strparser") Cc: Dmitry Vyukov Cc: Tom Herbert Signed-off-by: Eric Biggers Signed-off-by: David S. Miller --- include/net/strparser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/strparser.h b/include/net/strparser.h index 4fe966a0ad92..7dc131d62ad5 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -138,7 +138,7 @@ void strp_done(struct strparser *strp); void strp_stop(struct strparser *strp); void strp_check_rcv(struct strparser *strp); int strp_init(struct strparser *strp, struct sock *sk, - struct strp_callbacks *cb); + const struct strp_callbacks *cb); void strp_data_ready(struct strparser *strp); int strp_process(struct strparser *strp, struct sk_buff *orig_skb, unsigned int orig_offset, size_t orig_len, -- cgit v1.2.3 From fbb113f773482496d601e0bd934e680b35876016 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Thu, 29 Jun 2017 15:12:24 +0200 Subject: i40e/i40evf: rename vf_offload_flags to vf_cap_flags in struct virtchnl_vf_resource The current name of vf_offload_flags indicates that the bitmap is limited to offload related features. Make this more generic by renaming it to vf_cap_flags, which allows for other capabilities besides offloading to be added. Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index c893b9520a67..becfca2ae94e 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -223,7 +223,7 @@ struct virtchnl_vsi_resource { VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); -/* VF offload flags +/* VF capability flags * VIRTCHNL_VF_OFFLOAD_L2 flag is inclusive of base mode L2 offloads including * TX/RX Checksum offloading and TSO for non-tunnelled packets. */ @@ -251,7 +251,7 @@ struct virtchnl_vf_resource { u16 max_vectors; u16 max_mtu; - u32 vf_offload_flags; + u32 vf_cap_flags; u32 rss_key_size; u32 rss_lut_size; -- cgit v1.2.3 From 32d99d0b670299720dd0db92a974c9612c230889 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Fri, 25 Aug 2017 09:56:44 +0200 Subject: ipv6: sr: add support for ip4ip6 encapsulation This patch enables the SRv6 encapsulation mode to carry an IPv4 payload. All the infrastructure was already present, I just had to add a parameter to seg6_do_srh_encap() to specify the inner packet protocol, and perform some additional checks. Usage example: ip route add 1.2.3.4 encap seg6 mode encap segs fc00::1,fc00::2 dev eth0 Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/net/seg6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/seg6.h b/include/net/seg6.h index 5379f550f521..099bad59dc90 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -60,7 +60,8 @@ extern int seg6_local_init(void); extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); -extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); +extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, + int proto); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); #endif -- cgit v1.2.3 From 38ee7f2d47565689f35662d488d25e7afc43477d Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Fri, 25 Aug 2017 09:56:45 +0200 Subject: ipv6: sr: add support for encapsulation of L2 frames This patch implements the L2 frame encapsulation mechanism, referred to as T.Encaps.L2 in the SRv6 specifications [1]. A new type of SRv6 tunnel mode is added (SEG6_IPTUN_MODE_L2ENCAP). It only accepts packets with an existing MAC header (i.e., it will not work for locally generated packets). The resulting packet looks like IPv6 -> SRH -> Ethernet -> original L3 payload. The next header field of the SRH is set to NEXTHDR_NONE. [1] https://tools.ietf.org/html/draft-filsfils-spring-srv6-network-programming-01 Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/uapi/linux/seg6_iptunnel.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index b6e5a0a1afd7..b23df9f58354 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -33,16 +33,26 @@ struct seg6_iptunnel_encap { enum { SEG6_IPTUN_MODE_INLINE, SEG6_IPTUN_MODE_ENCAP, + SEG6_IPTUN_MODE_L2ENCAP, }; #ifdef __KERNEL__ static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) { - int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP); - - return ((tuninfo->srh->hdrlen + 1) << 3) + - (encap * sizeof(struct ipv6hdr)); + int head = 0; + + switch (tuninfo->mode) { + case SEG6_IPTUN_MODE_INLINE: + break; + case SEG6_IPTUN_MODE_ENCAP: + head = sizeof(struct ipv6hdr); + break; + case SEG6_IPTUN_MODE_L2ENCAP: + return 0; + } + + return ((tuninfo->srh->hdrlen + 1) << 3) + head; } #endif -- cgit v1.2.3 From 143976ce992fcf3bfc0f4d15d5726bb492dcf262 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 24 Aug 2017 16:51:29 -0700 Subject: net_sched: remove tc class reference counting For TC classes, their ->get() and ->put() are always paired, and the reference counting is completely useless, because: 1) For class modification and dumping paths, we already hold RTNL lock, so all of these ->get(),->change(),->put() are atomic. 2) For filter bindiing/unbinding, we use other reference counter than this one, and they should have RTNL lock too. 3) For ->qlen_notify(), it is special because it is called on ->enqueue() path, but we already hold qdisc tree lock there, and we hold this tree lock when graft or delete the class too, so it should not be gone or changed until we release the tree lock. Therefore, this patch removes ->get() and ->put(), but: 1) Adds a new ->find() to find the pointer to a class by classid, no refcnt. 2) Move the original class destroy upon the last refcnt into ->delete(), right after releasing tree lock. This is fine because the class is already removed from hash when holding the lock. For those who also use ->put() as ->unbind(), just rename them to reflect this change. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1688f0f6c7ba..d817585aa5df 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -147,8 +147,7 @@ struct Qdisc_class_ops { void (*qlen_notify)(struct Qdisc *, unsigned long); /* Class manipulation routines */ - unsigned long (*get)(struct Qdisc *, u32 classid); - void (*put)(struct Qdisc *, unsigned long); + unsigned long (*find)(struct Qdisc *, u32 classid); int (*change)(struct Qdisc *, u32, u32, struct nlattr **, unsigned long *); int (*delete)(struct Qdisc *, unsigned long); -- cgit v1.2.3 From 3cd904ecbb5d0bcf36dfca7e726bdfd6d3644334 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 24 Aug 2017 16:51:30 -0700 Subject: net_sched: kill u32_node pointer in Qdisc It is ugly to hide a u32-filter-specific pointer inside Qdisc, this breaks the TC layers: 1. Qdisc is a generic representation, should not have any specific data of any type 2. Qdisc layer is above filter layer, should only save filters in the list of struct tcf_proto. This pointer is used as the head of the chain of u32 hash tables, that is struct tc_u_hnode, because u32 filter is very special, it allows to create multiple hash tables within one qdisc and across multiple u32 filters. Instead of using this ugly pointer, we can just save it in a global hash table key'ed by (dev ifindex, qdisc handle), therefore we can still treat it as a per qdisc basis data structure conceptually. Of course, because of network namespaces, this key is not unique at all, but it is fine as we already have a pointer to Qdisc in struct tc_u_common, we can just compare the pointers when collision. And this only affects slow paths, has no impact to fast path, thanks to the pointer ->tp_c. Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d817585aa5df..c30b634c5f82 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -75,7 +75,6 @@ struct Qdisc { struct hlist_node hash; u32 handle; u32 parent; - void *u32_node; struct netdev_queue *dev_queue; -- cgit v1.2.3 From 8774370d268f2f43d8487d230e0d4fa1647759b3 Mon Sep 17 00:00:00 2001 From: Mariusz Stachura Date: Mon, 17 Jul 2017 22:09:45 -0700 Subject: i40e/i40evf: support for VF VLAN tag stripping control This patch gives VF capability to control VLAN tag stripping via ethtool. As rx-vlan-offload was fixed before, now the VF is able to change it using "ethtool --offload rxvlan on/off" settings. Signed-off-by: Mariusz Stachura Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index becfca2ae94e..2b038442c352 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -133,6 +133,8 @@ enum virtchnl_ops { VIRTCHNL_OP_CONFIG_RSS_LUT = 24, VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, VIRTCHNL_OP_SET_RSS_HENA = 26, + VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27, + VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28, }; /* This macro is used to generate a compilation error if a structure @@ -686,6 +688,9 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_SET_RSS_HENA: valid_len = sizeof(struct virtchnl_rss_hena); break; + case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: + case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: -- cgit v1.2.3 From 960632ece6949be1ab6f7a911faa4fa6e8305f4a Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Thu, 24 Aug 2017 00:08:32 +0200 Subject: netfilter: convert hook list to an array This converts the storage and layout of netfilter hook entries from a linked list to an array. After this commit, hook entries will be stored adjacent in memory. The next pointer is no longer required. The ops pointers are stored at the end of the array as they are only used in the register/unregister path and in the legacy br_netfilter code. nf_unregister_net_hooks() is slower than needed as it just calls nf_unregister_net_hook in a loop (i.e. at least n synchronize_net() calls), this will be addressed in followup patch. Test setup: - ixgbe 10gbit - netperf UDP_STREAM, 64 byte packets - 5 hooks: (raw + mangle prerouting, mangle+filter input, inet filter): empty mangle and raw prerouting, mangle and filter input hooks: 353.9 this patch: 364.2 Signed-off-by: Aaron Conole Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 2 +- include/linux/netfilter.h | 45 ++++++++++++++++++++------------------- include/linux/netfilter_ingress.h | 4 ++-- include/net/netfilter/nf_queue.h | 2 +- include/net/netns/netfilter.h | 2 +- 5 files changed, 28 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 614642eb7eb7..ca0a30127300 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1811,7 +1811,7 @@ struct net_device { #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS - struct nf_hook_entry __rcu *nf_hooks_ingress; + struct nf_hook_entries __rcu *nf_hooks_ingress; #endif unsigned char broadcast[MAX_ADDR_LEN]; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 22f081065d49..f84bca1703cd 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -72,25 +72,32 @@ struct nf_hook_ops { }; struct nf_hook_entry { - struct nf_hook_entry __rcu *next; nf_hookfn *hook; void *priv; - const struct nf_hook_ops *orig_ops; }; -static inline void -nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops) -{ - entry->next = NULL; - entry->hook = ops->hook; - entry->priv = ops->priv; - entry->orig_ops = ops; -} +struct nf_hook_entries { + u16 num_hook_entries; + /* padding */ + struct nf_hook_entry hooks[]; + + /* trailer: pointers to original orig_ops of each hook. + * + * This is not part of struct nf_hook_entry since its only + * needed in slow path (hook register/unregister). + * + * const struct nf_hook_ops *orig_ops[] + */ +}; -static inline int -nf_hook_entry_priority(const struct nf_hook_entry *entry) +static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e) { - return entry->orig_ops->priority; + unsigned int n = e->num_hook_entries; + const void *hook_end; + + hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */ + + return (struct nf_hook_ops **)hook_end; } static inline int @@ -100,12 +107,6 @@ nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, return entry->hook(entry->priv, skb, state); } -static inline const struct nf_hook_ops * -nf_hook_entry_ops(const struct nf_hook_entry *entry) -{ - return entry->orig_ops; -} - static inline void nf_hook_state_init(struct nf_hook_state *p, unsigned int hook, u_int8_t pf, @@ -168,7 +169,7 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry *entry); + const struct nf_hook_entries *e, unsigned int i); /** * nf_hook - call a netfilter hook @@ -182,7 +183,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - struct nf_hook_entry *hook_head; + struct nf_hook_entries *hook_head; int ret = 1; #ifdef HAVE_JUMP_LABEL @@ -200,7 +201,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, nf_hook_state_init(&state, hook, pf, indev, outdev, sk, net, okfn); - ret = nf_hook_slow(skb, &state, hook_head); + ret = nf_hook_slow(skb, &state, hook_head, 0); } rcu_read_unlock(); diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 59476061de86..8d5dae1e2ff8 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -17,7 +17,7 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb) /* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { - struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); + struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; int ret; @@ -30,7 +30,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - ret = nf_hook_slow(skb, &state, e); + ret = nf_hook_slow(skb, &state, e, 0); if (ret == 0) return -1; diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 4454719ff849..39468720fc19 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -10,9 +10,9 @@ struct nf_queue_entry { struct list_head list; struct sk_buff *skb; unsigned int id; + unsigned int hook_index; /* index in hook_entries->hook[] */ struct nf_hook_state state; - struct nf_hook_entry *hook; u16 size; /* sizeof(entry) + saved route keys */ /* extra space to store route keys */ diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cea396b53a60..72d66c8763d0 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -16,7 +16,7 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) bool defrag_ipv4; #endif -- cgit v1.2.3 From 464bc0fd6273d518aee79fbd37211dd9bc35d863 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 28 Aug 2017 07:10:04 -0700 Subject: bpf: convert sockmap field attach_bpf_fd2 to type In the initial sockmap API we provided strparser and verdict programs using a single attach command by extending the attach API with a the attach_bpf_fd2 field. However, if we add other programs in the future we will be adding a field for every new possible type, attach_bpf_fd(3,4,..). This seems a bit clumsy for an API. So lets push the programs using two new type fields. BPF_SK_SKB_STREAM_PARSER BPF_SK_SKB_STREAM_VERDICT This has the advantage of having a readable name and can easily be extended in the future. Updates to samples and sockmap included here also generalize tests slightly to support upcoming patch for multiple map support. Signed-off-by: John Fastabend Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") Suggested-by: Alexei Starovoitov Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 10 ++++++++-- include/uapi/linux/bpf.h | 9 +++------ 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 830f472d8df5..c2cb1b5c094e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -39,8 +39,6 @@ struct bpf_map_ops { void (*map_fd_put_ptr)(void *ptr); u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); - int (*map_attach)(struct bpf_map *map, - struct bpf_prog *p1, struct bpf_prog *p2); }; struct bpf_map { @@ -387,11 +385,19 @@ static inline void __dev_map_flush(struct bpf_map *map) #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); +int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); #else static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) { return NULL; } + +static inline int sock_map_attach_prog(struct bpf_map *map, + struct bpf_prog *prog, + u32 type) +{ + return -EOPNOTSUPP; +} #endif /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 843818dff96d..97227be3690c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -136,7 +136,8 @@ enum bpf_attach_type { BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, BPF_CGROUP_SOCK_OPS, - BPF_CGROUP_SMAP_INGRESS, + BPF_SK_SKB_STREAM_PARSER, + BPF_SK_SKB_STREAM_VERDICT, __MAX_BPF_ATTACH_TYPE }; @@ -224,7 +225,6 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; - __u32 attach_bpf_fd2; }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ @@ -580,14 +580,11 @@ union bpf_attr { * @flags: reserved for future use * Return: SK_REDIRECT * - * int bpf_sock_map_update(skops, map, key, flags, map_flags) + * int bpf_sock_map_update(skops, map, key, flags) * @skops: pointer to bpf_sock_ops * @map: pointer to sockmap to update * @key: key to insert/update sock in map * @flags: same flags as map update elem - * @map_flags: sock map specific flags - * bit 1: Enable strparser - * other bits: reserved */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ -- cgit v1.2.3 From 2f857d04601a1bb56958b95a9f180bce0e91e5e6 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 28 Aug 2017 07:10:25 -0700 Subject: bpf: sockmap, remove STRPARSER map_flags and add multi-map support The addition of map_flags BPF_SOCKMAP_STRPARSER flags was to handle a specific use case where we want to have BPF parse program disabled on an entry in a sockmap. However, Alexei found the API a bit cumbersome and I agreed. Lets remove the STRPARSER flag and support the use case by allowing socks to be in multiple maps. This allows users to create two maps one with programs attached and one without. When socks are added to maps they now inherit any programs attached to the map. This is a nice generalization and IMO improves the API. The API rules are less ambiguous and do not need a flag: - When a sock is added to a sockmap we have two cases, i. The sock map does not have any attached programs so we can add sock to map without inheriting bpf programs. The sock may exist in 0 or more other maps. ii. The sock map has an attached BPF program. To avoid duplicate bpf programs we only add the sock entry if it does not have an existing strparser/verdict attached, returning -EBUSY if a program is already attached. Otherwise attach the program and inherit strparser/verdict programs from the sock map. This allows for socks to be in a multiple maps for redirects and inherit a BPF program from a single map. Also this patch simplifies the logic around BPF_{EXIST|NOEXIST|ANY} flags. In the original patch I tried to be extra clever and only update map entries when necessary. Now I've decided the complexity is not worth it. If users constantly update an entry with the same sock for no reason (i.e. update an entry without actually changing any parameters on map or sock) we still do an alloc/release. Using this and allowing multiple entries of a sock to exist in a map the logic becomes much simpler. Note: Now that multiple maps are supported the "maps" pointer called when a socket is closed becomes a list of maps to remove the sock from. To keep the map up to date when a sock is added to the sockmap we must add the map/elem in the list. Likewise when it is removed we must remove it from the list. This results in searching the per psock list on delete operation. On TCP_CLOSE events we walk the list and remove the psock from all map/entry locations. I don't see any perf implications in this because at most I have a psock in two maps. If a psock were to be in many maps its possibly this might be noticeable on delete but I can't think of a reason to dup a psock in many maps. The sk_callback_lock is used to protect read/writes to the list. This was convenient because in all locations we were taking the lock anyways just after working on the list. Also the lock is per sock so in normal cases we shouldn't see any contention. Suggested-by: Alexei Starovoitov Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 97227be3690c..08c206a863e1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -143,9 +143,6 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */ -#define BPF_SOCKMAP_STRPARSER (1U << 0) - /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command * to the given target_fd cgroup the descendent cgroup will be able to * override effective bpf program that was inherited from this cgroup -- cgit v1.2.3 From 1a66a836da630cd70f3639208da549b549ce576b Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 25 Aug 2017 09:21:28 -0700 Subject: gre: add collect_md mode to ERSPAN tunnel Similar to gre, vxlan, geneve, ipip tunnels, allow ERSPAN tunnels to operate in 'collect metadata' mode. bpf_skb_[gs]et_tunnel_key() helpers can make use of it right away. OVS can use it as well in the future. Signed-off-by: William Tu Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 625c29329372..992652856fe8 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -154,8 +154,10 @@ struct ip_tunnel { #define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_NOCACHE __cpu_to_be16(0x2000) +#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) -#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) +#define TUNNEL_OPTIONS_PRESENT \ + (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT) struct tnl_ptk_info { __be16 flags; -- cgit v1.2.3 From 5bf916ee0ab638c86edeaf4caeeade9ddf44d95d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 27 Aug 2017 17:03:33 +0200 Subject: irda: move include/net/irda into staging subdirectory And finally, move the irda include files into drivers/staging/irda/include/net/irda. Yes, it's a long path, but it makes it easy for us to just add a Makefile directory path addition and all of the net and drivers code "just works". Signed-off-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- include/net/irda/af_irda.h | 87 ---------- include/net/irda/crc.h | 29 ---- include/net/irda/discovery.h | 95 ----------- include/net/irda/ircomm_core.h | 106 ------------ include/net/irda/ircomm_event.h | 83 ---------- include/net/irda/ircomm_lmp.h | 36 ---- include/net/irda/ircomm_param.h | 147 ----------------- include/net/irda/ircomm_ttp.h | 37 ----- include/net/irda/ircomm_tty.h | 121 -------------- include/net/irda/ircomm_tty_attach.h | 92 ----------- include/net/irda/irda.h | 115 ------------- include/net/irda/irda_device.h | 285 -------------------------------- include/net/irda/iriap.h | 108 ------------ include/net/irda/iriap_event.h | 85 ---------- include/net/irda/irias_object.h | 108 ------------ include/net/irda/irlan_client.h | 42 ----- include/net/irda/irlan_common.h | 230 -------------------------- include/net/irda/irlan_eth.h | 32 ---- include/net/irda/irlan_event.h | 81 --------- include/net/irda/irlan_filter.h | 35 ---- include/net/irda/irlan_provider.h | 52 ------ include/net/irda/irlap.h | 311 ----------------------------------- include/net/irda/irlap_event.h | 129 --------------- include/net/irda/irlap_frame.h | 167 ------------------- include/net/irda/irlmp.h | 295 --------------------------------- include/net/irda/irlmp_event.h | 98 ----------- include/net/irda/irlmp_frame.h | 62 ------- include/net/irda/irmod.h | 109 ------------ include/net/irda/irqueue.h | 96 ----------- include/net/irda/irttp.h | 210 ----------------------- include/net/irda/parameters.h | 100 ----------- include/net/irda/qos.h | 101 ------------ include/net/irda/timer.h | 105 ------------ include/net/irda/wrapper.h | 58 ------- 34 files changed, 3847 deletions(-) delete mode 100644 include/net/irda/af_irda.h delete mode 100644 include/net/irda/crc.h delete mode 100644 include/net/irda/discovery.h delete mode 100644 include/net/irda/ircomm_core.h delete mode 100644 include/net/irda/ircomm_event.h delete mode 100644 include/net/irda/ircomm_lmp.h delete mode 100644 include/net/irda/ircomm_param.h delete mode 100644 include/net/irda/ircomm_ttp.h delete mode 100644 include/net/irda/ircomm_tty.h delete mode 100644 include/net/irda/ircomm_tty_attach.h delete mode 100644 include/net/irda/irda.h delete mode 100644 include/net/irda/irda_device.h delete mode 100644 include/net/irda/iriap.h delete mode 100644 include/net/irda/iriap_event.h delete mode 100644 include/net/irda/irias_object.h delete mode 100644 include/net/irda/irlan_client.h delete mode 100644 include/net/irda/irlan_common.h delete mode 100644 include/net/irda/irlan_eth.h delete mode 100644 include/net/irda/irlan_event.h delete mode 100644 include/net/irda/irlan_filter.h delete mode 100644 include/net/irda/irlan_provider.h delete mode 100644 include/net/irda/irlap.h delete mode 100644 include/net/irda/irlap_event.h delete mode 100644 include/net/irda/irlap_frame.h delete mode 100644 include/net/irda/irlmp.h delete mode 100644 include/net/irda/irlmp_event.h delete mode 100644 include/net/irda/irlmp_frame.h delete mode 100644 include/net/irda/irmod.h delete mode 100644 include/net/irda/irqueue.h delete mode 100644 include/net/irda/irttp.h delete mode 100644 include/net/irda/parameters.h delete mode 100644 include/net/irda/qos.h delete mode 100644 include/net/irda/timer.h delete mode 100644 include/net/irda/wrapper.h (limited to 'include') diff --git a/include/net/irda/af_irda.h b/include/net/irda/af_irda.h deleted file mode 100644 index 0df574931522..000000000000 --- a/include/net/irda/af_irda.h +++ /dev/null @@ -1,87 +0,0 @@ -/********************************************************************* - * - * Filename: af_irda.h - * Version: 1.0 - * Description: IrDA sockets declarations - * Status: Stable - * Author: Dag Brattli - * Created at: Tue Dec 9 21:13:12 1997 - * Modified at: Fri Jan 28 13:16:32 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef AF_IRDA_H -#define AF_IRDA_H - -#include -#include -#include /* struct iriap_cb */ -#include /* struct ias_value */ -#include /* struct lsap_cb */ -#include /* struct tsap_cb */ -#include /* struct discovery_t */ -#include - -/* IrDA Socket */ -struct irda_sock { - /* struct sock has to be the first member of irda_sock */ - struct sock sk; - __u32 saddr; /* my local address */ - __u32 daddr; /* peer address */ - - struct lsap_cb *lsap; /* LSAP used by Ultra */ - __u8 pid; /* Protocol IP (PID) used by Ultra */ - - struct tsap_cb *tsap; /* TSAP used by this connection */ - __u8 dtsap_sel; /* remote TSAP address */ - __u8 stsap_sel; /* local TSAP address */ - - __u32 max_sdu_size_rx; - __u32 max_sdu_size_tx; - __u32 max_data_size; - __u8 max_header_size; - struct qos_info qos_tx; - - __u16_host_order mask; /* Hint bits mask */ - __u16_host_order hints; /* Hint bits */ - - void *ckey; /* IrLMP client handle */ - void *skey; /* IrLMP service handle */ - - struct ias_object *ias_obj; /* Our service name + lsap in IAS */ - struct iriap_cb *iriap; /* Used to query remote IAS */ - struct ias_value *ias_result; /* Result of remote IAS query */ - - hashbin_t *cachelog; /* Result of discovery query */ - __u32 cachedaddr; /* Result of selective discovery query */ - - int nslots; /* Number of slots to use for discovery */ - - int errno; /* status of the IAS query */ - - wait_queue_head_t query_wait; /* Wait for the answer to a query */ - struct timer_list watchdog; /* Timeout for discovery */ - - LOCAL_FLOW tx_flow; - LOCAL_FLOW rx_flow; -}; - -static inline struct irda_sock *irda_sk(struct sock *sk) -{ - return (struct irda_sock *)sk; -} - -#endif /* AF_IRDA_H */ diff --git a/include/net/irda/crc.h b/include/net/irda/crc.h deleted file mode 100644 index f202296df9bb..000000000000 --- a/include/net/irda/crc.h +++ /dev/null @@ -1,29 +0,0 @@ -/********************************************************************* - * - * Filename: crc.h - * Version: - * Description: CRC routines - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Sun May 2 20:25:23 1999 - * Modified by: Dag Brattli - * - ********************************************************************/ - -#ifndef IRDA_CRC_H -#define IRDA_CRC_H - -#include -#include - -#define INIT_FCS 0xffff /* Initial FCS value */ -#define GOOD_FCS 0xf0b8 /* Good final FCS value */ - -/* Recompute the FCS with one more character appended. */ -#define irda_fcs(fcs, c) crc_ccitt_byte(fcs, c) - -/* Recompute the FCS with len bytes appended. */ -#define irda_calc_crc16(fcs, buf, len) crc_ccitt(fcs, buf, len) - -#endif diff --git a/include/net/irda/discovery.h b/include/net/irda/discovery.h deleted file mode 100644 index 63ae32530567..000000000000 --- a/include/net/irda/discovery.h +++ /dev/null @@ -1,95 +0,0 @@ -/********************************************************************* - * - * Filename: discovery.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Apr 6 16:53:53 1999 - * Modified at: Tue Oct 5 10:05:10 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef DISCOVERY_H -#define DISCOVERY_H - -#include - -#include -#include /* irda_queue_t */ -#include /* LAP_REASON */ - -#define DISCOVERY_EXPIRE_TIMEOUT (2*sysctl_discovery_timeout*HZ) -#define DISCOVERY_DEFAULT_SLOTS 0 - -/* - * This type is used by the protocols that transmit 16 bits words in - * little endian format. A little endian machine stores MSB of word in - * byte[1] and LSB in byte[0]. A big endian machine stores MSB in byte[0] - * and LSB in byte[1]. - * - * This structure is used in the code for things that are endian neutral - * but that fit in a word so that we can manipulate them efficiently. - * By endian neutral, I mean things that are really an array of bytes, - * and always used as such, for example the hint bits. Jean II - */ -typedef union { - __u16 word; - __u8 byte[2]; -} __u16_host_order; - -/* Types of discovery */ -typedef enum { - DISCOVERY_LOG, /* What's in our discovery log */ - DISCOVERY_ACTIVE, /* Doing our own discovery on the medium */ - DISCOVERY_PASSIVE, /* Peer doing discovery on the medium */ - EXPIRY_TIMEOUT, /* Entry expired due to timeout */ -} DISCOVERY_MODE; - -#define NICKNAME_MAX_LEN 21 - -/* Basic discovery information about a peer */ -typedef struct irda_device_info discinfo_t; /* linux/irda.h */ - -/* - * The DISCOVERY structure is used for both discovery requests and responses - */ -typedef struct discovery_t { - irda_queue_t q; /* Must be first! */ - - discinfo_t data; /* Basic discovery information */ - int name_len; /* Length of nickname */ - - LAP_REASON condition; /* More info about the discovery */ - int gen_addr_bit; /* Need to generate a new device - * address? */ - int nslots; /* Number of slots to use when - * discovering */ - unsigned long timestamp; /* Last time discovered */ - unsigned long firststamp; /* First time discovered */ -} discovery_t; - -void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *discovery); -void irlmp_add_discovery_log(hashbin_t *cachelog, hashbin_t *log); -void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force); -struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn, - __u16 mask, int old_entries); - -#endif diff --git a/include/net/irda/ircomm_core.h b/include/net/irda/ircomm_core.h deleted file mode 100644 index 2a580ce9edad..000000000000 --- a/include/net/irda/ircomm_core.h +++ /dev/null @@ -1,106 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_core.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 08:58:43 1999 - * Modified at: Mon Dec 13 11:52:29 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_CORE_H -#define IRCOMM_CORE_H - -#include -#include -#include - -#define IRCOMM_MAGIC 0x98347298 -#define IRCOMM_HEADER_SIZE 1 - -struct ircomm_cb; /* Forward decl. */ - -/* - * A small call-table, so we don't have to check the service-type whenever - * we want to do something - */ -typedef struct { - int (*data_request)(struct ircomm_cb *, struct sk_buff *, int clen); - int (*connect_request)(struct ircomm_cb *, struct sk_buff *, - struct ircomm_info *); - int (*connect_response)(struct ircomm_cb *, struct sk_buff *); - int (*disconnect_request)(struct ircomm_cb *, struct sk_buff *, - struct ircomm_info *); -} call_t; - -struct ircomm_cb { - irda_queue_t queue; - magic_t magic; - - notify_t notify; - call_t issue; - - int state; - int line; /* Which TTY line we are using */ - - struct tsap_cb *tsap; - struct lsap_cb *lsap; - - __u8 dlsap_sel; /* Destination LSAP/TSAP selector */ - __u8 slsap_sel; /* Source LSAP/TSAP selector */ - - __u32 saddr; /* Source device address (link we are using) */ - __u32 daddr; /* Destination device address */ - - int max_header_size; /* Header space we must reserve for each frame */ - int max_data_size; /* The amount of data we can fill in each frame */ - - LOCAL_FLOW flow_status; /* Used by ircomm_lmp */ - int pkt_count; /* Number of frames we have sent to IrLAP */ - - __u8 service_type; -}; - -extern hashbin_t *ircomm; - -struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line); -int ircomm_close(struct ircomm_cb *self); - -int ircomm_data_request(struct ircomm_cb *self, struct sk_buff *skb); -void ircomm_data_indication(struct ircomm_cb *self, struct sk_buff *skb); -void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb); -int ircomm_control_request(struct ircomm_cb *self, struct sk_buff *skb); -int ircomm_connect_request(struct ircomm_cb *self, __u8 dlsap_sel, - __u32 saddr, __u32 daddr, struct sk_buff *skb, - __u8 service_type); -void ircomm_connect_indication(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info); -void ircomm_connect_confirm(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info); -int ircomm_connect_response(struct ircomm_cb *self, struct sk_buff *userdata); -int ircomm_disconnect_request(struct ircomm_cb *self, struct sk_buff *userdata); -void ircomm_disconnect_indication(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info); -void ircomm_flow_request(struct ircomm_cb *self, LOCAL_FLOW flow); - -#define ircomm_is_connected(self) (self->state == IRCOMM_CONN) - -#endif diff --git a/include/net/irda/ircomm_event.h b/include/net/irda/ircomm_event.h deleted file mode 100644 index 5bbc32998d57..000000000000 --- a/include/net/irda/ircomm_event.h +++ /dev/null @@ -1,83 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_event.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Jun 6 23:51:13 1999 - * Modified at: Thu Jun 10 08:36:25 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_EVENT_H -#define IRCOMM_EVENT_H - -#include - -typedef enum { - IRCOMM_IDLE, - IRCOMM_WAITI, - IRCOMM_WAITR, - IRCOMM_CONN, -} IRCOMM_STATE; - -/* IrCOMM Events */ -typedef enum { - IRCOMM_CONNECT_REQUEST, - IRCOMM_CONNECT_RESPONSE, - IRCOMM_TTP_CONNECT_INDICATION, - IRCOMM_LMP_CONNECT_INDICATION, - IRCOMM_TTP_CONNECT_CONFIRM, - IRCOMM_LMP_CONNECT_CONFIRM, - - IRCOMM_LMP_DISCONNECT_INDICATION, - IRCOMM_TTP_DISCONNECT_INDICATION, - IRCOMM_DISCONNECT_REQUEST, - - IRCOMM_TTP_DATA_INDICATION, - IRCOMM_LMP_DATA_INDICATION, - IRCOMM_DATA_REQUEST, - IRCOMM_CONTROL_REQUEST, - IRCOMM_CONTROL_INDICATION, -} IRCOMM_EVENT; - -/* - * Used for passing information through the state-machine - */ -struct ircomm_info { - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - __u8 dlsap_sel; - LM_REASON reason; /* Reason for disconnect */ - __u32 max_data_size; - __u32 max_header_size; - - struct qos_info *qos; -}; - -extern const char *const ircomm_state[]; - -struct ircomm_cb; /* Forward decl. */ - -int ircomm_do_event(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info); -void ircomm_next_state(struct ircomm_cb *self, IRCOMM_STATE state); - -#endif diff --git a/include/net/irda/ircomm_lmp.h b/include/net/irda/ircomm_lmp.h deleted file mode 100644 index 5042a5021a04..000000000000 --- a/include/net/irda/ircomm_lmp.h +++ /dev/null @@ -1,36 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_lmp.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 10:06:07 1999 - * Modified at: Fri Aug 13 07:32:32 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_LMP_H -#define IRCOMM_LMP_H - -#include - -int ircomm_open_lsap(struct ircomm_cb *self); - -#endif diff --git a/include/net/irda/ircomm_param.h b/include/net/irda/ircomm_param.h deleted file mode 100644 index 1f67432321c4..000000000000 --- a/include/net/irda/ircomm_param.h +++ /dev/null @@ -1,147 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_param.h - * Version: 1.0 - * Description: Parameter handling for the IrCOMM protocol - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Jun 7 08:47:28 1999 - * Modified at: Wed Aug 25 13:46:33 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_PARAMS_H -#define IRCOMM_PARAMS_H - -#include - -/* Parameters common to all service types */ -#define IRCOMM_SERVICE_TYPE 0x00 -#define IRCOMM_PORT_TYPE 0x01 /* Only used in LM-IAS */ -#define IRCOMM_PORT_NAME 0x02 /* Only used in LM-IAS */ - -/* Parameters for both 3 wire and 9 wire */ -#define IRCOMM_DATA_RATE 0x10 -#define IRCOMM_DATA_FORMAT 0x11 -#define IRCOMM_FLOW_CONTROL 0x12 -#define IRCOMM_XON_XOFF 0x13 -#define IRCOMM_ENQ_ACK 0x14 -#define IRCOMM_LINE_STATUS 0x15 -#define IRCOMM_BREAK 0x16 - -/* Parameters for 9 wire */ -#define IRCOMM_DTE 0x20 -#define IRCOMM_DCE 0x21 -#define IRCOMM_POLL 0x22 - -/* Service type (details) */ -#define IRCOMM_3_WIRE_RAW 0x01 -#define IRCOMM_3_WIRE 0x02 -#define IRCOMM_9_WIRE 0x04 -#define IRCOMM_CENTRONICS 0x08 - -/* Port type (details) */ -#define IRCOMM_SERIAL 0x00 -#define IRCOMM_PARALLEL 0x01 - -/* Data format (details) */ -#define IRCOMM_WSIZE_5 0x00 -#define IRCOMM_WSIZE_6 0x01 -#define IRCOMM_WSIZE_7 0x02 -#define IRCOMM_WSIZE_8 0x03 - -#define IRCOMM_1_STOP_BIT 0x00 -#define IRCOMM_2_STOP_BIT 0x04 /* 1.5 if char len 5 */ - -#define IRCOMM_PARITY_DISABLE 0x00 -#define IRCOMM_PARITY_ENABLE 0x08 - -#define IRCOMM_PARITY_ODD 0x00 -#define IRCOMM_PARITY_EVEN 0x10 -#define IRCOMM_PARITY_MARK 0x20 -#define IRCOMM_PARITY_SPACE 0x30 - -/* Flow control */ -#define IRCOMM_XON_XOFF_IN 0x01 -#define IRCOMM_XON_XOFF_OUT 0x02 -#define IRCOMM_RTS_CTS_IN 0x04 -#define IRCOMM_RTS_CTS_OUT 0x08 -#define IRCOMM_DSR_DTR_IN 0x10 -#define IRCOMM_DSR_DTR_OUT 0x20 -#define IRCOMM_ENQ_ACK_IN 0x40 -#define IRCOMM_ENQ_ACK_OUT 0x80 - -/* Line status */ -#define IRCOMM_OVERRUN_ERROR 0x02 -#define IRCOMM_PARITY_ERROR 0x04 -#define IRCOMM_FRAMING_ERROR 0x08 - -/* DTE (Data terminal equipment) line settings */ -#define IRCOMM_DELTA_DTR 0x01 -#define IRCOMM_DELTA_RTS 0x02 -#define IRCOMM_DTR 0x04 -#define IRCOMM_RTS 0x08 - -/* DCE (Data communications equipment) line settings */ -#define IRCOMM_DELTA_CTS 0x01 /* Clear to send has changed */ -#define IRCOMM_DELTA_DSR 0x02 /* Data set ready has changed */ -#define IRCOMM_DELTA_RI 0x04 /* Ring indicator has changed */ -#define IRCOMM_DELTA_CD 0x08 /* Carrier detect has changed */ -#define IRCOMM_CTS 0x10 /* Clear to send is high */ -#define IRCOMM_DSR 0x20 /* Data set ready is high */ -#define IRCOMM_RI 0x40 /* Ring indicator is high */ -#define IRCOMM_CD 0x80 /* Carrier detect is high */ -#define IRCOMM_DCE_DELTA_ANY 0x0f - -/* - * Parameter state - */ -struct ircomm_params { - /* General control params */ - __u8 service_type; - __u8 port_type; - char port_name[32]; - - /* Control params for 3- and 9-wire service type */ - __u32 data_rate; /* Data rate in bps */ - __u8 data_format; - __u8 flow_control; - char xonxoff[2]; - char enqack[2]; - __u8 line_status; - __u8 _break; - - __u8 null_modem; - - /* Control params for 9-wire service type */ - __u8 dte; - __u8 dce; - __u8 poll; - - /* Control params for Centronics service type */ -}; - -struct ircomm_tty_cb; /* Forward decl. */ - -int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush); - -extern pi_param_info_t ircomm_param_info; - -#endif /* IRCOMM_PARAMS_H */ - diff --git a/include/net/irda/ircomm_ttp.h b/include/net/irda/ircomm_ttp.h deleted file mode 100644 index c5627288bca3..000000000000 --- a/include/net/irda/ircomm_ttp.h +++ /dev/null @@ -1,37 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_ttp.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 10:06:07 1999 - * Modified at: Fri Aug 13 07:32:22 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_TTP_H -#define IRCOMM_TTP_H - -#include - -int ircomm_open_tsap(struct ircomm_cb *self); - -#endif - diff --git a/include/net/irda/ircomm_tty.h b/include/net/irda/ircomm_tty.h deleted file mode 100644 index 8d4f588974bc..000000000000 --- a/include/net/irda/ircomm_tty.h +++ /dev/null @@ -1,121 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_tty.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Jun 6 23:24:22 1999 - * Modified at: Fri Jan 28 13:16:57 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_TTY_H -#define IRCOMM_TTY_H - -#include -#include -#include -#include /* struct tty_struct */ - -#include -#include -#include - -#define IRCOMM_TTY_PORTS 32 -#define IRCOMM_TTY_MAGIC 0x3432 -#define IRCOMM_TTY_MAJOR 161 -#define IRCOMM_TTY_MINOR 0 - -/* This is used as an initial value to max_header_size before the proper - * value is filled in (5 for ttp, 4 for lmp). This allow us to detect - * the state of the underlying connection. - Jean II */ -#define IRCOMM_TTY_HDR_UNINITIALISED 16 -/* Same for payload size. See qos.c for the smallest max data size */ -#define IRCOMM_TTY_DATA_UNINITIALISED (64 - IRCOMM_TTY_HDR_UNINITIALISED) - -/* - * IrCOMM TTY driver state - */ -struct ircomm_tty_cb { - irda_queue_t queue; /* Must be first */ - struct tty_port port; - magic_t magic; - - int state; /* Connect state */ - - struct ircomm_cb *ircomm; /* IrCOMM layer instance */ - - struct sk_buff *tx_skb; /* Transmit buffer */ - struct sk_buff *ctrl_skb; /* Control data buffer */ - - /* Parameters */ - struct ircomm_params settings; - - __u8 service_type; /* The service that we support */ - int client; /* True if we are a client */ - LOCAL_FLOW flow; /* IrTTP flow status */ - - int line; - - __u8 dlsap_sel; - __u8 slsap_sel; - - __u32 saddr; - __u32 daddr; - - __u32 max_data_size; /* Max data we can transmit in one packet */ - __u32 max_header_size; /* The amount of header space we must reserve */ - __u32 tx_data_size; /* Max data size of current tx_skb */ - - struct iriap_cb *iriap; /* Instance used for querying remote IAS */ - struct ias_object* obj; - void *skey; - void *ckey; - - struct timer_list watchdog_timer; - struct work_struct tqueue; - - /* Protect concurent access to : - * o self->ctrl_skb - * o self->tx_skb - * Maybe other things may gain to be protected as well... - * Jean II */ - spinlock_t spinlock; -}; - -void ircomm_tty_start(struct tty_struct *tty); -void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self); - -int ircomm_tty_tiocmget(struct tty_struct *tty); -int ircomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, - unsigned int clear); -int ircomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, - unsigned long arg); -void ircomm_tty_set_termios(struct tty_struct *tty, - struct ktermios *old_termios); - -#endif - - - - - - - diff --git a/include/net/irda/ircomm_tty_attach.h b/include/net/irda/ircomm_tty_attach.h deleted file mode 100644 index 20dcbdf258cf..000000000000 --- a/include/net/irda/ircomm_tty_attach.h +++ /dev/null @@ -1,92 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_tty_attach.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 15:55:18 1999 - * Modified at: Fri Dec 10 21:04:55 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_TTY_ATTACH_H -#define IRCOMM_TTY_ATTACH_H - -#include - -typedef enum { - IRCOMM_TTY_IDLE, - IRCOMM_TTY_SEARCH, - IRCOMM_TTY_QUERY_PARAMETERS, - IRCOMM_TTY_QUERY_LSAP_SEL, - IRCOMM_TTY_SETUP, - IRCOMM_TTY_READY, -} IRCOMM_TTY_STATE; - -/* IrCOMM TTY Events */ -typedef enum { - IRCOMM_TTY_ATTACH_CABLE, - IRCOMM_TTY_DETACH_CABLE, - IRCOMM_TTY_DATA_REQUEST, - IRCOMM_TTY_DATA_INDICATION, - IRCOMM_TTY_DISCOVERY_REQUEST, - IRCOMM_TTY_DISCOVERY_INDICATION, - IRCOMM_TTY_CONNECT_CONFIRM, - IRCOMM_TTY_CONNECT_INDICATION, - IRCOMM_TTY_DISCONNECT_REQUEST, - IRCOMM_TTY_DISCONNECT_INDICATION, - IRCOMM_TTY_WD_TIMER_EXPIRED, - IRCOMM_TTY_GOT_PARAMETERS, - IRCOMM_TTY_GOT_LSAPSEL, -} IRCOMM_TTY_EVENT; - -/* Used for passing information through the state-machine */ -struct ircomm_tty_info { - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - __u8 dlsap_sel; -}; - -extern const char *const ircomm_state[]; -extern const char *const ircomm_tty_state[]; - -int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event, - struct sk_buff *skb, struct ircomm_tty_info *info); - - -int ircomm_tty_attach_cable(struct ircomm_tty_cb *self); -void ircomm_tty_detach_cable(struct ircomm_tty_cb *self); -void ircomm_tty_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -void ircomm_tty_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb); -void ircomm_tty_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -int ircomm_tty_send_initial_parameters(struct ircomm_tty_cb *self); -void ircomm_tty_link_established(struct ircomm_tty_cb *self); - -#endif /* IRCOMM_TTY_ATTACH_H */ diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h deleted file mode 100644 index 92c8fb575213..000000000000 --- a/include/net/irda/irda.h +++ /dev/null @@ -1,115 +0,0 @@ -/********************************************************************* - * - * Filename: irda.h - * Version: 1.0 - * Description: IrDA common include file for kernel internal use - * Status: Stable - * Author: Dag Brattli - * Created at: Tue Dec 9 21:13:12 1997 - * Modified at: Fri Jan 28 13:16:32 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef NET_IRDA_H -#define NET_IRDA_H - -#include /* struct sk_buff */ -#include -#include /* sa_family_t in */ -#include - -typedef __u32 magic_t; - -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -/* Hack to do small backoff when setting media busy in IrLAP */ -#ifndef SMALL -#define SMALL 5 -#endif - -#ifndef IRDA_MIN /* Lets not mix this MIN with other header files */ -#define IRDA_MIN(a, b) (((a) < (b)) ? (a) : (b)) -#endif - -#ifndef IRDA_ALIGN -# define IRDA_ALIGN __attribute__((aligned)) -#endif - -#ifdef CONFIG_IRDA_DEBUG -#define IRDA_ASSERT(expr, func) \ -do { if(!(expr)) { \ - printk( "Assertion failed! %s:%s:%d %s\n", \ - __FILE__,__func__,__LINE__,(#expr) ); \ - func } } while (0) -#define IRDA_ASSERT_LABEL(label) label -#else -#define IRDA_ASSERT(expr, func) do { (void)(expr); } while (0) -#define IRDA_ASSERT_LABEL(label) -#endif /* CONFIG_IRDA_DEBUG */ - -/* - * Magic numbers used by Linux-IrDA. Random numbers which must be unique to - * give the best protection - */ - -#define IRTTY_MAGIC 0x2357 -#define LAP_MAGIC 0x1357 -#define LMP_MAGIC 0x4321 -#define LMP_LSAP_MAGIC 0x69333 -#define LMP_LAP_MAGIC 0x3432 -#define IRDA_DEVICE_MAGIC 0x63454 -#define IAS_MAGIC 0x007 -#define TTP_MAGIC 0x241169 -#define TTP_TSAP_MAGIC 0x4345 -#define IROBEX_MAGIC 0x341324 -#define HB_MAGIC 0x64534 -#define IRLAN_MAGIC 0x754 -#define IAS_OBJECT_MAGIC 0x34234 -#define IAS_ATTRIB_MAGIC 0x45232 -#define IRDA_TASK_MAGIC 0x38423 - -#define IAS_DEVICE_ID 0x0000 /* Defined by IrDA, IrLMP section 4.1 (page 68) */ -#define IAS_PNP_ID 0xd342 -#define IAS_OBEX_ID 0x34323 -#define IAS_IRLAN_ID 0x34234 -#define IAS_IRCOMM_ID 0x2343 -#define IAS_IRLPT_ID 0x9876 - -struct net_device; -struct packet_type; - -void irda_proc_register(void); -void irda_proc_unregister(void); - -int irda_sysctl_register(void); -void irda_sysctl_unregister(void); - -int irsock_init(void); -void irsock_cleanup(void); - -int irda_nl_register(void); -void irda_nl_unregister(void); - -int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev); - -#endif /* NET_IRDA_H */ diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h deleted file mode 100644 index 664bf8178412..000000000000 --- a/include/net/irda/irda_device.h +++ /dev/null @@ -1,285 +0,0 @@ -/********************************************************************* - * - * Filename: irda_device.h - * Version: 0.9 - * Description: Contains various declarations used by the drivers - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Apr 14 12:41:42 1998 - * Modified at: Mon Mar 20 09:08:57 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 1998 Thomas Davis, , - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -/* - * This header contains all the IrDA definitions a driver really - * needs, and therefore the driver should not need to include - * any other IrDA headers - Jean II - */ - -#ifndef IRDA_DEVICE_H -#define IRDA_DEVICE_H - -#include -#include -#include -#include /* struct sk_buff */ -#include -#include - -#include -#include -#include /* struct qos_info */ -#include /* irda_queue_t */ - -/* A few forward declarations (to make compiler happy) */ -struct irlap_cb; - -/* Some non-standard interface flags (should not conflict with any in if.h) */ -#define IFF_SIR 0x0001 /* Supports SIR speeds */ -#define IFF_MIR 0x0002 /* Supports MIR speeds */ -#define IFF_FIR 0x0004 /* Supports FIR speeds */ -#define IFF_VFIR 0x0008 /* Supports VFIR speeds */ -#define IFF_PIO 0x0010 /* Supports PIO transfer of data */ -#define IFF_DMA 0x0020 /* Supports DMA transfer of data */ -#define IFF_SHM 0x0040 /* Supports shared memory data transfers */ -#define IFF_DONGLE 0x0080 /* Interface has a dongle attached */ -#define IFF_AIR 0x0100 /* Supports Advanced IR (AIR) standards */ - -#define IO_XMIT 0x01 -#define IO_RECV 0x02 - -typedef enum { - IRDA_IRLAP, /* IrDA mode, and deliver to IrLAP */ - IRDA_RAW, /* IrDA mode */ - SHARP_ASK, - TV_REMOTE, /* Also known as Consumer Electronics IR */ -} INFRARED_MODE; - -typedef enum { - IRDA_TASK_INIT, /* All tasks are initialized with this state */ - IRDA_TASK_DONE, /* Signals that the task is finished */ - IRDA_TASK_WAIT, - IRDA_TASK_WAIT1, - IRDA_TASK_WAIT2, - IRDA_TASK_WAIT3, - IRDA_TASK_CHILD_INIT, /* Initializing child task */ - IRDA_TASK_CHILD_WAIT, /* Waiting for child task to finish */ - IRDA_TASK_CHILD_DONE /* Child task is finished */ -} IRDA_TASK_STATE; - -struct irda_task; -typedef int (*IRDA_TASK_CALLBACK) (struct irda_task *task); - -struct irda_task { - irda_queue_t q; - magic_t magic; - - IRDA_TASK_STATE state; - IRDA_TASK_CALLBACK function; - IRDA_TASK_CALLBACK finished; - - struct irda_task *parent; - struct timer_list timer; - - void *instance; /* Instance being called */ - void *param; /* Parameter to be used by instance */ -}; - -/* Dongle info */ -struct dongle_reg; -typedef struct { - struct dongle_reg *issue; /* Registration info */ - struct net_device *dev; /* Device we are attached to */ - struct irda_task *speed_task; /* Task handling speed change */ - struct irda_task *reset_task; /* Task handling reset */ - __u32 speed; /* Current speed */ - - /* Callbacks to the IrDA device driver */ - int (*set_mode)(struct net_device *, int mode); - int (*read)(struct net_device *dev, __u8 *buf, int len); - int (*write)(struct net_device *dev, __u8 *buf, int len); - int (*set_dtr_rts)(struct net_device *dev, int dtr, int rts); -} dongle_t; - -/* Dongle registration info */ -struct dongle_reg { - irda_queue_t q; /* Must be first */ - IRDA_DONGLE type; - - void (*open)(dongle_t *dongle, struct qos_info *qos); - void (*close)(dongle_t *dongle); - int (*reset)(struct irda_task *task); - int (*change_speed)(struct irda_task *task); - struct module *owner; -}; - -/* - * Per-packet information we need to hide inside sk_buff - * (must not exceed 48 bytes, check with struct sk_buff) - * The default_qdisc_pad field is a temporary hack. - */ -struct irda_skb_cb { - unsigned int default_qdisc_pad; - magic_t magic; /* Be sure that we can trust the information */ - __u32 next_speed; /* The Speed to be set *after* this frame */ - __u16 mtt; /* Minimum turn around time */ - __u16 xbofs; /* Number of xbofs required, used by SIR mode */ - __u16 next_xbofs; /* Number of xbofs required *after* this frame */ - void *context; /* May be used by drivers */ - void (*destructor)(struct sk_buff *skb); /* Used for flow control */ - __u16 xbofs_delay; /* Number of xbofs used for generating the mtt */ - __u8 line; /* Used by IrCOMM in IrLPT mode */ -}; - -/* Chip specific info */ -typedef struct { - int cfg_base; /* Config register IO base */ - int sir_base; /* SIR IO base */ - int fir_base; /* FIR IO base */ - int mem_base; /* Shared memory base */ - int sir_ext; /* Length of SIR iobase */ - int fir_ext; /* Length of FIR iobase */ - int irq, irq2; /* Interrupts used */ - int dma, dma2; /* DMA channel(s) used */ - int fifo_size; /* FIFO size */ - int irqflags; /* interrupt flags (ie, IRQF_SHARED) */ - int direction; /* Link direction, used by some FIR drivers */ - int enabled; /* Powered on? */ - int suspended; /* Suspended by APM */ - __u32 speed; /* Currently used speed */ - __u32 new_speed; /* Speed we must change to when Tx is finished */ - int dongle_id; /* Dongle or transceiver currently used */ -} chipio_t; - -/* IO buffer specific info (inspired by struct sk_buff) */ -typedef struct { - int state; /* Receiving state (transmit state not used) */ - int in_frame; /* True if receiving frame */ - - __u8 *head; /* start of buffer */ - __u8 *data; /* start of data in buffer */ - - int len; /* current length of data */ - int truesize; /* total allocated size of buffer */ - __u16 fcs; - - struct sk_buff *skb; /* ZeroCopy Rx in async_unwrap_char() */ -} iobuff_t; - -/* Maximum SIR frame (skb) that we expect to receive *unwrapped*. - * Max LAP MTU (I field) is 2048 bytes max (IrLAP 1.1, chapt 6.6.5, p40). - * Max LAP header is 2 bytes (for now). - * Max CRC is 2 bytes at SIR, 4 bytes at FIR. - * Need 1 byte for skb_reserve() to align IP header for IrLAN. - * Add a few extra bytes just to be safe (buffer is power of two anyway) - * Jean II */ -#define IRDA_SKB_MAX_MTU 2064 -/* Maximum SIR frame that we expect to send, wrapped (i.e. with XBOFS - * and escaped characters on top of above). */ -#define IRDA_SIR_MAX_FRAME 4269 - -/* The SIR unwrapper async_unwrap_char() will use a Rx-copy-break mechanism - * when using the optional ZeroCopy Rx, where only small frames are memcpy - * to a smaller skb to save memory. This is the threshold under which copy - * will happen (and over which it won't happen). - * Some FIR drivers may use this #define as well... - * This is the same value as various Ethernet drivers. - Jean II */ -#define IRDA_RX_COPY_THRESHOLD 256 - -/* Function prototypes */ -int irda_device_init(void); -void irda_device_cleanup(void); - -/* IrLAP entry points used by the drivers. - * We declare them here to avoid the driver pulling a whole bunch stack - * headers they don't really need - Jean II */ -struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos, - const char *hw_name); -void irlap_close(struct irlap_cb *self); - -/* Interface to be uses by IrLAP */ -void irda_device_set_media_busy(struct net_device *dev, int status); -int irda_device_is_media_busy(struct net_device *dev); -int irda_device_is_receiving(struct net_device *dev); - -/* Interface for internal use */ -static inline int irda_device_txqueue_empty(const struct net_device *dev) -{ - return qdisc_all_tx_empty(dev); -} -int irda_device_set_raw_mode(struct net_device* self, int status); -struct net_device *alloc_irdadev(int sizeof_priv); - -void irda_setup_dma(int channel, dma_addr_t buffer, int count, int mode); - -/* - * Function irda_get_mtt (skb) - * - * Utility function for getting the minimum turnaround time out of - * the skb, where it has been hidden in the cb field. - */ -static inline __u16 irda_get_mtt(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->mtt : 10000; -} - -/* - * Function irda_get_next_speed (skb) - * - * Extract the speed that should be set *after* this frame from the skb - * - * Note : return -1 for user space frames - */ -static inline __u32 irda_get_next_speed(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->next_speed : -1; -} - -/* - * Function irda_get_next_xbofs (skb) - * - * Extract the xbofs that should be set for this frame from the skb - * - * Note : default to 10 for user space frames - */ -static inline __u16 irda_get_xbofs(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->xbofs : 10; -} - -/* - * Function irda_get_next_xbofs (skb) - * - * Extract the xbofs that should be set *after* this frame from the skb - * - * Note : return -1 for user space frames - */ -static inline __u16 irda_get_next_xbofs(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->next_xbofs : -1; -} -#endif /* IRDA_DEVICE_H */ - - diff --git a/include/net/irda/iriap.h b/include/net/irda/iriap.h deleted file mode 100644 index fcc896491a95..000000000000 --- a/include/net/irda/iriap.h +++ /dev/null @@ -1,108 +0,0 @@ -/********************************************************************* - * - * Filename: iriap.h - * Version: 0.5 - * Description: Information Access Protocol (IAP) - * Status: Experimental. - * Author: Dag Brattli - * Created at: Thu Aug 21 00:02:07 1997 - * Modified at: Sat Dec 25 16:42:09 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997-1999 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRIAP_H -#define IRIAP_H - -#include -#include - -#include -#include -#include /* irda_queue_t */ -#include /* struct timer_list */ - -#define IAP_LST 0x80 -#define IAP_ACK 0x40 - -#define IAS_SERVER 0 -#define IAS_CLIENT 1 - -/* IrIAP Op-codes */ -#define GET_INFO_BASE 0x01 -#define GET_OBJECTS 0x02 -#define GET_VALUE 0x03 -#define GET_VALUE_BY_CLASS 0x04 -#define GET_OBJECT_INFO 0x05 -#define GET_ATTRIB_NAMES 0x06 - -#define IAS_SUCCESS 0 -#define IAS_CLASS_UNKNOWN 1 -#define IAS_ATTRIB_UNKNOWN 2 -#define IAS_DISCONNECT 10 - -typedef void (*CONFIRM_CALLBACK)(int result, __u16 obj_id, - struct ias_value *value, void *priv); - -struct iriap_cb { - irda_queue_t q; /* Must be first */ - magic_t magic; /* Magic cookie */ - - int mode; /* Client or server */ - - __u32 saddr; - __u32 daddr; - __u8 operation; - - struct sk_buff *request_skb; - struct lsap_cb *lsap; - __u8 slsap_sel; - - /* Client states */ - IRIAP_STATE client_state; - IRIAP_STATE call_state; - - /* Server states */ - IRIAP_STATE server_state; - IRIAP_STATE r_connect_state; - - CONFIRM_CALLBACK confirm; - void *priv; /* Used to identify client */ - - __u8 max_header_size; - __u32 max_data_size; - - struct timer_list watchdog_timer; -}; - -int iriap_init(void); -void iriap_cleanup(void); - -struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv, - CONFIRM_CALLBACK callback); -void iriap_close(struct iriap_cb *self); - -int iriap_getvaluebyclass_request(struct iriap_cb *self, - __u32 saddr, __u32 daddr, - char *name, char *attr); -void iriap_connect_request(struct iriap_cb *self); -void iriap_send_ack( struct iriap_cb *self); -void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb); - -void iriap_register_server(void); - -#endif - - diff --git a/include/net/irda/iriap_event.h b/include/net/irda/iriap_event.h deleted file mode 100644 index 89747f06d9eb..000000000000 --- a/include/net/irda/iriap_event.h +++ /dev/null @@ -1,85 +0,0 @@ -/********************************************************************* - * - * Filename: iriap_event.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Sun Oct 31 22:02:54 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRIAP_FSM_H -#define IRIAP_FSM_H - -/* Forward because of circular include dependecies */ -struct iriap_cb; - -/* IrIAP states */ -typedef enum { - /* Client */ - S_DISCONNECT, - S_CONNECTING, - S_CALL, - - /* S-Call */ - S_MAKE_CALL, - S_CALLING, - S_OUTSTANDING, - S_REPLYING, - S_WAIT_FOR_CALL, - S_WAIT_ACTIVE, - - /* Server */ - R_DISCONNECT, - R_CALL, - - /* R-Connect */ - R_WAITING, - R_WAIT_ACTIVE, - R_RECEIVING, - R_EXECUTE, - R_RETURNING, -} IRIAP_STATE; - -typedef enum { - IAP_CALL_REQUEST, - IAP_CALL_REQUEST_GVBC, - IAP_CALL_RESPONSE, - IAP_RECV_F_LST, - IAP_LM_DISCONNECT_INDICATION, - IAP_LM_CONNECT_INDICATION, - IAP_LM_CONNECT_CONFIRM, -} IRIAP_EVENT; - -void iriap_next_client_state (struct iriap_cb *self, IRIAP_STATE state); -void iriap_next_call_state (struct iriap_cb *self, IRIAP_STATE state); -void iriap_next_server_state (struct iriap_cb *self, IRIAP_STATE state); -void iriap_next_r_connect_state(struct iriap_cb *self, IRIAP_STATE state); - - -void iriap_do_client_event(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -void iriap_do_call_event (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); - -void iriap_do_server_event (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -void iriap_do_r_connect_event(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); - -#endif /* IRIAP_FSM_H */ - diff --git a/include/net/irda/irias_object.h b/include/net/irda/irias_object.h deleted file mode 100644 index 83f78081799c..000000000000 --- a/include/net/irda/irias_object.h +++ /dev/null @@ -1,108 +0,0 @@ -/********************************************************************* - * - * Filename: irias_object.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Thu Oct 1 22:49:50 1998 - * Modified at: Wed Dec 15 11:20:57 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef LM_IAS_OBJECT_H -#define LM_IAS_OBJECT_H - -#include -#include - -/* LM-IAS Attribute types */ -#define IAS_MISSING 0 -#define IAS_INTEGER 1 -#define IAS_OCT_SEQ 2 -#define IAS_STRING 3 - -/* Object ownership of attributes (user or kernel) */ -#define IAS_KERNEL_ATTR 0 -#define IAS_USER_ATTR 1 - -/* - * LM-IAS Object - */ -struct ias_object { - irda_queue_t q; /* Must be first! */ - magic_t magic; - - char *name; - int id; - hashbin_t *attribs; -}; - -/* - * Values used by LM-IAS attributes - */ -struct ias_value { - __u8 type; /* Value description */ - __u8 owner; /* Managed from user/kernel space */ - int charset; /* Only used by string type */ - int len; - - /* Value */ - union { - int integer; - char *string; - __u8 *oct_seq; - } t; -}; - -/* - * Attributes used by LM-IAS objects - */ -struct ias_attrib { - irda_queue_t q; /* Must be first! */ - int magic; - - char *name; /* Attribute name */ - struct ias_value *value; /* Attribute value */ -}; - -struct ias_object *irias_new_object(char *name, int id); -void irias_insert_object(struct ias_object *obj); -int irias_delete_object(struct ias_object *obj); -int irias_delete_attrib(struct ias_object *obj, struct ias_attrib *attrib, - int cleanobject); -void __irias_delete_object(struct ias_object *obj); - -void irias_add_integer_attrib(struct ias_object *obj, char *name, int value, - int user); -void irias_add_string_attrib(struct ias_object *obj, char *name, char *value, - int user); -void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets, - int len, int user); -int irias_object_change_attribute(char *obj_name, char *attrib_name, - struct ias_value *new_value); -struct ias_object *irias_find_object(char *name); -struct ias_attrib *irias_find_attrib(struct ias_object *obj, char *name); - -struct ias_value *irias_new_string_value(char *string); -struct ias_value *irias_new_integer_value(int integer); -struct ias_value *irias_new_octseq_value(__u8 *octseq , int len); -struct ias_value *irias_new_missing_value(void); -void irias_delete_value(struct ias_value *value); - -extern struct ias_value irias_missing; -extern hashbin_t *irias_objects; - -#endif diff --git a/include/net/irda/irlan_client.h b/include/net/irda/irlan_client.h deleted file mode 100644 index fa8455eda280..000000000000 --- a/include/net/irda/irlan_client.h +++ /dev/null @@ -1,42 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_client.h - * Version: 0.3 - * Description: IrDA LAN access layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Thu Apr 22 14:13:34 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_CLIENT_H -#define IRLAN_CLIENT_H - -#include -#include -#include -#include - -#include -#include - -void irlan_client_discovery_indication(discinfo_t *, DISCOVERY_MODE, void *); -void irlan_client_wakeup(struct irlan_cb *self, __u32 saddr, __u32 daddr); - -void irlan_client_parse_response(struct irlan_cb *self, struct sk_buff *skb); -void irlan_client_get_value_confirm(int result, __u16 obj_id, - struct ias_value *value, void *priv); -#endif diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h deleted file mode 100644 index 550c2d6ec7ff..000000000000 --- a/include/net/irda/irlan_common.h +++ /dev/null @@ -1,230 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_common.h - * Version: 0.8 - * Description: IrDA LAN access layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Sun Oct 31 19:41:24 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_H -#define IRLAN_H - -#include /* for HZ */ - -#include -#include -#include -#include -#include - -#include - -#define IRLAN_MTU 1518 -#define IRLAN_TIMEOUT 10*HZ /* 10 seconds */ - -/* Command packet types */ -#define CMD_GET_PROVIDER_INFO 0 -#define CMD_GET_MEDIA_CHAR 1 -#define CMD_OPEN_DATA_CHANNEL 2 -#define CMD_CLOSE_DATA_CHAN 3 -#define CMD_RECONNECT_DATA_CHAN 4 -#define CMD_FILTER_OPERATION 5 - -/* Some responses */ -#define RSP_SUCCESS 0 -#define RSP_INSUFFICIENT_RESOURCES 1 -#define RSP_INVALID_COMMAND_FORMAT 2 -#define RSP_COMMAND_NOT_SUPPORTED 3 -#define RSP_PARAM_NOT_SUPPORTED 4 -#define RSP_VALUE_NOT_SUPPORTED 5 -#define RSP_NOT_OPEN 6 -#define RSP_AUTHENTICATION_REQUIRED 7 -#define RSP_INVALID_PASSWORD 8 -#define RSP_PROTOCOL_ERROR 9 -#define RSP_ASYNCHRONOUS_ERROR 255 - -/* Media types */ -#define MEDIA_802_3 1 -#define MEDIA_802_5 2 - -/* Filter parameters */ -#define DATA_CHAN 1 -#define FILTER_TYPE 2 -#define FILTER_MODE 3 - -/* Filter types */ -#define IRLAN_DIRECTED 0x01 -#define IRLAN_FUNCTIONAL 0x02 -#define IRLAN_GROUP 0x04 -#define IRLAN_MAC_FRAME 0x08 -#define IRLAN_MULTICAST 0x10 -#define IRLAN_BROADCAST 0x20 -#define IRLAN_IPX_SOCKET 0x40 - -/* Filter modes */ -#define ALL 1 -#define FILTER 2 -#define NONE 3 - -/* Filter operations */ -#define GET 1 -#define CLEAR 2 -#define ADD 3 -#define REMOVE 4 -#define DYNAMIC 5 - -/* Access types */ -#define ACCESS_DIRECT 1 -#define ACCESS_PEER 2 -#define ACCESS_HOSTED 3 - -#define IRLAN_BYTE 0 -#define IRLAN_SHORT 1 -#define IRLAN_ARRAY 2 - -/* IrLAN sits on top if IrTTP */ -#define IRLAN_MAX_HEADER (TTP_HEADER+LMP_HEADER) -/* 1 byte for the command code and 1 byte for the parameter count */ -#define IRLAN_CMD_HEADER 2 - -#define IRLAN_STRING_PARAMETER_LEN(name, value) (1 + strlen((name)) + 2 \ - + strlen ((value))) -#define IRLAN_BYTE_PARAMETER_LEN(name) (1 + strlen((name)) + 2 + 1) -#define IRLAN_SHORT_PARAMETER_LEN(name) (1 + strlen((name)) + 2 + 2) - -/* - * IrLAN client - */ -struct irlan_client_cb { - int state; - - int open_retries; - - struct tsap_cb *tsap_ctrl; - __u32 max_sdu_size; - __u8 max_header_size; - - int access_type; /* Access type of provider */ - __u8 reconnect_key[255]; - __u8 key_len; - - __u16 recv_arb_val; - __u16 max_frame; - int filter_type; - - int unicast_open; - int broadcast_open; - - int tx_busy; - struct sk_buff_head txq; /* Transmit control queue */ - - struct iriap_cb *iriap; - - struct timer_list kick_timer; -}; - -/* - * IrLAN provider - */ -struct irlan_provider_cb { - int state; - - struct tsap_cb *tsap_ctrl; - __u32 max_sdu_size; - __u8 max_header_size; - - /* - * Store some values here which are used by the provider to parse - * the filter operations - */ - int data_chan; - int filter_type; - int filter_mode; - int filter_operation; - int filter_entry; - int access_type; /* Access type */ - __u16 send_arb_val; - - __u8 mac_address[ETH_ALEN]; /* Generated MAC address for peer device */ -}; - -/* - * IrLAN control block - */ -struct irlan_cb { - int magic; - struct list_head dev_list; - struct net_device *dev; /* Ethernet device structure*/ - - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - int disconnect_reason; /* Why we got disconnected */ - - int media; /* Media type */ - __u8 version[2]; /* IrLAN version */ - - struct tsap_cb *tsap_data; /* Data TSAP */ - - int use_udata; /* Use Unit Data transfers */ - - __u8 stsap_sel_data; /* Source data TSAP selector */ - __u8 dtsap_sel_data; /* Destination data TSAP selector */ - __u8 dtsap_sel_ctrl; /* Destination ctrl TSAP selector */ - - struct irlan_client_cb client; /* Client specific fields */ - struct irlan_provider_cb provider; /* Provider specific fields */ - - __u32 max_sdu_size; - __u8 max_header_size; - - wait_queue_head_t open_wait; - struct timer_list watchdog_timer; -}; - -void irlan_close(struct irlan_cb *self); -void irlan_close_tsaps(struct irlan_cb *self); - -int irlan_register_netdev(struct irlan_cb *self); -void irlan_ias_register(struct irlan_cb *self, __u8 tsap_sel); -void irlan_start_watchdog_timer(struct irlan_cb *self, int timeout); - -void irlan_open_data_tsap(struct irlan_cb *self); - -int irlan_run_ctrl_tx_queue(struct irlan_cb *self); - -struct irlan_cb *irlan_get_any(void); -void irlan_get_provider_info(struct irlan_cb *self); -void irlan_get_media_char(struct irlan_cb *self); -void irlan_open_data_channel(struct irlan_cb *self); -void irlan_close_data_channel(struct irlan_cb *self); -void irlan_set_multicast_filter(struct irlan_cb *self, int status); -void irlan_set_broadcast_filter(struct irlan_cb *self, int status); - -int irlan_insert_byte_param(struct sk_buff *skb, char *param, __u8 value); -int irlan_insert_short_param(struct sk_buff *skb, char *param, __u16 value); -int irlan_insert_string_param(struct sk_buff *skb, char *param, char *value); -int irlan_insert_array_param(struct sk_buff *skb, char *name, __u8 *value, - __u16 value_len); - -int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len); - -#endif - - diff --git a/include/net/irda/irlan_eth.h b/include/net/irda/irlan_eth.h deleted file mode 100644 index de5c81691f33..000000000000 --- a/include/net/irda/irlan_eth.h +++ /dev/null @@ -1,32 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_eth.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Thu Oct 15 08:36:58 1998 - * Modified at: Fri May 14 23:29:00 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_ETH_H -#define IRLAN_ETH_H - -struct net_device *alloc_irlandev(const char *name); -int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb); - -void irlan_eth_flow_indication( void *instance, void *sap, LOCAL_FLOW flow); -#endif diff --git a/include/net/irda/irlan_event.h b/include/net/irda/irlan_event.h deleted file mode 100644 index 018b5a77e610..000000000000 --- a/include/net/irda/irlan_event.h +++ /dev/null @@ -1,81 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_event.h - * Version: - * Description: LAN access - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Tue Feb 2 09:45:17 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_EVENT_H -#define IRLAN_EVENT_H - -#include -#include - -#include - -typedef enum { - IRLAN_IDLE, - IRLAN_QUERY, - IRLAN_CONN, - IRLAN_INFO, - IRLAN_MEDIA, - IRLAN_OPEN, - IRLAN_WAIT, - IRLAN_ARB, - IRLAN_DATA, - IRLAN_CLOSE, - IRLAN_SYNC -} IRLAN_STATE; - -typedef enum { - IRLAN_DISCOVERY_INDICATION, - IRLAN_IAS_PROVIDER_AVAIL, - IRLAN_IAS_PROVIDER_NOT_AVAIL, - IRLAN_LAP_DISCONNECT, - IRLAN_LMP_DISCONNECT, - IRLAN_CONNECT_COMPLETE, - IRLAN_DATA_INDICATION, - IRLAN_DATA_CONNECT_INDICATION, - IRLAN_RETRY_CONNECT, - - IRLAN_CONNECT_INDICATION, - IRLAN_GET_INFO_CMD, - IRLAN_GET_MEDIA_CMD, - IRLAN_OPEN_DATA_CMD, - IRLAN_FILTER_CONFIG_CMD, - - IRLAN_CHECK_CON_ARB, - IRLAN_PROVIDER_SIGNAL, - - IRLAN_WATCHDOG_TIMEOUT, -} IRLAN_EVENT; - -extern const char * const irlan_state[]; - -void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); - -void irlan_do_provider_event(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); - -void irlan_next_client_state(struct irlan_cb *self, IRLAN_STATE state); -void irlan_next_provider_state(struct irlan_cb *self, IRLAN_STATE state); - -#endif diff --git a/include/net/irda/irlan_filter.h b/include/net/irda/irlan_filter.h deleted file mode 100644 index a5a2539485bd..000000000000 --- a/include/net/irda/irlan_filter.h +++ /dev/null @@ -1,35 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_filter.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Fri Jan 29 15:24:08 1999 - * Modified at: Sun Feb 7 23:35:31 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_FILTER_H -#define IRLAN_FILTER_H - -void irlan_check_command_param(struct irlan_cb *self, char *param, - char *value); -void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb); -#ifdef CONFIG_PROC_FS -void irlan_print_filter(struct seq_file *seq, int filter_type); -#endif - -#endif /* IRLAN_FILTER_H */ diff --git a/include/net/irda/irlan_provider.h b/include/net/irda/irlan_provider.h deleted file mode 100644 index 92f3b0e1029b..000000000000 --- a/include/net/irda/irlan_provider.h +++ /dev/null @@ -1,52 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_provider.h - * Version: 0.1 - * Description: IrDA LAN access layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Sun May 9 12:26:11 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_SERVER_H -#define IRLAN_SERVER_H - -#include -#include -#include -#include - -#include - -void irlan_provider_ctrl_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb); - - -void irlan_provider_connect_response(struct irlan_cb *, struct tsap_cb *); - -int irlan_parse_open_data_cmd(struct irlan_cb *self, struct sk_buff *skb); -int irlan_provider_parse_command(struct irlan_cb *self, int cmd, - struct sk_buff *skb); - -void irlan_provider_send_reply(struct irlan_cb *self, int command, - int ret_code); -int irlan_provider_open_ctrl_tsap(struct irlan_cb *self); - -#endif - - diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h deleted file mode 100644 index 6f23e820618c..000000000000 --- a/include/net/irda/irlap.h +++ /dev/null @@ -1,311 +0,0 @@ -/********************************************************************* - * - * Filename: irlap.h - * Version: 0.8 - * Description: An IrDA LAP driver for Linux - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Fri Dec 10 13:21:17 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAP_H -#define IRLAP_H - -#include -#include -#include -#include - -#include /* irda_queue_t */ -#include /* struct qos_info */ -#include /* discovery_t */ -#include /* IRLAP_STATE, ... */ -#include /* struct notify_t */ - -#define CONFIG_IRDA_DYNAMIC_WINDOW 1 - -#define LAP_RELIABLE 1 -#define LAP_UNRELIABLE 0 - -#define LAP_ADDR_HEADER 1 /* IrLAP Address Header */ -#define LAP_CTRL_HEADER 1 /* IrLAP Control Header */ - -/* May be different when we get VFIR */ -#define LAP_MAX_HEADER (LAP_ADDR_HEADER + LAP_CTRL_HEADER) - -/* Each IrDA device gets a random 32 bits IRLAP device address */ -#define LAP_ALEN 4 - -#define BROADCAST 0xffffffff /* Broadcast device address */ -#define CBROADCAST 0xfe /* Connection broadcast address */ -#define XID_FORMAT 0x01 /* Discovery XID format */ - -/* Nobody seems to use this constant. */ -#define LAP_WINDOW_SIZE 8 -/* We keep the LAP queue very small to minimise the amount of buffering. - * this improve latency and reduce resource consumption. - * This work only because we have synchronous refilling of IrLAP through - * the flow control mechanism (via scheduler and IrTTP). - * 2 buffers is the minimum we can work with, one that we send while polling - * IrTTP, and another to know that we should not send the pf bit. - * Jean II */ -#define LAP_HIGH_THRESHOLD 2 -/* Some rare non TTP clients don't implement flow control, and - * so don't comply with the above limit (and neither with this one). - * For IAP and management, it doesn't matter, because they never transmit much. - *.For IrLPT, this should be fixed. - * - Jean II */ -#define LAP_MAX_QUEUE 10 -/* Please note that all IrDA management frames (LMP/TTP conn req/disc and - * IAS queries) fall in the second category and are sent to LAP even if TTP - * is stopped. This means that those frames will wait only a maximum of - * two (2) data frames before beeing sent on the "wire", which speed up - * new socket setup when the link is saturated. - * Same story for two sockets competing for the medium : if one saturates - * the LAP, when the other want to transmit it only has to wait for - * maximum three (3) packets (2 + one scheduling), which improve performance - * of delay sensitive applications. - * Jean II */ - -#define NR_EXPECTED 1 -#define NR_UNEXPECTED 0 -#define NR_INVALID -1 - -#define NS_EXPECTED 1 -#define NS_UNEXPECTED 0 -#define NS_INVALID -1 - -/* - * Meta information passed within the IrLAP state machine - */ -struct irlap_info { - __u8 caddr; /* Connection address */ - __u8 control; /* Frame type */ - __u8 cmd; - - __u32 saddr; - __u32 daddr; - - int pf; /* Poll/final bit set */ - - __u8 nr; /* Sequence number of next frame expected */ - __u8 ns; /* Sequence number of frame sent */ - - int S; /* Number of slots */ - int slot; /* Random chosen slot */ - int s; /* Current slot */ - - discovery_t *discovery; /* Discovery information */ -}; - -/* Main structure of IrLAP */ -struct irlap_cb { - irda_queue_t q; /* Must be first */ - magic_t magic; - - /* Device we are attached to */ - struct net_device *netdev; - char hw_name[2*IFNAMSIZ + 1]; - - /* Connection state */ - volatile IRLAP_STATE state; /* Current state */ - - /* Timers used by IrLAP */ - struct timer_list query_timer; - struct timer_list slot_timer; - struct timer_list discovery_timer; - struct timer_list final_timer; - struct timer_list poll_timer; - struct timer_list wd_timer; - struct timer_list backoff_timer; - - /* Media busy stuff */ - struct timer_list media_busy_timer; - int media_busy; - - /* Timeouts which will be different with different turn time */ - int slot_timeout; - int poll_timeout; - int final_timeout; - int wd_timeout; - - struct sk_buff_head txq; /* Frames to be transmitted */ - struct sk_buff_head txq_ultra; - - __u8 caddr; /* Connection address */ - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - - int retry_count; /* Times tried to establish connection */ - int add_wait; /* True if we are waiting for frame */ - - __u8 connect_pending; - __u8 disconnect_pending; - - /* To send a faster RR if tx queue empty */ -#ifdef CONFIG_IRDA_FAST_RR - int fast_RR_timeout; - int fast_RR; -#endif /* CONFIG_IRDA_FAST_RR */ - - int N1; /* N1 * F-timer = Negitiated link disconnect warning threshold */ - int N2; /* N2 * F-timer = Negitiated link disconnect time */ - int N3; /* Connection retry count */ - - int local_busy; - int remote_busy; - int xmitflag; - - __u8 vs; /* Next frame to be sent */ - __u8 vr; /* Next frame to be received */ - __u8 va; /* Last frame acked */ - int window; /* Nr of I-frames allowed to send */ - int window_size; /* Current negotiated window size */ - -#ifdef CONFIG_IRDA_DYNAMIC_WINDOW - __u32 line_capacity; /* Number of bytes allowed to send */ - __u32 bytes_left; /* Number of bytes still allowed to transmit */ -#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ - - struct sk_buff_head wx_list; - - __u8 ack_required; - - /* XID parameters */ - __u8 S; /* Number of slots */ - __u8 slot; /* Random chosen slot */ - __u8 s; /* Current slot */ - int frame_sent; /* Have we sent reply? */ - - hashbin_t *discovery_log; - discovery_t *discovery_cmd; - - __u32 speed; /* Link speed */ - - struct qos_info qos_tx; /* QoS requested by peer */ - struct qos_info qos_rx; /* QoS requested by self */ - struct qos_info *qos_dev; /* QoS supported by device */ - - notify_t notify; /* Callbacks to IrLMP */ - - int mtt_required; /* Minimum turnaround time required */ - int xbofs_delay; /* Nr of XBOF's used to MTT */ - int bofs_count; /* Negotiated extra BOFs */ - int next_bofs; /* Negotiated extra BOFs after next frame */ - - int mode; /* IrLAP mode (primary, secondary or monitor) */ -}; - -/* - * Function prototypes - */ -int irlap_init(void); -void irlap_cleanup(void); - -struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos, - const char *hw_name); -void irlap_close(struct irlap_cb *self); - -void irlap_connect_request(struct irlap_cb *self, __u32 daddr, - struct qos_info *qos, int sniff); -void irlap_connect_response(struct irlap_cb *self, struct sk_buff *skb); -void irlap_connect_indication(struct irlap_cb *self, struct sk_buff *skb); -void irlap_connect_confirm(struct irlap_cb *, struct sk_buff *skb); - -void irlap_data_indication(struct irlap_cb *, struct sk_buff *, int unreliable); -void irlap_data_request(struct irlap_cb *, struct sk_buff *, int unreliable); - -#ifdef CONFIG_IRDA_ULTRA -void irlap_unitdata_request(struct irlap_cb *, struct sk_buff *); -void irlap_unitdata_indication(struct irlap_cb *, struct sk_buff *); -#endif /* CONFIG_IRDA_ULTRA */ - -void irlap_disconnect_request(struct irlap_cb *); -void irlap_disconnect_indication(struct irlap_cb *, LAP_REASON reason); - -void irlap_status_indication(struct irlap_cb *, int quality_of_link); - -void irlap_test_request(__u8 *info, int len); - -void irlap_discovery_request(struct irlap_cb *, discovery_t *discovery); -void irlap_discovery_confirm(struct irlap_cb *, hashbin_t *discovery_log); -void irlap_discovery_indication(struct irlap_cb *, discovery_t *discovery); - -void irlap_reset_indication(struct irlap_cb *self); -void irlap_reset_confirm(void); - -void irlap_update_nr_received(struct irlap_cb *, int nr); -int irlap_validate_nr_received(struct irlap_cb *, int nr); -int irlap_validate_ns_received(struct irlap_cb *, int ns); - -int irlap_generate_rand_time_slot(int S, int s); -void irlap_initiate_connection_state(struct irlap_cb *); -void irlap_flush_all_queues(struct irlap_cb *); -void irlap_wait_min_turn_around(struct irlap_cb *, struct qos_info *); - -void irlap_apply_default_connection_parameters(struct irlap_cb *self); -void irlap_apply_connection_parameters(struct irlap_cb *self, int now); - -#define IRLAP_GET_HEADER_SIZE(self) (LAP_MAX_HEADER) -#define IRLAP_GET_TX_QUEUE_LEN(self) skb_queue_len(&self->txq) - -/* Return TRUE if the node is in primary mode (i.e. master) - * - Jean II */ -static inline int irlap_is_primary(struct irlap_cb *self) -{ - int ret; - switch(self->state) { - case LAP_XMIT_P: - case LAP_NRM_P: - ret = 1; - break; - case LAP_XMIT_S: - case LAP_NRM_S: - ret = 0; - break; - default: - ret = -1; - } - return ret; -} - -/* Clear a pending IrLAP disconnect. - Jean II */ -static inline void irlap_clear_disconnect(struct irlap_cb *self) -{ - self->disconnect_pending = FALSE; -} - -/* - * Function irlap_next_state (self, state) - * - * Switches state and provides debug information - * - */ -static inline void irlap_next_state(struct irlap_cb *self, IRLAP_STATE state) -{ - /* - if (!self || self->magic != LAP_MAGIC) - return; - - pr_debug("next LAP state = %s\n", irlap_state[state]); - */ - self->state = state; -} - -#endif diff --git a/include/net/irda/irlap_event.h b/include/net/irda/irlap_event.h deleted file mode 100644 index e4325fee1267..000000000000 --- a/include/net/irda/irlap_event.h +++ /dev/null @@ -1,129 +0,0 @@ -/********************************************************************* - * - * - * Filename: irlap_event.h - * Version: 0.1 - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sat Aug 16 00:59:29 1997 - * Modified at: Tue Dec 21 11:20:30 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRLAP_EVENT_H -#define IRLAP_EVENT_H - -#include - -/* A few forward declarations (to make compiler happy) */ -struct irlap_cb; -struct irlap_info; - -/* IrLAP States */ -typedef enum { - LAP_NDM, /* Normal disconnected mode */ - LAP_QUERY, - LAP_REPLY, - LAP_CONN, /* Connect indication */ - LAP_SETUP, /* Setting up connection */ - LAP_OFFLINE, /* A really boring state */ - LAP_XMIT_P, - LAP_PCLOSE, - LAP_NRM_P, /* Normal response mode as primary */ - LAP_RESET_WAIT, - LAP_RESET, - LAP_NRM_S, /* Normal response mode as secondary */ - LAP_XMIT_S, - LAP_SCLOSE, - LAP_RESET_CHECK, -} IRLAP_STATE; - -/* IrLAP Events */ -typedef enum { - /* Services events */ - DISCOVERY_REQUEST, - CONNECT_REQUEST, - CONNECT_RESPONSE, - DISCONNECT_REQUEST, - DATA_REQUEST, - RESET_REQUEST, - RESET_RESPONSE, - - /* Send events */ - SEND_I_CMD, - SEND_UI_FRAME, - - /* Receive events */ - RECV_DISCOVERY_XID_CMD, - RECV_DISCOVERY_XID_RSP, - RECV_SNRM_CMD, - RECV_TEST_CMD, - RECV_TEST_RSP, - RECV_UA_RSP, - RECV_DM_RSP, - RECV_RD_RSP, - RECV_I_CMD, - RECV_I_RSP, - RECV_UI_FRAME, - RECV_FRMR_RSP, - RECV_RR_CMD, - RECV_RR_RSP, - RECV_RNR_CMD, - RECV_RNR_RSP, - RECV_REJ_CMD, - RECV_REJ_RSP, - RECV_SREJ_CMD, - RECV_SREJ_RSP, - RECV_DISC_CMD, - - /* Timer events */ - SLOT_TIMER_EXPIRED, - QUERY_TIMER_EXPIRED, - FINAL_TIMER_EXPIRED, - POLL_TIMER_EXPIRED, - DISCOVERY_TIMER_EXPIRED, - WD_TIMER_EXPIRED, - BACKOFF_TIMER_EXPIRED, - MEDIA_BUSY_TIMER_EXPIRED, -} IRLAP_EVENT; - -/* - * Disconnect reason code - */ -typedef enum { /* FIXME check the two first reason codes */ - LAP_DISC_INDICATION=1, /* Received a disconnect request from peer */ - LAP_NO_RESPONSE, /* To many retransmits without response */ - LAP_RESET_INDICATION, /* To many retransmits, or invalid nr/ns */ - LAP_FOUND_NONE, /* No devices were discovered */ - LAP_MEDIA_BUSY, - LAP_PRIMARY_CONFLICT, -} LAP_REASON; - -extern const char *const irlap_state[]; - -void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -void irlap_print_event(IRLAP_EVENT event); - -int irlap_qos_negotiate(struct irlap_cb *self, struct sk_buff *skb); - -#endif diff --git a/include/net/irda/irlap_frame.h b/include/net/irda/irlap_frame.h deleted file mode 100644 index cbc12a926e5f..000000000000 --- a/include/net/irda/irlap_frame.h +++ /dev/null @@ -1,167 +0,0 @@ -/********************************************************************* - * - * Filename: irlap_frame.h - * Version: 0.9 - * Description: IrLAP frame declarations - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Aug 19 10:27:26 1997 - * Modified at: Sat Dec 25 21:07:26 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRLAP_FRAME_H -#define IRLAP_FRAME_H - -#include - -#include - -/* A few forward declarations (to make compiler happy) */ -struct irlap_cb; -struct discovery_t; - -/* Frame types and templates */ -#define INVALID 0xff - -/* Unnumbered (U) commands */ -#define SNRM_CMD 0x83 /* Set Normal Response Mode */ -#define DISC_CMD 0x43 /* Disconnect */ -#define XID_CMD 0x2f /* Exchange Station Identification */ -#define TEST_CMD 0xe3 /* Test */ - -/* Unnumbered responses */ -#define RNRM_RSP 0x83 /* Request Normal Response Mode */ -#define UA_RSP 0x63 /* Unnumbered Acknowledgement */ -#define FRMR_RSP 0x87 /* Frame Reject */ -#define DM_RSP 0x0f /* Disconnect Mode */ -#define RD_RSP 0x43 /* Request Disconnection */ -#define XID_RSP 0xaf /* Exchange Station Identification */ -#define TEST_RSP 0xe3 /* Test frame */ - -/* Supervisory (S) */ -#define RR 0x01 /* Receive Ready */ -#define REJ 0x09 /* Reject */ -#define RNR 0x05 /* Receive Not Ready */ -#define SREJ 0x0d /* Selective Reject */ - -/* Information (I) */ -#define I_FRAME 0x00 /* Information Format */ -#define UI_FRAME 0x03 /* Unnumbered Information */ - -#define CMD_FRAME 0x01 -#define RSP_FRAME 0x00 - -#define PF_BIT 0x10 /* Poll/final bit */ - -/* Some IrLAP field lengths */ -/* - * Only baud rate triplet is 4 bytes (PV can be 2 bytes). - * All others params (7) are 3 bytes, so that's 7*3 + 1*4 bytes. - */ -#define IRLAP_NEGOCIATION_PARAMS_LEN 25 -#define IRLAP_DISCOVERY_INFO_LEN 32 - -struct disc_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct xid_frame { - __u8 caddr; /* Connection address */ - __u8 control; - __u8 ident; /* Should always be XID_FORMAT */ - __le32 saddr; /* Source device address */ - __le32 daddr; /* Destination device address */ - __u8 flags; /* Discovery flags */ - __u8 slotnr; - __u8 version; -} __packed; - -struct test_frame { - __u8 caddr; /* Connection address */ - __u8 control; - __le32 saddr; /* Source device address */ - __le32 daddr; /* Destination device address */ -} __packed; - -struct ua_frame { - __u8 caddr; - __u8 control; - __le32 saddr; /* Source device address */ - __le32 daddr; /* Dest device address */ -} __packed; - -struct dm_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct rd_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct rr_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct i_frame { - __u8 caddr; - __u8 control; -} __packed; - -struct snrm_frame { - __u8 caddr; - __u8 control; - __le32 saddr; - __le32 daddr; - __u8 ncaddr; -} __packed; - -void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb); -void irlap_send_discovery_xid_frame(struct irlap_cb *, int S, __u8 s, - __u8 command, - struct discovery_t *discovery); -void irlap_send_snrm_frame(struct irlap_cb *, struct qos_info *); -void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr, - struct sk_buff *cmd); -void irlap_send_ua_response_frame(struct irlap_cb *, struct qos_info *); -void irlap_send_dm_frame(struct irlap_cb *self); -void irlap_send_rd_frame(struct irlap_cb *self); -void irlap_send_disc_frame(struct irlap_cb *self); -void irlap_send_rr_frame(struct irlap_cb *self, int command); - -void irlap_send_data_primary(struct irlap_cb *, struct sk_buff *); -void irlap_send_data_primary_poll(struct irlap_cb *, struct sk_buff *); -void irlap_send_data_secondary(struct irlap_cb *, struct sk_buff *); -void irlap_send_data_secondary_final(struct irlap_cb *, struct sk_buff *); -void irlap_resend_rejected_frames(struct irlap_cb *, int command); -void irlap_resend_rejected_frame(struct irlap_cb *self, int command); - -void irlap_send_ui_frame(struct irlap_cb *self, struct sk_buff *skb, - __u8 caddr, int command); - -int irlap_insert_qos_negotiation_params(struct irlap_cb *self, - struct sk_buff *skb); - -#endif diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h deleted file mode 100644 index f132924cc9da..000000000000 --- a/include/net/irda/irlmp.h +++ /dev/null @@ -1,295 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp.h - * Version: 0.9 - * Description: IrDA Link Management Protocol (LMP) layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 17 20:54:32 1997 - * Modified at: Fri Dec 10 13:23:01 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLMP_H -#define IRLMP_H - -#include /* for HZ */ - -#include - -#include -#include -#include /* LAP_MAX_HEADER, ... */ -#include -#include -#include - -/* LSAP-SEL's */ -#define LSAP_MASK 0x7f -#define LSAP_IAS 0x00 -#define LSAP_ANY 0xff -#define LSAP_MAX 0x6f /* 0x70-0x7f are reserved */ -#define LSAP_CONNLESS 0x70 /* Connectionless LSAP, mostly used for Ultra */ - -#define DEV_ADDR_ANY 0xffffffff - -#define LMP_HEADER 2 /* Dest LSAP + Source LSAP */ -#define LMP_CONTROL_HEADER 4 /* LMP_HEADER + opcode + parameter */ -#define LMP_PID_HEADER 1 /* Used by Ultra */ -#define LMP_MAX_HEADER (LMP_CONTROL_HEADER+LAP_MAX_HEADER) - -#define LM_MAX_CONNECTIONS 10 - -#define LM_IDLE_TIMEOUT 2*HZ /* 2 seconds for now */ - -typedef enum { - S_PNP = 0, - S_PDA, - S_COMPUTER, - S_PRINTER, - S_MODEM, - S_FAX, - S_LAN, - S_TELEPHONY, - S_COMM, - S_OBEX, - S_ANY, - S_END, -} SERVICE; - -/* For selective discovery */ -typedef void (*DISCOVERY_CALLBACK1) (discinfo_t *, DISCOVERY_MODE, void *); -/* For expiry (the same) */ -typedef void (*DISCOVERY_CALLBACK2) (discinfo_t *, DISCOVERY_MODE, void *); - -typedef struct { - irda_queue_t queue; /* Must be first */ - - __u16_host_order hints; /* Hint bits */ -} irlmp_service_t; - -typedef struct { - irda_queue_t queue; /* Must be first */ - - __u16_host_order hint_mask; - - DISCOVERY_CALLBACK1 disco_callback; /* Selective discovery */ - DISCOVERY_CALLBACK2 expir_callback; /* Selective expiration */ - void *priv; /* Used to identify client */ -} irlmp_client_t; - -/* - * Information about each logical LSAP connection - */ -struct lsap_cb { - irda_queue_t queue; /* Must be first */ - magic_t magic; - - unsigned long connected; /* set_bit used on this */ - int persistent; - - __u8 slsap_sel; /* Source (this) LSAP address */ - __u8 dlsap_sel; /* Destination LSAP address (if connected) */ -#ifdef CONFIG_IRDA_ULTRA - __u8 pid; /* Used by connectionless LSAP */ -#endif /* CONFIG_IRDA_ULTRA */ - struct sk_buff *conn_skb; /* Store skb here while connecting */ - - struct timer_list watchdog_timer; - - LSAP_STATE lsap_state; /* Connection state */ - notify_t notify; /* Indication/Confirm entry points */ - struct qos_info qos; /* QoS for this connection */ - - struct lap_cb *lap; /* Pointer to LAP connection structure */ -}; - -/* - * Used for caching the last slsap->dlsap->handle mapping - * - * We don't need to keep/match the remote address in the cache because - * we are associated with a specific LAP (which implies it). - * Jean II - */ -typedef struct { - int valid; - - __u8 slsap_sel; - __u8 dlsap_sel; - struct lsap_cb *lsap; -} CACHE_ENTRY; - -/* - * Information about each registered IrLAP layer - */ -struct lap_cb { - irda_queue_t queue; /* Must be first */ - magic_t magic; - - int reason; /* LAP disconnect reason */ - - IRLMP_STATE lap_state; - - struct irlap_cb *irlap; /* Instance of IrLAP layer */ - hashbin_t *lsaps; /* LSAP associated with this link */ - struct lsap_cb *flow_next; /* Next lsap to be polled for Tx */ - - __u8 caddr; /* Connection address */ - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - - struct qos_info *qos; /* LAP QoS for this session */ - struct timer_list idle_timer; - -#ifdef CONFIG_IRDA_CACHE_LAST_LSAP - /* The lsap cache was moved from struct irlmp_cb to here because - * it must be associated with the specific LAP. Also, this - * improves performance. - Jean II */ - CACHE_ENTRY cache; /* Caching last slsap->dlsap->handle mapping */ -#endif -}; - -/* - * Main structure for IrLMP - */ -struct irlmp_cb { - magic_t magic; - - __u8 conflict_flag; - - discovery_t discovery_cmd; /* Discovery command to use by IrLAP */ - discovery_t discovery_rsp; /* Discovery response to use by IrLAP */ - - /* Last lsap picked automatically by irlmp_find_free_slsap() */ - int last_lsap_sel; - - struct timer_list discovery_timer; - - hashbin_t *links; /* IrLAP connection table */ - hashbin_t *unconnected_lsaps; - hashbin_t *clients; - hashbin_t *services; - - hashbin_t *cachelog; /* Current discovery log */ - - int running; - - __u16_host_order hints; /* Hint bits */ -}; - -/* Prototype declarations */ -int irlmp_init(void); -void irlmp_cleanup(void); -struct lsap_cb *irlmp_open_lsap(__u8 slsap, notify_t *notify, __u8 pid); -void irlmp_close_lsap( struct lsap_cb *self); - -__u16 irlmp_service_to_hint(int service); -void *irlmp_register_service(__u16 hints); -int irlmp_unregister_service(void *handle); -void *irlmp_register_client(__u16 hint_mask, DISCOVERY_CALLBACK1 disco_clb, - DISCOVERY_CALLBACK2 expir_clb, void *priv); -int irlmp_unregister_client(void *handle); -int irlmp_update_client(void *handle, __u16 hint_mask, - DISCOVERY_CALLBACK1 disco_clb, - DISCOVERY_CALLBACK2 expir_clb, void *priv); - -void irlmp_register_link(struct irlap_cb *, __u32 saddr, notify_t *); -void irlmp_unregister_link(__u32 saddr); - -int irlmp_connect_request(struct lsap_cb *, __u8 dlsap_sel, - __u32 saddr, __u32 daddr, - struct qos_info *, struct sk_buff *); -void irlmp_connect_indication(struct lsap_cb *self, struct sk_buff *skb); -int irlmp_connect_response(struct lsap_cb *, struct sk_buff *); -void irlmp_connect_confirm(struct lsap_cb *, struct sk_buff *); -struct lsap_cb *irlmp_dup(struct lsap_cb *self, void *instance); - -void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, - struct sk_buff *userdata); -int irlmp_disconnect_request(struct lsap_cb *, struct sk_buff *userdata); - -void irlmp_discovery_confirm(hashbin_t *discovery_log, DISCOVERY_MODE mode); -void irlmp_discovery_request(int nslots); -discinfo_t *irlmp_get_discoveries(int *pn, __u16 mask, int nslots); -void irlmp_do_expiry(void); -void irlmp_do_discovery(int nslots); -discovery_t *irlmp_get_discovery_response(void); -void irlmp_discovery_expiry(discinfo_t *expiry, int number); - -int irlmp_data_request(struct lsap_cb *, struct sk_buff *); -void irlmp_data_indication(struct lsap_cb *, struct sk_buff *); - -int irlmp_udata_request(struct lsap_cb *, struct sk_buff *); -void irlmp_udata_indication(struct lsap_cb *, struct sk_buff *); - -#ifdef CONFIG_IRDA_ULTRA -int irlmp_connless_data_request(struct lsap_cb *, struct sk_buff *, __u8); -void irlmp_connless_data_indication(struct lsap_cb *, struct sk_buff *); -#endif /* CONFIG_IRDA_ULTRA */ - -void irlmp_status_indication(struct lap_cb *, LINK_STATUS link, LOCK_STATUS lock); -void irlmp_flow_indication(struct lap_cb *self, LOCAL_FLOW flow); - -LM_REASON irlmp_convert_lap_reason(LAP_REASON); - -static inline __u32 irlmp_get_saddr(const struct lsap_cb *self) -{ - return (self && self->lap) ? self->lap->saddr : 0; -} - -static inline __u32 irlmp_get_daddr(const struct lsap_cb *self) -{ - return (self && self->lap) ? self->lap->daddr : 0; -} - -const char *irlmp_reason_str(LM_REASON reason); - -extern int sysctl_discovery_timeout; -extern int sysctl_discovery_slots; -extern int sysctl_discovery; -extern int sysctl_lap_keepalive_time; /* in ms, default is LM_IDLE_TIMEOUT */ -extern struct irlmp_cb *irlmp; - -/* Check if LAP queue is full. - * Used by IrTTP for low control, see comments in irlap.h - Jean II */ -static inline int irlmp_lap_tx_queue_full(struct lsap_cb *self) -{ - if (self == NULL) - return 0; - if (self->lap == NULL) - return 0; - if (self->lap->irlap == NULL) - return 0; - - return IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD; -} - -/* After doing a irlmp_dup(), this get one of the two socket back into - * a state where it's waiting incoming connections. - * Note : this can be used *only* if the socket is not yet connected - * (i.e. NO irlmp_connect_response() done on this socket). - * - Jean II */ -static inline void irlmp_listen(struct lsap_cb *self) -{ - self->dlsap_sel = LSAP_ANY; - self->lap = NULL; - self->lsap_state = LSAP_DISCONNECTED; - /* Started when we received the LM_CONNECT_INDICATION */ - del_timer(&self->watchdog_timer); -} - -#endif diff --git a/include/net/irda/irlmp_event.h b/include/net/irda/irlmp_event.h deleted file mode 100644 index 9e4ec17a7449..000000000000 --- a/include/net/irda/irlmp_event.h +++ /dev/null @@ -1,98 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp_event.h - * Version: 0.1 - * Description: IrDA-LMP event handling - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Thu Jul 8 12:18:54 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997, 1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLMP_EVENT_H -#define IRLMP_EVENT_H - -/* A few forward declarations (to make compiler happy) */ -struct irlmp_cb; -struct lsap_cb; -struct lap_cb; -struct discovery_t; - -/* LAP states */ -typedef enum { - /* IrLAP connection control states */ - LAP_STANDBY, /* No LAP connection */ - LAP_U_CONNECT, /* Starting LAP connection */ - LAP_ACTIVE, /* LAP connection is active */ -} IRLMP_STATE; - -/* LSAP connection control states */ -typedef enum { - LSAP_DISCONNECTED, /* No LSAP connection */ - LSAP_CONNECT, /* Connect indication from peer */ - LSAP_CONNECT_PEND, /* Connect request from service user */ - LSAP_DATA_TRANSFER_READY, /* LSAP connection established */ - LSAP_SETUP, /* Trying to set up LSAP connection */ - LSAP_SETUP_PEND, /* Request to start LAP connection */ -} LSAP_STATE; - -typedef enum { - /* LSAP events */ - LM_CONNECT_REQUEST, - LM_CONNECT_CONFIRM, - LM_CONNECT_RESPONSE, - LM_CONNECT_INDICATION, - - LM_DISCONNECT_INDICATION, - LM_DISCONNECT_REQUEST, - - LM_DATA_REQUEST, - LM_UDATA_REQUEST, - LM_DATA_INDICATION, - LM_UDATA_INDICATION, - - LM_WATCHDOG_TIMEOUT, - - /* IrLAP events */ - LM_LAP_CONNECT_REQUEST, - LM_LAP_CONNECT_INDICATION, - LM_LAP_CONNECT_CONFIRM, - LM_LAP_DISCONNECT_INDICATION, - LM_LAP_DISCONNECT_REQUEST, - LM_LAP_DISCOVERY_REQUEST, - LM_LAP_DISCOVERY_CONFIRM, - LM_LAP_IDLE_TIMEOUT, -} IRLMP_EVENT; - -extern const char *const irlmp_state[]; -extern const char *const irlsap_state[]; - -void irlmp_watchdog_timer_expired(void *data); -void irlmp_discovery_timer_expired(void *data); -void irlmp_idle_timer_expired(void *data); - -void irlmp_do_lap_event(struct lap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb); -int irlmp_do_lsap_event(struct lsap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb); - -#endif /* IRLMP_EVENT_H */ - - - - diff --git a/include/net/irda/irlmp_frame.h b/include/net/irda/irlmp_frame.h deleted file mode 100644 index 1906eb71422e..000000000000 --- a/include/net/irda/irlmp_frame.h +++ /dev/null @@ -1,62 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp_frame.h - * Version: 0.9 - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Aug 19 02:09:59 1997 - * Modified at: Fri Dec 10 13:21:53 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997, 1999 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRMLP_FRAME_H -#define IRMLP_FRAME_H - -#include - -#include - -/* IrLMP frame opcodes */ -#define CONNECT_CMD 0x01 -#define CONNECT_CNF 0x81 -#define DISCONNECT 0x02 -#define ACCESSMODE_CMD 0x03 -#define ACCESSMODE_CNF 0x83 - -#define CONTROL_BIT 0x80 - -void irlmp_send_data_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap, - int expedited, struct sk_buff *skb); -void irlmp_send_lcf_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap, - __u8 opcode, struct sk_buff *skb); -void irlmp_link_data_indication(struct lap_cb *, struct sk_buff *, - int unreliable); -#ifdef CONFIG_IRDA_ULTRA -void irlmp_link_unitdata_indication(struct lap_cb *, struct sk_buff *); -#endif /* CONFIG_IRDA_ULTRA */ - -void irlmp_link_connect_indication(struct lap_cb *, __u32 saddr, __u32 daddr, - struct qos_info *qos, struct sk_buff *skb); -void irlmp_link_connect_request(__u32 daddr); -void irlmp_link_connect_confirm(struct lap_cb *self, struct qos_info *qos, - struct sk_buff *skb); -void irlmp_link_disconnect_indication(struct lap_cb *, struct irlap_cb *, - LAP_REASON reason, struct sk_buff *); -void irlmp_link_discovery_confirm(struct lap_cb *self, hashbin_t *log); -void irlmp_link_discovery_indication(struct lap_cb *, discovery_t *discovery); - -#endif diff --git a/include/net/irda/irmod.h b/include/net/irda/irmod.h deleted file mode 100644 index 86f0dbb8ee5d..000000000000 --- a/include/net/irda/irmod.h +++ /dev/null @@ -1,109 +0,0 @@ -/********************************************************************* - * - * Filename: irmod.h - * Version: 0.3 - * Description: IrDA module and utilities functions - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Dec 15 13:58:52 1997 - * Modified at: Fri Jan 28 13:15:24 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charg. - * - ********************************************************************/ - -#ifndef IRMOD_H -#define IRMOD_H - -/* Misc status information */ -typedef enum { - STATUS_OK, - STATUS_ABORTED, - STATUS_NO_ACTIVITY, - STATUS_NOISY, - STATUS_REMOTE, -} LINK_STATUS; - -typedef enum { - LOCK_NO_CHANGE, - LOCK_LOCKED, - LOCK_UNLOCKED, -} LOCK_STATUS; - -typedef enum { FLOW_STOP, FLOW_START } LOCAL_FLOW; - -/* - * IrLMP disconnect reasons. The order is very important, since they - * correspond to disconnect reasons sent in IrLMP disconnect frames, so - * please do not touch :-) - */ -typedef enum { - LM_USER_REQUEST = 1, /* User request */ - LM_LAP_DISCONNECT, /* Unexpected IrLAP disconnect */ - LM_CONNECT_FAILURE, /* Failed to establish IrLAP connection */ - LM_LAP_RESET, /* IrLAP reset */ - LM_INIT_DISCONNECT, /* Link Management initiated disconnect */ - LM_LSAP_NOTCONN, /* Data delivered on unconnected LSAP */ - LM_NON_RESP_CLIENT, /* Non responsive LM-MUX client */ - LM_NO_AVAIL_CLIENT, /* No available LM-MUX client */ - LM_CONN_HALF_OPEN, /* Connection is half open */ - LM_BAD_SOURCE_ADDR, /* Illegal source address (i.e 0x00) */ -} LM_REASON; -#define LM_UNKNOWN 0xff /* Unspecified disconnect reason */ - -/* A few forward declarations (to make compiler happy) */ -struct qos_info; /* in */ - -/* - * Notify structure used between transport and link management layers - */ -typedef struct { - int (*data_indication)(void *priv, void *sap, struct sk_buff *skb); - int (*udata_indication)(void *priv, void *sap, struct sk_buff *skb); - void (*connect_confirm)(void *instance, void *sap, - struct qos_info *qos, __u32 max_sdu_size, - __u8 max_header_size, struct sk_buff *skb); - void (*connect_indication)(void *instance, void *sap, - struct qos_info *qos, __u32 max_sdu_size, - __u8 max_header_size, struct sk_buff *skb); - void (*disconnect_indication)(void *instance, void *sap, - LM_REASON reason, struct sk_buff *); - void (*flow_indication)(void *instance, void *sap, LOCAL_FLOW flow); - void (*status_indication)(void *instance, - LINK_STATUS link, LOCK_STATUS lock); - void *instance; /* Layer instance pointer */ - char name[16]; /* Name of layer */ -} notify_t; - -#define NOTIFY_MAX_NAME 16 - -/* Zero the notify structure */ -void irda_notify_init(notify_t *notify); - -/* Locking wrapper - Note the inverted logic on irda_lock(). - * Those function basically return false if the lock is already in the - * position you want to set it. - Jean II */ -#define irda_lock(lock) (! test_and_set_bit(0, (void *) (lock))) -#define irda_unlock(lock) (test_and_clear_bit(0, (void *) (lock))) - -#endif /* IRMOD_H */ - - - - - - - - - diff --git a/include/net/irda/irqueue.h b/include/net/irda/irqueue.h deleted file mode 100644 index 37f512bd6733..000000000000 --- a/include/net/irda/irqueue.h +++ /dev/null @@ -1,96 +0,0 @@ -/********************************************************************* - * - * Filename: irqueue.h - * Version: 0.3 - * Description: General queue implementation - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Jun 9 13:26:50 1998 - * Modified at: Thu Oct 7 13:25:16 1999 - * Modified by: Dag Brattli - * - * Copyright (C) 1998-1999, Aage Kvalnes - * Copyright (c) 1998, Dag Brattli - * All Rights Reserved. - * - * This code is taken from the Vortex Operating System written by Aage - * Kvalnes and has been ported to Linux and Linux/IR by Dag Brattli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include -#include - -#ifndef IRDA_QUEUE_H -#define IRDA_QUEUE_H - -#define NAME_SIZE 32 - -/* - * Hash types (some flags can be xored) - * See comments in irqueue.c for which one to use... - */ -#define HB_NOLOCK 0 /* No concurent access prevention */ -#define HB_LOCK 1 /* Prevent concurent write with global lock */ - -/* - * Hash defines - */ -#define HASHBIN_SIZE 8 -#define HASHBIN_MASK 0x7 - -#ifndef IRDA_ALIGN -#define IRDA_ALIGN __attribute__((aligned)) -#endif - -#define Q_NULL { NULL, NULL, "", 0 } - -typedef void (*FREE_FUNC)(void *arg); - -struct irda_queue { - struct irda_queue *q_next; - struct irda_queue *q_prev; - - char q_name[NAME_SIZE]; - long q_hash; /* Must be able to cast a (void *) */ -}; -typedef struct irda_queue irda_queue_t; - -typedef struct hashbin_t { - __u32 magic; - int hb_type; - int hb_size; - spinlock_t hb_spinlock; /* HB_LOCK - Can be used by the user */ - - irda_queue_t* hb_queue[HASHBIN_SIZE] IRDA_ALIGN; - - irda_queue_t* hb_current; -} hashbin_t; - -hashbin_t *hashbin_new(int type); -int hashbin_delete(hashbin_t* hashbin, FREE_FUNC func); -int hashbin_clear(hashbin_t* hashbin, FREE_FUNC free_func); -void hashbin_insert(hashbin_t* hashbin, irda_queue_t* entry, long hashv, - const char* name); -void* hashbin_remove(hashbin_t* hashbin, long hashv, const char* name); -void* hashbin_remove_first(hashbin_t *hashbin); -void* hashbin_remove_this( hashbin_t* hashbin, irda_queue_t* entry); -void* hashbin_find(hashbin_t* hashbin, long hashv, const char* name); -void* hashbin_lock_find(hashbin_t* hashbin, long hashv, const char* name); -void* hashbin_find_next(hashbin_t* hashbin, long hashv, const char* name, - void ** pnext); -irda_queue_t *hashbin_get_first(hashbin_t *hashbin); -irda_queue_t *hashbin_get_next(hashbin_t *hashbin); - -#define HASHBIN_GET_SIZE(hashbin) hashbin->hb_size - -#endif diff --git a/include/net/irda/irttp.h b/include/net/irda/irttp.h deleted file mode 100644 index 98682d4bae8f..000000000000 --- a/include/net/irda/irttp.h +++ /dev/null @@ -1,210 +0,0 @@ -/********************************************************************* - * - * Filename: irttp.h - * Version: 1.0 - * Description: Tiny Transport Protocol (TTP) definitions - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:31 1997 - * Modified at: Sun Dec 12 13:09:07 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRTTP_H -#define IRTTP_H - -#include -#include -#include - -#include -#include /* struct lsap_cb */ -#include /* struct qos_info */ -#include - -#define TTP_MAX_CONNECTIONS LM_MAX_CONNECTIONS -#define TTP_HEADER 1 -#define TTP_MAX_HEADER (TTP_HEADER + LMP_MAX_HEADER) -#define TTP_SAR_HEADER 5 -#define TTP_PARAMETERS 0x80 -#define TTP_MORE 0x80 - -/* Transmission queue sizes */ -/* Worst case scenario, two window of data - Jean II */ -#define TTP_TX_MAX_QUEUE 14 -/* We need to keep at least 5 frames to make sure that we can refill - * appropriately the LAP layer. LAP keeps only two buffers, and we need - * to have 7 to make a full window - Jean II */ -#define TTP_TX_LOW_THRESHOLD 5 -/* Most clients are synchronous with respect to flow control, so we can - * keep a low number of Tx buffers in TTP - Jean II */ -#define TTP_TX_HIGH_THRESHOLD 7 - -/* Receive queue sizes */ -/* Minimum of credit that the peer should hold. - * If the peer has less credits than 9 frames, we will explicitly send - * him some credits (through irttp_give_credit() and a specific frame). - * Note that when we give credits it's likely that it won't be sent in - * this LAP window, but in the next one. So, we make sure that the peer - * has something to send while waiting for credits (one LAP window == 7 - * + 1 frames while he process the credits). - Jean II */ -#define TTP_RX_MIN_CREDIT 8 -/* This is the default maximum number of credits held by the peer, so the - * default maximum number of frames he can send us before needing flow - * control answer from us (this may be negociated differently at TSAP setup). - * We want to minimise the number of times we have to explicitly send some - * credit to the peer, hoping we can piggyback it on the return data. In - * particular, it doesn't make sense for us to send credit more than once - * per LAP window. - * Moreover, giving credits has some latency, so we need strictly more than - * a LAP window, otherwise we may already have credits in our Tx queue. - * But on the other hand, we don't want to keep too many Rx buffer here - * before starting to flow control the other end, so make it exactly one - * LAP window + 1 + MIN_CREDITS. - Jean II */ -#define TTP_RX_DEFAULT_CREDIT 16 -/* Maximum number of credits we can allow the peer to have, and therefore - * maximum Rx queue size. - * Note that we try to deliver packets to the higher layer every time we - * receive something, so in normal mode the Rx queue will never contains - * more than one or two packets. - Jean II */ -#define TTP_RX_MAX_CREDIT 21 - -/* What clients should use when calling ttp_open_tsap() */ -#define DEFAULT_INITIAL_CREDIT TTP_RX_DEFAULT_CREDIT - -/* Some priorities for disconnect requests */ -#define P_NORMAL 0 -#define P_HIGH 1 - -#define TTP_SAR_DISABLE 0 -#define TTP_SAR_UNBOUND 0xffffffff - -/* Parameters */ -#define TTP_MAX_SDU_SIZE 0x01 - -/* - * This structure contains all data associated with one instance of a TTP - * connection. - */ -struct tsap_cb { - irda_queue_t q; /* Must be first */ - magic_t magic; /* Just in case */ - - __u8 stsap_sel; /* Source TSAP */ - __u8 dtsap_sel; /* Destination TSAP */ - - struct lsap_cb *lsap; /* Corresponding LSAP to this TSAP */ - - __u8 connected; /* TSAP connected */ - - __u8 initial_credit; /* Initial credit to give peer */ - - int avail_credit; /* Available credit to return to peer */ - int remote_credit; /* Credit held by peer TTP entity */ - int send_credit; /* Credit held by local TTP entity */ - - struct sk_buff_head tx_queue; /* Frames to be transmitted */ - struct sk_buff_head rx_queue; /* Received frames */ - struct sk_buff_head rx_fragments; - int tx_queue_lock; - int rx_queue_lock; - spinlock_t lock; - - notify_t notify; /* Callbacks to client layer */ - - struct net_device_stats stats; - struct timer_list todo_timer; - - __u32 max_seg_size; /* Max data that fit into an IrLAP frame */ - __u8 max_header_size; - - int rx_sdu_busy; /* RxSdu.busy */ - __u32 rx_sdu_size; /* Current size of a partially received frame */ - __u32 rx_max_sdu_size; /* Max receive user data size */ - - int tx_sdu_busy; /* TxSdu.busy */ - __u32 tx_max_sdu_size; /* Max transmit user data size */ - - int close_pend; /* Close, but disconnect_pend */ - unsigned long disconnect_pend; /* Disconnect, but still data to send */ - struct sk_buff *disconnect_skb; -}; - -struct irttp_cb { - magic_t magic; - hashbin_t *tsaps; -}; - -int irttp_init(void); -void irttp_cleanup(void); - -struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify); -int irttp_close_tsap(struct tsap_cb *self); - -int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb); -int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb); - -int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel, - __u32 saddr, __u32 daddr, - struct qos_info *qos, __u32 max_sdu_size, - struct sk_buff *userdata); -int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, - struct sk_buff *userdata); -int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *skb, - int priority); -void irttp_flow_request(struct tsap_cb *self, LOCAL_FLOW flow); -struct tsap_cb *irttp_dup(struct tsap_cb *self, void *instance); - -static inline __u32 irttp_get_saddr(struct tsap_cb *self) -{ - return irlmp_get_saddr(self->lsap); -} - -static inline __u32 irttp_get_daddr(struct tsap_cb *self) -{ - return irlmp_get_daddr(self->lsap); -} - -static inline __u32 irttp_get_max_seg_size(struct tsap_cb *self) -{ - return self->max_seg_size; -} - -/* After doing a irttp_dup(), this get one of the two socket back into - * a state where it's waiting incoming connections. - * Note : this can be used *only* if the socket is not yet connected - * (i.e. NO irttp_connect_response() done on this socket). - * - Jean II */ -static inline void irttp_listen(struct tsap_cb *self) -{ - irlmp_listen(self->lsap); - self->dtsap_sel = LSAP_ANY; -} - -/* Return TRUE if the node is in primary mode (i.e. master) - * - Jean II */ -static inline int irttp_is_primary(struct tsap_cb *self) -{ - if ((self == NULL) || - (self->lsap == NULL) || - (self->lsap->lap == NULL) || - (self->lsap->lap->irlap == NULL)) - return -2; - return irlap_is_primary(self->lsap->lap->irlap); -} - -#endif /* IRTTP_H */ diff --git a/include/net/irda/parameters.h b/include/net/irda/parameters.h deleted file mode 100644 index 2d9cd0007cba..000000000000 --- a/include/net/irda/parameters.h +++ /dev/null @@ -1,100 +0,0 @@ -/********************************************************************* - * - * Filename: parameters.h - * Version: 1.0 - * Description: A more general way to handle (pi,pl,pv) parameters - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Jun 7 08:47:28 1999 - * Modified at: Sun Jan 30 14:05:14 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - * Michel Dänzer , 10/2001 - * - simplify irda_pv_t to avoid endianness issues - * - ********************************************************************/ - -#ifndef IRDA_PARAMS_H -#define IRDA_PARAMS_H - -/* - * The currently supported types. Beware not to change the sequence since - * it a good reason why the sized integers has a value equal to their size - */ -typedef enum { - PV_INTEGER, /* Integer of any (pl) length */ - PV_INT_8_BITS, /* Integer of 8 bits in length */ - PV_INT_16_BITS, /* Integer of 16 bits in length */ - PV_STRING, /* \0 terminated string */ - PV_INT_32_BITS, /* Integer of 32 bits in length */ - PV_OCT_SEQ, /* Octet sequence */ - PV_NO_VALUE /* Does not contain any value (pl=0) */ -} PV_TYPE; - -/* Bit 7 of type field */ -#define PV_BIG_ENDIAN 0x80 -#define PV_LITTLE_ENDIAN 0x00 -#define PV_MASK 0x7f /* To mask away endian bit */ - -#define PV_PUT 0 -#define PV_GET 1 - -typedef union { - char *c; - __u32 i; - __u32 *ip; -} irda_pv_t; - -typedef struct { - __u8 pi; - __u8 pl; - irda_pv_t pv; -} irda_param_t; - -typedef int (*PI_HANDLER)(void *self, irda_param_t *param, int get); -typedef int (*PV_HANDLER)(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func); - -typedef struct { - const PI_HANDLER func; /* Handler for this parameter identifier */ - PV_TYPE type; /* Data type for this parameter */ -} pi_minor_info_t; - -typedef struct { - const pi_minor_info_t *pi_minor_call_table; - int len; -} pi_major_info_t; - -typedef struct { - const pi_major_info_t *tables; - int len; - __u8 pi_mask; - int pi_major_offset; -} pi_param_info_t; - -int irda_param_pack(__u8 *buf, char *fmt, ...); - -int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len, - pi_param_info_t *info); -int irda_param_extract_all(void *self, __u8 *buf, int len, - pi_param_info_t *info); - -#define irda_param_insert_byte(buf,pi,pv) irda_param_pack(buf,"bbb",pi,1,pv) - -#endif /* IRDA_PARAMS_H */ - diff --git a/include/net/irda/qos.h b/include/net/irda/qos.h deleted file mode 100644 index 05a5a249956f..000000000000 --- a/include/net/irda/qos.h +++ /dev/null @@ -1,101 +0,0 @@ -/********************************************************************* - * - * Filename: qos.h - * Version: 1.0 - * Description: Quality of Service definitions - * Status: Experimental. - * Author: Dag Brattli - * Created at: Fri Sep 19 23:21:09 1997 - * Modified at: Thu Dec 2 13:51:54 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRDA_QOS_H -#define IRDA_QOS_H - -#include - -#include - -#define PI_BAUD_RATE 0x01 -#define PI_MAX_TURN_TIME 0x82 -#define PI_DATA_SIZE 0x83 -#define PI_WINDOW_SIZE 0x84 -#define PI_ADD_BOFS 0x85 -#define PI_MIN_TURN_TIME 0x86 -#define PI_LINK_DISC 0x08 - -#define IR_115200_MAX 0x3f - -/* Baud rates (first byte) */ -#define IR_2400 0x01 -#define IR_9600 0x02 -#define IR_19200 0x04 -#define IR_38400 0x08 -#define IR_57600 0x10 -#define IR_115200 0x20 -#define IR_576000 0x40 -#define IR_1152000 0x80 - -/* Baud rates (second byte) */ -#define IR_4000000 0x01 -#define IR_16000000 0x02 - -/* Quality of Service information */ -typedef struct { - __u32 value; - __u16 bits; /* LSB is first byte, MSB is second byte */ -} qos_value_t; - -struct qos_info { - magic_t magic; - - qos_value_t baud_rate; /* IR_11520O | ... */ - qos_value_t max_turn_time; - qos_value_t data_size; - qos_value_t window_size; - qos_value_t additional_bofs; - qos_value_t min_turn_time; - qos_value_t link_disc_time; - - qos_value_t power; -}; - -extern int sysctl_max_baud_rate; -extern int sysctl_max_inactive_time; - -void irda_init_max_qos_capabilies(struct qos_info *qos); -void irda_qos_compute_intersection(struct qos_info *, struct qos_info *); - -__u32 irlap_max_line_capacity(__u32 speed, __u32 max_turn_time); - -void irda_qos_bits_to_value(struct qos_info *qos); - -/* So simple, how could we not inline those two ? - * Note : one byte is 10 bits if you include start and stop bits - * Jean II */ -#define irlap_min_turn_time_in_bytes(speed, min_turn_time) ( \ - speed * min_turn_time / 10000000 \ -) -#define irlap_xbofs_in_usec(speed, xbofs) ( \ - xbofs * 10000000 / speed \ -) - -#endif - diff --git a/include/net/irda/timer.h b/include/net/irda/timer.h deleted file mode 100644 index d784f242cf7b..000000000000 --- a/include/net/irda/timer.h +++ /dev/null @@ -1,105 +0,0 @@ -/********************************************************************* - * - * Filename: timer.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sat Aug 16 00:59:29 1997 - * Modified at: Thu Oct 7 12:25:24 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997, 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef TIMER_H -#define TIMER_H - -#include -#include - -#include /* for HZ */ - -#include - -/* A few forward declarations (to make compiler happy) */ -struct irlmp_cb; -struct irlap_cb; -struct lsap_cb; -struct lap_cb; - -/* - * Timeout definitions, some defined in IrLAP 6.13.5 - p. 92 - */ -#define POLL_TIMEOUT (450*HZ/1000) /* Must never exceed 500 ms */ -#define FINAL_TIMEOUT (500*HZ/1000) /* Must never exceed 500 ms */ - -/* - * Normally twice of p-timer. Note 3, IrLAP 6.3.11.2 - p. 60 suggests - * at least twice duration of the P-timer. - */ -#define WD_TIMEOUT (POLL_TIMEOUT*2) - -#define MEDIABUSY_TIMEOUT (500*HZ/1000) /* 500 msec */ -#define SMALLBUSY_TIMEOUT (100*HZ/1000) /* 100 msec - IrLAP 6.13.4 */ - -/* - * Slot timer must never exceed 85 ms, and must always be at least 25 ms, - * suggested to 75-85 msec by IrDA lite. This doesn't work with a lot of - * devices, and other stackes uses a lot more, so it's best we do it as well - * (Note : this is the default value and sysctl overrides it - Jean II) - */ -#define SLOT_TIMEOUT (90*HZ/1000) - -/* - * The latest discovery frame (XID) is longer due to the extra discovery - * information (hints, device name...). This is its extra length. - * We use that when setting the query timeout. Jean II - */ -#define XIDEXTRA_TIMEOUT (34*HZ/1000) /* 34 msec */ - -#define WATCHDOG_TIMEOUT (20*HZ) /* 20 sec */ - -typedef void (*TIMER_CALLBACK)(void *); - -static inline void irda_start_timer(struct timer_list *ptimer, int timeout, - void* data, TIMER_CALLBACK callback) -{ - ptimer->function = (void (*)(unsigned long)) callback; - ptimer->data = (unsigned long) data; - - /* Set new value for timer (update or add timer). - * We use mod_timer() because it's more efficient and also - * safer with respect to race conditions - Jean II */ - mod_timer(ptimer, jiffies + timeout); -} - - -void irlap_start_slot_timer(struct irlap_cb *self, int timeout); -void irlap_start_query_timer(struct irlap_cb *self, int S, int s); -void irlap_start_final_timer(struct irlap_cb *self, int timeout); -void irlap_start_wd_timer(struct irlap_cb *self, int timeout); -void irlap_start_backoff_timer(struct irlap_cb *self, int timeout); - -void irlap_start_mbusy_timer(struct irlap_cb *self, int timeout); -void irlap_stop_mbusy_timer(struct irlap_cb *); - -void irlmp_start_watchdog_timer(struct lsap_cb *, int timeout); -void irlmp_start_discovery_timer(struct irlmp_cb *, int timeout); -void irlmp_start_idle_timer(struct lap_cb *, int timeout); -void irlmp_stop_idle_timer(struct lap_cb *self); - -#endif - diff --git a/include/net/irda/wrapper.h b/include/net/irda/wrapper.h deleted file mode 100644 index eef53ebe3d76..000000000000 --- a/include/net/irda/wrapper.h +++ /dev/null @@ -1,58 +0,0 @@ -/********************************************************************* - * - * Filename: wrapper.h - * Version: 1.2 - * Description: IrDA SIR async wrapper layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Tue Jan 11 12:37:29 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef WRAPPER_H -#define WRAPPER_H - -#include -#include -#include - -#include /* iobuff_t */ - -#define BOF 0xc0 /* Beginning of frame */ -#define XBOF 0xff -#define EOF 0xc1 /* End of frame */ -#define CE 0x7d /* Control escape */ - -#define STA BOF /* Start flag */ -#define STO EOF /* End flag */ - -#define IRDA_TRANS 0x20 /* Asynchronous transparency modifier */ - -/* States for receiving a frame in async mode */ -enum { - OUTSIDE_FRAME, - BEGIN_FRAME, - LINK_ESCAPE, - INSIDE_FRAME -}; - -/* Proto definitions */ -int async_wrap_skb(struct sk_buff *skb, __u8 *tx_buff, int buffsize); -void async_unwrap_char(struct net_device *dev, struct net_device_stats *stats, - iobuff_t *buf, __u8 byte); - -#endif -- cgit v1.2.3 From 21acf63013ed3d6fce3176cc34b74064052a31b4 Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Mon, 28 Aug 2017 16:18:42 +1000 Subject: net/ncsi: Configure VLAN tag filter Make use of the ndo_vlan_rx_{add,kill}_vid callbacks to have the NCSI stack process new VLAN tags and configure the channel VLAN filter appropriately. Several VLAN tags can be set and a "Set VLAN Filter" packet must be sent for each one, meaning the ncsi_dev_state_config_svf state must be repeated. An internal list of VLAN tags is maintained, and compared against the current channel's ncsi_channel_filter in order to keep track within the state. VLAN filters are removed in a similar manner, with the introduction of the ncsi_dev_state_config_clear_vids state. The maximum number of VLAN tag filters is determined by the "Get Capabilities" response from the channel. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- include/net/ncsi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 68680baac0fd..1f96af46df49 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -28,6 +28,8 @@ struct ncsi_dev { }; #ifdef CONFIG_NET_NCSI +int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid); +int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid); struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)); int ncsi_start_dev(struct ncsi_dev *nd); -- cgit v1.2.3 From 10674a03c633379fadb5b314abde975fba270058 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 29 Aug 2017 10:15:40 +0100 Subject: net: rxrpc: Replace time_t type with time64_t type Since the 'expiry' variable of 'struct key_preparsed_payload' has been changed to 'time64_t' type, which is year 2038 safe on 32bits system. In net/rxrpc subsystem, we need convert 'u32' type to 'time64_t' type when copying ticket expires time to 'prep->expiry', then this patch introduces two helper functions to help convert 'u32' to 'time64_t' type. This patch also uses ktime_get_real_seconds() to get current time instead of get_seconds() which is not year 2038 safe on 32bits system. Signed-off-by: Baolin Wang Signed-off-by: David Howells --- include/keys/rxrpc-type.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h index 5de0673f333b..8cf829dbf20e 100644 --- a/include/keys/rxrpc-type.h +++ b/include/keys/rxrpc-type.h @@ -127,4 +127,27 @@ struct rxrpc_key_data_v1 { #define AFSTOKEN_K5_ADDRESSES_MAX 16 /* max K5 addresses */ #define AFSTOKEN_K5_AUTHDATA_MAX 16 /* max K5 pieces of auth data */ +/* + * Truncate a time64_t to the range from 1970 to 2106 as in the network + * protocol. + */ +static inline u32 rxrpc_time64_to_u32(time64_t time) +{ + if (time < 0) + return 0; + + if (time > UINT_MAX) + return UINT_MAX; + + return (u32)time; +} + +/* + * Extend u32 back to time64_t using the same 1970-2106 range. + */ +static inline time64_t rxrpc_u32_to_time64(u32 time) +{ + return (time64_t)time; +} + #endif /* _KEYS_RXRPC_TYPE_H */ -- cgit v1.2.3 From e833251ad813168253fef9915aaf6a8c883337b0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2017 10:18:56 +0100 Subject: rxrpc: Add notification of end-of-Tx phase Add a callback to rxrpc_kernel_send_data() so that a kernel service can get a notification that the AF_RXRPC call has transitioned out the Tx phase and is now waiting for a reply or a final ACK. This is called from AF_RXRPC with the call state lock held so the notification is guaranteed to come before any reply is passed back. Further, modify the AFS filesystem to make use of this so that we don't have to change the afs_call state before sending the last bit of data. Signed-off-by: David Howells --- include/net/af_rxrpc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index c172709787af..07a47ee6f783 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -21,6 +21,8 @@ struct rxrpc_call; typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); +typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *, + unsigned long); typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *, unsigned long); typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long); @@ -37,7 +39,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, gfp_t, rxrpc_notify_rx_t); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, - struct msghdr *, size_t); + struct msghdr *, size_t, + rxrpc_notify_end_tx_t); int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, void *, size_t, size_t *, bool, u32 *); bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, -- cgit v1.2.3 From c038a58ccfd6704d4d7d60ed3d6a0fca13cf13a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2017 10:19:01 +0100 Subject: rxrpc: Allow failed client calls to be retried Allow a client call that failed on network error to be retried, provided that the Tx queue still holds DATA packet 1. This allows an operation to be submitted to another server or another address for the same server without having to repackage and re-encrypt the data so far processed. Two new functions are provided: (1) rxrpc_kernel_check_call() - This is used to find out the completion state of a call to guess whether it can be retried and whether it should be retried. (2) rxrpc_kernel_retry_call() - Disconnect the call from its current connection, reset the state and submit it as a new client call to a new address. The new address need not match the previous address. A call may be retried even if all the data hasn't been loaded into it yet; a partially constructed will be retained at the same point it was at when an error condition was detected. msg_data_left() can be used to find out how much data was packaged before the error occurred. Signed-off-by: David Howells --- include/net/af_rxrpc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 07a47ee6f783..3ac79150291f 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -19,6 +19,18 @@ struct sock; struct socket; struct rxrpc_call; +/* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ + RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ + RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ + RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ + NR__RXRPC_CALL_COMPLETIONS +}; + typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *, @@ -51,5 +63,9 @@ void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); +int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, + struct sockaddr_rxrpc *, struct key *); +int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, + enum rxrpc_call_completion *, u32 *); #endif /* _NET_RXRPC_H */ -- cgit v1.2.3 From c31e5a4876cdfbfcc28c4fb201ad872c65671067 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 29 Aug 2017 16:37:40 +0200 Subject: xdp: remove redundant argument to trace_xdp_redirect Supplying the action argument XDP_REDIRECT to the tracepoint xdp_redirect is redundant as it is only called in-case this action was specified. Remove the argument, but keep "act" member of the tracepoint struct and populate it with XDP_REDIRECT. This makes it easier to write a common bpf_prog processing events. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 27cf2ef35f5f..f684f3b36bca 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -52,10 +52,10 @@ TRACE_EVENT(xdp_exception, TRACE_EVENT(xdp_redirect, TP_PROTO(const struct net_device *dev, - const struct bpf_prog *xdp, u32 act, + const struct bpf_prog *xdp, int to_index, int err), - TP_ARGS(dev, xdp, act, to_index, err), + TP_ARGS(dev, xdp, to_index, err), TP_STRUCT__entry( __array(u8, prog_tag, 8) @@ -68,7 +68,7 @@ TRACE_EVENT(xdp_redirect, TP_fast_assign( BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); - __entry->act = act; + __entry->act = XDP_REDIRECT; __entry->ifindex = dev->ifindex; __entry->to_index = to_index; __entry->err = err; -- cgit v1.2.3 From 8d3b778ff544b369f0847e6c15f3e73057298aa4 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 29 Aug 2017 16:37:45 +0200 Subject: xdp: tracepoint xdp_redirect also need a map argument To make sense of the map index, the tracepoint user also need to know that map we are talking about. Supply the map pointer but only expose the map->id. The 'to_index' is renamed 'to_ifindex'. In the xdp_redirect_map case, this is the result of the devmap lookup. The map lookup key is exposed as map_index, which is needed to troubleshoot in case the lookup failed. The 'to_ifindex' is placed after 'err' to keep TP_STRUCT as common as possible. This also keeps the TP_STRUCT similar enough, that userspace can write a monitor program, that doesn't need to care about whether bpf_redirect or bpf_redirect_map were used. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index f684f3b36bca..573dcfa1aeaa 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -49,20 +49,23 @@ TRACE_EVENT(xdp_exception, __entry->ifindex) ); -TRACE_EVENT(xdp_redirect, +DECLARE_EVENT_CLASS(xdp_redirect_template, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, - int to_index, int err), + int to_ifindex, int err, + const struct bpf_map *map, u32 map_index), - TP_ARGS(dev, xdp, to_index, err), + TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), TP_STRUCT__entry( __array(u8, prog_tag, 8) __field(u32, act) __field(int, ifindex) - __field(int, to_index) __field(int, err) + __field(int, to_ifindex) + __field(u32, map_id) + __field(int, map_index) ), TP_fast_assign( @@ -70,16 +73,35 @@ TRACE_EVENT(xdp_redirect, memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); __entry->act = XDP_REDIRECT; __entry->ifindex = dev->ifindex; - __entry->to_index = to_index; __entry->err = err; + __entry->to_ifindex = to_ifindex; + __entry->map_id = map ? map->id : 0; + __entry->map_index = map_index; ), - TP_printk("prog=%s action=%s ifindex=%d to_index=%d err=%d", + TP_printk("prog=%s action=%s ifindex=%d to_ifindex=%d err=%d" + " map_id=%d map_index=%d", __print_hex_str(__entry->prog_tag, 8), __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), - __entry->ifindex, __entry->to_index, - __entry->err) + __entry->ifindex, __entry->to_ifindex, + __entry->err, + __entry->map_id, __entry->map_index) ); + +DEFINE_EVENT(xdp_redirect_template, xdp_redirect, + TP_PROTO(const struct net_device *dev, + const struct bpf_prog *xdp, + int to_ifindex, int err, + const struct bpf_map *map, u32 map_index), + TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) +); + +#define _trace_xdp_redirect(dev, xdp, to, err) \ + trace_xdp_redirect(dev, xdp, to, err, NULL, 0); + +#define trace_xdp_redirect_map(dev, xdp, fwd, err, map, idx) \ + trace_xdp_redirect(dev, xdp, fwd ? fwd->ifindex : 0, err, map, idx); + #endif /* _TRACE_XDP_H */ #include -- cgit v1.2.3 From b06337dfdb16bc3f668326b6a618c472c671182a Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 29 Aug 2017 16:37:51 +0200 Subject: xdp: make xdp tracepoints report bpf prog id instead of prog_tag Given previous patch expose the map_id, it seems natural to also report the bpf prog id. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 573dcfa1aeaa..89eba564e199 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -31,20 +31,19 @@ TRACE_EVENT(xdp_exception, TP_ARGS(dev, xdp, act), TP_STRUCT__entry( - __array(u8, prog_tag, 8) + __field(int, prog_id) __field(u32, act) __field(int, ifindex) ), TP_fast_assign( - BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); - memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); + __entry->prog_id = xdp->aux->id; __entry->act = act; __entry->ifindex = dev->ifindex; ), - TP_printk("prog=%s action=%s ifindex=%d", - __print_hex_str(__entry->prog_tag, 8), + TP_printk("prog_id=%d action=%s ifindex=%d", + __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex) ); @@ -59,7 +58,7 @@ DECLARE_EVENT_CLASS(xdp_redirect_template, TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), TP_STRUCT__entry( - __array(u8, prog_tag, 8) + __field(int, prog_id) __field(u32, act) __field(int, ifindex) __field(int, err) @@ -69,8 +68,7 @@ DECLARE_EVENT_CLASS(xdp_redirect_template, ), TP_fast_assign( - BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); - memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); + __entry->prog_id = xdp->aux->id; __entry->act = XDP_REDIRECT; __entry->ifindex = dev->ifindex; __entry->err = err; @@ -79,9 +77,9 @@ DECLARE_EVENT_CLASS(xdp_redirect_template, __entry->map_index = map_index; ), - TP_printk("prog=%s action=%s ifindex=%d to_ifindex=%d err=%d" + TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" " map_id=%d map_index=%d", - __print_hex_str(__entry->prog_tag, 8), + __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex, __entry->to_ifindex, __entry->err, -- cgit v1.2.3 From f5836ca5e9867fa6ab88cadb9873af56d9ceb589 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 29 Aug 2017 16:37:56 +0200 Subject: xdp: separate xdp_redirect tracepoint in error case There is a need to separate the xdp_redirect tracepoint into two tracepoints, for separating the error case from the normal forward case. Due to the extreme speeds XDP is operating at, loading a tracepoint have a measurable impact. Single core XDP REDIRECT (ethtool tuned rx-usecs 25) can do 13.7 Mpps forwarding, but loading a simple bpf_prog at the tracepoint (with a return 0) reduce perf to 10.2 Mpps (CPU E5-1650 v4 @ 3.60GHz, driver: ixgbe) The overhead of loading a bpf-based tracepoint can be calculated to cost 25 nanosec ((1/13782002-1/10267937)*10^9 = -24.83 ns). Using perf record on the tracepoint event, with a non-matching --filter expression, the overhead is much larger. Performance drops to 8.3 Mpps, cost 48 nanosec ((1/13782002-1/8312497)*10^9 = -47.74)) Having a separate tracepoint for err cases, which should be less frequent, allow running a continuous monitor for errors while not affecting the redirect forward performance (this have also been verified by measurements). Signed-off-by: Jesper Dangaard Brouer Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 89eba564e199..1eebad55ebd4 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -94,11 +94,25 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect, TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) ); -#define _trace_xdp_redirect(dev, xdp, to, err) \ - trace_xdp_redirect(dev, xdp, to, err, NULL, 0); +DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, + TP_PROTO(const struct net_device *dev, + const struct bpf_prog *xdp, + int to_ifindex, int err, + const struct bpf_map *map, u32 map_index), + TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) +); + +#define _trace_xdp_redirect(dev, xdp, to) \ + trace_xdp_redirect(dev, xdp, to, 0, NULL, 0); + +#define _trace_xdp_redirect_err(dev, xdp, to, err) \ + trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0); + +#define trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ + trace_xdp_redirect(dev, xdp, fwd ? fwd->ifindex : 0, 0, map, idx); -#define trace_xdp_redirect_map(dev, xdp, fwd, err, map, idx) \ - trace_xdp_redirect(dev, xdp, fwd ? fwd->ifindex : 0, err, map, idx); +#define trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ + trace_xdp_redirect_err(dev, xdp, fwd ? fwd->ifindex : 0, err, map, idx); #endif /* _TRACE_XDP_H */ -- cgit v1.2.3 From 59a308967589f5b3f1f42793ab49bc2e18069769 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 29 Aug 2017 16:38:01 +0200 Subject: xdp: separate xdp_redirect tracepoint in map case Creating as specific xdp_redirect_map variant of the xdp tracepoints allow users to write simpler/faster BPF progs that get attached to these tracepoints. Goal is to still keep the tracepoints in xdp_redirect and xdp_redirect_map similar enough, that a tool can read the top part of the TP_STRUCT and produce similar monitor statistics. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 1eebad55ebd4..862575ac8da9 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -77,13 +77,11 @@ DECLARE_EVENT_CLASS(xdp_redirect_template, __entry->map_index = map_index; ), - TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" - " map_id=%d map_index=%d", + TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d", __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex, __entry->to_ifindex, - __entry->err, - __entry->map_id, __entry->map_index) + __entry->err) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect, @@ -108,11 +106,43 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, #define _trace_xdp_redirect_err(dev, xdp, to, err) \ trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0); -#define trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ - trace_xdp_redirect(dev, xdp, fwd ? fwd->ifindex : 0, 0, map, idx); +DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map, + TP_PROTO(const struct net_device *dev, + const struct bpf_prog *xdp, + int to_ifindex, int err, + const struct bpf_map *map, u32 map_index), + TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), + TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" + " map_id=%d map_index=%d", + __entry->prog_id, + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->ifindex, __entry->to_ifindex, + __entry->err, + __entry->map_id, __entry->map_index) +); + +DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, + TP_PROTO(const struct net_device *dev, + const struct bpf_prog *xdp, + int to_ifindex, int err, + const struct bpf_map *map, u32 map_index), + TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), + TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" + " map_id=%d map_index=%d", + __entry->prog_id, + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->ifindex, __entry->to_ifindex, + __entry->err, + __entry->map_id, __entry->map_index) +); + +#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ + trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0, \ + 0, map, idx); -#define trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ - trace_xdp_redirect_err(dev, xdp, fwd ? fwd->ifindex : 0, err, map, idx); +#define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ + trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0, \ + err, map, idx); #endif /* _TRACE_XDP_H */ -- cgit v1.2.3 From b74fd306ef2d48781e807dd506cca818bb83d749 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 29 Aug 2017 13:16:57 -0700 Subject: bridge: fdb add and delete tracepoints A few useful tracepoints to trace bridge forwarding database updates. Signed-off-by: Roopa Prabhu Reviewed-by: Florian Fainelli Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/trace/events/bridge.h | 98 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 include/trace/events/bridge.h (limited to 'include') diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h new file mode 100644 index 000000000000..0f1cde00af11 --- /dev/null +++ b/include/trace/events/bridge.h @@ -0,0 +1,98 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bridge + +#if !defined(_TRACE_BRIDGE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BRIDGE_H + +#include +#include + +#include "../../../net/bridge/br_private.h" + +TRACE_EVENT(br_fdb_add, + + TP_PROTO(struct ndmsg *ndm, struct net_device *dev, + const unsigned char *addr, u16 vid, u16 nlh_flags), + + TP_ARGS(ndm, dev, addr, vid, nlh_flags), + + TP_STRUCT__entry( + __field(u8, ndm_flags) + __string(dev, dev->name) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __field(u16, nlh_flags) + ), + + TP_fast_assign( + __assign_str(dev, dev->name); + memcpy(__entry->addr, addr, ETH_ALEN); + __entry->vid = vid; + __entry->nlh_flags = nlh_flags; + __entry->ndm_flags = ndm->ndm_flags; + ), + + TP_printk("dev %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u nlh_flags %04x ndm_flags %02x", + __get_str(dev), __entry->addr[0], __entry->addr[1], + __entry->addr[2], __entry->addr[3], __entry->addr[4], + __entry->addr[5], __entry->vid, + __entry->nlh_flags, __entry->ndm_flags) +); + +TRACE_EVENT(br_fdb_external_learn_add, + + TP_PROTO(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid), + + TP_ARGS(br, p, addr, vid), + + TP_STRUCT__entry( + __string(br_dev, br->dev->name) + __string(dev, p ? p->dev->name : "null") + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + ), + + TP_fast_assign( + __assign_str(br_dev, br->dev->name); + __assign_str(dev, p ? p->dev->name : "null"); + memcpy(__entry->addr, addr, ETH_ALEN); + __entry->vid = vid; + ), + + TP_printk("br_dev %s port %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u", + __get_str(br_dev), __get_str(dev), __entry->addr[0], + __entry->addr[1], __entry->addr[2], __entry->addr[3], + __entry->addr[4], __entry->addr[5], __entry->vid) +); + +TRACE_EVENT(fdb_delete, + + TP_PROTO(struct net_bridge *br, struct net_bridge_fdb_entry *f), + + TP_ARGS(br, f), + + TP_STRUCT__entry( + __string(br_dev, br->dev->name) + __string(dev, f->dst ? f->dst->dev->name : "null") + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + ), + + TP_fast_assign( + __assign_str(br_dev, br->dev->name); + __assign_str(dev, f->dst ? f->dst->dev->name : "null"); + memcpy(__entry->addr, f->addr.addr, ETH_ALEN); + __entry->vid = f->vlan_id; + ), + + TP_printk("br_dev %s dev %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u", + __get_str(br_dev), __get_str(dev), __entry->addr[0], + __entry->addr[1], __entry->addr[2], __entry->addr[3], + __entry->addr[4], __entry->addr[5], __entry->vid) +); + +#endif /* _TRACE_BRIDGE_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From c73c8b1e47ca275388f774fd36560ab7e994e99e Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 28 Aug 2017 16:38:20 +0300 Subject: net/mlx4_core: Dynamically allocate structs at mlx4_slave_cap In order to avoid temporary large structs on the stack, allocate them dynamically. Signed-off-by: Eran Ben Elisha Signed-off-by: Tal Alon Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b54517c05e9a..9844606f9491 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -518,6 +518,14 @@ struct mlx4_phys_caps { u32 base_tunnel_sqpn; }; +struct mlx4_spec_qps { + u32 qp0_qkey; + u32 qp0_proxy; + u32 qp0_tunnel; + u32 qp1_proxy; + u32 qp1_tunnel; +}; + struct mlx4_caps { u64 fw_ver; u32 function; @@ -547,11 +555,7 @@ struct mlx4_caps { int max_qp_init_rdma; int max_qp_dest_rdma; int max_tc_eth; - u32 *qp0_qkey; - u32 *qp0_proxy; - u32 *qp1_proxy; - u32 *qp0_tunnel; - u32 *qp1_tunnel; + struct mlx4_spec_qps *spec_qps; int num_srqs; int max_srq_wqes; int max_srq_sge; -- cgit v1.2.3 From be59960395f86991c6599c41d8c421fe4bf7a210 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 28 Aug 2017 16:38:23 +0300 Subject: net/mlx4: Add user mac FW update support Adding support for updating the FW on new port mac, when port mac change is requested by the user. This info is required by the FW as OEM management tools require this info directly from the NIC FW. Check device capability bit to verify the FW supports user mac. If the FW does support it, use set_port command to notify the FW on the new mac. The feature is relevant only to PF port mac. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9844606f9491..89c0e7f7cd9b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -224,6 +224,7 @@ enum { MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP = 1ULL << 36, MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 37, + MLX4_DEV_CAP_FLAG2_USER_MAC_EN = 1ULL << 38, }; enum { @@ -1377,6 +1378,7 @@ int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port); int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); +int mlx4_SET_PORT_user_mac(struct mlx4_dev *dev, u8 port, u8 *user_mac); int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); -- cgit v1.2.3 From 2804fd3af6ba5ae5737705b27146455eabe2e2f8 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 28 Aug 2017 15:03:13 -0400 Subject: if_ether: add forces ife lfb type This patch adds the forces IFE lfb type according to IEEE registered ethertypes. See http://standards-oui.ieee.org/ethertype/eth.txt for more information. Since there exists the IFE subsystem it can be used there. This patch also use the correct word "ForCES" instead of "FoRCES" which is a spelling error inside the IEEE ethertype specification. Signed-off-by: Alexander Aring Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index efeb1190c2ca..f68f6bf4a253 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -104,6 +104,7 @@ #define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */ #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value -- cgit v1.2.3 From 155e6f649757c902901e599c268f8b575ddac1f8 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Mon, 28 Aug 2017 21:43:21 +0200 Subject: ether: add NSH ethertype The NSH draft says: An IEEE EtherType, 0x894F, has been allocated for NSH. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index f68f6bf4a253..61f7ccce5b69 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -99,6 +99,7 @@ #define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */ #define ETH_P_80221 0x8917 /* IEEE 802.21 Media Independent Handover Protocol */ #define ETH_P_HSR 0x892F /* IEC 62439-3 HSRv1 */ +#define ETH_P_NSH 0x894F /* Network Service Header */ #define ETH_P_LOOPBACK 0x9000 /* Ethernet loopback packet, per IEEE 802.3 */ #define ETH_P_QINQ1 0x9100 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ -- cgit v1.2.3 From fa20e0e32cb3dfc1760b6254b64977f2fb5bd851 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Mon, 28 Aug 2017 21:43:22 +0200 Subject: vxlan: factor out VXLAN-GPE next protocol The values are shared between VXLAN-GPE and NSH. Originally probably by coincidence but I notified both working groups about this last year and they seem to keep the values in sync since then. Hopefully they'll get a single IANA registry for the values, too. (I asked them for that.) Factor out the code to be shared by the NSH implementation. NSH and MPLS values are added in this patch, too. For MPLS, the drafts incorrectly assign only a single value, while we have two MPLS ethertypes. I raised the problem with both groups. For now, I assume the value is for unicast. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/tun_proto.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ include/net/vxlan.h | 6 ------ 2 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 include/net/tun_proto.h (limited to 'include') diff --git a/include/net/tun_proto.h b/include/net/tun_proto.h new file mode 100644 index 000000000000..2ea3deba4c99 --- /dev/null +++ b/include/net/tun_proto.h @@ -0,0 +1,49 @@ +#ifndef __NET_TUN_PROTO_H +#define __NET_TUN_PROTO_H + +#include + +/* One byte protocol values as defined by VXLAN-GPE and NSH. These will + * hopefully get a shared IANA registry. + */ +#define TUN_P_IPV4 0x01 +#define TUN_P_IPV6 0x02 +#define TUN_P_ETHERNET 0x03 +#define TUN_P_NSH 0x04 +#define TUN_P_MPLS_UC 0x05 + +static inline __be16 tun_p_to_eth_p(u8 proto) +{ + switch (proto) { + case TUN_P_IPV4: + return htons(ETH_P_IP); + case TUN_P_IPV6: + return htons(ETH_P_IPV6); + case TUN_P_ETHERNET: + return htons(ETH_P_TEB); + case TUN_P_NSH: + return htons(ETH_P_NSH); + case TUN_P_MPLS_UC: + return htons(ETH_P_MPLS_UC); + } + return 0; +} + +static inline u8 tun_p_from_eth_p(__be16 proto) +{ + switch (proto) { + case htons(ETH_P_IP): + return TUN_P_IPV4; + case htons(ETH_P_IPV6): + return TUN_P_IPV6; + case htons(ETH_P_TEB): + return TUN_P_ETHERNET; + case htons(ETH_P_NSH): + return TUN_P_NSH; + case htons(ETH_P_MPLS_UC): + return TUN_P_MPLS_UC; + } + return 0; +} + +#endif diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 3f430e38ab82..4e3876dde295 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -168,12 +168,6 @@ reserved_flags2:2; #define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \ cpu_to_be32(0xff)) -/* VXLAN-GPE header Next Protocol. */ -#define VXLAN_GPE_NP_IPV4 0x01 -#define VXLAN_GPE_NP_IPV6 0x02 -#define VXLAN_GPE_NP_ETHERNET 0x03 -#define VXLAN_GPE_NP_NSH 0x04 - struct vxlan_metadata { u32 gbp; }; -- cgit v1.2.3 From 1f0b7744c50573df464ca33d8e5275be509f852b Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Mon, 28 Aug 2017 21:43:23 +0200 Subject: net: add NSH header structures and helpers NSH (Network Service Header)[1] is a new protocol for service function chaining, it can be handled as a L3 protocol like IPv4 and IPv6, Eth + NSH + Inner packet or VxLAN-gpe + NSH + Inner packet are two typical use cases. This patch adds NSH header structures and helpers for NSH GSO support and Open vSwitch NSH support. [1] https://datatracker.ietf.org/doc/draft-ietf-sfc-nsh/ [Jiri: added nsh_hdr() helper and renamed the header struct to "struct nshhdr" to match the usual pattern. Removed packet type defines, these are now shared with VXLAN-GPE.] Signed-off-by: Yi Yang Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/nsh.h | 307 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 307 insertions(+) create mode 100644 include/net/nsh.h (limited to 'include') diff --git a/include/net/nsh.h b/include/net/nsh.h new file mode 100644 index 000000000000..a1eaea20be96 --- /dev/null +++ b/include/net/nsh.h @@ -0,0 +1,307 @@ +#ifndef __NET_NSH_H +#define __NET_NSH_H 1 + +#include + +/* + * Network Service Header: + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Ver|O|U| TTL | Length |U|U|U|U|MD Type| Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Service Path Identifier (SPI) | Service Index | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * ~ Mandatory/Optional Context Headers ~ + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Version: The version field is used to ensure backward compatibility + * going forward with future NSH specification updates. It MUST be set + * to 0x0 by the sender, in this first revision of NSH. Given the + * widespread implementation of existing hardware that uses the first + * nibble after an MPLS label stack for ECMP decision processing, this + * document reserves version 01b and this value MUST NOT be used in + * future versions of the protocol. Please see [RFC7325] for further + * discussion of MPLS-related forwarding requirements. + * + * O bit: Setting this bit indicates an Operations, Administration, and + * Maintenance (OAM) packet. The actual format and processing of SFC + * OAM packets is outside the scope of this specification (see for + * example [I-D.ietf-sfc-oam-framework] for one approach). + * + * The O bit MUST be set for OAM packets and MUST NOT be set for non-OAM + * packets. The O bit MUST NOT be modified along the SFP. + * + * SF/SFF/SFC Proxy/Classifier implementations that do not support SFC + * OAM procedures SHOULD discard packets with O bit set, but MAY support + * a configurable parameter to enable forwarding received SFC OAM + * packets unmodified to the next element in the chain. Forwarding OAM + * packets unmodified by SFC elements that do not support SFC OAM + * procedures may be acceptable for a subset of OAM functions, but can + * result in unexpected outcomes for others, thus it is recommended to + * analyze the impact of forwarding an OAM packet for all OAM functions + * prior to enabling this behavior. The configurable parameter MUST be + * disabled by default. + * + * TTL: Indicates the maximum SFF hops for an SFP. This field is used + * for service plane loop detection. The initial TTL value SHOULD be + * configurable via the control plane; the configured initial value can + * be specific to one or more SFPs. If no initial value is explicitly + * provided, the default initial TTL value of 63 MUST be used. Each SFF + * involved in forwarding an NSH packet MUST decrement the TTL value by + * 1 prior to NSH forwarding lookup. Decrementing by 1 from an incoming + * value of 0 shall result in a TTL value of 63. The packet MUST NOT be + * forwarded if TTL is, after decrement, 0. + * + * All other flag fields, marked U, are unassigned and available for + * future use, see Section 11.2.1. Unassigned bits MUST be set to zero + * upon origination, and MUST be ignored and preserved unmodified by + * other NSH supporting elements. Elements which do not understand the + * meaning of any of these bits MUST NOT modify their actions based on + * those unknown bits. + * + * Length: The total length, in 4-byte words, of NSH including the Base + * Header, the Service Path Header, the Fixed Length Context Header or + * Variable Length Context Header(s). The length MUST be 0x6 for MD + * Type equal to 0x1, and MUST be 0x2 or greater for MD Type equal to + * 0x2. The length of the NSH header MUST be an integer multiple of 4 + * bytes, thus variable length metadata is always padded out to a + * multiple of 4 bytes. + * + * MD Type: Indicates the format of NSH beyond the mandatory Base Header + * and the Service Path Header. MD Type defines the format of the + * metadata being carried. + * + * 0x0 - This is a reserved value. Implementations SHOULD silently + * discard packets with MD Type 0x0. + * + * 0x1 - This indicates that the format of the header includes a fixed + * length Context Header (see Figure 4 below). + * + * 0x2 - This does not mandate any headers beyond the Base Header and + * Service Path Header, but may contain optional variable length Context + * Header(s). The semantics of the variable length Context Header(s) + * are not defined in this document. The format of the optional + * variable length Context Headers is provided in Section 2.5.1. + * + * 0xF - This value is reserved for experimentation and testing, as per + * [RFC3692]. Implementations not explicitly configured to be part of + * an experiment SHOULD silently discard packets with MD Type 0xF. + * + * Next Protocol: indicates the protocol type of the encapsulated data. + * NSH does not alter the inner payload, and the semantics on the inner + * protocol remain unchanged due to NSH service function chaining. + * Please see the IANA Considerations section below, Section 11.2.5. + * + * This document defines the following Next Protocol values: + * + * 0x1: IPv4 + * 0x2: IPv6 + * 0x3: Ethernet + * 0x4: NSH + * 0x5: MPLS + * 0xFE: Experiment 1 + * 0xFF: Experiment 2 + * + * Packets with Next Protocol values not supported SHOULD be silently + * dropped by default, although an implementation MAY provide a + * configuration parameter to forward them. Additionally, an + * implementation not explicitly configured for a specific experiment + * [RFC3692] SHOULD silently drop packets with Next Protocol values 0xFE + * and 0xFF. + * + * Service Path Identifier (SPI): Identifies a service path. + * Participating nodes MUST use this identifier for Service Function + * Path selection. The initial classifier MUST set the appropriate SPI + * for a given classification result. + * + * Service Index (SI): Provides location within the SFP. The initial + * classifier for a given SFP SHOULD set the SI to 255, however the + * control plane MAY configure the initial value of SI as appropriate + * (i.e., taking into account the length of the service function path). + * The Service Index MUST be decremented by a value of 1 by Service + * Functions or by SFC Proxy nodes after performing required services + * and the new decremented SI value MUST be used in the egress packet's + * NSH. The initial Classifier MUST send the packet to the first SFF in + * the identified SFP for forwarding along an SFP. If re-classification + * occurs, and that re-classification results in a new SPI, the + * (re)classifier is, in effect, the initial classifier for the + * resultant SPI. + * + * The SI is used in conjunction the with Service Path Identifier for + * Service Function Path Selection and for determining the next SFF/SF + * in the path. The SI is also valuable when troubleshooting or + * reporting service paths. Additionally, while the TTL field is the + * main mechanism for service plane loop detection, the SI can also be + * used for detecting service plane loops. + * + * When the Base Header specifies MD Type = 0x1, a Fixed Length Context + * Header (16-bytes) MUST be present immediately following the Service + * Path Header. The value of a Fixed Length Context + * Header that carries no metadata MUST be set to zero. + * + * When the base header specifies MD Type = 0x2, zero or more Variable + * Length Context Headers MAY be added, immediately following the + * Service Path Header (see Figure 5). Therefore, Length = 0x2, + * indicates that only the Base Header followed by the Service Path + * Header are present. The optional Variable Length Context Headers + * MUST be of an integer number of 4-bytes. The base header Length + * field MUST be used to determine the offset to locate the original + * packet or frame for SFC nodes that require access to that + * information. + * + * The format of the optional variable length Context Headers + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Metadata Class | Type |U| Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Variable Metadata | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Metadata Class (MD Class): Defines the scope of the 'Type' field to + * provide a hierarchical namespace. The IANA Considerations + * Section 11.2.4 defines how the MD Class values can be allocated to + * standards bodies, vendors, and others. + * + * Type: Indicates the explicit type of metadata being carried. The + * definition of the Type is the responsibility of the MD Class owner. + * + * Unassigned bit: One unassigned bit is available for future use. This + * bit MUST NOT be set, and MUST be ignored on receipt. + * + * Length: Indicates the length of the variable metadata, in bytes. In + * case the metadata length is not an integer number of 4-byte words, + * the sender MUST add pad bytes immediately following the last metadata + * byte to extend the metadata to an integer number of 4-byte words. + * The receiver MUST round up the length field to the nearest 4-byte + * word boundary, to locate and process the next field in the packet. + * The receiver MUST access only those bytes in the metadata indicated + * by the length field (i.e., actual number of bytes) and MUST ignore + * the remaining bytes up to the nearest 4-byte word boundary. The + * Length may be 0 or greater. + * + * A value of 0 denotes a Context Header without a Variable Metadata + * field. + * + * [0] https://datatracker.ietf.org/doc/draft-ietf-sfc-nsh/ + */ + +/** + * struct nsh_md1_ctx - Keeps track of NSH context data + * @nshc<1-4>: NSH Contexts. + */ +struct nsh_md1_ctx { + __be32 context[4]; +}; + +struct nsh_md2_tlv { + __be16 md_class; + u8 type; + u8 length; + u8 md_value[]; +}; + +struct nshhdr { + __be16 ver_flags_ttl_len; + u8 mdtype; + u8 np; + __be32 path_hdr; + union { + struct nsh_md1_ctx md1; + struct nsh_md2_tlv md2; + }; +}; + +/* Masking NSH header fields. */ +#define NSH_VER_MASK 0xc000 +#define NSH_VER_SHIFT 14 +#define NSH_FLAGS_MASK 0x3000 +#define NSH_FLAGS_SHIFT 12 +#define NSH_TTL_MASK 0x0fc0 +#define NSH_TTL_SHIFT 6 +#define NSH_LEN_MASK 0x003f +#define NSH_LEN_SHIFT 0 + +#define NSH_MDTYPE_MASK 0x0f +#define NSH_MDTYPE_SHIFT 0 + +#define NSH_SPI_MASK 0xffffff00 +#define NSH_SPI_SHIFT 8 +#define NSH_SI_MASK 0x000000ff +#define NSH_SI_SHIFT 0 + +/* MD Type Registry. */ +#define NSH_M_TYPE1 0x01 +#define NSH_M_TYPE2 0x02 +#define NSH_M_EXP1 0xFE +#define NSH_M_EXP2 0xFF + +/* NSH Base Header Length */ +#define NSH_BASE_HDR_LEN 8 + +/* NSH MD Type 1 header Length. */ +#define NSH_M_TYPE1_LEN 24 + +/* NSH header maximum Length. */ +#define NSH_HDR_MAX_LEN 256 + +/* NSH context headers maximum Length. */ +#define NSH_CTX_HDRS_MAX_LEN 248 + +static inline struct nshhdr *nsh_hdr(struct sk_buff *skb) +{ + return (struct nshhdr *)skb_network_header(skb); +} + +static inline u16 nsh_hdr_len(const struct nshhdr *nsh) +{ + return ((ntohs(nsh->ver_flags_ttl_len) & NSH_LEN_MASK) + >> NSH_LEN_SHIFT) << 2; +} + +static inline u8 nsh_get_ver(const struct nshhdr *nsh) +{ + return (ntohs(nsh->ver_flags_ttl_len) & NSH_VER_MASK) + >> NSH_VER_SHIFT; +} + +static inline u8 nsh_get_flags(const struct nshhdr *nsh) +{ + return (ntohs(nsh->ver_flags_ttl_len) & NSH_FLAGS_MASK) + >> NSH_FLAGS_SHIFT; +} + +static inline u8 nsh_get_ttl(const struct nshhdr *nsh) +{ + return (ntohs(nsh->ver_flags_ttl_len) & NSH_TTL_MASK) + >> NSH_TTL_SHIFT; +} + +static inline void __nsh_set_xflag(struct nshhdr *nsh, u16 xflag, u16 xmask) +{ + nsh->ver_flags_ttl_len + = (nsh->ver_flags_ttl_len & ~htons(xmask)) | htons(xflag); +} + +static inline void nsh_set_flags_and_ttl(struct nshhdr *nsh, u8 flags, u8 ttl) +{ + __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) | + ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK), + NSH_FLAGS_MASK | NSH_TTL_MASK); +} + +static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags, + u8 ttl, u8 len) +{ + len = len >> 2; + __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) | + ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK) | + ((len << NSH_LEN_SHIFT) & NSH_LEN_MASK), + NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK); +} + +#endif /* __NET_NSH_H */ -- cgit v1.2.3 From 1b70d792cf6775fb5d0737524387893daeb5374a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 28 Aug 2017 13:53:34 -0700 Subject: ipv6: Use rt6i_idev index for echo replies to a local address Tariq repored local pings to linklocal address is failing: $ ifconfig ens8 ens8: flags=4163 mtu 1500 inet 11.141.16.6 netmask 255.255.0.0 broadcast 11.141.255.255 inet6 fe80::7efe:90ff:fecb:7502 prefixlen 64 scopeid 0x20 ether 7c:fe:90:cb:75:02 txqueuelen 1000 (Ethernet) RX packets 12 bytes 1164 (1.1 KiB) RX errors 0 dropped 0 overruns 0 frame 0 TX packets 30 bytes 2484 (2.4 KiB) TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0 $ /bin/ping6 -c 3 fe80::7efe:90ff:fecb:7502%ens8 PING fe80::7efe:90ff:fecb:7502%ens8(fe80::7efe:90ff:fecb:7502) 56 data bytes Signed-off-by: David S. Miller --- include/net/ip6_route.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 882bc3c7ccde..ee96f402cb75 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -164,6 +164,16 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); +static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) +{ + const struct dst_entry *dst = skb_dst(skb); + const struct rt6_info *rt6 = NULL; + + if (dst) + rt6 = container_of(dst, struct rt6_info, dst); + + return rt6; +} /* * Store a destination cache entry in a socket -- cgit v1.2.3 From 0dd5759dbb1c9a862e7d90c09d6cf398c45f1100 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 29 Aug 2017 13:17:51 -0700 Subject: net: remove dmaengine.h inclusion from netdevice.h Since the removal of NET_DMA, dmaengine.h header file shouldn't be needed by netdevice.h anymore. Signed-off-by: Dave Jiang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c5475b37a631..29bf06ff157c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -35,7 +35,6 @@ #include #include -#include #include #include -- cgit v1.2.3 From eaa72dc47488d599439cd0fd0f8c4f1bcb3906bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Aug 2017 15:16:01 -0700 Subject: neigh: increase queue_len_bytes to match wmem_default Florian reported UDP xmit drops that could be root caused to the too small neigh limit. Current limit is 64 KB, meaning that even a single UDP socket would hit it, since its default sk_sndbuf comes from net.core.wmem_default (~212992 bytes on 64bit arches). Once ARP/ND resolution is in progress, we should allow a little more packets to be queued, at least for one producer. Once neigh arp_queue is filled, a rogue socket should hit its sk_sndbuf limit and either block in sendmsg() or return -EAGAIN. Signed-off-by: Eric Dumazet Reported-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/sock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 1c2912d433e8..03a362568357 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2368,6 +2368,16 @@ bool sk_net_capable(const struct sock *sk, int cap); void sk_get_meminfo(const struct sock *sk, u32 *meminfo); +/* Take into consideration the size of the struct sk_buff overhead in the + * determination of these values, since that is non-constant across + * platforms. This makes socket queueing behavior and performance + * not depend upon such differences. + */ +#define _SK_MEM_PACKETS 256 +#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) +#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) +#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) + extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; -- cgit v1.2.3 From c1d2b4c3e204e602c97680335d082b8d012d08cd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 30 Aug 2017 19:24:57 +0200 Subject: tcp: Revert "tcp: remove CA_ACK_SLOWPATH" This change was a followup to the header prediction removal, so first revert this as a prerequisite to back out hp removal. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index c614ff135b66..c546d13ffbca 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -910,8 +910,9 @@ enum tcp_ca_event { /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ enum tcp_ca_ack_event_flags { - CA_ACK_WIN_UPDATE = (1 << 0), /* ACK updated window */ - CA_ACK_ECE = (1 << 1), /* ECE bit is set on ack */ + CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */ + CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */ + CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */ }; /* -- cgit v1.2.3 From 31770e34e43d6c8dee129bfee77e56c34e61f0e5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 30 Aug 2017 19:24:58 +0200 Subject: tcp: Revert "tcp: remove header prediction" This reverts commit 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78. Eric Dumazet says: We found at Google a significant regression caused by 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78 tcp: remove header prediction In typical RPC (TCP_RR), when a TCP socket receives data, we now call tcp_ack() while we used to not call it. This touches enough cache lines to cause a slowdown. so problem does not seem to be HP removal itself but the tcp_ack() call. Therefore, it might be possible to remove HP after all, provided one finds a way to elide tcp_ack for most cases. Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++++ include/net/tcp.h | 23 +++++++++++++++++++++++ include/uapi/linux/snmp.h | 2 ++ 3 files changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 267164a1d559..4aa40ef02d32 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -147,6 +147,12 @@ struct tcp_sock { u16 tcp_header_len; /* Bytes of tcp header to send */ u16 gso_segs; /* Max number of segs per GSO packet */ +/* + * Header prediction flags + * 0x5?10 << 16 + snd_wnd in net byte order + */ + __be32 pred_flags; + /* * RFC793 variables by their proper names. This means you can * read the code and the spec side by side (and laugh ...) diff --git a/include/net/tcp.h b/include/net/tcp.h index c546d13ffbca..9c3db054e47f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -634,6 +634,29 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } +static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) +{ + tp->pred_flags = htonl((tp->tcp_header_len << 26) | + ntohl(TCP_FLAG_ACK) | + snd_wnd); +} + +static inline void tcp_fast_path_on(struct tcp_sock *tp) +{ + __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); +} + +static inline void tcp_fast_path_check(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && + tp->rcv_wnd && + atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && + !tp->urg_data) + tcp_fast_path_on(tp); +} + /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(struct sock *sk) { diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index b3f346fb9fe3..758f12b58541 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -184,7 +184,9 @@ enum LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */ LINUX_MIB_LISTENOVERFLOWS, /* ListenOverflows */ LINUX_MIB_LISTENDROPS, /* ListenDrops */ + LINUX_MIB_TCPHPHITS, /* TCPHPHits */ LINUX_MIB_TCPPUREACKS, /* TCPPureAcks */ + LINUX_MIB_TCPHPACKS, /* TCPHPAcks */ LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */ LINUX_MIB_TCPSACKRECOVERY, /* TCPSackRecovery */ LINUX_MIB_TCPSACKRENEGING, /* TCPSACKReneging */ -- cgit v1.2.3 From 7373ae7e8f0bf2c0718422481da986db5058b005 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 29 Aug 2017 22:44:16 -0600 Subject: net: ether: Add support for multiplexing and aggregation type Define the Qualcomm multiplexing and aggregation (MAP) ether type 0x00F9. This is needed for receiving data in the MAP protocol like RMNET. This is not an officially registered ID. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 61f7ccce5b69..9037065e23d0 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -140,6 +140,9 @@ #define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */ #define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */ #define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */ +#define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and + * aggregation protocol + */ /* * This is an Ethernet frame header. -- cgit v1.2.3 From cdf4969c42a6c1a376dd03a9e846cf638d3cd4b1 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 29 Aug 2017 22:44:17 -0600 Subject: net: arp: Add support for raw IP device Define the raw IP type. This is needed for raw IP net devices like rmnet. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- include/uapi/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index cf73510b9238..a2a635620600 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -59,6 +59,7 @@ #define ARPHRD_LAPB 516 /* LAPB */ #define ARPHRD_DDCMP 517 /* Digital's DDCMP protocol */ #define ARPHRD_RAWHDLC 518 /* Raw HDLC */ +#define ARPHRD_RAWIP 519 /* Raw IP */ #define ARPHRD_TUNNEL 768 /* IPIP tunnel */ #define ARPHRD_TUNNEL6 769 /* IP6IP6 tunnel */ -- cgit v1.2.3 From 388f79fda74fd3d8700ed5d899573ec58c2e0253 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 30 Aug 2017 02:31:57 -0400 Subject: idr: Add new APIs to support unsigned long The following new APIs are added: int idr_alloc_ext(struct idr *idr, void *ptr, unsigned long *index, unsigned long start, unsigned long end, gfp_t gfp); void *idr_remove_ext(struct idr *idr, unsigned long id); void *idr_find_ext(const struct idr *idr, unsigned long id); void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id); void *idr_get_next_ext(struct idr *idr, unsigned long *nextid); Signed-off-by: Chris Mi Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/idr.h | 69 ++++++++++++++++++++++++++++++++++++++++++++-- include/linux/radix-tree.h | 21 ++++++++++++-- 2 files changed, 85 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index bf70b3ef0a07..7c3a365f7e12 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -80,19 +80,75 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val) */ void idr_preload(gfp_t gfp_mask); -int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t); + +int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index, + unsigned long start, unsigned long end, gfp_t gfp, + bool ext); + +/** + * idr_alloc - allocate an id + * @idr: idr handle + * @ptr: pointer to be associated with the new id + * @start: the minimum id (inclusive) + * @end: the maximum id (exclusive) + * @gfp: memory allocation flags + * + * Allocates an unused ID in the range [start, end). Returns -ENOSPC + * if there are no unused IDs in that range. + * + * Note that @end is treated as max when <= 0. This is to always allow + * using @start + N as @end as long as N is inside integer range. + * + * Simultaneous modifications to the @idr are not allowed and should be + * prevented by the user, usually with a lock. idr_alloc() may be called + * concurrently with read-only accesses to the @idr, such as idr_find() and + * idr_for_each_entry(). + */ +static inline int idr_alloc(struct idr *idr, void *ptr, + int start, int end, gfp_t gfp) +{ + unsigned long id; + int ret; + + if (WARN_ON_ONCE(start < 0)) + return -EINVAL; + + ret = idr_alloc_cmn(idr, ptr, &id, start, end, gfp, false); + + if (ret) + return ret; + + return id; +} + +static inline int idr_alloc_ext(struct idr *idr, void *ptr, + unsigned long *index, + unsigned long start, + unsigned long end, + gfp_t gfp) +{ + return idr_alloc_cmn(idr, ptr, index, start, end, gfp, true); +} + int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t); int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *, int *nextid); +void *idr_get_next_ext(struct idr *idr, unsigned long *nextid); void *idr_replace(struct idr *, void *, int id); +void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id); void idr_destroy(struct idr *); -static inline void *idr_remove(struct idr *idr, int id) +static inline void *idr_remove_ext(struct idr *idr, unsigned long id) { return radix_tree_delete_item(&idr->idr_rt, id, NULL); } +static inline void *idr_remove(struct idr *idr, int id) +{ + return idr_remove_ext(idr, id); +} + static inline void idr_init(struct idr *idr) { INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); @@ -128,11 +184,16 @@ static inline void idr_preload_end(void) * This function can be called under rcu_read_lock(), given that the leaf * pointers lifetimes are correctly managed. */ -static inline void *idr_find(const struct idr *idr, int id) +static inline void *idr_find_ext(const struct idr *idr, unsigned long id) { return radix_tree_lookup(&idr->idr_rt, id); } +static inline void *idr_find(const struct idr *idr, int id) +{ + return idr_find_ext(idr, id); +} + /** * idr_for_each_entry - iterate over an idr's elements of a given type * @idr: idr handle @@ -145,6 +206,8 @@ static inline void *idr_find(const struct idr *idr, int id) */ #define idr_for_each_entry(idr, entry, id) \ for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id) +#define idr_for_each_entry_ext(idr, entry, id) \ + for (id = 0; ((entry) = idr_get_next_ext(idr, &(id))) != NULL; ++id) /** * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 3e5735064b71..567ebb5eaab0 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -357,8 +357,25 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index, unsigned new_order); int radix_tree_join(struct radix_tree_root *, unsigned long index, unsigned new_order, void *); -void __rcu **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *, - gfp_t, int end); + +void __rcu **idr_get_free_cmn(struct radix_tree_root *root, + struct radix_tree_iter *iter, gfp_t gfp, + unsigned long max); +static inline void __rcu **idr_get_free(struct radix_tree_root *root, + struct radix_tree_iter *iter, + gfp_t gfp, + int end) +{ + return idr_get_free_cmn(root, iter, gfp, end > 0 ? end - 1 : INT_MAX); +} + +static inline void __rcu **idr_get_free_ext(struct radix_tree_root *root, + struct radix_tree_iter *iter, + gfp_t gfp, + unsigned long end) +{ + return idr_get_free_cmn(root, iter, gfp, end - 1); +} enum { RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */ -- cgit v1.2.3 From 65a206c01e8e7ffe971477a36419422099216eff Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 30 Aug 2017 02:31:59 -0400 Subject: net/sched: Change act_api and act_xxx modules to use IDR Typically, each TC filter has its own action. All the actions of the same type are saved in its hash table. But the hash buckets are too small that it degrades to a list. And the performance is greatly affected. For example, it takes about 0m11.914s to insert 64K rules. If we convert the hash table to IDR, it only takes about 0m1.500s. The improvement is huge. But please note that the test result is based on previous patch that cls_flower uses IDR. Signed-off-by: Chris Mi Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 76 ++++++++++++++++----------------------------------- 1 file changed, 24 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 26ffd8333f50..8f3d5d8b5ae0 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -10,12 +10,9 @@ #include #include - -struct tcf_hashinfo { - struct hlist_head *htab; - unsigned int hmask; - spinlock_t lock; - u32 index; +struct tcf_idrinfo { + spinlock_t lock; + struct idr action_idr; }; struct tc_action_ops; @@ -25,9 +22,8 @@ struct tc_action { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ __u32 order; struct list_head list; - struct tcf_hashinfo *hinfo; + struct tcf_idrinfo *idrinfo; - struct hlist_node tcfa_head; u32 tcfa_index; int tcfa_refcnt; int tcfa_bindcnt; @@ -44,7 +40,6 @@ struct tc_action { struct tc_cookie *act_cookie; struct tcf_chain *goto_chain; }; -#define tcf_head common.tcfa_head #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt #define tcf_bindcnt common.tcfa_bindcnt @@ -57,27 +52,6 @@ struct tc_action { #define tcf_lock common.tcfa_lock #define tcf_rcu common.tcfa_rcu -static inline unsigned int tcf_hash(u32 index, unsigned int hmask) -{ - return index & hmask; -} - -static inline int tcf_hashinfo_init(struct tcf_hashinfo *hf, unsigned int mask) -{ - int i; - - spin_lock_init(&hf->lock); - hf->index = 0; - hf->hmask = mask; - hf->htab = kzalloc((mask + 1) * sizeof(struct hlist_head), - GFP_KERNEL); - if (!hf->htab) - return -ENOMEM; - for (i = 0; i < mask + 1; i++) - INIT_HLIST_HEAD(&hf->htab[i]); - return 0; -} - /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. */ @@ -126,53 +100,51 @@ struct tc_action_ops { }; struct tc_action_net { - struct tcf_hashinfo *hinfo; + struct tcf_idrinfo *idrinfo; const struct tc_action_ops *ops; }; static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops, unsigned int mask) + const struct tc_action_ops *ops) { int err = 0; - tn->hinfo = kmalloc(sizeof(*tn->hinfo), GFP_KERNEL); - if (!tn->hinfo) + tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL); + if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - err = tcf_hashinfo_init(tn->hinfo, mask); - if (err) - kfree(tn->hinfo); + spin_lock_init(&tn->idrinfo->lock); + idr_init(&tn->idrinfo->action_idr); return err; } -void tcf_hashinfo_destroy(const struct tc_action_ops *ops, - struct tcf_hashinfo *hinfo); +void tcf_idrinfo_destroy(const struct tc_action_ops *ops, + struct tcf_idrinfo *idrinfo); static inline void tc_action_net_exit(struct tc_action_net *tn) { - tcf_hashinfo_destroy(tn->ops, tn->hinfo); - kfree(tn->hinfo); + tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + kfree(tn->idrinfo); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops); -int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index); -u32 tcf_hash_new_index(struct tc_action_net *tn); -bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); +bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, int bind); -int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action **a, const struct tc_action_ops *ops, int bind, - bool cpustats); -void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); -void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a); +int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, + struct tc_action **a, const struct tc_action_ops *ops, + int bind, bool cpustats); +void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est); +void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); -int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); +int __tcf_idr_release(struct tc_action *a, bool bind, bool strict); -static inline int tcf_hash_release(struct tc_action *a, bool bind) +static inline int tcf_idr_release(struct tc_action *a, bool bind) { - return __tcf_hash_release(a, bind, false); + return __tcf_idr_release(a, bind, false); } int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); -- cgit v1.2.3 From 5c23f2dc8eeb5a6010cb66119d942361bc8ec833 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 30 Aug 2017 10:29:12 +0200 Subject: phy: add sgmii and 10gkr modes to the phy_mode enum This patch adds more generic PHY modes to the phy_mode enum, to allow configuring generic PHYs to the SGMII and/or the 10GKR mode by using the set_mode callback. Signed-off-by: Antoine Tenart Acked-by: Kishon Vijay Abraham I Signed-off-by: David S. Miller --- include/linux/phy/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 78bb0d7f6b11..e694d4008c4a 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -27,6 +27,8 @@ enum phy_mode { PHY_MODE_USB_HOST, PHY_MODE_USB_DEVICE, PHY_MODE_USB_OTG, + PHY_MODE_SGMII, + PHY_MODE_10GKR, }; /** -- cgit v1.2.3 From 2729984149e6a23e849a40e16fc3efdc07dd3668 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 13 Aug 2017 13:34:42 +0300 Subject: net/mlx5e: Support TSO and TX checksum offloads for GRE tunnels Add TX offloads support for GRE tunneled packets by reporting the needed netdev features. Signed-off-by: Gal Pressman Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ae7d09b9c52f..3d5d32e5446c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -602,7 +602,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 reserved_at_1a[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; - u8 tunnel_statless_gre[0x1]; + u8 tunnel_stateless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; u8 swp[0x1]; -- cgit v1.2.3 From 47ebcc0bb1d5eb7f1b1eeab675409ea7f67b4a5c Mon Sep 17 00:00:00 2001 From: Yossi Kuperman Date: Wed, 30 Aug 2017 11:30:39 +0300 Subject: xfrm: Add support for network devices capable of removing the ESP trailer In conjunction with crypto offload [1], removing the ESP trailer by hardware can potentially improve the performance by avoiding (1) a cache miss incurred by reading the nexthdr field and (2) the necessity to calculate the csum value of the trailer in order to keep skb->csum valid. This patch introduces the changes to the xfrm stack and merely serves as an infrastructure. Subsequent patch to mlx5 driver will put this to a good use. [1] https://www.mail-archive.com/netdev@vger.kernel.org/msg175733.html Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9c7b70cce6d6..f002a2c5e33c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1019,6 +1019,7 @@ struct xfrm_offload { #define CRYPTO_FALLBACK 8 #define XFRM_GSO_SEGMENT 16 #define XFRM_GRO 32 +#define XFRM_ESP_NO_TRAILER 64 __u32 status; #define CRYPTO_SUCCESS 1 -- cgit v1.2.3 From 07d79fc7d94e3f884b8b1c95aa615b202bb5e4c1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 30 Aug 2017 14:30:36 -0700 Subject: net_sched: add reverse binding for tc class TC filters when used as classifiers are bound to TC classes. However, there is a hidden difference when adding them in different orders: 1. If we add tc classes before its filters, everything is fine. Logically, the classes exist before we specify their ID's in filters, it is easy to bind them together, just as in the current code base. 2. If we add tc filters before the tc classes they bind, we have to do dynamic lookup in fast path. What's worse, this happens all the time not just once, because on fast path tcf_result is passed on stack, there is no way to propagate back to the one in tc filters. This hidden difference hurts performance silently if we have many tc classes in hierarchy. This patch intends to close this gap by doing the reverse binding when we create a new class, in this case we can actually search all the filters in its parent, match and fixup by classid. And because tcf_result is specific to each type of tc filter, we have to introduce a new ops for each filter to tell how to bind the class. Note, we still can NOT totally get rid of those class lookup in ->enqueue() because cgroup and flow filters have no way to determine the classid at setup time, they still have to go through dynamic lookup. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c30b634c5f82..d6247a3c40df 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -217,6 +217,7 @@ struct tcf_proto_ops { void **, bool); int (*delete)(struct tcf_proto*, void *, bool*); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); + void (*bind_class)(void *, u32, unsigned long); /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, -- cgit v1.2.3 From e3cfddd577e763496c25ddb855cfff48b1174b63 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 30 Aug 2017 22:18:13 -0700 Subject: bridge: add tracepoint in br_fdb_update This extends bridge fdb table tracepoints to also cover learned fdb entries in the br_fdb_update path. Note that unlike other tracepoints I have moved this to when the fdb is modified because this is in the datapath and can generate a lot of noise in the trace output. br_fdb_update is also called from added_by_user context in the NTF_USE case which is already traced ..hence the !added_by_user check. Signed-off-by: Roopa Prabhu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h index 0f1cde00af11..1bee3e7fdf32 100644 --- a/include/trace/events/bridge.h +++ b/include/trace/events/bridge.h @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete, __entry->addr[4], __entry->addr[5], __entry->vid) ); +TRACE_EVENT(br_fdb_update, + + TP_PROTO(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid, bool added_by_user), + + TP_ARGS(br, source, addr, vid, added_by_user), + + TP_STRUCT__entry( + __string(br_dev, br->dev->name) + __string(dev, source->dev->name) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __field(bool, added_by_user) + ), + + TP_fast_assign( + __assign_str(br_dev, br->dev->name); + __assign_str(dev, source->dev->name); + memcpy(__entry->addr, addr, ETH_ALEN); + __entry->vid = vid; + __entry->added_by_user = added_by_user; + ), + + TP_printk("br_dev %s source %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u added_by_user %d", + __get_str(br_dev), __get_str(dev), __entry->addr[0], + __entry->addr[1], __entry->addr[2], __entry->addr[3], + __entry->addr[4], __entry->addr[5], __entry->vid, + __entry->added_by_user) +); + + #endif /* _TRACE_BRIDGE_H */ /* This part must be outside protection */ -- cgit v1.2.3 From f16ded5948a5e6a399161bd71e866b198952ea5f Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Thu, 31 Aug 2017 13:41:40 +0300 Subject: net: fix two typos in net_device_ops documentation. This patch fixes two trivial typos in net_device_ops documentation, related to ndo_xdp_flush callback. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 29bf06ff157c..35de8312e0b5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1124,8 +1124,8 @@ struct xfrmdev_ops { * This function is used to submit a XDP packet for transmit on a * netdevice. * void (*ndo_xdp_flush)(struct net_device *dev); - * This function is used to inform the driver to flush a paticular - * xpd tx queue. Must be called on same CPU as xdp_xmit. + * This function is used to inform the driver to flush a particular + * xdp tx queue. Must be called on same CPU as xdp_xmit. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); -- cgit v1.2.3 From 1797f5b3cf0b3a73c42b89f7a8fd897417373730 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 31 Aug 2017 17:59:12 +0200 Subject: devlink: Add IPv6 header for dpipe This will be used by the IPv6 host table which will be introduced in the following patches. The fields in the header are added per-use. This header is global and can be reused by many drivers. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 1 + include/uapi/linux/devlink.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index aaf7178127a2..b9654e133599 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -330,6 +330,7 @@ int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_match *match); extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; +extern struct devlink_dpipe_header devlink_dpipe_header_ipv6; #else diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6c172548589d..0cbca96c66b9 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -234,9 +234,14 @@ enum devlink_dpipe_field_ipv4_id { DEVLINK_DPIPE_FIELD_IPV4_DST_IP, }; +enum devlink_dpipe_field_ipv6_id { + DEVLINK_DPIPE_FIELD_IPV6_DST_IP, +}; + enum devlink_dpipe_header_id { DEVLINK_DPIPE_HEADER_ETHERNET, DEVLINK_DPIPE_HEADER_IPV4, + DEVLINK_DPIPE_HEADER_IPV6, }; #endif /* _UAPI_LINUX_DEVLINK_H_ */ -- cgit v1.2.3 From 482dca939fb7ee35ba20b944b4c2476133dbf0df Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 31 Aug 2017 15:05:44 -0700 Subject: bpf: Add mark and priority to sock options that can be set Add socket mark and priority to fields that can be set by ebpf program when a socket is created. Signed-off-by: David Ahern Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 08c206a863e1..ba848b761cfb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -758,6 +758,8 @@ struct bpf_sock { __u32 family; __u32 type; __u32 protocol; + __u32 mark; + __u32 priority; }; #define XDP_PACKET_HEADROOM 256 -- cgit v1.2.3 From 65bce46298d064dff9db1282e17bb26602715819 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 1 Sep 2017 13:41:17 -0700 Subject: Bluetooth: make baswap src const Signed-off-by: Loic Poulain Signed-off-by: Bjorn Andersson Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 01487192f628..020142bb9735 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -233,7 +233,7 @@ static inline void bacpy(bdaddr_t *dst, const bdaddr_t *src) memcpy(dst, src, sizeof(bdaddr_t)); } -void baswap(bdaddr_t *dst, bdaddr_t *src); +void baswap(bdaddr_t *dst, const bdaddr_t *src); /* Common socket structures and functions */ -- cgit v1.2.3 From b37e88407c1d78f157778d73427cd7e9e1d6369d Mon Sep 17 00:00:00 2001 From: Ivan Delalande Date: Thu, 31 Aug 2017 09:59:38 -0700 Subject: inet_diag: allow protocols to provide additional data Extend inet_diag_handler to allow individual protocols to report additional data on INET_DIAG_INFO through idiag_get_aux. The size can be dynamic and is computed by idiag_get_aux_size. Signed-off-by: Ivan Delalande Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 65da430e260f..ee251c585854 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -25,6 +25,13 @@ struct inet_diag_handler { struct inet_diag_msg *r, void *info); + int (*idiag_get_aux)(struct sock *sk, + bool net_admin, + struct sk_buff *skb); + + size_t (*idiag_get_aux_size)(struct sock *sk, + bool net_admin); + int (*destroy)(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req); -- cgit v1.2.3 From c03fa9bcacd9ac04595cc13f34f3445f0a5ecf13 Mon Sep 17 00:00:00 2001 From: Ivan Delalande Date: Thu, 31 Aug 2017 09:59:39 -0700 Subject: tcp_diag: report TCP MD5 signing keys and addresses Report TCP MD5 (RFC2385) signing keys, addresses and address prefixes to processes with CAP_NET_ADMIN requesting INET_DIAG_INFO. Currently it is not possible to retrieve these from the kernel once they have been configured on sockets. Signed-off-by: Ivan Delalande Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 1 + include/uapi/linux/tcp.h | 9 +++++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 678496897a68..f52ff62bfabe 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -143,6 +143,7 @@ enum { INET_DIAG_MARK, INET_DIAG_BBRINFO, INET_DIAG_CLASS_ID, + INET_DIAG_MD5SIG, __INET_DIAG_MAX, }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 030e594bab45..15c25eccab2b 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -256,4 +256,13 @@ struct tcp_md5sig { __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ }; +/* INET_DIAG_MD5SIG */ +struct tcp_diag_md5sig { + __u8 tcpm_family; + __u8 tcpm_prefixlen; + __u16 tcpm_keylen; + __be32 tcpm_addr[4]; + __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; +}; + #endif /* _UAPI_LINUX_TCP_H */ -- cgit v1.2.3 From c1d1b437816f0afa99202be3cb650c9d174667bc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Aug 2017 16:48:22 -0700 Subject: net: convert (struct ubuf_info)->refcnt to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. v2: added the change in drivers/vhost/net.c as spotted by Willem. Signed-off-by: Eric Dumazet Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f93cc01064cb..f751f3b93039 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -456,7 +457,7 @@ struct ubuf_info { u32 bytelen; }; }; - atomic_t refcnt; + refcount_t refcnt; struct mmpin { struct user_struct *user; @@ -472,7 +473,7 @@ struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, static inline void sock_zerocopy_get(struct ubuf_info *uarg) { - atomic_inc(&uarg->refcnt); + refcount_inc(&uarg->refcnt); } void sock_zerocopy_put(struct ubuf_info *uarg); -- cgit v1.2.3 From 864150dfa31dceab6ec5ca4579a2d35ede985cb7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 1 Sep 2017 12:15:17 +0300 Subject: net: Add module reference to FIB notifiers When a listener registers to the FIB notification chain it receives a dump of the FIB entries and rules from existing address families by invoking their dump operations. While we call into these modules we need to make sure they aren't removed. Do that by increasing their reference count before invoking their dump operations and decrease it afterwards. Fixes: 04b1d4e50e82 ("net: core: Make the FIB notification chain generic") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/fib_notifier.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index 241475224f74..669b9716dc7a 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -2,6 +2,7 @@ #define __NET_FIB_NOTIFIER_H #include +#include #include #include @@ -26,6 +27,7 @@ struct fib_notifier_ops { struct list_head list; unsigned int (*fib_seq_read)(struct net *net); int (*fib_dump)(struct net *net, struct notifier_block *nb); + struct module *owner; struct rcu_head rcu; }; -- cgit v1.2.3 From 604acb193b8383a3774673c324e576e4c6b4ffcd Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 5 Jun 2017 11:17:20 +0300 Subject: net/mlx5e: Refactor data-path lro header function Refactor function mlx5e_lro_update_hdr() to reduce number of branches. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 3c7442b56460..7031d655ec32 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -709,7 +709,7 @@ static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe) return (cqe->op_own >> 2) & 0x3; } -static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) +static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; } -- cgit v1.2.3 From 64327fc811268d4a24de03dac242ea29de6be75f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 31 Aug 2017 18:11:41 +0200 Subject: ipv4: Don't override return code from ip_route_input_noref() After ip_route_input() calls ip_route_input_noref(), another check on skb_dst() is done, but if this fails, we shouldn't override the return code from ip_route_input_noref(), as it could have been more specific (i.e. -EHOSTUNREACH). This also saves one call to skb_dst_force_safe() and one to skb_dst() in case the ip_route_input_noref() check fails. Reported-by: Sabrina Dubroca Fixes: 9df16efadd2a ("ipv4: call dst_hold_safe() properly") Signed-off-by: Stefano Brivio Acked-by: Wei Wang Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/route.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index cb0a76d9dde1..1b09a9368c68 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -189,10 +189,11 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, rcu_read_lock(); err = ip_route_input_noref(skb, dst, src, tos, devin); - if (!err) + if (!err) { skb_dst_force_safe(skb); - if (!skb_dst(skb)) - err = -EINVAL; + if (!skb_dst(skb)) + err = -EINVAL; + } rcu_read_unlock(); return err; -- cgit v1.2.3 From fb452a1aa3fd4034d7999e309c5466ff2d7005aa Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Sep 2017 11:26:08 +0200 Subject: Revert "net: use lib/percpu_counter API for fragmentation mem accounting" This reverts commit 6d7b857d541ecd1d9bd997c97242d4ef94b19de2. There is a bug in fragmentation codes use of the percpu_counter API, that can cause issues on systems with many CPUs. The frag_mem_limit() just reads the global counter (fbc->count), without considering other CPUs can have upto batch size (130K) that haven't been subtracted yet. Due to the 3MBytes lower thresh limit, this become dangerous at >=24 CPUs (3*1024*1024/130000=24). The correct API usage would be to use __percpu_counter_compare() which does the right thing, and takes into account the number of (online) CPUs and batch size, to account for this and call __percpu_counter_sum() when needed. We choose to revert the use of the lib/percpu_counter API for frag memory accounting for several reasons: 1) On systems with CPUs > 24, the heavier fully locked __percpu_counter_sum() is always invoked, which will be more expensive than the atomic_t that is reverted to. Given systems with more than 24 CPUs are becoming common this doesn't seem like a good option. To mitigate this, the batch size could be decreased and thresh be increased. 2) The add_frag_mem_limit+sub_frag_mem_limit pairs happen on the RX CPU, before SKBs are pushed into sockets on remote CPUs. Given NICs can only hash on L2 part of the IP-header, the NIC-RXq's will likely be limited. Thus, a fair chance that atomic add+dec happen on the same CPU. Revert note that commit 1d6119baf061 ("net: fix percpu memory leaks") removed init_frag_mem_limit() and instead use inet_frags_init_net(). After this revert, inet_frags_uninit_net() becomes empty. Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") Fixes: 1d6119baf061 ("net: fix percpu memory leaks") Signed-off-by: Jesper Dangaard Brouer Acked-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 6fdcd2427776..fa635aa6d0b9 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -1,14 +1,9 @@ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ -#include - struct netns_frags { - /* The percpu_counter "mem" need to be cacheline aligned. - * mem.count must not share cacheline with other writers - */ - struct percpu_counter mem ____cacheline_aligned_in_smp; - + /* Keep atomic mem on separate cachelines in structs that include it */ + atomic_t mem ____cacheline_aligned_in_smp; /* sysctls */ int timeout; int high_thresh; @@ -110,11 +105,11 @@ void inet_frags_fini(struct inet_frags *); static inline int inet_frags_init_net(struct netns_frags *nf) { - return percpu_counter_init(&nf->mem, 0, GFP_KERNEL); + atomic_set(&nf->mem, 0); + return 0; } static inline void inet_frags_uninit_net(struct netns_frags *nf) { - percpu_counter_destroy(&nf->mem); } void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); @@ -140,31 +135,24 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q) /* Memory Tracking Functions. */ -/* The default percpu_counter batch size is not big enough to scale to - * fragmentation mem acct sizes. - * The mem size of a 64K fragment is approx: - * (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes - */ -static unsigned int frag_percpu_counter_batch = 130000; - static inline int frag_mem_limit(struct netns_frags *nf) { - return percpu_counter_read(&nf->mem); + return atomic_read(&nf->mem); } static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch); + atomic_sub(i, &nf->mem); } static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch); + atomic_add(i, &nf->mem); } -static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) +static inline int sum_frag_mem_limit(struct netns_frags *nf) { - return percpu_counter_sum_positive(&nf->mem); + return atomic_read(&nf->mem); } /* RFC 3168 support : -- cgit v1.2.3 From 5a63643e583b6a9789d7a225ae076fb4e603991c Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Sep 2017 11:26:13 +0200 Subject: Revert "net: fix percpu memory leaks" This reverts commit 1d6119baf0610f813eb9d9580eb4fd16de5b4ceb. After reverting commit 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") then here is no need for this fix-up patch. As percpu_counter is no longer used, it cannot memory leak it any-longer. Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") Fixes: 1d6119baf061 ("net: fix percpu memory leaks") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/inet_frag.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index fa635aa6d0b9..fc59e0775e00 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -103,15 +103,10 @@ struct inet_frags { int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -static inline int inet_frags_init_net(struct netns_frags *nf) +static inline void inet_frags_init_net(struct netns_frags *nf) { atomic_set(&nf->mem, 0); - return 0; } -static inline void inet_frags_uninit_net(struct netns_frags *nf) -{ -} - void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); -- cgit v1.2.3 From bea74641e3786d51dcf1175527cc1781420961c9 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Fri, 18 Aug 2017 20:58:59 +0200 Subject: netfilter: xt_hashlimit: add rate match mode This patch adds a new feature to hashlimit that allows matching on the current packet/byte rate without rate limiting. This can be enabled with a new flag --hashlimit-rate-match. The match returns true if the current rate of packets is above/below the user specified value. The main difference between the existing algorithm and the new one is that the existing algorithm rate-limits the flow whereas the new algorithm does not. Instead it *classifies* the flow based on whether it is above or below a certain rate. I will demonstrate this with an example below. Let us assume this rule: iptables -A INPUT -m hashlimit --hashlimit-above 10/s -j new_chain If the packet rate is 15/s, the existing algorithm would ACCEPT 10 packets every second and send 5 packets to "new_chain". But with the new algorithm, as long as the rate of 15/s is sustained, all packets will continue to match and every packet is sent to new_chain. This new functionality will let us classify different flows based on their current rate, so that further decisions can be made on them based on what the current rate is. This is how the new algorithm works: We divide time into intervals of 1 (sec/min/hour) as specified by the user. We keep track of the number of packets/bytes processed in the current interval. After each interval we reset the counter to 0. When we receive a packet for match, we look at the packet rate during the current interval and the previous interval to make a decision: if [ prev_rate < user and cur_rate < user ] return Below else return Above Where cur_rate is the number of packets/bytes seen in the current interval, prev is the number of packets/bytes seen in the previous interval and 'user' is the rate specified by the user. We also provide flexibility to the user for choosing the time interval using the option --hashilmit-interval. For example the user can keep a low rate like x/hour but still keep the interval as small as 1 second. To preserve backwards compatibility we have to add this feature in a new revision, so I've created revision 3 for hashlimit. The two new options we add are: --hashlimit-rate-match --hashlimit-rate-interval I have updated the help text to add these new options. Also added a few tests for the new options. Suggested-by: Igor Lubashev Reviewed-by: Josh Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_hashlimit.h | 3 ++- include/uapi/linux/netfilter/xt_hashlimit.h | 36 ++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h index 074790c0cf74..0fc458bde80b 100644 --- a/include/linux/netfilter/xt_hashlimit.h +++ b/include/linux/netfilter/xt_hashlimit.h @@ -5,5 +5,6 @@ #define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \ XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \ - XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES) + XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES |\ + XT_HASHLIMIT_RATE_MATCH) #endif /*_XT_HASHLIMIT_H*/ diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h index 79da349f1060..aa98573248b1 100644 --- a/include/uapi/linux/netfilter/xt_hashlimit.h +++ b/include/uapi/linux/netfilter/xt_hashlimit.h @@ -19,12 +19,13 @@ struct xt_hashlimit_htable; enum { - XT_HASHLIMIT_HASH_DIP = 1 << 0, - XT_HASHLIMIT_HASH_DPT = 1 << 1, - XT_HASHLIMIT_HASH_SIP = 1 << 2, - XT_HASHLIMIT_HASH_SPT = 1 << 3, - XT_HASHLIMIT_INVERT = 1 << 4, - XT_HASHLIMIT_BYTES = 1 << 5, + XT_HASHLIMIT_HASH_DIP = 1 << 0, + XT_HASHLIMIT_HASH_DPT = 1 << 1, + XT_HASHLIMIT_HASH_SIP = 1 << 2, + XT_HASHLIMIT_HASH_SPT = 1 << 3, + XT_HASHLIMIT_INVERT = 1 << 4, + XT_HASHLIMIT_BYTES = 1 << 5, + XT_HASHLIMIT_RATE_MATCH = 1 << 6, }; struct hashlimit_cfg { @@ -79,6 +80,21 @@ struct hashlimit_cfg2 { __u8 srcmask, dstmask; }; +struct hashlimit_cfg3 { + __u64 avg; /* Average secs between packets * scale */ + __u64 burst; /* Period multiplier for upper limit. */ + __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ + + /* user specified */ + __u32 size; /* how many buckets */ + __u32 max; /* max number of entries */ + __u32 gc_interval; /* gc interval */ + __u32 expire; /* when do entries expire? */ + + __u32 interval; + __u8 srcmask, dstmask; +}; + struct xt_hashlimit_mtinfo1 { char name[IFNAMSIZ]; struct hashlimit_cfg1 cfg; @@ -95,4 +111,12 @@ struct xt_hashlimit_mtinfo2 { struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); }; +struct xt_hashlimit_mtinfo3 { + char name[NAME_MAX]; + struct hashlimit_cfg3 cfg; + + /* Used internally by the kernel */ + struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); +}; + #endif /* _UAPI_XT_HASHLIMIT_H */ -- cgit v1.2.3 From dfc46034b54af3abf594de75a1ee43ef2ec2a60a Mon Sep 17 00:00:00 2001 From: "Pablo M. Bermudo Garay" Date: Wed, 23 Aug 2017 22:41:23 +0200 Subject: netfilter: nf_tables: add select_ops for stateful objects This patch adds support for overloading stateful objects operations through the select_ops() callback, just as it is implemented for expressions. This change is needed for upcoming additions to the stateful objects infrastructure. Signed-off-by: Pablo M. Bermudo Garay Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f9795fe394f3..0f5b12a4ad09 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1007,12 +1007,12 @@ int nft_verdict_dump(struct sk_buff *skb, int type, * * @list: table stateful object list node * @table: table this object belongs to - * @type: pointer to object type - * @data: pointer to object data * @name: name of this stateful object * @genmask: generation mask * @use: number of references to this stateful object * @data: object data, layout depends on type + * @ops: object operations + * @data: pointer to object data */ struct nft_object { struct list_head list; @@ -1021,7 +1021,7 @@ struct nft_object { u32 genmask:2, use:30; /* runtime data below here */ - const struct nft_object_type *type ____cacheline_aligned; + const struct nft_object_ops *ops ____cacheline_aligned; unsigned char data[] __attribute__((aligned(__alignof__(u64)))); }; @@ -1044,27 +1044,39 @@ void nft_obj_notify(struct net *net, struct nft_table *table, /** * struct nft_object_type - stateful object type * - * @eval: stateful object evaluation function + * @select_ops: function to select nft_object_ops + * @ops: default ops, used when no select_ops functions is present * @list: list node in list of object types * @type: stateful object numeric type - * @size: stateful object size * @owner: module owner * @maxattr: maximum netlink attribute * @policy: netlink attribute policy + */ +struct nft_object_type { + const struct nft_object_ops *(*select_ops)(const struct nft_ctx *, + const struct nlattr * const tb[]); + const struct nft_object_ops *ops; + struct list_head list; + u32 type; + unsigned int maxattr; + struct module *owner; + const struct nla_policy *policy; +}; + +/** + * struct nft_object_ops - stateful object operations + * + * @eval: stateful object evaluation function + * @size: stateful object size * @init: initialize object from netlink attributes * @destroy: release existing stateful object * @dump: netlink dump stateful object */ -struct nft_object_type { +struct nft_object_ops { void (*eval)(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt); - struct list_head list; - u32 type; unsigned int size; - unsigned int maxattr; - struct module *owner; - const struct nla_policy *policy; int (*init)(const struct nft_ctx *ctx, const struct nlattr *const tb[], struct nft_object *obj); @@ -1072,6 +1084,7 @@ struct nft_object_type { int (*dump)(struct sk_buff *skb, struct nft_object *obj, bool reset); + const struct nft_object_type *type; }; int nft_register_obj(struct nft_object_type *obj_type); -- cgit v1.2.3 From a691205571723cb0544110ca91653ac4b0eb5b17 Mon Sep 17 00:00:00 2001 From: "Pablo M. Bermudo Garay" Date: Wed, 23 Aug 2017 22:41:25 +0200 Subject: netfilter: nft_limit: add stateful object type Register a new limit stateful object type into the stateful object infrastructure. Signed-off-by: Pablo M. Bermudo Garay Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b49da72efa68..871afa4871bf 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1282,7 +1282,8 @@ enum nft_ct_helper_attributes { #define NFT_OBJECT_COUNTER 1 #define NFT_OBJECT_QUOTA 2 #define NFT_OBJECT_CT_HELPER 3 -#define __NFT_OBJECT_MAX 4 +#define NFT_OBJECT_LIMIT 4 +#define __NFT_OBJECT_MAX 5 #define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) /** -- cgit v1.2.3 From d1c1e39de8357d66163da39e893e38ea1410e8f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 29 Aug 2017 12:04:10 +0200 Subject: netfilter: remove unused hooknum arg from packet functions tested with allmodconfig build. Signed-off-by: Florian Westphal --- include/net/netfilter/nf_conntrack_l4proto.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index d4933d56809d..738a0307a96b 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -43,7 +43,6 @@ struct nf_conntrack_l4proto { unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum, unsigned int *timeouts); /* Called when a new connection for this protocol found; -- cgit v1.2.3 From 44d6e2f27328b254111dd716fde45b3b59b8a4f7 Mon Sep 17 00:00:00 2001 From: Varsha Rao Date: Wed, 30 Aug 2017 13:37:11 +0530 Subject: net: Replace NF_CT_ASSERT() with WARN_ON(). This patch removes NF_CT_ASSERT() and instead uses WARN_ON(). Signed-off-by: Varsha Rao --- include/net/netfilter/nf_conntrack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 6e6f678aaac7..0385cb08c478 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -159,7 +159,7 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { - NF_CT_ASSERT(ct); + WARN_ON(!ct); nf_conntrack_put(&ct->ct_general); } -- cgit v1.2.3 From 9efdb14f76f4d7591cd4d7a436ebd716b19703b6 Mon Sep 17 00:00:00 2001 From: Varsha Rao Date: Wed, 30 Aug 2017 13:37:12 +0530 Subject: net: Remove CONFIG_NETFILTER_DEBUG and _ASSERT() macros. This patch removes CONFIG_NETFILTER_DEBUG and _ASSERT() macros as they are no longer required. Replace _ASSERT() macros with WARN_ON(). Signed-off-by: Varsha Rao Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 0385cb08c478..fdc9c64a1c94 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -44,12 +44,6 @@ union nf_conntrack_expect_proto { #include #include -#ifdef CONFIG_NETFILTER_DEBUG -#define NF_CT_ASSERT(x) WARN_ON(!(x)) -#else -#define NF_CT_ASSERT(x) -#endif - #include #include -- cgit v1.2.3 From 2335ba704f32b855651d0cd15dd9b271ec565fb6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 3 Sep 2017 23:55:59 +0200 Subject: netlink: add NLM_F_NONREC flag for deletion requests In the last NFWS in Faro, Portugal, we discussed that netlink is lacking the semantics to request non recursive deletions, ie. do not delete an object iff it has child objects that hang from this parent object that the user requests to be deleted. We need this new flag to solve a problem for the iptables-compat backward compatibility utility, that runs iptables commands using the existing nf_tables netlink interface. Specifically, custom chains in iptables cannot be deleted if there are rules in it, however, nf_tables allows to remove any chain that is populated with content. To sort out this asymmetry, iptables-compat userspace sets this new NLM_F_NONREC flag to obtain the same semantics that iptables provides. This new flag should only be used for deletion requests. Note this new flag value overlaps with the existing: * NLM_F_ROOT for get requests. * NLM_F_REPLACE for new requests. However, those flags should not ever be used in deletion requests. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index f4fc9c9e123d..e8af60a7c56d 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -69,6 +69,9 @@ struct nlmsghdr { #define NLM_F_CREATE 0x400 /* Create, if it does not exist */ #define NLM_F_APPEND 0x800 /* Add to end of list */ +/* Modifiers to DELETE request */ +#define NLM_F_NONREC 0x100 /* Do not delete recursively */ + /* Flags for ACK message */ #define NLM_F_CAPPED 0x100 /* request was capped */ #define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */ -- cgit v1.2.3 From fd0c88b70060e03a2215259ed29b4b31497ad205 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 10:05:47 +0200 Subject: net/ncsi: fix ncsi_vlan_rx_{add,kill}_vid references We get a new link error in allmodconfig kernels after ftgmac100 started using the ncsi helpers: ERROR: "ncsi_vlan_rx_kill_vid" [drivers/net/ethernet/faraday/ftgmac100.ko] undefined! ERROR: "ncsi_vlan_rx_add_vid" [drivers/net/ethernet/faraday/ftgmac100.ko] undefined! Related to that, we get another error when CONFIG_NET_NCSI is disabled: drivers/net/ethernet/faraday/ftgmac100.c:1626:25: error: 'ncsi_vlan_rx_add_vid' undeclared here (not in a function); did you mean 'ncsi_start_dev'? drivers/net/ethernet/faraday/ftgmac100.c:1627:26: error: 'ncsi_vlan_rx_kill_vid' undeclared here (not in a function); did you mean 'ncsi_vlan_rx_add_vid'? This fixes both problems at once, using a 'static inline' stub helper for the disabled case, and exporting the functions when they are present. Fixes: 51564585d8c6 ("ftgmac100: Support NCSI VLAN filtering when available") Fixes: 21acf63013ed ("net/ncsi: Configure VLAN tag filter") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/net/ncsi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 1f96af46df49..fdc60ff2511d 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -36,6 +36,16 @@ int ncsi_start_dev(struct ncsi_dev *nd); void ncsi_stop_dev(struct ncsi_dev *nd); void ncsi_unregister_dev(struct ncsi_dev *nd); #else /* !CONFIG_NET_NCSI */ +static inline int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + return -EINVAL; +} + +static inline int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + return -EINVAL; +} + static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)) { -- cgit v1.2.3 From 2c08ab3f2504bc7ba816ce6fde051b8bd5f028e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 10:31:35 +0200 Subject: soc: ti/knav_dma: include dmaengine header A header file cleanup apparently caused a build regression with one driver using the knav infrastructure: In file included from drivers/net/ethernet/ti/netcp_core.c:30:0: include/linux/soc/ti/knav_dma.h:129:30: error: field 'direction' has incomplete type enum dma_transfer_direction direction; ^~~~~~~~~ drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_txpipe_open': drivers/net/ethernet/ti/netcp_core.c:1349:21: error: 'DMA_MEM_TO_DEV' undeclared (first use in this function); did you mean 'DMA_MEMORY_MAP'? config.direction = DMA_MEM_TO_DEV; ^~~~~~~~~~~~~~ DMA_MEMORY_MAP drivers/net/ethernet/ti/netcp_core.c:1349:21: note: each undeclared identifier is reported only once for each function it appears in drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_setup_navigator_resources': drivers/net/ethernet/ti/netcp_core.c:1659:22: error: 'DMA_DEV_TO_MEM' undeclared (first use in this function); did you mean 'DMA_DESC_HOST'? config.direction = DMA_DEV_TO_MEM; As the header is no longer included implicitly through netdevice.h, we should include it in the header that references the enum. Fixes: 0dd5759dbb1c ("net: remove dmaengine.h inclusion from netdevice.h") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/linux/soc/ti/knav_dma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h index 2b7882666ef6..66693bc4c6ad 100644 --- a/include/linux/soc/ti/knav_dma.h +++ b/include/linux/soc/ti/knav_dma.h @@ -17,6 +17,8 @@ #ifndef __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ #define __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ +#include + /* * PKTDMA descriptor manipulation macros for host packet descriptor */ -- cgit v1.2.3 From 3a1214e8b06317b4e71cd3a36344df87b7858e19 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 1 Sep 2017 14:04:11 -0700 Subject: flow_dissector: Cleanup control flow __skb_flow_dissect is riddled with gotos that make discerning the flow, debugging, and extending the capability difficult. This patch reorganizes things so that we only perform goto's after the two main switch statements (no gotos within the cases now). It also eliminates several goto labels so that there are only two labels that can be target for goto. Reported-by: Alexander Popov Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index e2663e900b0a..fc3dce730a6b 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -19,6 +19,14 @@ struct flow_dissector_key_control { #define FLOW_DIS_FIRST_FRAG BIT(1) #define FLOW_DIS_ENCAPSULATION BIT(2) +enum flow_dissect_ret { + FLOW_DISSECT_RET_OUT_GOOD, + FLOW_DISSECT_RET_OUT_BAD, + FLOW_DISSECT_RET_PROTO_AGAIN, + FLOW_DISSECT_RET_IPPROTO_AGAIN, + FLOW_DISSECT_RET_CONTINUE, +}; + /** * struct flow_dissector_key_basic: * @thoff: Transport header offset -- cgit v1.2.3 From 55199df6d2af55be414f40856efcb527811001bb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 3 Sep 2017 20:27:00 -0700 Subject: net: dsa: Allow switch drivers to indicate number of TX queues Let switch drivers indicate how many TX queues they support. Some switches, such as Broadcom Starfighter 2 are designed with 8 egress queues. Future changes will allow us to leverage the queue mapping and direct the transmission towards a particular queue. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 398ca8d70ccd..dd44d6ce1097 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -243,6 +243,9 @@ struct dsa_switch { /* devlink used to represent this switch device */ struct devlink *devlink; + /* Number of switch port queues */ + unsigned int num_tx_queues; + /* Dynamically allocated ports, keep last */ size_t num_ports; struct dsa_port ports[]; -- cgit v1.2.3 From 5482a978b962abd23f17a004e46d255d11646c20 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 4 Sep 2017 18:30:14 +0200 Subject: net: mdio-mux: add mdio_mux parameter to mdio_mux_init() mdio_mux_init() use the parameter dev for two distinct thing: 1) Have a device for all devm_ functions 2) Get device_node from it Since it is two distinct purpose, this patch add a parameter mdio_mux that is linked to task 2. This will also permit to register an of_node mdio-mux that lacks a direct owning device. For example a mdio-mux which is a subnode of a real device. Signed-off-by: Corentin Labbe Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio-mux.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/mdio-mux.h b/include/linux/mdio-mux.h index 61f5b21b31c7..a5d58f221939 100644 --- a/include/linux/mdio-mux.h +++ b/include/linux/mdio-mux.h @@ -12,7 +12,16 @@ #include #include +/* mdio_mux_init() - Initialize a MDIO mux + * @dev The device owning the MDIO mux + * @mux_node The device node of the MDIO mux + * @switch_fn The function called for switching target MDIO child + * mux_handle A pointer to a (void *) used internaly by mdio-mux + * @data Private data used by switch_fn() + * @mux_bus An optional parent bus (Other case are to use parent_bus property) + */ int mdio_mux_init(struct device *dev, + struct device_node *mux_node, int (*switch_fn) (int cur, int desired, void *data), void **mux_handle, void *data, -- cgit v1.2.3