From 730fc4371333636a00fed32c587fc1e85c5367e2 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:37 -0700 Subject: mpls: Add definition for IPPROTO_MPLS Add uapi define for MPLS over IP. Acked-by: Jiri Pirko Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux/in.h') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 589ced069e8a..641338bef651 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -69,6 +69,8 @@ enum { #define IPPROTO_SCTP IPPROTO_SCTP IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */ #define IPPROTO_UDPLITE IPPROTO_UDPLITE + IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */ +#define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MAX -- cgit v1.2.3 From 90c337da1524863838658078ec34241f45d8394d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 6 Jun 2015 21:17:57 -0700 Subject: inet: add IP_BIND_ADDRESS_NO_PORT to overcome bind(0) limitations When an application needs to force a source IP on an active TCP socket it has to use bind(IP, port=x). As most applications do not want to deal with already used ports, x is often set to 0, meaning the kernel is in charge to find an available port. But kernel does not know yet if this socket is going to be a listener or be connected. It has very limited choices (no full knowledge of final 4-tuple for a connect()) With limited ephemeral port range (about 32K ports), it is very easy to fill the space. This patch adds a new SOL_IP socket option, asking kernel to ignore the 0 port provided by application in bind(IP, port=0) and only remember the given IP address. The port will be automatically chosen at connect() time, in a way that allows sharing a source port as long as the 4-tuples are unique. This new feature is available for both IPv4 and IPv6 (Thanks Neal) Tested: Wrote a test program and checked its behavior on IPv4 and IPv6. strace(1) shows sequences of bind(IP=127.0.0.2, port=0) followed by connect(). Also getsockname() show that the port is still 0 right after bind() but properly allocated after connect(). socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5 setsockopt(5, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0 bind(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, 16) = 0 getsockname(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0 connect(5, {sa_family=AF_INET, sin_port=htons(53174), sin_addr=inet_addr("127.0.0.3")}, 16) = 0 getsockname(5, {sa_family=AF_INET, sin_port=htons(38050), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0 IPv6 test : socket(PF_INET6, SOCK_STREAM, IPPROTO_IP) = 7 setsockopt(7, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0 bind(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0 getsockname(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0 connect(7, {sa_family=AF_INET6, sin6_port=htons(57300), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0 getsockname(7, {sa_family=AF_INET6, sin6_port=htons(60964), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0 I was able to bind()/connect() a million concurrent IPv4 sockets, instead of ~32000 before patch. lpaa23:~# ulimit -n 1000010 lpaa23:~# ./bind --connect --num-flows=1000000 & 1000000 sockets lpaa23:~# grep TCP /proc/net/sockstat TCP: inuse 2000063 orphan 0 tw 47 alloc 2000157 mem 66 Check that a given source port is indeed used by many different connections : lpaa23:~# ss -t src :40000 | head -10 State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 0 127.0.0.2:40000 127.0.202.33:44983 ESTAB 0 0 127.0.0.2:40000 127.2.27.240:44983 ESTAB 0 0 127.0.0.2:40000 127.2.98.5:44983 ESTAB 0 0 127.0.0.2:40000 127.0.124.196:44983 ESTAB 0 0 127.0.0.2:40000 127.2.139.38:44983 ESTAB 0 0 127.0.0.2:40000 127.1.59.80:44983 ESTAB 0 0 127.0.0.2:40000 127.3.6.228:44983 ESTAB 0 0 127.0.0.2:40000 127.0.38.53:44983 ESTAB 0 0 127.0.0.2:40000 127.1.197.10:44983 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/in.h') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 641338bef651..83d6236a2f08 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -112,6 +112,7 @@ struct in_addr { #define IP_MINTTL 21 #define IP_NODEFRAG 22 #define IP_CHECKSUM 23 +#define IP_BIND_ADDRESS_NO_PORT 24 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ -- cgit v1.2.3 From 279c6c7fa64f5763e6b9f05e7ab3840092e702e7 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 29 Jun 2015 14:57:48 -1000 Subject: api: fix compatibility of linux/in.h with netinet/in.h u This fixes breakage to iproute2 build with recent kernel headers caused by: commit a263653ed798216c0069922d7b5237ca49436007 Author: Pablo Neira Ayuso Date: Wed Jun 17 10:28:27 2015 -0500 netfilter: don't pull include/linux/netfilter.h from netns headers The issue is that definitions in linux/in.h overlap with those in netinet/in.h. This patch solves this by introducing the same mechanism as was used to solve the same problem with linux/in6.h Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux/in.h') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 83d6236a2f08..eaf94919291a 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -19,8 +19,10 @@ #define _UAPI_LINUX_IN_H #include +#include #include +#if __UAPI_DEF_IN_IPPROTO /* Standard well-defined IP protocols. */ enum { IPPROTO_IP = 0, /* Dummy protocol for TCP */ @@ -75,12 +77,14 @@ enum { #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MAX }; +#endif - +#if __UAPI_DEF_IN_ADDR /* Internet address. */ struct in_addr { __be32 s_addr; }; +#endif #define IP_TOS 1 #define IP_TTL 2 @@ -158,6 +162,7 @@ struct in_addr { /* Request struct for multicast socket ops */ +#if __UAPI_DEF_IP_MREQ struct ip_mreq { struct in_addr imr_multiaddr; /* IP multicast address of group */ struct in_addr imr_interface; /* local IP address of interface */ @@ -209,14 +214,18 @@ struct group_filter { #define GROUP_FILTER_SIZE(numsrc) \ (sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \ + (numsrc) * sizeof(struct __kernel_sockaddr_storage)) +#endif +#if __UAPI_DEF_IN_PKTINFO struct in_pktinfo { int ipi_ifindex; struct in_addr ipi_spec_dst; struct in_addr ipi_addr; }; +#endif /* Structure describing an Internet (IP) socket address. */ +#if __UAPI_DEF_SOCKADDR_IN #define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */ struct sockaddr_in { __kernel_sa_family_t sin_family; /* Address family */ @@ -228,8 +237,9 @@ struct sockaddr_in { sizeof(unsigned short int) - sizeof(struct in_addr)]; }; #define sin_zero __pad /* for BSD UNIX comp. -FvK */ +#endif - +#if __UAPI_DEF_IN_CLASS /* * Definitions of the bits in an Internet address integer. * On subnets, host and network parts are found according @@ -280,7 +290,7 @@ struct sockaddr_in { #define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ #define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ #define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ - +#endif /* contains the htonl type stuff.. */ #include -- cgit v1.2.3