diff options
| author | David S. Miller <davem@davemloft.net> | 2015-07-21 10:39:07 -0700 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2015-07-21 10:39:07 -0700 |
| commit | e69724f32e62502a6e686eae36b7aadfeea60dca (patch) | |
| tree | a1c33082de3ab1192f072605ab1abe106f11065c /include/uapi/linux | |
| parent | 2070c48cf2b78af89ba529c00992eaaa18df8ef7 (diff) | |
| parent | 614732eaa12dd462c0ab274700bed14f36afea5e (diff) | |
Merge branch 'lwtunnel'
Thomas Graf says:
====================
Lightweight & flow based encapsulation
This series combines the work previously posted by Roopa, Robert and
myself. It's according to what we discussed at NFWS. The motivation
of this series is to:
* Consolidate code between OVS and the rest of the kernel and get
rid of OVS vports and instead represent them as pure net_devices.
* Introduce a lightweight tunneling mechanism which enables flow
based encapsulation to improve scalability on both RX and TX.
* Do the above in an encapsulation unspecific way so that the
encapsulation type is eventually abstracted away from the user.
* Use the same forwarding decision for both native forwarding and
encapsulation, thus allowing a switch between native IPv6 and
UDP encapsulation based on endpoint without requiring additional
logic
The fundamental changes introduced in this series are:
* A new RTA_ENCAP Netlink attribute for routes carrying encapsulation
instructions. Depending on the specified type, the instructions
apply to UDP encapsulations, MPLS and possibly others in the future.
* Depending on the encapsulation type, the output function of the
dst is directly overwritten or the dst merely attaches metadata and
relies on a subsequent net_device to apply it to the packet. The
latter is typically used if an inner and outer IP header exist which
require two subsequent routing lookups to be performed.
* A new metadata_dst structure which can be attached to skbs to
carry metadata in between subsystems. This new metadata transport
is used to provide a single interface for VXLAN, routing and OVS
to communicate through metadata.
The OVS interfaces remain as-is but will transparently create a real
VXLAN net_device in the background. iproute2 is extended with new
use cases:
VXLAN:
ip route add 40.1.1.1/32 encap vxlan id 10 dst 50.1.1.2 dev vxlan0
MPLS:
ip route add 10.1.1.0/30 encap mpls 200 via inet 10.1.1.1 dev swp1
Performance implications:
The additional memory allocation in the receive path should have
performance implications although it is not observable in standard
throughput tests if GRO is properly done. The correct net_device
model outweighs the additional cost of the allocation. Furthermore,
this implication can be relaxed by reintroducing a direct unqueued
path from a software device to a consumer like bridge or OVS if
needed.
$ netperf -t TCP_STREAM -H 15.1.1.201
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
15.1.1.201 (15.1.1.201) port 0 AF_INET : demo
Recv Send Send
Socket Socket Message Elapsed
Size Size Size Time Throughput
bytes bytes bytes secs. 10^6bits/sec
87380 16384 16384 10.00 9118.17
Changes since v1:
* Properly initialize tun_id as reported by Julian
* Drop duplicate netif_keep_dst() as reported by Alexei
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/uapi/linux')
| -rw-r--r-- | include/uapi/linux/fib_rules.h | 2 | ||||
| -rw-r--r-- | include/uapi/linux/if_link.h | 1 | ||||
| -rw-r--r-- | include/uapi/linux/lwtunnel.h | 16 | ||||
| -rw-r--r-- | include/uapi/linux/mpls_iptunnel.h | 28 | ||||
| -rw-r--r-- | include/uapi/linux/openvswitch.h | 2 | ||||
| -rw-r--r-- | include/uapi/linux/rtnetlink.h | 17 |
6 files changed, 64 insertions, 2 deletions
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 2b82d7e30974..96161b8202b5 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -43,7 +43,7 @@ enum { FRA_UNUSED5, FRA_FWMARK, /* mark */ FRA_FLOW, /* flow/class id */ - FRA_UNUSED6, + FRA_TUN_ID, FRA_SUPPRESS_IFGROUP, FRA_SUPPRESS_PREFIXLEN, FRA_TABLE, /* Extended table id */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 24d68b797c59..9eeb5d9cf8f0 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -382,6 +382,7 @@ enum { IFLA_VXLAN_REMCSUM_RX, IFLA_VXLAN_GBP, IFLA_VXLAN_REMCSUM_NOPARTIAL, + IFLA_VXLAN_FLOWBASED, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h new file mode 100644 index 000000000000..31377bbea3f8 --- /dev/null +++ b/include/uapi/linux/lwtunnel.h @@ -0,0 +1,16 @@ +#ifndef _UAPI_LWTUNNEL_H_ +#define _UAPI_LWTUNNEL_H_ + +#include <linux/types.h> + +enum lwtunnel_encap_types { + LWTUNNEL_ENCAP_NONE, + LWTUNNEL_ENCAP_MPLS, + LWTUNNEL_ENCAP_IP, + __LWTUNNEL_ENCAP_MAX, +}; + +#define LWTUNNEL_ENCAP_MAX (__LWTUNNEL_ENCAP_MAX - 1) + + +#endif /* _UAPI_LWTUNNEL_H_ */ diff --git a/include/uapi/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h new file mode 100644 index 000000000000..d80a0498f77e --- /dev/null +++ b/include/uapi/linux/mpls_iptunnel.h @@ -0,0 +1,28 @@ +/* + * mpls tunnel api + * + * Authors: + * Roopa Prabhu <roopa@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _UAPI_LINUX_MPLS_IPTUNNEL_H +#define _UAPI_LINUX_MPLS_IPTUNNEL_H + +/* MPLS tunnel attributes + * [RTA_ENCAP] = { + * [MPLS_IPTUNNEL_DST] + * } + */ +enum { + MPLS_IPTUNNEL_UNSPEC, + MPLS_IPTUNNEL_DST, + __MPLS_IPTUNNEL_MAX, +}; +#define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1) + +#endif /* _UAPI_LINUX_MPLS_IPTUNNEL_H */ diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 1dab77601c21..d6b885460187 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -321,7 +321,7 @@ enum ovs_key_attr { * the accepted length of the array. */ #ifdef __KERNEL__ - OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ + OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ #endif __OVS_KEY_ATTR_MAX }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index fdd8f07f1d34..47d24cb3fbc1 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -286,6 +286,21 @@ enum rt_class_t { /* Routing message attributes */ +enum ip_tunnel_t { + IP_TUN_UNSPEC, + IP_TUN_ID, + IP_TUN_DST, + IP_TUN_SRC, + IP_TUN_TTL, + IP_TUN_TOS, + IP_TUN_SPORT, + IP_TUN_DPORT, + IP_TUN_FLAGS, + __IP_TUN_MAX, +}; + +#define IP_TUN_MAX (__IP_TUN_MAX - 1) + enum rtattr_type_t { RTA_UNSPEC, RTA_DST, @@ -308,6 +323,8 @@ enum rtattr_type_t { RTA_VIA, RTA_NEWDST, RTA_PREF, + RTA_ENCAP_TYPE, + RTA_ENCAP, __RTA_MAX }; |
