From e91ded8db57472c20b59b2242b100764cc152a10 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 4 Aug 2014 04:50:41 -0400 Subject: uapi: netfilter_arp: use __u8 instead of u_int8_t Similarly, the u_int8_t type is non-standard and not defined. Change it to use __u8 like the rest of the netfilter headers. Signed-off-by: Mike Frysinger Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_arp/arpt_mangle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter_arp/arpt_mangle.h b/include/uapi/linux/netfilter_arp/arpt_mangle.h index 250f502902bb..8c2b16a1f5a0 100644 --- a/include/uapi/linux/netfilter_arp/arpt_mangle.h +++ b/include/uapi/linux/netfilter_arp/arpt_mangle.h @@ -13,7 +13,7 @@ struct arpt_mangle union { struct in_addr tgt_ip; } u_t; - u_int8_t flags; + __u8 flags; int target; }; -- cgit v1.2.3 From e2a093ff0dbfa4c5d99f25241cf33325e9691d91 Mon Sep 17 00:00:00 2001 From: Ana Rey Date: Wed, 6 Aug 2014 13:52:49 +0200 Subject: netfilter: nft_meta: add pkttype support Add pkttype support for ip, ipv6 and inet families of tables. This allows you to fetch the meta packet type based on the link layer information. The loopback traffic is a special case, the packet type is guessed from the network layer header. No special handling for bridge and arp since we're not going to see such traffic in the loopback interface. Joint work with Alvaro Neira Ayuso Signed-off-by: Alvaro Neira Ayuso Signed-off-by: Ana Rey Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 801bdd1e56e3..98144cdd8986 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -571,6 +571,7 @@ enum nft_exthdr_attributes { * @NFT_META_L4PROTO: layer 4 protocol number * @NFT_META_BRI_IIFNAME: packet input bridge interface name * @NFT_META_BRI_OIFNAME: packet output bridge interface name + * @NFT_META_PKTTYPE: packet type (skb->pkt_type), special handling for loopback */ enum nft_meta_keys { NFT_META_LEN, @@ -592,6 +593,7 @@ enum nft_meta_keys { NFT_META_L4PROTO, NFT_META_BRI_IIFNAME, NFT_META_BRI_OIFNAME, + NFT_META_PKTTYPE, }; /** -- cgit v1.2.3 From afc5be3079796b024823bad42dc5ebf716453575 Mon Sep 17 00:00:00 2001 From: Ana Rey Date: Sun, 24 Aug 2014 14:08:36 +0200 Subject: netfilter: nft_meta: Add cpu attribute support Add cpu support to meta expresion. This allows you to match packets with cpu number. Signed-off-by: Ana Rey Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 98144cdd8986..c9b6f00a3fb7 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -572,6 +572,7 @@ enum nft_exthdr_attributes { * @NFT_META_BRI_IIFNAME: packet input bridge interface name * @NFT_META_BRI_OIFNAME: packet output bridge interface name * @NFT_META_PKTTYPE: packet type (skb->pkt_type), special handling for loopback + * @NFT_META_CPU: cpu id through smp_processor_id() */ enum nft_meta_keys { NFT_META_LEN, @@ -594,6 +595,7 @@ enum nft_meta_keys { NFT_META_BRI_IIFNAME, NFT_META_BRI_OIFNAME, NFT_META_PKTTYPE, + NFT_META_CPU, }; /** -- cgit v1.2.3 From 0e227084aee36b3ba27b4fc9cd9e425be6ce2ab8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 12 Aug 2014 20:34:30 +0200 Subject: cfg80211: clarify BSS probe response vs. beacon data There are a few possible cases of where BSS data came from: 1) only a beacon has been received 2) only a probe response has been received 3) the driver didn't report what it received (this happens when using cfg80211_inform_bss[_width]()) 4) both probe response and beacon data has been received Unfortunately, in the userspace API, a few things weren't there: a) there was no way to differentiate cases 1) and 4) above without comparing the data of the IEs b) the TSF was always from the last frame, instead of being exposed for beacon/probe response separately like IEs Fix this by i) exporting a new flag attribute that indicates whether or not probe response data has been received - this addresses (a) ii) exporting a BEACON_TSF attribute that holds the beacon's TSF if a beacon has been received iii) not exporting the beacon attributes in case (3) above as that would just lead userspace into thinking the data actually came from a beacon when that isn't clear To implement this, track inside the IEs struct whether or not it (definitely) came from a beacon. Reported-by: William Seto Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f1db15b9c041..d097568da690 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3055,14 +3055,20 @@ enum nl80211_bss_scan_width { * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets) * @NL80211_BSS_FREQUENCY: frequency in MHz (u32) * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64) + * (if @NL80211_BSS_PRESP_DATA is present then this is known to be + * from a probe response, otherwise it may be from the same beacon + * that the NL80211_BSS_BEACON_TSF will be from) * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16) * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the * raw information elements from the probe response/beacon (bin); - * if the %NL80211_BSS_BEACON_IES attribute is present, the IEs here are - * from a Probe Response frame; otherwise they are from a Beacon frame. + * if the %NL80211_BSS_BEACON_IES attribute is present and the data is + * different then the IEs here are from a Probe Response frame; otherwise + * they are from a Beacon frame. * However, if the driver does not indicate the source of the IEs, these * IEs may be from either frame subtype. + * If present, the @NL80211_BSS_PRESP_DATA attribute indicates that the + * data here is known to be from a probe response, without any heuristics. * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon * in mBm (100 * dBm) (s32) * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon @@ -3074,6 +3080,10 @@ enum nl80211_bss_scan_width { * yet been received * @NL80211_BSS_CHAN_WIDTH: channel width of the control channel * (u32, enum nl80211_bss_scan_width) + * @NL80211_BSS_BEACON_TSF: TSF of the last received beacon (u64) + * (not present if no beacon frame has been received yet) + * @NL80211_BSS_PRESP_DATA: the data in @NL80211_BSS_INFORMATION_ELEMENTS and + * @NL80211_BSS_TSF is known to be from a probe response (flag attribute) * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -3091,6 +3101,8 @@ enum nl80211_bss { NL80211_BSS_SEEN_MS_AGO, NL80211_BSS_BEACON_IES, NL80211_BSS_CHAN_WIDTH, + NL80211_BSS_BEACON_TSF, + NL80211_BSS_PRESP_DATA, /* keep last */ __NL80211_BSS_AFTER_LAST, -- cgit v1.2.3 From f111f780ae1abf4cdc464f24293be90c010a04f6 Mon Sep 17 00:00:00 2001 From: Alexey Perevalov Date: Wed, 20 Aug 2014 22:03:18 +0400 Subject: netfilter: nfnetlink_acct: add filter support to nfacct counter list/reset You can use this to skip accounting objects when listing/resetting via NFNL_MSG_ACCT_GET/NFNL_MSG_ACCT_GET_CTRZERO messages with the NLM_F_DUMP netlink flag. The filtering covers the following cases: 1. No filter specified. In this case, the client will get old behaviour, 2. List/reset counter object only: In this case, you have to use NFACCT_F_QUOTA as mask and value 0. 3. List/reset quota objects only: You have to use NFACCT_F_QUOTA_PKTS as mask and value - the same, for byte based quota mask should be NFACCT_F_QUOTA_BYTES and value - the same. If you want to obtain the object with any quota type (ie. NFACCT_F_QUOTA_PKTS|NFACCT_F_QUOTA_BYTES), you need to perform two dump requests, one to obtain NFACCT_F_QUOTA_PKTS objects and another for NFACCT_F_QUOTA_BYTES. Signed-off-by: Alexey Perevalov Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_acct.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h index 51404ec19022..f3e34dbbf966 100644 --- a/include/uapi/linux/netfilter/nfnetlink_acct.h +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h @@ -28,9 +28,17 @@ enum nfnl_acct_type { NFACCT_USE, NFACCT_FLAGS, NFACCT_QUOTA, + NFACCT_FILTER, __NFACCT_MAX }; #define NFACCT_MAX (__NFACCT_MAX - 1) +enum nfnl_attr_filter_type { + NFACCT_FILTER_UNSPEC, + NFACCT_FILTER_MASK, + NFACCT_FILTER_VALUE, + __NFACCT_FILTER_MAX +}; +#define NFACCT_FILTER_MAX (__NFACCT_FILTER_MAX - 1) #endif /* _UAPI_NFNL_ACCT_H_ */ -- cgit v1.2.3 From 3e8a72d1dae374cf6fc1dba97cec663585845ff9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 27 Aug 2014 17:04:46 -0700 Subject: net: dsa: reduce number of protocol hooks DSA is currently registering one packet_type function per EtherType it needs to intercept in the receive path of a DSA-enabled Ethernet device. Right now we have three of them: trailer, DSA and eDSA, and there might be more in the future, this will not scale to the addition of new protocols. This patch proceeds with adding a new layer of abstraction and two new functions: dsa_switch_rcv() which will dispatch into the tag-protocol specific receive function implemented by net/dsa/tag_*.c dsa_slave_xmit() which will dispatch into the tag-protocol specific transmit function implemented by net/dsa/tag_*.c When we do create the per-port slave network devices, we iterate over the switch protocol to assign the DSA-specific receive and transmit operations. A new fake ethertype value is used: ETH_P_XDSA to illustrate the fact that this is no longer going to look like ETH_P_DSA or ETH_P_TRAILER like it used to be. This allows us to greatly simplify the check in eth_type_trans() and always override the skb->protocol with ETH_P_XDSA for Ethernet switches tagged protocol, while also reducing the number repetitive slave netdevice_ops assignments. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 0f8210b8e0bc..aa63ed023c2b 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -128,6 +128,7 @@ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ #define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */ #define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */ +#define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */ /* * This is an Ethernet frame header. -- cgit v1.2.3 From 880a6fab8f6ba5b5abe59ea68533202ddea1012c Mon Sep 17 00:00:00 2001 From: Christophe Gouault Date: Fri, 29 Aug 2014 16:16:05 +0200 Subject: xfrm: configure policy hash table thresholds by netlink Enable to specify local and remote prefix length thresholds for the policy hash table via a netlink XFRM_MSG_NEWSPDINFO message. prefix length thresholds are specified by XFRMA_SPD_IPV4_HTHRESH and XFRMA_SPD_IPV6_HTHRESH optional attributes (struct xfrmu_spdhthresh). example: struct xfrmu_spdhthresh thresh4 = { .lbits = 0; .rbits = 24; }; struct xfrmu_spdhthresh thresh6 = { .lbits = 0; .rbits = 56; }; struct nlmsghdr *hdr; struct nl_msg *msg; msg = nlmsg_alloc(); hdr = nlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, XFRMA_SPD_IPV4_HTHRESH, sizeof(__u32), NLM_F_REQUEST); nla_put(msg, XFRMA_SPD_IPV4_HTHRESH, sizeof(thresh4), &thresh4); nla_put(msg, XFRMA_SPD_IPV6_HTHRESH, sizeof(thresh6), &thresh6); nla_send_auto(sk, msg); The numbers are the policy selector minimum prefix lengths to put a policy in the hash table. - lbits is the local threshold (source address for out policies, destination address for in and fwd policies). - rbits is the remote threshold (destination address for out policies, source address for in and fwd policies). The default values are: XFRMA_SPD_IPV4_HTHRESH: 32 32 XFRMA_SPD_IPV6_HTHRESH: 128 128 Dynamic re-building of the SPD is performed when the thresholds values are changed. The current thresholds can be read via a XFRM_MSG_GETSPDINFO request: the kernel replies to XFRM_MSG_GETSPDINFO requests by an XFRM_MSG_NEWSPDINFO message, with both attributes XFRMA_SPD_IPV4_HTHRESH and XFRMA_SPD_IPV6_HTHRESH. Signed-off-by: Christophe Gouault Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 25e5dd916ba4..02d5125a5ee8 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -328,6 +328,8 @@ enum xfrm_spdattr_type_t { XFRMA_SPD_UNSPEC, XFRMA_SPD_INFO, XFRMA_SPD_HINFO, + XFRMA_SPD_IPV4_HTHRESH, + XFRMA_SPD_IPV6_HTHRESH, __XFRMA_SPD_MAX #define XFRMA_SPD_MAX (__XFRMA_SPD_MAX - 1) @@ -347,6 +349,11 @@ struct xfrmu_spdhinfo { __u32 spdhmcnt; }; +struct xfrmu_spdhthresh { + __u8 lbits; + __u8 rbits; +}; + struct xfrm_usersa_info { struct xfrm_selector sel; struct xfrm_id id; -- cgit v1.2.3 From 1c7e23bf50264a251de53ad9fb1604683b801258 Mon Sep 17 00:00:00 2001 From: Assaf Krauss Date: Wed, 3 Sep 2014 15:25:00 +0300 Subject: nl80211: Allow declaring RRM-related features Radio Resource Measurement (RRM) is a bundle of features which will require the entire stack to participate. In this patch, the driver is given the opportunity to advertise the device's support for these RRM-related features, using feature flags: 1. Support for Quiet IEs. 2. Support for adding DS Parameter Set IE to probe requests. 3. Support for adding WFA TPC Report IE to probe requests. 4. Support for inserting tx power value to tx-ed packets at a fixed offset. This is used in action frames, such as RRM's Link Measurement Report, where the actual tx power should be reported in the frame. Signed-off-by: Assaf Krauss Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d097568da690..c166d3d23e55 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3968,6 +3968,16 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE: This driver supports dynamic * channel bandwidth change (e.g., HT 20 <-> 40 MHz channel) during the * lifetime of a BSS. + * @NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES: This device adds a DS Parameter + * Set IE to probe requests. + * @NL80211_FEATURE_WFA_TPC_IE_IN_PROBES: This device adds a WFA TPC Report IE + * to probe requests. + * @NL80211_FEATURE_QUIET: This device, in client mode, supports Quiet Period + * requests sent to it by an AP. + * @NL80211_FEATURE_TX_POWER_INSERTION: This device is capable of inserting the + * current tx power value into the TPC Report IE in the spectrum + * management TPC Report action frame, and in the Radio Measurement Link + * Measurement Report action frame. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3989,6 +3999,10 @@ enum nl80211_feature_flags { NL80211_FEATURE_USERSPACE_MPM = 1 << 16, NL80211_FEATURE_ACTIVE_MONITOR = 1 << 17, NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE = 1 << 18, + NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES = 1 << 19, + NL80211_FEATURE_WFA_TPC_IE_IN_PROBES = 1 << 20, + NL80211_FEATURE_QUIET = 1 << 21, + NL80211_FEATURE_TX_POWER_INSERTION = 1 << 22, }; /** -- cgit v1.2.3 From bab5ab7d2a5466406e8003d038cc7ce6b2d5d804 Mon Sep 17 00:00:00 2001 From: Assaf Krauss Date: Wed, 3 Sep 2014 15:25:01 +0300 Subject: nl80211: Add flag attribute for RRM connections Add a flag attribute to use in associations, for tagging the target connection as supporting RRM. It is the responsibility of upper layers to set this flag only if both the underlying device, and the target network indeed support RRM. To be used in ASSOCIATE and CONNECT commands. Signed-off-by: Assaf Krauss Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c166d3d23e55..de69d3df5e55 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1594,6 +1594,17 @@ enum nl80211_commands { * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. * + * @NL80211_ATTR_USE_RRM: flag for indicating whether the current connection + * shall support Radio Resource Measurements (11k). This attribute can be + * used with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests. + * User space applications are expected to use this flag only if the + * underlying device supports these minimal RRM features: + * %NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES, + * %NL80211_FEATURE_QUIET, + * If this flag is used, driver must add the Power Capabilities IE to the + * association request. In addition, it must also set the RRM capability + * flag in the association request's Capability Info field. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1936,6 +1947,8 @@ enum nl80211_attrs { NL80211_ATTR_TDLS_INITIATOR, + NL80211_ATTR_USE_RRM, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 3057dbfdab1b86a77ed6d512fc857b032f78663b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 4 Sep 2014 23:57:40 +0200 Subject: cfg80211: enable dynack through nl80211 Enable ACK timeout estimation algorithm (dynack) using mac80211 set_coverage_class API. Dynack is activated passing coverage class equals to -1 to lower drivers and it is automatically disabled setting valid value for coverage class. Define NL80211_ATTR_WIPHY_DYN_ACK flag attribute to enable dynack from userspace. In order to activate dynack NL80211_FEATURE_ACKTO_ESTIMATION feature flag must be set by lower drivers to indicate dynack capability. Signed-off-by: Lorenzo Bianconi Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index de69d3df5e55..29c4399e08b9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1605,6 +1605,12 @@ enum nl80211_commands { * association request. In addition, it must also set the RRM capability * flag in the association request's Capability Info field. * + * @NL80211_ATTR_WIPHY_DYN_ACK: flag attribute used to enable ACK timeout + * estimation algorithm (dynack). In order to activate dynack + * %NL80211_FEATURE_ACKTO_ESTIMATION feature flag must be set by lower + * drivers to indicate dynack capability. Dynack is automatically disabled + * setting valid value for coverage class. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1949,6 +1955,8 @@ enum nl80211_attrs { NL80211_ATTR_USE_RRM, + NL80211_ATTR_WIPHY_DYN_ACK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3991,6 +3999,9 @@ enum nl80211_ap_sme_features { * current tx power value into the TPC Report IE in the spectrum * management TPC Report action frame, and in the Radio Measurement Link * Measurement Report action frame. + * @NL80211_FEATURE_ACKTO_ESTIMATION: This driver supports dynamic ACK timeout + * estimation (dynack). %NL80211_ATTR_WIPHY_DYN_ACK flag attribute is used + * to enable dynack. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -4016,6 +4027,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_WFA_TPC_IE_IN_PROBES = 1 << 20, NL80211_FEATURE_QUIET = 1 << 21, NL80211_FEATURE_TX_POWER_INSERTION = 1 << 22, + NL80211_FEATURE_ACKTO_ESTIMATION = 1 << 23, }; /** -- cgit v1.2.3 From f0db9b073415848709dd59a6394969882f517da9 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Wed, 3 Sep 2014 03:17:20 +0530 Subject: ethtool: Add generic options for tunables This patch adds new ethtool cmd, ETHTOOL_GTUNABLE & ETHTOOL_STUNABLE for getting tunable values from driver. Add get_tunable and set_tunable to ethtool_ops. Driver implements these functions for getting/setting tunable value. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index e3c7a719c76b..7a364f2f3d3f 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -209,6 +209,32 @@ struct ethtool_value { __u32 data; }; +enum tunable_id { + ETHTOOL_ID_UNSPEC, + ETHTOOL_RX_COPYBREAK, +}; + +enum tunable_type_id { + ETHTOOL_TUNABLE_UNSPEC, + ETHTOOL_TUNABLE_U8, + ETHTOOL_TUNABLE_U16, + ETHTOOL_TUNABLE_U32, + ETHTOOL_TUNABLE_U64, + ETHTOOL_TUNABLE_STRING, + ETHTOOL_TUNABLE_S8, + ETHTOOL_TUNABLE_S16, + ETHTOOL_TUNABLE_S32, + ETHTOOL_TUNABLE_S64, +}; + +struct ethtool_tunable { + __u32 cmd; + __u32 id; + __u32 type_id; + __u32 len; + void *data[0]; +}; + /** * struct ethtool_regs - hardware register dump * @cmd: Command number = %ETHTOOL_GREGS @@ -1152,6 +1178,8 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */ #define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */ +#define ETHTOOL_GTUNABLE 0x00000048 /* Get tunable configuration */ +#define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET -- cgit v1.2.3 From 3045d76070abe725dbb7fd8ff39c27b820d5a7eb Mon Sep 17 00:00:00 2001 From: Ana Rey Date: Tue, 2 Sep 2014 20:36:14 +0200 Subject: netfilter: nf_tables: add devgroup support in meta expresion Add devgroup support to let us match device group of a packets incoming or outgoing interface. Signed-off-by: Ana Rey Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c9b6f00a3fb7..c000947d3f38 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -573,6 +573,8 @@ enum nft_exthdr_attributes { * @NFT_META_BRI_OIFNAME: packet output bridge interface name * @NFT_META_PKTTYPE: packet type (skb->pkt_type), special handling for loopback * @NFT_META_CPU: cpu id through smp_processor_id() + * @NFT_META_IIFGROUP: packet input interface group + * @NFT_META_OIFGROUP: packet output interface group */ enum nft_meta_keys { NFT_META_LEN, @@ -596,6 +598,8 @@ enum nft_meta_keys { NFT_META_BRI_OIFNAME, NFT_META_PKTTYPE, NFT_META_CPU, + NFT_META_IIFGROUP, + NFT_META_OIFGROUP, }; /** -- cgit v1.2.3 From e42eff8a32f8b7bde88ea3c5a56391407cbe84f3 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Thu, 4 Sep 2014 14:06:14 +0200 Subject: netfilter: nft_nat: include a flag attribute Both SNAT and DNAT (and the upcoming masquerade) can have additional configuration parameters, such as port randomization and NAT addressing persistence. We can cover these scenarios by simply adding a flag attribute for userspace to fill when needed. The flags to use are defined in include/uapi/linux/netfilter/nf_nat.h: NF_NAT_RANGE_MAP_IPS NF_NAT_RANGE_PROTO_SPECIFIED NF_NAT_RANGE_PROTO_RANDOM NF_NAT_RANGE_PERSISTENT NF_NAT_RANGE_PROTO_RANDOM_FULLY NF_NAT_RANGE_PROTO_RANDOM_ALL The caller must take care of not messing up with the flags, as they are added unconditionally to the final resulting nf_nat_range. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_nat.h | 5 +++++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h index 1ad3659102b6..0880781ad7b6 100644 --- a/include/uapi/linux/netfilter/nf_nat.h +++ b/include/uapi/linux/netfilter/nf_nat.h @@ -13,6 +13,11 @@ #define NF_NAT_RANGE_PROTO_RANDOM_ALL \ (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY) +#define NF_NAT_RANGE_MASK \ + (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \ + NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \ + NF_NAT_RANGE_PROTO_RANDOM_FULLY) + struct nf_nat_ipv4_range { unsigned int flags; __be32 min_ip; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c000947d3f38..6022c6e6be18 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -785,6 +785,7 @@ enum nft_nat_types { * @NFTA_NAT_REG_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) * @NFTA_NAT_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) * @NFTA_NAT_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + * @NFTA_NAT_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) */ enum nft_nat_attributes { NFTA_NAT_UNSPEC, @@ -794,6 +795,7 @@ enum nft_nat_attributes { NFTA_NAT_REG_ADDR_MAX, NFTA_NAT_REG_PROTO_MIN, NFTA_NAT_REG_PROTO_MAX, + NFTA_NAT_FLAGS, __NFTA_NAT_MAX }; #define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) -- cgit v1.2.3 From 9ba1f726bec090399eb9bb9157eb32dedc8e8c45 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Mon, 8 Sep 2014 13:45:00 +0200 Subject: netfilter: nf_tables: add new nft_masq expression The nft_masq expression is intended to perform NAT in the masquerade flavour. We decided to have the masquerade functionality in a separated expression other than nft_nat. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 6022c6e6be18..eeec0ae845ef 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -800,4 +800,15 @@ enum nft_nat_attributes { }; #define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) +/** + * enum nft_masq_attributes - nf_tables masquerade expression attributes + * + * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + */ +enum nft_masq_attributes { + NFTA_MASQ_FLAGS, + __NFTA_MASQ_MAX +}; +#define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1) + #endif /* _LINUX_NF_TABLES_H */ -- cgit v1.2.3 From daedfb22451dd02b35c0549566cbb7cc06bdd53b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Sep 2014 22:17:18 -0700 Subject: net: filter: split filter.h and expose eBPF to user space allow user space to generate eBPF programs uapi/linux/bpf.h: eBPF instruction set definition linux/filter.h: the rest This patch only moves macro definitions, but practically it freezes existing eBPF instruction set, though new instructions can still be added in the future. These eBPF definitions cannot go into uapi/linux/filter.h, since the names may conflict with existing applications. Full eBPF ISA description is in Documentation/networking/filter.txt Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/bpf.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 include/uapi/linux/bpf.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 24e9033f8b3f..fb3f7b675229 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -67,6 +67,7 @@ header-y += bfs_fs.h header-y += binfmts.h header-y += blkpg.h header-y += blktrace_api.h +header-y += bpf.h header-y += bpqether.h header-y += bsg.h header-y += btrfs.h diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h new file mode 100644 index 000000000000..479ed0b6be16 --- /dev/null +++ b/include/uapi/linux/bpf.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_H__ +#define _UAPI__LINUX_BPF_H__ + +#include + +/* Extended instruction set based on top of classic BPF */ + +/* instruction classes */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word */ +#define BPF_XADD 0xc0 /* exclusive add */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + +/* BPF has 10 general purpose 64-bit registers and stack frame. */ +#define MAX_BPF_REG __MAX_BPF_REG + +struct bpf_insn { + __u8 code; /* opcode */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +#endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From e5c3ea5c668033b303e7ac835d7d91da32d97958 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 5 Sep 2014 15:51:31 +0200 Subject: bridge: implement rtnl_link_ops->get_size and rtnl_link_ops->fill_info Allow rtnetlink users to get bridge master info in IFLA_INFO_DATA attr This initial part implements forward_delay, hello_time, max_age options. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ff957604a721..c80f95f6ee78 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -215,6 +215,18 @@ enum in6_addr_gen_mode { IN6_ADDR_GEN_MODE_NONE, }; +/* Bridge section */ + +enum { + IFLA_BR_UNSPEC, + IFLA_BR_FORWARD_DELAY, + IFLA_BR_HELLO_TIME, + IFLA_BR_MAX_AGE, + __IFLA_BR_MAX, +}; + +#define IFLA_BR_MAX (__IFLA_BR_MAX - 1) + enum { BRIDGE_MODE_UNSPEC, BRIDGE_MODE_HAIRPIN, -- cgit v1.2.3 From 960d01acf62747d6518694f92be5b06f67473833 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Sep 2014 22:55:35 +0300 Subject: cfg80211: add WMM traffic stream API Add nl80211 and driver API to validate, add and delete traffic streams with appropriate settings. The API calls for userspace doing the action frame handshake with the peer, and then allows only to set up the parameters in the driver. To avoid setting up a session only to tear it down again, the validate API is provided, but the real usage later can still fail so userspace must be prepared for that. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 29c4399e08b9..e5b8caf5e466 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -722,6 +722,22 @@ * QoS mapping is relevant for IP packets, it is only valid during an * association. This is cleared on disassociation and AP restart. * + * @NL80211_CMD_ADD_TX_TS: Ask the kernel to add a traffic stream for the given + * %NL80211_ATTR_TSID and %NL80211_ATTR_MAC with %NL80211_ATTR_USER_PRIO + * and %NL80211_ATTR_ADMITTED_TIME parameters. + * Note that the action frame handshake with the AP shall be handled by + * userspace via the normal management RX/TX framework, this only sets + * up the TX TS in the driver/device. + * If the admitted time attribute is not added then the request just checks + * if a subsequent setup could be successful, the intent is to use this to + * avoid setting up a session with the AP when local restrictions would + * make that impossible. However, the subsequent "real" setup may still + * fail even if the check was successful. + * @NL80211_CMD_DEL_TX_TS: Remove an existing TS with the %NL80211_ATTR_TSID + * and %NL80211_ATTR_MAC parameters. It isn't necessary to call this + * before removing a station entry entirely, or before disassociating + * or similar, cleanup will happen in the driver/device in this case. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -893,6 +909,9 @@ enum nl80211_commands { NL80211_CMD_SET_QOS_MAP, + NL80211_CMD_ADD_TX_TS, + NL80211_CMD_DEL_TX_TS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1611,6 +1630,11 @@ enum nl80211_commands { * drivers to indicate dynack capability. Dynack is automatically disabled * setting valid value for coverage class. * + * @NL80211_ATTR_TSID: a TSID value (u8 attribute) + * @NL80211_ATTR_USER_PRIO: user priority value (u8 attribute) + * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds + * (per second) (u16 attribute) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1957,6 +1981,10 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_DYN_ACK, + NL80211_ATTR_TSID, + NL80211_ATTR_USER_PRIO, + NL80211_ATTR_ADMITTED_TIME, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 18998c381b19bfc3c285361ff6200ded7444aa2c Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 10 Sep 2014 14:07:34 +0300 Subject: cfg80211: allow requesting SMPS mode on ap start Add feature bits to indicate device support for static-smps and dynamic-smps modes. Add a new NL80211_ATTR_SMPS_MODE attribue to allow configuring the smps mode to be used by the ap (e.g. configuring to ap to dynamic smps mode will reduce power consumption while having minor effect on throughput) Signed-off-by: Eliad Peller Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e5b8caf5e466..4b28dc07bcb1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1635,6 +1635,9 @@ enum nl80211_commands { * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds * (per second) (u16 attribute) * + * @NL80211_ATTR_SMPS_MODE: SMPS mode to use (ap mode). see + * &enum nl80211_smps_mode. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1985,6 +1988,8 @@ enum nl80211_attrs { NL80211_ATTR_USER_PRIO, NL80211_ATTR_ADMITTED_TIME, + NL80211_ATTR_SMPS_MODE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4030,6 +4035,13 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_ACKTO_ESTIMATION: This driver supports dynamic ACK timeout * estimation (dynack). %NL80211_ATTR_WIPHY_DYN_ACK flag attribute is used * to enable dynack. + * @NL80211_FEATURE_STATIC_SMPS: Device supports static spatial + * multiplexing powersave, ie. can turn off all but one chain + * even on HT connections that should be using more chains. + * @NL80211_FEATURE_DYNAMIC_SMPS: Device supports dynamic spatial + * multiplexing powersave, ie. can turn off all but one chain + * and then wake the rest up as required after, for example, + * rts/cts handshake. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -4056,6 +4068,8 @@ enum nl80211_feature_flags { NL80211_FEATURE_QUIET = 1 << 21, NL80211_FEATURE_TX_POWER_INSERTION = 1 << 22, NL80211_FEATURE_ACKTO_ESTIMATION = 1 << 23, + NL80211_FEATURE_STATIC_SMPS = 1 << 24, + NL80211_FEATURE_DYNAMIC_SMPS = 1 << 25, }; /** @@ -4129,6 +4143,25 @@ enum nl80211_acl_policy { NL80211_ACL_POLICY_DENY_UNLESS_LISTED, }; +/** + * enum nl80211_smps_mode - SMPS mode + * + * Requested SMPS mode (for AP mode) + * + * @NL80211_SMPS_OFF: SMPS off (use all antennas). + * @NL80211_SMPS_STATIC: static SMPS (use a single antenna) + * @NL80211_SMPS_DYNAMIC: dynamic smps (start with a single antenna and + * turn on other antennas after CTS/RTS). + */ +enum nl80211_smps_mode { + NL80211_SMPS_OFF, + NL80211_SMPS_STATIC, + NL80211_SMPS_DYNAMIC, + + __NL80211_SMPS_AFTER_LAST, + NL80211_SMPS_MAX = __NL80211_SMPS_AFTER_LAST - 1 +}; + /** * enum nl80211_radar_event - type of radar event for DFS operation * -- cgit v1.2.3 From 39e393bb4f653d38aea40190e1aa9a49062eed4d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 11 Sep 2014 11:02:39 +0200 Subject: netfilter: nf_tables: add NFTA_MASQ_UNSPEC to nft_masq_attributes To keep this consistent with other nft_*_attributes. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index eeec0ae845ef..66d66dd3ff79 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -806,6 +806,7 @@ enum nft_nat_attributes { * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) */ enum nft_masq_attributes { + NFTA_MASQ_UNSPEC, NFTA_MASQ_FLAGS, __NFTA_MASQ_MAX }; -- cgit v1.2.3 From 0e9871e3f79fd17c691b50a9669220c54ff084a2 Mon Sep 17 00:00:00 2001 From: Anton Danilov Date: Thu, 28 Aug 2014 10:11:27 +0400 Subject: netfilter: ipset: Add skbinfo extension kernel support in the ipset core. Skbinfo extension provides mapping of metainformation with lookup in the ipset tables. This patch defines the flags, the constants, the functions and the structures for the data type independent support of the extension. Note the firewall mark stores in the kernel structures as two 32bit values, but transfered through netlink as one 64bit value. Signed-off-by: Anton Danilov Signed-off-by: Jozsef Kadlecsik --- include/uapi/linux/netfilter/ipset/ip_set.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 78c2f2e79920..ca03119111a2 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -115,6 +115,9 @@ enum { IPSET_ATTR_BYTES, IPSET_ATTR_PACKETS, IPSET_ATTR_COMMENT, + IPSET_ATTR_SKBMARK, + IPSET_ATTR_SKBPRIO, + IPSET_ATTR_SKBQUEUE, __IPSET_ATTR_ADT_MAX, }; #define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) @@ -147,6 +150,7 @@ enum ipset_errno { IPSET_ERR_COUNTER, IPSET_ERR_COMMENT, IPSET_ERR_INVALID_MARKMASK, + IPSET_ERR_SKBINFO, /* Type specific error codes */ IPSET_ERR_TYPE_SPECIFIC = 4352, @@ -170,6 +174,12 @@ enum ipset_cmd_flags { IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS), IPSET_FLAG_BIT_RETURN_NOMATCH = 7, IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH), + IPSET_FLAG_BIT_MAP_SKBMARK = 8, + IPSET_FLAG_MAP_SKBMARK = (1 << IPSET_FLAG_BIT_MAP_SKBMARK), + IPSET_FLAG_BIT_MAP_SKBPRIO = 9, + IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO), + IPSET_FLAG_BIT_MAP_SKBQUEUE = 10, + IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE), IPSET_FLAG_CMD_MAX = 15, }; @@ -187,6 +197,8 @@ enum ipset_cadt_flags { IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT), IPSET_FLAG_BIT_WITH_FORCEADD = 5, IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD), + IPSET_FLAG_BIT_WITH_SKBINFO = 6, + IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO), IPSET_FLAG_CADT_MAX = 15, }; -- cgit v1.2.3 From 76cea4109ca89dea218fdc652d2e1535fd9b5fc7 Mon Sep 17 00:00:00 2001 From: Anton Danilov Date: Tue, 2 Sep 2014 14:21:20 +0400 Subject: netfilter: ipset: Add skbinfo extension support to SET target. Signed-off-by: Anton Danilov Signed-off-by: Jozsef Kadlecsik --- include/uapi/linux/netfilter/xt_set.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h index 964d3d42f874..d6a1df1f2947 100644 --- a/include/uapi/linux/netfilter/xt_set.h +++ b/include/uapi/linux/netfilter/xt_set.h @@ -71,4 +71,14 @@ struct xt_set_info_match_v3 { __u32 flags; }; +/* Revision 3 target */ + +struct xt_set_info_target_v3 { + struct xt_set_info add_set; + struct xt_set_info del_set; + struct xt_set_info map_set; + __u32 flags; + __u32 timeout; +}; + #endif /*_XT_SET_H*/ -- cgit v1.2.3 From 6cff339bbd5f9eda7a5e8a521f91a88d046e6d0c Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Tue, 9 Sep 2014 16:40:20 -0700 Subject: ipvs: Add destination address family to netlink interface This is necessary to support heterogeneous pools. For example, if you have an ipv6 addressed network, you'll want to be able to forward ipv4 traffic into it. This patch enforces that destination address family is the same as service family, as none of the forwarding mechanisms support anything else. For the old setsockopt mechanism, we simply set the dest address family to AF_INET as we do with the service. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/uapi/linux/ip_vs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index fbcffe8041f7..cabe95d5b461 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -384,6 +384,9 @@ enum { IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */ IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */ + + IPVS_DEST_ATTR_ADDR_FAMILY, /* Address family of address */ + __IPVS_DEST_ATTR_MAX, }; -- cgit v1.2.3 From 971427f353f3c42c8dcef62e7124440df68eb809 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Mon, 15 Sep 2014 19:37:25 -0700 Subject: openvswitch: Add recirc and hash action. Recirc action allows a packet to reenter openvswitch processing. currently openvswitch lookup flow for packet received and execute set of actions on that packet, with help of recirc action we can process/modify the packet and recirculate it back in openvswitch for another pass. OVS hash action calculates 5-tupple hash and set hash in flow-key hash. This can be used along with recirculation for distributing packets among different ports for bond devices. For example: OVS bonding can use following actions: Match on: bond flow; Action: hash, recirc(id) Match on: recirc-id == id and hash lower bits == a; Action: output port_bond_a Signed-off-by: Andy Zhou Acked-by: Jesse Gross Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index a794d1dd7b40..f7fc507d82ab 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -289,6 +289,9 @@ enum ovs_key_attr { OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */ OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */ OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */ + OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash + is not computed by the datapath. */ + OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */ #ifdef __KERNEL__ OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ @@ -493,6 +496,27 @@ struct ovs_action_push_vlan { __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ }; +/* Data path hash algorithm for computing Datapath hash. + * + * The algorithm type only specifies the fields in a flow + * will be used as part of the hash. Each datapath is free + * to use its own hash algorithm. The hash value will be + * opaque to the user space daemon. + */ +enum ovs_hash_alg { + OVS_HASH_ALG_L4, +}; + +/* + * struct ovs_action_hash - %OVS_ACTION_ATTR_HASH action argument. + * @hash_alg: Algorithm used to compute hash prior to recirculation. + * @hash_basis: basis used for computing hash. + */ +struct ovs_action_hash { + uint32_t hash_alg; /* One of ovs_hash_alg. */ + uint32_t hash_basis; +}; + /** * enum ovs_action_attr - Action types. * @@ -521,6 +545,8 @@ enum ovs_action_attr { OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */ OVS_ACTION_ATTR_POP_VLAN, /* No argument. */ OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ + OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */ + OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */ __OVS_ACTION_ATTR_MAX }; -- cgit v1.2.3 From 84d7fce693884897c6196cc98228a2ad56ae2a9a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 4 Sep 2014 14:30:22 +0200 Subject: netfilter: nf_tables: export rule-set generation ID This patch exposes the ruleset generation ID in three ways: 1) The new command NFT_MSG_GETGEN that exposes the 32-bits ruleset generation ID. This ID is incremented in every commit and it should be large enough to avoid wraparound problems. 2) The less significant 16-bits of the generation ID are exposed through the nfgenmsg->res_id header field. This allows us to quickly catch if the ruleset has change between two consecutive list dumps from different object lists (in this specific case I think the risk of wraparound is unlikely). 3) Userspace subscribers may receive notifications of new rule-set generation after every commit. This also provides an alternative way to monitor the generation ID. If the events are lost, the userspace process hits a overrun error, so it knows that it is working with a stale ruleset anyway. Patrick spotted that rule-set transformations in userspace may take quite some time. In that case, it annotates the 32-bits generation ID before fetching the rule-set, then: 1) it compares it to what we obtain after the transformation to make sure it is not working with a stale rule-set and no wraparound has ocurred. 2) it subscribes to ruleset notifications, so it can watch for new generation ID. This is complementary to the NLM_F_DUMP_INTR approach, which allows us to detect an interference in the middle one single list dumping. There is no way to explicitly check that an interference has occurred between two list dumps from the kernel, since it doesn't know how many lists the userspace client is actually going to dump. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 66d66dd3ff79..b72ccfeaf865 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -51,6 +51,8 @@ enum nft_verdicts { * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes) * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes) * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes) + * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes) + * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -68,6 +70,8 @@ enum nf_tables_msg_types { NFT_MSG_NEWSETELEM, NFT_MSG_GETSETELEM, NFT_MSG_DELSETELEM, + NFT_MSG_NEWGEN, + NFT_MSG_GETGEN, NFT_MSG_MAX, }; @@ -812,4 +816,16 @@ enum nft_masq_attributes { }; #define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1) +/** + * enum nft_gen_attributes - nf_tables ruleset generation attributes + * + * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32) + */ +enum nft_gen_attributes { + NFTA_GEN_UNSPEC, + NFTA_GEN_ID, + __NFTA_GEN_MAX +}; +#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) + #endif /* _LINUX_NF_TABLES_H */ -- cgit v1.2.3 From 23461551c00628c3f3fe9cf837bf53cf8f212b63 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 17 Sep 2014 12:25:56 -0700 Subject: fou: Support for foo-over-udp RX path This patch provides a receive path for foo-over-udp. This allows direct encapsulation of IP protocols over UDP. The bound destination port is used to map to an IP protocol, and the XFRM framework (udp_encap_rcv) is used to receive encapsulated packets. Upon reception, the encapsulation header is logically removed (pointer to transport header is advanced) and the packet is reinjected into the receive path with the IP protocol indicated by the mapping. Netlink is used to configure FOU ports. The configuration information includes the port number to bind to and the IP protocol corresponding to that port. This should support GRE/UDP (http://tools.ietf.org/html/draft-yong-tsvwg-gre-in-udp-encap-02), as will as the other IP tunneling protocols (IPIP, SIT). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/fou.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/uapi/linux/fou.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h new file mode 100644 index 000000000000..e03376de453d --- /dev/null +++ b/include/uapi/linux/fou.h @@ -0,0 +1,32 @@ +/* fou.h - FOU Interface */ + +#ifndef _UAPI_LINUX_FOU_H +#define _UAPI_LINUX_FOU_H + +/* NETLINK_GENERIC related info + */ +#define FOU_GENL_NAME "fou" +#define FOU_GENL_VERSION 0x1 + +enum { + FOU_ATTR_UNSPEC, + FOU_ATTR_PORT, /* u16 */ + FOU_ATTR_AF, /* u8 */ + FOU_ATTR_IPPROTO, /* u8 */ + + __FOU_ATTR_MAX, +}; + +#define FOU_ATTR_MAX (__FOU_ATTR_MAX - 1) + +enum { + FOU_CMD_UNSPEC, + FOU_CMD_ADD, + FOU_CMD_DEL, + + __FOU_CMD_MAX, +}; + +#define FOU_CMD_MAX (__FOU_CMD_MAX - 1) + +#endif /* _UAPI_LINUX_FOU_H */ -- cgit v1.2.3 From 56328486539ddd07cbaafec7a542a2c8a3043623 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 17 Sep 2014 12:25:58 -0700 Subject: net: Changes to ip_tunnel to support foo-over-udp encapsulation This patch changes IP tunnel to support (secondary) encapsulation, Foo-over-UDP. Changes include: 1) Adding tun_hlen as the tunnel header length, encap_hlen as the encapsulation header length, and hlen becomes the grand total of these. 2) Added common netlink define to support FOU encapsulation. 3) Routines to perform FOU encapsulation. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 3bce9e9d9f7c..9fedca7d8dae 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -53,10 +53,22 @@ enum { IFLA_IPTUN_6RD_RELAY_PREFIX, IFLA_IPTUN_6RD_PREFIXLEN, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, + IFLA_IPTUN_ENCAP_TYPE, + IFLA_IPTUN_ENCAP_FLAGS, + IFLA_IPTUN_ENCAP_SPORT, + IFLA_IPTUN_ENCAP_DPORT, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) +enum tunnel_encap_types { + TUNNEL_ENCAP_NONE, + TUNNEL_ENCAP_FOU, +}; + +#define TUNNEL_ENCAP_FLAG_CSUM (1<<0) +#define TUNNEL_ENCAP_FLAG_CSUM6 (1<<1) + /* SIT-mode i_flags */ #define SIT_ISATAP 0x0001 -- cgit v1.2.3 From 4565e9919cda747815547e2e5d7b78f15efbffdf Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 17 Sep 2014 12:26:01 -0700 Subject: gre: Setup and TX path for gre/UDP foo-over-udp encapsulation Added netlink attrs to configure FOU encapsulation for GRE, netlink handling of these flags, and properly adjust MTU for encapsulation. ip_tunnel_encap is called from ip_tunnel_xmit to actually perform FOU encapsulation. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 9fedca7d8dae..7c832afdfa94 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -106,6 +106,10 @@ enum { IFLA_GRE_ENCAP_LIMIT, IFLA_GRE_FLOWINFO, IFLA_GRE_FLAGS, + IFLA_GRE_ENCAP_TYPE, + IFLA_GRE_ENCAP_FLAGS, + IFLA_GRE_ENCAP_SPORT, + IFLA_GRE_ENCAP_DPORT, __IFLA_GRE_MAX, }; -- cgit v1.2.3 From 99c55f7d47c0dc6fc64729f37bf435abf43f4c60 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 26 Sep 2014 00:16:57 -0700 Subject: bpf: introduce BPF syscall and maps BPF syscall is a multiplexor for a range of different operations on eBPF. This patch introduces syscall with single command to create a map. Next patch adds commands to access maps. 'maps' is a generic storage of different types for sharing data between kernel and userspace. Userspace example: /* this syscall wrapper creates a map with given type and attributes * and returns map_fd on success. * use close(map_fd) to delete the map */ int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries) { union bpf_attr attr = { .map_type = map_type, .key_size = key_size, .value_size = value_size, .max_entries = max_entries }; return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } 'union bpf_attr' is backwards compatible with future extensions. More details in Documentation/networking/filter.txt and in manpage Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 479ed0b6be16..f58a10f9670c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -62,4 +62,27 @@ struct bpf_insn { __s32 imm; /* signed immediate constant */ }; +/* BPF syscall commands */ +enum bpf_cmd { + /* create a map with given type and attributes + * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size) + * returns fd or negative error + * map is deleted when fd is closed + */ + BPF_MAP_CREATE, +}; + +enum bpf_map_type { + BPF_MAP_TYPE_UNSPEC, +}; + +union bpf_attr { + struct { /* anonymous struct used by BPF_MAP_CREATE command */ + __u32 map_type; /* one of enum bpf_map_type */ + __u32 key_size; /* size of key in bytes */ + __u32 value_size; /* size of value in bytes */ + __u32 max_entries; /* max number of entries in a map */ + }; +} __attribute__((aligned(8))); + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From db20fd2b01087bdfbe30bce314a198eefedcc42e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 26 Sep 2014 00:16:59 -0700 Subject: bpf: add lookup/update/delete/iterate methods to BPF maps 'maps' is a generic storage of different types for sharing data between kernel and userspace. The maps are accessed from user space via BPF syscall, which has commands: - create a map with given type and attributes fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size) returns fd or negative error - lookup key in a given map referenced by fd err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->value returns zero and stores found elem into value or negative error - create or update key/value pair in a given map err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->value returns zero or negative error - find and delete element by key in a given map err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key - iterate map elements (based on input key return next_key) err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->next_key - close(fd) deletes the map Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f58a10f9670c..395cabd2ca0a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -70,6 +70,35 @@ enum bpf_cmd { * map is deleted when fd is closed */ BPF_MAP_CREATE, + + /* lookup key in a given map + * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key, attr->value + * returns zero and stores found elem into value + * or negative error + */ + BPF_MAP_LOOKUP_ELEM, + + /* create or update key/value pair in a given map + * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key, attr->value + * returns zero or negative error + */ + BPF_MAP_UPDATE_ELEM, + + /* find and delete elem by key in a given map + * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key + * returns zero or negative error + */ + BPF_MAP_DELETE_ELEM, + + /* lookup key in a given map and return next key + * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key, attr->next_key + * returns zero and stores next key or negative error + */ + BPF_MAP_GET_NEXT_KEY, }; enum bpf_map_type { @@ -83,6 +112,15 @@ union bpf_attr { __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ }; + + struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + __u32 map_fd; + __aligned_u64 key; + union { + __aligned_u64 value; + __aligned_u64 next_key; + }; + }; } __attribute__((aligned(8))); #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 09756af46893c18839062976c3252e93a1beeba7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 26 Sep 2014 00:17:00 -0700 Subject: bpf: expand BPF syscall with program load/unload eBPF programs are similar to kernel modules. They are loaded by the user process and automatically unloaded when process exits. Each eBPF program is a safe run-to-completion set of instructions. eBPF verifier statically determines that the program terminates and is safe to execute. The following syscall wrapper can be used to load the program: int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, int insn_cnt, const char *license) { union bpf_attr attr = { .prog_type = prog_type, .insns = ptr_to_u64(insns), .insn_cnt = insn_cnt, .license = ptr_to_u64(license), }; return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } where 'insns' is an array of eBPF instructions and 'license' is a string that must be GPL compatible to call helper functions marked gpl_only Upon succesful load the syscall returns prog_fd. Use close(prog_fd) to unload the program. User space tests and examples follow in the later patches Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 395cabd2ca0a..424f442016e7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -99,12 +99,23 @@ enum bpf_cmd { * returns zero and stores next key or negative error */ BPF_MAP_GET_NEXT_KEY, + + /* verify and load eBPF program + * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size) + * Using attr->prog_type, attr->insns, attr->license + * returns fd or negative error + */ + BPF_PROG_LOAD, }; enum bpf_map_type { BPF_MAP_TYPE_UNSPEC, }; +enum bpf_prog_type { + BPF_PROG_TYPE_UNSPEC, +}; + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -121,6 +132,21 @@ union bpf_attr { __aligned_u64 next_key; }; }; + + struct { /* anonymous struct used by BPF_PROG_LOAD command */ + __u32 prog_type; /* one of enum bpf_prog_type */ + __u32 insn_cnt; + __aligned_u64 insns; + __aligned_u64 license; + }; } __attribute__((aligned(8))); +/* integer value in 'imm' field of BPF_CALL instruction selects which helper + * function eBPF program intends to call + */ +enum bpf_func_id { + BPF_FUNC_unspec, + __BPF_FUNC_MAX_ID, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From cbd357008604925355ae7b54a09137dabb81b580 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 26 Sep 2014 00:17:03 -0700 Subject: bpf: verifier (add ability to receive verification log) add optional attributes for BPF_PROG_LOAD syscall: union bpf_attr { struct { ... __u32 log_level; /* verbosity level of eBPF verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied 'char *buffer' */ }; }; when log_level > 0 the verifier will return its verification log in the user supplied buffer 'log_buf' which can be used by program author to analyze why verifier rejected given program. 'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt provides several examples of these messages, like the program: BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0), BPF_EXIT_INSN(), will be rejected with the following multi-line message in log_buf: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (b7) r1 = 0 4: (85) call 1 5: (15) if r0 == 0x0 goto pc+1 R0=map_ptr R10=fp 6: (7a) *(u64 *)(r0 +4) = 0 misaligned access off 4 size 8 The format of the output can change at any time as verifier evolves. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 424f442016e7..31b0ac208a52 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -138,6 +138,9 @@ union bpf_attr { __u32 insn_cnt; __aligned_u64 insns; __aligned_u64 license; + __u32 log_level; /* verbosity level of verifier */ + __u32 log_size; /* size of user buffer */ + __aligned_u64 log_buf; /* user supplied buffer */ }; } __attribute__((aligned(8))); -- cgit v1.2.3 From e3118e8359bb7c59555aca60c725106e6d78c5ce Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 26 Sep 2014 22:37:36 +0200 Subject: net: tcp: add DCTCP congestion control algorithm This work adds the DataCenter TCP (DCTCP) congestion control algorithm [1], which has been first published at SIGCOMM 2010 [2], resp. follow-up analysis at SIGMETRICS 2011 [3] (and also, more recently as an informational IETF draft available at [4]). DCTCP is an enhancement to the TCP congestion control algorithm for data center networks. Typical data center workloads are i.e. i) partition/aggregate (queries; bursty, delay sensitive), ii) short messages e.g. 50KB-1MB (for coordination and control state; delay sensitive), and iii) large flows e.g. 1MB-100MB (data update; throughput sensitive). DCTCP has therefore been designed for such environments to provide/achieve the following three requirements: * High burst tolerance (incast due to partition/aggregate) * Low latency (short flows, queries) * High throughput (continuous data updates, large file transfers) with commodity, shallow buffered switches The basic idea of its design consists of two fundamentals: i) on the switch side, packets are being marked when its internal queue length > threshold K (K is chosen so that a large enough headroom for marked traffic is still available in the switch queue); ii) the sender/host side maintains a moving average of the fraction of marked packets, so each RTT, F is being updated as follows: F := X / Y, where X is # of marked ACKs, Y is total # of ACKs alpha := (1 - g) * alpha + g * F, where g is a smoothing constant The resulting alpha (iow: probability that switch queue is congested) is then being used in order to adaptively decrease the congestion window W: W := (1 - (alpha / 2)) * W The means for receiving marked packets resp. marking them on switch side in DCTCP is the use of ECN. RFC3168 describes a mechanism for using Explicit Congestion Notification from the switch for early detection of congestion, rather than waiting for segment loss to occur. However, this method only detects the presence of congestion, not the *extent*. In the presence of mild congestion, it reduces the TCP congestion window too aggressively and unnecessarily affects the throughput of long flows [4]. DCTCP, as mentioned, enhances Explicit Congestion Notification (ECN) processing to estimate the fraction of bytes that encounter congestion, rather than simply detecting that some congestion has occurred. DCTCP then scales the TCP congestion window based on this estimate [4], thus it can derive multibit feedback from the information present in the single-bit sequence of marks in its control law. And thus act in *proportion* to the extent of congestion, not its *presence*. Switches therefore set the Congestion Experienced (CE) codepoint in packets when internal queue lengths exceed threshold K. Resulting, DCTCP delivers the same or better throughput than normal TCP, while using 90% less buffer space. It was found in [2] that DCTCP enables the applications to handle 10x the current background traffic, without impacting foreground traffic. Moreover, a 10x increase in foreground traffic did not cause any timeouts, and thus largely eliminates TCP incast collapse problems. The algorithm itself has already seen deployments in large production data centers since then. We did a long-term stress-test and analysis in a data center, short summary of our TCP incast tests with iperf compared to cubic: This test measured DCTCP throughput and latency and compared it with CUBIC throughput and latency for an incast scenario. In this test, 19 senders sent at maximum rate to a single receiver. The receiver simply ran iperf -s. The senders ran iperf -c -t 30. All senders started simultaneously (using local clocks synchronized by ntp). This test was repeated multiple times. Below shows the results from a single test. Other tests are similar. (DCTCP results were extremely consistent, CUBIC results show some variance induced by the TCP timeouts that CUBIC encountered.) For this test, we report statistics on the number of TCP timeouts, flow throughput, and traffic latency. 1) Timeouts (total over all flows, and per flow summaries): CUBIC DCTCP Total 3227 25 Mean 169.842 1.316 Median 183 1 Max 207 5 Min 123 0 Stddev 28.991 1.600 Timeout data is taken by measuring the net change in netstat -s "other TCP timeouts" reported. As a result, the timeout measurements above are not restricted to the test traffic, and we believe that it is likely that all of the "DCTCP timeouts" are actually timeouts for non-test traffic. We report them nevertheless. CUBIC will also include some non-test timeouts, but they are drawfed by bona fide test traffic timeouts for CUBIC. Clearly DCTCP does an excellent job of preventing TCP timeouts. DCTCP reduces timeouts by at least two orders of magnitude and may well have eliminated them in this scenario. 2) Throughput (per flow in Mbps): CUBIC DCTCP Mean 521.684 521.895 Median 464 523 Max 776 527 Min 403 519 Stddev 105.891 2.601 Fairness 0.962 0.999 Throughput data was simply the average throughput for each flow reported by iperf. By avoiding TCP timeouts, DCTCP is able to achieve much better per-flow results. In CUBIC, many flows experience TCP timeouts which makes flow throughput unpredictable and unfair. DCTCP, on the other hand, provides very clean predictable throughput without incurring TCP timeouts. Thus, the standard deviation of CUBIC throughput is dramatically higher than the standard deviation of DCTCP throughput. Mean throughput is nearly identical because even though cubic flows suffer TCP timeouts, other flows will step in and fill the unused bandwidth. Note that this test is something of a best case scenario for incast under CUBIC: it allows other flows to fill in for flows experiencing a timeout. Under situations where the receiver is issuing requests and then waiting for all flows to complete, flows cannot fill in for timed out flows and throughput will drop dramatically. 3) Latency (in ms): CUBIC DCTCP Mean 4.0088 0.04219 Median 4.055 0.0395 Max 4.2 0.085 Min 3.32 0.028 Stddev 0.1666 0.01064 Latency for each protocol was computed by running "ping -i 0.2 " from a single sender to the receiver during the incast test. For DCTCP, "ping -Q 0x6 -i 0.2 " was used to ensure that traffic traversed the DCTCP queue and was not dropped when the queue size was greater than the marking threshold. The summary statistics above are over all ping metrics measured between the single sender, receiver pair. The latency results for this test show a dramatic difference between CUBIC and DCTCP. CUBIC intentionally overflows the switch buffer which incurs the maximum queue latency (more buffer memory will lead to high latency.) DCTCP, on the other hand, deliberately attempts to keep queue occupancy low. The result is a two orders of magnitude reduction of latency with DCTCP - even with a switch with relatively little RAM. Switches with larger amounts of RAM will incur increasing amounts of latency for CUBIC, but not for DCTCP. 4) Convergence and stability test: This test measured the time that DCTCP took to fairly redistribute bandwidth when a new flow commences. It also measured DCTCP's ability to remain stable at a fair bandwidth distribution. DCTCP is compared with CUBIC for this test. At the commencement of this test, a single flow is sending at maximum rate (near 10 Gbps) to a single receiver. One second after that first flow commences, a new flow from a distinct server begins sending to the same receiver as the first flow. After the second flow has sent data for 10 seconds, the second flow is terminated. The first flow sends for an additional second. Ideally, the bandwidth would be evenly shared as soon as the second flow starts, and recover as soon as it stops. The results of this test are shown below. Note that the flow bandwidth for the two flows was measured near the same time, but not simultaneously. DCTCP performs nearly perfectly within the measurement limitations of this test: bandwidth is quickly distributed fairly between the two flows, remains stable throughout the duration of the test, and recovers quickly. CUBIC, in contrast, is slow to divide the bandwidth fairly, and has trouble remaining stable. CUBIC DCTCP Seconds Flow 1 Flow 2 Seconds Flow 1 Flow 2 0 9.93 0 0 9.92 0 0.5 9.87 0 0.5 9.86 0 1 8.73 2.25 1 6.46 4.88 1.5 7.29 2.8 1.5 4.9 4.99 2 6.96 3.1 2 4.92 4.94 2.5 6.67 3.34 2.5 4.93 5 3 6.39 3.57 3 4.92 4.99 3.5 6.24 3.75 3.5 4.94 4.74 4 6 3.94 4 5.34 4.71 4.5 5.88 4.09 4.5 4.99 4.97 5 5.27 4.98 5 4.83 5.01 5.5 4.93 5.04 5.5 4.89 4.99 6 4.9 4.99 6 4.92 5.04 6.5 4.93 5.1 6.5 4.91 4.97 7 4.28 5.8 7 4.97 4.97 7.5 4.62 4.91 7.5 4.99 4.82 8 5.05 4.45 8 5.16 4.76 8.5 5.93 4.09 8.5 4.94 4.98 9 5.73 4.2 9 4.92 5.02 9.5 5.62 4.32 9.5 4.87 5.03 10 6.12 3.2 10 4.91 5.01 10.5 6.91 3.11 10.5 4.87 5.04 11 8.48 0 11 8.49 4.94 11.5 9.87 0 11.5 9.9 0 SYN/ACK ECT test: This test demonstrates the importance of ECT on SYN and SYN-ACK packets by measuring the connection probability in the presence of competing flows for a DCTCP connection attempt *without* ECT in the SYN packet. The test was repeated five times for each number of competing flows. Competing Flows 1 | 2 | 4 | 8 | 16 ------------------------------ Mean Connection Probability 1 | 0.67 | 0.45 | 0.28 | 0 Median Connection Probability 1 | 0.65 | 0.45 | 0.25 | 0 As the number of competing flows moves beyond 1, the connection probability drops rapidly. Enabling DCTCP with this patch requires the following steps: DCTCP must be running both on the sender and receiver side in your data center, i.e.: sysctl -w net.ipv4.tcp_congestion_control=dctcp Also, ECN functionality must be enabled on all switches in your data center for DCTCP to work. The default ECN marking threshold (K) heuristic on the switch for DCTCP is e.g., 20 packets (30KB) at 1Gbps, and 65 packets (~100KB) at 10Gbps (K > 1/7 * C * RTT, [4]). In above tests, for each switch port, traffic was segregated into two queues. For any packet with a DSCP of 0x01 - or equivalently a TOS of 0x04 - the packet was placed into the DCTCP queue. All other packets were placed into the default drop-tail queue. For the DCTCP queue, RED/ECN marking was enabled, here, with a marking threshold of 75 KB. More details however, we refer you to the paper [2] under section 3). There are no code changes required to applications running in user space. DCTCP has been implemented in full *isolation* of the rest of the TCP code as its own congestion control module, so that it can run without a need to expose code to the core of the TCP stack, and thus nothing changes for non-DCTCP users. Changes in the CA framework code are minimal, and DCTCP algorithm operates on mechanisms that are already available in most Silicon. The gain (dctcp_shift_g) is currently a fixed constant (1/16) from the paper, but we leave the option that it can be chosen carefully to a different value by the user. In case DCTCP is being used and ECN support on peer site is off, DCTCP falls back after 3WHS to operate in normal TCP Reno mode. ss {-4,-6} -t -i diag interface: ... dctcp wscale:7,7 rto:203 rtt:2.349/0.026 mss:1448 cwnd:2054 ssthresh:1102 ce_state 0 alpha 15 ab_ecn 0 ab_tot 735584 send 10129.2Mbps pacing_rate 20254.1Mbps unacked:1822 retrans:0/15 reordering:101 rcv_space:29200 ... dctcp-reno wscale:7,7 rto:201 rtt:0.711/1.327 ato:40 mss:1448 cwnd:10 ssthresh:1102 fallback_mode send 162.9Mbps pacing_rate 325.5Mbps rcv_rtt:1.5 rcv_space:29200 More information about DCTCP can be found in [1-4]. [1] http://simula.stanford.edu/~alizade/Site/DCTCP.html [2] http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf [3] http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp_analysis-full.pdf [4] http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00 Joint work with Florian Westphal and Glenn Judd. Signed-off-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: Glenn Judd Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index bbde90fa5838..d65c0a09efd3 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -110,10 +110,10 @@ enum { INET_DIAG_TCLASS, INET_DIAG_SKMEMINFO, INET_DIAG_SHUTDOWN, + INET_DIAG_DCTCPINFO, }; -#define INET_DIAG_MAX INET_DIAG_SHUTDOWN - +#define INET_DIAG_MAX INET_DIAG_DCTCPINFO /* INET_DIAG_MEM */ @@ -133,5 +133,14 @@ struct tcpvegas_info { __u32 tcpv_minrtt; }; +/* INET_DIAG_DCTCPINFO */ + +struct tcp_dctcp_info { + __u16 dctcp_enabled; + __u16 dctcp_ce_state; + __u32 dctcp_alpha; + __u32 dctcp_ab_ecn; + __u32 dctcp_ab_tot; +}; #endif /* _UAPI_INET_DIAG_H_ */ -- cgit v1.2.3 From 79cf79abce71eb7dbc40e2f3121048ca5405cb47 Mon Sep 17 00:00:00 2001 From: Michael Braun Date: Thu, 25 Sep 2014 16:31:08 +0200 Subject: macvlan: add source mode This patch adds a new mode of operation to macvlan, called "source". It allows one to set a list of allowed mac address, which is used to match against source mac address from received frames on underlying interface. This enables creating mac based VLAN associations, instead of standard port or tag based. The feature is useful to deploy 802.1x mac based behavior, where drivers of underlying interfaces doesn't allows that. Configuration is done through the netlink interface using e.g.: ip link add link eth0 name macvlan0 type macvlan mode source ip link add link eth0 name macvlan1 type macvlan mode source ip link set link dev macvlan0 type macvlan macaddr add 00:11:11:11:11:11 ip link set link dev macvlan0 type macvlan macaddr add 00:22:22:22:22:22 ip link set link dev macvlan0 type macvlan macaddr add 00:33:33:33:33:33 ip link set link dev macvlan1 type macvlan macaddr add 00:33:33:33:33:33 ip link set link dev macvlan1 type macvlan macaddr add 00:44:44:44:44:44 This allows clients with MAC addresses 00:11:11:11:11:11, 00:22:22:22:22:22 to be part of only VLAN associated with macvlan0 interface. Clients with MAC addresses 00:44:44:44:44:44 with only VLAN associated with macvlan1 interface. And client with MAC address 00:33:33:33:33:33 to be associated with both VLANs. Based on work of Stefan Gula v8: last version of Stefan Gula for Kernel 3.2.1 v9: rework onto linux-next 2014-03-12 by Michael Braun add MACADDR_SET command, enable to configure mac for source mode while creating interface v10: - reduce indention level - rename source_list to source_entry - use aligned 64bit ether address - use hash_64 instead of addr[5] v11: - rebase for 3.14 / linux-next 20.04.2014 v12 - rebase for linux-next 2014-09-25 Signed-off-by: Michael Braun Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index c80f95f6ee78..0bdb77e16875 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -303,6 +303,10 @@ enum { IFLA_MACVLAN_UNSPEC, IFLA_MACVLAN_MODE, IFLA_MACVLAN_FLAGS, + IFLA_MACVLAN_MACADDR_MODE, + IFLA_MACVLAN_MACADDR, + IFLA_MACVLAN_MACADDR_DATA, + IFLA_MACVLAN_MACADDR_COUNT, __IFLA_MACVLAN_MAX, }; @@ -313,6 +317,14 @@ enum macvlan_mode { MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */ MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */ MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */ + MACVLAN_MODE_SOURCE = 16,/* use source MAC address list to assign */ +}; + +enum macvlan_macaddr_mode { + MACVLAN_MACADDR_ADD, + MACVLAN_MACADDR_DEL, + MACVLAN_MACADDR_FLUSH, + MACVLAN_MACADDR_SET, }; #define MACVLAN_FLAG_NOPROMISC 1 -- cgit v1.2.3 From 51b0a5d8c21a91801bbef9bcc8639dc0b206c6cd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 26 Sep 2014 14:35:14 +0200 Subject: netfilter: nft_reject: introduce icmp code abstraction for inet and bridge This patch introduces the NFT_REJECT_ICMPX_UNREACH type which provides an abstraction to the ICMP and ICMPv6 codes that you can use from the inet and bridge tables, they are: * NFT_REJECT_ICMPX_NO_ROUTE: no route to host - network unreachable * NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable * NFT_REJECT_ICMPX_HOST_UNREACH: host unreachable * NFT_REJECT_ICMPX_ADMIN_PROHIBITED: administratevely prohibited You can still use the specific codes when restricting the rule to match the corresponding layer 3 protocol. I decided to not overload the existing NFT_REJECT_ICMP_UNREACH to have different semantics depending on the table family and to allow the user to specify ICMP family specific codes if they restrict it to the corresponding family. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b72ccfeaf865..c26df6787fb0 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -749,12 +749,33 @@ enum nft_queue_attributes { * * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable * @NFT_REJECT_TCP_RST: reject using TCP RST + * @NFT_REJECT_ICMPX_UNREACH: abstracted ICMP unreachable for bridge and inet */ enum nft_reject_types { NFT_REJECT_ICMP_UNREACH, NFT_REJECT_TCP_RST, + NFT_REJECT_ICMPX_UNREACH, }; +/** + * enum nft_reject_code - Generic reject codes for IPv4/IPv6 + * + * @NFT_REJECT_ICMPX_NO_ROUTE: no route to host / network unreachable + * @NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable + * @NFT_REJECT_ICMPX_HOST_UNREACH: host unreachable + * @NFT_REJECT_ICMPX_ADMIN_PROHIBITED: administratively prohibited + * + * These codes are mapped to real ICMP and ICMPv6 codes. + */ +enum nft_reject_inet_code { + NFT_REJECT_ICMPX_NO_ROUTE = 0, + NFT_REJECT_ICMPX_PORT_UNREACH, + NFT_REJECT_ICMPX_HOST_UNREACH, + NFT_REJECT_ICMPX_ADMIN_PROHIBITED, + __NFT_REJECT_ICMPX_MAX +}; +#define NFT_REJECT_ICMPX_MAX (__NFT_REJECT_ICMPX_MAX + 1) + /** * enum nft_reject_attributes - nf_tables reject expression netlink attributes * -- cgit v1.2.3 From dba4b74d2da8798626e2b702ad3f452671e335f7 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Wed, 1 Oct 2014 15:05:25 +0300 Subject: wil6210: atomic I/O for the card memory Introduce netdev IOCTLs, to be used by the debug tools. Allows to read/write single dword value or memory block, aligned to dword Different address modes supported: - BAR offset - Firmware "linker" address - target's AHB bus Signed-off-by: Vladimir Kondratiev Signed-off-by: John W. Linville --- include/uapi/linux/wil6210_uapi.h | 87 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 include/uapi/linux/wil6210_uapi.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/wil6210_uapi.h b/include/uapi/linux/wil6210_uapi.h new file mode 100644 index 000000000000..6a3cddd156c4 --- /dev/null +++ b/include/uapi/linux/wil6210_uapi.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2014 Qualcomm Atheros, Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __WIL6210_UAPI_H__ +#define __WIL6210_UAPI_H__ + +#if !defined(__KERNEL__) +#define __user +#endif + +#include + +/* Numbers SIOCDEVPRIVATE and SIOCDEVPRIVATE + 1 + * are used by Android devices to implement PNO (preferred network offload). + * Albeit it is temporary solution, use different numbers to avoid conflicts + */ + +/** + * Perform 32-bit I/O operation to the card memory + * + * User code should arrange data in memory like this: + * + * struct wil_memio io; + * struct ifreq ifr = { + * .ifr_data = &io, + * }; + */ +#define WIL_IOCTL_MEMIO (SIOCDEVPRIVATE + 2) + +/** + * Perform block I/O operation to the card memory + * + * User code should arrange data in memory like this: + * + * void *buf; + * struct wil_memio_block io = { + * .block = buf, + * }; + * struct ifreq ifr = { + * .ifr_data = &io, + * }; + */ +#define WIL_IOCTL_MEMIO_BLOCK (SIOCDEVPRIVATE + 3) + +/** + * operation to perform + * + * @wil_mmio_op_mask - bits defining operation, + * @wil_mmio_addr_mask - bits defining addressing mode + */ +enum wil_memio_op { + wil_mmio_read = 0, + wil_mmio_write = 1, + wil_mmio_op_mask = 0xff, + wil_mmio_addr_linker = 0 << 8, + wil_mmio_addr_ahb = 1 << 8, + wil_mmio_addr_bar = 2 << 8, + wil_mmio_addr_mask = 0xff00, +}; + +struct wil_memio { + uint32_t op; /* enum wil_memio_op */ + uint32_t addr; /* should be 32-bit aligned */ + uint32_t val; +}; + +struct wil_memio_block { + uint32_t op; /* enum wil_memio_op */ + uint32_t addr; /* should be 32-bit aligned */ + uint32_t size; /* should be multiple of 4 */ + void __user *block; /* block address */ +}; + +#endif /* __WIL6210_UAPI_H__ */ -- cgit v1.2.3 From 37dd0247797b168ad1cc7f5dbec825a1ee66535b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 3 Oct 2014 15:48:09 -0700 Subject: gue: Receive side for Generic UDP Encapsulation This patch adds support receiving for GUE packets in the fou module. The fou module now supports direct foo-over-udp (no encapsulation header) and GUE. To support this a type parameter is added to the fou netlink parameters. For a GUE socket we define gue_udp_recv, gue_gro_receive, and gue_gro_complete to handle the specifics of the GUE protocol. Most of the code to manage and configure sockets is common with the fou. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/fou.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h index e03376de453d..8df06894da23 100644 --- a/include/uapi/linux/fou.h +++ b/include/uapi/linux/fou.h @@ -13,6 +13,7 @@ enum { FOU_ATTR_PORT, /* u16 */ FOU_ATTR_AF, /* u8 */ FOU_ATTR_IPPROTO, /* u8 */ + FOU_ATTR_TYPE, /* u8 */ __FOU_ATTR_MAX, }; @@ -27,6 +28,12 @@ enum { __FOU_CMD_MAX, }; +enum { + FOU_ENCAP_UNSPEC, + FOU_ENCAP_DIRECT, + FOU_ENCAP_GUE, +}; + #define FOU_CMD_MAX (__FOU_CMD_MAX - 1) #endif /* _UAPI_LINUX_FOU_H */ -- cgit v1.2.3 From bc1fc390e1728672b5b343b85185fcc1fe41043b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 3 Oct 2014 15:48:10 -0700 Subject: ip_tunnel: Add GUE support This patch allows configuring IPIP, sit, and GRE tunnels to use GUE. This is very similar to fou excpet that we need to insert the GUE header in addition to the UDP header on transmit. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 7c832afdfa94..280d9e092283 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -64,6 +64,7 @@ enum { enum tunnel_encap_types { TUNNEL_ENCAP_NONE, TUNNEL_ENCAP_FOU, + TUNNEL_ENCAP_GUE, }; #define TUNNEL_ENCAP_FLAG_CSUM (1<<0) -- cgit v1.2.3 From 67fa034194bf82a3d5ca841759d921297daa63ca Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 3 Oct 2014 15:35:30 -0700 Subject: openvswitch: Add support for matching on OAM packets. Some tunnel formats have mechanisms for indicating that packets are OAM frames that should be handled specially (either as high priority or not forwarded beyond an endpoint). This provides support for allowing those types of packets to be matched. Signed-off-by: Jesse Gross Signed-off-by: Andy Zhou Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index f7fc507d82ab..7c06106f5af5 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -309,6 +309,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */ OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ + OVS_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */ __OVS_TUNNEL_KEY_ATTR_MAX }; -- cgit v1.2.3 From f0b128c1e2cc33ad104daf0f51a51e34f7763c5f Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 3 Oct 2014 15:35:31 -0700 Subject: openvswitch: Wrap struct ovs_key_ipv4_tunnel in a new structure. Currently, the flow information that is matched for tunnels and the tunnel data passed around with packets is the same. However, as additional information is added this is not necessarily desirable, as in the case of pointers. This adds a new structure for tunnel metadata which currently contains only the existing struct. This change is purely internal to the kernel since the current OVS_KEY_ATTR_IPV4_TUNNEL is simply a compressed version of OVS_KEY_ATTR_TUNNEL that is translated at flow setup. Signed-off-by: Jesse Gross Signed-off-by: Andy Zhou Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7c06106f5af5..6753032832e2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -294,7 +294,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */ #ifdef __KERNEL__ - OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ + OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ #endif __OVS_KEY_ATTR_MAX }; -- cgit v1.2.3 From f5796684069e0c71c65bce6a6d4766114aec1396 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 3 Oct 2014 15:35:33 -0700 Subject: openvswitch: Add support for Geneve tunneling. The Openvswitch implementation is completely agnostic to the options that are in use and can handle newly defined options without further work. It does this by simply matching on a byte array of options and allowing userspace to setup flows on this array. Signed-off-by: Jesse Gross Singed-off-by: Ansis Atteka Signed-off-by: Andy Zhou Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 6753032832e2..435eabc5ffaa 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -192,6 +192,7 @@ enum ovs_vport_type { OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ OVS_VPORT_TYPE_GRE, /* GRE tunnel. */ OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel. */ + OVS_VPORT_TYPE_GENEVE, /* Geneve tunnel. */ __OVS_VPORT_TYPE_MAX }; @@ -310,6 +311,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ OVS_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */ + OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ __OVS_TUNNEL_KEY_ATTR_MAX }; -- cgit v1.2.3 From 1255a5055449781a92076fc5429952f2b33cf309 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Oct 2014 12:35:21 +0300 Subject: ethtool: Ethtool parameter to dynamically change tx_copybreak Use new ethtool [sg]et_tunable() to set tx_copybread (inline threshold) Signed-off-by: Eric Dumazet Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 7a364f2f3d3f..99b43056a6fe 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -212,6 +212,7 @@ struct ethtool_value { enum tunable_id { ETHTOOL_ID_UNSPEC, ETHTOOL_RX_COPYBREAK, + ETHTOOL_TX_COPYBREAK, }; enum tunable_type_id { -- cgit v1.2.3