From 53c3fa206415d8a3f8b2a4f77689ea044c4a9c65 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 9 Aug 2010 13:41:07 +0000 Subject: net/sock.h: add missing kernel-doc notation Add missing kernel-doc notation to struct sock: Warning(include/net/sock.h:324): No description found for parameter 'sk_peer_pid' Warning(include/net/sock.h:324): No description found for parameter 'sk_peer_cred' Warning(include/net/sock.h:324): No description found for parameter 'sk_classid' Warning(include/net/sock.h:324): Excess struct/union/enum/typedef member 'sk_peercred' description in 'sock' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/net/sock.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index a441c9cdd625..ac53bfbdfe16 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -195,7 +195,8 @@ struct sock_common { * @sk_priority: %SO_PRIORITY setting * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family - * @sk_peercred: %SO_PEERCRED setting + * @sk_peer_pid: &struct pid for this socket's peer + * @sk_peer_cred: %SO_PEERCRED setting * @sk_rcvlowat: %SO_RCVLOWAT setting * @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting @@ -211,6 +212,7 @@ struct sock_common { * @sk_send_head: front of stuff to transmit * @sk_security: used by security modules * @sk_mark: generic packet mark + * @sk_classid: this socket's cgroup classid * @sk_write_pending: a write to stream socket waits to start * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed -- cgit v1.2.3 From fa235562fbde8703aabeeedfa0772f08608d1542 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 5 Aug 2010 15:54:20 -0700 Subject: Bluetooth: Change default L2CAP ERTM retransmit timeout The L2CAP specification requires that the ERTM retransmit timeout be at least 2 seconds for BR/EDR connections. Signed-off-by: Mat Martineau Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 636724b203ee..16e412f3722b 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -33,7 +33,7 @@ #define L2CAP_DEFAULT_FLUSH_TO 0xffff #define L2CAP_DEFAULT_TX_WINDOW 63 #define L2CAP_DEFAULT_MAX_TX 3 -#define L2CAP_DEFAULT_RETRANS_TO 1000 /* 1 second */ +#define L2CAP_DEFAULT_RETRANS_TO 2000 /* 2 seconds */ #define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */ #define L2CAP_DEFAULT_MAX_PDU_SIZE 672 #define L2CAP_DEFAULT_ACK_TO 200 -- cgit v1.2.3 From db12d647ccc971ed120199dc7ea5be2b5887d328 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 5 Aug 2010 15:54:27 -0700 Subject: Bluetooth: Use 3-DH5 payload size for default ERTM max PDU size The previous value of 672 for L2CAP_DEFAULT_MAX_PDU_SIZE is based on the default L2CAP MTU. That default MTU is calculated from the size of two DH5 packets, minus ACL and L2CAP b-frame header overhead. ERTM is used with newer basebands that typically support larger 3-DH5 packets, and i-frames and s-frames have more header overhead. With clean RF conditions, basebands will typically attempt to use 1021-byte 3-DH5 packets for maximum throughput. Adjusting for 2 bytes of ACL headers plus 10 bytes of worst-case L2CAP headers yields 1009 bytes of payload. This PDU size imposes less overhead for header bytes and gives the baseband the option to choose 3-DH5 packets, but is small enough for ERTM traffic to interleave well with other L2CAP or SCO data. 672-byte payloads do not allow the most efficient over-the-air packet choice, and cannot achieve maximum throughput over BR/EDR. Signed-off-by: Mat Martineau Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 16e412f3722b..6c241444f902 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -35,7 +35,7 @@ #define L2CAP_DEFAULT_MAX_TX 3 #define L2CAP_DEFAULT_RETRANS_TO 2000 /* 2 seconds */ #define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */ -#define L2CAP_DEFAULT_MAX_PDU_SIZE 672 +#define L2CAP_DEFAULT_MAX_PDU_SIZE 1009 /* Sized for 3-DH5 packet */ #define L2CAP_DEFAULT_ACK_TO 200 #define L2CAP_LOCAL_BUSY_TRIES 12 -- cgit v1.2.3 From 4e6cbfd09c66893e5134c9896e9af353c2322b66 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 29 Jul 2010 16:14:13 -0400 Subject: mac80211: support use of NAPI for bottom-half processing This patch implement basic infrastructure to support use of NAPI by mac80211-based hardware drivers. Because mac80211 devices can support multiple netdevs, a dummy netdev is used for interfacing with the NAPI code in the core of the network stack. That structure is hidden from the hardware drivers, but the actual napi_struct is exposed in the ieee80211_hw structure so that the poll routines in drivers can retrieve that structure. Hardware drivers can also specify their own weight value for NAPI polling. Signed-off-by: John W. Linville --- include/net/mac80211.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b0787a1dea90..3f1e03b521ec 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1102,6 +1102,10 @@ enum ieee80211_hw_flags { * * @max_rates: maximum number of alternate rate retry stages * @max_rate_tries: maximum number of tries for each stage + * + * @napi_weight: weight used for NAPI polling. You must specify an + * appropriate value here if a napi_poll operation is provided + * by your driver. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1113,6 +1117,7 @@ struct ieee80211_hw { int channel_change_time; int vif_data_size; int sta_data_size; + int napi_weight; u16 queues; u16 max_listen_interval; s8 max_signal; @@ -1687,6 +1692,8 @@ enum ieee80211_ampdu_mlme_action { * switch operation for CSAs received from the AP may implement this * callback. They must then call ieee80211_chswitch_done() to indicate * completion of the channel switch. + * + * @napi_poll: Poll Rx queue for incoming data frames. */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -1752,6 +1759,7 @@ struct ieee80211_ops { void (*flush)(struct ieee80211_hw *hw, bool drop); void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_channel_switch *ch_switch); + int (*napi_poll)(struct ieee80211_hw *hw, int budget); }; /** @@ -1897,6 +1905,22 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); */ void ieee80211_restart_hw(struct ieee80211_hw *hw); +/** ieee80211_napi_schedule - schedule NAPI poll + * + * Use this function to schedule NAPI polling on a device. + * + * @hw: the hardware to start polling + */ +void ieee80211_napi_schedule(struct ieee80211_hw *hw); + +/** ieee80211_napi_complete - complete NAPI polling + * + * Use this function to finish NAPI polling on a device. + * + * @hw: the hardware to stop polling + */ +void ieee80211_napi_complete(struct ieee80211_hw *hw); + /** * ieee80211_rx - receive frame * -- cgit v1.2.3 From 7da7cc1d42d8ce02cca16df8c021e6d657f1f8fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:02:38 +0200 Subject: mac80211: per interface idle notification Sometimes we don't just need to know whether or not the device is idle, but also per interface. This adds that reporting capability to mac80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3f1e03b521ec..3a3c26f647b7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -149,6 +149,7 @@ struct ieee80211_low_level_stats { * @BSS_CHANGED_ARP_FILTER: Hardware ARP filter address list or state changed. * @BSS_CHANGED_QOS: QoS for this association was enabled/disabled. Note * that it is only ever disabled for station mode. + * @BSS_CHANGED_IDLE: Idle changed for this BSS/interface. */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -165,6 +166,7 @@ enum ieee80211_bss_change { BSS_CHANGED_IBSS = 1<<11, BSS_CHANGED_ARP_FILTER = 1<<12, BSS_CHANGED_QOS = 1<<13, + BSS_CHANGED_IDLE = 1<<14, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -223,6 +225,9 @@ enum ieee80211_bss_change { * hardware must not perform any ARP filtering. Note, that the filter will * be enabled also in promiscuous mode. * @qos: This is a QoS-enabled BSS. + * @idle: This interface is idle. There's also a global idle flag in the + * hardware config which may be more appropriate depending on what + * your driver/device needs to do. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -247,6 +252,7 @@ struct ieee80211_bss_conf { u8 arp_addr_cnt; bool arp_filter_enabled; bool qos; + bool idle; }; /** -- cgit v1.2.3 From d1f5b7a34aa5ff703c4966ea2652d4212ac75940 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:05:55 +0200 Subject: mac80211: allow drivers to request SM PS mode change Sometimes drivers have more information than the stack about how their antennas/chains are used, and may require that the SM PS mode be changed. This could happen, for example, when detecting that the user disconnected an antenna. Thus this patch introduces API to allow drivers to request SM PS mode changes. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3a3c26f647b7..871ed1de736a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2548,6 +2548,18 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, */ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); +/** + * ieee80211_request_smps - request SM PS transition + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @mode: new SM PS mode + * + * This allows the driver to request an SM PS transition in managed + * mode. This is useful when the driver has more information than + * the stack about possible interference, for example by bluetooth. + */ +void ieee80211_request_smps(struct ieee80211_vif *vif, + enum ieee80211_smps_mode smps_mode); + /* Rate control API */ /** -- cgit v1.2.3 From 97359d1235eaf634fe706c9faa6e40181cc95fb8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Aug 2010 09:46:38 +0200 Subject: mac80211: use cipher suite selectors Currently, mac80211 translates the cfg80211 cipher suite selectors into ALG_* values. That isn't all too useful, and some drivers benefit from the distinction between WEP40 and WEP104 as well. Therefore, convert it all to use the cipher suite selectors. Signed-off-by: Johannes Berg Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville --- include/net/mac80211.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 871ed1de736a..914c747ac3a0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -788,20 +788,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) return false; } -/** - * enum ieee80211_key_alg - key algorithm - * @ALG_WEP: WEP40 or WEP104 - * @ALG_TKIP: TKIP - * @ALG_CCMP: CCMP (AES) - * @ALG_AES_CMAC: AES-128-CMAC - */ -enum ieee80211_key_alg { - ALG_WEP, - ALG_TKIP, - ALG_CCMP, - ALG_AES_CMAC, -}; - /** * enum ieee80211_key_flags - key flags * @@ -839,7 +825,7 @@ enum ieee80211_key_flags { * @hw_key_idx: To be set by the driver, this is the key index the driver * wants to be given when a frame is transmitted and needs to be * encrypted in hardware. - * @alg: The key algorithm. + * @cipher: The key's cipher suite selector. * @flags: key flags, see &enum ieee80211_key_flags. * @keyidx: the key index (0-3) * @keylen: key material length @@ -852,7 +838,7 @@ enum ieee80211_key_flags { * @iv_len: The IV length for this key type */ struct ieee80211_key_conf { - enum ieee80211_key_alg alg; + u32 cipher; u8 icv_len; u8 iv_len; u8 hw_key_idx; -- cgit v1.2.3 From 2244d07bfa2097cb00600da91c715a8aa547917e Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 17 Aug 2010 08:59:14 +0000 Subject: net: simplify flags for tx timestamping This patch removes the abstraction introduced by the union skb_shared_tx in the shared skb data. The access of the different union elements at several places led to some confusion about accessing the shared tx_flags e.g. in skb_orphan_try(). http://marc.info/?l=linux-netdev&m=128084897415886&w=2 Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- include/net/sock.h | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 890f9725d681..7691aca133db 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -53,7 +53,7 @@ struct ipcm_cookie { __be32 addr; int oif; struct ip_options *opt; - union skb_shared_tx shtx; + __u8 tx_flags; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfbdfe16..100e43bf95fb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1669,17 +1669,13 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped - * @msg: outgoing packet * @sk: socket sending this packet - * @shtx: filled with instructions for time stamping + * @tx_flags: filled with instructions for time stamping * * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if * parameters are invalid. */ -extern int sock_tx_timestamp(struct msghdr *msg, - struct sock *sk, - union skb_shared_tx *shtx); - +extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed -- cgit v1.2.3 From d34a16661ed0fed433c9469d7cfa3ca4d30ca42e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 Jun 2010 17:06:21 -0700 Subject: net: convert to rcu_dereference_index_check() The task_cls_classid() function applies rcu_dereference() to integers, which does not work with the shiny new sparse-based checking in rcu_dereference(). This commit therefore moves to the new RCU API rcu_dereference_index_check(). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: David S. Miller Acked-by: Herbert Xu --- include/net/cls_cgroup.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc3536409..dd1fdb8293f5 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -45,7 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) return 0; rcu_read_lock(); - id = rcu_dereference(net_cls_subsys_id); + id = rcu_dereference_index_check(net_cls_subsys_id, + rcu_read_lock_held()); if (id >= 0) classid = container_of(task_subsys_state(p, id), struct cgroup_cls_state, css)->classid; -- cgit v1.2.3 From 0906a372f2aa0fec1e59bd12b896883b6e41307a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 9 Mar 2010 20:59:15 +0100 Subject: net/netfilter: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Patrick McHardy Cc: "David S. Miller" Cc: Eric Dumazet Reviewed-by: Josh Triplett --- include/net/netfilter/nf_conntrack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e624dae54fa4..caf17db87dbc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -75,7 +75,7 @@ struct nf_conntrack_helper; /* nf_conn feature for connections that have a helper */ struct nf_conn_help { /* Helper. if any */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; union nf_conntrack_help help; -- cgit v1.2.3 From eb4d40654505e47aa9d2035bb97f631fa61d14b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20Baron?= Date: Wed, 18 Aug 2010 13:10:35 +0000 Subject: net/sched: add ACT_CSUM action to update packets checksums net/sched: add ACT_CSUM action to update packets checksums ACT_CSUM can be called just after ACT_PEDIT in order to re-compute some altered checksums in IPv4 and IPv6 packets. The following checksums are supported by this patch: - IPv4: IPv4 header, ICMP, IGMP, TCP, UDP & UDPLite - IPv6: ICMPv6, TCP, UDP & UDPLite It's possible to request in the same action to update different kind of checksums, if the packets flow mix TCP, UDP and UDPLite, ... An example of usage is done in the associated iproute2 patch. Version 3 changes: - remove useless goto instructions - improve IPv6 hop options decoding Version 2 changes: - coding style correction - remove useless arguments of some functions - use stack in tcf_csum_dump() - add tcf_csum_skb_nextlayer() to factor code Signed-off-by: Gregoire Baron Acked-by: jamal Signed-off-by: David S. Miller --- include/net/tc_act/tc_csum.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/net/tc_act/tc_csum.h (limited to 'include/net') diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h new file mode 100644 index 000000000000..9e8710be7a04 --- /dev/null +++ b/include/net/tc_act/tc_csum.h @@ -0,0 +1,15 @@ +#ifndef __NET_TC_CSUM_H +#define __NET_TC_CSUM_H + +#include +#include + +struct tcf_csum { + struct tcf_common common; + + u32 update_flags; +}; +#define to_tcf_csum(pc) \ + container_of(pc,struct tcf_csum,common) + +#endif /* __NET_TC_CSUM_H */ -- cgit v1.2.3 From 00959ade36acadc00e757f87060bf6e4501d545f Mon Sep 17 00:00:00 2001 From: Dmitry Kozlov Date: Sat, 21 Aug 2010 23:05:39 -0700 Subject: PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol) PPP: introduce "pptp" module which implements point-to-point tunneling protocol using pppox framework NET: introduce the "gre" module for demultiplexing GRE packets on version criteria (required to pptp and ip_gre may coexists) NET: ip_gre: update to use the "gre" module This patch introduces then pptp support to the linux kernel which dramatically speeds up pptp vpn connections and decreases cpu usage in comparison of existing user-space implementation (poptop/pptpclient). There is accel-pptp project (https://sourceforge.net/projects/accel-pptp/) to utilize this module, it contains plugin for pppd to use pptp in client-mode and modified pptpd (poptop) to build high-performance pptp NAS. There was many changes from initial submitted patch, most important are: 1. using rcu instead of read-write locks 2. using static bitmap instead of dynamically allocated 3. using vmalloc for memory allocation instead of BITS_PER_LONG + __get_free_pages 4. fixed many coding style issues Thanks to Eric Dumazet. Signed-off-by: Dmitry Kozlov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/gre.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/net/gre.h (limited to 'include/net') diff --git a/include/net/gre.h b/include/net/gre.h new file mode 100644 index 000000000000..82665474bcb7 --- /dev/null +++ b/include/net/gre.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_GRE_H +#define __LINUX_GRE_H + +#include + +#define GREPROTO_CISCO 0 +#define GREPROTO_PPTP 1 +#define GREPROTO_MAX 2 + +struct gre_protocol { + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, u32 info); +}; + +int gre_add_protocol(const struct gre_protocol *proto, u8 version); +int gre_del_protocol(const struct gre_protocol *proto, u8 version); + +#endif -- cgit v1.2.3 From 81ce790bd75d49a0d119f5d7b27405e1d9b1bd57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Aug 2010 23:51:33 +0000 Subject: irda: use net_device_stats from struct net_device struct net_device has its own struct net_device_stats member, so use this one instead of a private copy in the irlan_cb struct. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/irda/irlan_common.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h index 73cacb3ac16c..0af8b8dfbc22 100644 --- a/include/net/irda/irlan_common.h +++ b/include/net/irda/irlan_common.h @@ -171,7 +171,6 @@ struct irlan_cb { int magic; struct list_head dev_list; struct net_device *dev; /* Ethernet device structure*/ - struct net_device_stats stats; __u32 saddr; /* Source device address */ __u32 daddr; /* Destination device address */ -- cgit v1.2.3 From 633adf1ad1c92c02bd3f10bbd73737a969179378 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 14:49:58 +0200 Subject: cfg80211: mark ieee80211_hdrlen const This function analyses only its single, value-passed argument, and has no side effects. Thus it can be const, which makes mac80211 smaller, for example: text data bss dec hex filename 362518 16720 884 380122 5ccda mac80211.ko (before) 362358 16720 884 379962 5cc3a mac80211.ko (after) a 160 byte saving in text size, and an optimisation because the function won't be called as often. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2fd06c60ffbb..2b403c7ee32e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1715,7 +1715,7 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); * ieee80211_hdrlen - get header length in bytes from frame control * @fc: frame control field in little-endian format */ -unsigned int ieee80211_hdrlen(__le16 fc); +unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 -- cgit v1.2.3 From 2e161f78e5f63a7f9fd25a766bb7f816a01eb14a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 15:38:38 +0200 Subject: cfg80211/mac80211: extensible frame processing Allow userspace to register for more than just action frames by giving the frame subtype, and make it possible to use this in various modes as well. With some tweaks and some added functionality this will, in the future, also be usable in AP mode and be able to replace the cooked monitor interface currently used in that case. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 56 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2b403c7ee32e..6a98b1b3bfde 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1020,7 +1020,7 @@ struct cfg80211_pmksa { * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. - * @action: Transmit an action frame + * @mgmt_tx: Transmit a management frame * * @testmode_cmd: run a test mode command * @@ -1172,7 +1172,7 @@ struct cfg80211_ops { struct net_device *dev, u64 cookie); - int (*action)(struct wiphy *wiphy, struct net_device *dev, + int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, bool channel_type_valid, @@ -1236,6 +1236,10 @@ struct mac_address { u8 addr[ETH_ALEN]; }; +struct ieee80211_txrx_stypes { + u16 tx, rx; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback @@ -1286,6 +1290,10 @@ struct mac_address { * @privid: a pointer that drivers can use to identify if an arbitrary * wiphy is theirs, e.g. in global notifiers * @bands: information about bands/channels supported by this device + * + * @mgmt_stypes: bitmasks of frame subtypes that can be subscribed to or + * transmitted through nl80211, points to an array indexed by interface + * type */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1294,9 +1302,12 @@ struct wiphy { u8 perm_addr[ETH_ALEN]; u8 addr_mask[ETH_ALEN]; - u16 n_addresses; struct mac_address *addresses; + const struct ieee80211_txrx_stypes *mgmt_stypes; + + u16 n_addresses; + /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; @@ -1492,8 +1503,8 @@ struct cfg80211_cached_keys; * set by driver (if supported) on add_interface BEFORE registering the * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface - * @action_registrations: list of registrations for action frames - * @action_registrations_lock: lock for the list + * @mgmt_registrations: list of registrations for management frames + * @mgmt_registrations_lock: lock for the list * @mtx: mutex used to lock data in this struct * @cleanup_work: work struct used for cleanup that can't be done directly */ @@ -1505,8 +1516,8 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; - struct list_head action_registrations; - spinlock_t action_registrations_lock; + struct list_head mgmt_registrations; + spinlock_t mgmt_registrations_lock; struct mutex mtx; @@ -2373,38 +2384,39 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** - * cfg80211_rx_action - notification of received, unprocessed Action frame + * cfg80211_rx_mgmt - notification of received, unprocessed management frame * @dev: network device * @freq: Frequency on which the frame was received in MHz - * @buf: Action frame (header + body) + * @buf: Management frame (header + body) * @len: length of the frame data * @gfp: context flags - * Returns %true if a user space application is responsible for rejecting the - * unrecognized Action frame; %false if no such application is registered - * (i.e., the driver is responsible for rejecting the unrecognized Action - * frame) + * + * Returns %true if a user space application has registered for this frame. + * For action frames, that makes it responsible for rejecting unrecognized + * action frames; %false otherwise, in which case for action frames the + * driver is responsible for rejecting the frame. * * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. */ -bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, - size_t len, gfp_t gfp); +bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp); /** - * cfg80211_action_tx_status - notification of TX status for Action frame + * cfg80211_mgmt_tx_status - notification of TX status for management frame * @dev: network device - * @cookie: Cookie returned by cfg80211_ops::action() - * @buf: Action frame (header + body) + * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() + * @buf: Management frame (header + body) * @len: length of the frame data * @ack: Whether frame was acknowledged * @gfp: context flags * - * This function is called whenever an Action frame was requested to be - * transmitted with cfg80211_ops::action() to report the TX status of the + * This function is called whenever a management frame was requested to be + * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt. */ -void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp); +void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp); /** -- cgit v1.2.3 From 633dd1ea683d907af944bcd9814092efe9869b05 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Aug 2010 15:01:23 +0200 Subject: mac80211: fix docbook Fix a small problem in the documentation for ieee80211_request_smps, and a now erroneous inclusion of enum ieee80211_key_alg, which no longer exists after the change to ciphers. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 914c747ac3a0..2a1811366076 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2537,7 +2537,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); /** * ieee80211_request_smps - request SM PS transition * @vif: &struct ieee80211_vif pointer from the add_interface callback. - * @mode: new SM PS mode + * @smps_mode: new SM PS mode * * This allows the driver to request an SM PS transition in managed * mode. This is useful when the driver has more information than -- cgit v1.2.3 From d70e96932de55fb2c05b1c0af1dff178651a9b77 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Aug 2010 16:11:27 +0200 Subject: cfg80211: add some documentation Add some documentation for cfg80211. I'm hoping some of the regulatory documentation will be filled by somebody more familiar with it, hint hint! :) Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 121 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 115 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6a98b1b3bfde..f2740537b5d6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -25,6 +25,43 @@ #include +/** + * DOC: Introduction + * + * cfg80211 is the configuration API for 802.11 devices in Linux. It bridges + * userspace and drivers, and offers some utility functionality associated + * with 802.11. cfg80211 must, directly or indirectly via mac80211, be used + * by all modern wireless drivers in Linux, so that they offer a consistent + * API through nl80211. For backward compatibility, cfg80211 also offers + * wireless extensions to userspace, but hides them from drivers completely. + * + * Additionally, cfg80211 contains code to help enforce regulatory spectrum + * use restrictions. + */ + + +/** + * DOC: Device registration + * + * In order for a driver to use cfg80211, it must register the hardware device + * with cfg80211. This happens through a number of hardware capability structs + * described below. + * + * The fundamental structure for each device is the 'wiphy', of which each + * instance describes a physical wireless device connected to the system. Each + * such wiphy can have zero, one, or many virtual interfaces associated with + * it, which need to be identified as such by pointing the network interface's + * @ieee80211_ptr pointer to a &struct wireless_dev which further describes + * the wireless part of the interface, normally this struct is embedded in the + * network interface's private data area. Drivers can optionally allow creating + * or destroying virtual interfaces on the fly, but without at least one or the + * ability to create some the wireless device isn't useful. + * + * Each wiphy structure contains device capability information, and also has + * a pointer to the various operations the driver offers. The definitions and + * structures here describe these capabilities in detail. + */ + /* * wireless hardware capability structures */ @@ -204,6 +241,21 @@ struct ieee80211_supported_band { * Wireless hardware/device configuration structures and methods */ +/** + * DOC: Actions and configuration + * + * Each wireless device and each virtual interface offer a set of configuration + * operations and other actions that are invoked by userspace. Each of these + * actions is described in the operations structure, and the parameters these + * operations use are described separately. + * + * Additionally, some operations are asynchronous and expect to get status + * information via some functions that drivers need to call. + * + * Scanning and BSS list handling with its associated functionality is described + * in a separate chapter. + */ + /** * struct vif_params - describes virtual interface parameters * @mesh_id: mesh ID to use @@ -570,8 +622,28 @@ struct ieee80211_txq_params { /* from net/wireless.h */ struct wiphy; -/* from net/ieee80211.h */ -struct ieee80211_channel; +/** + * DOC: Scanning and BSS list handling + * + * The scanning process itself is fairly simple, but cfg80211 offers quite + * a bit of helper functionality. To start a scan, the scan operation will + * be invoked with a scan definition. This scan definition contains the + * channels to scan, and the SSIDs to send probe requests for (including the + * wildcard, if desired). A passive scan is indicated by having no SSIDs to + * probe. Additionally, a scan request may contain extra information elements + * that should be added to the probe request. The IEs are guaranteed to be + * well-formed, and will not exceed the maximum length the driver advertised + * in the wiphy structure. + * + * When scanning finds a BSS, cfg80211 needs to be notified of that, because + * it is responsible for maintaining the BSS list; the driver should not + * maintain a list itself. For this notification, various functions exist. + * + * Since drivers do not maintain a BSS list, there are also a number of + * functions to search for a BSS and obtain information about it from the + * BSS structure cfg80211 maintains. The BSS list is also made available + * to userspace. + */ /** * struct cfg80211_ssid - SSID description @@ -1574,8 +1646,10 @@ static inline void *wdev_priv(struct wireless_dev *wdev) return wiphy_priv(wdev->wiphy); } -/* - * Utility functions +/** + * DOC: Utility functions + * + * cfg80211 offers a number of utility functions that can be useful. */ /** @@ -1728,6 +1802,14 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); */ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); +/** + * DOC: Data path helpers + * + * In addition to generic utilities, cfg80211 also offers + * functions that help implement the data path for devices + * that do not do the 802.11/802.3 conversion on the device. + */ + /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame @@ -1788,8 +1870,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb); */ const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); -/* - * Regulatory helper functions for wiphys +/** + * DOC: Regulatory enforcement infrastructure + * + * TODO */ /** @@ -2191,6 +2275,20 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, */ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp); +/** + * DOC: RFkill integration + * + * RFkill integration in cfg80211 is almost invisible to drivers, + * as cfg80211 automatically registers an rfkill instance for each + * wireless device it knows about. Soft kill is also translated + * into disconnecting and turning all interfaces off, drivers are + * expected to turn off the device when all interfaces are down. + * + * However, devices may have a hard RFkill line, in which case they + * also need to interact with the rfkill subsystem, via cfg80211. + * They can do this with a few helper functions documented here. + */ + /** * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state * @wiphy: the wiphy @@ -2211,6 +2309,17 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy); void wiphy_rfkill_stop_polling(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE +/** + * DOC: Test mode + * + * Test mode is a set of utility functions to allow drivers to + * interact with driver-specific tools to aid, for instance, + * factory programming. + * + * This chapter describes how drivers interact with it, for more + * information see the nl80211 book's chapter on it. + */ + /** * cfg80211_testmode_alloc_reply_skb - allocate testmode reply * @wiphy: the wiphy -- cgit v1.2.3 From ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 25 Aug 2010 02:27:49 -0700 Subject: tcp: Combat per-cpu skew in orphan tests. As reported by Anton Blanchard when we use percpu_counter_read_positive() to make our orphan socket limit checks, the check can be off by up to num_cpus_online() * batch (which is 32 by default) which on a 128 cpu machine can be as large as the default orphan limit itself. Fix this by doing the full expensive sum check if the optimized check triggers. Reported-by: Anton Blanchard Signed-off-by: David S. Miller Acked-by: Eric Dumazet --- include/net/tcp.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb20193..eaa9582779d0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -268,11 +268,21 @@ static inline int between(__u32 seq1, __u32 seq2, __u32 seq3) return seq3 - seq2 >= seq1 - seq2; } -static inline int tcp_too_many_orphans(struct sock *sk, int num) +static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { - return (num > sysctl_tcp_max_orphans) || - (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]); + struct percpu_counter *ocp = sk->sk_prot->orphan_count; + int orphans = percpu_counter_read_positive(ocp); + + if (orphans << shift > sysctl_tcp_max_orphans) { + orphans = percpu_counter_sum_positive(ocp); + if (orphans << shift > sysctl_tcp_max_orphans) + return true; + } + + if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && + atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) + return true; + return false; } /* syncookies: remember time of last synqueue overflow */ -- cgit v1.2.3 From 2738bd682df546f34654ed3d59dfc9ebe8d04979 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 21 Aug 2010 16:39:01 -0400 Subject: mac80211: trivial spelling fixes Fix spelling and readability of a few lines of kernel doc: s/issueing/issuing/g s/approriate/appropriate/g s/supported by simply/supported simply by/ s/IEEE80211_HW_BEACON_FILTERING/IEEE80211_HW_BEACON_FILTER/g Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- include/net/mac80211.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2a1811366076..dcc8c2bf986e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1242,8 +1242,8 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * %IEEE80211_CONF_PS flag enabled means that the powersave mode defined in * IEEE 802.11-2007 section 11.2 is enabled. This is not to be confused * with hardware wakeup and sleep states. Driver is responsible for waking - * up the hardware before issueing commands to the hardware and putting it - * back to sleep at approriate times. + * up the hardware before issuing commands to the hardware and putting it + * back to sleep at appropriate times. * * When PS is enabled, hardware needs to wakeup for beacons and receive the * buffered multicast/broadcast frames after the beacon. Also it must be @@ -1264,7 +1264,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * there's data traffic and still saving significantly power in idle * periods. * - * Dynamic powersave is supported by simply mac80211 enabling and disabling + * Dynamic powersave is simply supported by mac80211 enabling and disabling * PS based on traffic. Driver needs to only set %IEEE80211_HW_SUPPORTS_PS * flag and mac80211 will handle everything automatically. Additionally, * hardware having support for the dynamic PS feature may set the @@ -2458,7 +2458,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER and * %IEEE80211_CONF_PS is set, the driver needs to inform whenever the * hardware is not receiving beacons with this function. */ @@ -2469,7 +2469,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif); * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING, and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER, and * %IEEE80211_CONF_PS and %IEEE80211_HW_CONNECTION_MONITOR are set, the driver * needs to inform if the connection to the AP has been lost. * -- cgit v1.2.3 From 145ce502e44b57c074c72cfdc855557e19026999 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Aug 2010 13:21:08 +0000 Subject: net/sctp: Use pr_fmt and pr_ Change SCTP_DEBUG_PRINTK and SCTP_DEBUG_PRINTK_IPADDR to use do { print } while (0) guards. Add SCTP_DEBUG_PRINTK_CONT to fix errors in log when lines were continued. Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt Add a missing newline in "Failed bind hash alloc" Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 48 ++++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 65946bc43d00..2cb3980b1616 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -275,24 +275,35 @@ struct sctp_mib { /* Print debugging messages. */ #if SCTP_DEBUG extern int sctp_debug_flag; -#define SCTP_DEBUG_PRINTK(whatever...) \ - ((void) (sctp_debug_flag && printk(KERN_DEBUG whatever))) -#define SCTP_DEBUG_PRINTK_IPADDR(lead, trail, leadparm, saddr, otherparms...) \ - if (sctp_debug_flag) { \ - if (saddr->sa.sa_family == AF_INET6) { \ - printk(KERN_DEBUG \ - lead "%pI6" trail, \ - leadparm, \ - &saddr->v6.sin6_addr, \ - otherparms); \ - } else { \ - printk(KERN_DEBUG \ - lead "%pI4" trail, \ - leadparm, \ - &saddr->v4.sin_addr.s_addr, \ - otherparms); \ - } \ - } +#define SCTP_DEBUG_PRINTK(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + printk(KERN_DEBUG pr_fmt(fmt), ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + pr_cont(fmt, ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_IPADDR(fmt_lead, fmt_trail, \ + args_lead, saddr, args_trail...) \ +do { \ + if (sctp_debug_flag) { \ + if (saddr->sa.sa_family == AF_INET6) { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI6" fmt_trail), \ + args_lead, \ + &saddr->v6.sin6_addr, \ + args_trail); \ + } else { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI4" fmt_trail), \ + args_lead, \ + &saddr->v4.sin_addr.s_addr, \ + args_trail); \ + } \ + } \ +} while (0) #define SCTP_ENABLE_DEBUG { sctp_debug_flag = 1; } #define SCTP_DISABLE_DEBUG { sctp_debug_flag = 0; } @@ -306,6 +317,7 @@ extern int sctp_debug_flag; #else /* SCTP_DEBUG */ #define SCTP_DEBUG_PRINTK(whatever...) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) #define SCTP_DEBUG_PRINTK_IPADDR(whatever...) #define SCTP_ENABLE_DEBUG #define SCTP_DISABLE_DEBUG -- cgit v1.2.3 From 8789d459bc5e837bf37d261453df96ef54018d7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Aug 2010 13:30:26 +0200 Subject: mac80211: allow scan to complete from any context The ieee80211_scan_completed() function was a frequent source of potential deadlocks, since it is called by drivers but may call back into drivers, so drivers had to make sure to call it without any locks held, which frequently lead to more complex code in drivers. Avoid that problem by allowing the function to be called in any context, and queueing the actual work it does. Also update the documentation for it to indicate this. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dcc8c2bf986e..8f97548b6d80 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2268,7 +2268,8 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw); * * When hardware scan offload is used (i.e. the hw_scan() callback is * assigned) this function needs to be called by the driver to notify - * mac80211 that the scan finished. + * mac80211 that the scan finished. This function can be called from + * any context, including hardirq context. * * @hw: the hardware that finished the scan * @aborted: set to true if scan was aborted -- cgit v1.2.3 From c0692b8fe29fb4d4dad33487aabf3ed7e1e880c0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 14:26:53 +0300 Subject: cfg80211: allow changing port control protocol Some vendor specified mechanisms for 802.1X-style functionality use a different protocol than EAP (even if EAP is vendor-extensible). Allow setting the ethertype for the protocol when a driver has support for this. The default if unspecified is EAP, of course. Note: This is suitable only for station mode, not for AP implementation. Signed-off-by: Johannes Berg Signed-off-by: Juuso Oikarinen Signed-off-by: John W. Linville --- include/net/cfg80211.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f2740537b5d6..4c8c727d0cca 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -763,6 +763,10 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. + * @control_port_ethertype: the control port protocol that should be + * allowed through even on unauthorized ports + * @control_port_no_encrypt: TRUE to prevent encryption of control port + * protocol frames. */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -772,6 +776,8 @@ struct cfg80211_crypto_settings { int n_akm_suites; u32 akm_suites[NL80211_MAX_NR_AKM_SUITES]; bool control_port; + __be16 control_port_ethertype; + bool control_port_no_encrypt; }; /** @@ -1293,15 +1299,19 @@ struct cfg80211_ops { * @WIPHY_FLAG_4ADDR_AP: supports 4addr mode even on AP (with a single station * on a VLAN interface) * @WIPHY_FLAG_4ADDR_STATION: supports 4addr mode even as a station + * @WIPHY_FLAG_CONTROL_PORT_PROTOCOL: This device supports setting the + * control port protocol ethertype. The device also honours the + * control_port_no_encrypt flag. */ enum wiphy_flags { - WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), - WIPHY_FLAG_STRICT_REGULATORY = BIT(1), - WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), - WIPHY_FLAG_NETNS_OK = BIT(3), - WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), - WIPHY_FLAG_4ADDR_AP = BIT(5), - WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), + WIPHY_FLAG_STRICT_REGULATORY = BIT(1), + WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), + WIPHY_FLAG_NETNS_OK = BIT(3), + WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), + WIPHY_FLAG_4ADDR_AP = BIT(5), + WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), }; struct mac_address { -- cgit v1.2.3 From 34d4bc4d41d282a66dafe1b01a7d46bad468cefb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 12:35:58 +0200 Subject: mac80211: support runtime interface type changes Add support to mac80211 for changing the interface type even when the interface is UP, if the driver supports it. To achieve this * add a new driver callback for switching, * split some of the interface up/down code out into new functions (do_open/do_stop), and * maintain an own __SDATA_RUNNING bit that will not be set during interface type, so that any other code doesn't use the interface. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8f97548b6d80..f91fc331369b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1537,6 +1537,12 @@ enum ieee80211_ampdu_mlme_action { * negative error code (which will be seen in userspace.) * Must be implemented and can sleep. * + * @change_interface: Called when a netdevice changes type. This callback + * is optional, but only if it is supported can interface types be + * switched while the interface is UP. The callback may sleep. + * Note that while an interface is being switched, it will not be + * found by the interface iteration callbacks. + * * @remove_interface: Notifies a driver that an interface is going down. * The @stop callback is called after this if it is the last interface * and no monitor interfaces are present. @@ -1693,6 +1699,9 @@ struct ieee80211_ops { void (*stop)(struct ieee80211_hw *hw); int (*add_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + int (*change_interface)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + enum nl80211_iftype new_type); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*config)(struct ieee80211_hw *hw, u32 changed); -- cgit v1.2.3 From dca43c75e7e545694a9dd6288553f55c53e2a3a3 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 27 Aug 2010 19:13:28 +0000 Subject: tcp: Add TCP_USER_TIMEOUT socket option. This patch provides a "user timeout" support as described in RFC793. The socket option is also needed for the the local half of RFC5482 "TCP User Timeout Option". TCP_USER_TIMEOUT is a TCP level socket option that takes an unsigned int, when > 0, to specify the maximum amount of time in ms that transmitted data may remain unacknowledged before TCP will forcefully close the corresponding connection and return ETIMEDOUT to the application. If 0 is given, TCP will continue to use the system default. Increasing the user timeouts allows a TCP connection to survive extended periods without end-to-end connectivity. Decreasing the user timeouts allows applications to "fail fast" if so desired. Otherwise it may take upto 20 minutes with the current system defaults in a normal WAN environment. The socket option can be made during any state of a TCP connection, but is only effective during the synchronized states of a connection (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, or LAST-ACK). Moreover, when used with the TCP keepalive (SO_KEEPALIVE) option, TCP_USER_TIMEOUT will overtake keepalive to determine when to close a connection due to keepalive failure. The option does not change in anyway when TCP retransmits a packet, nor when a keepalive probe will be sent. This option, like many others, will be inherited by an acceptor from its listener. Signed-off-by: H.K. Jerry Chu Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b6d3b55da19b..e4f494b42e06 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -125,6 +125,7 @@ struct inet_connection_sock { int probe_size; } icsk_mtup; u32 icsk_ca_priv[16]; + u32 icsk_user_timeout; #define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; -- cgit v1.2.3 From 22b71c8f4f3db8df92f5e7b081c265bc56c0bd2f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:23:12 +0000 Subject: tcp/dccp: Consolidate common code for RFC 3390 conversion This patch consolidates initial-window code common to TCP and CCID-2: * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb20193..a64022199b62 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -779,6 +779,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * + * John Heffner states: + * + * The RFC specifies a window of no more than 4380 bytes + * unless 2*MSS > 4380. Reading the pseudocode in the RFC + * is a bit misleading because they use a clamp at 4380 bytes + * rather than a multiplier in the relevant range. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); +} + extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); -- cgit v1.2.3 From 3d5b99ae82f8742e3bb1f8634fd11ac36ea19ee1 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:27:34 +0000 Subject: TCP: update initial windows according to RFC 5681 This updates the use of larger initial windows, as originally specified in RFC 3390, to use the newer IW values specified in RFC 5681, section 3.1. The changes made in RFC 5681 are: a) the setting now is more clearly specified in units of segments (as the comments by John Heffner emphasized, this was not very clear in RFC 3390); b) for connections with 1095 < SMSS <= 2190 there is now a change: - RFC 3390 says that IW <= 4380, - RFC 5681 says that IW = 3 * SMSS <= 6570. Since RFC 3390 is older and "only" proposed standard, whereas the newer RFC 5681 is already draft standard, it seems preferable to use the newer IW variant. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index a64022199b62..11dbacc886c0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -781,17 +781,11 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* * Convert RFC 3390 larger initial window into an equivalent number of packets. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than a multiplier in the relevant range. + * This is based on the numbers specified in RFC 5681, 3.1. */ static inline u32 rfc3390_bytes_to_packets(const u32 smss) { - return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); } extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); -- cgit v1.2.3 From 3fb5a991916091a908d53608a5899240039fb51e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 2 Sep 2010 15:42:43 +0000 Subject: cls_cgroup: Fix rcu lockdep warning Dave reported an rcu lockdep warning on 2.6.35.4 kernel task->cgroups and task->cgroups->subsys[i] are protected by RCU. So we avoid accessing invalid pointers here. This might happen, for example, when you are deref-ing those pointers while someone move @task from one cgroup to another. Reported-by: Dave Jones Signed-off-by: Li Zefan Signed-off-by: David S. Miller --- include/net/cls_cgroup.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc3536409..ef6c24a529e1 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -27,11 +27,17 @@ struct cgroup_cls_state #ifdef CONFIG_NET_CLS_CGROUP static inline u32 task_cls_classid(struct task_struct *p) { + int classid; + if (in_interrupt()) return 0; - return container_of(task_subsys_state(p, net_cls_subsys_id), - struct cgroup_cls_state, css)->classid; + rcu_read_lock(); + classid = container_of(task_subsys_state(p, net_cls_subsys_id), + struct cgroup_cls_state, css)->classid; + rcu_read_unlock(); + + return classid; } #else extern int net_cls_subsys_id; -- cgit v1.2.3 From 6523ce1525e88c598c75a1a6b8c4edddfa9defe8 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 5 Sep 2010 18:02:29 +0000 Subject: ipvs: fix active FTP - Do not create expectation when forwarding the PORT command to avoid blocking the connection. The problem is that nf_conntrack_ftp.c:help() tries to create the same expectation later in POST_ROUTING and drops the packet with "dropping packet" message after failure in nf_ct_expect_related. - Change ip_vs_update_conntrack to alter the conntrack for related connections from real server. If we do not alter the reply in this direction the next packet from client sent to vport 20 comes as NEW connection. We alter it but may be some collision happens for both conntracks and the second conntrack gets destroyed immediately. The connection stucks too. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip_vs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a4747a0f7303..f976885f686f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -955,6 +955,9 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, + int outin); + #endif /* __KERNEL__ */ #endif /* _NET_IP_VS_H */ -- cgit v1.2.3 From e3634169bcc0cce33c815865d62ab378739f7389 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 7 Sep 2010 05:55:38 +0000 Subject: include/net/raw.h: Convert raw_seq_private macro to inline Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/raw.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/raw.h b/include/net/raw.h index 43c57502659b..42ce6fe7a2d5 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -45,7 +45,10 @@ struct raw_iter_state { struct raw_hashinfo *h; }; -#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private) +static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) +{ + return seq->private; +} void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -- cgit v1.2.3 From 719f835853a92f6090258114a72ffe41f09155cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Sep 2010 05:08:44 +0000 Subject: udp: add rehash on connect() commit 30fff923 introduced in linux-2.6.33 (udp: bind() optimisation) added a secondary hash on UDP, hashed on (local addr, local port). Problem is that following sequence : fd = socket(...) connect(fd, &remote, ...) not only selects remote end point (address and port), but also sets local address, while UDP stack stored in secondary hash table the socket while its local address was INADDR_ANY (or ipv6 equivalent) Sequence is : - autobind() : choose a random local port, insert socket in hash tables [while local address is INADDR_ANY] - connect() : set remote address and port, change local address to IP given by a route lookup. When an incoming UDP frame comes, if more than 10 sockets are found in primary hash table, we switch to secondary table, and fail to find socket because its local address changed. One solution to this problem is to rehash datagram socket if needed. We add a new rehash(struct socket *) method in "struct proto", and implement this method for UDP v4 & v6, using a common helper. This rehashing only takes care of secondary hash table, since primary hash (based on local port only) is not changed. Reported-by: Krzysztof Piotr Oledzki Signed-off-by: Eric Dumazet Tested-by: Krzysztof Piotr Oledzki Signed-off-by: David S. Miller --- include/net/sock.h | 1 + include/net/udp.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfbdfe16..adab9dc58183 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -752,6 +752,7 @@ struct proto { /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); + void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); /* Keeping track of sockets in use */ diff --git a/include/net/udp.h b/include/net/udp.h index 7abdf305da50..a184d3496b13 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -151,6 +151,7 @@ static inline void udp_lib_hash(struct sock *sk) } extern void udp_lib_unhash(struct sock *sk); +extern void udp_lib_rehash(struct sock *sk, u16 new_hash); static inline void udp_lib_close(struct sock *sk, long timeout) { -- cgit v1.2.3 From 2944f45d9db851e186774df7c9cbf075f4a585c6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 14 Sep 2010 18:37:20 +0200 Subject: mac80211: add a note about iterating interfaces during add_interface() Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f91fc331369b..19a5cb4a6582 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2293,6 +2293,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted); * This function allows the iterator function to sleep, when the iterator * function is atomic @ieee80211_iterate_active_interfaces_atomic can * be used. + * Does not iterate over a new interface during add_interface() * * @hw: the hardware struct of which the interfaces should be iterated over * @iterator: the iterator function to call @@ -2310,6 +2311,7 @@ void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw, * hardware that are currently active and calls the callback for them. * This function requires the iterator callback function to be atomic, * if that is not desired, use @ieee80211_iterate_active_interfaces instead. + * Does not iterate over a new interface during add_interface() * * @hw: the hardware struct of which the interfaces should be iterated over * @iterator: the iterator function to call, cannot sleep -- cgit v1.2.3 From 55b1804c678e679e1018671bd40b583eab124689 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 13 Sep 2010 18:24:01 +0000 Subject: net/irda: Use static const char * const where possible Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/irda/irlan_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/irda/irlan_event.h b/include/net/irda/irlan_event.h index 6d9539f05806..018b5a77e610 100644 --- a/include/net/irda/irlan_event.h +++ b/include/net/irda/irlan_event.h @@ -67,7 +67,7 @@ typedef enum { IRLAN_WATCHDOG_TIMEOUT, } IRLAN_EVENT; -extern char *irlan_state[]; +extern const char * const irlan_state[]; void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event, struct sk_buff *skb); -- cgit v1.2.3 From 01f83d69844d307be2aa6fea88b0e8fe5cbdb2f4 Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Wed, 15 Sep 2010 10:27:52 -0700 Subject: tcp: Prevent overzealous packetization by SWS logic. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If peer uses tiny MSS (say, 75 bytes) and similarly tiny advertised window, the SWS logic will packetize to half the MSS unnecessarily. This causes problems with some embedded devices. However for large MSS devices we do want to half-MSS packetize otherwise we never get enough packets into the pipe for things like fast retransmit and recovery to work. Be careful also to handle the case where MSS > window, otherwise we'll never send until the probe timer. Reported-by: ツ Leandro Melo de Sales Signed-off-by: David S. Miller --- include/net/tcp.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index eaa9582779d0..3e4b33e36602 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -475,8 +475,22 @@ extern unsigned int tcp_current_mss(struct sock *sk); /* Bound MSS / TSO packet size with the half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) { - if (tp->max_window && pktsize > (tp->max_window >> 1)) - return max(tp->max_window >> 1, 68U - tp->tcp_header_len); + int cutoff; + + /* When peer uses tiny windows, there is no use in packetizing + * to sub-MSS pieces for the sake of SWS or making sure there + * are enough packets in the pipe for fast recovery. + * + * On the other hand, for extremely large MSS devices, handling + * smaller than MSS windows in this way does make sense. + */ + if (tp->max_window >= 512) + cutoff = (tp->max_window >> 1); + else + cutoff = tp->max_window; + + if (cutoff && pktsize > cutoff) + return max_t(int, cutoff, 68U - tp->tcp_header_len); else return pktsize; } -- cgit v1.2.3 From 6482f554e2b9cbe733d63124765104f29cf0c9ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 15 Sep 2010 12:19:53 +0000 Subject: Phonet: remove dangling pipe if an endpoint is closed early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closing a pipe endpoint is not normally allowed by the Phonet pipe, other than as a side after-effect of removing the pipe between two endpoints. But there is no way to prevent Linux userspace processes from being killed or suffering from bugs, so this can still happen. We might as well forcefully close Phonet pipe endpoints then. The cellular modem supports only a few existing pipes at a time. So we really should not leak them. This change instructs the modem to destroy the pipe if either of the pipe's endpoint (Linux socket) is closed too early. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 35672b1cf44a..37f23dc05de8 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -77,6 +77,11 @@ static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) #define MAX_PNPIPE_HEADER (MAX_PHONET_HEADER + 4) enum { + PNS_PIPE_CREATE_REQ = 0x00, + PNS_PIPE_CREATE_RESP, + PNS_PIPE_REMOVE_REQ, + PNS_PIPE_REMOVE_RESP, + PNS_PIPE_DATA = 0x20, PNS_PIPE_ALIGNED_DATA, -- cgit v1.2.3 From 4e3d16ce5e82648d7f4dfd28b6cf8fe2e9a9efc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 15 Sep 2010 12:30:11 +0000 Subject: Phonet: resource routing backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When both destination device and object are nul, Phonet routes the packet according to the resource field. In fact, this is the most common pattern when sending Phonet "request" packets. In this case, the packet is delivered to whichever endpoint (socket) has registered the resource. This adds a new table so that Linux processes can register their Phonet sockets to Phonet resources, if they have adequate privileges. (Namespace support is not implemented at the moment.) Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 7b114079a51b..d5df797f9540 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -54,6 +54,11 @@ void pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int pn_sock_get_port(struct sock *sk, unsigned short sport); +struct sock *pn_find_sock_by_res(struct net *net, u8 res); +int pn_sock_bind_res(struct sock *sock, u8 res); +int pn_sock_unbind_res(struct sock *sk, u8 res); +void pn_sock_unbind_all_res(struct sock *sk); + int pn_skb_send(struct sock *sk, struct sk_buff *skb, const struct sockaddr_pn *target); -- cgit v1.2.3 From 507215f8d04f9e61f38c975e61d93bcafd30815f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 15 Sep 2010 12:30:14 +0000 Subject: Phonet: list subscribed resources via proc_fs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pn_dev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 2d16783d5e20..13649eb57413 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -57,5 +57,6 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr); #define PN_NO_ADDR 0xff extern const struct file_operations pn_sock_seq_fops; +extern const struct file_operations pn_res_seq_fops; #endif -- cgit v1.2.3 From 9c376639297d3dd82d40e54c9cdca8da9dfc22f1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 20 Aug 2010 15:13:59 -0700 Subject: include/net/cfg80211.h: wiphy_ messages use dev_printk The output becomes: [ 41.261941] ieee80211 phy0: Selected rate control algorithm 'minstrel_ht' Signed-off-by: Joe Perches Signed-off-by: John W. Linville --- include/net/cfg80211.h | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4c8c727d0cca..a0613ff62c97 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2558,49 +2558,36 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, /* wiphy_printk helpers, similar to dev_printk */ #define wiphy_printk(level, wiphy, format, args...) \ - printk(level "%s: " format, wiphy_name(wiphy), ##args) + dev_printk(level, &(wiphy)->dev, format, ##args) #define wiphy_emerg(wiphy, format, args...) \ - wiphy_printk(KERN_EMERG, wiphy, format, ##args) + dev_emerg(&(wiphy)->dev, format, ##args) #define wiphy_alert(wiphy, format, args...) \ - wiphy_printk(KERN_ALERT, wiphy, format, ##args) + dev_alert(&(wiphy)->dev, format, ##args) #define wiphy_crit(wiphy, format, args...) \ - wiphy_printk(KERN_CRIT, wiphy, format, ##args) + dev_crit(&(wiphy)->dev, format, ##args) #define wiphy_err(wiphy, format, args...) \ - wiphy_printk(KERN_ERR, wiphy, format, ##args) + dev_err(&(wiphy)->dev, format, ##args) #define wiphy_warn(wiphy, format, args...) \ - wiphy_printk(KERN_WARNING, wiphy, format, ##args) + dev_warn(&(wiphy)->dev, format, ##args) #define wiphy_notice(wiphy, format, args...) \ - wiphy_printk(KERN_NOTICE, wiphy, format, ##args) + dev_notice(&(wiphy)->dev, format, ##args) #define wiphy_info(wiphy, format, args...) \ - wiphy_printk(KERN_INFO, wiphy, format, ##args) + dev_info(&(wiphy)->dev, format, ##args) -int wiphy_debug(const struct wiphy *wiphy, const char *format, ...) - __attribute__ ((format (printf, 2, 3))); - -#if defined(DEBUG) -#define wiphy_dbg(wiphy, format, args...) \ +#define wiphy_debug(wiphy, format, args...) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args) -#elif defined(CONFIG_DYNAMIC_DEBUG) + #define wiphy_dbg(wiphy, format, args...) \ - dynamic_pr_debug("%s: " format, wiphy_name(wiphy), ##args) -#else -#define wiphy_dbg(wiphy, format, args...) \ -({ \ - if (0) \ - wiphy_printk(KERN_DEBUG, wiphy, format, ##args); \ - 0; \ -}) -#endif + dev_dbg(&(wiphy)->dev, format, ##args) #if defined(VERBOSE_DEBUG) #define wiphy_vdbg wiphy_dbg #else - #define wiphy_vdbg(wiphy, format, args...) \ ({ \ if (0) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args); \ - 0; \ + 0; \ }) #endif -- cgit v1.2.3 From 2ca27bcff7127da1aa7dd39cd2a6f7cb187e327f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Sep 2010 14:58:23 +0200 Subject: mac80211: add p2p device type support When a driver advertises p2p device support, mac80211 will handle it, but internally it will rewrite the interface type to STA/AP rather than P2P-STA/GO since otherwise a lot of paths need to be touched that are otherwise identical. A p2p boolean tells drivers whether or not a given interface will be used for p2p or not. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 19a5cb4a6582..12a49f0ba32c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -769,6 +769,8 @@ struct ieee80211_channel_switch { * @bss_conf: BSS configuration for this interface, either our own * or the BSS we're associated to * @addr: address of this interface + * @p2p: indicates whether this AP or STA interface is a p2p + * interface, i.e. a GO or p2p-sta respectively * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *). */ @@ -776,6 +778,7 @@ struct ieee80211_vif { enum nl80211_iftype type; struct ieee80211_bss_conf bss_conf; u8 addr[ETH_ALEN]; + bool p2p; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); }; @@ -1701,7 +1704,7 @@ struct ieee80211_ops { struct ieee80211_vif *vif); int (*change_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum nl80211_iftype new_type); + enum nl80211_iftype new_type, bool p2p); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*config)(struct ieee80211_hw *hw, u32 changed); @@ -2721,4 +2724,26 @@ conf_is_ht(struct ieee80211_conf *conf) return conf->channel_type != NL80211_CHAN_NO_HT; } +static inline enum nl80211_iftype +ieee80211_iftype_p2p(enum nl80211_iftype type, bool p2p) +{ + if (p2p) { + switch (type) { + case NL80211_IFTYPE_STATION: + return NL80211_IFTYPE_P2P_CLIENT; + case NL80211_IFTYPE_AP: + return NL80211_IFTYPE_P2P_GO; + default: + break; + } + } + return type; +} + +static inline enum nl80211_iftype +ieee80211_vif_type_p2p(struct ieee80211_vif *vif) +{ + return ieee80211_iftype_p2p(vif->type, vif->p2p); +} + #endif /* MAC80211_H */ -- cgit v1.2.3 From 3575792e005dc9994f15ae72c1c6f401d134177d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 17 Sep 2010 14:18:16 +0200 Subject: ipvs: extend connection flags to 32 bits - the sync protocol supports 16 bits only, so bits 0..15 should be used only for flags that should go to backup server, bits 16 and above should be allocated for flags not sent to backup. - use IP_VS_CONN_F_DEST_MASK as mask of connection flags in destination that can be changed by user space - allow IP_VS_CONN_F_ONE_PACKET to be set in destination Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f976885f686f..62698a9c1631 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -366,6 +366,7 @@ struct ip_vs_conn { union nf_inet_addr caddr; /* client address */ union nf_inet_addr vaddr; /* virtual address */ union nf_inet_addr daddr; /* destination address */ + volatile __u32 flags; /* status flags */ __be16 cport; __be16 vport; __be16 dport; @@ -378,7 +379,6 @@ struct ip_vs_conn { /* Flags and state transition */ spinlock_t lock; /* lock for state transition */ - volatile __u16 flags; /* status flags */ volatile __u16 state; /* state info */ volatile __u16 old_state; /* old state, to be used for * state transition triggerd -- cgit v1.2.3 From 8444cf712c5f71845cba9dc30d8f530ff0d5ff83 Mon Sep 17 00:00:00 2001 From: Thomas Egerer Date: Mon, 20 Sep 2010 11:11:38 -0700 Subject: xfrm: Allow different selector family in temporary state The family parameter xfrm_state_find is used to find a state matching a certain policy. This value is set to the template's family (encap_family) right before xfrm_state_find is called. The family parameter is however also used to construct a temporary state in xfrm_state_find itself which is wrong for inter-family scenarios because it produces a selector for the wrong family. Since this selector is included in the xfrm_user_acquire structure, user space programs misinterpret IPv6 addresses as IPv4 and vice versa. This patch splits up the original init_tempsel function into a part that initializes the selector respectively the props and id of the temporary state, to allow for differing ip address families whithin the state. Signed-off-by: Thomas Egerer Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fc8f36dd0f5c..4f53532d4c2f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -298,8 +298,8 @@ struct xfrm_state_afinfo { const struct xfrm_type *type_map[IPPROTO_MAX]; struct xfrm_mode *mode_map[XFRM_MODE_MAX]; int (*init_flags)(struct xfrm_state *x); - void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, + void (*init_tempsel)(struct xfrm_selector *sel, struct flowi *fl); + void (*init_temprop)(struct xfrm_state *x, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); -- cgit v1.2.3 From f4bc17cdd205ebaa3807c2aa973719bb5ce6a5b2 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 21 Sep 2010 17:35:41 +0200 Subject: ipvs: netfilter connection tracking changes Add more code to IPVS to work with Netfilter connection tracking and fix some problems. - Allow IPVS to be compiled without connection tracking as in 2.6.35 and before. This can avoid keeping conntracks for all IPVS connections because this costs memory. ip_vs_ftp still depends on connection tracking and NAT as implemented for 2.6.36. - Add sysctl var "conntrack" to enable connection tracking for all IPVS connections. For loaded IPVS directors it needs tuning of nf_conntrack_max limit. - Add IP_VS_CONN_F_NFCT connection flag to request the connection to use connection tracking. This allows user space to provide this flag, for example, in dest->conn_flags. This can be useful to request connection tracking per real server instead of forcing it for all connections with the "conntrack" sysctl. This flag is set currently only by ip_vs_ftp and of course by "conntrack" sysctl. - Add ip_vs_nfct.c file to hold all connection tracking code, by this way main code should not depend of netfilter conntrack support. - Return back the ip_vs_post_routing handler as in 2.6.35 and use skb->ipvs_property=1 to allow IPVS to work without connection tracking Connection tracking: - most of the code is already in 2.6.36-rc - alter conntrack reply tuple for LVS-NAT connections when first packet from client is forwarded and conntrack state is NEW or RELATED. Additionally, alter reply for RELATED connections from real server, again for packet in original direction. - add IP_VS_XMIT_TUNNEL to confirm conntrack (without altering reply) for LVS-TUN early because we want to call nf_reset. It is needed because we add IPIP header and the original conntrack should be preserved, not destroyed. The transmitted IPIP packets can reuse same conntrack, so we do not set skb->ipvs_property. - try to destroy conntrack when the IPVS connection is destroyed. It is not fatal if conntrack disappears before that, it depends on the used timers. Fix problems from long time: - add skb->ip_summed = CHECKSUM_NONE for the LVS-TUN transmitters Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 62698a9c1631..e8ec5231eae9 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,7 +25,9 @@ #include #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ - +#ifdef CONFIG_IP_VS_NFCT +#include +#endif /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; @@ -798,6 +800,7 @@ extern int sysctl_ip_vs_expire_nodest_conn; extern int sysctl_ip_vs_expire_quiescent_template; extern int sysctl_ip_vs_sync_threshold[2]; extern int sysctl_ip_vs_nat_icmp_send; +extern int sysctl_ip_vs_conntrack; extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; @@ -955,8 +958,47 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +#ifdef CONFIG_IP_VS_NFCT +/* + * Netfilter connection tracking + * (from ip_vs_nfct.c) + */ +static inline int ip_vs_conntrack_enabled(void) +{ + return sysctl_ip_vs_conntrack; +} + extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin); +extern int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp); +extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, + struct ip_vs_conn *cp, u_int8_t proto, + const __be16 port, int from_rs); +extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp); + +#else + +static inline int ip_vs_conntrack_enabled(void) +{ + return 0; +} + +static inline void ip_vs_update_conntrack(struct sk_buff *skb, + struct ip_vs_conn *cp, int outin) +{ +} + +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, + struct ip_vs_conn *cp) +{ + return NF_ACCEPT; +} + +static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) +{ +} +/* CONFIG_IP_VS_NFCT */ +#endif #endif /* __KERNEL__ */ -- cgit v1.2.3 From 8a8030407f55a6aaedb51167c1a2383311fcd707 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 21 Sep 2010 17:38:57 +0200 Subject: ipvs: make rerouting optional with snat_reroute Add new sysctl flag "snat_reroute". Recent kernels use ip_route_me_harder() to route LVS-NAT responses properly by VIP when there are multiple paths to client. But setups that do not have alternative default routes can skip this routing lookup by using snat_reroute=0. Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index e8ec5231eae9..3915a4f4cd30 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -801,6 +801,7 @@ extern int sysctl_ip_vs_expire_quiescent_template; extern int sysctl_ip_vs_sync_threshold[2]; extern int sysctl_ip_vs_nat_icmp_send; extern int sysctl_ip_vs_conntrack; +extern int sysctl_ip_vs_snat_reroute; extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; -- cgit v1.2.3 From 48daa3bb84d547828871534caa51427a3fe90748 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Sep 2010 06:57:39 +0000 Subject: ipv6: addrconf.h cleanups - Use rcu_dereference_rtnl() in __in6_dev_get - kerneldoc for __in6_dev_get() and in6_dev_get() - Use inline functions instead of macros Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/addrconf.h | 63 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 45375b41a2a0..7d178a758acf 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -174,20 +174,32 @@ extern int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, extern int register_inet6addr_notifier(struct notifier_block *nb); extern int unregister_inet6addr_notifier(struct notifier_block *nb); -static inline struct inet6_dev * -__in6_dev_get(struct net_device *dev) +/** + * __in6_dev_get - get inet6_dev pointer from netdevice + * @dev: network device + * + * Caller must hold rcu_read_lock or RTNL, because this function + * does not take a reference on the inet6_dev. + */ +static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) { - return rcu_dereference_check(dev->ip6_ptr, - rcu_read_lock_held() || - lockdep_rtnl_is_held()); + return rcu_dereference_rtnl(dev->ip6_ptr); } -static inline struct inet6_dev * -in6_dev_get(struct net_device *dev) +/** + * in6_dev_get - get inet6_dev pointer from netdevice + * @dev: network device + * + * This version can be used in any context, and takes a reference + * on the inet6_dev. Callers must use in6_dev_put() later to + * release this reference. + */ +static inline struct inet6_dev *in6_dev_get(const struct net_device *dev) { - struct inet6_dev *idev = NULL; + struct inet6_dev *idev; + rcu_read_lock(); - idev = __in6_dev_get(dev); + idev = rcu_dereference(dev->ip6_ptr); if (idev) atomic_inc(&idev->refcnt); rcu_read_unlock(); @@ -196,16 +208,21 @@ in6_dev_get(struct net_device *dev) extern void in6_dev_finish_destroy(struct inet6_dev *idev); -static inline void -in6_dev_put(struct inet6_dev *idev) +static inline void in6_dev_put(struct inet6_dev *idev) { if (atomic_dec_and_test(&idev->refcnt)) in6_dev_finish_destroy(idev); } -#define __in6_dev_put(idev) atomic_dec(&(idev)->refcnt) -#define in6_dev_hold(idev) atomic_inc(&(idev)->refcnt) +static inline void __in6_dev_put(struct inet6_dev *idev) +{ + atomic_dec(&idev->refcnt); +} +static inline void in6_dev_hold(struct inet6_dev *idev) +{ + atomic_inc(&idev->refcnt); +} extern void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); @@ -215,9 +232,15 @@ static inline void in6_ifa_put(struct inet6_ifaddr *ifp) inet6_ifa_finish_destroy(ifp); } -#define __in6_ifa_put(ifp) atomic_dec(&(ifp)->refcnt) -#define in6_ifa_hold(ifp) atomic_inc(&(ifp)->refcnt) +static inline void __in6_ifa_put(struct inet6_ifaddr *ifp) +{ + atomic_dec(&ifp->refcnt); +} +static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) +{ + atomic_inc(&ifp->refcnt); +} /* @@ -240,23 +263,23 @@ static inline int ipv6_addr_is_multicast(const struct in6_addr *addr) static inline int ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) { - return (((addr->s6_addr32[0] ^ htonl(0xff020000)) | + return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | - (addr->s6_addr32[3] ^ htonl(0x00000001))) == 0); + (addr->s6_addr32[3] ^ htonl(0x00000001))) == 0; } static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) { - return (((addr->s6_addr32[0] ^ htonl(0xff020000)) | + return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | - (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0); + (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0; } extern int __ipv6_isatap_ifid(u8 *eui, __be32 addr); static inline int ipv6_addr_is_isatap(const struct in6_addr *addr) { - return ((addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE)); + return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); } #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 8b008faf92ac8f7eeb65e8cd36077601af7c46db Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 Sep 2010 08:36:59 +0200 Subject: netfilter: ctnetlink: allow to specify the expectation flags With this patch, you can specify the expectation flags for user-space created expectations. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 11e815084fcf..96bb42af5fae 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -67,9 +67,6 @@ struct nf_conntrack_expect_policy { #define NF_CT_EXPECT_CLASS_DEFAULT 0 -#define NF_CT_EXPECT_PERMANENT 0x1 -#define NF_CT_EXPECT_INACTIVE 0x2 - int nf_conntrack_expect_init(struct net *net); void nf_conntrack_expect_fini(struct net *net); -- cgit v1.2.3 From a02cec2155fbea457eca8881870fd2de1a4c4c76 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Sep 2010 20:43:57 +0000 Subject: net: return operator cleanup Change "return (EXPR);" to "return EXPR;" return is not a function, parentheses are not required. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/bluetooth/hci_core.h | 2 +- include/net/bluetooth/l2cap.h | 2 +- include/net/inet_ecn.h | 2 +- include/net/ip.h | 4 ++-- include/net/ipv6.h | 35 +++++++++++++++++------------------ include/net/irda/irlap.h | 2 +- include/net/irda/irlmp.h | 2 +- include/net/irda/irttp.h | 2 +- include/net/sch_generic.h | 2 +- include/net/sctp/sctp.h | 12 ++++++------ include/net/sctp/sm.h | 10 +++++----- include/net/sctp/structs.h | 2 +- include/net/sctp/tsnmap.h | 2 +- include/net/tipc/tipc_msg.h | 10 +++++----- 14 files changed, 44 insertions(+), 45 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4568b938ca35..ebec8c9a929d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -233,7 +233,7 @@ static inline void inquiry_cache_init(struct hci_dev *hdev) static inline int inquiry_cache_empty(struct hci_dev *hdev) { struct inquiry_cache *c = &hdev->inq_cache; - return (c->list == NULL); + return c->list == NULL; } static inline long inquiry_cache_age(struct hci_dev *hdev) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 6c241444f902..c819c8bf9b68 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -414,7 +414,7 @@ static inline int l2cap_tx_window_full(struct sock *sk) if (sub < 0) sub += 64; - return (sub == pi->remote_tx_win); + return sub == pi->remote_tx_win; } #define __get_txseq(ctrl) ((ctrl) & L2CAP_CTRL_TXSEQ) >> 1 diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 9b5d08f4f6e8..88bdd010d65d 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -27,7 +27,7 @@ static inline int INET_ECN_is_not_ect(__u8 dsfield) static inline int INET_ECN_is_capable(__u8 dsfield) { - return (dsfield & INET_ECN_ECT_0); + return dsfield & INET_ECN_ECT_0; } static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) diff --git a/include/net/ip.h b/include/net/ip.h index 7691aca133db..dbee3fe260e1 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -238,9 +238,9 @@ int ip_decrease_ttl(struct iphdr *iph) static inline int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) { - return (inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || + return inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || (inet_sk(sk)->pmtudisc == IP_PMTUDISC_WANT && - !(dst_metric_locked(dst, RTAX_MTU)))); + !(dst_metric_locked(dst, RTAX_MTU))); } extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 1f8412410998..4a3cd2cd2f5e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -262,7 +262,7 @@ static inline int ipv6_addr_scope(const struct in6_addr *addr) static inline int __ipv6_addr_src_scope(int type) { - return (type == IPV6_ADDR_ANY ? __IPV6_ADDR_SCOPE_INVALID : (type >> 16)); + return (type == IPV6_ADDR_ANY) ? __IPV6_ADDR_SCOPE_INVALID : (type >> 16); } static inline int ipv6_addr_src_scope(const struct in6_addr *addr) @@ -279,10 +279,10 @@ static inline int ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, const struct in6_addr *a2) { - return (!!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | - ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | - ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | - ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3]))); + return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | + ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | + ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | + ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); } static inline void ipv6_addr_copy(struct in6_addr *a1, const struct in6_addr *a2) @@ -317,10 +317,10 @@ static inline void ipv6_addr_set(struct in6_addr *addr, static inline int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) { - return (((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | - (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | - (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | - (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0); + return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | + (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | + (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | + (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; } static inline int __ipv6_prefix_equal(const __be32 *a1, const __be32 *a2, @@ -373,20 +373,20 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a); static inline int ipv6_addr_any(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - a->s6_addr32[2] | a->s6_addr32[3] ) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | a->s6_addr32[3]) == 0; } static inline int ipv6_addr_loopback(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0; } static inline int ipv6_addr_v4mapped(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | + (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0; } /* @@ -395,8 +395,7 @@ static inline int ipv6_addr_v4mapped(const struct in6_addr *a) */ static inline int ipv6_addr_orchid(const struct in6_addr *a) { - return ((a->s6_addr32[0] & htonl(0xfffffff0)) - == htonl(0x20010010)); + return (a->s6_addr32[0] & htonl(0xfffffff0)) == htonl(0x20010010); } static inline void ipv6_addr_set_v4mapped(const __be32 addr, @@ -441,7 +440,7 @@ static inline int __ipv6_addr_diff(const void *token1, const void *token2, int a * if returned value is greater than prefix length. * --ANK (980803) */ - return (addrlen << 5); + return addrlen << 5; } static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2) diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h index 9d0c78ea92f5..17fcd964f9d9 100644 --- a/include/net/irda/irlap.h +++ b/include/net/irda/irlap.h @@ -282,7 +282,7 @@ static inline int irlap_is_primary(struct irlap_cb *self) default: ret = -1; } - return(ret); + return ret; } /* Clear a pending IrLAP disconnect. - Jean II */ diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h index 3ffc1d0f93d6..fff11b7fe8a4 100644 --- a/include/net/irda/irlmp.h +++ b/include/net/irda/irlmp.h @@ -274,7 +274,7 @@ static inline int irlmp_lap_tx_queue_full(struct lsap_cb *self) if (self->lap->irlap == NULL) return 0; - return(IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD); + return IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD; } /* After doing a irlmp_dup(), this get one of the two socket back into diff --git a/include/net/irda/irttp.h b/include/net/irda/irttp.h index 11aee7a2972a..af4b87721d13 100644 --- a/include/net/irda/irttp.h +++ b/include/net/irda/irttp.h @@ -204,7 +204,7 @@ static inline int irttp_is_primary(struct tsap_cb *self) (self->lsap->lap == NULL) || (self->lsap->lap->irlap == NULL)) return -2; - return(irlap_is_primary(self->lsap->lap->irlap)); + return irlap_is_primary(self->lsap->lap->irlap); } #endif /* IRTTP_H */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 3c8728aaab4e..eda8808fdacd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -601,7 +601,7 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen) slot = 0; slot >>= rtab->rate.cell_log; if (slot > 255) - return (rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF]); + return rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF]; return rtab->data[slot]; } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 2cb3980b1616..505845ddb0be 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -405,7 +405,7 @@ static inline void sctp_v6_del_protocol(void) { return; } /* Map an association to an assoc_id. */ static inline sctp_assoc_t sctp_assoc2id(const struct sctp_association *asoc) { - return (asoc?asoc->assoc_id:0); + return asoc ? asoc->assoc_id : 0; } /* Look up the association by its id. */ @@ -473,7 +473,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) /* Tests if the list has one and only one entry. */ static inline int sctp_list_single_entry(struct list_head *head) { - return ((head->next != head) && (head->next == head->prev)); + return (head->next != head) && (head->next == head->prev); } /* Generate a random jitter in the range of -50% ~ +50% of input RTO. */ @@ -631,13 +631,13 @@ static inline int sctp_sanity_check(void) /* This is the hash function for the SCTP port hash table. */ static inline int sctp_phashfn(__u16 lport) { - return (lport & (sctp_port_hashsize - 1)); + return lport & (sctp_port_hashsize - 1); } /* This is the hash function for the endpoint hash table. */ static inline int sctp_ep_hashfn(__u16 lport) { - return (lport & (sctp_ep_hashsize - 1)); + return lport & (sctp_ep_hashsize - 1); } /* This is the hash function for the association hash table. */ @@ -645,7 +645,7 @@ static inline int sctp_assoc_hashfn(__u16 lport, __u16 rport) { int h = (lport << 16) + rport; h ^= h>>8; - return (h & (sctp_assoc_hashsize - 1)); + return h & (sctp_assoc_hashsize - 1); } /* This is the hash function for the association hash table. This is @@ -656,7 +656,7 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) { int h = (lport << 16) + rport; h ^= vtag; - return (h & (sctp_assoc_hashsize-1)); + return h & (sctp_assoc_hashsize - 1); } #define sctp_for_each_hentry(epb, node, head) \ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 4088c89a9055..9352d12f02de 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -345,12 +345,12 @@ enum { static inline int TSN_lt(__u32 s, __u32 t) { - return (((s) - (t)) & TSN_SIGN_BIT); + return ((s) - (t)) & TSN_SIGN_BIT; } static inline int TSN_lte(__u32 s, __u32 t) { - return (((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT)); + return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); } /* Compare two SSNs */ @@ -369,12 +369,12 @@ enum { static inline int SSN_lt(__u16 s, __u16 t) { - return (((s) - (t)) & SSN_SIGN_BIT); + return ((s) - (t)) & SSN_SIGN_BIT; } static inline int SSN_lte(__u16 s, __u16 t) { - return (((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT)); + return ((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT); } /* @@ -388,7 +388,7 @@ enum { static inline int ADDIP_SERIAL_gte(__u16 s, __u16 t) { - return (((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT)); + return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); } /* Check VTAG of the packet matches the sender's own tag. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f9e7473613bd..69fef4fb79c0 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -847,7 +847,7 @@ void sctp_packet_free(struct sctp_packet *); static inline int sctp_packet_empty(struct sctp_packet *packet) { - return (packet->size == packet->overhead); + return packet->size == packet->overhead; } /* This represents a remote transport address. diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index 4aabc5a96cf6..e7728bc14ccf 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -157,7 +157,7 @@ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map); /* Is there a gap in the TSN map? */ static inline int sctp_tsnmap_has_gap(const struct sctp_tsnmap *map) { - return (map->cumulative_tsn_ack_point != map->max_tsn_seen); + return map->cumulative_tsn_ack_point != map->max_tsn_seen; } /* Mark a duplicate TSN. Note: limit the storage of duplicate TSN diff --git a/include/net/tipc/tipc_msg.h b/include/net/tipc/tipc_msg.h index 2e159a812f83..ffe50b4e7b93 100644 --- a/include/net/tipc/tipc_msg.h +++ b/include/net/tipc/tipc_msg.h @@ -107,7 +107,7 @@ static inline u32 msg_hdr_sz(struct tipc_msg *m) static inline int msg_short(struct tipc_msg *m) { - return (msg_hdr_sz(m) == 24); + return msg_hdr_sz(m) == 24; } static inline u32 msg_size(struct tipc_msg *m) @@ -117,7 +117,7 @@ static inline u32 msg_size(struct tipc_msg *m) static inline u32 msg_data_sz(struct tipc_msg *m) { - return (msg_size(m) - msg_hdr_sz(m)); + return msg_size(m) - msg_hdr_sz(m); } static inline unchar *msg_data(struct tipc_msg *m) @@ -132,17 +132,17 @@ static inline u32 msg_type(struct tipc_msg *m) static inline u32 msg_named(struct tipc_msg *m) { - return (msg_type(m) == TIPC_NAMED_MSG); + return msg_type(m) == TIPC_NAMED_MSG; } static inline u32 msg_mcast(struct tipc_msg *m) { - return (msg_type(m) == TIPC_MCAST_MSG); + return msg_type(m) == TIPC_MCAST_MSG; } static inline u32 msg_connected(struct tipc_msg *m) { - return (msg_type(m) == TIPC_CONN_MSG); + return msg_type(m) == TIPC_CONN_MSG; } static inline u32 msg_errcode(struct tipc_msg *m) -- cgit v1.2.3 From eb7d3066cf864342e8ae6a5c1126a1602c4d06c0 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Tue, 21 Sep 2010 21:36:18 +0200 Subject: mac80211: clear txflags for ps-filtered frames This patch fixes stale mac80211_tx_control_flags for filtered / retried frames. Because ieee80211_handle_filtered_frame feeds skbs back into the tx path, they have to be stripped of some tx flags so they won't confuse the stack, driver or device. Cc: Acked-by: Johannes Berg Signed-off-by: Christian Lamparter Signed-off-by: John W. Linville --- include/net/mac80211.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 12a49f0ba32c..5d1187d7c5e5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -321,6 +321,9 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_LDPC: tells the driver to use LDPC for this frame * @IEEE80211_TX_CTL_STBC: Enables Space-Time Block Coding (STBC) for this * frame and selects the maximum number of streams that it can use. + * + * Note: If you have to add new flags to the enumeration, then don't + * forget to update %IEEE80211_TX_TEMPORARY_FLAGS when necessary. */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -350,6 +353,19 @@ enum mac80211_tx_control_flags { #define IEEE80211_TX_CTL_STBC_SHIFT 23 +/* + * This definition is used as a mask to clear all temporary flags, which are + * set by the tx handlers for each transmission attempt by the mac80211 stack. + */ +#define IEEE80211_TX_TEMPORARY_FLAGS (IEEE80211_TX_CTL_NO_ACK | \ + IEEE80211_TX_CTL_CLEAR_PS_FILT | IEEE80211_TX_CTL_FIRST_FRAGMENT | \ + IEEE80211_TX_CTL_SEND_AFTER_DTIM | IEEE80211_TX_CTL_AMPDU | \ + IEEE80211_TX_STAT_TX_FILTERED | IEEE80211_TX_STAT_ACK | \ + IEEE80211_TX_STAT_AMPDU | IEEE80211_TX_STAT_AMPDU_NO_BACK | \ + IEEE80211_TX_CTL_RATE_CTRL_PROBE | IEEE80211_TX_CTL_PSPOLL_RESPONSE | \ + IEEE80211_TX_CTL_MORE_FRAMES | IEEE80211_TX_CTL_LDPC | \ + IEEE80211_TX_CTL_STBC) + /** * enum mac80211_rate_control_flags - per-rate flags set by the * Rate Control algorithm. -- cgit v1.2.3 From 693019e90ca45d881109d32c0c6d29adf03f6447 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 23 Sep 2010 11:19:54 +0000 Subject: net: reset skb queue mapping when rx'ing over tunnel Reset queue mapping when an skb is reentering the stack via a tunnel. On second pass, the queue mapping from the original device is no longer valid. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 81d1413a8701..02386505033d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -242,6 +242,7 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; skb->rxhash = 0; + skb_set_queue_mapping(skb, 0); skb_dst_drop(skb); nf_reset(skb); } -- cgit v1.2.3 From 7a91b434e2bad554b709265db7603b1aa52dd92e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 26 Sep 2010 18:53:07 -0700 Subject: net: update SOCK_MIN_RCVBUF SOCK_MIN_RCVBUF current value is 256 bytes It doesnt permit to receive the smallest possible frame, considering socket sk_rmem_alloc/sk_rcvbuf account skb truesizes. On 64bit arches, sizeof(struct sk_buff) is 240 bytes. Add the typical 64 bytes of headroom, and we go over the limit. With old kernels and 32bit arches, we were under the limit, if netdriver was doing copybreak. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 8ae97c4970df..73a4f9702a65 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1558,7 +1558,11 @@ static inline void sk_wake_async(struct sock *sk, int how, int band) } #define SOCK_MIN_SNDBUF 2048 -#define SOCK_MIN_RCVBUF 256 +/* + * Since sk_rmem_alloc sums skb->truesize, even a small frame might need + * sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak + */ +#define SOCK_MIN_RCVBUF (2048 + sizeof(struct sk_buff)) static inline void sk_stream_moderate_sndbuf(struct sock *sk) { -- cgit v1.2.3 From 2cc6d2bf3d6195fabcf0febc192c01f99519a8f3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 24 Sep 2010 09:55:52 +0000 Subject: ipv6: add a missing unregister_pernet_subsys call Clean up a missing exit path in the ipv6 module init routines. In addrconf_init we call ipv6_addr_label_init which calls register_pernet_subsys for the ipv6_addr_label_ops structure. But if module loading fails, or if the ipv6 module is removed, there is no corresponding unregister_pernet_subsys call, which leaves a now-bogus address on the pernet_list, leading to oopses in subsequent registrations. This patch cleans up both the failed load path and the unload path. Tested by myself with good results. Signed-off-by: Neil Horman include/net/addrconf.h | 1 + net/ipv6/addrconf.c | 11 ++++++++--- net/ipv6/addrlabel.c | 5 +++++ 3 files changed, 14 insertions(+), 3 deletions(-) Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 45375b41a2a0..4d40c4d0230b 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -121,6 +121,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout) * IPv6 Address Label subsystem (addrlabel.c) */ extern int ipv6_addr_label_init(void); +extern void ipv6_addr_label_cleanup(void); extern void ipv6_addr_label_rtnl_register(void); extern u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, -- cgit v1.2.3 From 686b9cb994f5f74be790df4cd12873dfdc8a6984 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 23 Sep 2010 09:44:36 -0700 Subject: mac80211/ath9k: Support AMPDU with multiple VIFs. The old ieee80211_find_sta_by_hw method didn't properly find VIFS when there was more than one per AP. This caused AMPDU logic in ath9k to get the wrong VIF when trying to account for transmitted SKBs. This patch changes ieee80211_find_sta_by_hw to take a localaddr argument to distinguish between VIFs with the same AP but different local addresses. The method name is changed to ieee80211_find_sta_by_ifaddr. Signed-off-by: Ben Greear Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5d1187d7c5e5..73469d8b64bb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2432,25 +2432,28 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *addr); /** - * ieee80211_find_sta_by_hw - find a station on hardware + * ieee80211_find_sta_by_ifaddr - find a station on hardware * * @hw: pointer as obtained from ieee80211_alloc_hw() - * @addr: station's address + * @addr: remote station's address + * @localaddr: local address (vif->sdata->vif.addr). Use NULL for 'any'. * * This function must be called under RCU lock and the * resulting pointer is only valid under RCU lock as well. * - * NOTE: This function should not be used! When mac80211 is converted - * internally to properly keep track of stations on multiple - * virtual interfaces, it will not always know which station to - * return here since a single address might be used by multiple - * logical stations (e.g. consider a station connecting to another - * BSSID on the same AP hardware without disconnecting first). + * NOTE: You may pass NULL for localaddr, but then you will just get + * the first STA that matches the remote address 'addr'. + * We can have multiple STA associated with multiple + * logical stations (e.g. consider a station connecting to another + * BSSID on the same AP hardware without disconnecting first). + * In this case, the result of this method with localaddr NULL + * is not reliable. * - * DO NOT USE THIS FUNCTION. + * DO NOT USE THIS FUNCTION with localaddr NULL if at all possible. */ -struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, - const u8 *addr); +struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, + const u8 *addr, + const u8 *localaddr); /** * ieee80211_sta_block_awake - block station from waking up -- cgit v1.2.3 From 554891e63a29af35cc6bb403ef34e319518114d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Sep 2010 12:38:25 +0200 Subject: mac80211: move packet flags into packet commit 8c0c709eea5cbab97fb464cd68b06f24acc58ee1 Author: Johannes Berg Date: Wed Nov 25 17:46:15 2009 +0100 mac80211: move cmntr flag out of rx flags moved the CMNTR flag into the skb RX flags for some aggregation cleanups, but this was wrong since the optimisation this flag tried to make requires that it is kept across the processing of multiple interfaces -- which isn't true for flags in the skb. The patch not only broke the optimisation, it also introduced a bug: under some (common!) circumstances the flag will be set on an already freed skb! However, investigating this in more detail, I found that most of the flags that we set should be per packet, _except_ for this one, due to a-MPDU processing. Additionally, the flags used for processing (currently just this one) need to be reset before processing a new packet. Since we haven't actually seen bugs reported as a result of the wrong flags handling (which is not too surprising -- the only real bug case I can come up with is an a-MSDU contained in an a-MPDU), I'll make a different fix for rc. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 73469d8b64bb..fe8b9dae4dee 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -581,9 +581,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_HT: HT MCS was used and rate_idx is MCS index * @RX_FLAG_40MHZ: HT40 (40 MHz) was used * @RX_FLAG_SHORT_GI: Short guard interval was used - * @RX_FLAG_INTERNAL_CMTR: set internally after frame was reported - * on cooked monitor to avoid double-reporting it for multiple - * virtual interfaces */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = 1<<0, @@ -597,7 +594,6 @@ enum mac80211_rx_flags { RX_FLAG_HT = 1<<9, RX_FLAG_40MHZ = 1<<10, RX_FLAG_SHORT_GI = 1<<11, - RX_FLAG_INTERNAL_CMTR = 1<<12, }; /** @@ -618,6 +614,7 @@ enum mac80211_rx_flags { * @rate_idx: index of data rate into band's supported rates or MCS index if * HT rates are use (RX_FLAG_HT) * @flag: %RX_FLAG_* + * @rx_flags: internal RX flags for mac80211 */ struct ieee80211_rx_status { u64 mactime; @@ -627,6 +624,7 @@ struct ieee80211_rx_status { int antenna; int rate_idx; int flag; + unsigned int rx_flags; }; /** -- cgit v1.2.3 From fb0c5f0bc8b69b40549449ee7fc65f3706f12062 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Mon, 27 Sep 2010 03:31:00 +0000 Subject: tproxy: check for transparent flag in ip_route_newports as done in ip_route_connect() Signed-off-by: Ulrich Weber Signed-off-by: David S. Miller --- include/net/route.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index bd732d62e1c3..7e5e73bfa4de 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -199,6 +199,8 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, fl.fl_ip_sport = sport; fl.fl_ip_dport = dport; fl.proto = protocol; + if (inet_sk(sk)->transparent) + fl.flags |= FLOWI_FLAG_ANYSRC; ip_rt_put(*rp); *rp = NULL; security_sk_classify_flow(sk, &fl); -- cgit v1.2.3 From 8d98efa84b790bdd62248eb0dfff17e9baf5c844 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Sun, 26 Sep 2010 19:07:59 +0000 Subject: Phonet: Implement Pipe Controller to support Nokia Slim Modems Phonet stack assumes the presence of Pipe Controller, either in Modem or on Application Processing Engine user-space for the Pipe data. Nokia Slim Modems like WG2.5 used in ST-Ericsson U8500 platform do not implement Pipe controller in them. This patch adds Pipe Controller implemenation to Phonet stack to support Pipe data over Phonet stack for Nokia Slim Modems. Signed-off-by: Kumar Sanghvi Acked-by: Linus Walleij Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/net') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 37f23dc05de8..def6cfa3f451 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -45,6 +45,10 @@ struct pep_sock { u8 tx_fc; /* TX flow control */ u8 init_enable; /* auto-enable at creation */ u8 aligned; +#ifdef CONFIG_PHONET_PIPECTRLR + u16 remote_pep; + u8 pipe_state; +#endif }; static inline struct pep_sock *pep_sk(struct sock *sk) @@ -165,4 +169,21 @@ enum { PEP_IND_READY, }; +#ifdef CONFIG_PHONET_PIPECTRLR +#define PNS_PEP_CONNECT_UTID 0x02 +#define PNS_PIPE_CREATED_IND_UTID 0x04 +#define PNS_PIPE_ENABLE_UTID 0x0A +#define PNS_PIPE_ENABLED_IND_UTID 0x0C +#define PNS_PIPE_DISABLE_UTID 0x0F +#define PNS_PIPE_DISABLED_IND_UTID 0x11 +#define PNS_PEP_DISCONNECT_UTID 0x06 + +/* Used for tracking state of a pipe */ +enum { + PIPE_IDLE, + PIPE_DISABLED, + PIPE_ENABLED, +}; +#endif /* CONFIG_PHONET_PIPECTRLR */ + #endif -- cgit v1.2.3 From 290b895e0ba4552dfcfc4bd35759c192345b934a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Sep 2010 00:33:35 +0000 Subject: tunnels: prepare percpu accounting Tunnels are going to use percpu for their accounting. They are going to use a new tstats field in net_device. skb_tunnel_rx() is changed to be a wrapper around __skb_tunnel_rx() IPTUNNEL_XMIT() is changed to be a wrapper around __IPTUNNEL_XMIT() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 24 +++++++++++++++++++----- include/net/ipip.h | 12 +++++++----- 2 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 02386505033d..aa53fbc34b2b 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -227,6 +227,23 @@ static inline void skb_dst_force(struct sk_buff *skb) } +/** + * __skb_tunnel_rx - prepare skb for rx reinsert + * @skb: buffer + * @dev: tunnel device + * + * After decapsulation, packet is going to re-enter (netif_rx()) our stack, + * so make some cleanups. (no accounting done) + */ +static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) +{ + skb->dev = dev; + skb->rxhash = 0; + skb_set_queue_mapping(skb, 0); + skb_dst_drop(skb); + nf_reset(skb); +} + /** * skb_tunnel_rx - prepare skb for rx reinsert * @skb: buffer @@ -234,17 +251,14 @@ static inline void skb_dst_force(struct sk_buff *skb) * * After decapsulation, packet is going to re-enter (netif_rx()) our stack, * so make some cleanups, and perform accounting. + * Note: this accounting is not SMP safe. */ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) { - skb->dev = dev; /* TODO : stats should be SMP safe */ dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; - skb->rxhash = 0; - skb_set_queue_mapping(skb, 0); - skb_dst_drop(skb); - nf_reset(skb); + __skb_tunnel_rx(skb, dev); } /* Children define the path of the packet through the diff --git a/include/net/ipip.h b/include/net/ipip.h index 65caea8b414f..58abbf966b0c 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -45,7 +45,7 @@ struct ip_tunnel_prl_entry { struct rcu_head rcu_head; }; -#define IPTUNNEL_XMIT() do { \ +#define __IPTUNNEL_XMIT(stats1, stats2) do { \ int err; \ int pkt_len = skb->len - skb_transport_offset(skb); \ \ @@ -54,12 +54,14 @@ struct ip_tunnel_prl_entry { \ err = ip_local_out(skb); \ if (likely(net_xmit_eval(err) == 0)) { \ - txq->tx_bytes += pkt_len; \ - txq->tx_packets++; \ + (stats1)->tx_bytes += pkt_len; \ + (stats1)->tx_packets++; \ } else { \ - stats->tx_errors++; \ - stats->tx_aborted_errors++; \ + (stats2)->tx_errors++; \ + (stats2)->tx_aborted_errors++; \ } \ } while (0) +#define IPTUNNEL_XMIT() __IPTUNNEL_XMIT(txq, stats) + #endif -- cgit v1.2.3 From bc01befdcf3e40979eb518085a075cbf0aacede0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Sep 2010 21:06:34 +0200 Subject: netfilter: ctnetlink: add support for user-space expectation helpers This patch adds the basic infrastructure to support user-space expectation helpers via ctnetlink and the netfilter queuing infrastructure NFQUEUE. Basically, this patch: * adds NF_CT_EXPECT_USERSPACE flag to identify user-space created expectations. I have also added a sanity check in __nf_ct_expect_check() to avoid that kernel-space helpers may create an expectation if the master conntrack has no helper assigned. * adds some branches to check if the master conntrack helper exists, otherwise we skip the code that refers to kernel-space helper such as the local expectation list and the expectation policy. * allows to set the timeout for user-space expectations with no helper assigned. * a list of expectations created from user-space that depends on ctnetlink (if this module is removed, they are deleted). * includes USERSPACE in the /proc output for expectations that have been created by a user-space helper. This patch also modifies ctnetlink to skip including the helper name in the Netlink messages if no kernel-space helper is set (since no user-space expectation has not kernel-space kernel assigned). You can access an example user-space FTP conntrack helper at: http://people.netfilter.org/pablo/userspace-conntrack-helpers/nf-ftp-helper-userspace-POC.tar.bz Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 96bb42af5fae..416b83844485 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -85,6 +85,7 @@ nf_ct_find_expectation(struct net *net, u16 zone, void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); +void nf_ct_remove_userspace_expectations(void); /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ -- cgit v1.2.3 From 4465b469008bc03b98a1b8df4e9ae501b6c69d4b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 23 May 2010 19:54:12 +0000 Subject: ipv4: Allow configuring subnets as local addresses This patch allows a host to be configured to respond to any address in a specified range as if it were local, without actually needing to configure the address on an interface. This is done through routing table configuration. For instance, to configure a host to respond to any address in 10.1/16 received on eth0 as a local address we can do: ip rule add from all iif eth0 lookup 200 ip route add local 10.1/16 dev lo proto kernel scope host src 127.0.0.1 table 200 This host is now reachable by any 10.1/16 address (route lookup on input for packets received on eth0 can find the route). On output, the rule will not be matched so that this host can still send packets to 10.1/16 (not sent on loopback). Presumably, external routing can be configured to make sense out of this. To make this work, we needed to modify the logic in finding the interface which is assigned a given source address for output (dev_ip_find). We perform a normal fib_lookup instead of just a lookup on the local table, and in the lookup we ignore the input interface for matching. This patch is useful to implement IP-anycast for subnets of virtual addresses. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index bb08692a20b0..0ac3fb5e0973 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -49,6 +49,7 @@ struct flowi { __u8 proto; __u8 flags; #define FLOWI_FLAG_ANYSRC 0x01 +#define FLOWI_FLAG_MATCH_ANY_IIF 0x02 union { struct { __be16 sport; -- cgit v1.2.3 From a64de47c091e4a337fa9763315cb6f2fbf0c583b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 28 Sep 2010 17:08:02 +0000 Subject: arp: remove unnecessary export of arp_broken_ops arp_broken_ops is only used in arp.c Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/arp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/arp.h b/include/net/arp.h index 716f43c5c98e..f4cf6ce66586 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -26,6 +26,4 @@ extern struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, const unsigned char *target_hw); extern void arp_xmit(struct sk_buff *skb); -extern const struct neigh_ops arp_broken_ops; - #endif /* _ARP_H */ -- cgit v1.2.3 From 1b9f409293529da4630bfc5d6d8e7d7451a6ccb5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 28 Sep 2010 19:30:14 +0000 Subject: tcp: tcp_enter_quickack_mode can be static Function only used in tcp_input.c Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 914a60c7ad62..4fee0424af7e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -346,8 +346,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, } } -extern void tcp_enter_quickack_mode(struct sock *sk); - #define TCP_ECN_OK 1 #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 -- cgit v1.2.3 From e454c844644683571617896ab2a4ce0109c1943e Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 21 Sep 2010 16:31:11 -0300 Subject: Bluetooth: Fix deadlock in the ERTM logic The Enhanced Retransmission Mode(ERTM) is a realiable mode of operation of the Bluetooth L2CAP layer. Think on it like a simplified version of TCP. The problem we were facing here was a deadlock. ERTM uses a backlog queue to queue incomimg packets while the user is helding the lock. At some moment the sk_sndbuf can be exceeded and we can't alloc new skbs then the code sleep with the lock to wait for memory, that stalls the ERTM connection once we can't read the acknowledgements packets in the backlog queue to free memory and make the allocation of outcoming skb successful. This patch actually affect all users of bt_skb_send_alloc(), i.e., all L2CAP modes and SCO. We are safe against socket states changes or channels deletion while the we are sleeping wait memory. Checking for the sk->sk_err and sk->sk_shutdown make the code safe, since any action that can leave the socket or the channel in a not usable state set one of the struct members at least. Then we can check both of them when getting the lock again and return with the proper error if something unexpected happens. Signed-off-by: Gustavo F. Padovan Signed-off-by: Ulisses Furquim --- include/net/bluetooth/bluetooth.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 27a902d9b3a9..30fce0128dd7 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -161,12 +161,30 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, unsigned long l { struct sk_buff *skb; + release_sock(sk); if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) { skb_reserve(skb, BT_SKB_RESERVE); bt_cb(skb)->incoming = 0; } + lock_sock(sk); + + if (!skb && *err) + return NULL; + + *err = sock_error(sk); + if (*err) + goto out; + + if (sk->sk_shutdown) { + *err = -ECONNRESET; + goto out; + } return skb; + +out: + kfree_skb(skb); + return NULL; } int bt_err(__u16 code); -- cgit v1.2.3 From 367e5e376922dcf52f92e1db436010fb828d3bfa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Sep 2010 05:36:29 +0000 Subject: neigh: reorder fields in struct neighbour On 64bit arches, there are two 32bit holes that we can remove. sizeof(struct neighbour) shrinks from 0xf8 to 0xf0 bytes Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 242879b6c4df..7d08fd1062f0 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -94,7 +94,7 @@ struct neighbour { struct neighbour *next; struct neigh_table *tbl; struct neigh_parms *parms; - struct net_device *dev; + struct net_device *dev; unsigned long used; unsigned long confirmed; unsigned long updated; @@ -102,11 +102,11 @@ struct neighbour { __u8 nud_state; __u8 type; __u8 dead; + atomic_t refcnt; atomic_t probes; rwlock_t lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; - atomic_t refcnt; int (*output)(struct sk_buff *skb); struct sk_buff_head arp_queue; struct timer_list timer; @@ -163,7 +163,7 @@ struct neigh_table { atomic_t entries; rwlock_t lock; unsigned long last_rand; - struct kmem_cache *kmem_cachep; + struct kmem_cache *kmem_cachep; struct neigh_statistics __percpu *stats; struct neighbour **hash_buckets; unsigned int hash_mask; -- cgit v1.2.3 From c7d4426a98a5f6654cd0b4b33d9dab2e77192c18 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 Oct 2010 22:17:54 -0700 Subject: net: introduce DST_NOCACHE flag While doing stress tests with IP route cache disabled, and multi queue devices, I noticed a very high contention on one rwlock used in neighbour code. When many cpus are trying to send frames (possibly using a high performance multiqueue device) to the same neighbour, they fight for the neigh->lock rwlock in order to call neigh_hh_init(), and fight on hh->hh_refcnt (a pair of atomic_inc/atomic_dec_and_test()) But we dont need to call neigh_hh_init() for dst that are used only once. It costs four atomic operations at least, on two contended cache lines, plus the high contention on neigh->lock rwlock. Introduce a new dst flag, DST_NOCACHE, that is set when dst was not inserted in route cache. With the stress test bench, sending 160000000 frames on one neighbour, results are : Before patch: real 2m28.406s user 0m11.781s sys 36m17.964s After patch: real 1m26.532s user 0m12.185s sys 20m3.903s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index aa53fbc34b2b..a217c838ec0d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -43,10 +43,11 @@ struct dst_entry { short error; short obsolete; int flags; -#define DST_HOST 1 -#define DST_NOXFRM 2 -#define DST_NOPOLICY 4 -#define DST_NOHASH 8 +#define DST_HOST 0x0001 +#define DST_NOXFRM 0x0002 +#define DST_NOPOLICY 0x0004 +#define DST_NOHASH 0x0008 +#define DST_NOCACHE 0x0010 unsigned long expires; unsigned short header_len; /* more space at head required */ -- cgit v1.2.3 From f11017ec2d1859c661f4e2b12c4a8d250e1f47cf Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:52 +0900 Subject: IPVS: Add struct ip_vs_conn_param Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/net/ip_vs.h | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3915a4f4cd30..d4da774eca75 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -357,6 +357,15 @@ struct ip_vs_protocol { extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto); +struct ip_vs_conn_param { + const union nf_inet_addr *caddr; + const union nf_inet_addr *vaddr; + __be16 cport; + __be16 vport; + __u16 protocol; + u16 af; +}; + /* * IP_VS structure allocated for each dynamically scheduled connection */ @@ -626,13 +635,23 @@ enum { IP_VS_DIR_LAST, }; -extern struct ip_vs_conn *ip_vs_conn_in_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +static inline void ip_vs_conn_fill_param(int af, int protocol, + const union nf_inet_addr *caddr, + __be16 cport, + const union nf_inet_addr *vaddr, + __be16 vport, + struct ip_vs_conn_param *p) +{ + p->af = af; + p->protocol = protocol; + p->caddr = caddr; + p->cport = cport; + p->vaddr = vaddr; + p->vport = vport; +} -extern struct ip_vs_conn *ip_vs_ct_in_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); +struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, @@ -640,9 +659,7 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, unsigned int proto_off, int inverse); -extern struct ip_vs_conn *ip_vs_conn_out_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, @@ -658,11 +675,10 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) extern void ip_vs_conn_put(struct ip_vs_conn *cp); extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); -extern struct ip_vs_conn * -ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, - const union nf_inet_addr *vaddr, __be16 vport, - const union nf_inet_addr *daddr, __be16 dport, unsigned flags, - struct ip_vs_dest *dest); +struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, + const union nf_inet_addr *daddr, + __be16 dport, unsigned flags, + struct ip_vs_dest *dest); extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); extern const char * ip_vs_state_name(__u16 proto, int state); -- cgit v1.2.3 From 85999283a21ab2dd37427fdd8c8e8af57223977c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:53 +0900 Subject: IPVS: Add struct ip_vs_pe Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/net/ip_vs.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d4da774eca75..b6b309d05e4e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -364,6 +364,10 @@ struct ip_vs_conn_param { __be16 vport; __u16 protocol; u16 af; + + const struct ip_vs_pe *pe; + char *pe_data; + __u8 pe_data_len; }; /* @@ -416,6 +420,9 @@ struct ip_vs_conn { void *app_data; /* Application private data */ struct ip_vs_seq in_seq; /* incoming seq. struct */ struct ip_vs_seq out_seq; /* outgoing seq. struct */ + + char *pe_data; + __u8 pe_data_len; }; @@ -486,6 +493,9 @@ struct ip_vs_service { struct ip_vs_scheduler *scheduler; /* bound scheduler object */ rwlock_t sched_lock; /* lock sched_data */ void *sched_data; /* scheduler application data */ + + /* alternate persistence engine */ + struct ip_vs_pe *pe; }; @@ -549,6 +559,20 @@ struct ip_vs_scheduler { const struct sk_buff *skb); }; +/* The persistence engine object */ +struct ip_vs_pe { + struct list_head n_list; /* d-linked list head */ + char *name; /* scheduler name */ + atomic_t refcnt; /* reference counter */ + struct module *module; /* THIS_MODULE/NULL */ + + /* get the connection template, if any */ + int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb); + bool (*ct_match)(const struct ip_vs_conn_param *p, + struct ip_vs_conn *ct); + u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, + bool inverse); +}; /* * The application module object (a.k.a. app incarnation) @@ -648,6 +672,8 @@ static inline void ip_vs_conn_fill_param(int af, int protocol, p->cport = cport; p->vaddr = vaddr; p->vport = vport; + p->pe = NULL; + p->pe_data = NULL; } struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); @@ -803,7 +829,7 @@ extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * -ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb); +ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb); extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp); -- cgit v1.2.3 From a3c918acd29a96aba3b46bf50136e7953a480d17 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:53 +0900 Subject: IPVS: Add persistence engine data to /proc/net/ip_vs_conn This shouldn't break compatibility with userspace as the new data is at the end of the line. I have confirmed that this doesn't break ipvsadm, the main (only?) user-space user of this data. Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/net/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index b6b309d05e4e..974daf52ba76 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -572,6 +572,7 @@ struct ip_vs_pe { struct ip_vs_conn *ct); u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, bool inverse); + int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); }; /* -- cgit v1.2.3 From 8be67a6617b3403551fccb67b1c624c659419515 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:54 +0900 Subject: IPVS: management of persistence engine modules This is based heavily on the scheduler management code Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/net/ip_vs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 974daf52ba76..a6b93a26d4e2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -796,6 +796,12 @@ extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_init(void); extern void ip_vs_app_cleanup(void); +void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe); +void ip_vs_unbind_pe(struct ip_vs_service *svc); +int register_ip_vs_pe(struct ip_vs_pe *pe); +int unregister_ip_vs_pe(struct ip_vs_pe *pe); +extern struct ip_vs_pe *ip_vs_pe_get(const char *name); +extern void ip_vs_pe_put(struct ip_vs_pe *pe); /* * IPVS protocol functions (from ip_vs_proto.c) -- cgit v1.2.3 From 0d1e71b04a04b6912e50926b9987c1e72facb1f3 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:54 +0900 Subject: IPVS: Allow configuration of persistence engines Allow the persistence engine of a virtual service to be set, edited and unset. This feature only works with the netlink user-space interface. Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/net/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a6b93a26d4e2..52fbe2308c38 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -444,6 +444,7 @@ struct ip_vs_service_user_kern { /* virtual service options */ char *sched_name; + char *pe_name; unsigned flags; /* virtual service flags */ unsigned timeout; /* persistent timeout in sec */ u32 netmask; /* persistent netmask */ -- cgit v1.2.3 From 0c200d935346fe0ebde9b6dffbb683dddd166fb9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 4 Oct 2010 20:53:18 +0200 Subject: netfilter: nf_nat: make find/put static The functions nf_nat_proto_find_get and nf_nat_proto_put are only used internally in nf_nat_core. This might break some out of tree NAT module. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_nat_protocol.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h index df17bac46bf5..93cc90d28e66 100644 --- a/include/net/netfilter/nf_nat_protocol.h +++ b/include/net/netfilter/nf_nat_protocol.h @@ -45,9 +45,6 @@ struct nf_nat_protocol { extern int nf_nat_protocol_register(const struct nf_nat_protocol *proto); extern void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto); -extern const struct nf_nat_protocol *nf_nat_proto_find_get(u_int8_t protocol); -extern void nf_nat_proto_put(const struct nf_nat_protocol *proto); - /* Built-in protocols. */ extern const struct nf_nat_protocol nf_nat_protocol_tcp; extern const struct nf_nat_protocol nf_nat_protocol_udp; -- cgit v1.2.3 From eecc545856c8a0f27783a440d25f4ceaa1f95ce8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 4 Oct 2010 23:24:21 +0200 Subject: netfilter: add missing xt_log.h file Forgot to add xt_log.h in commit a8defca0 (netfilter: ipt_LOG: add bufferisation to call printk() once) Signed-off-by: Patrick McHardy --- include/net/netfilter/xt_log.h | 54 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 include/net/netfilter/xt_log.h (limited to 'include/net') diff --git a/include/net/netfilter/xt_log.h b/include/net/netfilter/xt_log.h new file mode 100644 index 000000000000..0dfb34a5b53c --- /dev/null +++ b/include/net/netfilter/xt_log.h @@ -0,0 +1,54 @@ +#define S_SIZE (1024 - (sizeof(unsigned int) + 1)) + +struct sbuff { + unsigned int count; + char buf[S_SIZE + 1]; +}; +static struct sbuff emergency, *emergency_ptr = &emergency; + +static int sb_add(struct sbuff *m, const char *f, ...) +{ + va_list args; + int len; + + if (likely(m->count < S_SIZE)) { + va_start(args, f); + len = vsnprintf(m->buf + m->count, S_SIZE - m->count, f, args); + va_end(args); + if (likely(m->count + len < S_SIZE)) { + m->count += len; + return 0; + } + } + m->count = S_SIZE; + printk_once(KERN_ERR KBUILD_MODNAME " please increase S_SIZE\n"); + return -1; +} + +static struct sbuff *sb_open(void) +{ + struct sbuff *m = kmalloc(sizeof(*m), GFP_ATOMIC); + + if (unlikely(!m)) { + local_bh_disable(); + do { + m = xchg(&emergency_ptr, NULL); + } while (!m); + } + m->count = 0; + return m; +} + +static void sb_close(struct sbuff *m) +{ + m->buf[m->count] = 0; + printk("%s\n", m->buf); + + if (likely(m != &emergency)) + kfree(m); + else { + xchg(&emergency_ptr, m); + local_bh_enable(); + } +} + -- cgit v1.2.3 From 1df9916e46451533463f227e6be57cc2cfca4c5f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 4 Oct 2010 20:14:17 +0000 Subject: fib: fib_rules_cleanup can be static fib_rules_cleanup_ups is only defined and used in one place. Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/fib_rules.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index e8923bc20f9f..ac2fd002812e 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -106,7 +106,6 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) extern struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *); extern void fib_rules_unregister(struct fib_rules_ops *); -extern void fib_rules_cleanup_ops(struct fib_rules_ops *); extern int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, -- cgit v1.2.3 From c61393ea83573ff422af505b6fd49ef9ec9b91ca Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 4 Oct 2010 20:17:53 +0000 Subject: ipv6: make __ipv6_isatap_ifid static Another exported symbol only used in one file Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 958d2749b7a9..a9441249306c 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -276,8 +276,6 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0; } -extern int __ipv6_isatap_ifid(u8 *eui, __be32 addr); - static inline int ipv6_addr_is_isatap(const struct in6_addr *addr) { return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); -- cgit v1.2.3 From 17e5a8082894a4b66cb69e7ec16074f0f01281e1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 29 Sep 2010 17:15:30 +0200 Subject: nl80211: allow drivers to indicate whether the survey data channel is in use Some user space applications only want to display survey data for the operating channel, however there is no API to get that yet. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a0613ff62c97..ecc0403b918a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -293,12 +293,14 @@ struct key_params { * enum survey_info_flags - survey information flags * * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in + * @SURVEY_INFO_IN_USE: channel is currently being used * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). */ enum survey_info_flags { SURVEY_INFO_NOISE_DBM = 1<<0, + SURVEY_INFO_IN_USE = 1<<1, }; /** -- cgit v1.2.3 From ea229e682633a18c1fa2c408400a6923cfc47910 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2010 21:21:25 +0200 Subject: cfg80211: remove spurious __KERNEL__ ifdef The net/cfg80211.h header file isn't exported to userspace, so there's no need for any kind of __KERNEL__ protection in it. If it was exported, everything else in it would need protection as well, not just the logging stuff ... Cc:Joe Perches Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ecc0403b918a..5f4d8acf7abb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2553,8 +2553,6 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); -#ifdef __KERNEL__ - /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ @@ -2601,6 +2599,4 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, #define wiphy_WARN(wiphy, format, args...) \ WARN(1, "wiphy: %s\n" format, wiphy_name(wiphy), ##args); -#endif - #endif /* __NET_CFG80211_H */ -- cgit v1.2.3 From 78be49ec2a0df34de9441930fdced20311fd709f Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Sat, 2 Oct 2010 11:31:55 +0200 Subject: mac80211: distinct between max rates and the number of rates the hw can report Some drivers cannot handle multiple retry rates specified by the rc algorithm but instead use their own retry table (for example rt2800). However, if such a device registers itself with a max_rates value of 1 the rc algorithm cannot make use of the extended information the device can provide about retried rates. On the other hand, if a device registers itself with a max_rates value > 1 the rc algorithm assumes that the device can handle multi rate retries. Fix this issue by introducing another hw parameter max_report_rates that can be set to a different value then max_rates to indicate if a device is capable of reporting more rates then specified in max_rates. Signed-off-by: Helmut Schaa Signed-off-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fe8b9dae4dee..47316a653ae1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1109,7 +1109,10 @@ enum ieee80211_hw_flags { * @sta_data_size: size (in bytes) of the drv_priv data area * within &struct ieee80211_sta. * - * @max_rates: maximum number of alternate rate retry stages + * @max_rates: maximum number of alternate rate retry stages the hw + * can handle. + * @max_report_rates: maximum number of alternate rate retry stages + * the hw can report back. * @max_rate_tries: maximum number of tries for each stage * * @napi_weight: weight used for NAPI polling. You must specify an @@ -1131,6 +1134,7 @@ struct ieee80211_hw { u16 max_listen_interval; s8 max_signal; u8 max_rates; + u8 max_report_rates; u8 max_rate_tries; }; -- cgit v1.2.3 From ff4c92d85c6f2777d2067f8552e7fefb4d1754ae Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 4 Oct 2010 21:14:03 +0200 Subject: genetlink: introduce pre_doit/post_doit hooks Each family may have some amount of boilerplate locking code that applies to most, or even all, commands. This allows a family to handle such things in a more generic way, by allowing it to a) include private flags in each operation b) specify a pre_doit hook that is called, before an operation's doit() callback and may return an error directly, c) specify a post_doit hook that can undo locking or similar things done by pre_doit, and finally d) include two private pointers in each info struct passed between all these operations including doit(). (It's two because I'll need two in nl80211 -- can be extended.) Signed-off-by: Johannes Berg Acked-by: David S. Miller Signed-off-by: John W. Linville --- include/net/genetlink.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index f7dcd2c70412..8a64b811a39a 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -20,6 +20,9 @@ struct genl_multicast_group { u32 id; }; +struct genl_ops; +struct genl_info; + /** * struct genl_family - generic netlink family * @id: protocol family idenfitier @@ -29,6 +32,10 @@ struct genl_multicast_group { * @maxattr: maximum number of attributes supported * @netnsok: set to true if the family can handle network * namespaces and should be presented in all of them + * @pre_doit: called before an operation's doit callback, it may + * do additional, common, filtering and return an error + * @post_doit: called after an operation's doit callback, it may + * undo operations done by pre_doit, for example release locks * @attrbuf: buffer to store parsed attributes * @ops_list: list of all assigned operations * @family_list: family list @@ -41,6 +48,12 @@ struct genl_family { unsigned int version; unsigned int maxattr; bool netnsok; + int (*pre_doit)(struct genl_ops *ops, + struct sk_buff *skb, + struct genl_info *info); + void (*post_doit)(struct genl_ops *ops, + struct sk_buff *skb, + struct genl_info *info); struct nlattr ** attrbuf; /* private */ struct list_head ops_list; /* private */ struct list_head family_list; /* private */ @@ -55,6 +68,8 @@ struct genl_family { * @genlhdr: generic netlink message header * @userhdr: user specific header * @attrs: netlink attributes + * @_net: network namespace + * @user_ptr: user pointers */ struct genl_info { u32 snd_seq; @@ -66,6 +81,7 @@ struct genl_info { #ifdef CONFIG_NET_NS struct net * _net; #endif + void * user_ptr[2]; }; static inline struct net *genl_info_net(struct genl_info *info) @@ -81,6 +97,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) /** * struct genl_ops - generic netlink operations * @cmd: command identifier + * @internal_flags: flags used by the family * @flags: flags * @policy: attribute validation policy * @doit: standard command callback @@ -90,6 +107,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) */ struct genl_ops { u8 cmd; + u8 internal_flags; unsigned int flags; const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, -- cgit v1.2.3 From d6bf781712a1d25cc8987036b3a48535b331eb91 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Oct 2010 06:15:44 +0000 Subject: net neigh: RCU conversion of neigh hash table David This is the first step for RCU conversion of neigh code. Next patches will convert hash_buckets[] and "struct neighbour" to RCU protected objects. Thanks [PATCH net-next] net neigh: RCU conversion of neigh hash table Instead of storing hash_buckets, hash_mask and hash_rnd in "struct neigh_table", a new structure is defined : struct neigh_hash_table { struct neighbour **hash_buckets; unsigned int hash_mask; __u32 hash_rnd; struct rcu_head rcu; }; And "struct neigh_table" has an RCU protected pointer to such a neigh_hash_table. This means the signature of (*hash)() function changed: We need to add a third parameter with the actual hash_rnd value, since this is not anymore a neigh_table field. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7d08fd1062f0..37845dae6488 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -138,13 +138,22 @@ struct pneigh_entry { * neighbour table manipulation */ +struct neigh_hash_table { + struct neighbour **hash_buckets; + unsigned int hash_mask; + __u32 hash_rnd; + struct rcu_head rcu; +}; + struct neigh_table { struct neigh_table *next; int family; int entry_size; int key_len; - __u32 (*hash)(const void *pkey, const struct net_device *); + __u32 (*hash)(const void *pkey, + const struct net_device *dev, + __u32 hash_rnd); int (*constructor)(struct neighbour *); int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); @@ -165,9 +174,7 @@ struct neigh_table { unsigned long last_rand; struct kmem_cache *kmem_cachep; struct neigh_statistics __percpu *stats; - struct neighbour **hash_buckets; - unsigned int hash_mask; - __u32 hash_rnd; + struct neigh_hash_table __rcu *nht; struct pneigh_entry **phash_buckets; }; @@ -237,6 +244,7 @@ extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_en struct neigh_seq_state { struct seq_net_private p; struct neigh_table *tbl; + struct neigh_hash_table *nht; void *(*neigh_sub_iter)(struct neigh_seq_state *state, struct neighbour *n, loff_t *pos); unsigned int bucket; -- cgit v1.2.3 From ebc0ffae5dfb4447e0a431ffe7fe1d467c48bbb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Oct 2010 10:41:36 +0000 Subject: fib: RCU conversion of fib_lookup() fib_lookup() converted to be called in RCU protected context, no reference taken and released on a contended cache line (fib_clntref) fib_table_lookup() and fib_semantic_match() get an additional parameter. struct fib_info gets an rcu_head field, and is freed after an rcu grace period. Stress test : (Sending 160.000.000 UDP frames on same neighbour, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_HASH) (about same results for FIB_TRIE) Before patch : real 1m31.199s user 0m13.761s sys 23m24.780s After patch: real 1m5.375s user 0m14.997s sys 15m50.115s Before patch Profile : 13044.00 15.4% __ip_route_output_key vmlinux 8438.00 10.0% dst_destroy vmlinux 5983.00 7.1% fib_semantic_match vmlinux 5410.00 6.4% fib_rules_lookup vmlinux 4803.00 5.7% neigh_lookup vmlinux 4420.00 5.2% _raw_spin_lock vmlinux 3883.00 4.6% rt_set_nexthop vmlinux 3261.00 3.9% _raw_read_lock vmlinux 2794.00 3.3% fib_table_lookup vmlinux 2374.00 2.8% neigh_resolve_output vmlinux 2153.00 2.5% dst_alloc vmlinux 1502.00 1.8% _raw_read_lock_bh vmlinux 1484.00 1.8% kmem_cache_alloc vmlinux 1407.00 1.7% eth_header vmlinux 1406.00 1.7% ipv4_dst_destroy vmlinux 1298.00 1.5% __copy_from_user_ll vmlinux 1174.00 1.4% dev_queue_xmit vmlinux 1000.00 1.2% ip_output vmlinux After patch Profile : 13712.00 15.8% dst_destroy vmlinux 8548.00 9.9% __ip_route_output_key vmlinux 7017.00 8.1% neigh_lookup vmlinux 4554.00 5.3% fib_semantic_match vmlinux 4067.00 4.7% _raw_read_lock vmlinux 3491.00 4.0% dst_alloc vmlinux 3186.00 3.7% neigh_resolve_output vmlinux 3103.00 3.6% fib_table_lookup vmlinux 2098.00 2.4% _raw_read_lock_bh vmlinux 2081.00 2.4% kmem_cache_alloc vmlinux 2013.00 2.3% _raw_spin_lock vmlinux 1763.00 2.0% __copy_from_user_ll vmlinux 1763.00 2.0% ip_output vmlinux 1761.00 2.0% ipv4_dst_destroy vmlinux 1631.00 1.9% eth_header vmlinux 1440.00 1.7% _raw_read_unlock_bh vmlinux Reference results, if IP route cache is enabled : real 0m29.718s user 0m10.845s sys 7m37.341s 25213.00 29.5% __ip_route_output_key vmlinux 9011.00 10.5% dst_release vmlinux 4817.00 5.6% ip_push_pending_frames vmlinux 4232.00 5.0% ip_finish_output vmlinux 3940.00 4.6% udp_sendmsg vmlinux 3730.00 4.4% __copy_from_user_ll vmlinux 3716.00 4.4% ip_route_output_flow vmlinux 2451.00 2.9% __xfrm_lookup vmlinux 2221.00 2.6% ip_append_data vmlinux 1718.00 2.0% _raw_spin_lock_bh vmlinux 1655.00 1.9% __alloc_skb vmlinux 1572.00 1.8% sock_wfree vmlinux 1345.00 1.6% kfree vmlinux Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/fib_rules.h | 2 ++ include/net/ip_fib.h | 17 ++++------------- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index ac2fd002812e..106f3097d384 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -31,6 +31,8 @@ struct fib_lookup_arg { void *lookup_ptr; void *result; struct fib_rule *rule; + int flags; +#define FIB_LOOKUP_NOREF 1 }; struct fib_rules_ops { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c93f94edc610..ba3666d31766 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -86,6 +86,7 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_power; #endif + struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev }; @@ -148,7 +149,7 @@ struct fib_table { }; extern int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, - struct fib_result *res); + struct fib_result *res, int fib_flags); extern int fib_table_insert(struct fib_table *, struct fib_config *); extern int fib_table_delete(struct fib_table *, struct fib_config *); extern int fib_table_dump(struct fib_table *table, struct sk_buff *skb, @@ -185,11 +186,11 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp, struct fib_table *table; table = fib_get_table(net, RT_TABLE_LOCAL); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; table = fib_get_table(net, RT_TABLE_MAIN); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; return -ENETUNREACH; } @@ -254,16 +255,6 @@ static inline void fib_info_put(struct fib_info *fi) free_fib_info(fi); } -static inline void fib_res_put(struct fib_result *res) -{ - if (res->fi) - fib_info_put(res->fi); -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (res->r) - fib_rule_put(res->r); -#endif -} - #ifdef CONFIG_PROC_FS extern int __net_init fib_proc_init(struct net *net); extern void __net_exit fib_proc_exit(struct net *net); -- cgit v1.2.3 From e31b82136d1adc7a599b6e99d3321e5831841f5a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Oct 2010 19:39:30 +0200 Subject: cfg80211/mac80211: allow per-station GTKs This adds API to allow adding per-station GTKs, updates mac80211 to support it, and also allows drivers to remove a key from hwaccel again when this may be necessary due to multiple GTKs. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 9 ++++++--- include/net/mac80211.h | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5f4d8acf7abb..0f77515266b8 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1130,13 +1130,14 @@ struct cfg80211_ops { struct vif_params *params); int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr, + u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params); int (*get_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr, void *cookie, + u8 key_index, bool pairwise, const u8 *mac_addr, + void *cookie, void (*callback)(void *cookie, struct key_params*)); int (*del_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr); + u8 key_index, bool pairwise, const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); @@ -1304,6 +1305,7 @@ struct cfg80211_ops { * @WIPHY_FLAG_CONTROL_PORT_PROTOCOL: This device supports setting the * control port protocol ethertype. The device also honours the * control_port_no_encrypt flag. + * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. */ enum wiphy_flags { WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), @@ -1314,6 +1316,7 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_AP = BIT(5), WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), + WIPHY_FLAG_IBSS_RSN = BIT(7), }; struct mac_address { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 47316a653ae1..33aa2e39147b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1041,6 +1041,13 @@ enum ieee80211_tkip_key_type { * @IEEE80211_HW_NEED_DTIM_PERIOD: * This device needs to know the DTIM period for the BSS before * associating. + * + * @IEEE80211_HW_SUPPORTS_PER_STA_GTK: The device's crypto engine supports + * per-station GTKs as used by IBSS RSN or during fast transition. If + * the device doesn't support per-station GTKs, but can be asked not + * to decrypt group addressed frames, then IBSS RSN support is still + * possible but software crypto will be used. Advertise the wiphy flag + * only in that case. */ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, @@ -1064,6 +1071,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, IEEE80211_HW_SUPPORTS_CQM_RSSI = 1<<20, + IEEE80211_HW_SUPPORTS_PER_STA_GTK = 1<<21, }; /** @@ -2582,6 +2590,22 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); void ieee80211_request_smps(struct ieee80211_vif *vif, enum ieee80211_smps_mode smps_mode); +/** + * ieee80211_key_removed - disable hw acceleration for key + * @key_conf: The key hw acceleration should be disabled for + * + * This allows drivers to indicate that the given key has been + * removed from hardware acceleration, due to a new key that + * was added. Don't use this if the key can continue to be used + * for TX, if the key restriction is on RX only it is permitted + * to keep the key for TX only and not call this function. + * + * Due to locking constraints, it may only be called during + * @set_key. This function must be allowed to sleep, and the + * key it tries to disable may still be used until it returns. + */ +void ieee80211_key_removed(struct ieee80211_key_conf *key_conf); + /* Rate control API */ /** -- cgit v1.2.3 From b206b4ef062d83c0875a085672ed50e8c8b01521 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 6 Oct 2010 18:34:12 +0900 Subject: nl80211/mac80211: Add retry and failed transmission count to station info This information is already available in mac80211, we just need to export it via cfg80211 and nl80211. Signed-off-by: Bruno Randolf Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0f77515266b8..e76daaa7dc25 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -401,6 +401,8 @@ struct station_parameters { * (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs) * @STATION_INFO_RX_PACKETS: @rx_packets filled * @STATION_INFO_TX_PACKETS: @tx_packets filled + * @STATION_INFO_TX_RETRIES: @tx_retries filled + * @STATION_INFO_TX_FAILED: @tx_failed filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -413,6 +415,8 @@ enum station_info_flags { STATION_INFO_TX_BITRATE = 1<<7, STATION_INFO_RX_PACKETS = 1<<8, STATION_INFO_TX_PACKETS = 1<<9, + STATION_INFO_TX_RETRIES = 1<<10, + STATION_INFO_TX_FAILED = 1<<11, }; /** @@ -462,6 +466,8 @@ struct rate_info { * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station + * @tx_retries: cumulative retry counts + * @tx_failed: number of failed transmissions (retries exceeded, no ACK) * @generation: generation number for nl80211 dumps. * This number should increase every time the list of stations * changes, i.e. when a station is added or removed, so that @@ -479,6 +485,8 @@ struct station_info { struct rate_info txrate; u32 rx_packets; u32 tx_packets; + u32 tx_retries; + u32 tx_failed; int generation; }; -- cgit v1.2.3 From 767e97e1e0db0d0f3152cd2f3bd3403596aedbad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Oct 2010 17:49:21 -0700 Subject: neigh: RCU conversion of struct neighbour This is the second step for neighbour RCU conversion. (first was commit d6bf7817 : RCU conversion of neigh hash table) neigh_lookup() becomes lockless, but still take a reference on found neighbour. (no more read_lock()/read_unlock() on tbl->lock) struct neighbour gets an additional rcu_head field and is freed after an RCU grace period. Future work would need to eventually not take a reference on neighbour for temporary dst (DST_NOCACHE), but this would need dst->_neighbour to use a noref bit like we did for skb->_dst. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 37845dae6488..a4538d553704 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -91,7 +91,7 @@ struct neigh_statistics { #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) struct neighbour { - struct neighbour *next; + struct neighbour __rcu *next; struct neigh_table *tbl; struct neigh_parms *parms; struct net_device *dev; @@ -111,6 +111,7 @@ struct neighbour { struct sk_buff_head arp_queue; struct timer_list timer; const struct neigh_ops *ops; + struct rcu_head rcu; u8 primary_key[0]; }; @@ -139,7 +140,7 @@ struct pneigh_entry { */ struct neigh_hash_table { - struct neighbour **hash_buckets; + struct neighbour __rcu **hash_buckets; unsigned int hash_mask; __u32 hash_rnd; struct rcu_head rcu; -- cgit v1.2.3 From 388ac775be95e510c2095ed6cd59422a5183a9fb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 7 Oct 2010 13:11:09 +0200 Subject: cfg80211: constify WDS address There's no need for the WDS peer address to not be const, so make it const. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e76daaa7dc25..0778d04b3bbe 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1229,7 +1229,7 @@ struct cfg80211_ops { int (*get_tx_power)(struct wiphy *wiphy, int *dbm); int (*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev, - u8 *addr); + const u8 *addr); void (*rfkill_poll)(struct wiphy *wiphy); -- cgit v1.2.3 From 5a5c731aa59cc2c44ca20f45b1a577cd4f5435e2 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 7 Oct 2010 16:39:20 -0700 Subject: wireless: Set some stats used by /proc/net/wireless (wext) Some stats for /proc/net/wireless (and wext in general) are not being set. This patch addresses a few of those with values easily obtained from mac80211 core. Signed-off-by: Ben Greear Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0778d04b3bbe..f920a06f363e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -403,6 +403,7 @@ struct station_parameters { * @STATION_INFO_TX_PACKETS: @tx_packets filled * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled + * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -417,6 +418,7 @@ enum station_info_flags { STATION_INFO_TX_PACKETS = 1<<9, STATION_INFO_TX_RETRIES = 1<<10, STATION_INFO_TX_FAILED = 1<<11, + STATION_INFO_RX_DROP_MISC = 1<<12, }; /** @@ -468,6 +470,7 @@ struct rate_info { * @tx_packets: packets transmitted to this station * @tx_retries: cumulative retry counts * @tx_failed: number of failed transmissions (retries exceeded, no ACK) + * @rx_dropped_misc: Dropped for un-specified reason. * @generation: generation number for nl80211 dumps. * This number should increase every time the list of stations * changes, i.e. when a station is added or removed, so that @@ -487,6 +490,7 @@ struct station_info { u32 tx_packets; u32 tx_retries; u32 tx_failed; + u32 rx_dropped_misc; int generation; }; -- cgit v1.2.3 From 8610c29a2c9f273886b1c31ae4d92c69d4326262 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 9 Oct 2010 02:39:29 +0200 Subject: cfg80211: add channel utilization stats to the survey command Using these, user space can calculate a relative channel utilization with arbitrary intervals by regularly taking snapshots of the survey results. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/cfg80211.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f920a06f363e..24d5b5869272 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -294,6 +294,11 @@ struct key_params { * * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in * @SURVEY_INFO_IN_USE: channel is currently being used + * @SURVEY_INFO_CHANNEL_TIME: channel active time (in ms) was filled in + * @SURVEY_INFO_CHANNEL_TIME_BUSY: channel busy time was filled in + * @SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: extension channel busy time was filled in + * @SURVEY_INFO_CHANNEL_TIME_RX: channel receive time was filled in + * @SURVEY_INFO_CHANNEL_TIME_TX: channel transmit time was filled in * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). @@ -301,6 +306,11 @@ struct key_params { enum survey_info_flags { SURVEY_INFO_NOISE_DBM = 1<<0, SURVEY_INFO_IN_USE = 1<<1, + SURVEY_INFO_CHANNEL_TIME = 1<<2, + SURVEY_INFO_CHANNEL_TIME_BUSY = 1<<3, + SURVEY_INFO_CHANNEL_TIME_EXT_BUSY = 1<<4, + SURVEY_INFO_CHANNEL_TIME_RX = 1<<5, + SURVEY_INFO_CHANNEL_TIME_TX = 1<<6, }; /** @@ -310,6 +320,11 @@ enum survey_info_flags { * @filled: bitflag of flags from &enum survey_info_flags * @noise: channel noise in dBm. This and all following fields are * optional + * @channel_time: amount of time in ms the radio spent on the channel + * @channel_time_busy: amount of time the primary channel was sensed busy + * @channel_time_ext_busy: amount of time the extension channel was sensed busy + * @channel_time_rx: amount of time the radio spent receiving data + * @channel_time_tx: amount of time the radio spent transmitting data * * Used by dump_survey() to report back per-channel survey information. * @@ -318,6 +333,11 @@ enum survey_info_flags { */ struct survey_info { struct ieee80211_channel *channel; + u64 channel_time; + u64 channel_time_busy; + u64 channel_time_ext_busy; + u64 channel_time_rx; + u64 channel_time_tx; u32 filled; s8 noise; }; -- cgit v1.2.3 From 0ed8ddf4045fcfcac36bad753dc4046118c603ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Oct 2010 10:44:07 +0000 Subject: neigh: Protect neigh->ha[] with a seqlock Add a seqlock in struct neighbour to protect neigh->ha[], and avoid dirtying neighbour in stress situation (many different flows / dsts) Dirtying takes place because of read_lock(&n->lock) and n->used writes. Switching to a seqlock, and writing n->used only on jiffies changes permits less dirtying. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a4538d553704..f04e7a2522c5 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -105,6 +105,7 @@ struct neighbour { atomic_t refcnt; atomic_t probes; rwlock_t lock; + seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; int (*output)(struct sk_buff *skb); @@ -302,7 +303,10 @@ static inline void neigh_confirm(struct neighbour *neigh) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { - neigh->used = jiffies; + unsigned long now = ACCESS_ONCE(jiffies); + + if (neigh->used != now) + neigh->used = now; if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; @@ -373,4 +377,14 @@ struct neighbour_cb { #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) +static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, + const struct net_device *dev) +{ + unsigned int seq; + + do { + seq = read_seqbegin(&n->ha_lock); + memcpy(dst, n->ha, dev->addr_len); + } while (read_seqretry(&n->ha_lock, seq)); +} #endif -- cgit v1.2.3 From fc66f95c68b6d4535a0ea2ea15d5cf626e310956 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Oct 2010 06:37:34 +0000 Subject: net dst: use a percpu_counter to track entries struct dst_ops tracks number of allocated dst in an atomic_t field, subject to high cache line contention in stress workload. Switch to a percpu_counter, to reduce number of time we need to dirty a central location. Place it on a separate cache line to avoid dirtying read only fields. Stress test : (Sending 160.000.000 UDP frames, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_TRIE, SLUB/NUMA) Before: real 0m51.179s user 0m15.329s sys 10m15.942s After: real 0m45.570s user 0m15.525s sys 9m56.669s With a small reordering of struct neighbour fields, subject of a following patch, (to separate refcnt from other read mostly fields) real 0m41.841s user 0m15.261s sys 8m45.949s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst_ops.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index d1ff9b7e99b8..1fa5306e3e23 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -1,6 +1,7 @@ #ifndef _NET_DST_OPS_H #define _NET_DST_OPS_H #include +#include struct dst_entry; struct kmem_cachep; @@ -22,7 +23,41 @@ struct dst_ops { void (*update_pmtu)(struct dst_entry *dst, u32 mtu); int (*local_out)(struct sk_buff *skb); - atomic_t entries; struct kmem_cache *kmem_cachep; + + struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp; }; + +static inline int dst_entries_get_fast(struct dst_ops *dst) +{ + return percpu_counter_read_positive(&dst->pcpuc_entries); +} + +static inline int dst_entries_get_slow(struct dst_ops *dst) +{ + int res; + + local_bh_disable(); + res = percpu_counter_sum_positive(&dst->pcpuc_entries); + local_bh_enable(); + return res; +} + +static inline void dst_entries_add(struct dst_ops *dst, int val) +{ + local_bh_disable(); + percpu_counter_add(&dst->pcpuc_entries, val); + local_bh_enable(); +} + +static inline int dst_entries_init(struct dst_ops *dst) +{ + return percpu_counter_init(&dst->pcpuc_entries, 0); +} + +static inline void dst_entries_destroy(struct dst_ops *dst) +{ + percpu_counter_destroy(&dst->pcpuc_entries); +} + #endif -- cgit v1.2.3 From e37ef961e50d74f55e9edb48e54dd2e7963aad39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Oct 2010 12:20:54 +0000 Subject: neigh: reorder struct neighbour fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le mardi 12 octobre 2010 à 00:02 +0200, Eric Dumazet a écrit : > Here is the followup patch. > > Thanks ! > Oops, this was an old version, the up2date ones also took care of "used" field. I guess its time for a sleep, sorry again. [PATCH net-next V2] neigh: reorder struct neighbour fields (refcnt) and (ha_lock, ha, used, dev, output, ops, primary_key) should be placed on a separate cache lines. refcnt can be often written, while other fields are mostly read. This gave me good result on stress test : before: real 0m45.570s user 0m15.525s sys 9m56.669s After: real 0m41.841s user 0m15.261s sys 8m45.949s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index f04e7a2522c5..55590ab16b3e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -94,8 +94,6 @@ struct neighbour { struct neighbour __rcu *next; struct neigh_table *tbl; struct neigh_parms *parms; - struct net_device *dev; - unsigned long used; unsigned long confirmed; unsigned long updated; __u8 flags; @@ -103,16 +101,18 @@ struct neighbour { __u8 type; __u8 dead; atomic_t refcnt; + struct sk_buff_head arp_queue; + struct timer_list timer; + unsigned long used; atomic_t probes; rwlock_t lock; seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; int (*output)(struct sk_buff *skb); - struct sk_buff_head arp_queue; - struct timer_list timer; const struct neigh_ops *ops; struct rcu_head rcu; + struct net_device *dev; u8 primary_key[0]; }; -- cgit v1.2.3 From 8f1e1742233cd1c3444dfc6c945a2efb2814e157 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 9 Aug 2010 17:38:10 -0400 Subject: Bluetooth: HCI devices are either BR/EDR or AMP radios HCI transport drivers may not know what type of radio an AMP device has so only say whether they're BR/EDR or AMP devices. Signed-off-by: David Vrabel Signed-off-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index bcbdd6d4e6dd..e30e00834340 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -54,7 +54,7 @@ /* HCI controller types */ #define HCI_BREDR 0x00 -#define HCI_80211 0x01 +#define HCI_AMP 0x01 /* HCI device quirks */ enum { -- cgit v1.2.3 From 796c86eec84ddfd02281c5071838ed1fefda6b90 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Wed, 8 Sep 2010 10:05:27 -0700 Subject: Bluetooth: Add common code for stream-oriented recvmsg() This commit adds a bt_sock_stream_recvmsg() function for use by any Bluetooth code that uses SOCK_STREAM sockets. This code is copied from rfcomm_sock_recvmsg() with minimal modifications to remove RFCOMM-specific functionality and improve readability. L2CAP (with the SOCK_STREAM socket type) and RFCOMM have common needs when it comes to reading data. Proper stream read semantics require that applications can read from a stream one byte at a time and not lose any data. The RFCOMM code already operated on and pulled data from the underlying L2CAP socket, so very few changes were required to make the code more generic for use with non-RFCOMM data over L2CAP. Applications that need more awareness of L2CAP frame boundaries are still free to use SOCK_SEQPACKET sockets, and may verify that they connection did not fall back to basic mode by calling getsockopt(). Signed-off-by: Mat Martineau Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/bluetooth.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 30fce0128dd7..d81ea7997701 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -126,6 +126,8 @@ int bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags); +int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags); uint bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); -- cgit v1.2.3 From 534c92fde74c8d2143fc15b5f1d957f42cb43570 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 1 Oct 2010 12:05:11 +0300 Subject: Bluetooth: clean up rfcomm code Remove dead code and unused rfcomm thread events Signed-off-by: Andrei Emeltchenko Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/rfcomm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index a140847d622c..71047bc0af84 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -213,11 +213,6 @@ struct rfcomm_dlc { #define RFCOMM_DEFER_SETUP 8 /* Scheduling flags and events */ -#define RFCOMM_SCHED_STATE 0 -#define RFCOMM_SCHED_RX 1 -#define RFCOMM_SCHED_TX 2 -#define RFCOMM_SCHED_TIMEO 3 -#define RFCOMM_SCHED_AUTH 4 #define RFCOMM_SCHED_WAKEUP 31 /* MSC exchange flags */ -- cgit v1.2.3 From 271733cf844a2f5f186ef3b40c26d6397b71039a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Oct 2010 12:06:23 +0200 Subject: cfg80211: notify drivers about frame registrations Drivers may need to adjust their filters according to frame registrations, so notify them about them. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 24d5b5869272..2a7936d7851d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1147,6 +1147,9 @@ struct cfg80211_pmksa { * allows the driver to adjust the dynamic ps timeout value. * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold. * + * @mgmt_frame_register: Notify driver that a management frame type was + * registered. Note that this callback may not sleep, and cannot run + * concurrently with itself. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); @@ -1297,6 +1300,10 @@ struct cfg80211_ops { int (*set_cqm_rssi_config)(struct wiphy *wiphy, struct net_device *dev, s32 rssi_thold, u32 rssi_hyst); + + void (*mgmt_frame_register)(struct wiphy *wiphy, + struct net_device *dev, + u16 frame_type, bool reg); }; /* -- cgit v1.2.3 From 7be5086d4cb7cceb71d724a9524d5e927785d04f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Oct 2010 12:06:24 +0200 Subject: mac80211: add probe request filter flag Using the frame registration notification, we can see when probe requests are requested and notify the low-level driver via filtering. The flag is also set in AP and IBSS modes. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 33aa2e39147b..9fdf982d1286 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1478,12 +1478,14 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * honour this flag if possible. * * @FIF_CONTROL: pass control frames (except for PS Poll), if PROMISC_IN_BSS - * is not set then only those addressed to this station. + * is not set then only those addressed to this station. * * @FIF_OTHER_BSS: pass frames destined to other BSSes * - * @FIF_PSPOLL: pass PS Poll frames, if PROMISC_IN_BSS is not set then only - * those addressed to this station. + * @FIF_PSPOLL: pass PS Poll frames, if PROMISC_IN_BSS is not set then only + * those addressed to this station. + * + * @FIF_PROBE_REQ: pass probe request frames */ enum ieee80211_filter_flags { FIF_PROMISC_IN_BSS = 1<<0, @@ -1494,6 +1496,7 @@ enum ieee80211_filter_flags { FIF_CONTROL = 1<<5, FIF_OTHER_BSS = 1<<6, FIF_PSPOLL = 1<<7, + FIF_PROBE_REQ = 1<<8, }; /** -- cgit v1.2.3 From b3d6255388de0680a14f0907deb7b7f4fa0d25d5 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Tue, 12 Oct 2010 20:14:43 +0000 Subject: Phonet: 'connect' socket implementation for Pipe controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on suggestion by Rémi Denis-Courmont to implement 'connect' for Pipe controller logic, this patch implements 'connect' socket call for the Pipe controller logic. The patch does following:- - Removes setsockopts for PNPIPE_CREATE and PNPIPE_DESTROY - Adds setsockopt for setting the Pipe handle value - Implements connect socket call - Updates the Pipe controller logic User-space should now follow below sequence with Pipe controller:- -socket -bind -setsockopt for PNPIPE_PIPE_HANDLE -connect -setsockopt for PNPIPE_ENCAP_IP -setsockopt for PNPIPE_ENABLE GPRS/3G data has been tested working fine with this. Signed-off-by: Kumar Sanghvi Acked-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index def6cfa3f451..b60b28c99e87 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -46,8 +46,8 @@ struct pep_sock { u8 init_enable; /* auto-enable at creation */ u8 aligned; #ifdef CONFIG_PHONET_PIPECTRLR - u16 remote_pep; - u8 pipe_state; + u8 pipe_state; + struct sockaddr_pn remote_pep; #endif }; -- cgit v1.2.3 From 31e3c3f6f1f9b154981a0e6620df700463db30ee Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 13 Oct 2010 13:20:35 +0000 Subject: tipc: cleanup function namespace Do some cleanups of TIPC based on make namespacecheck 1. Don't export unused symbols 2. Eliminate dead code 3. Make functions and variables local 4. Rename buf_acquire to tipc_buf_acquire since it is used in several files Compile tested only. This make break out of tree kernel modules that depend on TIPC routines. Signed-off-by: Stephen Hemminger Acked-by: Jon Maloy Acked-by: Paul Gortmaker Signed-off-by: David S. Miller --- include/net/tipc/tipc.h | 71 -------------------------------------------- include/net/tipc/tipc_port.h | 2 -- 2 files changed, 73 deletions(-) (limited to 'include/net') diff --git a/include/net/tipc/tipc.h b/include/net/tipc/tipc.h index 15af6dca0b49..1e0645e1eed2 100644 --- a/include/net/tipc/tipc.h +++ b/include/net/tipc/tipc.h @@ -50,8 +50,6 @@ * TIPC operating mode routines */ -u32 tipc_get_addr(void); - #define TIPC_NOT_RUNNING 0 #define TIPC_NODE_MODE 1 #define TIPC_NET_MODE 2 @@ -62,8 +60,6 @@ int tipc_attach(unsigned int *userref, tipc_mode_event, void *usr_handle); void tipc_detach(unsigned int userref); -int tipc_get_mode(void); - /* * TIPC port manipulation routines */ @@ -153,12 +149,6 @@ int tipc_disconnect(u32 portref); int tipc_shutdown(u32 ref); -int tipc_isconnected(u32 portref, int *isconnected); - -int tipc_peer(u32 portref, struct tipc_portid *peer); - -int tipc_ref_valid(u32 portref); - /* * TIPC messaging routines */ @@ -170,38 +160,12 @@ int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect); -int tipc_send_buf(u32 portref, - struct sk_buff *buf, - unsigned int dsz); - int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain, unsigned int num_sect, struct iovec const *msg_sect); -int tipc_send_buf2name(u32 portref, - struct tipc_name const *name, - u32 domain, - struct sk_buff *buf, - unsigned int dsz); - -int tipc_forward2name(u32 portref, - struct tipc_name const *name, - u32 domain, - unsigned int section_count, - struct iovec const *msg_sect, - struct tipc_portid const *origin, - unsigned int importance); - -int tipc_forward_buf2name(u32 portref, - struct tipc_name const *name, - u32 domain, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance); - int tipc_send2port(u32 portref, struct tipc_portid const *dest, unsigned int num_sect, @@ -212,46 +176,11 @@ int tipc_send_buf2port(u32 portref, struct sk_buff *buf, unsigned int dsz); -int tipc_forward2port(u32 portref, - struct tipc_portid const *dest, - unsigned int num_sect, - struct iovec const *msg_sect, - struct tipc_portid const *origin, - unsigned int importance); - -int tipc_forward_buf2port(u32 portref, - struct tipc_portid const *dest, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance); - int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, u32 domain, /* currently unused */ unsigned int section_count, struct iovec const *msg); - -#if 0 -int tipc_multicast_buf(u32 portref, - struct tipc_name_seq const *seq, - u32 domain, - void *buf, - unsigned int size); -#endif - -/* - * TIPC subscription routines - */ - -int tipc_ispublished(struct tipc_name const *name); - -/* - * Get number of available nodes within specified domain (excluding own node) - */ - -unsigned int tipc_available_nodes(const u32 domain); - #endif #endif diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h index c54917cbfa48..1893aaf49426 100644 --- a/include/net/tipc/tipc_port.h +++ b/include/net/tipc/tipc_port.h @@ -88,8 +88,6 @@ void tipc_acknowledge(u32 port_ref,u32 ack); struct tipc_port *tipc_get_port(const u32 ref); -void *tipc_get_handle(const u32 ref); - /* * The following routines require that the port be locked on entry */ -- cgit v1.2.3 From 8e602ce2980fd6941dc0d3dda12e5095e8206f34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Oct 2010 05:56:18 +0000 Subject: netns: reorder fields in struct net In a network bench, I noticed an unfortunate false sharing between 'loopback_dev' and 'count' fields in "struct net". 'count' is written each time a socket is created or destroyed, while loopback_dev might be often read in routing code. Move loopback_dev in a read mostly section of "struct net" Note: struct netns_xfrm is cache line aligned on SMP. (It contains a "struct dst_ops") Move it at the end to avoid holes, and reduce sizeof(struct net) by 128 bytes on ia32. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/net_namespace.h | 17 ++++++++++------- include/net/netns/xfrm.h | 9 +++++---- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bd10a7908993..65af9a07cf76 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -41,6 +41,8 @@ struct net { * destroy on demand */ #endif + spinlock_t rules_mod_lock; + struct list_head list; /* list of network namespaces */ struct list_head cleanup_list; /* namespaces on death row */ struct list_head exit_list; /* Use only net_mutex */ @@ -52,7 +54,8 @@ struct net { struct ctl_table_set sysctls; #endif - struct net_device *loopback_dev; /* The loopback */ + struct sock *rtnl; /* rtnetlink socket */ + struct sock *genl_sock; struct list_head dev_base_head; struct hlist_head *dev_name_head; @@ -60,11 +63,9 @@ struct net { /* core fib_rules */ struct list_head rules_ops; - spinlock_t rules_mod_lock; - struct sock *rtnl; /* rtnetlink socket */ - struct sock *genl_sock; + struct net_device *loopback_dev; /* The loopback */ struct netns_core core; struct netns_mib mib; struct netns_packet packet; @@ -84,13 +85,15 @@ struct net { struct sock *nfnl; struct sock *nfnl_stash; #endif -#ifdef CONFIG_XFRM - struct netns_xfrm xfrm; -#endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; #endif struct net_generic *gen; + + /* Note : following structs are cache line aligned */ +#ifdef CONFIG_XFRM + struct netns_xfrm xfrm; +#endif }; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 74f119a2829a..748f91f87cd5 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -43,10 +43,6 @@ struct netns_xfrm { unsigned int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; - struct dst_ops xfrm4_dst_ops; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct dst_ops xfrm6_dst_ops; -#endif struct sock *nlsk; struct sock *nlsk_stash; @@ -58,6 +54,11 @@ struct netns_xfrm { #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_hdr; #endif + + struct dst_ops xfrm4_dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct dst_ops xfrm6_dst_ops; +#endif }; #endif -- cgit v1.2.3 From ebbf41df4aabb6d506fa18ea8cb4c2b4388a18b9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Oct 2010 10:19:06 +0200 Subject: netfilter: ctnetlink: add expectation deletion events This patch allows to listen to events that inform about expectations destroyed. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 416b83844485..0f8a8c587532 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -82,7 +82,13 @@ struct nf_conntrack_expect * nf_ct_find_expectation(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple); -void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); +void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, + u32 pid, int report); +static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) +{ + nf_ct_unlink_expect_report(exp, 0, 0); +} + void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); void nf_ct_remove_userspace_expectations(void); -- cgit v1.2.3 From 714f095f74582764d629785f03b459a3d0503624 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Tue, 19 Oct 2010 10:38:48 +0200 Subject: ipvs: IPv6 tunnel mode IPv6 encapsulation uses a bad source address for the tunnel. i.e. VIP will be used as local-addr and encap. dst addr. Decapsulation will not accept this. Example LVS (eth1 2003::2:0:1/96, VIP 2003::2:0:100) (eth0 2003::1:0:1/96) RS (ethX 2003::1:0:5/96) tcpdump 2003::2:0:100 > 2003::1:0:5: IP6 (hlim 63, next-header TCP (6) payload length: 40) 2003::3:0:10.50991 > 2003::2:0:100.http: Flags [S], cksum 0x7312 (correct), seq 3006460279, win 5760, options [mss 1440,sackOK,TS val 1904932 ecr 0,nop,wscale 3], length 0 In Linux IPv6 impl. you can't have a tunnel with an any cast address receiving packets (I have not tried to interpret RFC 2473) To have receive capabilities the tunnel must have: - Local address set as multicast addr or an unicast addr - Remote address set as an unicast addr. - Loop back addres or Link local address are not allowed. This causes us to setup a tunnel in the Real Server with the LVS as the remote address, here you can't use the VIP address since it's used inside the tunnel. Solution Use outgoing interface IPv6 address (match against the destination). i.e. use ip6_route_output() to look up the route cache and then use ipv6_dev_get_saddr(...) to set the source address of the encapsulated packet. Additionally, cache the results in new destination fields: dst_cookie and dst_saddr and properly check the returned dst from ip6_route_output. We now add xfrm_lookup call only for the tunneling method where the source address is a local one. Signed-off-by:Hans Schillstrom Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 52fbe2308c38..6e8a6192e574 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -529,6 +529,10 @@ struct ip_vs_dest { spinlock_t dst_lock; /* lock of dst_cache */ struct dst_entry *dst_cache; /* destination cache entry */ u32 dst_rtos; /* RT_TOS(tos) for dst */ + u32 dst_cookie; +#ifdef CONFIG_IP_VS_IPV6 + struct in6_addr dst_saddr; +#endif /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ -- cgit v1.2.3 From 8b27b10f5863a5b63e46304a71aa01463d1efac4 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:17:20 +0300 Subject: ipvs: optimize checksums for apps Avoid full checksum calculation for apps that can provide info whether csum was broken after payload mangling. For now only ip_vs_ftp mangles payload and it updates the csum, so the full recalculation is avoided for all packets. Add CHECKSUM_UNNECESSARY for snat_handler (TCP and UDP). It is needed to support SNAT from local address for the case when csum is fully recalculated. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 6e8a6192e574..adcdba9dd183 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -597,11 +597,19 @@ struct ip_vs_app { __be16 port; /* port number in net order */ atomic_t usecnt; /* usage counter */ - /* output hook: return false if can't linearize. diff set for TCP. */ + /* + * output hook: Process packet in inout direction, diff set for TCP. + * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, + * 2=Mangled but checksum was not updated + */ int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, struct sk_buff *, int *diff); - /* input hook: return false if can't linearize. diff set for TCP. */ + /* + * input hook: Process packet in outin direction, diff set for TCP. + * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, + * 2=Mangled but checksum was not updated + */ int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, struct sk_buff *, int *diff); -- cgit v1.2.3 From cf356d69db0afef692cd640917bc70f708c27f14 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:21:07 +0300 Subject: ipvs: switch to notrack mode Change skb->ipvs_property semantic. This is preparation to support ip_vs_out processing in LOCAL_OUT. ipvs_property=1 will be used to avoid expensive lookups for traffic sent by transmitters. Now when conntrack support is not used we call ip_vs_notrack method to avoid problems in OUTPUT and POST_ROUTING hooks instead of exiting POST_ROUTING as before. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index adcdba9dd183..0e4618470cee 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,7 +25,7 @@ #include #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ -#ifdef CONFIG_IP_VS_NFCT +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #endif @@ -1021,6 +1021,24 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +/* + * Forget current conntrack (unconfirmed) and attach notrack entry + */ +static inline void ip_vs_notrack(struct sk_buff *skb) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); + + if (!ct || !nf_ct_is_untracked(ct)) { + nf_reset(skb); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); + } +#endif +} + #ifdef CONFIG_IP_VS_NFCT /* * Netfilter connection tracking -- cgit v1.2.3 From 190ecd27cd7294105e3b26ca71663c7d940acbbb Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:24:37 +0300 Subject: ipvs: do not schedule conns from real servers This patch is needed to avoid scheduling of packets from local real server when we add ip_vs_in in LOCAL_OUT hook to support local client. Currently, when ip_vs_in can not find existing connection it tries to create new one by calling ip_vs_schedule. The default indication from ip_vs_schedule was if connection was scheduled to real server. If real server is not available we try to use the bypass forwarding method or to send ICMP error. But in some cases we do not want to use the bypass feature. So, add flag 'ignored' to indicate if the scheduler ignores this packet. Make sure we do not create new connections from replies. We can hit this problem for persistent services and local real server when ip_vs_in is added to LOCAL_OUT hook to handle local clients. Also, make sure ip_vs_schedule ignores SYN packets for Active FTP DATA from local real server. The FTP DATA connection should be created on SYN+ACK from client to assign correct connection daddr. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 0e4618470cee..9d5c1b965304 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -849,7 +849,8 @@ extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * -ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb); +ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, + struct ip_vs_protocol *pp, int *ignored); extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp); -- cgit v1.2.3 From fc604767613b6d2036cdc35b660bc39451040a47 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:38:15 +0300 Subject: ipvs: changes for local real server This patch deals with local real servers: - Add support for DNAT to local address (different real server port). It needs ip_vs_out hook in LOCAL_OUT for both families because skb->protocol is not set for locally generated packets and can not be used to set 'af'. - Skip packets in ip_vs_in marked with skb->ipvs_property because ip_vs_out processing can be executed in LOCAL_OUT but we still have the conn_out_get check in ip_vs_in. - Ignore packets with inet->nodefrag from local stack - Require skb_dst(skb) != NULL because we use it to get struct net - Add support for changing the route to local IPv4 stack after DNAT depending on the source address type. Local client sets output route and the remote client sets input route. It looks like IPv6 does not need such rerouting because the replies use addresses from initial incoming header, not from skb route. - All transmitters now have strict checks for the destination address type: redirect from non-local address to local real server requires NAT method, local address can not be used as source address when talking to remote real server. - Now LOCALNODE is not set explicitly as forwarding method in real server to allow the connections to provide correct forwarding method to the backup server. Not sure if this breaks tools that expect to see 'Local' real server type. If needed, this can be supported with new flag IP_VS_DEST_F_LOCAL. Now it should be possible connections in backup that lost their fwmark information during sync to be forwarded properly to their daddr, even if it is local address in the backup server. By this way backup could be used as real server for DR or TUN, for NAT there are some restrictions because tuple collisions in conntracks can create problems for the traffic. - Call ip_vs_dst_reset when destination is updated in case some real server IP type is changed between local and remote. [ horms@verge.net.au: removed trailing whitespace ] Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9d5c1b965304..2f88d5942332 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -409,6 +409,7 @@ struct ip_vs_conn { /* packet transmitter for different forwarding methods. If it mangles the packet, it must return NF_DROP or better NF_STOLEN, otherwise this must be changed to a sk_buff **. + NF_ACCEPT can be returned when destination is local. */ int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -- cgit v1.2.3 From 0d79641a96d612aaa6d57a4d4f521d7ed9c9ccdd Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:46:17 +0300 Subject: ipvs: provide address family for debugging As skb->protocol is not valid in LOCAL_OUT add parameter for address family in packet debugging functions. Even if ports are not present in AH and ESP change them to use ip_vs_tcpudp_debug_packet to show at least valid addresses as before. This patch removes the last user of skb->protocol in IPVS. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 2f88d5942332..b7bbd6c28cfa 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -136,24 +136,24 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, if (net_ratelimit()) \ printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__); \ } while (0) -#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) \ +#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) \ do { \ if (level <= ip_vs_get_debug_level()) \ - pp->debug_packet(pp, skb, ofs, msg); \ + pp->debug_packet(af, pp, skb, ofs, msg); \ } while (0) -#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) \ +#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) \ do { \ if (level <= ip_vs_get_debug_level() && \ net_ratelimit()) \ - pp->debug_packet(pp, skb, ofs, msg); \ + pp->debug_packet(af, pp, skb, ofs, msg); \ } while (0) #else /* NO DEBUGGING at ALL */ #define IP_VS_DBG_BUF(level, msg...) do {} while (0) #define IP_VS_ERR_BUF(msg...) do {} while (0) #define IP_VS_DBG(level, msg...) do {} while (0) #define IP_VS_DBG_RL(msg...) do {} while (0) -#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0) -#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) do {} while (0) +#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) do {} while (0) +#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) do {} while (0) #endif #define IP_VS_BUG() BUG() @@ -345,7 +345,7 @@ struct ip_vs_protocol { int (*app_conn_bind)(struct ip_vs_conn *cp); - void (*debug_packet)(struct ip_vs_protocol *pp, + void (*debug_packet)(int af, struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg); @@ -828,7 +828,8 @@ extern int ip_vs_set_state_timeout(int *table, int num, const char *const *names, const char *name, int to); extern void -ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, +ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, + const struct sk_buff *skb, int offset, const char *msg); extern struct ip_vs_protocol ip_vs_protocol_tcp; -- cgit v1.2.3 From 6f747aca5e61778190d1e27bdc469f49149f0230 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 15 Oct 2010 05:15:59 +0000 Subject: xfrm6: make xfrm6_tunnel_free_spi local Function only defined and used in one file. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4f53532d4c2f..36fdcb3fab9e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1419,7 +1419,6 @@ extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family); extern __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); -extern void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr); extern __be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr); extern int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); -- cgit v1.2.3 From 8d8a0b1cc2a8f9794a3f1f747089b6a93774408d Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 15 Oct 2010 05:12:01 +0000 Subject: rtnetlink: remove rtnl_kill_links The function rtnl_kill_links is defined but never used. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index af60fd050844..e013c68bfb00 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -79,7 +79,6 @@ struct rtnl_link_ops { extern int __rtnl_link_register(struct rtnl_link_ops *ops); extern void __rtnl_link_unregister(struct rtnl_link_ops *ops); -extern void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops); extern int rtnl_link_register(struct rtnl_link_ops *ops); extern void rtnl_link_unregister(struct rtnl_link_ops *ops); -- cgit v1.2.3 From 1c4c40c42da468ef02dc04940930c1926c964558 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 15 Oct 2010 05:14:19 +0000 Subject: xfrm: make xfrm_bundle_ok local Only used in one place. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 36fdcb3fab9e..f28d7c9b9f8d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1465,8 +1465,6 @@ struct xfrm_state *xfrm_find_acq(struct net *net, struct xfrm_mark *mark, xfrm_address_t *saddr, int create, unsigned short family); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, - struct flowi *fl, int family, int strict); #ifdef CONFIG_XFRM_MIGRATE extern int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, -- cgit v1.2.3 From 3511c9132f8b1e1b5634e41a3331c44b0c13be70 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 16 Oct 2010 13:04:08 +0000 Subject: net_sched: remove the unused parameter of qdisc_create_dflt() The first parameter dev isn't in use in qdisc_create_dflt(). Signed-off-by: Changli Gao Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index eda8808fdacd..ea1f8a83160d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -328,8 +328,7 @@ extern void qdisc_destroy(struct Qdisc *qdisc); extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n); extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc_ops *ops); -extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, - struct netdev_queue *dev_queue, +extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, struct Qdisc_ops *ops, u32 parentid); extern void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab); -- cgit v1.2.3 From 106e4c26b1529e559d1aae777f11b4f8f7bafc26 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 12:45:14 +0200 Subject: tproxy: kick out TIME_WAIT sockets in case a new connection comes in with the same tuple Without tproxy redirections an incoming SYN kicks out conflicting TIME_WAIT sockets, in order to handle clients that reuse ports within the TIME_WAIT period. The same mechanism didn't work in case TProxy is involved in finding the proper socket, as the time_wait processing code looked up the listening socket assuming that the listener addr/port matches those of the established connection. This is not the case with TProxy as the listener addr/port is possibly changed with the tproxy rule. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index 208b46f4d6d2..b3a8942a4e6a 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -8,12 +8,16 @@ #include #include +#define NFT_LOOKUP_ANY 0 +#define NFT_LOOKUP_LISTENER 1 +#define NFT_LOOKUP_ESTABLISHED 2 + /* look up and get a reference to a matching socket */ extern struct sock * nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, - const struct net_device *in, bool listening); + const struct net_device *in, int lookup_type); static inline void nf_tproxy_put_sock(struct sock *sk) -- cgit v1.2.3 From 6006db84a91838813cdad8a6622a4e39efe9ea47 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 12:47:34 +0200 Subject: tproxy: add lookup type checks for UDP in nf_tproxy_get_sock_v4() Also, inline this function as the lookup_type is always a literal and inlining removes branches performed at runtime. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 116 ++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index b3a8942a4e6a..1027d7f901a0 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -13,11 +13,123 @@ #define NFT_LOOKUP_ESTABLISHED 2 /* look up and get a reference to a matching socket */ -extern struct sock * + + +/* This function is used by the 'TPROXY' target and the 'socket' + * match. The following lookups are supported: + * + * Explicit TProxy target rule + * =========================== + * + * This is used when the user wants to intercept a connection matching + * an explicit iptables rule. In this case the sockets are assumed + * matching in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple, it is returned, assuming the redirection + * already took place and we process a packet belonging to an + * established connection + * + * - match: if there's a listening socket matching the redirection + * (e.g. on-port & on-ip of the connection), it is returned, + * regardless if it was bound to 0.0.0.0 or an explicit + * address. The reasoning is that if there's an explicit rule, it + * does not really matter if the listener is bound to an interface + * or to 0. The user already stated that he wants redirection + * (since he added the rule). + * + * "socket" match based redirection (no specific rule) + * =================================================== + * + * There are connections with dynamic endpoints (e.g. FTP data + * connection) that the user is unable to add explicit rules + * for. These are taken care of by a generic "socket" rule. It is + * assumed that the proxy application is trusted to open such + * connections without explicit iptables rule (except of course the + * generic 'socket' rule). In this case the following sockets are + * matched in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple + * + * - match: if there's a non-zero bound listener (possibly with a + * non-local address) We don't accept zero-bound listeners, since + * then local services could intercept traffic going through the + * box. + * + * Please note that there's an overlap between what a TPROXY target + * and a socket match will match. Normally if you have both rules the + * "socket" match will be the first one, effectively all packets + * belonging to established connections going through that one. + */ +static inline struct sock * nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, - const struct net_device *in, int lookup_type); + const struct net_device *in, int lookup_type) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NFT_LOOKUP_ANY: + sk = __inet_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case NFT_LOOKUP_LISTENER: + sk = inet_lookup_listener(net, &tcp_hashinfo, + daddr, dport, + in->ifindex); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + + break; + case NFT_LOOKUP_ESTABLISHED: + sk = inet_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + break; + } + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk && lookup_type != NFT_LOOKUP_ANY) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NFT_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk); + + return sk; +} + static inline void nf_tproxy_put_sock(struct sock *sk) -- cgit v1.2.3 From 093d282321daeb19c107e5f1f16d7f68484f3ade Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 13:06:43 +0200 Subject: tproxy: fix hash locking issue when using port redirection in __inet_inherit_port() When __inet_inherit_port() is called on a tproxy connection the wrong locks are held for the inet_bind_bucket it is added to. __inet_inherit_port() made an implicit assumption that the listener's port number (and thus its bind bucket). Unfortunately, if you're using the TPROXY target to redirect skbs to a transparent proxy that assumption is not true anymore and things break. This patch adds code to __inet_inherit_port() so that it can handle this case by looking up or creating a new bind bucket for the child socket and updates callers of __inet_inherit_port() to gracefully handle __inet_inherit_port() failing. Reported by and original patch from Stephen Buck . See http://marc.info/?t=128169268200001&r=1&w=2 for the original discussion. Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/inet_hashtables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 74358d1b3f43..e9c2ed8af864 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -245,7 +245,7 @@ static inline int inet_sk_listen_hashfn(const struct sock *sk) } /* Caller must disable local BH processing. */ -extern void __inet_inherit_port(struct sock *sk, struct sock *child); +extern int __inet_inherit_port(struct sock *sk, struct sock *child); extern void inet_put_port(struct sock *sk); -- cgit v1.2.3 From 32a875adcdcf5f470bf967250cfd01722e23844f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 19 Oct 2010 06:48:16 +0000 Subject: 9p: client code cleanup Make p9_client_version static since only used in one file. Remove p9_client_auth because it is defined but never used. Compile tested only. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/9p/client.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index d1aa2cfb30f0..7f63d5ab7b44 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -212,15 +212,12 @@ struct p9_dirent { int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb); int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name); -int p9_client_version(struct p9_client *); struct p9_client *p9_client_create(const char *dev_name, char *options); void p9_client_destroy(struct p9_client *clnt); void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname); -struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, - u32 n_uname, char *aname); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, int clone); int p9_client_open(struct p9_fid *fid, int mode); -- cgit v1.2.3 From e97c3e278e951501c2f385de70c3ceacdea78c4a Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 16:03:43 +0200 Subject: tproxy: split off ipv6 defragmentation to a separate module Like with IPv4, TProxy needs IPv6 defragmentation but does not require connection tracking. Since defragmentation was coupled with conntrack, I split off the two, creating an nf_defrag_ipv6 module, similar to the already existing nf_defrag_ipv4. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/net/netfilter/ipv6/nf_defrag_ipv6.h (limited to 'include/net') diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h new file mode 100644 index 000000000000..94dd54d76b48 --- /dev/null +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -0,0 +1,6 @@ +#ifndef _NF_DEFRAG_IPV6_H +#define _NF_DEFRAG_IPV6_H + +extern void nf_defrag_ipv6_enable(void); + +#endif /* _NF_DEFRAG_IPV6_H */ -- cgit v1.2.3 From aa976fc011efa1f0e3290c6c9addf7c20757f885 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 16:05:41 +0200 Subject: tproxy: added udp6_lib_lookup function Just like with IPv4, we need access to the UDP hash table to look up local sockets, but instead of exporting the global udp_table, export a lookup function. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/udp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index a184d3496b13..200b82848c9a 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -183,6 +183,9 @@ extern int udp_lib_setsockopt(struct sock *sk, int level, int optname, extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); +extern struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif); /* * SNMP statistics for UDP and UDP-Lite -- cgit v1.2.3 From 3b9afb29917f4ab08decf358ecfd354a72a91ac0 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 16:12:14 +0200 Subject: tproxy: added IPv6 socket lookup function to nf_tproxy_core Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 72 +++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index 1027d7f901a0..cd85b3bc8327 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -5,7 +5,8 @@ #include #include #include -#include +#include +#include #include #define NFT_LOOKUP_ANY 0 @@ -130,6 +131,75 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, return sk; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline struct sock * +nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, int lookup_type) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NFT_LOOKUP_ANY: + sk = inet6_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case NFT_LOOKUP_LISTENER: + sk = inet6_lookup_listener(net, &tcp_hashinfo, + daddr, ntohs(dport), + in->ifindex); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + + break; + case NFT_LOOKUP_ESTABLISHED: + sk = __inet6_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, ntohs(dport), + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + break; + } + break; + case IPPROTO_UDP: + sk = udp6_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk && lookup_type != NFT_LOOKUP_ANY) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NFT_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n", + protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk); + + return sk; +} +#endif static inline void nf_tproxy_put_sock(struct sock *sk) -- cgit v1.2.3 From 3cc77ec74e1583b50b8405114cdbd6b8ebb8c474 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 24 Oct 2010 21:32:36 +0000 Subject: net/802: add __rcu annotations (struct net_device)->garp_port is rcu protected : (struct garp_port)->applicants is rcu protected : add __rcu annotation and proper rcu primitives. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/garp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/garp.h b/include/net/garp.h index 825f172caba9..f4c295984c45 100644 --- a/include/net/garp.h +++ b/include/net/garp.h @@ -107,7 +107,7 @@ struct garp_applicant { }; struct garp_port { - struct garp_applicant *applicants[GARP_APPLICATION_MAX + 1]; + struct garp_applicant __rcu *applicants[GARP_APPLICATION_MAX + 1]; }; extern int garp_register_application(struct garp_application *app); -- cgit v1.2.3 From 6f0bcf152582e7403155627a38e07bf3ef7f3cf5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 24 Oct 2010 21:33:16 +0000 Subject: tunnels: add _rcu annotations (struct ip6_tnl)->next is rcu protected : (struct ip_tunnel)->next is rcu protected : (struct xfrm6_tunnel)->next is rcu protected : add __rcu annotation and proper rcu primitives. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 2 +- include/net/ipip.h | 2 +- include/net/xfrm.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index fc94ec568a50..fc73e667b50e 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -13,7 +13,7 @@ /* IPv6 tunnel */ struct ip6_tnl { - struct ip6_tnl *next; /* next tunnel in list */ + struct ip6_tnl __rcu *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ struct ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ diff --git a/include/net/ipip.h b/include/net/ipip.h index 58abbf966b0c..0403fe4c4519 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -16,7 +16,7 @@ struct ip_tunnel_6rd_parm { }; struct ip_tunnel { - struct ip_tunnel *next; + struct ip_tunnel __rcu *next; struct net_device *dev; int err_count; /* Number of arrived ICMP errors */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f28d7c9b9f8d..ffcd47820a5b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1272,7 +1272,7 @@ struct xfrm6_tunnel { int (*handler)(struct sk_buff *skb); int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); - struct xfrm6_tunnel *next; + struct xfrm6_tunnel __rcu *next; int priority; }; -- cgit v1.2.3 From 1c87733d0682547050ccccb400cdac425fa43b39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 03:20:11 +0000 Subject: net_ns: add __rcu annotations add __rcu annotation to (struct net)->gen, and use rcu_dereference_protected() in net_assign_generic() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/net_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 65af9a07cf76..1bf812b21fb7 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -88,7 +88,7 @@ struct net { #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; #endif - struct net_generic *gen; + struct net_generic __rcu *gen; /* Note : following structs are cache line aligned */ #ifdef CONFIG_XFRM -- cgit v1.2.3 From 0d7da9ddd9a4eb7808698d04b98bf9d62d02649b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 03:47:05 +0000 Subject: net: add __rcu annotation to sk_filter Add __rcu annotation to : (struct sock)->sk_filter And use appropriate rcu primitives to reduce sparse warnings if CONFIG_SPARSE_RCU_POINTER=y Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 73a4f9702a65..c7a736228ca2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -301,7 +301,7 @@ struct sock { const struct cred *sk_peer_cred; long sk_rcvtimeo; long sk_sndtimeo; - struct sk_filter *sk_filter; + struct sk_filter __rcu *sk_filter; void *sk_protinfo; struct timer_list sk_timer; ktime_t sk_stamp; -- cgit v1.2.3 From 43a951e9994fd218ab4e641f94a2fc53556c3675 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 03:32:44 +0000 Subject: ipv4: add __rcu annotations to ip_ra_chain Add __rcu annotations to : (struct ip_ra_chain)->next struct ip_ra_chain *ip_ra_chain; And use appropriate rcu primitives. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index dbee3fe260e1..86e2b182a0c0 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -59,7 +59,7 @@ struct ipcm_cookie { #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) struct ip_ra_chain { - struct ip_ra_chain *next; + struct ip_ra_chain __rcu *next; struct sock *sk; union { void (*destructor)(struct sock *); @@ -68,7 +68,7 @@ struct ip_ra_chain { struct rcu_head rcu; }; -extern struct ip_ra_chain *ip_ra_chain; +extern struct ip_ra_chain __rcu *ip_ra_chain; /* IP flags. */ #define IP_CE 0x8000 /* Flag: "Congestion" */ -- cgit v1.2.3 From 1c31720a74e19bb57f301350a3b03210fa2ba9e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 21:02:07 +0000 Subject: ipv4: add __rcu annotations to routes.c Add __rcu annotations to : (struct dst_entry)->rt_next (struct rt_hash_bucket)->chain And use appropriate rcu primitives to reduce sparse warnings if CONFIG_SPARSE_RCU_POINTER=y Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index a217c838ec0d..ffe9cb719c0e 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -95,7 +95,7 @@ struct dst_entry { unsigned long lastuse; union { struct dst_entry *next; - struct rtable *rt_next; + struct rtable __rcu *rt_next; struct rt6_info *rt6_next; struct dn_route *dn_next; }; -- cgit v1.2.3 From e0ad61ec867fdd262804afa7a68e11fc9930c2b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 21:02:28 +0000 Subject: net: add __rcu annotations to protocol Add __rcu annotations to : struct net_protocol *inet_protos struct net_protocol *inet6_protos And use appropriate casts to reduce sparse warnings if CONFIG_SPARSE_RCU_POINTER=y Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/protocol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/protocol.h b/include/net/protocol.h index f1effdd3c265..dc07495bce4c 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -89,10 +89,10 @@ struct inet_protosw { #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ #define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */ -extern const struct net_protocol *inet_protos[MAX_INET_PROTOS]; +extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS]; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -extern const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS]; +extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS]; #endif extern int inet_add_protocol(const struct net_protocol *prot, unsigned char num); -- cgit v1.2.3 From b33eab08445d86c3d0dec3111ce10df561328705 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 21:01:26 +0000 Subject: tunnels: add __rcu annotations Add __rcu annotations to : (struct ip_tunnel)->prl (struct ip_tunnel_prl_entry)->next (struct xfrm_tunnel)->next struct xfrm_tunnel *tunnel4_handlers struct xfrm_tunnel *tunnel64_handlers And use appropriate rcu primitives to reduce sparse warnings if CONFIG_SPARSE_RCU_POINTER=y Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ipip.h | 4 ++-- include/net/xfrm.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ipip.h b/include/net/ipip.h index 0403fe4c4519..a32654d52730 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -34,12 +34,12 @@ struct ip_tunnel { #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd_parm ip6rd; #endif - struct ip_tunnel_prl_entry *prl; /* potential router list */ + struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ unsigned int prl_count; /* # of entries in PRL */ }; struct ip_tunnel_prl_entry { - struct ip_tunnel_prl_entry *next; + struct ip_tunnel_prl_entry __rcu *next; __be32 addr; u16 flags; struct rcu_head rcu_head; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ffcd47820a5b..bcfb6b24b019 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1264,7 +1264,7 @@ struct xfrm_tunnel { int (*handler)(struct sk_buff *skb); int (*err_handler)(struct sk_buff *skb, u32 info); - struct xfrm_tunnel *next; + struct xfrm_tunnel __rcu *next; int priority; }; -- cgit v1.2.3 From 7a2b03c5175e9ddcc2a2d48ca86dea8a88b68383 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Oct 2010 09:24:55 +0000 Subject: fib_rules: __rcu annotates ctarget Adds __rcu annotation to (struct fib_rule)->ctarget Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/fib_rules.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 106f3097d384..075f1e3a0fed 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -20,7 +20,7 @@ struct fib_rule { u32 table; u8 action; u32 target; - struct fib_rule * ctarget; + struct fib_rule __rcu *ctarget; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; struct rcu_head rcu; -- cgit v1.2.3 From b914c4ea929a4ba6fb97967800dc473c31552b98 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 23:55:38 +0000 Subject: inetpeer: __rcu annotations Adds __rcu annotations to inetpeer (struct inet_peer)->avl_left (struct inet_peer)->avl_right This is a tedious cleanup, but removes one smp_wmb() from link_to_pool() since we now use more self documenting rcu_assign_pointer(). Note the use of RCU_INIT_POINTER() instead of rcu_assign_pointer() in all cases we dont need a memory barrier. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 417d0c894f29..fe239bfe5f7f 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -15,7 +15,7 @@ struct inet_peer { /* group together avl_left,avl_right,v4daddr to speedup lookups */ - struct inet_peer *avl_left, *avl_right; + struct inet_peer __rcu *avl_left, *avl_right; __be32 v4daddr; /* peer's address */ __u32 avl_height; struct list_head unused; -- cgit v1.2.3 From a10c02036f82b1fa30d69a62f7c7d9a927e8adbc Mon Sep 17 00:00:00 2001 From: Amarnath Revanna Date: Wed, 27 Oct 2010 08:34:39 +0000 Subject: caif-u5500: Adding shared memory include Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- include/net/caif/caif_shm.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/net/caif/caif_shm.h (limited to 'include/net') diff --git a/include/net/caif/caif_shm.h b/include/net/caif/caif_shm.h new file mode 100644 index 000000000000..5bcce55438cf --- /dev/null +++ b/include/net/caif/caif_shm.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com + * Author: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#ifndef CAIF_SHM_H_ +#define CAIF_SHM_H_ + +struct shmdev_layer { + u32 shm_base_addr; + u32 shm_total_sz; + u32 shm_id; + u32 shm_loopback; + void *hmbx; + int (*pshmdev_mbxsend) (u32 shm_id, u32 mbx_msg); + int (*pshmdev_mbxsetup) (void *pshmdrv_cb, + struct shmdev_layer *pshm_dev, void *pshm_drv); + struct net_device *pshm_netdev; +}; + +extern int caif_shmcore_probe(struct shmdev_layer *pshm_dev); +extern void caif_shmcore_remove(struct net_device *pshm_netdev); + +#endif -- cgit v1.2.3 From 4f7ebe807242898ee08ed732d56982874442c304 Mon Sep 17 00:00:00 2001 From: Arun R Bharadwaj Date: Wed, 28 Jul 2010 14:17:26 +0530 Subject: net/9p: This patch implements TLERROR/RLERROR on the 9P client. Signed-off-by: Arun R Bharadwaj Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index a8de812ccbc8..a4a1b043a8c4 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -86,6 +86,8 @@ do { \ /** * enum p9_msg_t - 9P message types + * @P9_TLERROR: not used + * @P9_RLERROR: response for any failed request for 9P2000.L * @P9_TSTATFS: file system status request * @P9_RSTATFS: file system status response * @P9_TSYMLINK: make symlink request @@ -137,6 +139,8 @@ do { \ */ enum p9_msg_t { + P9_TLERROR = 6, + P9_RLERROR, P9_TSTATFS = 8, P9_RSTATFS, P9_TLOPEN = 12, -- cgit v1.2.3 From 920e65dc6911da28a58e17f4b683302636fc6d8e Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Wed, 22 Sep 2010 17:19:19 -0700 Subject: [9p] Introduce client side TFSYNC/RFSYNC for dotl. SYNOPSIS size[4] Tfsync tag[2] fid[4] size[4] Rfsync tag[2] DESCRIPTION The Tfsync transaction transfers ("flushes") all modified in-core data of file identified by fid to the disk device (or other permanent storage device) where that file resides. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 2 ++ include/net/9p/client.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index a4a1b043a8c4..55e96057f47f 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -163,6 +163,8 @@ enum p9_msg_t { P9_RXATTRCREATE, P9_TREADDIR = 40, P9_RREADDIR, + P9_TFSYNC = 50, + P9_RFSYNC, P9_TLINK = 70, P9_RLINK, P9_TMKDIR = 72, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 7f63d5ab7b44..8744e3ad4a07 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -229,6 +229,7 @@ int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, gid_t gid, int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, gid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); +int p9_client_fsync(struct p9_fid *fid); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count); -- cgit v1.2.3 From a099027c779068b834f335cfdc3f2bf10f531dd9 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Mon, 27 Sep 2010 11:34:24 +0530 Subject: 9p: Implement TLOCK Synopsis size[4] TLock tag[2] fid[4] flock[n] size[4] RLock tag[2] status[1] Description Tlock is used to acquire/release byte range posix locks on a file identified by given fid. The reply contains status of the lock request flock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK, F_UNLCK flags[4] - Flags could be either of P9_LOCK_FLAGS_BLOCK - Blocked lock request, if there is a conflicting lock exists, wait for that lock to be released. P9_LOCK_FLAGS_RECLAIM - Reclaim lock request, used when client is trying to reclaim a lock after a server restrart (due to crash) start[8] - Starting offset for lock length[8] - Number of bytes to lock If length is 0, lock all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock client_id[4] - Unique client id status[1] - Status of the lock request, can be P9_LOCK_SUCCESS(0), P9_LOCK_BLOCKED(1), P9_LOCK_ERROR(2) or P9_LOCK_GRACE(3) P9_LOCK_SUCCESS - Request was successful P9_LOCK_BLOCKED - A conflicting lock is held by another process P9_LOCK_ERROR - Error while processing the lock request P9_LOCK_GRACE - Server is in grace period, it can't accept new lock requests in this period (except locks with P9_LOCK_FLAGS_RECLAIM flag set) Signed-off-by: M. Mohan Kumar Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 28 ++++++++++++++++++++++++++++ include/net/9p/client.h | 1 + 2 files changed, 29 insertions(+) (limited to 'include/net') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 55e96057f47f..1859a2560cc5 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -165,6 +165,8 @@ enum p9_msg_t { P9_RREADDIR, P9_TFSYNC = 50, P9_RFSYNC, + P9_TLOCK = 52, + P9_RLOCK, P9_TLINK = 70, P9_RLINK, P9_TMKDIR = 72, @@ -464,6 +466,32 @@ struct p9_iattr_dotl { u64 mtime_nsec; }; +#define P9_LOCK_SUCCESS 0 +#define P9_LOCK_BLOCKED 1 +#define P9_LOCK_ERROR 2 +#define P9_LOCK_GRACE 3 + +#define P9_LOCK_FLAGS_BLOCK 1 +#define P9_LOCK_FLAGS_RECLAIM 2 + +/* struct p9_flock: POSIX lock structure + * @type - type of lock + * @flags - lock flags + * @start - starting offset of the lock + * @length - number of bytes + * @proc_id - process id which wants to take lock + * @client_id - client id + */ + +struct p9_flock { + u8 type; + u32 flags; + u64 start; + u64 length; + u32 proc_id; + char *client_id; +}; + /* Structures for Protocol Operations */ struct p9_tstatfs { u32 fid; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 8744e3ad4a07..d7dcb142e3bb 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -249,6 +249,7 @@ int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, dev_t rdev, gid_t gid, struct p9_qid *); int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, gid_t gid, struct p9_qid *); +int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); void p9_client_cb(struct p9_client *c, struct p9_req_t *req); -- cgit v1.2.3 From 1d769cd192fc8c4097b1e2cd41fdee6ba3d1b2af Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Mon, 27 Sep 2010 12:22:13 +0530 Subject: 9p: Implement TGETLOCK Synopsis size[4] TGetlock tag[2] fid[4] getlock[n] size[4] RGetlock tag[2] getlock[n] Description TGetlock is used to test for the existence of byte range posix locks on a file identified by given fid. The reply contains getlock structure. If the lock could be placed it returns F_UNLCK in type field of getlock structure. Otherwise it returns the details of the conflicting locks in the getlock structure getlock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK start[8] - Starting offset for lock length[8] - Number of bytes to check for the lock If length is 0, check for lock in all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock/owns the task in case of reply client[4] - Client id of the system that owns the process which has the conflicting lock Signed-off-by: M. Mohan Kumar Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 18 ++++++++++++++++++ include/net/9p/client.h | 1 + 2 files changed, 19 insertions(+) (limited to 'include/net') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 1859a2560cc5..6367a71d84fc 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -167,6 +167,8 @@ enum p9_msg_t { P9_RFSYNC, P9_TLOCK = 52, P9_RLOCK, + P9_TGETLOCK = 54, + P9_RGETLOCK, P9_TLINK = 70, P9_RLINK, P9_TMKDIR = 72, @@ -492,6 +494,22 @@ struct p9_flock { char *client_id; }; +/* struct p9_getlock: getlock structure + * @type - type of lock + * @start - starting offset of the lock + * @length - number of bytes + * @proc_id - process id which wants to take lock + * @client_id - client id + */ + +struct p9_getlock { + u8 type; + u64 start; + u64 length; + u32 proc_id; + char *client_id; +}; + /* Structures for Protocol Operations */ struct p9_tstatfs { u32 fid; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index d7dcb142e3bb..127c9f2a9cb8 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -250,6 +250,7 @@ int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, gid_t gid, struct p9_qid *); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); +int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); void p9_client_cb(struct p9_client *c, struct p9_req_t *req); -- cgit v1.2.3 From 329176cc2c50e63c580ddaabb385876db5af1360 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Tue, 28 Sep 2010 19:59:25 +0530 Subject: 9p: Implement TREADLINK operation for 9p2000.L Synopsis size[4] TReadlink tag[2] fid[4] size[4] RReadlink tag[2] target[s] Description Readlink is used to return the contents of the symoblic link referred by fid. Contents of symboic link is returned as a response. target[s] - Contents of the symbolic link referred by fid. Signed-off-by: M. Mohan Kumar Reviewed-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 2 ++ include/net/9p/client.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 6367a71d84fc..071fd7a8d781 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -153,6 +153,8 @@ enum p9_msg_t { P9_RMKNOD, P9_TRENAME = 20, P9_RRENAME, + P9_TREADLINK = 22, + P9_RREADLINK, P9_TGETATTR = 24, P9_RGETATTR, P9_TSETATTR = 26, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 127c9f2a9cb8..335b088e7672 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -262,5 +262,6 @@ int p9_is_proto_dotu(struct p9_client *clnt); int p9_is_proto_dotl(struct p9_client *clnt); struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *); int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int); +int p9_client_readlink(struct p9_fid *fid, char **target); #endif /* NET_9P_CLIENT_H */ -- cgit v1.2.3 From b165d60145b717261a0234f989c442c2b68b6ec0 Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Fri, 22 Oct 2010 10:13:12 -0700 Subject: 9p: Add datasync to client side TFSYNC/RFSYNC for dotl SYNOPSIS size[4] Tfsync tag[2] fid[4] datasync[4] size[4] Rfsync tag[2] DESCRIPTION The Tfsync transaction transfers ("flushes") all modified in-core data of file identified by fid to the disk device (or other permanent storage device) where that file resides. If datasync flag is specified data will be fleshed but does not flush modified metadata unless that metadata is needed in order to allow a subsequent data retrieval to be correctly handled. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 335b088e7672..83ba6a4d58a3 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -229,7 +229,7 @@ int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, gid_t gid, int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, gid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); -int p9_client_fsync(struct p9_fid *fid); +int p9_client_fsync(struct p9_fid *fid, int datasync); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count); -- cgit v1.2.3 From 4aa2c466a7733af093a526e9d1cdd0b3b90d47e9 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 28 Oct 2010 02:00:43 +0000 Subject: fib: Fix fib zone and its hash leak on namespace stop When we stop a namespace we flush the table and free one, but the added fn_zone-s (and their hashes if grown) are leaked. Need to free. Tries releases all its stuff in the flushing code. Shame on us - this bug exists since the very first make-fib-per-net patches in 2.6.27 :( Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ba3666d31766..07bdb5e9e8ac 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -158,6 +158,8 @@ extern int fib_table_flush(struct fib_table *table); extern void fib_table_select_default(struct fib_table *table, const struct flowi *flp, struct fib_result *res); +extern void fib_free_table(struct fib_table *tb); + #ifndef CONFIG_IP_MULTIPLE_TABLES -- cgit v1.2.3 From f2527ec436fd675f08a8e7434f6e940688cb96d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Carvalho=20de=20Matos?= Date: Mon, 1 Nov 2010 11:52:47 +0000 Subject: caif: Bugfix for socket priority, bindtodev and dbg channel. Changes: o Bugfix: SO_PRIORITY for SOL_SOCKET could not be handled in caif's setsockopt, using the struct sock attribute priority instead. o Bugfix: SO_BINDTODEVICE for SOL_SOCKET could not be handled in caif's setsockopt, using the struct sock attribute ifindex instead. o Wrong assert statement for RFM layer segmentation. o CAIF Debug channels was not working over SPI, caif_payload_info containing padding info must be initialized. o Check on pointer before dereferencing when unregister dev in caif_dev.c Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- include/net/caif/caif_dev.h | 4 ++-- include/net/caif/cfcnfg.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h index 6da573c75d54..8eff83b95366 100644 --- a/include/net/caif/caif_dev.h +++ b/include/net/caif/caif_dev.h @@ -28,7 +28,7 @@ struct caif_param { * @sockaddr: Socket address to connect. * @priority: Priority of the connection. * @link_selector: Link selector (high bandwidth or low latency) - * @link_name: Name of the CAIF Link Layer to use. + * @ifindex: kernel index of the interface. * @param: Connect Request parameters (CAIF_SO_REQ_PARAM). * * This struct is used when connecting a CAIF channel. @@ -39,7 +39,7 @@ struct caif_connect_request { struct sockaddr_caif sockaddr; enum caif_channel_priority priority; enum caif_link_selector link_selector; - char link_name[16]; + int ifindex; struct caif_param param; }; diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h index bd646faffa47..f688478bfb84 100644 --- a/include/net/caif/cfcnfg.h +++ b/include/net/caif/cfcnfg.h @@ -139,10 +139,10 @@ struct dev_info *cfcnfg_get_phyid(struct cfcnfg *cnfg, enum cfcnfg_phy_preference phy_pref); /** - * cfcnfg_get_named() - Get the Physical Identifier of CAIF Link Layer + * cfcnfg_get_id_from_ifi() - Get the Physical Identifier of ifindex, + * it matches caif physical id with the kernel interface id. * @cnfg: Configuration object - * @name: Name of the Physical Layer (Caif Link Layer) + * @ifi: ifindex obtained from socket.c bindtodevice. */ -int cfcnfg_get_named(struct cfcnfg *cnfg, char *name); - +int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi); #endif /* CFCNFG_H_ */ -- cgit v1.2.3 From 2c24a5d1b4f48900f3ed1b1ad70c51f1983df822 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sjur=20Br=C3=A6ndeland?= Date: Mon, 1 Nov 2010 11:52:48 +0000 Subject: caif: SPI-driver bugfix - incorrect padding. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- include/net/caif/caif_spi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/caif/caif_spi.h b/include/net/caif/caif_spi.h index ce4570dff020..87c3d11b8e55 100644 --- a/include/net/caif/caif_spi.h +++ b/include/net/caif/caif_spi.h @@ -121,6 +121,8 @@ struct cfspi { wait_queue_head_t wait; spinlock_t lock; bool flow_stop; + bool slave; + bool slave_talked; #ifdef CONFIG_DEBUG_FS enum cfspi_state dbg_state; u16 pcmd; -- cgit v1.2.3 From 6b8c92ba07287578718335ce409de8e8d7217e40 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Wed, 3 Nov 2010 16:35:40 +0000 Subject: netlink: Make nlmsg_find_attr take a const nlmsghdr*. This will let us use it on a nlmsghdr stored inside a netlink_callback. Signed-off-by: Nelson Elhage Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index f3b201d335b3..9801c55de5d6 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -384,7 +384,7 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, * * Returns the first attribute which matches the specified type. */ -static inline struct nlattr *nlmsg_find_attr(struct nlmsghdr *nlh, +static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, int hdrlen, int attrtype) { return nla_find(nlmsg_attrdata(nlh, hdrlen), -- cgit v1.2.3 From 43b81f85ebfbed9c720f3c27ec7f364930aa3b5e Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Sun, 7 Nov 2010 19:58:05 -0800 Subject: net dst: need linux/cache.h for ____cacheline_aligned_in_smp. Presently the b43legacy build fails on an sh randconfig: In file included from include/net/dst.h:12, from drivers/net/wireless/b43legacy/xmit.c:32: include/net/dst_ops.h:28: error: expected ':', ',', ';', '}' or '__attribute__' before '____cacheline_aligned_in_smp' include/net/dst_ops.h: In function 'dst_entries_get_fast': include/net/dst_ops.h:33: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_get_slow': include/net/dst_ops.h:41: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_add': include/net/dst_ops.h:49: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_init': include/net/dst_ops.h:55: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_destroy': include/net/dst_ops.h:60: error: 'struct dst_ops' has no member named 'pcpuc_entries' make[5]: *** [drivers/net/wireless/b43legacy/xmit.o] Error 1 make[5]: *** Waiting for unfinished jobs.... Signed-off-by: Paul Mundt Signed-off-by: David S. Miller --- include/net/dst_ops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 1fa5306e3e23..51665b3461b8 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -2,6 +2,7 @@ #define _NET_DST_OPS_H #include #include +#include struct dst_entry; struct kmem_cachep; -- cgit v1.2.3 From 8d987e5c75107ca7515fa19e857cfa24aab6ec8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Nov 2010 23:24:26 +0000 Subject: net: avoid limits overflow Robin Holt tried to boot a 16TB machine and found some limits were reached : sysctl_tcp_mem[2], sysctl_udp_mem[2] We can switch infrastructure to use long "instead" of "int", now atomic_long_t primitives are available for free. Signed-off-by: Eric Dumazet Reported-by: Robin Holt Reviewed-by: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/net/dn.h | 2 +- include/net/sock.h | 4 ++-- include/net/tcp.h | 6 +++--- include/net/udp.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/dn.h b/include/net/dn.h index e5469f7b67a3..a514a3cf4573 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -225,7 +225,7 @@ extern int decnet_di_count; extern int decnet_dr_count; extern int decnet_no_fc_max_cwnd; -extern int sysctl_decnet_mem[3]; +extern long sysctl_decnet_mem[3]; extern int sysctl_decnet_wmem[3]; extern int sysctl_decnet_rmem[3]; diff --git a/include/net/sock.h b/include/net/sock.h index c7a736228ca2..a6338d039857 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -762,7 +762,7 @@ struct proto { /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); - atomic_t *memory_allocated; /* Current allocated memory. */ + atomic_long_t *memory_allocated; /* Current allocated memory. */ struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* * Pressure flag: try to collapse. @@ -771,7 +771,7 @@ struct proto { * is strict, actions are advisory and have some latency. */ int *memory_pressure; - int *sysctl_mem; + long *sysctl_mem; int *sysctl_wmem; int *sysctl_rmem; int max_header; diff --git a/include/net/tcp.h b/include/net/tcp.h index 4fee0424af7e..e36c874c7fb1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -224,7 +224,7 @@ extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_ecn; extern int sysctl_tcp_dsack; -extern int sysctl_tcp_mem[3]; +extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; @@ -247,7 +247,7 @@ extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; -extern atomic_t tcp_memory_allocated; +extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; @@ -280,7 +280,7 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift) } if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) + atomic_long_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) return true; return false; } diff --git a/include/net/udp.h b/include/net/udp.h index 200b82848c9a..bb967dd59bf7 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -105,10 +105,10 @@ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, extern struct proto udp_prot; -extern atomic_t udp_memory_allocated; +extern atomic_long_t udp_memory_allocated; /* sysctl variables for udp */ -extern int sysctl_udp_mem[3]; +extern long sysctl_udp_mem[3]; extern int sysctl_udp_rmem_min; extern int sysctl_udp_wmem_min; -- cgit v1.2.3 From 62370e2b9376ea7b76e0423de28ccb322c17e2da Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Thu, 11 Nov 2010 11:44:32 -0600 Subject: b43legacy: Fix compile on ARM architecture When b43legacy is compiled on the arm platform, the following errors are seen: CC [M] drivers/net/wireless/b43legacy/xmit.o In file included from include/net/dst.h:11, from drivers/net/wireless/b43legacy/xmit.c:31: include/net/dst_ops.h:28: error: expected ':', ',', ';', '}' or '__attribute__' before '____cacheline_aligned_in_smp' include/net/dst_ops.h: In function 'dst_entries_get_fast': include/net/dst_ops.h:33: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_get_slow': include/net/dst_ops.h:41: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_add': include/net/dst_ops.h:49: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_init': include/net/dst_ops.h:55: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_destroy': include/net/dst_ops.h:60: error: 'struct dst_ops' has no member named 'pcpuc_entries' make[4]: *** [drivers/net/wireless/b43legacy/xmit.o] Error 1 make[3]: *** [drivers/net/wireless/b43legacy] Error 2 make[2]: *** [drivers/net/wireless] Error 2 make[1]: *** [drivers/net] Error 2 make: *** [drivers] Error 2 The cause is a missing include of , which is present for i386 and x86_64 architectures, but not for arm. Signed-off-by: Arnd Hannemann Signed-off-by: Larry Finger Cc: Stable Signed-off-by: John W. Linville --- include/net/dst_ops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 1fa5306e3e23..51665b3461b8 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -2,6 +2,7 @@ #define _NET_DST_OPS_H #include #include +#include struct dst_entry; struct kmem_cachep; -- cgit v1.2.3 From 309075cf08ed92a7d2c0e22b7653c5daabbd7ad1 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Fri, 12 Nov 2010 08:53:56 +0200 Subject: cfg80211: fix WIPHY_FLAG_IBSS_RSN bit WIPHY_FLAG_IBSS_RSN is BIT(7) as is WIPHY_FLAG_CONTROL_PORT_PROTOCOL. Change to BIT(8). Signed-off-by: Jussi Kivilinna Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2a7936d7851d..97b8b7c9b63c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1355,7 +1355,7 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_AP = BIT(5), WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), - WIPHY_FLAG_IBSS_RSN = BIT(7), + WIPHY_FLAG_IBSS_RSN = BIT(8), }; struct mac_address { -- cgit v1.2.3 From ef22b7b65f0eda9015becc7bff225a399914a242 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 18 Nov 2010 09:40:04 -0800 Subject: net: Fix duplicate volatile warning. jiffies is defined as "volatile". extern unsigned long volatile __jiffy_data jiffies; ACCESS_ONCE() uses "volatile". As a result, some compilers warn duplicate `volatile' for ACCESS_ONCE(jiffies). Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- include/net/neighbour.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 55590ab16b3e..6beb1ffc2b7f 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -303,7 +303,7 @@ static inline void neigh_confirm(struct neighbour *neigh) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { - unsigned long now = ACCESS_ONCE(jiffies); + unsigned long now = jiffies; if (neigh->used != now) neigh->used = now; -- cgit v1.2.3 From 25888e30319f8896fc656fc68643e6a078263060 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Nov 2010 04:11:39 +0000 Subject: af_unix: limit recursion level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Its easy to eat all kernel memory and trigger NMI watchdog, using an exploit program that queues unix sockets on top of others. lkml ref : http://lkml.org/lkml/2010/11/25/8 This mechanism is used in applications, one choice we have is to have a recursion limit. Other limits might be needed as well (if we queue other types of files), since the passfd mechanism is currently limited by socket receive queue sizes only. Add a recursion_level to unix socket, allowing up to 4 levels. Each time we send an unix socket through sendfd mechanism, we copy its recursion level (plus one) to receiver. This recursion level is cleared when socket receive queue is emptied. Reported-by: Марк Коренберг Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/af_unix.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 90c9e2872f27..18e5c3f67580 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -10,6 +10,7 @@ extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); extern void wait_for_unix_gc(void); +extern struct sock *unix_get_socket(struct file *filp); #define UNIX_HASH_SIZE 256 @@ -56,6 +57,7 @@ struct unix_sock { spinlock_t lock; unsigned int gc_candidate : 1; unsigned int gc_maybe_cycle : 1; + unsigned char recursion_level; struct socket_wq peer_wq; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) -- cgit v1.2.3 From 46bcf14f44d8f31ecfdc8b6708ec15a3b33316d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2010 09:29:43 -0800 Subject: filter: fix sk_filter rcu handling Pavel Emelyanov tried to fix a race between sk_filter_(de|at)tach and sk_clone() in commit 47e958eac280c263397 Problem is we can have several clones sharing a common sk_filter, and these clones might want to sk_filter_attach() their own filters at the same time, and can overwrite old_filter->rcu, corrupting RCU queues. We can not use filter->rcu without being sure no other thread could do the same thing. Switch code to a more conventional ref-counting technique : Do the atomic decrement immediately and queue one rcu call back when last reference is released. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index a6338d039857..659d968d95c5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1155,6 +1155,8 @@ extern void sk_common_release(struct sock *sk); /* Initialise core socket variables */ extern void sock_init_data(struct socket *sock, struct sock *sk); +extern void sk_filter_release_rcu(struct rcu_head *rcu); + /** * sk_filter_release - release a socket filter * @fp: filter to remove @@ -1165,7 +1167,7 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); static inline void sk_filter_release(struct sk_filter *fp) { if (atomic_dec_and_test(&fp->refcnt)) - kfree(fp); + call_rcu_bh(&fp->rcu, sk_filter_release_rcu); } static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) -- cgit v1.2.3 From fcbdf09d9652c8919dcf47072e3ae7dcb4eb98ac Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Thu, 16 Dec 2010 14:26:56 -0800 Subject: net: fix nulls list corruptions in sk_prot_alloc Special care is taken inside sk_port_alloc to avoid overwriting skc_node/skc_nulls_node. We should also avoid overwriting skc_bind_node/skc_portaddr_node. The patch fixes the following crash: BUG: unable to handle kernel paging request at fffffffffffffff0 IP: [] udp4_lib_lookup2+0xad/0x370 [] __udp4_lib_lookup+0x282/0x360 [] __udp4_lib_rcv+0x31e/0x700 [] ? ip_local_deliver_finish+0x65/0x190 [] ? ip_local_deliver+0x88/0xa0 [] udp_rcv+0x15/0x20 [] ip_local_deliver_finish+0x65/0x190 [] ip_local_deliver+0x88/0xa0 [] ip_rcv_finish+0x32d/0x6f0 [] ? netif_receive_skb+0x99c/0x11c0 [] ip_rcv+0x2bb/0x350 [] netif_receive_skb+0x99c/0x11c0 Signed-off-by: Leonard Crestez Signed-off-by: Octavian Purdila Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 659d968d95c5..7d3f7ce239b5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -754,6 +754,7 @@ struct proto { void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); + void (*clear_sk)(struct sock *sk, int size); /* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS @@ -852,6 +853,8 @@ static inline void __sk_prot_rehash(struct sock *sk) sk->sk_prot->hash(sk); } +void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); + /* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) -- cgit v1.2.3 From ad0081e43af6de3fecf308b0d098f9611835766b Mon Sep 17 00:00:00 2001 From: David Stevens Date: Fri, 17 Dec 2010 11:42:42 +0000 Subject: ipv6: Fragment locally generated tunnel-mode IPSec6 packets as needed. This patch modifies IPsec6 to fragment IPv6 packets that are locally generated as needed. This version of the patch only fragments in tunnel mode, so that fragment headers will not be obscured by ESP in transport mode. Signed-off-by: David L Stevens Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/ip6_route.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 278312c95f96..2ab926860cd8 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -164,5 +164,15 @@ static inline int ipv6_unicast_destination(struct sk_buff *skb) return rt->rt6i_flags & RTF_LOCAL; } +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); + +static inline int ip6_skb_dst_mtu(struct sk_buff *skb) +{ + struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + + return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? + skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); +} + #endif #endif -- cgit v1.2.3 From 173021072e86a0a5b3d2271347493a3e0d5f68e8 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Mon, 20 Dec 2010 04:35:30 +0000 Subject: net_sched: always clone skbs Pawel reported a panic related to handling shared skbs in ixgbe incorrectly. So we need to revert my previous patch to work around this bug. Instead of reverting the patch completely, I just revert the essential lines, so we can add the previous optimization back more easily in future. commit 3511c9132f8b1e1b5634e41a3331c44b0c13be70 Author: Changli Gao Date: Sat Oct 16 13:04:08 2010 +0000 net_sched: remove the unused parameter of qdisc_create_dflt() Reported-by: Pawel Staszewski Signed-off-by: Changli Gao Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sch_generic.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index ea1f8a83160d..79f34e2b752f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -610,11 +610,7 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask, { struct sk_buff *n; - if ((action == TC_ACT_STOLEN || action == TC_ACT_QUEUED) && - !skb_shared(skb)) - n = skb_get(skb); - else - n = skb_clone(skb, gfp_mask); + n = skb_clone(skb, gfp_mask); if (n) { n->tc_verd = SET_TC_VERD(n->tc_verd, 0); -- cgit v1.2.3 From 9f333281a7da4c3a59bccc0cb53f7590eb850d93 Mon Sep 17 00:00:00 2001 From: Johannes Stezenbach Date: Tue, 30 Nov 2010 16:49:23 +0100 Subject: mac80211/rt2x00: add ieee80211_tx_status_ni() All rt2x00 drivers except rt2800pci call ieee80211_tx_status() from a workqueue, which causes "NOHZ: local_softirq_pending 08" messages. To fix it, add ieee80211_tx_status_ni() similar to ieee80211_rx_ni() which can be called from process context, and call it from rt2x00lib_txdone(). For the rt2800pci special case a driver flag is introduced. https://bugzilla.kernel.org/show_bug.cgi?id=24892 Signed-off-by: Johannes Stezenbach Signed-off-by: John W. Linville --- include/net/mac80211.h | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9fdf982d1286..365359b24177 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2024,8 +2024,8 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw, * * This function may not be called in IRQ context. Calls to this function * for a single hardware must be synchronized against each other. Calls - * to this function and ieee80211_tx_status_irqsafe() may not be mixed - * for a single hardware. + * to this function, ieee80211_tx_status_ni() and ieee80211_tx_status_irqsafe() + * may not be mixed for a single hardware. * * @hw: the hardware the frame was transmitted by * @skb: the frame that was transmitted, owned by mac80211 after this call @@ -2033,14 +2033,34 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw, void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb); +/** + * ieee80211_tx_status_ni - transmit status callback (in process context) + * + * Like ieee80211_tx_status() but can be called in process context. + * + * Calls to this function, ieee80211_tx_status() and + * ieee80211_tx_status_irqsafe() may not be mixed + * for a single hardware. + * + * @hw: the hardware the frame was transmitted by + * @skb: the frame that was transmitted, owned by mac80211 after this call + */ +static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw, + struct sk_buff *skb) +{ + local_bh_disable(); + ieee80211_tx_status(hw, skb); + local_bh_enable(); +} + /** * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback * * Like ieee80211_tx_status() but can be called in IRQ context * (internally defers to a tasklet.) * - * Calls to this function and ieee80211_tx_status() may not be mixed for a - * single hardware. + * Calls to this function, ieee80211_tx_status() and + * ieee80211_tx_status_ni() may not be mixed for a single hardware. * * @hw: the hardware the frame was transmitted by * @skb: the frame that was transmitted, owned by mac80211 after this call -- cgit v1.2.3 From da521b2c4f046383bc8941604174bc0e8bffb430 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 21 Dec 2010 12:43:16 -0800 Subject: net: Fix range checks in tcf_valid_offset(). This function has three bugs: 1) The offset should be valid most of the time, this is just a sanity check, therefore we should use "likely" not "unlikely" 2) This is the only place where we can check for arithmetic overflow of the pointer plus the length. 3) The existing range checks are off by one, the valid range is skb->head to skb_tail_pointer(), inclusive. Based almost entirely upon a patch by Ralph Loader. Reported-by: Ralph Loader Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index dd3031aed9d5..9fcc680ab6b9 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -323,7 +323,9 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer) static inline int tcf_valid_offset(const struct sk_buff *skb, const unsigned char *ptr, const int len) { - return unlikely((ptr + len) < skb_tail_pointer(skb) && ptr > skb->head); + return likely((ptr + len) <= skb_tail_pointer(skb) && + ptr >= skb->head && + (ptr <= (ptr + len))); } #ifdef CONFIG_NET_CLS_IND -- cgit v1.2.3 From e058464990c2ef1f3ecd6b83a154913c3c06f02a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 23 Dec 2010 12:03:57 -0800 Subject: Revert "ipv4: Allow configuring subnets as local addresses" This reverts commit 4465b469008bc03b98a1b8df4e9ae501b6c69d4b. Conflicts: net/ipv4/fib_frontend.c As reported by Ben Greear, this causes regressions: > Change 4465b469008bc03b98a1b8df4e9ae501b6c69d4b caused rules > to stop matching the input device properly because the > FLOWI_FLAG_MATCH_ANY_IIF is always defined in ip_dev_find(). > > This breaks rules such as: > > ip rule add pref 512 lookup local > ip rule del pref 0 lookup local > ip link set eth2 up > ip -4 addr add 172.16.0.102/24 broadcast 172.16.0.255 dev eth2 > ip rule add to 172.16.0.102 iif eth2 lookup local pref 10 > ip rule add iif eth2 lookup 10001 pref 20 > ip route add 172.16.0.0/24 dev eth2 table 10001 > ip route add unreachable 0/0 table 10001 > > If you had a second interface 'eth0' that was on a different > subnet, pinging a system on that interface would fail: > > [root@ct503-60 ~]# ping 192.168.100.1 > connect: Invalid argument Reported-by: Ben Greear Signed-off-by: David S. Miller --- include/net/flow.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 0ac3fb5e0973..bb08692a20b0 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -49,7 +49,6 @@ struct flowi { __u8 proto; __u8 flags; #define FLOWI_FLAG_ANYSRC 0x01 -#define FLOWI_FLAG_MATCH_ANY_IIF 0x02 union { struct { __be16 sport; -- cgit v1.2.3