From c1bc667e844c2677cdf927102ab384fe7b033762 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Thu, 7 Aug 2008 16:43:38 +0200 Subject: IPVS: Add genetlink interface definitions to ip_vs.h Add IPVS Generic Netlink interface definitions to include/linux/ip_vs.h. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/linux/ip_vs.h | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) (limited to 'include') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index ec6eb49af2d8..0f434a28fb58 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -242,4 +242,164 @@ struct ip_vs_daemon_user { int syncid; }; +/* + * + * IPVS Generic Netlink interface definitions + * + */ + +/* Generic Netlink family info */ + +#define IPVS_GENL_NAME "IPVS" +#define IPVS_GENL_VERSION 0x1 + +struct ip_vs_flags { + __be32 flags; + __be32 mask; +}; + +/* Generic Netlink command attributes */ +enum { + IPVS_CMD_UNSPEC = 0, + + IPVS_CMD_NEW_SERVICE, /* add service */ + IPVS_CMD_SET_SERVICE, /* modify service */ + IPVS_CMD_DEL_SERVICE, /* delete service */ + IPVS_CMD_GET_SERVICE, /* get service info */ + + IPVS_CMD_NEW_DEST, /* add destination */ + IPVS_CMD_SET_DEST, /* modify destination */ + IPVS_CMD_DEL_DEST, /* delete destination */ + IPVS_CMD_GET_DEST, /* get destination info */ + + IPVS_CMD_NEW_DAEMON, /* start sync daemon */ + IPVS_CMD_DEL_DAEMON, /* stop sync daemon */ + IPVS_CMD_GET_DAEMON, /* get sync daemon status */ + + IPVS_CMD_SET_CONFIG, /* set config settings */ + IPVS_CMD_GET_CONFIG, /* get config settings */ + + IPVS_CMD_SET_INFO, /* only used in GET_INFO reply */ + IPVS_CMD_GET_INFO, /* get general IPVS info */ + + IPVS_CMD_ZERO, /* zero all counters and stats */ + IPVS_CMD_FLUSH, /* flush services and dests */ + + __IPVS_CMD_MAX, +}; + +#define IPVS_CMD_MAX (__IPVS_CMD_MAX - 1) + +/* Attributes used in the first level of commands */ +enum { + IPVS_CMD_ATTR_UNSPEC = 0, + IPVS_CMD_ATTR_SERVICE, /* nested service attribute */ + IPVS_CMD_ATTR_DEST, /* nested destination attribute */ + IPVS_CMD_ATTR_DAEMON, /* nested sync daemon attribute */ + IPVS_CMD_ATTR_TIMEOUT_TCP, /* TCP connection timeout */ + IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, /* TCP FIN wait timeout */ + IPVS_CMD_ATTR_TIMEOUT_UDP, /* UDP timeout */ + __IPVS_CMD_ATTR_MAX, +}; + +#define IPVS_CMD_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1) + +/* + * Attributes used to describe a service + * + * Used inside nested attribute IPVS_CMD_ATTR_SERVICE + */ +enum { + IPVS_SVC_ATTR_UNSPEC = 0, + IPVS_SVC_ATTR_AF, /* address family */ + IPVS_SVC_ATTR_PROTOCOL, /* virtual service protocol */ + IPVS_SVC_ATTR_ADDR, /* virtual service address */ + IPVS_SVC_ATTR_PORT, /* virtual service port */ + IPVS_SVC_ATTR_FWMARK, /* firewall mark of service */ + + IPVS_SVC_ATTR_SCHED_NAME, /* name of scheduler */ + IPVS_SVC_ATTR_FLAGS, /* virtual service flags */ + IPVS_SVC_ATTR_TIMEOUT, /* persistent timeout */ + IPVS_SVC_ATTR_NETMASK, /* persistent netmask */ + + IPVS_SVC_ATTR_STATS, /* nested attribute for service stats */ + __IPVS_SVC_ATTR_MAX, +}; + +#define IPVS_SVC_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1) + +/* + * Attributes used to describe a destination (real server) + * + * Used inside nested attribute IPVS_CMD_ATTR_DEST + */ +enum { + IPVS_DEST_ATTR_UNSPEC = 0, + IPVS_DEST_ATTR_ADDR, /* real server address */ + IPVS_DEST_ATTR_PORT, /* real server port */ + + IPVS_DEST_ATTR_FWD_METHOD, /* forwarding method */ + IPVS_DEST_ATTR_WEIGHT, /* destination weight */ + + IPVS_DEST_ATTR_U_THRESH, /* upper threshold */ + IPVS_DEST_ATTR_L_THRESH, /* lower threshold */ + + IPVS_DEST_ATTR_ACTIVE_CONNS, /* active connections */ + IPVS_DEST_ATTR_INACT_CONNS, /* inactive connections */ + IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */ + + IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */ + __IPVS_DEST_ATTR_MAX, +}; + +#define IPVS_DEST_ATTR_MAX (__IPVS_DEST_ATTR_MAX - 1) + +/* + * Attributes describing a sync daemon + * + * Used inside nested attribute IPVS_CMD_ATTR_DAEMON + */ +enum { + IPVS_DAEMON_ATTR_UNSPEC = 0, + IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */ + IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */ + IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */ + __IPVS_DAEMON_ATTR_MAX, +}; + +#define IPVS_DAEMON_ATTR_MAX (__IPVS_DAEMON_ATTR_MAX - 1) + +/* + * Attributes used to describe service or destination entry statistics + * + * Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS + */ +enum { + IPVS_STATS_ATTR_UNSPEC = 0, + IPVS_STATS_ATTR_CONNS, /* connections scheduled */ + IPVS_STATS_ATTR_INPKTS, /* incoming packets */ + IPVS_STATS_ATTR_OUTPKTS, /* outgoing packets */ + IPVS_STATS_ATTR_INBYTES, /* incoming bytes */ + IPVS_STATS_ATTR_OUTBYTES, /* outgoing bytes */ + + IPVS_STATS_ATTR_CPS, /* current connection rate */ + IPVS_STATS_ATTR_INPPS, /* current in packet rate */ + IPVS_STATS_ATTR_OUTPPS, /* current out packet rate */ + IPVS_STATS_ATTR_INBPS, /* current in byte rate */ + IPVS_STATS_ATTR_OUTBPS, /* current out byte rate */ + __IPVS_STATS_ATTR_MAX, +}; + +#define IPVS_STATS_ATTR_MAX (__IPVS_STATS_ATTR_MAX - 1) + +/* Attributes used in response to IPVS_CMD_GET_INFO command */ +enum { + IPVS_INFO_ATTR_UNSPEC = 0, + IPVS_INFO_ATTR_VERSION, /* IPVS version number */ + IPVS_INFO_ATTR_CONN_TAB_SIZE, /* size of connection hash table */ + __IPVS_INFO_ATTR_MAX, +}; + +#define IPVS_INFO_ATTR_MAX (__IPVS_INFO_ATTR_MAX - 1) + #endif /* _IP_VS_H */ -- cgit v1.2.3 From a919cf4b6b499416b6e2247dbc79196c4325f2e6 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Thu, 14 Aug 2008 00:47:16 +0200 Subject: ipvs: Create init functions for estimator code Commit 8ab19ea36c5c5340ff598e4d15fc084eb65671dc ("ipvs: Fix possible deadlock in estimator code") fixed a deadlock condition, but that condition can only happen during unload of IPVS, because during normal operation there is at least our global stats structure in the estimator list. The mod_timer() and del_timer_sync() calls are actually initialization and cleanup code in disguise. Let's make it explicit and move them to their own init and cleanup function. Signed-off-by: Sven Wegener Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 7312c3dd309f..a25ad243031d 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -683,6 +683,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp); /* * IPVS rate estimator prototypes (from ip_vs_est.c) */ +extern int ip_vs_estimator_init(void); +extern void ip_vs_estimator_cleanup(void); extern void ip_vs_new_estimator(struct ip_vs_stats *stats); extern void ip_vs_kill_estimator(struct ip_vs_stats *stats); extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); -- cgit v1.2.3 From 92ab85354993ac3a364c65cab45745af470ffc67 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 24 Jul 2008 21:02:04 +0300 Subject: mac80211: add ieee80211_queue_stopped) This patch adds ieee80211_queue_stopped that let drivers to query queue status Signed-off-by: Tomas Winkler Signed-off-by: John W. Linville --- include/net/mac80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ff137fd7714f..25cc192cbe4c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1607,6 +1607,16 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue); */ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue); +/** + * ieee80211_queue_stopped - test status of the queue + * @hw: pointer as obtained from ieee80211_alloc_hw(). + * @queue: queue number (counted from zero). + * + * Drivers should use this function instead of netif_stop_queue. + */ + +int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue); + /** * ieee80211_stop_queues - stop all queues * @hw: pointer as obtained from ieee80211_alloc_hw(). -- cgit v1.2.3 From b4f28bbb9bf0b2c829ecf97ce2173f204fde4f10 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 30 Jul 2008 17:19:55 +0200 Subject: mac80211: add rx status flag for short preamble and use it for the radiotap header Signed-off-by: Bruno Randolf Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 25cc192cbe4c..dcc05a197dc1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -363,6 +363,7 @@ static inline struct ieee80211_tx_info *IEEE80211_SKB_CB(struct sk_buff *skb) * @RX_FLAG_TSFT: The timestamp passed in the RX status (@mactime field) * is valid. This is useful in monitor mode and necessary for beacon frames * to enable IBSS merging. + * @RX_FLAG_SHORTPRE: Short preamble was used for this frame */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = 1<<0, @@ -373,6 +374,7 @@ enum mac80211_rx_flags { RX_FLAG_FAILED_FCS_CRC = 1<<5, RX_FLAG_FAILED_PLCP_CRC = 1<<6, RX_FLAG_TSFT = 1<<7, + RX_FLAG_SHORTPRE = 1<<8 }; /** -- cgit v1.2.3 From 6b644e524bbd4089a28e0711de4f1cf2daa5db50 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 15 Jul 2008 18:44:12 -0700 Subject: mac80211: remove ieee80211_get_hdrlen All users have been moved over to the version taking a le16 frame control rather than a cpu-endian value. Signed-off-by: Harvey Harrison Signed-off-by: John W. Linville --- include/net/mac80211.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dcc05a197dc1..0fdc3dabc964 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1558,16 +1558,6 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, struct ieee80211_vif *vif); */ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); -/** - * ieee80211_get_hdrlen - get header length from frame control - * - * This function returns the 802.11 header length in bytes (not including - * encryption headers.) - * - * @fc: the frame control field (in CPU endianness) - */ -int ieee80211_get_hdrlen(u16 fc); - /** * ieee80211_hdrlen - get header length in bytes from frame control * @fc: frame control field in little-endian format -- cgit v1.2.3 From 9961920199ec88d6b581d3e38502088935925c04 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 2 Aug 2008 15:10:58 -0300 Subject: rfkill: add default global states (v2) Add a second set of global states, "rfkill_default_states", to track the state that will be used when the first rfkill class of a given type is registered, and also to save "undo" information when rfkill_epo is called. Add a new exported function, rfkill_set_default(), which can be used by platform drivers to restore radio state saved by the platform across reboots or shutdown. Also, fix rfkill_epo to properly update rfkill_states, but still preserve a copy of the state so that we can undo the effect of rfkill_epo later if we want to. Add rfkill_restore_states() to restore rfkill_states from the copy. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/linux/rfkill.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 741d1a62cc3f..aa3c7d5852f6 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -116,6 +116,7 @@ int rfkill_register(struct rfkill *rfkill); void rfkill_unregister(struct rfkill *rfkill); int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state); +int rfkill_set_default(enum rfkill_type type, enum rfkill_state state); /** * rfkill_state_complement - return complementar state -- cgit v1.2.3 From 77fba13ccc3a2a3db100892a4a6cc5e2f8290cc7 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 2 Aug 2008 15:10:59 -0300 Subject: rfkill: add __must_check annotations rfkill is not a small, mere detail in wireless support. Once it starts supporting rfkill and users start counting on that support, a wireless device is at risk of operating in dangerous conditions should rfkill support fail to properly activate. Therefore, add the required __must_check annotations on some key functions of the rfkill API, for which the wireless drivers absolutely MUST handle the failure mode safely in order to avoid a potentially dangerous situation where the wireless transmitter is left enabled when the user don't want it to. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Cc: Matthew Garrett Signed-off-by: John W. Linville --- include/linux/rfkill.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index aa3c7d5852f6..e92d8e94bb88 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -110,9 +110,10 @@ struct rfkill { }; #define to_rfkill(d) container_of(d, struct rfkill, dev) -struct rfkill *rfkill_allocate(struct device *parent, enum rfkill_type type); +struct rfkill * __must_check rfkill_allocate(struct device *parent, + enum rfkill_type type); void rfkill_free(struct rfkill *rfkill); -int rfkill_register(struct rfkill *rfkill); +int __must_check rfkill_register(struct rfkill *rfkill); void rfkill_unregister(struct rfkill *rfkill); int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state); -- cgit v1.2.3 From 96c87607ac8f9b0e641d11ba6e57f8ec0214ea1c Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 2 Aug 2008 15:11:00 -0300 Subject: rfkill: introduce RFKILL_STATE_MAX While it is interesting to not add last-enum-markers because it allows gcc to warn us of switch() statements missing a valid state, we really should be handling memory corruption on a rfkill state with default clauses, anyway. So add RFKILL_STATE_MAX and use it where applicable. It makes for safer code in the long run. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/linux/rfkill.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index e92d8e94bb88..4cd64b0d9825 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -49,6 +49,7 @@ enum rfkill_state { RFKILL_STATE_SOFT_BLOCKED = 0, /* Radio output blocked */ RFKILL_STATE_UNBLOCKED = 1, /* Radio output allowed */ RFKILL_STATE_HARD_BLOCKED = 2, /* Output blocked, non-overrideable */ + RFKILL_STATE_MAX, /* marker for last valid state */ }; /* -- cgit v1.2.3 From fef1643bf0cdd092a52dc3378479e4811fd65152 Mon Sep 17 00:00:00 2001 From: Jasper Bryant-Greene Date: Sun, 3 Aug 2008 11:30:55 +1200 Subject: move ETH_P_PAE from ieee80211_i.h to if_ether.h ETH_P_PAE belongs in if_ether.h with the other ETH_P_* definitions. This patch moves it there. Signed-off-by: Jasper Bryant-Greene Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index e157c1399b61..5028e0b6082b 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -74,6 +74,7 @@ #define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport * over Ethernet */ +#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_TIPC 0x88CA /* TIPC */ -- cgit v1.2.3 From af01d537463714e36e2c96d2da35902b76cd6827 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 28 Aug 2008 02:53:51 -0700 Subject: net: more #ifdef CONFIG_COMPAT All users of struct proto::compat_[gs]etsockopt and struct inet_connection_sock_af_ops::compat_[gs]etsockopt are under #ifdef already, so use it in structure definition too. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 ++ include/net/sock.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 2ff545a56fb5..03cffd9f64e3 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -51,12 +51,14 @@ struct inet_connection_sock_af_ops { char __user *optval, int optlen); int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); +#ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct sock *sk, int level, int optname, char __user *optval, int optlen); int (*compat_getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); +#endif void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); int (*bind_conflict)(const struct sock *sk, const struct inet_bind_bucket *tb); diff --git a/include/net/sock.h b/include/net/sock.h index 06c5259aff30..75a312d3888a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -532,6 +532,7 @@ struct proto { int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *option); +#ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct sock *sk, int level, int optname, char __user *optval, @@ -540,6 +541,7 @@ struct proto { int level, int optname, char __user *optval, int __user *option); +#endif int (*sendmsg)(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); int (*recvmsg)(struct kiocb *iocb, struct sock *sk, -- cgit v1.2.3 From 9f1ba9062e032fb7b395cd27fc564754fe4e9867 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 7 Aug 2008 20:07:01 +0300 Subject: mac80211/cfg80211: Add BSS configuration options for AP mode This change adds a new cfg80211 command, NL80211_CMD_SET_BSS, to allow AP mode BSS parameters to be changed from user space (e.g., hostapd). The drivers using mac80211 are expected to be modified with separate changes to use the new BSS info parameter for short slot time in the bss_info_changed() handler. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 19 +++++++++++++++++++ include/net/cfg80211.h | 22 ++++++++++++++++++++++ include/net/mac80211.h | 9 +++++++++ 3 files changed, 50 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2be7c63bc0f2..447c02a5190e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -89,6 +89,8 @@ * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC * or, if no MAC address given, all mesh paths, on the interface identified * by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by + * %NL80211_ATTR_IFINDEX. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -127,6 +129,8 @@ enum nl80211_commands { NL80211_CMD_NEW_MPATH, NL80211_CMD_DEL_MPATH, + NL80211_CMD_SET_BSS, + /* add commands here */ /* used to define NL80211_CMD_MAX below */ @@ -134,6 +138,11 @@ enum nl80211_commands { NL80211_CMD_MAX = __NL80211_CMD_AFTER_LAST - 1 }; +/* + * Allow user space programs to use #ifdef on new commands by defining them + * here + */ +#define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS /** * enum nl80211_attrs - nl80211 netlink attributes @@ -192,6 +201,12 @@ enum nl80211_commands { * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of * &enum nl80211_mntr_flags. * + * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled + * (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled + * (u8, 0 or 1) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -235,6 +250,10 @@ enum nl80211_attrs { NL80211_ATTR_MPATH_NEXT_HOP, NL80211_ATTR_MPATH_INFO, + NL80211_ATTR_BSS_CTS_PROT, + NL80211_ATTR_BSS_SHORT_PREAMBLE, + NL80211_ATTR_BSS_SHORT_SLOT_TIME, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e00750836ba5..7afef14d5c5b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -268,6 +268,23 @@ struct mpath_info { u8 flags; }; +/** + * struct bss_parameters - BSS parameters + * + * Used to change BSS parameters (mainly for AP mode). + * + * @use_cts_prot: Whether to use CTS protection + * (0 = no, 1 = yes, -1 = do not change) + * @use_short_preamble: Whether the use of short preambles is allowed + * (0 = no, 1 = yes, -1 = do not change) + * @use_short_slot_time: Whether the use of short slot time is allowed + * (0 = no, 1 = yes, -1 = do not change) + */ +struct bss_parameters { + int use_cts_prot; + int use_short_preamble; + int use_short_slot_time; +}; /* from net/wireless.h */ struct wiphy; @@ -318,6 +335,8 @@ struct wiphy; * @change_station: Modify a given station. * * @set_mesh_cfg: set mesh parameters (by now, just mesh id) + * + * @change_bss: Modify parameters for a given BSS. */ struct cfg80211_ops { int (*add_virtual_intf)(struct wiphy *wiphy, char *name, @@ -370,6 +389,9 @@ struct cfg80211_ops { int (*dump_mpath)(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo); + + int (*change_bss)(struct wiphy *wiphy, struct net_device *dev, + struct bss_parameters *params); }; #endif /* __NET_CFG80211_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0fdc3dabc964..7c399a9c11da 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -158,12 +158,14 @@ struct ieee80211_low_level_stats { * also implies a change in the AID. * @BSS_CHANGED_ERP_CTS_PROT: CTS protection changed * @BSS_CHANGED_ERP_PREAMBLE: preamble changed + * @BSS_CHANGED_ERP_SLOT: slot timing changed * @BSS_CHANGED_HT: 802.11n parameters changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, BSS_CHANGED_ERP_CTS_PROT = 1<<1, BSS_CHANGED_ERP_PREAMBLE = 1<<2, + BSS_CHANGED_ERP_SLOT = 1<<3, BSS_CHANGED_HT = 1<<4, }; @@ -177,6 +179,7 @@ enum ieee80211_bss_change { * @aid: association ID number, valid only when @assoc is true * @use_cts_prot: use CTS protection * @use_short_preamble: use 802.11b short preamble + * @use_short_slot: use short slot time (only relevant for ERP) * @dtim_period: num of beacons before the next DTIM, for PSM * @timestamp: beacon timestamp * @beacon_int: beacon interval @@ -192,6 +195,7 @@ struct ieee80211_bss_conf { /* erp related data */ bool use_cts_prot; bool use_short_preamble; + bool use_short_slot; u8 dtim_period; u16 beacon_int; u16 assoc_capability; @@ -420,6 +424,11 @@ struct ieee80211_rx_status { * @IEEE80211_CONF_PS: Enable 802.11 power save mode */ enum ieee80211_conf_flags { + /* + * TODO: IEEE80211_CONF_SHORT_SLOT_TIME will be removed once drivers + * have been converted to use bss_info_changed() for slot time + * configuration + */ IEEE80211_CONF_SHORT_SLOT_TIME = (1<<0), IEEE80211_CONF_RADIOTAP = (1<<1), IEEE80211_CONF_SUPPORT_HT_MODE = (1<<2), -- cgit v1.2.3 From 18d7c65ba6628d2759bd650e7039d6307f2c99e9 Mon Sep 17 00:00:00 2001 From: Sujith Date: Thu, 14 Aug 2008 13:27:41 +0530 Subject: mac80211: Add an 802.11n definition This patch adds a HT Capability (DSSS/CCK Mode in 40MHz BSS) definition to ieee80211.h Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7f4df7c7659d..be456450cd2e 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -714,6 +714,7 @@ struct ieee80211_ht_addt_info { #define IEEE80211_HT_CAP_SGI_40 0x0040 #define IEEE80211_HT_CAP_DELAY_BA 0x0400 #define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 +#define IEEE80211_HT_CAP_DSSSCCK40 0x1000 /* 802.11n HT capability AMPDU settings */ #define IEEE80211_HT_CAP_AMPDU_FACTOR 0x03 #define IEEE80211_HT_CAP_AMPDU_DENSITY 0x1C -- cgit v1.2.3 From 095f695cbb07281682462da0618fffabb499d0be Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 19 Aug 2008 12:50:31 -0500 Subject: ssb: Update for Rev. 5 SPROM Although a revision 5 SPROM has not been seen in the wild, the open-source portion of the MIPS driver 4.150.10.5 describes its layout, which is mostly inherited from revision 4. This patch implements the differences. Signed-off-by: Larry Finger Acked-by: Michael Buesch Signed-off-by: John W. Linville --- include/linux/ssb/ssb_regs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index ebad0bac9801..271bb4b6446e 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -316,6 +316,21 @@ #define SSB_SPROM4_PA1B1 0x1090 #define SSB_SPROM4_PA1B2 0x1092 +/* SPROM Revision 5 (inherits most data from rev 4) */ +#define SSB_SPROM5_BFLLO 0x104A /* Boardflags (low 16 bits) */ +#define SSB_SPROM5_BFLHI 0x104C /* Board Flags Hi */ +#define SSB_SPROM5_IL0MAC 0x1052 /* 6 byte MAC address for a/b/g/n */ +#define SSB_SPROM5_CCODE 0x1044 /* Country Code (2 bytes) */ +#define SSB_SPROM5_GPIOA 0x1076 /* Gen. Purpose IO # 0 and 1 */ +#define SSB_SPROM5_GPIOA_P0 0x00FF /* Pin 0 */ +#define SSB_SPROM5_GPIOA_P1 0xFF00 /* Pin 1 */ +#define SSB_SPROM5_GPIOA_P1_SHIFT 8 +#define SSB_SPROM5_GPIOB 0x1078 /* Gen. Purpose IO # 2 and 3 */ +#define SSB_SPROM5_GPIOB_P2 0x00FF /* Pin 2 */ +#define SSB_SPROM5_GPIOB_P3 0xFF00 /* Pin 3 */ +#define SSB_SPROM5_GPIOB_P3_SHIFT 8 + + /* Values for SSB_SPROM1_BINF_CCODE */ enum { SSB_SPROM1CCODE_WORLD = 0, -- cgit v1.2.3 From 31ce12fb3ebf88b054deb99ad729e84888bf6125 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 20 Aug 2008 17:45:06 -0500 Subject: ssb: Clean up extraction of MAC addresses from SPROM Only rev 1 and 2 ssb SPROMs have fields named et0mac and et1mac; however, all of the extraction routines extract pseudo data for these fields from regions that are all 1's resulting in a hardware address of FF:FF:FF:FF:FF:FF. This patch forces such a fill at the beginning of the data extraction process, and only does the formal extraction if the SPROM rev is 1 or 2. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- include/linux/ssb/ssb_regs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index 271bb4b6446e..99a0f991e850 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -245,8 +245,6 @@ /* SPROM Revision 3 (inherits most data from rev 2) */ #define SSB_SPROM3_IL0MAC 0x104A /* 6 bytes MAC address for 802.11b/g */ -#define SSB_SPROM3_ET0MAC 0x1050 /* 6 bytes MAC address for Ethernet ?? */ -#define SSB_SPROM3_ET1MAC 0x1050 /* 6 bytes MAC address for 802.11a ?? */ #define SSB_SPROM3_OFDMAPO 0x102C /* A-PHY OFDM Mid Power Offset (4 bytes, BigEndian) */ #define SSB_SPROM3_OFDMALPO 0x1030 /* A-PHY OFDM Low Power Offset (4 bytes, BigEndian) */ #define SSB_SPROM3_OFDMAHPO 0x1034 /* A-PHY OFDM High Power Offset (4 bytes, BigEndian) */ @@ -267,8 +265,6 @@ /* SPROM Revision 4 */ #define SSB_SPROM4_IL0MAC 0x104C /* 6 byte MAC address for a/b/g/n */ -#define SSB_SPROM4_ET0MAC 0x1018 /* 6 bytes MAC address for Ethernet ?? */ -#define SSB_SPROM4_ET1MAC 0x1018 /* 6 bytes MAC address for 802.11a ?? */ #define SSB_SPROM4_ETHPHY 0x105A /* Ethernet PHY settings ?? */ #define SSB_SPROM4_ETHPHY_ET0A 0x001F /* MII Address for enet0 */ #define SSB_SPROM4_ETHPHY_ET1A 0x03E0 /* MII Address for enet1 */ -- cgit v1.2.3 From 36aedc903ea11a4188de0a118d26c9f20afdd272 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 25 Aug 2008 11:58:58 +0300 Subject: mac80211/cfg80211: HT capabilities for NEW_STA Allow userspace (e.g., hostapd) to set HT capabilities for associated STAs. This is based on a patch from Zhu Yi (only the NL80211_ATTR_HT_CAPABILITY for NEW_STA part is included here). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 12 ++++++++++++ include/net/cfg80211.h | 1 + 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 447c02a5190e..0c1147de3ec7 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -207,6 +207,9 @@ enum nl80211_commands { * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled * (u8, 0 or 1) * + * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -254,16 +257,25 @@ enum nl80211_attrs { NL80211_ATTR_BSS_SHORT_PREAMBLE, NL80211_ATTR_BSS_SHORT_SLOT_TIME, + NL80211_ATTR_HT_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1 }; +/* + * Allow user space programs to use #ifdef on new attributes by defining them + * here + */ +#define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY + #define NL80211_MAX_SUPP_RATES 32 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 +#define NL80211_HT_CAPABILITY_LEN 26 /** * enum nl80211_iftype - (virtual) interface types diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7afef14d5c5b..0a72d1e3d3ab 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -152,6 +152,7 @@ struct station_parameters { u16 aid; u8 supported_rates_len; u8 plink_action; + struct ieee80211_ht_cap *ht_capa; }; /** -- cgit v1.2.3 From 2c10b32bf57db7ec6d4cca4c4aa3d86bacb01c8a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 2 Sep 2008 17:30:27 -0700 Subject: netlink: Remove compat API for nested attributes Removes all _nested_compat() functions from the API. The prio qdisc no longer requires them and netem has its own format anyway. Their existance is only confusing. Resend: Also remove the wrapper macro. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/netlink.h | 82 --------------------------------------------------- 1 file changed, 82 deletions(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 18024b8cecb8..76c43ff38f64 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -119,9 +119,6 @@ * Nested Attributes Construction: * nla_nest_start(skb, type) start a nested attribute * nla_nest_end(skb, nla) finalize a nested attribute - * nla_nest_compat_start(skb, type, start a nested compat attribute - * len, data) - * nla_nest_compat_end(skb, type) finalize a nested compat attribute * nla_nest_cancel(skb, nla) cancel nested attribute construction * * Attribute Length Calculations: @@ -156,7 +153,6 @@ * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attribuets - * nla_parse_nested_compat() parse nested compat attributes * nla_for_each_attr() loop over all attributes * nla_for_each_nested() loop over the nested attributes *========================================================================= @@ -751,39 +747,6 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); } -/** - * nla_parse_nested_compat - parse nested compat attributes - * @tb: destination array with maxtype+1 elements - * @maxtype: maximum attribute type to be expected - * @nla: attribute containing the nested attributes - * @data: pointer to point to contained structure - * @len: length of contained structure - * @policy: validation policy - * - * Parse a nested compat attribute. The compat attribute contains a structure - * and optionally a set of nested attributes. On success the data pointer - * points to the nested data and tb contains the parsed attributes - * (see nla_parse). - */ -static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype, - struct nlattr *nla, - const struct nla_policy *policy, - int len) -{ - int nested_len = nla_len(nla) - NLA_ALIGN(len); - - if (nested_len < 0) - return -EINVAL; - if (nested_len >= nla_attr_size(0)) - return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), - nested_len, policy); - memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); - return 0; -} - -#define nla_parse_nested_compat(tb, maxtype, nla, policy, data, len) \ -({ data = nla_len(nla) >= len ? nla_data(nla) : NULL; \ - __nla_parse_nested_compat(tb, maxtype, nla, policy, len); }) /** * nla_put_u8 - Add a u8 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to @@ -1030,51 +993,6 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) return skb->len; } -/** - * nla_nest_compat_start - Start a new level of nested compat attributes - * @skb: socket buffer to add attributes to - * @attrtype: attribute type of container - * @attrlen: length of structure - * @data: pointer to structure - * - * Start a nested compat attribute that contains both a structure and - * a set of nested attributes. - * - * Returns the container attribute - */ -static inline struct nlattr *nla_nest_compat_start(struct sk_buff *skb, - int attrtype, int attrlen, - const void *data) -{ - struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); - - if (nla_put(skb, attrtype, attrlen, data) < 0) - return NULL; - if (nla_nest_start(skb, attrtype) == NULL) { - nlmsg_trim(skb, start); - return NULL; - } - return start; -} - -/** - * nla_nest_compat_end - Finalize nesting of compat attributes - * @skb: socket buffer the attributes are stored in - * @start: container attribute - * - * Corrects the container attribute header to include the all - * appeneded attributes. - * - * Returns the total data length of the skb. - */ -static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start) -{ - struct nlattr *nest = (void *)start + NLMSG_ALIGN(start->nla_len); - - start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start; - return nla_nest_end(skb, nest); -} - /** * nla_nest_cancel - Cancel nesting of attributes * @skb: socket buffer the message is stored in -- cgit v1.2.3 From b4eec206370b7154dc354dc30f0a3f02ea8468b2 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Implement lookup table for feature-negotiation information A lookup table for feature-negotiation information, extracted from RFC 4340/42, is provided by this patch. All currently known features can be found in this table, along with their feature location, their default value, and type. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6080449fbec9..3978aff197d9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -176,19 +176,20 @@ enum { }; /* DCCP features (RFC 4340 section 6.4) */ -enum { +enum dccp_feature_numbers { DCCPF_RESERVED = 0, DCCPF_CCID = 1, - DCCPF_SHORT_SEQNOS = 2, /* XXX: not yet implemented */ + DCCPF_SHORT_SEQNOS = 2, DCCPF_SEQUENCE_WINDOW = 3, - DCCPF_ECN_INCAPABLE = 4, /* XXX: not yet implemented */ + DCCPF_ECN_INCAPABLE = 4, DCCPF_ACK_RATIO = 5, DCCPF_SEND_ACK_VECTOR = 6, DCCPF_SEND_NDP_COUNT = 7, DCCPF_MIN_CSUM_COVER = 8, - DCCPF_DATA_CHECKSUM = 9, /* XXX: not yet implemented */ + DCCPF_DATA_CHECKSUM = 9, /* 10-127 reserved */ DCCPF_MIN_CCID_SPECIFIC = 128, + DCCPF_SEND_LEV_RATE = 192, /* RFC 4342, sec. 8.4 */ DCCPF_MAX_CCID_SPECIFIC = 255, }; -- cgit v1.2.3 From 828755cee087e4a34f45d6c9db661ccd0631cc6d Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Per-socket initialisation of feature negotiation This provides feature-negotiation initialisation for both DCCP sockets and DCCP request_sockets, to support feature negotiation during connection setup. It also resolves a FIXME regarding the congestion control initialisation. Thanks to Wei Yongjun for help with the IPv6 side of this patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3978aff197d9..484b8a1fb023 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -412,6 +412,7 @@ extern void dccp_minisock_init(struct dccp_minisock *dmsk); * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1) * @dreq_isr: initial sequence number received on the Request * @dreq_service: service code present on the Request (there is just one) + * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo @@ -421,6 +422,7 @@ struct dccp_request_sock { __u64 dreq_iss; __u64 dreq_isr; __be32 dreq_service; + struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; @@ -498,6 +500,7 @@ struct dccp_ackvec; * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) * @dccps_minisock - associated minisock (accessed via dccp_msk) + * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) @@ -535,6 +538,7 @@ struct dccp_sock { __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; + struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; -- cgit v1.2.3 From 71bb49596bbf4e5a3328e1704d18604e822ba181 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Query supported CCIDs This provides a data structure to record which CCIDs are locally supported and three accessor functions: - a test function for internal use which is used to validate CCID requests made by the user; - a copy function so that the list can be used for feature-negotiation; - documented getsockopt() support so that the user can query capabilities. The data structure is a table which is filled in at compile-time with the list of available CCIDs (which in turn depends on the Kconfig choices). Using the copy function for cloning the list of supported CCIDs is useful for feature negotiation, since the negotiation is now with the full list of available CCIDs (e.g. {2, 3}) instead of the default value {2}. This means negotiation will not fail if the peer requests to use CCID3 instead of CCID2. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 484b8a1fb023..d3ac1bde60b4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -209,6 +209,7 @@ struct dccp_so_feat { #define DCCP_SOCKOPT_SERVER_TIMEWAIT 6 #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 +#define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 -- cgit v1.2.3 From 668144f7b41716a9efe1b398e15ead32a26cd101 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Deprecate old setsockopt framework The previous setsockopt interface, which passed socket options via struct dccp_so_feat, is complicated/difficult to use. Continuing to support it leads to ugly code since the old approach did not distinguish between NN and SP values. This patch removes the old setsockopt interface and replaces it with two new functions to register NN/SP values for feature negotiation. These are essentially wrappers around the internal __feat_register functions, with checking added to avoid * wrong usage (type); * changing values while the connection is in progress. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index d3ac1bde60b4..6eaaca9b037a 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -193,13 +193,6 @@ enum dccp_feature_numbers { DCCPF_MAX_CCID_SPECIFIC = 255, }; -/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ -struct dccp_so_feat { - __u8 dccpsf_feat; - __u8 __user *dccpsf_val; - __u8 dccpsf_len; -}; - /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ #define DCCP_SOCKOPT_SERVICE 2 -- cgit v1.2.3 From 20f41eee82864e308a5499308a1722dc3181cc3a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Feature negotiation for minimum-checksum-coverage This provides feature negotiation for server minimum checksum coverage which so far has been missing. Since sender/receiver coverage values range only from 0...15, their type has also been reduced in size from u16 to u4. Feature-negotiation options are now generated for both sender and receiver coverage, i.e. when the peer has `forgotten' to enable partial coverage then feature negotiation will automatically enable (negotiate) the partial coverage value for this connection. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6eaaca9b037a..5a5a89935dbc 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -527,8 +527,8 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; - __u16 dccps_pcslen; - __u16 dccps_pcrlen; + __u8 dccps_pcslen:4; + __u8 dccps_pcrlen:4; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; -- cgit v1.2.3 From 17c30b40ed79e9f3955e884632c8f01e577b204a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Deprecate Ack Ratio sysctl This patch deprecates the Ack Ratio sysctl, since * Ack Ratio is entirely ignored by CCID-3 and CCID-4, * Ack Ratio currently doesn't work in CCID-2 (i.e. is always set to 1); * even if it would work in CCID-2, there is no point for a user to change it: - Ack Ratio is constrained by cwnd (RFC 4341, 6.1.2), - if Ack Ratio > cwnd, the system resorts to spurious RTO timeouts (since waiting for Acks which will never arrive in this window), - cwnd is not a user-configurable value. The only reasonable place for Ack Ratio is to print it for debugging. It is planned to do this later on, as part of e.g. dccp_probe. With this patch Ack Ratio is now under full control of feature negotiation: * Ack Ratio is resolved as a dependency of the selected CCID; * if the chosen CCID supports it (i.e. CCID == CCID-2), Ack Ratio is set to the default of 2, following RFC 4340, 11.3 - "New connections start with Ack Ratio 2 for both endpoints"; * what happens then is part of another patch set, since it concerns the dynamic update of Ack Ratio while the connection is in full flight. Thanks to Tomasz Grobelny for discussion leading up to this patch. Signed-off-by: Gerrit Renker Acked-by: Arnaldo Carvalho de Melo --- include/linux/dccp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 5a5a89935dbc..eda389ce04f4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -368,7 +368,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) - * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ @@ -378,7 +377,6 @@ struct dccp_minisock { __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; - __u8 dccpms_ack_ratio; struct list_head dccpms_pending; struct list_head dccpms_conf; }; -- cgit v1.2.3 From fade756f18d42694e3acb00e3471ab43002cba16 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Set per-connection CCIDs via socket options With this patch, TX/RX CCIDs can now be changed on a per-connection basis, which overrides the defaults set by the global sysctl variables for TX/RX CCIDs. To make full use of this facility, the remaining patches of this patch set are needed, which track dependencies and activate negotiated feature values. Note on the maximum number of CCIDs that can be registered: ----------------------------------------------------------- The maximum number of CCIDs that can be registered on the socket is constrained by the space in a Confirm/Change feature negotiation option. The space in these in turn depends on the size of header options as defined in RFC 4340, 5.8. Since this is a recurring constant, it has been moved from ackvec.h into linux/dccp.h, clarifying its purpose. Relative to this size, the maximum number of CCID identifiers that can be present in a Confirm option (which always consumes 1 byte more than a Change option, cf. 6.1) is 2 bytes less than the maximum TLV size: one for the CCID-feature-type and one for the selected value. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index eda389ce04f4..6a72ff52a8a4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -168,6 +168,8 @@ enum { DCCPO_MIN_CCID_SPECIFIC = 128, DCCPO_MAX_CCID_SPECIFIC = 255, }; +/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ +#define DCCP_SINGLE_OPT_MAXLEN 253 /* DCCP CCIDS */ enum { @@ -203,6 +205,9 @@ enum dccp_feature_numbers { #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 #define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 +#define DCCP_SOCKOPT_CCID 13 +#define DCCP_SOCKOPT_TX_CCID 14 +#define DCCP_SOCKOPT_RX_CCID 15 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 -- cgit v1.2.3 From 78673e24df27c76ec75565f4024d45c2c74ef148 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Remove obsolete parts of the old CCID interface The TX/RX CCIDs of the minisock are now redundant: similar to the Ack Vector case, their value equals initially that of the sysctl, but at the end of feature negotiation may be something different. The old interface removed by this patch thus has been replaced by the newer interface to dynamically query the currently loaded CCIDs earlier in this patch set. Also removed the constructors for the TX CCID and the RX CCID, since the switch rx/non-rx is done by the handler in minisocks.c (and the handler is the only place in the code where CCIDs are loaded). Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6a72ff52a8a4..46daea312d92 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -370,7 +370,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated @@ -378,8 +377,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_rx_ccid; - __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; -- cgit v1.2.3 From 68e074bfcef269bc61006c2740d7f89ccbbd93d7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Remove manual influence on NDP Count feature Updating the NDP count feature is handled automatically now: * for CCID-2 it is disabled, since the code does not use NDP counts; * for CCID-3 it is enabled, as NDP counts are used to determine loss lengths. Allowing the user to change NDP values leads to unpredictable and failing behaviour, since it is then possible to disable NDP counts even when they are needed (e.g. in CCID-3). This means that only those user settings are sensible that agree with the values for Send NDP Count implied by the choice of CCID. But those settings are already activated by the feature negotiation (CCID dependency tracking), hence this form of support is redundant. At startup the initialisation of the NDP count feature is with the default value of 0, which is done implicitly by the zeroing-out of the socket when it is allocated. If the choice of CCID or feature negotiation enables NDP count, this will then be updated via the NDP activation handler. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 46daea312d92..60e94438eadd 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -371,14 +371,12 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) - * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; __u8 dccpms_send_ack_vector; - __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; struct list_head dccpms_conf; }; @@ -490,6 +488,7 @@ struct dccp_ackvec; * @dccps_r_ack_ratio - feature-remote Ack Ratio * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) + * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) @@ -529,6 +528,7 @@ struct dccp_sock { __u16 dccps_r_ack_ratio; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; + __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; -- cgit v1.2.3 From b235dc4abbc1356284bd0dc730efa711f394e0e2 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Phase out the use of boolean Ack Vector sysctl This removes the use of the sysctl and the minisock variable for the Send Ack Vector feature, which is now handled fully dynamically via feature negotiation; i.e. when CCID2 is enabled, Ack Vectors are automatically enabled (as per RFC 4341, 4.). Using a sysctl in parallel to this implementation would open the door to crashes, since much of the code relies on tests of the boolean minisock / sysctl variable. Thus, this patch replaces all tests of type if (dccp_msk(sk)->dccpms_send_ack_vector) /* ... */ with if (dp->dccps_hc_rx_ackvec != NULL) /* ... */ The dccps_hc_rx_ackvec is allocated by the dccp_hdlr_ackvec() when feature negotiation concluded that Ack Vectors are to be used on the half-connection. Otherwise, it is NULL (due to dccp_init_sock/dccp_create_openreq_child), so that the test is a valid one. The activation handler for Ack Vectors is called as soon as the feature negotiation has concluded at the * server when the Ack marking the transition RESPOND => OPEN arrives; * client after it has sent its ACK, marking the transition REQUEST => PARTOPEN. Adding the sequence number of the Response packet to the Ack Vector has been removed, since (a) connection establishment implies that the Response has been received; (b) the CCIDs only look at packets received in the (PART)OPEN state, i.e. this entry will always be ignored; (c) it can not be used for anything useful - to detect loss for instance, only packets received after the loss can serve as pseudo-dupacks. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 60e94438eadd..61734e27abb7 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -360,7 +360,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 #define DCCPF_INITIAL_ACK_RATIO 2 #define DCCPF_INITIAL_CCID DCCPC_CCID2 -#define DCCPF_INITIAL_SEND_ACK_VECTOR 1 /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 @@ -370,13 +369,11 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_send_ack_vector; struct list_head dccpms_pending; struct list_head dccpms_conf; }; -- cgit v1.2.3 From 5d3dac267a7fd0811ec777e76a81f97f5cdcb395 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Initialisation framework for feature negotiation This initialises feature negotiation from two tables, which are initialised from sysctls. As a novel feature, specifics of the implementation (e.g. currently short seqnos and ECN are not supported) are advertised for robustness. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 61734e27abb7..990e97fa1f07 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -369,28 +369,9 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_pending - List of features being negotiated - * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; - struct list_head dccpms_pending; - struct list_head dccpms_conf; -}; - -struct dccp_opt_conf { - __u8 *dccpoc_val; - __u8 dccpoc_len; -}; - -struct dccp_opt_pend { - struct list_head dccpop_node; - __u8 dccpop_type; - __u8 dccpop_feat; - __u8 *dccpop_val; - __u8 dccpop_len; - int dccpop_conf; - struct dccp_opt_conf *dccpop_sc; }; extern void dccp_minisock_init(struct dccp_minisock *dmsk); -- cgit v1.2.3 From 51c7d4fa2675c106a980ddcdbe308b54b5151945 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Implement both feature-local and feature-remote Sequence Window feature This adds full support for local/remote Sequence Window feature, from which the * sequence-number-validity (W) and * acknowledgment-number-validity (W') windows derive as specified in RFC 4340, 7.5.3. Specifically, the following changes are introduced: * integrated new socket fields into dccp_sk; * updated the update_gsr/gss routines with regard to these fields; * updated handler code: the Sequence Window feature is located at the TX side, so the local feature is meant if the handler-rx flag is false; * the initialisation of `rcv_wnd' in reqsk is removed, since - rcv_wnd is not used by the code anywhere; - sequence number checks are not done in the LISTEN state (cf. 7.5.3); - dccp_check_req checks the Ack number validity more rigorously; * the `struct dccp_minisock' became empty and is now removed. Until the handshake completes with activating negotiated values, the local/remote Sequence-Window values are undefined and thus can not reliably be estimated. This issue is addressed in a separate patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 990e97fa1f07..7a0502ab383a 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -363,19 +363,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 -/** - * struct dccp_minisock - Minimal DCCP connection representation - * - * Will be used to pass the state from dccp_request_sock to dccp_sock. - * - * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - */ -struct dccp_minisock { - __u64 dccpms_sequence_window; -}; - -extern void dccp_minisock_init(struct dccp_minisock *dmsk); - /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from @@ -464,13 +451,14 @@ struct dccp_ackvec; * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo * @dccps_l_ack_ratio - feature-local Ack Ratio * @dccps_r_ack_ratio - feature-remote Ack Ratio + * @dccps_l_seq_win - local Sequence Window (influences ack number validity) + * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) - * @dccps_minisock - associated minisock (accessed via dccp_msk) * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) @@ -504,12 +492,13 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; + __u64 dccps_l_seq_win:48; + __u64 dccps_r_seq_win:48; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; - struct dccp_minisock dccps_minisock; struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; @@ -527,11 +516,6 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } -static inline struct dccp_minisock *dccp_msk(const struct sock *sk) -{ - return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; -} - static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { -- cgit v1.2.3 From 0a4822679d94e2b0117aeead06a19fad59533905 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Initialisation and type-checking of feature sysctls This patch takes care of initialising and type-checking sysctls related to feature negotiation. Type checking is important since some of the sysctls now directly act on the feature-negotiation process. The sysctls are initialised with the known default values for each feature. For the type-checking the value constraints from RFC 4340 are used: * Sequence Window uses the specified Wmin=32, the maximum is ulong (4 bytes), tested and confirmed that it works up to 4294967295 - for Gbps speed; * Ack Ratio is between 0 .. 0xffff (2-byte unsigned integer); * CCIDs are between 0 .. 255; * request_retries, retries1, retries2 also between 0..255 for good measure; * tx_qlen is checked to be non-negative; * sync_ratelimit remains as before. Further changes: ---------------- Performed s@sysctl_dccp_feat@sysctl_dccp@g since the sysctls are now in feat.c. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7a0502ab383a..7434a8353e23 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -355,14 +355,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) return __dccp_hdr_len(dccp_hdr(skb)); } - -/* initial values for each feature */ -#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 -#define DCCPF_INITIAL_ACK_RATIO 2 -#define DCCPF_INITIAL_CCID DCCPC_CCID2 -/* FIXME: for now we're default to 1 but it should really be 0 */ -#define DCCPF_INITIAL_SEND_NDP_COUNT 1 - /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from -- cgit v1.2.3 From f10ecaee6dc2c6d56783462b2a82e98bc81b55f4 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Replace magic CCID-specific numbers by symbolic constants The constants DCCPO_{MIN,MAX}_CCID_SPECIFIC are nowhere used in the code, but instead for the CCID-specific options numbers are used. This patch unifies the use of CCID-specific option numbers, by adding symbolic names reflecting the definitions in RFC 4340, 10.3. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7434a8353e23..7187bd8a75f6 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -165,8 +165,10 @@ enum { DCCPO_TIMESTAMP_ECHO = 42, DCCPO_ELAPSED_TIME = 43, DCCPO_MAX = 45, - DCCPO_MIN_CCID_SPECIFIC = 128, - DCCPO_MAX_CCID_SPECIFIC = 255, + DCCPO_MIN_RX_CCID_SPECIFIC = 128, /* from sender to receiver */ + DCCPO_MAX_RX_CCID_SPECIFIC = 191, + DCCPO_MIN_TX_CCID_SPECIFIC = 192, /* from receiver to sender */ + DCCPO_MAX_TX_CCID_SPECIFIC = 255, }; /* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ #define DCCP_SINGLE_OPT_MAXLEN 253 -- cgit v1.2.3 From c2f42077bd06f300ae959204f3c007f820f5e769 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Schedule Sync as out-of-band mechanism The problem with Ack Vectors is that i) their length is variable and can in principle grow quite large, ii) it is hard to predict exactly how large they will be. Due to the second point it seems not a good idea to reduce the MPS; in particular when on average there is enough room for the Ack Vector and an increase in length is momentarily due to some burst loss, after which the Ack Vector returns to its normal/average length. The solution taken by this patch is to subtract a minimum-expected Ack Vector length from the MPS (previous patch), and to defer any larger Ack Vectors onto a separate Sync - but only if indeed there is no space left on the skb. This patch provides the infrastructure to schedule Sync-packets for transporting (urgent) out-of-band data. Its signalling is quicker than scheduling an Ack, since it does not need to wait for new application data. It can thus serve other parts of the DCCP code as well. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7187bd8a75f6..83197b601a4f 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -462,6 +462,7 @@ struct dccp_ackvec; * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) + * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" * @dccps_xmit_timer - timer for when CCID is not ready to send * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ @@ -502,6 +503,7 @@ struct dccp_sock { __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; + __u8 dccps_sync_scheduled:1; struct timer_list dccps_xmit_timer; }; -- cgit v1.2.3 From e7937772d7a2b0127cc4cbc67bc594e139fdaf63 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Extend CCID packet dequeueing interface This extends the packet dequeuing interface of dccp_write_xmit() to allow 1. CCIDs to take care of timing when the next packet may be sent; 2. delayed sending (as before, with an inter-packet gap up to 65.535 seconds). The main purpose is to take CCID2 out of its polling mode (when it is network- limited, it tries every millisecond to send, without interruption). The interface can also be used to support other CCIDs. The mode of operation for (2) is as follows: * new packet is enqueued via dccp_sendmsg() => dccp_write_xmit(), * ccid_hc_tx_send_packet() detects that it may not send (e.g. window full), * it signals this condition via `CCID_PACKET_WILL_DEQUEUE_LATER', * dccp_write_xmit() returns without further action; * after some time the wait-condition for CCID becomes true, * that CCID schedules the tasklet, * tasklet function calls ccid_hc_tx_send_packet() via dccp_write_xmit(), * since the wait-condition is now true, ccid_hc_tx_packet() returns "send now", * packet is sent, and possibly more (since dccp_write_xmit() loops). Code reuse: the taskled function calls dccp_write_xmit(), the timer function reduces to a wrapper around the same code. If the tasklet finds that the socket is locked, it re-schedules the tasklet function (not the tasklet) after one jiffy. Changed DCCP_BUG to dccp_pr_debug when transmit_skb returns an error (e.g. when a local qdisc is used, NET_XMIT_DROP=1 can be returned for many packets). Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 83197b601a4f..eed52bcd35d0 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -463,7 +463,8 @@ struct dccp_ackvec; * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" - * @dccps_xmit_timer - timer for when CCID is not ready to send + * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets + * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ struct dccp_sock { @@ -504,6 +505,7 @@ struct dccp_sock { __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; __u8 dccps_sync_scheduled:1; + struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; }; -- cgit v1.2.3 From 6224877b2ca4be5de96270a8ae490fe2ba11b0e0 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: tcp/dccp: Consolidate common code for RFC 3390 conversion This patch consolidates the code common to TCP and CCID-2: * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341). Signed-off-by: Gerrit Renker --- include/net/tcp.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8983386356a5..6bc4b8148ca0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -782,6 +782,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) +/* + * Convert RFC3390 larger initial windows into an equivalent number of packets. + * + * John Heffner states: + * + * The RFC specifies a window of no more than 4380 bytes + * unless 2*MSS > 4380. Reading the pseudocode in the RFC + * is a bit misleading because they use a clamp at 4380 bytes + * rather than a multiplier in the relevant range. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 bytes) +{ + return bytes <= 1095 ? 4 : (bytes > 1460 ? 2 : 3); +} + extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); -- cgit v1.2.3 From d6da3511d6b558d0b017777b61dc08b8fbc06ea4 Mon Sep 17 00:00:00 2001 From: Tomasz Grobelny Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Policy-based packet dequeueing infrastructure This patch adds a generic infrastructure for policy-based dequeueing of TX packets and provides two policies: * a simple FIFO policy (which is the default) and * a priority based policy (set via socket options). Both policies honour the tx_qlen sysctl for the maximum size of the write queue (can be overridden via socket options). The priority policy uses skb->priority internally to assign an u32 priority identifier, using the same ranking as SO_PRIORITY. The skb->priority field is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary data using cmsg(3), the patch also provides the requisite parsing routines. Signed-off-by: Tomasz Grobelny Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index eed52bcd35d0..010e2d87ed75 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -197,6 +197,21 @@ enum dccp_feature_numbers { DCCPF_MAX_CCID_SPECIFIC = 255, }; +/* DCCP socket control message types for cmsg */ +enum dccp_cmsg_type { + DCCP_SCM_PRIORITY = 1, + DCCP_SCM_QPOLICY_MAX = 0xFFFF, + /* ^-- Up to here reserved exclusively for qpolicy parameters */ + DCCP_SCM_MAX +}; + +/* DCCP priorities for outgoing/queued packets */ +enum dccp_packet_dequeueing_policy { + DCCPQ_POLICY_SIMPLE, + DCCPQ_POLICY_PRIO, + DCCPQ_POLICY_MAX +}; + /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ #define DCCP_SOCKOPT_SERVICE 2 @@ -210,6 +225,8 @@ enum dccp_feature_numbers { #define DCCP_SOCKOPT_CCID 13 #define DCCP_SOCKOPT_TX_CCID 14 #define DCCP_SOCKOPT_RX_CCID 15 +#define DCCP_SOCKOPT_QPOLICY_ID 16 +#define DCCP_SOCKOPT_QPOLICY_TXQLEN 17 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 @@ -458,6 +475,8 @@ struct dccp_ackvec; * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) * @dccps_options_received - parsed set of retrieved options + * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy + * @dccps_tx_qlen - maximum length of the TX queue * @dccps_role - role of this sock, one of %dccp_role * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending @@ -500,6 +519,8 @@ struct dccp_sock { struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; + __u8 dccps_qpolicy; + __u32 dccps_tx_qlen; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; -- cgit v1.2.3 From e7ade46a53055c19a01c8becbe7807f9075d6fee Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:33 +0200 Subject: IPVS: Change IPVS data structures to support IPv6 addresses Introduce new 'af' fields into IPVS data structures for specifying an entry's address family. Convert IP addresses to be of type union nf_inet_addr. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a25ad243031d..d32825585500 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -21,6 +21,9 @@ #include #include +#include /* for union nf_inet_addr */ +#include /* for struct ipv6hdr */ +#include /* for ipv6_addr_copy */ #ifdef CONFIG_IP_VS_DEBUG #include @@ -259,9 +262,10 @@ struct ip_vs_conn { struct list_head c_list; /* hashed list heads */ /* Protocol, addresses and port numbers */ - __be32 caddr; /* client address */ - __be32 vaddr; /* virtual address */ - __be32 daddr; /* destination address */ + u16 af; /* address family */ + union nf_inet_addr caddr; /* client address */ + union nf_inet_addr vaddr; /* virtual address */ + union nf_inet_addr daddr; /* destination address */ __be16 cport; __be16 vport; __be16 dport; @@ -314,8 +318,9 @@ struct ip_vs_service { atomic_t refcnt; /* reference counter */ atomic_t usecnt; /* use counter */ + u16 af; /* address family */ __u16 protocol; /* which protocol (TCP/UDP) */ - __be32 addr; /* IP address for virtual service */ + union nf_inet_addr addr; /* IP address for virtual service */ __be16 port; /* port number for the service */ __u32 fwmark; /* firewall mark of the service */ unsigned flags; /* service status flags */ @@ -342,7 +347,8 @@ struct ip_vs_dest { struct list_head n_list; /* for the dests in the service */ struct list_head d_list; /* for table with all the dests */ - __be32 addr; /* IP address of the server */ + u16 af; /* address family */ + union nf_inet_addr addr; /* IP address of the server */ __be16 port; /* port number of the server */ volatile unsigned flags; /* dest status flags */ atomic_t conn_flags; /* flags to copy to conn */ @@ -366,7 +372,7 @@ struct ip_vs_dest { /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ __u16 protocol; /* which protocol (TCP/UDP) */ - __be32 vaddr; /* virtual IP address */ + union nf_inet_addr vaddr; /* virtual IP address */ __be16 vport; /* virtual port number */ __u32 vfwmark; /* firewall mark of service */ }; -- cgit v1.2.3 From 64aae3cb9fd22f33e491c4730d363eb2229ef910 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:34 +0200 Subject: IPVS: Add general v4/v6 helper functions / data structures Add a struct ip_vs_iphdr for easier handling of common v4 and v6 header fields in the same code path. ip_vs_fill_iphdr() helps to fill this struct from an IPv4 or IPv6 header. Add further helper functions for copying and comparing addresses. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d32825585500..5d6313d972fc 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,6 +25,55 @@ #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ +struct ip_vs_iphdr { + int len; + __u8 protocol; + union nf_inet_addr saddr; + union nf_inet_addr daddr; +}; + +static inline void +ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + const struct ipv6hdr *iph = nh; + iphdr->len = sizeof(struct ipv6hdr); + iphdr->protocol = iph->nexthdr; + ipv6_addr_copy(&iphdr->saddr.in6, &iph->saddr); + ipv6_addr_copy(&iphdr->daddr.in6, &iph->daddr); + } else +#endif + { + const struct iphdr *iph = nh; + iphdr->len = iph->ihl * 4; + iphdr->protocol = iph->protocol; + iphdr->saddr.ip = iph->saddr; + iphdr->daddr.ip = iph->daddr; + } +} + +static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst, + const union nf_inet_addr *src) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + ipv6_addr_copy(&dst->in6, &src->in6); + else +#endif + dst->ip = src->ip; +} + +static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a, + const union nf_inet_addr *b) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + return ipv6_addr_equal(&a->in6, &b->in6); +#endif + return a->ip == b->ip; +} + #ifdef CONFIG_IP_VS_DEBUG #include -- cgit v1.2.3 From c842a3ada9ba8f0cca38a70de3fe0effcfab254c Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:35 +0200 Subject: IPVS: Add debug macros for v4 and v6 address output Add some debugging macros that allow conditional output of either v4 or v6 addresses, depending on an 'af' parameter. This is done by creating a temporary string buffer in an outer debug macro and writing addresses' string representations into it from another macro which can only be used when inside the outer one. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 5d6313d972fc..0400e590b6a2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -78,6 +78,46 @@ static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a, #include extern int ip_vs_get_debug_level(void); + +static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, + const union nf_inet_addr *addr, + int *idx) +{ + int len; +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + len = snprintf(&buf[*idx], buf_len - *idx, "[" NIP6_FMT "]", + NIP6(addr->in6)) + 1; + else +#endif + len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT, + NIPQUAD(addr->ip)) + 1; + + *idx += len; + BUG_ON(*idx > buf_len + 1); + return &buf[*idx - len]; +} + +#define IP_VS_DBG_BUF(level, msg...) \ + do { \ + char ip_vs_dbg_buf[160]; \ + int ip_vs_dbg_idx = 0; \ + if (level <= ip_vs_get_debug_level()) \ + printk(KERN_DEBUG "IPVS: " msg); \ + } while (0) +#define IP_VS_ERR_BUF(msg...) \ + do { \ + char ip_vs_dbg_buf[160]; \ + int ip_vs_dbg_idx = 0; \ + printk(KERN_ERR "IPVS: " msg); \ + } while (0) + +/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros */ +#define IP_VS_DBG_ADDR(af, addr) \ + ip_vs_dbg_addr(af, ip_vs_dbg_buf, \ + sizeof(ip_vs_dbg_buf), addr, \ + &ip_vs_dbg_idx) + #define IP_VS_DBG(level, msg...) \ do { \ if (level <= ip_vs_get_debug_level()) \ @@ -100,6 +140,8 @@ extern int ip_vs_get_debug_level(void); pp->debug_packet(pp, skb, ofs, msg); \ } while (0) #else /* NO DEBUGGING at ALL */ +#define IP_VS_DBG_BUF(level, msg...) do {} while (0) +#define IP_VS_ERR_BUF(msg...) do {} while (0) #define IP_VS_DBG(level, msg...) do {} while (0) #define IP_VS_DBG_RL(msg...) do {} while (0) #define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0) -- cgit v1.2.3 From c860c6b1479992440e4962e9c95d258bfdce4fca Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:36 +0200 Subject: IPVS: Add internal versions of sockopt interface structs Add extended internal versions of struct ip_vs_service_user and struct ip_vs_dest_user (the originals can't be modified as they are part of the old sockopt interface). Adjust ip_vs_ctl.c to work with the new data structures and add some minor AF-awareness. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 0400e590b6a2..1adf8a026e46 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -399,6 +399,45 @@ struct ip_vs_conn { }; +/* + * Extended internal versions of struct ip_vs_service_user and + * ip_vs_dest_user for IPv6 support. + * + * We need these to conveniently pass around service and destination + * options, but unfortunately, we also need to keep the old definitions to + * maintain userspace backwards compatibility for the setsockopt interface. + */ +struct ip_vs_service_user_kern { + /* virtual service addresses */ + u16 af; + u16 protocol; + union nf_inet_addr addr; /* virtual ip address */ + u16 port; + u32 fwmark; /* firwall mark of service */ + + /* virtual service options */ + char *sched_name; + unsigned flags; /* virtual service flags */ + unsigned timeout; /* persistent timeout in sec */ + u32 netmask; /* persistent netmask */ +}; + + +struct ip_vs_dest_user_kern { + /* destination server address */ + union nf_inet_addr addr; + u16 port; + + /* real server options */ + unsigned conn_flags; /* connection flags */ + int weight; /* destination weight */ + + /* thresholds for active connections */ + u32 u_threshold; /* upper threshold */ + u32 l_threshold; /* lower threshold */ +}; + + /* * The information about the virtual service offered to the net * and the forwarding entries -- cgit v1.2.3 From 3c2e0505d25cdc9425336f167fd4ff5f505aecff Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:38 +0200 Subject: IPVS: Add v6 support to ip_vs_service_get() Add support for selecting services based on their address family to ip_vs_service_get() and adjust the callers. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1adf8a026e46..30baed0e696d 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -783,7 +783,8 @@ extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; extern struct ip_vs_service * -ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport); +ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport); static inline void ip_vs_service_put(struct ip_vs_service *svc) { -- cgit v1.2.3 From b14198f6c1bea1687d20723db35d8effecd9d899 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:39 +0200 Subject: IPVS: Add IPv6 support flag to schedulers Add 'supports_ipv6' flag to struct ip_vs_scheduler to indicate whether a scheduler supports IPv6. Set the flag to 1 in schedulers that work with IPv6, 0 otherwise. This flag is checked in a later patch while trying to add a service with a specific scheduler. Adjust debug in v6-supporting schedulers to work with both address families. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 30baed0e696d..3d3b699dc022 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -516,6 +516,9 @@ struct ip_vs_scheduler { char *name; /* scheduler name */ atomic_t refcnt; /* reference counter */ struct module *module; /* THIS_MODULE/NULL */ +#ifdef CONFIG_IP_VS_IPV6 + int supports_ipv6; /* scheduler has IPv6 support */ +#endif /* scheduler initializing service */ int (*init_service)(struct ip_vs_service *svc); -- cgit v1.2.3 From 51ef348b14183789e4cb3444d05ce83b1b69d8fb Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:40 +0200 Subject: IPVS: Add 'af' args to protocol handler functions Add 'af' arguments to conn_schedule(), conn_in_get(), conn_out_get() and csum_check() function pointers in struct ip_vs_protocol. Extend the respective functions for TCP, UDP, AH and ESP and adjust the callers. The changes in the callers need to be somewhat extensive, since they now need to pass a filled out struct ip_vs_iphdr * to the modified functions instead of a struct iphdr *. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3d3b699dc022..68f004f9bcc2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -296,21 +296,23 @@ struct ip_vs_protocol { void (*exit)(struct ip_vs_protocol *pp); - int (*conn_schedule)(struct sk_buff *skb, + int (*conn_schedule)(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp); struct ip_vs_conn * - (*conn_in_get)(const struct sk_buff *skb, + (*conn_in_get)(int af, + const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, + const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); struct ip_vs_conn * - (*conn_out_get)(const struct sk_buff *skb, + (*conn_out_get)(int af, + const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, + const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); @@ -320,7 +322,8 @@ struct ip_vs_protocol { int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp); - int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp); + int (*csum_check)(int af, struct sk_buff *skb, + struct ip_vs_protocol *pp); const char *(*state_name)(int state); -- cgit v1.2.3 From 0bbdd42b7efa66685b6d74701bcde3a596a3a59d Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:42 +0200 Subject: IPVS: Extend protocol DNAT/SNAT and state handlers Extend protocol DNAT/SNAT and state handlers to work with IPv6. Also change/introduce new checksumming helper functions for this. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 68f004f9bcc2..c173f1a7de2c 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -904,6 +904,17 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum) return csum_partial((char *) diff, sizeof(diff), oldsum); } +#ifdef CONFIG_IP_VS_IPV6 +static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new, + __wsum oldsum) +{ + __be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0], + new[3], new[2], new[1], new[0] }; + + return csum_partial((char *) diff, sizeof(diff), oldsum); +} +#endif + static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) { __be16 diff[2] = { ~old, new }; -- cgit v1.2.3 From 28364a59f3dfe7fed3560ec7aff9b7aeb02824fb Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:43 +0200 Subject: IPVS: Extend functions for getting/creating connections Extend functions for getting/creating connections and connection templates for IPv6 support and fix the callers. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index c173f1a7de2c..26893499eb6f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -642,11 +642,16 @@ enum { }; extern struct ip_vs_conn *ip_vs_conn_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port); + extern struct ip_vs_conn *ip_vs_ct_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port); + extern struct ip_vs_conn *ip_vs_conn_out_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port); /* put back the conn without restarting its timer */ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) @@ -657,8 +662,9 @@ extern void ip_vs_conn_put(struct ip_vs_conn *cp); extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); extern struct ip_vs_conn * -ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, - __be32 daddr, __be16 dport, unsigned flags, +ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, + const union nf_inet_addr *vaddr, __be16 vport, + const union nf_inet_addr *daddr, __be16 dport, unsigned flags, struct ip_vs_dest *dest); extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); -- cgit v1.2.3 From b3cdd2a73867d309dca288b8e820c09e3b7f1da1 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:45 +0200 Subject: IPVS: Add and bind IPv6 xmit functions Add xmit functions for IPv6. Also add the already needed __ip_vs_get_out_rt_v6() to ip_vs_core.c. Bind the new xmit functions to v6 connections. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 26893499eb6f..ac709fa5a79b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -855,6 +855,19 @@ extern int ip_vs_icmp_xmit (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset); extern void ip_vs_dst_reset(struct ip_vs_dest *dest); +#ifdef CONFIG_IP_VS_IPV6 +extern int ip_vs_bypass_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_nat_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_tunnel_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_dr_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_icmp_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, + int offset); +#endif /* * This is a simple mechanism to ignore packets when @@ -899,7 +912,12 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp) } extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, - struct ip_vs_conn *cp, int dir); + struct ip_vs_conn *cp, int dir); + +#ifdef CONFIG_IP_VS_IPV6 +extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, int dir); +#endif extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset); -- cgit v1.2.3 From 7937df1564783806c285d34a1c6fd63d8da29d7a Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:48 +0200 Subject: IPVS: Convert real server lookup functions Convert functions for looking up destinations (real servers) to support IPv6 services/dests. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ac709fa5a79b..a719c0ef99ec 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -804,14 +804,16 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc) } extern struct ip_vs_dest * -ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport); +ip_vs_lookup_real_service(int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport); + extern int ip_vs_use_count_inc(void); extern void ip_vs_use_count_dec(void); extern int ip_vs_control_init(void); extern void ip_vs_control_cleanup(void); extern struct ip_vs_dest * -ip_vs_find_dest(__be32 daddr, __be16 dport, - __be32 vaddr, __be16 vport, __u16 protocol); +ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport, + const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol); extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); -- cgit v1.2.3 From cfc78c5a09241a3a9561466834996a7fb90c4228 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:53 +0200 Subject: IPVS: Adjust various debug outputs to use new macros Adjust various debug outputs to use the new *_BUF macro variants for correct output of v4/v6 addresses. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 53 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a719c0ef99ec..1b13cef4b547 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -680,24 +680,32 @@ static inline void ip_vs_control_del(struct ip_vs_conn *cp) { struct ip_vs_conn *ctl_cp = cp->control; if (!ctl_cp) { - IP_VS_ERR("request control DEL for uncontrolled: " - "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(cp->vaddr),ntohs(cp->vport)); + IP_VS_ERR_BUF("request control DEL for uncontrolled: " + "%s:%d to %s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport)); + return; } - IP_VS_DBG(7, "DELeting control for: " - "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport)); + IP_VS_DBG_BUF(7, "DELeting control for: " + "cp.dst=%s:%d ctl_cp.dst=%s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr), + ntohs(ctl_cp->cport)); cp->control = NULL; if (atomic_read(&ctl_cp->n_control) == 0) { - IP_VS_ERR("BUG control DEL with n=0 : " - "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(cp->vaddr),ntohs(cp->vport)); + IP_VS_ERR_BUF("BUG control DEL with n=0 : " + "%s:%d to %s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport)); + return; } atomic_dec(&ctl_cp->n_control); @@ -707,17 +715,22 @@ static inline void ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) { if (cp->control) { - IP_VS_ERR("request control ADD for already controlled: " - "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(cp->vaddr),ntohs(cp->vport)); + IP_VS_ERR_BUF("request control ADD for already controlled: " + "%s:%d to %s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport)); + ip_vs_control_del(cp); } - IP_VS_DBG(7, "ADDing control for: " - "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport)); + IP_VS_DBG_BUF(7, "ADDing control for: " + "cp.dst=%s:%d ctl_cp.dst=%s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr), + ntohs(ctl_cp->cport)); cp->control = ctl_cp; atomic_inc(&ctl_cp->n_control); -- cgit v1.2.3 From f7981c1c67b53abb4a7d8a501e68585b9826179a Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 10:23:22 +0200 Subject: mv643xx_eth: require contiguous receive and transmit queue numbering Simplify receive and transmit queue handling by requiring the set of queue numbers to be contiguous starting from zero. Signed-off-by: Lennert Buytenhek --- include/linux/mv643xx_eth.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 12078577aef6..eb78b00edcde 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -49,10 +49,10 @@ struct mv643xx_eth_platform_data { int duplex; /* - * Which RX/TX queues to use. + * How many RX/TX queues to use. */ - int rx_queue_mask; - int tx_queue_mask; + int rx_queue_count; + int tx_queue_count; /* * Override default RX/TX queue sizes if nonzero. -- cgit v1.2.3 From fc0eb9f226d8ecc8e3b563bf808bd6d61a6153a1 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 12:56:56 +0200 Subject: mv643xx_eth: smi sharing is a per-unit property, not a per-port one Which top-level unit's SMI interface to use should be a property of the top-level unit, not of the individual ports. This patch moves the ->shared_smi pointer from the per-port platform data to the global platform data. Signed-off-by: Lennert Buytenhek --- include/linux/mv643xx_eth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index eb78b00edcde..12339eb65704 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -17,6 +17,7 @@ struct mv643xx_eth_shared_platform_data { struct mbus_dram_target_info *dram; + struct platform_device *shared_smi; unsigned int t_clk; }; @@ -30,7 +31,6 @@ struct mv643xx_eth_platform_data { /* * Whether a PHY is present, and if yes, at which address. */ - struct platform_device *shared_smi; int force_phy_addr; int phy_addr; -- cgit v1.2.3 From ac840605f3b1d9b99e1e6629a54994f8e003ff91 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 14:06:47 +0200 Subject: mv643xx_eth: remove force_phy_addr field Currently, there are two different fields in the mv643xx_eth_platform_data struct that together describe the PHY address -- one field (phy_addr) has the address of the PHY, but if that address is zero, a second field (force_phy_addr) needs to be set to distinguish the actual address zero from a zero due to not having filled in the PHY address explicitly (which should mean 'use the default PHY address'). If we are a bit smarter about the encoding of the phy_addr field, we can avoid the need for a second field -- this patch does that. Signed-off-by: Lennert Buytenhek --- include/linux/mv643xx_eth.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 12339eb65704..cbbbe9bfecad 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -21,6 +21,10 @@ struct mv643xx_eth_shared_platform_data { unsigned int t_clk; }; +#define MV643XX_ETH_PHY_ADDR_DEFAULT 0 +#define MV643XX_ETH_PHY_ADDR(x) (0x80 | (x)) +#define MV643XX_ETH_PHY_NONE 0xff + struct mv643xx_eth_platform_data { /* * Pointer back to our parent instance, and our port number. @@ -31,7 +35,6 @@ struct mv643xx_eth_platform_data { /* * Whether a PHY is present, and if yes, at which address. */ - int force_phy_addr; int phy_addr; /* -- cgit v1.2.3 From f59ac0481660e66cec67f1d6b024e78b9dc715fe Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 29 Aug 2008 16:26:43 -0700 Subject: cfg80211: keep track of supported interface modes It is obviously good for userspace to know up front which interface modes a given piece of hardware might support (even if adding such an interface might fail later because of concurrency issues), so let's make cfg80211 aware of that. For good measure, disallow adding interfaces in all other modes so drivers don't forget to announce support for one mode when they add it. Signed-off-by: Johannes Berg Signed-off-by: Stephen Blackheath Signed-off-by: Ivo van Doorn Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/nl80211.h | 6 ++++++ include/net/wireless.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 0c1147de3ec7..5e51f4e7600b 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -210,6 +210,10 @@ enum nl80211_commands { * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) * + * @NL80211_ATTR_SUPPORTED_IFTYPES: nested attribute containing all + * supported interface types, each a flag attribute with the number + * of the interface mode. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -259,6 +263,8 @@ enum nl80211_attrs { NL80211_ATTR_HT_CAPABILITY, + NL80211_ATTR_SUPPORTED_IFTYPES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/wireless.h b/include/net/wireless.h index 9324f8dd183e..1dc8ec3daa2f 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -185,6 +185,9 @@ struct wiphy { /* permanent MAC address */ u8 perm_addr[ETH_ALEN]; + /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ + u16 interface_modes; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered -- cgit v1.2.3 From 5337407c673e2c7c66a84b9838d55a45a760ecff Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 8 Sep 2008 16:17:42 -0700 Subject: warn: Turn the netdev timeout WARN_ON() into a WARN() this patch turns the netdev timeout WARN_ON_ONCE() into a WARN_ONCE(), so that the device and driver names are inside the warning message. This helps automated tools like kerneloops.org to collect the data and do statistics, as well as making it more likely that humans cut-n-paste the important message as part of a bugreport. Signed-off-by: Arjan van de Ven Signed-off-by: David S. Miller --- include/asm-generic/bug.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index a3f738cffdb6..edc6ba82e090 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -97,6 +97,16 @@ extern void warn_slowpath(const char *file, const int line, unlikely(__ret_warn_once); \ }) +#define WARN_ONCE(condition, format...) ({ \ + static int __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once)) \ + if (WARN(!__warned, format)) \ + __warned = 1; \ + unlikely(__ret_warn_once); \ +}) + #define WARN_ON_RATELIMIT(condition, state) \ WARN_ON((condition) && __ratelimit(state)) -- cgit v1.2.3 From 2206a3f5b75be5dadf11541961bd7c924857eb5d Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Mon, 8 Sep 2008 13:38:11 +0200 Subject: ipvs: Restrict connection table size via Kconfig Instead of checking the value in include/net/ip_vs.h, we can just restrict the range in our Kconfig file. This will prevent values outside of the range early. Signed-off-by: Sven Wegener Reviewed-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1b13cef4b547..38f4f690b186 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -621,16 +621,8 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows); #ifndef CONFIG_IP_VS_TAB_BITS #define CONFIG_IP_VS_TAB_BITS 12 #endif -/* make sure that IP_VS_CONN_TAB_BITS is located in [8, 20] */ -#if CONFIG_IP_VS_TAB_BITS < 8 -#define IP_VS_CONN_TAB_BITS 8 -#endif -#if CONFIG_IP_VS_TAB_BITS > 20 -#define IP_VS_CONN_TAB_BITS 20 -#endif -#if 8 <= CONFIG_IP_VS_TAB_BITS && CONFIG_IP_VS_TAB_BITS <= 20 + #define IP_VS_CONN_TAB_BITS CONFIG_IP_VS_TAB_BITS -#endif #define IP_VS_CONN_TAB_SIZE (1 << IP_VS_CONN_TAB_BITS) #define IP_VS_CONN_TAB_MASK (IP_VS_CONN_TAB_SIZE - 1) -- cgit v1.2.3 From e9c0ce232e7a36daae1ca08282609d7f0c57c567 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Mon, 8 Sep 2008 13:39:04 +0200 Subject: ipvs: Embed user stats structure into kernel stats structure Instead of duplicating the fields, integrate a user stats structure into the kernel stats structure. This is more robust when the members are changed, because they are now automatically kept in sync. Signed-off-by: Sven Wegener Reviewed-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 38f4f690b186..33e2ac6ceb3e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -254,27 +254,10 @@ struct ip_vs_estimator { struct ip_vs_stats { - __u32 conns; /* connections scheduled */ - __u32 inpkts; /* incoming packets */ - __u32 outpkts; /* outgoing packets */ - __u64 inbytes; /* incoming bytes */ - __u64 outbytes; /* outgoing bytes */ - - __u32 cps; /* current connection rate */ - __u32 inpps; /* current in packet rate */ - __u32 outpps; /* current out packet rate */ - __u32 inbps; /* current in byte rate */ - __u32 outbps; /* current out byte rate */ - - /* - * Don't add anything before the lock, because we use memcpy() to copy - * the members before the lock to struct ip_vs_stats_user in - * ip_vs_ctl.c. - */ + struct ip_vs_stats_user ustats; /* statistics */ + struct ip_vs_estimator est; /* estimator */ spinlock_t lock; /* spin lock */ - - struct ip_vs_estimator est; /* estimator */ }; struct dst_entry; -- cgit v1.2.3 From 410e27a49bb98bc7fa3ff5fc05cc313817b9f253 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 9 Sep 2008 13:27:22 +0200 Subject: This reverts "Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/dccp_exp" as it accentally contained the wrong set of patches. These will be submitted separately. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 122 ++++++++++++++++++++++++++++++--------------------- include/net/tcp.h | 15 ------- 2 files changed, 71 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 010e2d87ed75..6080449fbec9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -165,13 +165,9 @@ enum { DCCPO_TIMESTAMP_ECHO = 42, DCCPO_ELAPSED_TIME = 43, DCCPO_MAX = 45, - DCCPO_MIN_RX_CCID_SPECIFIC = 128, /* from sender to receiver */ - DCCPO_MAX_RX_CCID_SPECIFIC = 191, - DCCPO_MIN_TX_CCID_SPECIFIC = 192, /* from receiver to sender */ - DCCPO_MAX_TX_CCID_SPECIFIC = 255, + DCCPO_MIN_CCID_SPECIFIC = 128, + DCCPO_MAX_CCID_SPECIFIC = 255, }; -/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ -#define DCCP_SINGLE_OPT_MAXLEN 253 /* DCCP CCIDS */ enum { @@ -180,36 +176,27 @@ enum { }; /* DCCP features (RFC 4340 section 6.4) */ -enum dccp_feature_numbers { +enum { DCCPF_RESERVED = 0, DCCPF_CCID = 1, - DCCPF_SHORT_SEQNOS = 2, + DCCPF_SHORT_SEQNOS = 2, /* XXX: not yet implemented */ DCCPF_SEQUENCE_WINDOW = 3, - DCCPF_ECN_INCAPABLE = 4, + DCCPF_ECN_INCAPABLE = 4, /* XXX: not yet implemented */ DCCPF_ACK_RATIO = 5, DCCPF_SEND_ACK_VECTOR = 6, DCCPF_SEND_NDP_COUNT = 7, DCCPF_MIN_CSUM_COVER = 8, - DCCPF_DATA_CHECKSUM = 9, + DCCPF_DATA_CHECKSUM = 9, /* XXX: not yet implemented */ /* 10-127 reserved */ DCCPF_MIN_CCID_SPECIFIC = 128, - DCCPF_SEND_LEV_RATE = 192, /* RFC 4342, sec. 8.4 */ DCCPF_MAX_CCID_SPECIFIC = 255, }; -/* DCCP socket control message types for cmsg */ -enum dccp_cmsg_type { - DCCP_SCM_PRIORITY = 1, - DCCP_SCM_QPOLICY_MAX = 0xFFFF, - /* ^-- Up to here reserved exclusively for qpolicy parameters */ - DCCP_SCM_MAX -}; - -/* DCCP priorities for outgoing/queued packets */ -enum dccp_packet_dequeueing_policy { - DCCPQ_POLICY_SIMPLE, - DCCPQ_POLICY_PRIO, - DCCPQ_POLICY_MAX +/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ +struct dccp_so_feat { + __u8 dccpsf_feat; + __u8 __user *dccpsf_val; + __u8 dccpsf_len; }; /* DCCP socket options */ @@ -221,12 +208,6 @@ enum dccp_packet_dequeueing_policy { #define DCCP_SOCKOPT_SERVER_TIMEWAIT 6 #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 -#define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 -#define DCCP_SOCKOPT_CCID 13 -#define DCCP_SOCKOPT_TX_CCID 14 -#define DCCP_SOCKOPT_RX_CCID 15 -#define DCCP_SOCKOPT_QPOLICY_ID 16 -#define DCCP_SOCKOPT_QPOLICY_TXQLEN 17 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 @@ -374,13 +355,62 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) return __dccp_hdr_len(dccp_hdr(skb)); } + +/* initial values for each feature */ +#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 +#define DCCPF_INITIAL_ACK_RATIO 2 +#define DCCPF_INITIAL_CCID DCCPC_CCID2 +#define DCCPF_INITIAL_SEND_ACK_VECTOR 1 +/* FIXME: for now we're default to 1 but it should really be 0 */ +#define DCCPF_INITIAL_SEND_NDP_COUNT 1 + +/** + * struct dccp_minisock - Minimal DCCP connection representation + * + * Will be used to pass the state from dccp_request_sock to dccp_sock. + * + * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) + * @dccpms_ccid - Congestion Control Id (CCID) (section 10) + * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) + * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) + * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3) + * @dccpms_pending - List of features being negotiated + * @dccpms_conf - + */ +struct dccp_minisock { + __u64 dccpms_sequence_window; + __u8 dccpms_rx_ccid; + __u8 dccpms_tx_ccid; + __u8 dccpms_send_ack_vector; + __u8 dccpms_send_ndp_count; + __u8 dccpms_ack_ratio; + struct list_head dccpms_pending; + struct list_head dccpms_conf; +}; + +struct dccp_opt_conf { + __u8 *dccpoc_val; + __u8 dccpoc_len; +}; + +struct dccp_opt_pend { + struct list_head dccpop_node; + __u8 dccpop_type; + __u8 dccpop_feat; + __u8 *dccpop_val; + __u8 dccpop_len; + int dccpop_conf; + struct dccp_opt_conf *dccpop_sc; +}; + +extern void dccp_minisock_init(struct dccp_minisock *dmsk); + /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1) * @dreq_isr: initial sequence number received on the Request * @dreq_service: service code present on the Request (there is just one) - * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo @@ -390,7 +420,6 @@ struct dccp_request_sock { __u64 dreq_iss; __u64 dreq_isr; __be32 dreq_service; - struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; @@ -462,28 +491,21 @@ struct dccp_ackvec; * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo * @dccps_l_ack_ratio - feature-local Ack Ratio * @dccps_r_ack_ratio - feature-remote Ack Ratio - * @dccps_l_seq_win - local Sequence Window (influences ack number validity) - * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) - * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) - * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) + * @dccps_minisock - associated minisock (accessed via dccp_msk) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) * @dccps_options_received - parsed set of retrieved options - * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy - * @dccps_tx_qlen - maximum length of the TX queue * @dccps_role - role of this sock, one of %dccp_role * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) - * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" - * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets - * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) + * @dccps_xmit_timer - timer for when CCID is not ready to send * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ struct dccp_sock { @@ -507,26 +529,19 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; - __u64 dccps_l_seq_win:48; - __u64 dccps_r_seq_win:48; - __u8 dccps_pcslen:4; - __u8 dccps_pcrlen:4; - __u8 dccps_send_ndp_count:1; + __u16 dccps_pcslen; + __u16 dccps_pcrlen; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; - struct list_head dccps_featneg; + struct dccp_minisock dccps_minisock; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; - __u8 dccps_qpolicy; - __u32 dccps_tx_qlen; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; - __u8 dccps_sync_scheduled:1; - struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; }; @@ -535,6 +550,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } +static inline struct dccp_minisock *dccp_msk(const struct sock *sk) +{ + return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; +} + static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 6bc4b8148ca0..8983386356a5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -782,21 +782,6 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) -/* - * Convert RFC3390 larger initial windows into an equivalent number of packets. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than a multiplier in the relevant range. - */ -static inline u32 rfc3390_bytes_to_packets(const u32 bytes) -{ - return bytes <= 1095 ? 4 : (bytes > 1460 ? 2 : 3); -} - extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); -- cgit v1.2.3 From abb81c4f3cb9b8d421f1e5474811ef1d461d341c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 9 Sep 2008 19:58:29 -0700 Subject: ipsec: Use RCU-like construct for saved state within a walk Now that we save states within a walk we need synchronisation so that the list the saved state is on doesn't disappear from under us. As it stands this is done by keeping the state on the list which is bad because it gets in the way of the management of the state life-cycle. An alternative is to make our own pseudo-RCU system where we use counters to indicate which state can't be freed immediately as it may be referenced by an ongoing walk when that resumes. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2933d7474a79..4bb94992b5fa 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -120,9 +120,11 @@ extern struct mutex xfrm_cfg_mutex; /* Full description of state of transformer. */ struct xfrm_state { - /* Note: bydst is re-used during gc */ struct list_head all; - struct hlist_node bydst; + union { + struct list_head gclist; + struct hlist_node bydst; + }; struct hlist_node bysrc; struct hlist_node byspi; @@ -1286,16 +1288,9 @@ static inline void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) walk->count = 0; } -static inline void xfrm_state_walk_done(struct xfrm_state_walk *walk) -{ - if (walk->state != NULL) { - xfrm_state_put(walk->state); - walk->state = NULL; - } -} - extern int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); +extern void xfrm_state_walk_done(struct xfrm_state_walk *walk); extern struct xfrm_state *xfrm_state_alloc(void); extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, -- cgit v1.2.3 From 271bff7afbb2cbaa81e744006ad2fff1f3e10b1b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 11 Sep 2008 04:48:58 -0700 Subject: net: Add DMA mapping tokens to skb_shared_info. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 909923717830..4b2be23903c4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -146,8 +146,14 @@ struct skb_shared_info { unsigned short gso_segs; unsigned short gso_type; __be32 ip6_frag_id; +#ifdef CONFIG_HAS_DMA + unsigned int num_dma_maps; +#endif struct sk_buff *frag_list; skb_frag_t frags[MAX_SKB_FRAGS]; +#ifdef CONFIG_HAS_DMA + dma_addr_t dma_maps[MAX_SKB_FRAGS + 1]; +#endif }; /* We divide dataref into two halves. The higher 16 bits hold references -- cgit v1.2.3 From a40c24a13366e324bc0ff8c3bb107db89312c984 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 11 Sep 2008 04:51:14 -0700 Subject: net: Add SKB DMA mapping helper functions. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4b2be23903c4..aa80ad9cbc88 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -359,6 +359,14 @@ struct sk_buff { #include +#ifdef CONFIG_HAS_DMA +#include +extern int skb_dma_map(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir); +extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir); +#endif + extern void kfree_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, -- cgit v1.2.3 From 00c5ae2fa0f8191a1b204e71f0ee11359e3b2c06 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 3 Sep 2008 11:26:42 +0800 Subject: mac80211: change MIMO_PS to SM_PS This patch follows 11n spec naming more rigorously replacing MIMO_PS with SM_PS (Spatial Multiplexing Power Save). (Originally submitted as 4 patches, "mac80211: change MIMO_PS to SM_PS", "iwlwifi: change MIMO_PS to SM_PS", "ath9k: change MIMO_PS to SM_PS", and "iwlwifi: remove double definition of SM PS". -- JWL) Signed-off-by: Ron Rindjunsky Signed-off-by: Tomas Winkler Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index be456450cd2e..333d3ae76883 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -708,7 +708,7 @@ struct ieee80211_ht_addt_info { /* 802.11n HT capabilities masks */ #define IEEE80211_HT_CAP_SUP_WIDTH 0x0002 -#define IEEE80211_HT_CAP_MIMO_PS 0x000C +#define IEEE80211_HT_CAP_SM_PS 0x000C #define IEEE80211_HT_CAP_GRN_FLD 0x0010 #define IEEE80211_HT_CAP_SGI_20 0x0020 #define IEEE80211_HT_CAP_SGI_40 0x0040 @@ -737,11 +737,11 @@ struct ieee80211_ht_addt_info { #define IEEE80211_HT_IE_NON_GF_STA_PRSNT 0x0004 #define IEEE80211_HT_IE_NON_HT_STA_PRSNT 0x0010 -/* MIMO Power Save Modes */ -#define WLAN_HT_CAP_MIMO_PS_STATIC 0 -#define WLAN_HT_CAP_MIMO_PS_DYNAMIC 1 -#define WLAN_HT_CAP_MIMO_PS_INVALID 2 -#define WLAN_HT_CAP_MIMO_PS_DISABLED 3 +/* Spatial Multiplexing Power Save Modes */ +#define WLAN_HT_CAP_SM_PS_STATIC 0 +#define WLAN_HT_CAP_SM_PS_DYNAMIC 1 +#define WLAN_HT_CAP_SM_PS_INVALID 2 +#define WLAN_HT_CAP_SM_PS_DISABLED 3 /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 -- cgit v1.2.3 From fe3fa827314b877486c515a001c3e6f604f6f16f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Sep 2008 11:05:09 +0200 Subject: mac80211: make conf_tx non-atomic The conf_tx callback currently needs to be atomic, this requirement is just because it can be called from scanning. This rearranges it slightly to only update while not scanning (which is fine, we'll be getting beacons when associated) and thus removes the atomic requirement. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7c399a9c11da..fb9e62211c34 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1142,7 +1142,7 @@ enum ieee80211_ampdu_mlme_action { * of assocaited station or AP. * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), - * bursting) for a hardware TX queue. Must be atomic. + * bursting) for a hardware TX queue. * * @get_tx_stats: Get statistics of the current TX queue status. This is used * to get number of currently queued packets (queue length), maximum queue -- cgit v1.2.3 From 44d414dbff9d5bf46fc09f2e68567b5848cbbfd3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Sep 2008 17:44:28 +0200 Subject: mac80211: move some HT code out of mlme.c Some of the HT code in mlme.c is misplaced: * constants/definitions belong to the ieee80211.h header * code being used in other modes as well shouldn't be there Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 333d3ae76883..abc1abc63bf0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -643,6 +643,9 @@ struct ieee80211_mgmt { } u; } __attribute__ ((packed)); +/* mgmt header + 1 byte category code */ +#define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) + /* Control frames */ struct ieee80211_rts { @@ -737,6 +740,21 @@ struct ieee80211_ht_addt_info { #define IEEE80211_HT_IE_NON_GF_STA_PRSNT 0x0004 #define IEEE80211_HT_IE_NON_HT_STA_PRSNT 0x0010 +/* block-ack parameters */ +#define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 +#define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C +#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0 +#define IEEE80211_DELBA_PARAM_TID_MASK 0xF000 +#define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800 + +/* + * A-PMDU buffer sizes + * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2) + */ +#define IEEE80211_MIN_AMPDU_BUF 0x8 +#define IEEE80211_MAX_AMPDU_BUF 0x40 + + /* Spatial Multiplexing Power Save Modes */ #define WLAN_HT_CAP_SM_PS_STATIC 0 #define WLAN_HT_CAP_SM_PS_DYNAMIC 1 -- cgit v1.2.3 From 92651940ab00dbe64722e908f70d816713d677b7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2008 16:29:34 -0700 Subject: pkt_sched: Add multiqueue scheduler support This patch is intended to add a qdisc to support the new tx multiqueue architecture by providing a band for each hardware queue. By doing this it is possible to support a different qdisc per physical hardware queue. This qdisc uses the skb->queue_mapping to select which band to place the traffic onto. It then uses a round robin w/ a check to see if the subqueue is stopped to determine which band to dequeue the packet from. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index e5de421ac7b4..5d921fa91a5b 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -123,6 +123,13 @@ struct tc_prio_qopt __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ }; +/* MULTIQ section */ + +struct tc_multiq_qopt { + __u16 bands; /* Number of bands */ + __u16 max_bands; /* Maximum number of queues */ +}; + /* TBF section */ struct tc_tbf_qopt -- cgit v1.2.3 From ca9b0e27e072be4cef2f5f0cbc0b0fd94eae3520 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2008 16:30:20 -0700 Subject: pkt_action: add new action skbedit This new action will have the ability to change the priority and/or queue_mapping fields on an sk_buff. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/tc_act/Kbuild | 1 + include/linux/tc_act/tc_skbedit.h | 44 +++++++++++++++++++++++++++++++++++++++ include/net/tc_act/tc_skbedit.h | 34 ++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 include/linux/tc_act/tc_skbedit.h create mode 100644 include/net/tc_act/tc_skbedit.h (limited to 'include') diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 6dac0d7365cc..76990937f4c9 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -3,3 +3,4 @@ header-y += tc_ipt.h header-y += tc_mirred.h header-y += tc_pedit.h header-y += tc_nat.h +header-y += tc_skbedit.h diff --git a/include/linux/tc_act/tc_skbedit.h b/include/linux/tc_act/tc_skbedit.h new file mode 100644 index 000000000000..a14e461a7af7 --- /dev/null +++ b/include/linux/tc_act/tc_skbedit.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck + */ + +#ifndef __LINUX_TC_SKBEDIT_H +#define __LINUX_TC_SKBEDIT_H + +#include + +#define TCA_ACT_SKBEDIT 11 + +#define SKBEDIT_F_PRIORITY 0x1 +#define SKBEDIT_F_QUEUE_MAPPING 0x2 + +struct tc_skbedit { + tc_gen; +}; + +enum { + TCA_SKBEDIT_UNSPEC, + TCA_SKBEDIT_TM, + TCA_SKBEDIT_PARMS, + TCA_SKBEDIT_PRIORITY, + TCA_SKBEDIT_QUEUE_MAPPING, + __TCA_SKBEDIT_MAX +}; +#define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) + +#endif diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h new file mode 100644 index 000000000000..6abb3ed3ebf7 --- /dev/null +++ b/include/net/tc_act/tc_skbedit.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck + */ + +#ifndef __NET_TC_SKBEDIT_H +#define __NET_TC_SKBEDIT_H + +#include + +struct tcf_skbedit { + struct tcf_common common; + u32 flags; + u32 priority; + u16 queue_mapping; +}; +#define to_skbedit(pc) \ + container_of(pc, struct tcf_skbedit, common) + +#endif /* __NET_TC_SKBEDIT_H */ -- cgit v1.2.3 From b2e1b30290539b344cbaff0d9da38012e03aa347 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 9 Sep 2008 23:19:48 -0700 Subject: cfg80211: Add new wireless regulatory infrastructure This adds the new wireless regulatory infrastructure. The main motiviation behind this was to centralize regulatory code as each driver was implementing their own regulatory solution, and to replace the initial centralized code we have where: * only 3 regulatory domains are supported: US, JP and EU * regulatory domains can only be changed through module parameter * all rules were built statically in the kernel We now have support for regulatory domains for many countries and regulatory domains are now queried through a userspace agent through udev allowing distributions to update regulatory rules without updating the kernel. Each driver can regulatory_hint() a regulatory domain based on either their EEPROM mapped regulatory domain value to a respective ISO/IEC 3166-1 country code or pass an internally built regulatory domain. We also add support to let the user set the regulatory domain through userspace in case of faulty EEPROMs to further help compliance. Support for world roaming will be added soon for cards capable of this. For more information see: http://wireless.kernel.org/en/developers/Regulatory/CRDA For now we leave an option to enable the old module parameter, ieee80211_regdom, and to build the 3 old regdomains statically (US, JP and EU). This option is CONFIG_WIRELESS_OLD_REGULATORY. These old static definitions and the module parameter is being scheduled for removal for 2.6.29. Note that if you use this you won't make use of a world regulatory domain as its pointless. If you leave this option enabled and if CRDA is present and you use US or JP we will try to ask CRDA to update us a regulatory domain for us. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/nl80211.h | 96 +++++++++++++++++++++++++++++++++++++++++++++++-- include/net/cfg80211.h | 60 +++++++++++++++++++++++++++++++ include/net/mac80211.h | 2 ++ include/net/wireless.h | 58 ++++++++++++++++++++++++++++++ 4 files changed, 214 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 5e51f4e7600b..9bad65400fba 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -92,6 +92,20 @@ * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by * %NL80211_ATTR_IFINDEX. * + * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command + * after being queried by the kernel. CRDA replies by sending a regulatory + * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our + * current alpha2 if it found a match. It also provides + * NL80211_ATTR_REG_RULE_FLAGS, and a set of regulatory rules. Each + * regulatory rule is a nested set of attributes given by + * %NL80211_ATTR_REG_RULE_FREQ_[START|END] and + * %NL80211_ATTR_FREQ_RANGE_MAX_BW with an attached power rule given by + * %NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and + * %NL80211_ATTR_REG_RULE_POWER_MAX_EIRP. + * @NL80211_CMD_REQ_SET_REG: ask the wireless core to set the regulatory domain + * to the the specified ISO/IEC 3166-1 alpha2 country code. The core will + * store this as a valid request and then query userspace for it. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -131,7 +145,10 @@ enum nl80211_commands { NL80211_CMD_SET_BSS, - /* add commands here */ + NL80211_CMD_SET_REG, + NL80211_CMD_REQ_SET_REG, + + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ __NL80211_CMD_AFTER_LAST, @@ -197,10 +214,21 @@ enum nl80211_commands { * info given for %NL80211_CMD_GET_MPATH, nested attribute described at * &enum nl80211_mpath_info. * - * * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of * &enum nl80211_mntr_flags. * + * @NL80211_ATTR_REG_ALPHA2: an ISO-3166-alpha2 country code for which the + * current regulatory domain should be set to or is already set to. + * For example, 'CR', for Costa Rica. This attribute is used by the kernel + * to query the CRDA to retrieve one regulatory domain. This attribute can + * also be used by userspace to query the kernel for the currently set + * regulatory domain. We chose an alpha2 as that is also used by the + * IEEE-802.11d country information element to identify a country. + * Users can also simply ask the wireless core to set regulatory domain + * to a specific alpha2. + * @NL80211_ATTR_REG_RULES: a nested array of regulatory domain regulatory + * rules. + * * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1) * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled * (u8, 0 or 1) @@ -265,6 +293,9 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORTED_IFTYPES, + NL80211_ATTR_REG_ALPHA2, + NL80211_ATTR_REG_RULES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -278,6 +309,7 @@ enum nl80211_attrs { #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_MAX_SUPP_RATES 32 +#define NL80211_MAX_SUPP_REG_RULES 32 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 @@ -472,6 +504,66 @@ enum nl80211_bitrate_attr { NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_reg_rule_attr - regulatory rule attributes + * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional + * considerations for a given frequency range. These are the + * &enum nl80211_reg_rule_flags. + * @NL80211_ATTR_FREQ_RANGE_START: starting frequencry for the regulatory + * rule in KHz. This is not a center of frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_END: ending frequency for the regulatory rule + * in KHz. This is not a center a frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this + * frequency range, in KHz. + * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain + * for a given frequency range. The value is in mBi (100 * dBi). + * If you don't have one then don't send this. + * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for + * a given frequency range. The value is in mBm (100 * dBm). + */ +enum nl80211_reg_rule_attr { + __NL80211_REG_RULE_ATTR_INVALID, + NL80211_ATTR_REG_RULE_FLAGS, + + NL80211_ATTR_FREQ_RANGE_START, + NL80211_ATTR_FREQ_RANGE_END, + NL80211_ATTR_FREQ_RANGE_MAX_BW, + + NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN, + NL80211_ATTR_POWER_RULE_MAX_EIRP, + + /* keep last */ + __NL80211_REG_RULE_ATTR_AFTER_LAST, + NL80211_REG_RULE_ATTR_MAX = __NL80211_REG_RULE_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_reg_rule_flags - regulatory rule flags + * + * @NL80211_RRF_NO_OFDM: OFDM modulation not allowed + * @NL80211_RRF_NO_CCK: CCK modulation not allowed + * @NL80211_RRF_NO_INDOOR: indoor operation not allowed + * @NL80211_RRF_NO_OUTDOOR: outdoor operation not allowed + * @NL80211_RRF_DFS: DFS support is required to be used + * @NL80211_RRF_PTP_ONLY: this is only for Point To Point links + * @NL80211_RRF_PTMP_ONLY: this is only for Point To Multi Point links + * @NL80211_RRF_PASSIVE_SCAN: passive scan is required + * @NL80211_RRF_NO_IBSS: no IBSS is allowed + */ +enum nl80211_reg_rule_flags { + NL80211_RRF_NO_OFDM = 1<<0, + NL80211_RRF_NO_CCK = 1<<1, + NL80211_RRF_NO_INDOOR = 1<<2, + NL80211_RRF_NO_OUTDOOR = 1<<3, + NL80211_RRF_DFS = 1<<4, + NL80211_RRF_PTP_ONLY = 1<<5, + NL80211_RRF_PTMP_ONLY = 1<<6, + NL80211_RRF_PASSIVE_SCAN = 1<<7, + NL80211_RRF_NO_IBSS = 1<<8, +}; + /** * enum nl80211_mntr_flags - monitor configuration flags * diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0a72d1e3d3ab..9f40c4d417d7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -287,6 +287,66 @@ struct bss_parameters { int use_short_slot_time; }; +/** + * enum reg_set_by - Indicates who is trying to set the regulatory domain + * @REGDOM_SET_BY_INIT: regulatory domain was set by initialization. We will be + * using a static world regulatory domain by default. + * @REGDOM_SET_BY_CORE: Core queried CRDA for a dynamic world regulatory domain. + * @REGDOM_SET_BY_USER: User asked the wireless core to set the + * regulatory domain. + * @REGDOM_SET_BY_DRIVER: a wireless drivers has hinted to the wireless core + * it thinks its knows the regulatory domain we should be in. + * @REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an 802.11 country + * information element with regulatory information it thinks we + * should consider. + */ +enum reg_set_by { + REGDOM_SET_BY_INIT, + REGDOM_SET_BY_CORE, + REGDOM_SET_BY_USER, + REGDOM_SET_BY_DRIVER, + REGDOM_SET_BY_COUNTRY_IE, +}; + +struct ieee80211_freq_range { + u32 start_freq_khz; + u32 end_freq_khz; + u32 max_bandwidth_khz; +}; + +struct ieee80211_power_rule { + u32 max_antenna_gain; + u32 max_eirp; +}; + +struct ieee80211_reg_rule { + struct ieee80211_freq_range freq_range; + struct ieee80211_power_rule power_rule; + u32 flags; +}; + +struct ieee80211_regdomain { + u32 n_reg_rules; + char alpha2[2]; + struct ieee80211_reg_rule reg_rules[]; +}; + +#define MHZ_TO_KHZ(freq) (freq * 1000) +#define KHZ_TO_MHZ(freq) (freq / 1000) +#define DBI_TO_MBI(gain) (gain * 100) +#define MBI_TO_DBI(gain) (gain / 100) +#define DBM_TO_MBM(gain) (gain * 100) +#define MBM_TO_DBM(gain) (gain / 100) + +#define REG_RULE(start, end, bw, gain, eirp, reg_flags) { \ + .freq_range.start_freq_khz = (start) * 1000, \ + .freq_range.end_freq_khz = (end) * 1000, \ + .freq_range.max_bandwidth_khz = (bw) * 1000, \ + .power_rule.max_antenna_gain = (gain) * 100, \ + .power_rule.max_eirp = (eirp) * 100, \ + .flags = reg_flags, \ + } + /* from net/wireless.h */ struct wiphy; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fb9e62211c34..f504e3eca7d3 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -833,6 +833,8 @@ struct ieee80211_hw { s8 max_signal; }; +struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy); + /** * SET_IEEE80211_DEV - set device for 802.11 hardware * diff --git a/include/net/wireless.h b/include/net/wireless.h index 1dc8ec3daa2f..e4378cc6bf8e 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -60,6 +60,7 @@ enum ieee80211_channel_flags { * with cfg80211. * * @center_freq: center frequency in MHz + * @max_bandwidth: maximum allowed bandwidth for this channel, in MHz * @hw_value: hardware-specific value for the channel * @flags: channel flags from &enum ieee80211_channel_flags. * @orig_flags: channel flags at registration time, used by regulatory @@ -73,6 +74,7 @@ enum ieee80211_channel_flags { struct ieee80211_channel { enum ieee80211_band band; u16 center_freq; + u8 max_bandwidth; u16 hw_value; u32 flags; int max_antenna_gain; @@ -178,6 +180,7 @@ struct ieee80211_supported_band { * struct wiphy - wireless hardware description * @idx: the wiphy index assigned to this item * @class_dev: the class device representing /sys/class/ieee80211/ + * @reg_notifier: the driver's regulatory notification callback */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -197,6 +200,9 @@ struct wiphy { struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS]; + /* Lets us get back the wiphy on the callback */ + int (*reg_notifier)(struct wiphy *wiphy, enum reg_set_by setby); + /* fields below are read-only, assigned by cfg80211 */ /* the item in /sys/class/ieee80211/ points to this, @@ -322,6 +328,58 @@ extern int ieee80211_frequency_to_channel(int freq); */ extern struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, int freq); +/** + * __regulatory_hint - hint to the wireless core a regulatory domain + * @wiphy: if a driver is providing the hint this is the driver's very + * own &struct wiphy + * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain + * should be in. If @rd is set this should be NULL + * @rd: a complete regulatory domain, if passed the caller need not worry + * about freeing it + * + * The Wireless subsystem can use this function to hint to the wireless core + * what it believes should be the current regulatory domain by + * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory + * domain should be in or by providing a completely build regulatory domain. + * + * Returns -EALREADY if *a regulatory domain* has already been set. Note that + * this could be by another driver. It is safe for drivers to continue if + * -EALREADY is returned, if drivers are not capable of world roaming they + * should not register more channels than they support. Right now we only + * support listening to the first driver hint. If the driver is capable + * of world roaming but wants to respect its own EEPROM mappings for + * specific regulatory domains it should register the @reg_notifier callback + * on the &struct wiphy. Returns 0 if the hint went through fine or through an + * intersection operation. Otherwise a standard error code is returned. + * + */ +extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, + const char *alpha2, struct ieee80211_regdomain *rd); +/** + * regulatory_hint - driver hint to the wireless core a regulatory domain + * @wiphy: the driver's very own &struct wiphy + * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain + * should be in. If @rd is set this should be NULL. Note that if you + * set this to NULL you should still set rd->alpha2 to some accepted + * alpha2. + * @rd: a complete regulatory domain provided by the driver. If passed + * the driver does not need to worry about freeing it. + * + * Wireless drivers can use this function to hint to the wireless core + * what it believes should be the current regulatory domain by + * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory + * domain should be in or by providing a completely build regulatory domain. + * If the driver provides an ISO/IEC 3166 alpha2 userspace will be queried + * for a regulatory domain structure for the respective country. If + * a regulatory domain is build and passed you should set the alpha2 + * if possible, otherwise set it to the special value of "99" which tells + * the wireless core it is unknown. If you pass a built regulatory domain + * and we return non zero you are in charge of kfree()'ing the structure. + * + * See __regulatory_hint() documentation for possible return values. + */ +extern int regulatory_hint(struct wiphy *wiphy, + const char *alpha2, struct ieee80211_regdomain *rd); /** * ieee80211_get_channel - get channel struct from wiphy for specified frequency -- cgit v1.2.3 From 5bc75728fd43bb15b46f16ef465bcf9d487393cf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 00:01:51 +0200 Subject: mac80211: fix scan vs. interface removal race When we remove an interface, we can currently end up having a pointer to it left in local->scan_sdata after it has been set down, and then with a hardware scan the scan completion can try to access it which is a bug. Alternatively, a scan that started as a hardware scan may terminate as though it was a software scan, if the timing is just right. On SMP systems, software scan also has a similar problem, just canceling the delayed work and setting a flag isn't enough since it may be running concurrently; in this case we would also never restore state of other interfaces. This patch hopefully fixes the problems by always invoking ieee80211_scan_completed or requiring it to be invoked by the driver, I suspect the drivers that have ->hw_scan() are buggy. The bug will not manifest itself unless you remove the interface while hw-scanning which will also turn off the hw, and then add a new interface which will be unusable until you scan once. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f504e3eca7d3..d67882dd3604 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1124,7 +1124,9 @@ enum ieee80211_ampdu_mlme_action { * @hw_scan: Ask the hardware to service the scan request, no need to start * the scan state machine in stack. The scan must honour the channel * configuration done by the regulatory agent in the wiphy's registered - * bands. + * bands. When the scan finishes, ieee80211_scan_completed() must be + * called; note that it also must be called when the scan cannot finish + * because the hardware is turned off! Anything else is a bug! * * @get_stats: return low-level statistics * -- cgit v1.2.3 From 96dd22ac06b0dbfb069fdf530c72046a941e9694 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 00:01:57 +0200 Subject: mac80211: inform driver of basic rateset Drivers need to know the basic rateset to be able to configure the ACK/CTS programming in hardware correctly. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d67882dd3604..c81e579c17f3 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -160,6 +160,7 @@ struct ieee80211_low_level_stats { * @BSS_CHANGED_ERP_PREAMBLE: preamble changed * @BSS_CHANGED_ERP_SLOT: slot timing changed * @BSS_CHANGED_HT: 802.11n parameters changed + * @BSS_CHANGED_BASIC_RATES: Basic rateset changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -167,6 +168,7 @@ enum ieee80211_bss_change { BSS_CHANGED_ERP_PREAMBLE = 1<<2, BSS_CHANGED_ERP_SLOT = 1<<3, BSS_CHANGED_HT = 1<<4, + BSS_CHANGED_BASIC_RATES = 1<<5, }; /** @@ -187,6 +189,9 @@ enum ieee80211_bss_change { * @assoc_ht: association in HT mode * @ht_conf: ht capabilities * @ht_bss_conf: ht extended capabilities + * @basic_rates: bitmap of basic rates, each bit stands for an + * index into the rate table configured by the driver in + * the current band. */ struct ieee80211_bss_conf { /* association related data */ @@ -200,6 +205,7 @@ struct ieee80211_bss_conf { u16 beacon_int; u16 assoc_capability; u64 timestamp; + u64 basic_rates; /* ht related data */ bool assoc_ht; struct ieee80211_ht_info *ht_conf; -- cgit v1.2.3 From 05c914fe330fa8e1cc67870dc0d3809dfd96c107 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 00:01:58 +0200 Subject: mac80211: use nl80211 interface types There's really no reason for mac80211 to be using its own interface type defines. Use the nl80211 types and simplify the configuration code a bit: there's no need to translate them any more now. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 35 ++++------------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c81e579c17f3..7f5ea55e00f5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -477,33 +477,6 @@ struct ieee80211_conf { struct ieee80211_ht_bss_info ht_bss_conf; }; -/** - * enum ieee80211_if_types - types of 802.11 network interfaces - * - * @IEEE80211_IF_TYPE_INVALID: invalid interface type, not used - * by mac80211 itself - * @IEEE80211_IF_TYPE_AP: interface in AP mode. - * @IEEE80211_IF_TYPE_MGMT: special interface for communication with hostap - * daemon. Drivers should never see this type. - * @IEEE80211_IF_TYPE_STA: interface in STA (client) mode. - * @IEEE80211_IF_TYPE_IBSS: interface in IBSS (ad-hoc) mode. - * @IEEE80211_IF_TYPE_MNTR: interface in monitor (rfmon) mode. - * @IEEE80211_IF_TYPE_WDS: interface in WDS mode. - * @IEEE80211_IF_TYPE_VLAN: VLAN interface bound to an AP, drivers - * will never see this type. - * @IEEE80211_IF_TYPE_MESH_POINT: 802.11s mesh point - */ -enum ieee80211_if_types { - IEEE80211_IF_TYPE_INVALID, - IEEE80211_IF_TYPE_AP, - IEEE80211_IF_TYPE_STA, - IEEE80211_IF_TYPE_IBSS, - IEEE80211_IF_TYPE_MESH_POINT, - IEEE80211_IF_TYPE_MNTR, - IEEE80211_IF_TYPE_WDS, - IEEE80211_IF_TYPE_VLAN, -}; - /** * struct ieee80211_vif - per-interface data * @@ -515,7 +488,7 @@ enum ieee80211_if_types { * sizeof(void *). */ struct ieee80211_vif { - enum ieee80211_if_types type; + enum nl80211_iftype type; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); }; @@ -523,7 +496,7 @@ struct ieee80211_vif { static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) { #ifdef CONFIG_MAC80211_MESH - return vif->type == IEEE80211_IF_TYPE_MESH_POINT; + return vif->type == NL80211_IFTYPE_MESH_POINT; #endif return false; } @@ -534,7 +507,7 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) * @vif: pointer to a driver-use per-interface structure. The pointer * itself is also used for various functions including * ieee80211_beacon_get() and ieee80211_get_buffered_bc(). - * @type: one of &enum ieee80211_if_types constants. Determines the type of + * @type: one of &enum nl80211_iftype constants. Determines the type of * added/removed interface. * @mac_addr: pointer to MAC address of the interface. This pointer is valid * until the interface is removed (i.e. it cannot be used after @@ -550,7 +523,7 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) * in pure monitor mode. */ struct ieee80211_if_init_conf { - enum ieee80211_if_types type; + enum nl80211_iftype type; struct ieee80211_vif *vif; void *mac_addr; }; -- cgit v1.2.3 From 17741cdc264e4d768167766a252210e201c1519a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 00:02:02 +0200 Subject: mac80211: share STA information with driver This patch changes mac80211 to share some more data about stations with drivers. Should help iwlwifi and ath9k when they get around to updating, and might also help with implementing rate control algorithms without internals. Signed-off-by: Johannes Berg Cc: Sujith Manoharan Signed-off-by: John W. Linville --- include/net/mac80211.h | 53 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7f5ea55e00f5..5a6a029da4da 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -300,6 +300,9 @@ enum mac80211_tx_control_flags { * (2) driver internal use (if applicable) * (3) TX status information - driver tells mac80211 what happened * + * The TX control's sta pointer is only valid during the ->tx call, + * it may be NULL. + * * @flags: transmit info flags, defined above * @band: TBD * @tx_rate_idx: TBD @@ -329,8 +332,8 @@ struct ieee80211_tx_info { struct { struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; + struct ieee80211_sta *sta; unsigned long jiffies; - u16 aid; s8 rts_cts_rate_idx, alt_retry_rate_idx; u8 retry_limit; u8 icv_len; @@ -651,6 +654,29 @@ enum set_key_cmd { SET_KEY, DISABLE_KEY, }; +/** + * struct ieee80211_sta - station table entry + * + * A station table entry represents a station we are possibly + * communicating with. Since stations are RCU-managed in + * mac80211, any ieee80211_sta pointer you get access to must + * either be protected by rcu_read_lock() explicitly or implicitly, + * or you must take good care to not use such a pointer after a + * call to your sta_notify callback that removed it. + * + * @addr: MAC address + * @aid: AID we assigned to the station if we're an AP + * @drv_priv: data area for driver use, will always be aligned to + * sizeof(void *), size is determined in hw information. + */ +struct ieee80211_sta { + u8 addr[ETH_ALEN]; + u16 aid; + + /* must be last */ + u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); +}; + /** * enum sta_notify_cmd - sta notify command * @@ -795,6 +821,8 @@ enum ieee80211_hw_flags { * * @vif_data_size: size (in bytes) of the drv_priv data area * within &struct ieee80211_vif. + * @sta_data_size: size (in bytes) of the drv_priv data area + * within &struct ieee80211_sta. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -806,6 +834,7 @@ struct ieee80211_hw { unsigned int extra_tx_headroom; int channel_change_time; int vif_data_size; + int sta_data_size; u16 queues; u16 ampdu_queues; u16 max_listen_interval; @@ -1089,7 +1118,7 @@ enum ieee80211_ampdu_mlme_action { * This callback must be implemented and atomic. * * @set_tim: Set TIM bit. mac80211 calls this function when a TIM bit - * must be set or cleared for a given AID. Must be atomic. + * must be set or cleared for a given STA. Must be atomic. * * @set_key: See the section "Hardware crypto acceleration" * This callback can sleep, and is only called between add_interface @@ -1175,7 +1204,8 @@ struct ieee80211_ops { unsigned int changed_flags, unsigned int *total_flags, int mc_count, struct dev_addr_list *mc_list); - int (*set_tim)(struct ieee80211_hw *hw, int aid, int set); + int (*set_tim)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, + bool set); int (*set_key)(struct ieee80211_hw *hw, enum set_key_cmd cmd, const u8 *local_address, const u8 *address, struct ieee80211_key_conf *key); @@ -1192,7 +1222,7 @@ struct ieee80211_ops { int (*set_retry_limit)(struct ieee80211_hw *hw, u32 short_retry, u32 long_retr); void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum sta_notify_cmd, const u8 *addr); + enum sta_notify_cmd, struct ieee80211_sta *sta); int (*conf_tx)(struct ieee80211_hw *hw, u16 queue, const struct ieee80211_tx_queue_params *params); int (*get_tx_stats)(struct ieee80211_hw *hw, @@ -1202,7 +1232,7 @@ struct ieee80211_ops { int (*tx_last_beacon)(struct ieee80211_hw *hw); int (*ampdu_action)(struct ieee80211_hw *hw, enum ieee80211_ampdu_mlme_action action, - const u8 *addr, u16 tid, u16 *ssn); + struct ieee80211_sta *sta, u16 tid, u16 *ssn); }; /** @@ -1752,4 +1782,17 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra, */ void ieee80211_notify_mac(struct ieee80211_hw *hw, enum ieee80211_notification_types notif_type); + +/** + * ieee80211_find_sta - find a station + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @addr: station's address + * + * This function must be called under RCU lock and the + * resulting pointer is only valid under RCU lock as well. + */ +struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, + const u8 *addr); + #endif /* MAC80211_H */ -- cgit v1.2.3 From 323ce79a9cdbf838ea577677b1ddace8e0b4d4c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 02:45:11 +0200 Subject: mac80211: share sta->supp_rates As more preparation for a saner rate control algorithm API, share the supported rates bitmap in the public API. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5a6a029da4da..ef8e4cc32c2e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -666,10 +666,12 @@ enum set_key_cmd { * * @addr: MAC address * @aid: AID we assigned to the station if we're an AP + * @supp_rates: Bitmap of supported rates (per band) * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. */ struct ieee80211_sta { + u64 supp_rates[IEEE80211_NUM_BANDS]; u8 addr[ETH_ALEN]; u16 aid; -- cgit v1.2.3 From 687c7c0807371aeaa94ff2fff511eeb326b5c5de Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 03:14:11 +0200 Subject: mac80211: share sta_info->ht_info Rate control algorithms may need access to a station's HT capabilities, so share the ht_info struct in the public station API. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ef8e4cc32c2e..d6669fd3ffa8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -667,6 +667,7 @@ enum set_key_cmd { * @addr: MAC address * @aid: AID we assigned to the station if we're an AP * @supp_rates: Bitmap of supported rates (per band) + * @ht_info: HT capabilities of this STA * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. */ @@ -674,6 +675,7 @@ struct ieee80211_sta { u64 supp_rates[IEEE80211_NUM_BANDS]; u8 addr[ETH_ALEN]; u16 aid; + struct ieee80211_ht_info ht_info; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); -- cgit v1.2.3 From 25d834e16294c8dfd923dae6bdb8a055391a99a5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Sep 2008 22:52:47 +0200 Subject: mac80211: fix virtual interfaces vs. injection Currently, virtual interface pointers passed to drivers might be from monitor interfaces and as such completely uninitialised because we do not tell the driver about monitor interfaces when those are created. Instead of passing them, we should therefore indicate to the driver that there is no information; do that by passing a NULL value and adjust drivers to cope with it. As a result, some mac80211 API functions also need to cope with a NULL vif pointer so drivers can still call them unconditionally. Also, when injecting frames we really don't want to pass NULL all the time, if we know we are the source address of a frame and have a local interface for that address, we can to use that interface. This also helps with processing the frame correctly for that interface which will help the 802.11w implementation. It's not entirely correct for VLANs or WDS interfaces because there the MAC address isn't unique, but it's already a lot better than what we do now. Finally, when injecting without a matching local interface, don't assign sequence numbers at all. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d6669fd3ffa8..003e4a03874e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -330,6 +330,7 @@ struct ieee80211_tx_info { union { struct { + /* NB: vif can be NULL for injected frames */ struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; struct ieee80211_sta *sta; -- cgit v1.2.3 From 452c1ce218a68b5dbd626397ecfc65ca89dd3cbb Mon Sep 17 00:00:00 2001 From: Chris Snook Date: Sun, 14 Sep 2008 19:56:10 -0500 Subject: atl2: add atl2 driver Driver for Atheros L2 10/100 network device. Includes necessary changes for Kconfig, Makefile, and pci_ids.h. Signed-off-by: Chris Snook Signed-off-by: Jay Cliburn Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f1624b396754..90a132ab84a6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2213,6 +2213,7 @@ #define PCI_VENDOR_ID_ATTANSIC 0x1969 #define PCI_DEVICE_ID_ATTANSIC_L1 0x1048 +#define PCI_DEVICE_ID_ATTANSIC_L2 0x2048 #define PCI_VENDOR_ID_JMICRON 0x197B #define PCI_DEVICE_ID_JMICRON_JMB360 0x2360 -- cgit v1.2.3 From 01f2e4ead2c51226ed1283ef6a8388ca6f4cff8f Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 15 Sep 2008 09:17:11 -0700 Subject: enic: add Cisco 10G Ethernet NIC driver Signed-off-by: Scott Feldman Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 90a132ab84a6..6f4276d461c0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1411,6 +1411,8 @@ #define PCI_DEVICE_ID_EICON_MAESTRAQ_U 0xe013 #define PCI_DEVICE_ID_EICON_MAESTRAP 0xe014 +#define PCI_VENDOR_ID_CISCO 0x1137 + #define PCI_VENDOR_ID_ZIATECH 0x1138 #define PCI_DEVICE_ID_ZIATECH_5550_HC 0x5550 -- cgit v1.2.3 From 4fd5f812c23c7deee6425f4a318e85c317cd1d6c Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 13:08:46 +0200 Subject: phylib: allow incremental scanning of an mii bus This patch splits the bus scanning code in mdiobus_register() off into a separate function, and makes this function available for calling from external code. This allows incrementally scanning an mii bus, e.g. as information about which addresses are 'safe' to scan becomes available. Signed-off-by: Lennert Buytenhek Acked-by: Andy Fleming --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7224c4099a28..5f170f5b1a30 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -410,6 +410,8 @@ int phy_start_aneg(struct phy_device *phydev); int mdiobus_register(struct mii_bus *bus); void mdiobus_unregister(struct mii_bus *bus); +struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); + void phy_sanitize_settings(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); int phy_enable_interrupts(struct phy_device *phydev); -- cgit v1.2.3 From 02a1416478b70cd49bd74827438c8ba797784728 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 19 Sep 2008 12:44:54 -0700 Subject: net: Fix build with ARCH=um If UM is going to claim that it supports DMA by setting HAS_DMA, it should provide a dma_mapping_error() implementation. Based upon a report by Julius Volz. Signed-off-by: David S. Miller --- include/asm-um/dma-mapping.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/asm-um/dma-mapping.h b/include/asm-um/dma-mapping.h index f0ee4fb55911..90fc708b320e 100644 --- a/include/asm-um/dma-mapping.h +++ b/include/asm-um/dma-mapping.h @@ -118,4 +118,11 @@ dma_cache_sync(struct device *dev, void *vaddr, size_t size, BUG(); } +static inline int +dma_mapping_error(struct device *dev, dma_addr_t dma_handle) +{ + BUG(); + return 0; +} + #endif -- cgit v1.2.3 From 64edc2736e23994e0334b70c5ff08dc33e2ebbd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 20 Sep 2008 21:18:32 -0700 Subject: tcp: Partial hint clearing has again become meaningless MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ie., the difference between partial and all clearing doesn't exists anymore since the SACK optimizations got dropped by an sacktag rewrite. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8983386356a5..b71676326950 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1035,7 +1035,7 @@ static inline void tcp_mib_init(struct net *net) } /* from STCP */ -static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) +static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) { tp->lost_skb_hint = NULL; tp->scoreboard_skb_hint = NULL; @@ -1043,11 +1043,6 @@ static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) tp->forward_skb_hint = NULL; } -static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) -{ - tcp_clear_retrans_hints_partial(tp); -} - /* MD5 Signature */ struct crypto_hash; -- cgit v1.2.3 From 006f582c73f4eda35e06fd323193c3df43fb3459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 20 Sep 2008 21:20:20 -0700 Subject: tcp: convert retransmit_cnt_hint to seqno MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Main benefit in this is that we can then freely point the retransmit_skb_hint to anywhere we want to because there's no longer need to know what would be the count changes involve, and since this is really used only as a terminator, unnecessary work is one time walk at most, and if some retransmissions are necessary after that point later on, the walk is not full waste of time anyway. Since retransmit_high must be kept valid, all lost markers must ensure that. Now I also have learned how those "holes" in the rexmittable skbs can appear, mtu probe does them. So I removed the misleading comment as well. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2e2557388e36..d7637c4b2840 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -358,7 +358,7 @@ struct tcp_sock { */ int lost_cnt_hint; - int retransmit_cnt_hint; + u32 retransmit_high; /* L-bits may be on up to this seqno */ u32 lost_retrans_low; /* Sent seq after any rxmit (lowest) */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b71676326950..d0e90c50722b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -472,6 +472,8 @@ extern void tcp_send_delayed_ack(struct sock *sk); /* tcp_input.c */ extern void tcp_cwnd_application_limited(struct sock *sk); +extern void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, + struct sk_buff *skb); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); -- cgit v1.2.3 From 0e1c54c2a405494281e0639aacc90db03b50ae77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 20 Sep 2008 21:24:21 -0700 Subject: tcp: reorganize retransmit code loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both loops are quite similar, so they can be combined with little effort. As a result, forward_skb_hint becomes obsolete as well. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - include/net/tcp.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d7637c4b2840..767290628292 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -342,7 +342,6 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *scoreboard_skb_hint; struct sk_buff *retransmit_skb_hint; - struct sk_buff *forward_skb_hint; struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d0e90c50722b..220f54cf42ec 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1042,7 +1042,6 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) tp->lost_skb_hint = NULL; tp->scoreboard_skb_hint = NULL; tp->retransmit_skb_hint = NULL; - tp->forward_skb_hint = NULL; } /* MD5 Signature */ -- cgit v1.2.3 From ef9da47c7cc64d69526331f315e76b5680d4048f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 20 Sep 2008 21:25:15 -0700 Subject: tcp: don't clear retransmit_skb_hint when not necessary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most importantly avoid doing it with cumulative ACK. Not clearing means that we no longer need n^2 processing in resolution of each fast recovery. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 220f54cf42ec..ea815723d414 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1037,10 +1037,15 @@ static inline void tcp_mib_init(struct net *net) } /* from STCP */ -static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) +static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) { tp->lost_skb_hint = NULL; tp->scoreboard_skb_hint = NULL; +} + +static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) +{ + tcp_clear_retrans_hints_partial(tp); tp->retransmit_skb_hint = NULL; } -- cgit v1.2.3 From 43f59c89399fd76883a06c551f24794e98409432 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Sep 2008 21:28:51 -0700 Subject: net: Remove __skb_insert() calls outside of skbuff internals. This minor cleanup simplifies later changes which will convert struct sk_buff and friends over to using struct list_head. Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index ea815723d414..f857c3eff710 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1268,12 +1268,12 @@ static inline void tcp_insert_write_queue_after(struct sk_buff *skb, __skb_queue_after(&sk->sk_write_queue, skb, buff); } -/* Insert skb between prev and next on the write queue of sk. */ +/* Insert new before skb on the write queue of sk. */ static inline void tcp_insert_write_queue_before(struct sk_buff *new, struct sk_buff *skb, struct sock *sk) { - __skb_insert(new, skb->prev, skb, &sk->sk_write_queue); + __skb_queue_before(&sk->sk_write_queue, skb, new); if (sk->sk_send_head == skb) sk->sk_send_head = new; -- cgit v1.2.3 From 67fed45930fa31e92c11beb3a3dbf83a1a92a58d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Sep 2008 22:36:24 -0700 Subject: net: Add new interfaces for SKB list light-weight init and splicing. This will be used by subsequent changesets. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 96 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index aa80ad9cbc88..027b06170b40 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -660,6 +660,22 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) return list_->qlen; } +/** + * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head + * @list: queue to initialize + * + * This initializes only the list and queue length aspects of + * an sk_buff_head object. This allows to initialize the list + * aspects of an sk_buff_head without reinitializing things like + * the spinlock. It can also be used for on-stack sk_buff_head + * objects where the spinlock is known to not be used. + */ +static inline void __skb_queue_head_init(struct sk_buff_head *list) +{ + list->prev = list->next = (struct sk_buff *)list; + list->qlen = 0; +} + /* * This function creates a split out lock class for each invocation; * this is needed for now since a whole lot of users of the skb-queue @@ -671,8 +687,7 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) static inline void skb_queue_head_init(struct sk_buff_head *list) { spin_lock_init(&list->lock); - list->prev = list->next = (struct sk_buff *)list; - list->qlen = 0; + __skb_queue_head_init(list); } static inline void skb_queue_head_init_class(struct sk_buff_head *list, @@ -699,6 +714,83 @@ static inline void __skb_insert(struct sk_buff *newsk, list->qlen++; } +static inline void __skb_queue_splice(const struct sk_buff_head *list, + struct sk_buff *prev, + struct sk_buff *next) +{ + struct sk_buff *first = list->next; + struct sk_buff *last = list->prev; + + first->prev = prev; + prev->next = first; + + last->next = next; + next->prev = last; +} + +/** + * skb_queue_splice - join two skb lists, this is designed for stacks + * @list: the new list to add + * @head: the place to add it in the first list + */ +static inline void skb_queue_splice(const struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, (struct sk_buff *) head, head->next); + head->qlen = list->qlen; + } +} + +/** + * skb_queue_splice - join two skb lists and reinitialise the emptied list + * @list: the new list to add + * @head: the place to add it in the first list + * + * The list at @list is reinitialised + */ +static inline void skb_queue_splice_init(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, (struct sk_buff *) head, head->next); + head->qlen = list->qlen; + __skb_queue_head_init(list); + } +} + +/** + * skb_queue_splice_tail - join two skb lists, each list being a queue + * @list: the new list to add + * @head: the place to add it in the first list + */ +static inline void skb_queue_splice_tail(const struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, head->prev, (struct sk_buff *) head); + head->qlen = list->qlen; + } +} + +/** + * skb_queue_splice_tail - join two skb lists and reinitialise the emptied list + * @list: the new list to add + * @head: the place to add it in the first list + * + * Each of the lists is a queue. + * The list at @list is reinitialised + */ +static inline void skb_queue_splice_tail_init(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, head->prev, (struct sk_buff *) head); + head->qlen = list->qlen; + __skb_queue_head_init(list); + } +} + /** * __skb_queue_after - queue a buffer at the list head * @list: list to use -- cgit v1.2.3 From 38783e671399b5405f1fd177d602c400a9577ae6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 01:15:02 -0700 Subject: isdn: isdn_ppp: Use SKB list facilities instead of home-grown implementation. Signed-off-by: David S. Miller --- include/linux/isdn_ppp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h index 8687a7dc0632..4c218ee7587a 100644 --- a/include/linux/isdn_ppp.h +++ b/include/linux/isdn_ppp.h @@ -157,7 +157,7 @@ typedef struct { typedef struct { int mp_mrru; /* unused */ - struct sk_buff * frags; /* fragments sl list -- use skb->next */ + struct sk_buff_head frags; /* fragments sl list */ long frames; /* number of frames in the frame list */ unsigned int seq; /* last processed packet seq #: any packets * with smaller seq # will be dropped -- cgit v1.2.3 From 5c1824587f0797373c95719a196f6098f7c6d20c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 22 Sep 2008 19:48:19 -0700 Subject: ipsec: Fix xfrm_state_walk race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As discovered by Timo Teräs, the currently xfrm_state_walk scheme is racy because if a second dump finishes before the first, we may free xfrm states that the first dump would walk over later. This patch fixes this by storing the dumps in a list in order to calculate the correct completion counter which cures this problem. I've expanded netlink_cb in order to accomodate the extra state related to this. It shouldn't be a big deal since netlink_cb is kmalloced for each dump and we're just increasing it by 4 or 8 bytes. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- include/net/xfrm.h | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9ff1b54908f3..cbba7760545b 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -220,7 +220,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[6]; + long args[7]; }; struct netlink_notify diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4bb94992b5fa..48630b266593 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1246,6 +1246,8 @@ struct xfrm6_tunnel { }; struct xfrm_state_walk { + struct list_head list; + unsigned long genid; struct xfrm_state *state; int count; u8 proto; @@ -1281,13 +1283,7 @@ static inline void xfrm6_fini(void) extern int xfrm_proc_init(void); #endif -static inline void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) -{ - walk->proto = proto; - walk->state = NULL; - walk->count = 0; -} - +extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); extern int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); extern void xfrm_state_walk_done(struct xfrm_state_walk *walk); -- cgit v1.2.3 From bce7b15426cac3000bf6a9cf59d9356ef0be2dec Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 19:51:15 -0700 Subject: Phonet: global definitions Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/if_phonet.h | 14 ++++++ include/linux/phonet.h | 125 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 4 ++ include/linux/socket.h | 4 +- 5 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 include/linux/if_phonet.h create mode 100644 include/linux/phonet.h (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 5028e0b6082b..723a1c5fbc6c 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -100,6 +100,7 @@ #define ETH_P_ECONET 0x0018 /* Acorn Econet */ #define ETH_P_HDLC 0x0019 /* HDLC frames */ #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ +#define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ /* * This is an Ethernet frame header. diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h new file mode 100644 index 000000000000..22df25fbc4e2 --- /dev/null +++ b/include/linux/if_phonet.h @@ -0,0 +1,14 @@ +/* + * File: if_phonet.h + * + * Phonet interface kernel definitions + * + * Copyright (C) 2008 Nokia Corporation. All rights reserved. + */ + +#define PHONET_HEADER_LEN 8 /* Phonet header length */ + +#define PHONET_MIN_MTU 6 +/* 6 bytes header + 65535 bytes payload */ +#define PHONET_MAX_MTU 65541 +#define PHONET_DEV_MTU PHONET_MAX_MTU diff --git a/include/linux/phonet.h b/include/linux/phonet.h new file mode 100644 index 000000000000..6a764f8584a4 --- /dev/null +++ b/include/linux/phonet.h @@ -0,0 +1,125 @@ +/** + * file phonet.h + * + * Phonet sockets kernel interface + * + * Copyright (C) 2008 Nokia Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef LINUX_PHONET_H +#define LINUX_PHONET_H + +/* Automatic protocol selection */ +#define PN_PROTO_TRANSPORT 0 +/* Phonet datagram socket */ +#define PN_PROTO_PHONET 1 +#define PHONET_NPROTO 2 + +#define PNADDR_ANY 0 +#define PNPORT_RESOURCE_ROUTING 0 + +/* Phonet protocol header */ +struct phonethdr { + __u8 pn_rdev; + __u8 pn_sdev; + __u8 pn_res; + __be16 pn_length; + __u8 pn_robj; + __u8 pn_sobj; +} __attribute__((packed)); + +/* Phonet socket address structure */ +struct sockaddr_pn { + sa_family_t spn_family; + __u8 spn_obj; + __u8 spn_dev; + __u8 spn_resource; + __u8 spn_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - 3]; +} __attribute__ ((packed)); + +static inline __u16 pn_object(__u8 addr, __u16 port) +{ + return (addr << 8) | (port & 0x3ff); +} + +static inline __u8 pn_obj(__u16 handle) +{ + return handle & 0xff; +} + +static inline __u8 pn_dev(__u16 handle) +{ + return handle >> 8; +} + +static inline __u16 pn_port(__u16 handle) +{ + return handle & 0x3ff; +} + +static inline __u8 pn_addr(__u16 handle) +{ + return (handle >> 8) & 0xfc; +} + +static inline void pn_sockaddr_set_addr(struct sockaddr_pn *spn, __u8 addr) +{ + spn->spn_dev &= 0x03; + spn->spn_dev |= addr & 0xfc; +} + +static inline void pn_sockaddr_set_port(struct sockaddr_pn *spn, __u16 port) +{ + spn->spn_dev &= 0xfc; + spn->spn_dev |= (port >> 8) & 0x03; + spn->spn_obj = port & 0xff; +} + +static inline void pn_sockaddr_set_object(struct sockaddr_pn *spn, + __u16 handle) +{ + spn->spn_dev = pn_dev(handle); + spn->spn_obj = pn_obj(handle); +} + +static inline void pn_sockaddr_set_resource(struct sockaddr_pn *spn, + __u8 resource) +{ + spn->spn_resource = resource; +} + +static inline __u8 pn_sockaddr_get_addr(const struct sockaddr_pn *spn) +{ + return spn->spn_dev & 0xfc; +} + +static inline __u16 pn_sockaddr_get_port(const struct sockaddr_pn *spn) +{ + return ((spn->spn_dev & 0x03) << 8) | spn->spn_obj; +} + +static inline __u16 pn_sockaddr_get_object(const struct sockaddr_pn *spn) +{ + return pn_object(spn->spn_dev, spn->spn_obj); +} + +static inline __u8 pn_sockaddr_get_resource(const struct sockaddr_pn *spn) +{ + return spn->spn_resource; +} + +#endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ca643b13b026..2b3d51c6ec9c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -582,6 +582,10 @@ enum rtnetlink_groups { #define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE RTNLGRP_ND_USEROPT, #define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT + RTNLGRP_PHONET_IFADDR, +#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR + RTNLGRP_PHONET_ROUTE, +#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/include/linux/socket.h b/include/linux/socket.h index dc5086fe7736..818ca33bf79f 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -190,7 +190,8 @@ struct ucred { #define AF_IUCV 32 /* IUCV sockets */ #define AF_RXRPC 33 /* RxRPC sockets */ #define AF_ISDN 34 /* mISDN sockets */ -#define AF_MAX 35 /* For now.. */ +#define AF_PHONET 35 /* Phonet sockets */ +#define AF_MAX 36 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -227,6 +228,7 @@ struct ucred { #define PF_IUCV AF_IUCV #define PF_RXRPC AF_RXRPC #define PF_ISDN AF_ISDN +#define PF_PHONET AF_PHONET #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ -- cgit v1.2.3 From 4b07b3f69a8471cdc142c51461a331226fef248a Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:02:10 -0700 Subject: Phonet: PF_PHONET protocol family support This is the basis for the Phonet protocol families, and introduces the ETH_P_PHONET packet type and the PF_PHONET socket family. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 74 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 include/net/phonet/phonet.h (limited to 'include') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h new file mode 100644 index 000000000000..c53f2abc0595 --- /dev/null +++ b/include/net/phonet/phonet.h @@ -0,0 +1,74 @@ +/* + * File: af_phonet.h + * + * Phonet sockets kernel definitions + * + * Copyright (C) 2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef AF_PHONET_H +#define AF_PHONET_H + +/* + * The lower layers may not require more space, ever. Make sure it's + * enough. + */ +#define MAX_PHONET_HEADER 8 + +static inline struct phonethdr *pn_hdr(struct sk_buff *skb) +{ + return (struct phonethdr *)skb_network_header(skb); +} + +/* + * Get the other party's sockaddr from received skb. The skb begins + * with a Phonet header. + */ +static inline +void pn_skb_get_src_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa) +{ + struct phonethdr *ph = pn_hdr(skb); + u16 obj = pn_object(ph->pn_sdev, ph->pn_sobj); + + sa->spn_family = AF_PHONET; + pn_sockaddr_set_object(sa, obj); + pn_sockaddr_set_resource(sa, ph->pn_res); + memset(sa->spn_zero, 0, sizeof(sa->spn_zero)); +} + +static inline +void pn_skb_get_dst_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa) +{ + struct phonethdr *ph = pn_hdr(skb); + u16 obj = pn_object(ph->pn_rdev, ph->pn_robj); + + sa->spn_family = AF_PHONET; + pn_sockaddr_set_object(sa, obj); + pn_sockaddr_set_resource(sa, ph->pn_res); + memset(sa->spn_zero, 0, sizeof(sa->spn_zero)); +} + +/* Protocols in Phonet protocol family. */ +struct phonet_protocol { + struct proto *prot; + int sock_type; +}; + +int phonet_proto_register(int protocol, struct phonet_protocol *pp); +void phonet_proto_unregister(int protocol, struct phonet_protocol *pp); + +#endif -- cgit v1.2.3 From f8ff60283de2b6775d7a14619056a08e3083bd40 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:03:44 -0700 Subject: Phonet: network device and address handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides support for adding Phonet addresses to and removing Phonet addresses from network devices. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pn_dev.h | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 include/net/phonet/pn_dev.h (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h new file mode 100644 index 000000000000..bbd2a836e04c --- /dev/null +++ b/include/net/phonet/pn_dev.h @@ -0,0 +1,50 @@ +/* + * File: pn_dev.h + * + * Phonet network device + * + * Copyright (C) 2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef PN_DEV_H +#define PN_DEV_H + +struct phonet_device_list { + struct list_head list; + spinlock_t lock; +}; + +extern struct phonet_device_list pndevs; + +struct phonet_device { + struct list_head list; + struct net_device *netdev; + DECLARE_BITMAP(addrs, 64); +}; + +void phonet_device_init(void); +void phonet_device_exit(void); +struct net_device *phonet_device_get(struct net *net); + +int phonet_address_add(struct net_device *dev, u8 addr); +int phonet_address_del(struct net_device *dev, u8 addr); +u8 phonet_address_get(struct net_device *dev, u8 addr); +int phonet_address_lookup(u8 addr); + +#define PN_NO_ADDR 0xff + +#endif -- cgit v1.2.3 From 8fb397406f6470f79040c41eec49af20900a9e3b Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:04:30 -0700 Subject: Phonet: Netlink interface This provides support for configuring Phonet addresses, notifying Phonet configuration changes, and dumping the configuration. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index c53f2abc0595..8b777943d201 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -71,4 +71,5 @@ struct phonet_protocol { int phonet_proto_register(int protocol, struct phonet_protocol *pp); void phonet_proto_unregister(int protocol, struct phonet_protocol *pp); +void phonet_netlink_register(void); #endif -- cgit v1.2.3 From ba113a94b7503ee23ffe819e7045134b0c1d31de Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:05:19 -0700 Subject: Phonet: common socket glue This provides the socket API for the Phonet protocols family. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 3 +++ include/net/phonet/phonet.h | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 6a764f8584a4..001c0e679099 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -32,6 +32,9 @@ #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 +/* ioctls */ +#define SIOCPNGETOBJECT (SIOCPROTOPRIVATE + 0) + /* Phonet protocol header */ struct phonethdr { __u8 pn_rdev; diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 8b777943d201..2ae5cbb59b60 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -29,6 +29,28 @@ */ #define MAX_PHONET_HEADER 8 +/* + * Every Phonet* socket has this structure first in its + * protocol-specific structure under name c. + */ +struct pn_sock { + struct sock sk; + u16 sobject; + u8 resource; +}; + +static inline struct pn_sock *pn_sk(struct sock *sk) +{ + return (struct pn_sock *)sk; +} + +extern const struct proto_ops phonet_dgram_ops; + +struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *sa); +void pn_sock_hash(struct sock *sk); +void pn_sock_unhash(struct sock *sk); +int pn_sock_get_port(struct sock *sk, unsigned short sport); + static inline struct phonethdr *pn_hdr(struct sk_buff *skb) { return (struct phonethdr *)skb_network_header(skb); @@ -64,6 +86,7 @@ void pn_skb_get_dst_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa) /* Protocols in Phonet protocol family. */ struct phonet_protocol { + const struct proto_ops *ops; struct proto *prot; int sock_type; }; -- cgit v1.2.3 From 107d0d9b8d9a236883db72841fb61cedd5be845e Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:05:57 -0700 Subject: Phonet: Phonet datagram transport protocol This provides the basic SOCK_DGRAM transport protocol for Phonet. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 2ae5cbb59b60..d3957d3be0fd 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -51,6 +51,9 @@ void pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int pn_sock_get_port(struct sock *sk, unsigned short sport); +int pn_skb_send(struct sock *sk, struct sk_buff *skb, + const struct sockaddr_pn *target); + static inline struct phonethdr *pn_hdr(struct sk_buff *skb) { return (struct phonethdr *)skb_network_header(skb); @@ -95,4 +98,7 @@ int phonet_proto_register(int protocol, struct phonet_protocol *pp); void phonet_proto_unregister(int protocol, struct phonet_protocol *pp); void phonet_netlink_register(void); +int isi_register(void); +void isi_unregister(void); + #endif -- cgit v1.2.3 From 5f77076d75d35c9f5619e1f9d7e7428a627f65e6 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:08:04 -0700 Subject: Phonet: provide MAC header operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_phonet.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h index 22df25fbc4e2..7e989216ec17 100644 --- a/include/linux/if_phonet.h +++ b/include/linux/if_phonet.h @@ -12,3 +12,7 @@ /* 6 bytes header + 65535 bytes payload */ #define PHONET_MAX_MTU 65541 #define PHONET_DEV_MTU PHONET_MAX_MTU + +#ifdef __KERNEL__ +extern struct header_ops phonet_header_ops; +#endif -- cgit v1.2.3 From 87ab4e20b445c6d2d2727ab4f96fa17f7259511e Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:08:39 -0700 Subject: Phonet: proc interface for port range Phonet endpoints are bound to individual ports. This provides a /proc/sys/net/phonet (or sysctl) interface for selecting the range of automatically allocated ports (much like the ip_local_port_range with IPv4). Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index d3957d3be0fd..1c6f7e7d5fea 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -47,6 +47,7 @@ static inline struct pn_sock *pn_sk(struct sock *sk) extern const struct proto_ops phonet_dgram_ops; struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *sa); +void phonet_get_local_port_range(int *min, int *max); void pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int pn_sock_get_port(struct sock *sk, unsigned short sport); @@ -97,6 +98,8 @@ struct phonet_protocol { int phonet_proto_register(int protocol, struct phonet_protocol *pp); void phonet_proto_unregister(int protocol, struct phonet_protocol *pp); +int phonet_sysctl_init(void); +void phonet_sysctl_exit(void); void phonet_netlink_register(void); int isi_register(void); void isi_unregister(void); -- cgit v1.2.3 From be0c52bfed7f7828494fa00060efd5d758e92580 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:09:13 -0700 Subject: Phonet: emit errors when a packet cannot be delivered locally When there is no listener socket for a received packet, send an error back to the sender. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 32 ++++++++++++++++++++++++++++++++ include/net/phonet/phonet.h | 5 +++++ 2 files changed, 37 insertions(+) (limited to 'include') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 001c0e679099..3a027f588a4a 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -45,6 +45,38 @@ struct phonethdr { __u8 pn_sobj; } __attribute__((packed)); +/* Common Phonet payload header */ +struct phonetmsg { + __u8 pn_trans_id; /* transaction ID */ + __u8 pn_msg_id; /* message type */ + union { + struct { + __u8 pn_submsg_id; /* message subtype */ + __u8 pn_data[5]; + } base; + struct { + __u16 pn_e_res_id; /* extended resource ID */ + __u8 pn_e_submsg_id; /* message subtype */ + __u8 pn_e_data[3]; + } ext; + } pn_msg_u; +}; +#define PN_COMMON_MESSAGE 0xF0 +#define PN_PREFIX 0xE0 /* resource for extended messages */ +#define pn_submsg_id pn_msg_u.base.pn_submsg_id +#define pn_e_submsg_id pn_msg_u.ext.pn_e_submsg_id +#define pn_e_res_id pn_msg_u.ext.pn_e_res_id +#define pn_data pn_msg_u.base.pn_data +#define pn_e_data pn_msg_u.ext.pn_e_data + +/* data for unreachable errors */ +#define PN_COMM_SERVICE_NOT_IDENTIFIED_RESP 0x01 +#define PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP 0x14 +#define pn_orig_msg_id pn_data[0] +#define pn_status pn_data[1] +#define pn_e_orig_msg_id pn_e_data[0] +#define pn_e_status pn_e_data[1] + /* Phonet socket address structure */ struct sockaddr_pn { sa_family_t spn_family; diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 1c6f7e7d5fea..d4e72508e145 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -60,6 +60,11 @@ static inline struct phonethdr *pn_hdr(struct sk_buff *skb) return (struct phonethdr *)skb_network_header(skb); } +static inline struct phonetmsg *pn_msg(struct sk_buff *skb) +{ + return (struct phonetmsg *)skb_transport_header(skb); +} + /* * Get the other party's sockaddr from received skb. The skb begins * with a Phonet header. -- cgit v1.2.3 From 0b815a1a6d43ab498674b8430c8c35ab08487a16 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 22 Sep 2008 21:28:11 -0700 Subject: net: network device name ifalias support This patch add support for keeping an additional character alias associated with an network interface. This is useful for maintaining the SNMP ifAlias value which is a user defined value. Routers use this to hold information like which circuit or line it is connected to. It is just an arbitrary text label on the network device. There are two exposed interfaces with this patch, the value can be read/written either via netlink or sysfs. This could be maintained just by the snmp daemon, but it is more generally useful for other management tools, and the kernel is good place to act as an agreed upon interface to store it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/if.h | 1 + include/linux/if_link.h | 1 + include/linux/netdevice.h | 3 +++ 3 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/if.h b/include/linux/if.h index 5c9d1fa93fef..65246846c844 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -24,6 +24,7 @@ #include /* for "__user" et al */ #define IFNAMSIZ 16 +#define IFALIASZ 256 #include /* Standard interface flags (netdevice->flags). */ diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 84c3492ae5cb..f9032c88716a 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -79,6 +79,7 @@ enum IFLA_LINKINFO, #define IFLA_LINKINFO IFLA_LINKINFO IFLA_NET_NS_PID, + IFLA_IFALIAS, __IFLA_MAX }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 488c56e649b5..d675df08b946 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -471,6 +471,8 @@ struct net_device char name[IFNAMSIZ]; /* device name hash chain */ struct hlist_node name_hlist; + /* snmp alias */ + char *ifalias; /* * I/O specific fields @@ -1224,6 +1226,7 @@ extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); extern int dev_change_name(struct net_device *, char *); +extern int dev_set_alias(struct net_device *, const char *, size_t); extern int dev_change_net_namespace(struct net_device *, struct net *, const char *); extern int dev_set_mtu(struct net_device *, int); -- cgit v1.2.3 From 1d4a31dde95af56edac4dee268249a29a21fa7c0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 21:57:21 -0700 Subject: net: Fix bus in SKB queue splicing interfaces. Handle the case of head being non-empty, by adding list->qlen to head->qlen instead of using direct assignment. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 027b06170b40..4a144e8d0538 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -738,7 +738,7 @@ static inline void skb_queue_splice(const struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, (struct sk_buff *) head, head->next); - head->qlen = list->qlen; + head->qlen += list->qlen; } } @@ -754,7 +754,7 @@ static inline void skb_queue_splice_init(struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, (struct sk_buff *) head, head->next); - head->qlen = list->qlen; + head->qlen += list->qlen; __skb_queue_head_init(list); } } @@ -769,7 +769,7 @@ static inline void skb_queue_splice_tail(const struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, head->prev, (struct sk_buff *) head); - head->qlen = list->qlen; + head->qlen += list->qlen; } } @@ -786,7 +786,7 @@ static inline void skb_queue_splice_tail_init(struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, head->prev, (struct sk_buff *) head); - head->qlen = list->qlen; + head->qlen += list->qlen; __skb_queue_head_init(list); } } -- cgit v1.2.3 From 3d09274cc9d816d62945408840a9cb76a5e7aac7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 22:14:36 -0700 Subject: sctp: Use skb_queue_walk_safe() and skb_queue_split_tail_init(). Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 17b932b8a55a..703305d00365 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -406,10 +406,7 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id); /* A macro to walk a list of skbs. */ #define sctp_skb_for_each(pos, head, tmp) \ -for (pos = (head)->next;\ - tmp = (pos)->next, pos != ((struct sk_buff *)(head));\ - pos = tmp) - + skb_queue_walk_safe(head, pos, tmp) /* A helper to append an entire skb list (list) to another (head). */ static inline void sctp_skb_list_tail(struct sk_buff_head *list, @@ -420,10 +417,7 @@ static inline void sctp_skb_list_tail(struct sk_buff_head *list, sctp_spin_lock_irqsave(&head->lock, flags); sctp_spin_lock(&list->lock); - list_splice((struct list_head *)list, (struct list_head *)head->prev); - - head->qlen += list->qlen; - list->qlen = 0; + skb_queue_splice_tail_init(list, head); sctp_spin_unlock(&list->lock); sctp_spin_unlock_irqrestore(&head->lock, flags); -- cgit v1.2.3 From 242f8bfefe4bed626df4e4727ac8f315d80b567a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 22:15:30 -0700 Subject: pkt_sched: Make qdisc->gso_skb a list. The idea is that we can use this to get rid of ->requeue(). Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e5569625d2a5..3b983e8a0555 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -52,7 +52,7 @@ struct Qdisc u32 parent; atomic_t refcnt; unsigned long state; - struct sk_buff *gso_skb; + struct sk_buff_head requeue; struct sk_buff_head q; struct netdev_queue *dev_queue; struct Qdisc *next_sched; -- cgit v1.2.3 From fc7ebb212d3e51d1188948d975aa93dbb0f58b25 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:34:07 -0700 Subject: net: Add skb_queue_is_last(). Several bits of code want to know "is this the last SKB in a queue", and all of them implement this by hand. Provide an common interface to make this check. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4a144e8d0538..3a5838da160e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -472,6 +472,19 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) return list->next == (struct sk_buff *)list; } +/** + * skb_queue_is_last - check if skb is the last entry in the queue + * @list: queue head + * @skb: buffer + * + * Returns true if @skb is the last buffer on the list. + */ +static inline bool skb_queue_is_last(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + return (skb->next == (struct sk_buff *) list); +} + /** * skb_get - reference buffer * @skb: buffer to reference -- cgit v1.2.3 From d258b4914bcda9177bcc7bbd8e1a97b281b460af Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:34:37 -0700 Subject: tcp: Use skb_queue_is_last() instead of by-hand version. Signed-off-by: David S. Miller --- include/net/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index f857c3eff710..5c5327e9a557 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1284,10 +1284,10 @@ static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) __skb_unlink(skb, &sk->sk_write_queue); } -static inline int tcp_skb_is_last(const struct sock *sk, - const struct sk_buff *skb) +static inline bool tcp_skb_is_last(const struct sock *sk, + const struct sk_buff *skb) { - return skb->next == (struct sk_buff *)&sk->sk_write_queue; + return skb_queue_is_last(&sk->sk_write_queue, skb); } static inline int tcp_write_queue_empty(struct sock *sk) -- cgit v1.2.3 From 249c8b42c7e5e6f33d0ff983041f08278b137e53 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:44:42 -0700 Subject: net: Add skb_queue_next(). A lot of code wants to iterate over an SKB queue at the top level using it's own control structure and iterator scheme. Provide skb_queue_next(), which is only valid to invoke if skb_queue_is_last() returns false. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a5838da160e..d2f1778877d7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -485,6 +485,24 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, return (skb->next == (struct sk_buff *) list); } +/** + * skb_queue_next - return the next packet in the queue + * @list: queue head + * @skb: current buffer + * + * Return the next packet in @list after @skb. It is only valid to + * call this if skb_queue_is_last() evaluates to false. + */ +static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + /* This BUG_ON may seem severe, but if we just return then we + * are going to dereference garbage. + */ + BUG_ON(skb_queue_is_last(list, skb)); + return skb->next; +} + /** * skb_get - reference buffer * @skb: buffer to reference -- cgit v1.2.3 From 1164f52a244204830c7625b3c22812781996d7b4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:49:44 -0700 Subject: net: Add skb_queue_walk_from() and skb_queue_walk_from_safe(). These will be used by TCP write queue handling and elsewhere. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d2f1778877d7..a19ea43fea02 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1571,6 +1571,15 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->next) +#define skb_queue_walk_from(queue, skb) \ + for (; prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + skb = skb->next) + +#define skb_queue_walk_from_safe(queue, skb, tmp) \ + for (tmp = skb->next; \ + skb != (struct sk_buff *)(queue); \ + skb = tmp, tmp = skb->next) + #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ -- cgit v1.2.3 From cd07a8ea0dd4b204919b4c9ced8d9efdd9924495 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:50:13 -0700 Subject: tcp: Use SKB queue handling interfaces instead of by-hand versions. Signed-off-by: David S. Miller --- include/net/tcp.h | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 5c5327e9a557..12c9b4fec040 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1181,49 +1181,45 @@ static inline void tcp_write_queue_purge(struct sock *sk) static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) { - struct sk_buff *skb = sk->sk_write_queue.next; - if (skb == (struct sk_buff *) &sk->sk_write_queue) - return NULL; - return skb; + return skb_peek(&sk->sk_write_queue); } static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk) { - struct sk_buff *skb = sk->sk_write_queue.prev; - if (skb == (struct sk_buff *) &sk->sk_write_queue) - return NULL; - return skb; + return skb_peek_tail(&sk->sk_write_queue); } static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb) { - return skb->next; + return skb_queue_next(&sk->sk_write_queue, skb); } #define tcp_for_write_queue(skb, sk) \ - for (skb = (sk)->sk_write_queue.next; \ - (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ - skb = skb->next) + skb_queue_walk(&(sk)->sk_write_queue, skb) #define tcp_for_write_queue_from(skb, sk) \ - for (; (skb != (struct sk_buff *)&(sk)->sk_write_queue);\ - skb = skb->next) + skb_queue_walk_from(&(sk)->sk_write_queue, skb) #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ - for (tmp = skb->next; \ - (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ - skb = tmp, tmp = skb->next) + skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) static inline struct sk_buff *tcp_send_head(struct sock *sk) { return sk->sk_send_head; } +static inline bool tcp_skb_is_last(const struct sock *sk, + const struct sk_buff *skb) +{ + return skb_queue_is_last(&sk->sk_write_queue, skb); +} + static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb) { - sk->sk_send_head = skb->next; - if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) + if (tcp_skb_is_last(sk, skb)) sk->sk_send_head = NULL; + else + sk->sk_send_head = tcp_write_queue_next(sk, skb); } static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) @@ -1284,12 +1280,6 @@ static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) __skb_unlink(skb, &sk->sk_write_queue); } -static inline bool tcp_skb_is_last(const struct sock *sk, - const struct sk_buff *skb) -{ - return skb_queue_is_last(&sk->sk_write_queue, skb); -} - static inline int tcp_write_queue_empty(struct sock *sk) { return skb_queue_empty(&sk->sk_write_queue); -- cgit v1.2.3 From f4ab543201992fe499bef5c406e09f23aa97b4d5 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 23 Sep 2008 01:05:56 -0700 Subject: pkt_sched: Remove the tx queue state check in qdisc_run() The current check wrongly uses the state of one (currently the first) tx queue for all tx queues in case of non-default qdiscs. This check mainly prevented requeuing loop with __netif_schedule(), but now it's controlled inside __qdisc_run(), while dequeuing. The wrongness of this check was first noticed by Herbert Xu. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index b786a5b09253..4082f39f5079 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -90,10 +90,7 @@ extern void __qdisc_run(struct Qdisc *q); static inline void qdisc_run(struct Qdisc *q) { - struct netdev_queue *txq = q->dev_queue; - - if (!netif_tx_queue_stopped(txq) && - !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) + if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) __qdisc_run(q); } -- cgit v1.2.3 From e07aa3783e9f66b03d72e7afd9f709d7f7059662 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Sep 2008 13:11:19 +0200 Subject: cfg80211: fix code ordering in header file Luis added the regulatory hint stuff to this file without observing that __ieee80211_get_channel and ieee80211_get_channel really belong together. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/wireless.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/wireless.h b/include/net/wireless.h index e4378cc6bf8e..0de7fde9f336 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -328,6 +328,15 @@ extern int ieee80211_frequency_to_channel(int freq); */ extern struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, int freq); +/** + * ieee80211_get_channel - get channel struct from wiphy for specified frequency + */ +static inline struct ieee80211_channel * +ieee80211_get_channel(struct wiphy *wiphy, int freq) +{ + return __ieee80211_get_channel(wiphy, freq); +} + /** * __regulatory_hint - hint to the wireless core a regulatory domain * @wiphy: if a driver is providing the hint this is the driver's very @@ -380,13 +389,4 @@ extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, */ extern int regulatory_hint(struct wiphy *wiphy, const char *alpha2, struct ieee80211_regdomain *rd); - -/** - * ieee80211_get_channel - get channel struct from wiphy for specified frequency - */ -static inline struct ieee80211_channel * -ieee80211_get_channel(struct wiphy *wiphy, int freq) -{ - return __ieee80211_get_channel(wiphy, freq); -} #endif /* __NET_WIRELESS_H */ -- cgit v1.2.3 From 60719ffd721f6764b7d07ca188c0d944a4330b69 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 16 Sep 2008 14:55:09 +0200 Subject: cfg80211: show interface type This patch makes cfg80211 show the interface in the nl80211 information about a specific interface. API users are required to keep the type updated (everything else is fairly complicated) but you will get a warning if you fail to keep it updated. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 6 ++++-- include/net/wireless.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9f40c4d417d7..0e85ec39b638 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -363,11 +363,13 @@ struct wiphy; * wireless extensions but this is subject to reevaluation as soon as this * code is used more widely and we have a first user without wext. * - * @add_virtual_intf: create a new virtual interface with the given name + * @add_virtual_intf: create a new virtual interface with the given name, + * must set the struct wireless_dev's iftype. * * @del_virtual_intf: remove the virtual interface determined by ifindex. * - * @change_virtual_intf: change type of virtual interface + * @change_virtual_intf: change type/configuration of virtual interface, + * keep the struct wireless_dev's iftype updated. * * @add_key: add a key with the given parameters. @mac_addr will be %NULL * when adding a group key. diff --git a/include/net/wireless.h b/include/net/wireless.h index 0de7fde9f336..721efb363db7 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -223,9 +223,11 @@ struct wiphy { * the netdev.) * * @wiphy: pointer to hardware description + * @iftype: interface type */ struct wireless_dev { struct wiphy *wiphy; + enum nl80211_iftype iftype; /* private to the generic wireless code */ struct list_head list; -- cgit v1.2.3 From 79617deeebb9cf089e2bc2aad19743b1209043f6 Mon Sep 17 00:00:00 2001 From: YanBo Date: Mon, 22 Sep 2008 13:30:32 +0800 Subject: mac80211: mesh portal functionality support Currently the mesh code doesn't support bridging mesh point interfaces with wired ethernet or AP to construct an MPP or MAP. This patch adds code to support the "6 address frame format packet" functionality to mesh point interfaces. Now the mesh network can be used as backhaul for end to end communication. Signed-off-by: Li YanBo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index abc1abc63bf0..14126bc36641 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -471,6 +471,11 @@ struct ieee80211s_hdr { u8 eaddr3[6]; } __attribute__ ((packed)); +/* Mesh flags */ +#define MESH_FLAGS_AE_A4 0x1 +#define MESH_FLAGS_AE_A5_A6 0x2 +#define MESH_FLAGS_PS_DEEP 0x4 + /** * struct ieee80211_quiet_ie * -- cgit v1.2.3 From 4b7679a561e552eeda1e3567119bef2bca99b66e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Sep 2008 18:14:18 +0200 Subject: mac80211: clean up rate control API Long awaited, hard work. This patch totally cleans up the rate control API to remove the requirement to include internal headers outside of net/mac80211/. There's one internal use in the PID algorithm left for mesh networking, we'll have to figure out a way to clean that one up and decide how to do the peer link evaluation, possibly independent of the rate control algorithm or via new API. Additionally, ath9k is left using the cross-inclusion hack for now, we will add new API where necessary to make this work properly, but right now I'm not expert enough to do it. It's still off better than before. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 003e4a03874e..f5f5b1ff1584 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1800,4 +1800,72 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw, struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, const u8 *addr); + +/* Rate control API */ +/** + * struct rate_selection - rate information for/from rate control algorithms + * + * @rate_idx: selected transmission rate index + * @nonerp_idx: Non-ERP rate to use instead if ERP cannot be used + * @probe_idx: rate for probing (or -1) + * @max_rate_idx: maximum rate index that can be used, this is + * input to the algorithm and will be enforced + */ +struct rate_selection { + s8 rate_idx, nonerp_idx, probe_idx, max_rate_idx; +}; + +struct rate_control_ops { + struct module *module; + const char *name; + void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir); + void (*clear)(void *priv); + void (*free)(void *priv); + + void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp); + void (*rate_init)(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta); + void (*free_sta)(void *priv, struct ieee80211_sta *sta, + void *priv_sta); + + void (*tx_status)(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb); + void (*get_rate)(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb, + struct rate_selection *sel); + + void (*add_sta_debugfs)(void *priv, void *priv_sta, + struct dentry *dir); + void (*remove_sta_debugfs)(void *priv, void *priv_sta); +}; + +static inline int rate_supported(struct ieee80211_sta *sta, + enum ieee80211_band band, + int index) +{ + return (sta == NULL || sta->supp_rates[band] & BIT(index)); +} + +static inline s8 +rate_lowest_index(struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta) +{ + int i; + + for (i = 0; i < sband->n_bitrates; i++) + if (rate_supported(sta, sband->band, i)) + return i; + + /* warn when we cannot find a rate. */ + WARN_ON(1); + + return 0; +} + + +int ieee80211_rate_control_register(struct rate_control_ops *ops); +void ieee80211_rate_control_unregister(struct rate_control_ops *ops); + #endif /* MAC80211_H */ -- cgit v1.2.3 From 040dec3b37e4b9ec15b359bf5744f1ceba39fe3e Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Fri, 12 Sep 2008 06:55:14 -0700 Subject: netxen: add pci ids Define old and new pci vendor and device ids. Signed-off-by: Dhananjay Phadke Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6f4276d461c0..a65b082a888a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2247,6 +2247,16 @@ #define PCI_DEVICE_ID_3DLABS_PERMEDIA2 0x0007 #define PCI_DEVICE_ID_3DLABS_PERMEDIA2V 0x0009 +#define PCI_VENDOR_ID_NETXEN 0x4040 +#define PCI_DEVICE_ID_NX2031_10GXSR 0x0001 +#define PCI_DEVICE_ID_NX2031_10GCX4 0x0002 +#define PCI_DEVICE_ID_NX2031_4GCU 0x0003 +#define PCI_DEVICE_ID_NX2031_IMEZ 0x0004 +#define PCI_DEVICE_ID_NX2031_HMEZ 0x0005 +#define PCI_DEVICE_ID_NX2031_XG_MGMT 0x0024 +#define PCI_DEVICE_ID_NX2031_XG_MGMT2 0x0025 +#define PCI_DEVICE_ID_NX3031 0x0100 + #define PCI_VENDOR_ID_AKS 0x416c #define PCI_DEVICE_ID_AKS_ALADDINCARD 0x0100 -- cgit v1.2.3 From cf04a4c764cd3e651a64b3e667bb6a673ead99e1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 30 Sep 2008 02:22:14 -0700 Subject: netdev: use const for some name functions dev_change_name and netdev_drivername should use const char on parameters that are read-only input values. The strcpy to newname is not needed since newname is not used later in function. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d675df08b946..9cfd20be8b7f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1225,7 +1225,7 @@ extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); -extern int dev_change_name(struct net_device *, char *); +extern int dev_change_name(struct net_device *, const char *); extern int dev_set_alias(struct net_device *, const char *, size_t); extern int dev_change_net_namespace(struct net_device *, struct net *, const char *); @@ -1670,7 +1670,7 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern int netdev_class_create_file(struct class_attribute *class_attr); extern void netdev_class_remove_file(struct class_attribute *class_attr); -extern char *netdev_drivername(struct net_device *dev, char *buffer, int len); +extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); extern void linkwatch_run_queue(void); -- cgit v1.2.3 From a57334e95e4fb132acca05bdc0efa2f9dda194af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 30 Sep 2008 02:53:18 -0700 Subject: Phonet: declare headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/Kbuild | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index b68ec09399be..f431e40725d6 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -126,6 +126,7 @@ header-y += pci_regs.h header-y += pfkeyv2.h header-y += pg.h header-y += phantom.h +header-y += phonet.h header-y += pkt_cls.h header-y += pkt_sched.h header-y += posix_types.h @@ -232,6 +233,7 @@ unifdef-y += if_fddi.h unifdef-y += if_frad.h unifdef-y += if_ltalk.h unifdef-y += if_link.h +unifdef-y += if_phonet.h unifdef-y += if_pppol2tp.h unifdef-y += if_pppox.h unifdef-y += if_tr.h -- cgit v1.2.3 From 55ad175fb65a4a3a7e4d1aa13c460de281b4e8ac Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 29 Sep 2008 16:28:21 -0400 Subject: ieee80211.h: remove superfluous ETH_P_PAE definition Signed-off-by: John W. Linville --- include/net/ieee80211.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index b31399e1fd83..6048579d0b24 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -190,10 +190,6 @@ const char *escape_essid(const char *essid, u8 essid_len); #endif #include /* new driver API */ -#ifndef ETH_P_PAE -#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ -#endif /* ETH_P_PAE */ - #define ETH_P_PREAUTH 0x88C7 /* IEEE 802.11i pre-authentication */ #ifndef ETH_P_80211_RAW -- cgit v1.2.3 From 6e50e8a2136f1a90de251c653226ded447c5c915 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Wed, 1 Oct 2008 01:30:19 -0700 Subject: phonet: Protect if_phonet.h against multiple inclusions. From: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_phonet.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h index 7e989216ec17..d70034bcec05 100644 --- a/include/linux/if_phonet.h +++ b/include/linux/if_phonet.h @@ -5,14 +5,15 @@ * * Copyright (C) 2008 Nokia Corporation. All rights reserved. */ +#ifndef LINUX_IF_PHONET_H +#define LINUX_IF_PHONET_H -#define PHONET_HEADER_LEN 8 /* Phonet header length */ - -#define PHONET_MIN_MTU 6 -/* 6 bytes header + 65535 bytes payload */ -#define PHONET_MAX_MTU 65541 +#define PHONET_MIN_MTU 6 /* pn_length = 0 */ +#define PHONET_MAX_MTU 65541 /* pn_length = 0xffff */ #define PHONET_DEV_MTU PHONET_MAX_MTU #ifdef __KERNEL__ extern struct header_ops phonet_header_ops; #endif + +#endif -- cgit v1.2.3 From 04a4bb55bcf35b63d40fd2725e58599ff8310dd7 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 1 Oct 2008 02:33:12 -0700 Subject: net: add skb_recycle_check() to enable netdriver skb recycling This patch adds skb_recycle_check(), which can be used by a network driver after transmitting an skb to check whether this skb can be recycled as a receive buffer. skb_recycle_check() checks that the skb is not shared or cloned, and that it is linear and its head portion large enough (as determined by the driver) to be recycled as a receive buffer. If these conditions are met, it does any necessary reference count dropping and cleans up the skbuff as if it just came from __alloc_skb(). Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a19ea43fea02..720b688c22b6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -383,6 +383,8 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, 1, -1); } +extern int skb_recycle_check(struct sk_buff *skb, int skb_size); + extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); -- cgit v1.2.3 From 93c8b90f01f0dc73891da4e84b26524b61d29d66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 1 Oct 2008 02:48:31 -0700 Subject: ipv6: almost identical frag hashing funcs combined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit $ diff-funcs ip6qhashfn reassembly.c netfilter/nf_conntrack_reasm.c --- reassembly.c:ip6qhashfn() +++ netfilter/nf_conntrack_reasm.c:ip6qhashfn() @@ -1,5 +1,5 @@ -static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, - struct in6_addr *daddr) +static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr) { u32 a, b, c; @@ -9,7 +9,7 @@ a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += ip6_frags.rnd; + c += nf_frags.rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; And codiff xx.o.old xx.o.new: net/ipv6/netfilter/nf_conntrack_reasm.c: ip6qhashfn | -512 nf_hashfn | +6 nf_ct_frag6_gather | +36 3 functions changed, 42 bytes added, 512 bytes removed, diff: -470 net/ipv6/reassembly.c: ip6qhashfn | -512 ip6_hashfn | +7 ipv6_frag_rcv | +89 3 functions changed, 96 bytes added, 512 bytes removed, diff: -416 net/ipv6/reassembly.c: inet6_hash_frag | +510 1 function changed, 510 bytes added, diff: +510 Total: -376 Compile tested. Signed-off-by: Ilpo Järvinen Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 113028fb8f66..dfa7ae3c5607 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -576,6 +576,8 @@ extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, struct group_filter __user *optval, int __user *optlen); +extern unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr, u32 rnd); #ifdef CONFIG_PROC_FS extern int ac6_proc_init(struct net *net); -- cgit v1.2.3 From 12a169e7d8f4b1c95252d8b04ed0f1033ed7cfe2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 1 Oct 2008 07:03:24 -0700 Subject: ipsec: Put dumpers on the dump list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Herbert Xu came up with the idea and the original patch to make xfrm_state dump list contain also dumpers: As it is we go to extraordinary lengths to ensure that states don't go away while dumpers go to sleep. It's much easier if we just put the dumpers themselves on the list since they can't go away while they're going. I've also changed the order of addition on new states to prevent a never-ending dump. Timo Teräs improved the patch to apply cleanly to latest tree, modified iteration code to be more readable by using a common struct for entries in the list, implemented the same idea for xfrm_policy dumping and moved the af_key specific "last" entry caching to af_key. Signed-off-by: Herbert Xu Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- include/net/xfrm.h | 70 ++++++++++++++++++++----------------------------- 2 files changed, 29 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index cbba7760545b..9ff1b54908f3 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -220,7 +220,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[7]; + long args[6]; }; struct netlink_notify diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 48630b266593..b98d2056f27f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -117,12 +117,21 @@ extern struct mutex xfrm_cfg_mutex; metrics. Plus, it will be made via sk->sk_dst_cache. Solved. */ +struct xfrm_state_walk { + struct list_head all; + u8 state; + union { + u8 dying; + u8 proto; + }; + u32 seq; +}; + /* Full description of state of transformer. */ struct xfrm_state { - struct list_head all; union { - struct list_head gclist; + struct hlist_node gclist; struct hlist_node bydst; }; struct hlist_node bysrc; @@ -136,12 +145,8 @@ struct xfrm_state u32 genid; - /* Key manger bits */ - struct { - u8 state; - u8 dying; - u32 seq; - } km; + /* Key manager bits */ + struct xfrm_state_walk km; /* Parameters of this state. */ struct { @@ -449,10 +454,20 @@ struct xfrm_tmpl #define XFRM_MAX_DEPTH 6 +struct xfrm_policy_walk_entry { + struct list_head all; + u8 dead; +}; + +struct xfrm_policy_walk { + struct xfrm_policy_walk_entry walk; + u8 type; + u32 seq; +}; + struct xfrm_policy { struct xfrm_policy *next; - struct list_head bytype; struct hlist_node bydst; struct hlist_node byidx; @@ -467,13 +482,12 @@ struct xfrm_policy struct xfrm_lifetime_cfg lft; struct xfrm_lifetime_cur curlft; struct dst_entry *bundles; - u16 family; + struct xfrm_policy_walk_entry walk; u8 type; u8 action; u8 flags; - u8 dead; u8 xfrm_nr; - /* XXX 1 byte hole, try to pack */ + u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; }; @@ -1245,20 +1259,6 @@ struct xfrm6_tunnel { int priority; }; -struct xfrm_state_walk { - struct list_head list; - unsigned long genid; - struct xfrm_state *state; - int count; - u8 proto; -}; - -struct xfrm_policy_walk { - struct xfrm_policy *policy; - int count; - u8 type, cur_type; -}; - extern void xfrm_init(void); extern void xfrm4_init(void); extern void xfrm_state_init(void); @@ -1410,24 +1410,10 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); -static inline void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) -{ - walk->cur_type = XFRM_POLICY_TYPE_MAIN; - walk->type = type; - walk->policy = NULL; - walk->count = 0; -} - -static inline void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) -{ - if (walk->policy != NULL) { - xfrm_pol_put(walk->policy); - walk->policy = NULL; - } -} - +extern void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); extern int xfrm_policy_walk(struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *); +extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_selector *sel, -- cgit v1.2.3 From a210d01ae3ee006b59e54e772a7f212486e0f021 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 1 Oct 2008 07:28:28 -0700 Subject: ipv4: Loosen source address check on IPv4 output ip_route_output() contains a check to make sure that no flows with non-local source IP addresses are routed. This obviously makes using such addresses impossible. This patch introduces a flowi flag which makes omitting this check possible. The new flag provides a way of handling transparent and non-transparent connections differently. Signed-off-by: Julian Anastasov Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/flow.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 228b2477ceec..b45a5e4fcadd 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -47,6 +47,8 @@ struct flowi { #define fl4_scope nl_u.ip4_u.scope __u8 proto; + __u8 flags; +#define FLOWI_FLAG_ANYSRC 0x01 union { struct { __be16 sport; -- cgit v1.2.3 From f5715aea4564f233767ea1d944b2637a5fd7cd2e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:30:02 -0700 Subject: ipv4: Implement IP_TRANSPARENT socket option This patch introduces the IP_TRANSPARENT socket option: enabling that will make the IPv4 routing omit the non-local source address check on output. Setting IP_TRANSPARENT requires NET_ADMIN capability. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/linux/in.h | 1 + include/net/inet_sock.h | 3 ++- include/net/inet_timewait_sock.h | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/in.h b/include/linux/in.h index 4065313cd7ee..db458beef19d 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -75,6 +75,7 @@ struct in_addr { #define IP_IPSEC_POLICY 16 #define IP_XFRM_POLICY 17 #define IP_PASSSEC 18 +#define IP_TRANSPARENT 19 /* BSD compatibility */ #define IP_RECVRETOPTS IP_RETOPTS diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 643e26be058e..e97b66e2a9d0 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -129,7 +129,8 @@ struct inet_sock { is_icsk:1, freebind:1, hdrincl:1, - mc_loop:1; + mc_loop:1, + transparent:1; int mc_index; __be32 mc_addr; struct ip_mc_socklist *mc_list; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 91324908fccd..80e4977631b8 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -128,7 +128,8 @@ struct inet_timewait_sock { __be16 tw_dport; __u16 tw_num; /* And these are ours. */ - __u8 tw_ipv6only:1; + __u8 tw_ipv6only:1, + tw_transparent:1; /* 15 bits hole, try to pack */ __u16 tw_ipv6_offset; unsigned long tw_ttd; -- cgit v1.2.3 From 1668e010cbe1a7567c81d4c02d31dde9859e9da1 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:33:10 -0700 Subject: ipv4: Make inet_sock.h independent of route.h inet_iif() in inet_sock.h requires route.h. Since users of inet_iif() usually require other route.h functionality anyway this patch moves inet_iif() to route.h. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/inet_sock.h | 7 ------- include/net/ip_vs.h | 1 + include/net/route.h | 5 +++++ 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e97b66e2a9d0..139b78b4dfeb 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -24,7 +24,6 @@ #include #include #include -#include #include /** struct ip_options - IP Options @@ -195,12 +194,6 @@ static inline int inet_sk_ehashfn(const struct sock *sk) return inet_ehashfn(net, laddr, lport, faddr, fport); } - -static inline int inet_iif(const struct sk_buff *skb) -{ - return skb->rtable->rt_iif; -} - static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops) { struct request_sock *req = reqsk_alloc(ops); diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 33e2ac6ceb3e..0b2071d9326d 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -22,6 +22,7 @@ #include #include /* for union nf_inet_addr */ +#include #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ diff --git a/include/net/route.h b/include/net/route.h index 4f0d8c14736c..31d1485b624d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -204,4 +204,9 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) return rt->peer; } +static inline int inet_iif(const struct sk_buff *skb) +{ + return skb->rtable->rt_iif; +} + #endif /* _ROUTE_H */ -- cgit v1.2.3 From 79876874ce20d37ecdc7f481ebf142466999152f Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:35:39 -0700 Subject: ipv4: Conditionally enable transparent flow flag when connecting Set FLOWI_FLAG_ANYSRC in flowi->flags if the socket has the transparent socket option set. This way we selectively enable certain connections with non-local source addresses to be routed. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/route.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 31d1485b624d..4e8cae0e5841 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -161,6 +161,10 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst, int err; struct net *net = sock_net(sk); + + if (inet_sk(sk)->transparent) + fl.flags |= FLOWI_FLAG_ANYSRC; + if (!dst || !src) { err = __ip_route_output_key(net, rp, &fl); if (err) -- cgit v1.2.3 From 88ef4a5a78e63420dd1dd770f1bd1dc198926b04 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:41:00 -0700 Subject: tcp: Handle TCP SYN+ACK/ACK/RST transparency The TCP stack sends out SYN+ACK/ACK/RST reply packets in response to incoming packets. The non-local source address check on output bites us again, as replies for transparently redirected traffic won't have a chance to leave the node. This patch selectively sets the FLOWI_FLAG_ANYSRC flag when doing the route lookup for those replies. Transparent replies are enabled if the listening socket has the transparent socket flag set. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/inet_sock.h | 8 +++++++- include/net/ip.h | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 139b78b4dfeb..dced3f64f975 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -72,7 +72,8 @@ struct inet_request_sock { sack_ok : 1, wscale_ok : 1, ecn_ok : 1, - acked : 1; + acked : 1, + no_srccheck: 1; struct ip_options *opt; }; @@ -204,4 +205,9 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops return req; } +static inline __u8 inet_sk_flowi_flags(const struct sock *sk) +{ + return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0; +} + #endif /* _INET_SOCK_H */ diff --git a/include/net/ip.h b/include/net/ip.h index 250e6ef025a4..90b27f634b76 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -140,12 +140,15 @@ static inline void ip_tr_mc_map(__be32 addr, char *buf) struct ip_reply_arg { struct kvec iov[1]; + int flags; __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ int bound_dev_if; }; +#define IP_REPLY_ARG_NOSRCCHECK 1 + void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); -- cgit v1.2.3 From 86b08d867d7de001ab224180ed7865fab93fd56e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:44:42 -0700 Subject: ipv4: Make Netfilter's ip_route_me_harder() non-local address compatible Netfilter's ip_route_me_harder() tries to re-route packets either generated or re-routed by Netfilter. This patch changes ip_route_me_harder() to handle packets from non-locally-bound sockets with IP_TRANSPARENT set as local and to set the appropriate flowi flags when re-doing the routing lookup. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/ip.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 90b27f634b76..d678ea3d474a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -29,6 +29,7 @@ #include #include +#include struct sock; @@ -149,6 +150,11 @@ struct ip_reply_arg { #define IP_REPLY_ARG_NOSRCCHECK 1 +static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) +{ + return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; +} + void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); -- cgit v1.2.3 From a3116ac5c216fc3c145906a46df9ce542ff7dcf2 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:46:49 -0700 Subject: tcp: Port redirection support for TCP Current TCP code relies on the local port of the listening socket being the same as the destination address of the incoming connection. Port redirection used by many transparent proxying techniques obviously breaks this, so we have to store the original destination port address. This patch extends struct inet_request_sock and stores the incoming destination port value there. It also modifies the handshake code to use that value as the source port when sending reply packets. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/inet_sock.h | 2 +- include/net/tcp.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index dced3f64f975..de0ecc71cf03 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -61,8 +61,8 @@ struct inet_request_sock { struct request_sock req; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) u16 inet6_rsk_offset; - /* 2 bytes hole, try to pack */ #endif + __be16 loc_port; __be32 loc_addr; __be32 rmt_addr; __be16 rmt_port; diff --git a/include/net/tcp.h b/include/net/tcp.h index 12c9b4fec040..f6cc34143154 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -976,6 +976,7 @@ static inline void tcp_openreq_init(struct request_sock *req, ireq->acked = 0; ireq->ecn_ok = 0; ireq->rmt_port = tcp_hdr(skb)->source; + ireq->loc_port = tcp_hdr(skb)->dest; } extern void tcp_enter_memory_pressure(struct sock *sk); -- cgit v1.2.3 From bcd41303f422015ab662c9276d108414aa75b796 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:48:10 -0700 Subject: udp: Export UDP socket lookup function The iptables tproxy code has to be able to do UDP socket hash lookups, so we have to provide an exported lookup function for this purpose. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/udp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index addcdc67234c..d38f6f2419f9 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -148,6 +148,10 @@ extern int udp_lib_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen, int (*push_pending_frames)(struct sock *)); +extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, + int dif); + DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); /* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */ -- cgit v1.2.3 From c226ef9b83694311327f3ab0036c6de9c22e9daf Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 25 Jul 2008 12:44:09 -0400 Subject: sctp: reduce memory footprint of sctp_chunk structure sctp_chunks should be put on a diet. This is some of the low hanging fruit that we can strip out. Changes all the __s8/__u8 flags to bitfields. Saves 12 bytes per chunk. Signed-off-by: Neil Horman Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ab1c472ea753..0dc1b7267c37 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -731,20 +731,23 @@ struct sctp_chunk { */ struct sk_buff *auth_chunk; - __u8 rtt_in_progress; /* Is this chunk used for RTT calculation? */ - __u8 resent; /* Has this chunk ever been retransmitted. */ - __u8 has_tsn; /* Does this chunk have a TSN yet? */ - __u8 has_ssn; /* Does this chunk have a SSN yet? */ - __u8 singleton; /* Was this the only chunk in the packet? */ - __u8 end_of_packet; /* Was this the last chunk in the packet? */ - __u8 ecn_ce_done; /* Have we processed the ECN CE bit? */ - __u8 pdiscard; /* Discard the whole packet now? */ - __u8 tsn_gap_acked; /* Is this chunk acked by a GAP ACK? */ - __s8 fast_retransmit; /* Is this chunk fast retransmitted? */ - __u8 tsn_missing_report; /* Data chunk missing counter. */ - __u8 data_accepted; /* At least 1 chunk in this packet accepted */ - __u8 auth; /* IN: was auth'ed | OUT: needs auth */ - __u8 has_asconf; /* IN: have seen an asconf before */ +#define SCTP_CAN_FRTX 0x0 +#define SCTP_NEED_FRTX 0x1 +#define SCTP_DONT_FRTX 0x2 + __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ + resent:1, /* Has this chunk ever been resent. */ + has_tsn:1, /* Does this chunk have a TSN yet? */ + has_ssn:1, /* Does this chunk have a SSN yet? */ + singleton:1, /* Only chunk in the packet? */ + end_of_packet:1, /* Last chunk in the packet? */ + ecn_ce_done:1, /* Have we processed the ECN CE bit? */ + pdiscard:1, /* Discard the whole packet now? */ + tsn_gap_acked:1, /* Is this chunk acked by a GAP ACK? */ + data_accepted:1, /* At least 1 chunk accepted */ + auth:1, /* IN: was auth'ed | OUT: needs auth */ + has_asconf:1, /* IN: have seen an asconf before */ + tsn_missing_report:2, /* Data chunk missing counter. */ + fast_retransmit:2; /* Is this chunk fast retransmitted? */ }; void sctp_chunk_hold(struct sctp_chunk *); -- cgit v1.2.3 From 52cae8f06babf9eed327479c1aa024ce3732f912 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 18 Aug 2008 10:34:34 -0400 Subject: sctp: try harder to figure out address family when checking wildcards sctp_is_any() function that is used to check for wildcard addresses only looks at the address itself to determine the address family. This function is used in the API to check the address passed in from the user. If the user simply zerroes out the sockaddr_storage and pass that in, we'll end up failing. So, let's try harder to determine the address family by also checking the socket if it's possible. Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0dc1b7267c37..94c62e4ddea9 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1228,7 +1228,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len, sctp_scope_t sctp_scope(const union sctp_addr *); int sctp_in_scope(const union sctp_addr *addr, const sctp_scope_t scope); -int sctp_is_any(const union sctp_addr *addr); +int sctp_is_any(struct sock *sk, const union sctp_addr *addr); int sctp_addr_is_valid(const union sctp_addr *addr); -- cgit v1.2.3 From 9995a32b4d14dcda2f8df58030526bee91114c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:14:48 -0700 Subject: Phonet: connected sockets glue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 include/net/phonet/pep.h (limited to 'include') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h new file mode 100644 index 000000000000..b2f8c54c5333 --- /dev/null +++ b/include/net/phonet/pep.h @@ -0,0 +1,43 @@ +/* + * File: pep.h + * + * Phonet Pipe End Point sockets definitions + * + * Copyright (C) 2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef NET_PHONET_PEP_H +#define NET_PHONET_PEP_H + +struct pep_sock { + struct pn_sock pn_sk; + + /* Listening socket stuff: */ + struct hlist_head ackq; + + /* Connected socket stuff: */ + u8 tx_credits; +}; + +static inline struct pep_sock *pep_sk(struct sock *sk) +{ + return (struct pep_sock *)sk; +} + +extern const struct proto_ops phonet_stream_ops; + +#endif -- cgit v1.2.3 From 9641458d3ec42def729fde64669abf07f3220cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:15:13 -0700 Subject: Phonet: Pipe End Point for Phonet Pipes protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This protocol provides some connection handling and negotiated congestion control. Nokia cellular modems use it for bulk transfers. It provides packet boundaries (hence SOCK_SEQPACKET). Congestion control is per packet rather per byte, so we do not re-use the generic socket memory accounting. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 4 +- include/net/phonet/pep.h | 114 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 3a027f588a4a..f92185242078 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -27,7 +27,9 @@ #define PN_PROTO_TRANSPORT 0 /* Phonet datagram socket */ #define PN_PROTO_PHONET 1 -#define PHONET_NPROTO 2 +/* Phonet pipe */ +#define PN_PROTO_PIPE 2 +#define PHONET_NPROTO 3 #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index b2f8c54c5333..fb024e186860 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -26,11 +26,21 @@ struct pep_sock { struct pn_sock pn_sk; + /* XXX: union-ify listening vs connected stuff ? */ /* Listening socket stuff: */ struct hlist_head ackq; + struct hlist_head hlist; /* Connected socket stuff: */ + struct sock *listener; + u16 peer_type; /* peer type/subtype */ + u8 pipe_handle; + + u8 rx_credits; u8 tx_credits; + u8 rx_fc; /* RX flow control */ + u8 tx_fc; /* TX flow control */ + u8 init_enable; /* auto-enable at creation */ }; static inline struct pep_sock *pep_sk(struct sock *sk) @@ -40,4 +50,108 @@ static inline struct pep_sock *pep_sk(struct sock *sk) extern const struct proto_ops phonet_stream_ops; +/* Pipe protocol definitions */ +struct pnpipehdr { + u8 utid; /* transaction ID */ + u8 message_id; + u8 pipe_handle; + union { + u8 state_after_connect; /* connect request */ + u8 state_after_reset; /* reset request */ + u8 error_code; /* any response */ + u8 pep_type; /* status indication */ + u8 data[1]; + }; +}; +#define other_pep_type data[1] + +static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) +{ + return (struct pnpipehdr *)skb_transport_header(skb); +} + +#define MAX_PNPIPE_HEADER (MAX_PHONET_HEADER + 4) + +enum { + PNS_PIPE_DATA = 0x20, + + PNS_PEP_CONNECT_REQ = 0x40, + PNS_PEP_CONNECT_RESP, + PNS_PEP_DISCONNECT_REQ, + PNS_PEP_DISCONNECT_RESP, + PNS_PEP_RESET_REQ, + PNS_PEP_RESET_RESP, + PNS_PEP_ENABLE_REQ, + PNS_PEP_ENABLE_RESP, + PNS_PEP_CTRL_REQ, + PNS_PEP_CTRL_RESP, + PNS_PEP_DISABLE_REQ = 0x4C, + PNS_PEP_DISABLE_RESP, + + PNS_PEP_STATUS_IND = 0x60, + PNS_PIPE_CREATED_IND, + PNS_PIPE_RESET_IND = 0x63, + PNS_PIPE_ENABLED_IND, + PNS_PIPE_REDIRECTED_IND, + PNS_PIPE_DISABLED_IND = 0x66, +}; + +#define PN_PIPE_INVALID_HANDLE 0xff +#define PN_PEP_TYPE_COMMON 0x00 + +/* Phonet pipe status indication */ +enum { + PN_PEP_IND_FLOW_CONTROL, + PN_PEP_IND_ID_MCFC_GRANT_CREDITS, +}; + +/* Phonet pipe error codes */ +enum { + PN_PIPE_NO_ERROR, + PN_PIPE_ERR_INVALID_PARAM, + PN_PIPE_ERR_INVALID_HANDLE, + PN_PIPE_ERR_INVALID_CTRL_ID, + PN_PIPE_ERR_NOT_ALLOWED, + PN_PIPE_ERR_PEP_IN_USE, + PN_PIPE_ERR_OVERLOAD, + PN_PIPE_ERR_DEV_DISCONNECTED, + PN_PIPE_ERR_TIMEOUT, + PN_PIPE_ERR_ALL_PIPES_IN_USE, + PN_PIPE_ERR_GENERAL, + PN_PIPE_ERR_NOT_SUPPORTED, +}; + +/* Phonet pipe states */ +enum { + PN_PIPE_DISABLE, + PN_PIPE_ENABLE, +}; + +/* Phonet pipe sub-block types */ +enum { + PN_PIPE_SB_CREATE_REQ_PEP_SUB_TYPE, + PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE, + PN_PIPE_SB_REDIRECT_REQ_PEP_SUB_TYPE, + PN_PIPE_SB_NEGOTIATED_FC, + PN_PIPE_SB_REQUIRED_FC_TX, + PN_PIPE_SB_PREFERRED_FC_RX, +}; + +/* Phonet pipe flow control models */ +enum { + PN_NO_FLOW_CONTROL, + PN_LEGACY_FLOW_CONTROL, + PN_ONE_CREDIT_FLOW_CONTROL, + PN_MULTI_CREDIT_FLOW_CONTROL, +}; + +#define pn_flow_safe(fc) ((fc) >> 1) + +/* Phonet pipe flow control states */ +enum { + PEP_IND_EMPTY, + PEP_IND_BUSY, + PEP_IND_READY, +}; + #endif -- cgit v1.2.3 From c41bd97f815720f9404f97da0c4f4400b52c243d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:15:43 -0700 Subject: Phonet: receive pipe control requests as out-of-band data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index fb024e186860..4d79564850ae 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -33,6 +33,8 @@ struct pep_sock { /* Connected socket stuff: */ struct sock *listener; + struct sk_buff_head ctrlreq_queue; +#define PNPIPE_CTRLREQ_MAX 10 u16 peer_type; /* peer type/subtype */ u8 pipe_handle; -- cgit v1.2.3 From 02a47617cdce440f60c71a51f3a93f9f5fcc5a7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:16:16 -0700 Subject: Phonet: implement GPRS virtual interface over PEP socket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 8 ++++++++ include/linux/socket.h | 1 + include/net/phonet/gprs.h | 38 ++++++++++++++++++++++++++++++++++++++ include/net/phonet/pep.h | 1 + 4 files changed, 48 insertions(+) create mode 100644 include/net/phonet/gprs.h (limited to 'include') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index f92185242078..c9609f9aedac 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -31,9 +31,17 @@ #define PN_PROTO_PIPE 2 #define PHONET_NPROTO 3 +/* Socket options for SOL_PNPIPE level */ +#define PNPIPE_ENCAP 1 +#define PNPIPE_IFINDEX 2 + #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 +/* Values for PNPIPE_ENCAP option */ +#define PNPIPE_ENCAP_NONE 0 +#define PNPIPE_ENCAP_IP 1 + /* ioctls */ #define SIOCPNGETOBJECT (SIOCPROTOPRIVATE + 0) diff --git a/include/linux/socket.h b/include/linux/socket.h index 818ca33bf79f..20fc4bbfca42 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -297,6 +297,7 @@ struct ucred { #define SOL_RXRPC 272 #define SOL_PPPOL2TP 273 #define SOL_BLUETOOTH 274 +#define SOL_PNPIPE 275 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/net/phonet/gprs.h b/include/net/phonet/gprs.h new file mode 100644 index 000000000000..928daf595beb --- /dev/null +++ b/include/net/phonet/gprs.h @@ -0,0 +1,38 @@ +/* + * File: pep_gprs.h + * + * GPRS over Phonet pipe end point socket + * + * Copyright (C) 2008 Nokia Corporation. + * + * Author: Rémi Denis-Courmont + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef NET_PHONET_GPRS_H +#define NET_PHONET_GPRS_H + +struct sock; +struct sk_buff; + +int pep_writeable(struct sock *sk); +int pep_write(struct sock *sk, struct sk_buff *skb); +struct sk_buff *pep_read(struct sock *sk); + +int gprs_attach(struct sock *sk); +void gprs_detach(struct sock *sk); + +#endif diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 4d79564850ae..fcd793030e4d 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -35,6 +35,7 @@ struct pep_sock { struct sock *listener; struct sk_buff_head ctrlreq_queue; #define PNPIPE_CTRLREQ_MAX 10 + int ifindex; u16 peer_type; /* peer type/subtype */ u8 pipe_handle; -- cgit v1.2.3 From 13c1d18931ebb5cf407cb348ef2cd6284d68902d Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Sun, 5 Oct 2008 13:33:42 -0700 Subject: xfrm: MIGRATE enhancements (draft-ebalard-mext-pfkey-enhanced-migrate) Provides implementation of the enhancements of XFRM/PF_KEY MIGRATE mechanism specified in draft-ebalard-mext-pfkey-enhanced-migrate-00. Defines associated PF_KEY SADB_X_EXT_KMADDRESS extension and XFRM/netlink XFRMA_KMADDRESS attribute. Signed-off-by: Arnaud Ebalard Signed-off-by: David S. Miller --- include/linux/pfkeyv2.h | 13 ++++++++++++- include/linux/xfrm.h | 10 ++++++++++ include/net/xfrm.h | 15 ++++++++++++--- 3 files changed, 34 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index 700725ddcaae..01b262959f2e 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -226,6 +226,15 @@ struct sadb_x_sec_ctx { } __attribute__((packed)); /* sizeof(struct sadb_sec_ctx) = 8 */ +/* Used by MIGRATE to pass addresses IKE will use to perform + * negotiation with the peer */ +struct sadb_x_kmaddress { + uint16_t sadb_x_kmaddress_len; + uint16_t sadb_x_kmaddress_exttype; + uint32_t sadb_x_kmaddress_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_x_kmaddress) == 8 */ + /* Message types */ #define SADB_RESERVED 0 #define SADB_GETSPI 1 @@ -346,7 +355,9 @@ struct sadb_x_sec_ctx { #define SADB_X_EXT_NAT_T_DPORT 22 #define SADB_X_EXT_NAT_T_OA 23 #define SADB_X_EXT_SEC_CTX 24 -#define SADB_EXT_MAX 24 +/* Used with MIGRATE to pass @ to IKE for negotiation */ +#define SADB_X_EXT_KMADDRESS 25 +#define SADB_EXT_MAX 25 /* Identity Extension values */ #define SADB_IDENTTYPE_RESERVED 0 diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index fb0c215a3051..4bc1e6b86cb2 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -279,6 +279,7 @@ enum xfrm_attr_type_t { XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ XFRMA_MIGRATE, XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ + XFRMA_KMADDRESS, /* struct xfrm_user_kmaddress */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -415,6 +416,15 @@ struct xfrm_user_report { struct xfrm_selector sel; }; +/* Used by MIGRATE to pass addresses IKE should use to perform + * SA negotiation with the peer */ +struct xfrm_user_kmaddress { + xfrm_address_t local; + xfrm_address_t remote; + __u32 reserved; + __u16 family; +}; + struct xfrm_user_migrate { xfrm_address_t old_daddr; xfrm_address_t old_saddr; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b98d2056f27f..11c890ad8ebb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -492,6 +492,13 @@ struct xfrm_policy struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; }; +struct xfrm_kmaddress { + xfrm_address_t local; + xfrm_address_t remote; + u32 reserved; + u16 family; +}; + struct xfrm_migrate { xfrm_address_t old_daddr; xfrm_address_t old_saddr; @@ -531,7 +538,7 @@ struct xfrm_mgr int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); int (*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); - int (*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles); + int (*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k); }; extern int xfrm_register_km(struct xfrm_mgr *km); @@ -1432,12 +1439,14 @@ extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, #ifdef CONFIG_XFRM_MIGRATE extern int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles); + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k); extern struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m); extern struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m); extern int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles); + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k); #endif extern wait_queue_head_t km_waitq; -- cgit v1.2.3 From 554794de7949d1a6279336404c066f974d4c2bde Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 6 Oct 2008 09:54:39 -0700 Subject: pkt_sched: Fix handling of gso skbs on requeuing Jay Cliburn noticed and diagnosed a bug triggered in dev_gso_skb_destructor() after last change from qdisc->gso_skb to qdisc->requeue list. Since gso_segmented skbs can't be queued to another list this patch brings back qdisc->gso_skb for them. Reported-by: Jay Cliburn Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 3b983e8a0555..3fe49d808957 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -52,6 +52,7 @@ struct Qdisc u32 parent; atomic_t refcnt; unsigned long state; + struct sk_buff *gso_skb; struct sk_buff_head requeue; struct sk_buff_head q; struct netdev_queue *dev_queue; -- cgit v1.2.3 From 76708dee382a69b2f9d0e50f413f99fefb2dc509 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 5 Oct 2008 18:02:48 +0200 Subject: mac80211: free up 2 bytes in skb->cb Free up 2 bytes in skb->cb to be used for multi-rate retry later. Move iv_len and icv_len initialization into key alloc. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/mac80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f5f5b1ff1584..feb3be81dfa6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -337,8 +337,6 @@ struct ieee80211_tx_info { unsigned long jiffies; s8 rts_cts_rate_idx, alt_retry_rate_idx; u8 retry_limit; - u8 icv_len; - u8 iv_len; } control; struct { u64 ampdu_ack_map; @@ -635,6 +633,8 @@ enum ieee80211_key_flags { */ struct ieee80211_key_conf { enum ieee80211_key_alg alg; + u8 icv_len; + u8 iv_len; u8 hw_key_idx; u8 flags; s8 keyidx; -- cgit v1.2.3 From 870abdf67170daa9f1022e55a35c469239fcc74c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 5 Oct 2008 18:04:24 +0200 Subject: mac80211: add multi-rate retry support This patch adjusts the rate control API to allow multi-rate retry if supported by the driver. The ieee80211_hw struct specifies how many alternate rate selections the driver supports. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/mac80211.h | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index feb3be81dfa6..5617a1613c91 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -292,6 +292,20 @@ enum mac80211_tx_control_flags { #define IEEE80211_TX_INFO_DRIVER_DATA_PTRS \ (IEEE80211_TX_INFO_DRIVER_DATA_SIZE / sizeof(void *)) +/* maximum number of alternate rate retry stages */ +#define IEEE80211_TX_MAX_ALTRATE 3 + +/** + * struct ieee80211_tx_altrate - alternate rate selection/status + * + * @rate_idx: rate index to attempt to send with + * @limit: number of retries before fallback + */ +struct ieee80211_tx_altrate { + s8 rate_idx; + u8 limit; +}; + /** * struct ieee80211_tx_info - skb transmit information * @@ -335,12 +349,14 @@ struct ieee80211_tx_info { struct ieee80211_key_conf *hw_key; struct ieee80211_sta *sta; unsigned long jiffies; - s8 rts_cts_rate_idx, alt_retry_rate_idx; + s8 rts_cts_rate_idx; u8 retry_limit; + struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE]; } control; struct { u64 ampdu_ack_map; int ack_signal; + struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE + 1]; u8 retry_count; bool excessive_retries; u8 ampdu_ack_len; @@ -828,6 +844,9 @@ enum ieee80211_hw_flags { * within &struct ieee80211_vif. * @sta_data_size: size (in bytes) of the drv_priv data area * within &struct ieee80211_sta. + * + * @max_altrates: maximum number of alternate rate retry stages + * @max_altrate_tries: maximum number of tries for each stage */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -844,6 +863,8 @@ struct ieee80211_hw { u16 ampdu_queues; u16 max_listen_interval; s8 max_signal; + u8 max_altrates; + u8 max_altrate_tries; }; struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy); @@ -900,11 +921,11 @@ ieee80211_get_rts_cts_rate(const struct ieee80211_hw *hw, static inline struct ieee80211_rate * ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, - const struct ieee80211_tx_info *c) + const struct ieee80211_tx_info *c, int idx) { - if (c->control.alt_retry_rate_idx < 0) + if (c->control.retries[idx].rate_idx < 0) return NULL; - return &hw->wiphy->bands[c->band]->bitrates[c->control.alt_retry_rate_idx]; + return &hw->wiphy->bands[c->band]->bitrates[c->control.retries[idx].rate_idx]; } /** -- cgit v1.2.3 From 9a1f27c48065ce713eb47f2fd475b717e63ef239 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Oct 2008 11:41:57 -0700 Subject: inet_hashtables: Add inet_lookup_skb helpers To be able to use the cached socket reference in the skb during input processing we add a new set of lookup functions that receive the skb on their argument list. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 11 +++++++++++ include/net/inet_hashtables.h | 14 ++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index e48989f04c24..995efbb031ab 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -91,6 +91,17 @@ static inline struct sock *__inet6_lookup(struct net *net, return inet6_lookup_listener(net, hashinfo, daddr, hnum, dif); } +static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, + struct sk_buff *skb, + const __be16 sport, + const __be16 dport) +{ + return __inet6_lookup(dev_net(skb->dst->dev), hashinfo, + &ipv6_hdr(skb)->saddr, sport, + &ipv6_hdr(skb)->daddr, ntohs(dport), + inet6_iif(skb)); +} + extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index bb619d80f2e2..3522bbcd546d 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include #include #include @@ -371,6 +373,18 @@ static inline struct sock *inet_lookup(struct net *net, return sk; } +static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, + struct sk_buff *skb, + const __be16 sport, + const __be16 dport) +{ + const struct iphdr *iph = ip_hdr(skb); + + return __inet_lookup(dev_net(skb->dst->dev), hashinfo, + iph->saddr, sport, + iph->daddr, dport, inet_iif(skb)); +} + extern int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u32 port_offset, int (*check_established)(struct inet_timewait_death_row *, -- cgit v1.2.3 From 23542618deb77cfed312842fe8c41ed19fb16470 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Tue, 7 Oct 2008 12:41:01 -0700 Subject: inet: Don't lookup the socket if there's a socket attached to the skb Use the socket cached in the skb if it's present. Signed-off-by: KOVACS Krisztian Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 12 ++++++++---- include/net/inet_hashtables.h | 10 +++++++--- include/net/sock.h | 12 ++++++++++++ 3 files changed, 27 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 995efbb031ab..f74665d7bea8 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -96,10 +96,14 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, const __be16 sport, const __be16 dport) { - return __inet6_lookup(dev_net(skb->dst->dev), hashinfo, - &ipv6_hdr(skb)->saddr, sport, - &ipv6_hdr(skb)->daddr, ntohs(dport), - inet6_iif(skb)); + struct sock *sk; + + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else return __inet6_lookup(dev_net(skb->dst->dev), hashinfo, + &ipv6_hdr(skb)->saddr, sport, + &ipv6_hdr(skb)->daddr, ntohs(dport), + inet6_iif(skb)); } extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3522bbcd546d..5cc182f9ecae 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -378,11 +378,15 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, const __be16 sport, const __be16 dport) { + struct sock *sk; const struct iphdr *iph = ip_hdr(skb); - return __inet_lookup(dev_net(skb->dst->dev), hashinfo, - iph->saddr, sport, - iph->daddr, dport, inet_iif(skb)); + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else + return __inet_lookup(dev_net(skb->dst->dev), hashinfo, + iph->saddr, sport, + iph->daddr, dport, inet_iif(skb)); } extern int __inet_hash_connect(struct inet_timewait_death_row *death_row, diff --git a/include/net/sock.h b/include/net/sock.h index 75a312d3888a..18f96708f3a6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1324,6 +1324,18 @@ static inline void sk_change_net(struct sock *sk, struct net *net) sock_net_set(sk, hold_net(net)); } +static inline struct sock *skb_steal_sock(struct sk_buff *skb) +{ + if (unlikely(skb->sk)) { + struct sock *sk = skb->sk; + + skb->destructor = NULL; + skb->sk = NULL; + return sk; + } + return NULL; +} + extern void sock_enable_timestamp(struct sock *sk); extern int sock_get_timestamp(struct sock *, struct timeval __user *); extern int sock_get_timestampns(struct sock *, struct timespec __user *); -- cgit v1.2.3 From c57943a1c96214ee68f3890bb6772841ffbfd606 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Oct 2008 14:18:42 -0700 Subject: net: wrap sk->sk_backlog_rcv() Wrap calling sk->sk_backlog_rcv() in a function. This will allow extending the generic sk_backlog_rcv behaviour. Signed-off-by: Peter Zijlstra Signed-off-by: David S. Miller --- include/net/sock.h | 5 +++++ include/net/tcp.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 18f96708f3a6..ada50c04d09f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -482,6 +482,11 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) skb->next = NULL; } +static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + return sk->sk_backlog_rcv(sk, skb); +} + #define sk_wait_event(__sk, __timeo, __condition) \ ({ int __rc; \ release_sock(__sk); \ diff --git a/include/net/tcp.h b/include/net/tcp.h index f6cc34143154..438014d57610 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -896,7 +896,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) BUG_ON(sock_owned_by_user(sk)); while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { - sk->sk_backlog_rcv(sk, skb1); + sk_backlog_rcv(sk, skb1); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED); } -- cgit v1.2.3 From 654bed16cf86a9ef94495d9e6131b7ff7840a3dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Oct 2008 14:22:33 -0700 Subject: net: packet split receive api Add some packet-split receive hooks. For one this allows to do NUMA node affine page allocs. Later on these hooks will be extended to do emergency reserve allocations for fragments. Signed-off-by: Peter Zijlstra Signed-off-by: David S. Miller --- include/linux/skbuff.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 720b688c22b6..2725f4e5a9bf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -968,6 +968,9 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, skb_shinfo(skb)->nr_frags = i + 1; } +extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, + int off, int size); + #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) #define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) @@ -1382,6 +1385,26 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } +extern struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask); + +/** + * netdev_alloc_page - allocate a page for ps-rx on a specific device + * @dev: network device to receive on + * + * Allocate a new page node local to the specified device. + * + * %NULL is returned if there is no free memory. + */ +static inline struct page *netdev_alloc_page(struct net_device *dev) +{ + return __netdev_alloc_page(dev, GFP_ATOMIC); +} + +static inline void netdev_free_page(struct net_device *dev, struct page *page) +{ + __free_page(page); +} + /** * skb_clone_writable - is the header of a clone writable * @skb: buffer to check -- cgit v1.2.3 From 33f5f57eeb0c6386fdd85f9c690dc8d700ba7928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 7 Oct 2008 14:43:06 -0700 Subject: tcp: kill pointless urg_mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It all started from me noticing that this urgent check in tcp_clean_rtx_queue is unnecessarily inside the loop. Then I took a longer look to it and found out that the users of urg_mode can trivially do without, well almost, there was one gotcha. Bonus: those funny people who use urg with >= 2^31 write_seq - snd_una could now rejoice too (that's the only purpose for the between being there, otherwise a simple compare would have done the thing). Not that I assume that the rest of the tcp code happily lives with such mind-boggling numbers :-). Alas, it turned out to be impossible to set wmem to such numbers anyway, yes I really tried a big sendfile after setting some wmem but nothing happened :-). ...Tcp_wmem is int and so is sk_sndbuf... So I hacked a bit variable to long and found out that it seems to work... :-) Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 767290628292..fe77e1499ab7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -312,8 +312,11 @@ struct tcp_sock { u32 retrans_out; /* Retransmitted packets out */ u16 urg_data; /* Saved octet of OOB data and control flags */ - u8 urg_mode; /* In urgent mode */ u8 ecn_flags; /* ECN status bits. */ + u8 reordering; /* Packet reordering metric. */ + u32 snd_up; /* Urgent pointer */ + + u8 keepalive_probes; /* num of allowed keep alive probes */ /* * Options received (usually on last packet, some only on SYN packets). */ @@ -361,8 +364,6 @@ struct tcp_sock { u32 lost_retrans_low; /* Sent seq after any rxmit (lowest) */ - u8 reordering; /* Packet reordering metric. */ - u8 keepalive_probes; /* num of allowed keep alive probes */ u32 prior_ssthresh; /* ssthresh saved at recovery start */ u32 high_seq; /* snd_nxt at onset of congestion */ @@ -374,8 +375,6 @@ struct tcp_sock { u32 total_retrans; /* Total retransmits for entire connection */ u32 urg_seq; /* Seq of received urgent pointer */ - u32 snd_up; /* Urgent pointer */ - unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ -- cgit v1.2.3 From 835bcc0497e18f54153ac9e32b598dd8ffb7aa66 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 7 Oct 2008 14:45:55 -0700 Subject: netns: move /proc/net/dev_snmp6 to struct net Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/netns/mib.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index 449147604642..ffcef5de3d1d 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -11,6 +11,10 @@ struct netns_mib { DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics); DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics); DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct proc_dir_entry *proc_net_devsnmp6; +#endif }; #endif -- cgit v1.2.3 From 0c7ed677fb7013c8028045d409a48ac42151187a Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 7 Oct 2008 14:49:36 -0700 Subject: netns: make udpv6 mib per/namespace Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/netns/mib.h | 1 + include/net/udp.h | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index ffcef5de3d1d..ba622b24b847 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -14,6 +14,7 @@ struct netns_mib { #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct proc_dir_entry *proc_net_devsnmp6; + DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6); #endif }; diff --git a/include/net/udp.h b/include/net/udp.h index d38f6f2419f9..72f28c3ddaae 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -152,8 +152,6 @@ extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); -DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); - /* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6); @@ -167,12 +165,14 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6); if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_statistics, field); \ else SNMP_INC_STATS_BH((net)->mib.udp_statistics, field); } while(0) -#define UDP6_INC_STATS_BH(net, field, is_udplite) do { (void)net; \ +#define UDP6_INC_STATS_BH(net, field, is_udplite) do { \ if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field); \ - else SNMP_INC_STATS_BH(udp_stats_in6, field); } while(0) -#define UDP6_INC_STATS_USER(net, field, is_udplite) do { (void)net; \ + else SNMP_INC_STATS_BH((net)->mib.udp_stats_in6, field); \ +} while(0) +#define UDP6_INC_STATS_USER(net, field, is_udplite) do { \ if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field); \ - else SNMP_INC_STATS_USER(udp_stats_in6, field); } while(0) + else SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field); \ +} while(0) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #define UDPX_INC_STATS_BH(sk, field) \ -- cgit v1.2.3 From be713a443ee019489890e93654557916fbf72612 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 7 Oct 2008 14:50:06 -0700 Subject: netns: make uplitev6 mib per/namespace Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/netns/mib.h | 1 + include/net/udp.h | 11 ++++------- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index ba622b24b847..4e58f0519ce8 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -15,6 +15,7 @@ struct netns_mib { #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct proc_dir_entry *proc_net_devsnmp6; DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6); + DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6); #endif }; diff --git a/include/net/udp.h b/include/net/udp.h index 72f28c3ddaae..1e205095ea68 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -152,9 +152,6 @@ extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); -/* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */ -DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6); - /* * SNMP statistics for UDP and UDP-Lite */ @@ -166,12 +163,12 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6); else SNMP_INC_STATS_BH((net)->mib.udp_statistics, field); } while(0) #define UDP6_INC_STATS_BH(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field); \ + if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_stats_in6, field);\ else SNMP_INC_STATS_BH((net)->mib.udp_stats_in6, field); \ } while(0) -#define UDP6_INC_STATS_USER(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field); \ - else SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field); \ +#define UDP6_INC_STATS_USER(net, field, __lite) do { \ + if (__lite) SNMP_INC_STATS_USER((net)->mib.udplite_stats_in6, field); \ + else SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field); \ } while(0) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -- cgit v1.2.3 From b8bae41ed6a53cce56c50811a91cd963e3187d1c Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 7 Oct 2008 15:34:37 -0700 Subject: ipv4: add mc_count to in_device. This patch add mc_count to struct in_device and updates increment/decrement/initilaize of this field in IPv4 and in IPv6. - Also printing the vfs /proc entry (/proc/net/igmp) is adjusted to use the new mc_count. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index c6f51ad52d5b..06fcdb45106b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -25,6 +25,7 @@ struct in_device struct in_ifaddr *ifa_list; /* IP ifaddr chain */ rwlock_t mc_list_lock; struct ip_mc_list *mc_list; /* IP multicast filter chain */ + int mc_count; /* Number of installed mcasts */ spinlock_t mc_tomb_lock; struct ip_mc_list *mc_tomb; unsigned long mr_v1_seen; -- cgit v1.2.3 From 76108cea065cda58366d16a7eb6ca90d717a1396 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:00 +0200 Subject: netfilter: Use unsigned types for hooknum and pf vars and (try to) consistently use u_int8_t for the L3 family. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 30 +++++++++++++++------------- include/linux/netfilter/x_tables.h | 28 +++++++++++++------------- include/net/netfilter/nf_conntrack_core.h | 2 +- include/net/netfilter/nf_conntrack_expect.h | 2 +- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- include/net/netfilter/nf_log.h | 8 ++++---- include/net/netfilter/nf_queue.h | 6 +++--- 7 files changed, 41 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0c5eb7ed8b3f..8c83d2e23bde 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -92,8 +92,8 @@ struct nf_hook_ops /* User fills in from here down. */ nf_hookfn *hook; struct module *owner; - int pf; - int hooknum; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ int priority; }; @@ -102,7 +102,7 @@ struct nf_sockopt_ops { struct list_head list; - int pf; + u_int8_t pf; /* Non-inclusive ranges: use 0/0/NULL to never get called. */ int set_optmin; @@ -140,7 +140,7 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, +int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh); @@ -151,7 +151,7 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ -static inline int nf_hook_thresh(int pf, unsigned int hook, +static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, @@ -167,7 +167,7 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { @@ -212,14 +212,14 @@ __ret;}) NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN) /* Call setsockopt() */ -int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt, +int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int len); -int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, +int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); -int compat_nf_setsockopt(struct sock *sk, int pf, int optval, +int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int len); -int compat_nf_getsockopt(struct sock *sk, int pf, int optval, +int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); /* Call this before modifying an existing packet: ensures it is @@ -292,7 +292,7 @@ extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo); extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); static inline void -nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { #ifdef CONFIG_NF_NAT_NEEDED void (*decodefn)(struct sk_buff *, struct flowi *); @@ -315,7 +315,7 @@ extern struct proc_dir_entry *proc_net_netfilter; #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) -static inline int nf_hook_thresh(int pf, unsigned int hook, +static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, @@ -324,7 +324,7 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, { return okfn(skb); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { @@ -332,7 +332,9 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, } struct flowi; static inline void -nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {} +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) +{ +} #endif /*CONFIG_NETFILTER*/ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 2326296b6f25..6989b22716e6 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -292,7 +292,7 @@ struct xt_table /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; - int af; /* address/protocol family */ + u_int8_t af; /* address/protocol family */ }; #include @@ -346,19 +346,19 @@ extern struct xt_table_info *xt_replace_table(struct xt_table *table, struct xt_table_info *newinfo, int *error); -extern struct xt_match *xt_find_match(int af, const char *name, u8 revision); -extern struct xt_target *xt_find_target(int af, const char *name, u8 revision); -extern struct xt_target *xt_request_find_target(int af, const char *name, +extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); +extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision); +extern struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); -extern int xt_find_revision(int af, const char *name, u8 revision, int target, - int *err); +extern int xt_find_revision(u8 af, const char *name, u8 revision, + int target, int *err); -extern struct xt_table *xt_find_table_lock(struct net *net, int af, +extern struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name); extern void xt_table_unlock(struct xt_table *t); -extern int xt_proto_init(struct net *net, int af); -extern void xt_proto_fini(struct net *net, int af); +extern int xt_proto_init(struct net *net, u_int8_t af); +extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); @@ -423,12 +423,12 @@ struct compat_xt_counters_info #define COMPAT_XT_ALIGN(s) (((s) + (__alignof__(struct compat_xt_counters)-1)) \ & ~(__alignof__(struct compat_xt_counters)-1)) -extern void xt_compat_lock(int af); -extern void xt_compat_unlock(int af); +extern void xt_compat_lock(u_int8_t af); +extern void xt_compat_unlock(u_int8_t af); -extern int xt_compat_add_offset(int af, unsigned int offset, short delta); -extern void xt_compat_flush_offsets(int af); -extern short xt_compat_calc_jump(int af, unsigned int offset); +extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); +extern void xt_compat_flush_offsets(u_int8_t af); +extern short xt_compat_calc_jump(u_int8_t af, unsigned int offset); extern int xt_compat_match_offset(const struct xt_match *match); extern int xt_compat_match_from_user(struct xt_entry_match *m, diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index a81771210934..05760d6a706e 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -20,7 +20,7 @@ /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ -extern unsigned int nf_conntrack_in(int pf, +extern unsigned int nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index dfdf4b459475..4c4d894cb9b5 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -86,7 +86,7 @@ void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me); -void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, int, +void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, const union nf_inet_addr *, const union nf_inet_addr *, u_int8_t, const __be16 *, const __be16 *); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 723df9d1cc35..d4376e97bae8 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -39,7 +39,7 @@ struct nf_conntrack_l4proto const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum); /* Called when a new connection for this protocol found; @@ -52,7 +52,7 @@ struct nf_conntrack_l4proto int (*error)(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, unsigned int hooknum); + u_int8_t pf, unsigned int hooknum); /* Print out the per-protocol part of the tuple. Return like seq_* */ int (*print_tuple)(struct seq_file *s, diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 8c6b5ae45534..7182c06974f4 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -28,7 +28,7 @@ struct nf_loginfo { } u; }; -typedef void nf_logfn(unsigned int pf, +typedef void nf_logfn(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -43,12 +43,12 @@ struct nf_logger { }; /* Function to register/unregister log function. */ -int nf_log_register(int pf, const struct nf_logger *logger); +int nf_log_register(u_int8_t pf, const struct nf_logger *logger); void nf_log_unregister(const struct nf_logger *logger); -void nf_log_unregister_pf(int pf); +void nf_log_unregister_pf(u_int8_t pf); /* Calls the registered backend logging function */ -void nf_log_packet(int pf, +void nf_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index d030044e9235..252fd1010b77 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -8,7 +8,7 @@ struct nf_queue_entry { unsigned int id; struct nf_hook_ops *elem; - int pf; + u_int8_t pf; unsigned int hook; struct net_device *indev; struct net_device *outdev; @@ -24,9 +24,9 @@ struct nf_queue_handler { char *name; }; -extern int nf_register_queue_handler(int pf, +extern int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh); -extern int nf_unregister_queue_handler(int pf, +extern int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh); extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh); extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); -- cgit v1.2.3 From e948b20a71a06a740c925d6ea22b59b4e17cfa0c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:00 +0200 Subject: netfilter: rename ipt_recent to xt_recent Like with other modules (such as ipt_state), ipt_recent.h is changed to forward definitions to (IOW include) xt_recent.h, and xt_recent.c is changed to use the new constant names. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_recent.h | 26 ++++++++++++++++++++++++++ include/linux/netfilter_ipv4/ipt_recent.h | 28 +++++++++++----------------- 3 files changed, 38 insertions(+), 17 deletions(-) create mode 100644 include/linux/netfilter/xt_recent.h (limited to 'include') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 3aff513d12c8..5a8af875bce2 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -32,6 +32,7 @@ header-y += xt_owner.h header-y += xt_pkttype.h header-y += xt_rateest.h header-y += xt_realm.h +header-y += xt_recent.h header-y += xt_sctp.h header-y += xt_state.h header-y += xt_statistic.h diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h new file mode 100644 index 000000000000..5cfeb81c6794 --- /dev/null +++ b/include/linux/netfilter/xt_recent.h @@ -0,0 +1,26 @@ +#ifndef _LINUX_NETFILTER_XT_RECENT_H +#define _LINUX_NETFILTER_XT_RECENT_H 1 + +enum { + XT_RECENT_CHECK = 1 << 0, + XT_RECENT_SET = 1 << 1, + XT_RECENT_UPDATE = 1 << 2, + XT_RECENT_REMOVE = 1 << 3, + XT_RECENT_TTL = 1 << 4, + + XT_RECENT_SOURCE = 0, + XT_RECENT_DEST = 1, + + XT_RECENT_NAME_LEN = 200, +}; + +struct xt_recent_mtinfo { + u_int32_t seconds; + u_int32_t hit_count; + u_int8_t check_set; + u_int8_t invert; + char name[XT_RECENT_NAME_LEN]; + u_int8_t side; +}; + +#endif /* _LINUX_NETFILTER_XT_RECENT_H */ diff --git a/include/linux/netfilter_ipv4/ipt_recent.h b/include/linux/netfilter_ipv4/ipt_recent.h index 6508a4592651..d636cca133c2 100644 --- a/include/linux/netfilter_ipv4/ipt_recent.h +++ b/include/linux/netfilter_ipv4/ipt_recent.h @@ -1,27 +1,21 @@ #ifndef _IPT_RECENT_H #define _IPT_RECENT_H -#define RECENT_NAME "ipt_recent" -#define RECENT_VER "v0.3.1" +#include -#define IPT_RECENT_CHECK 1 -#define IPT_RECENT_SET 2 -#define IPT_RECENT_UPDATE 4 -#define IPT_RECENT_REMOVE 8 -#define IPT_RECENT_TTL 16 +#define ipt_recent_info xt_recent_mtinfo -#define IPT_RECENT_SOURCE 0 -#define IPT_RECENT_DEST 1 +enum { + IPT_RECENT_CHECK = XT_RECENT_CHECK, + IPT_RECENT_SET = XT_RECENT_SET, + IPT_RECENT_UPDATE = XT_RECENT_UPDATE, + IPT_RECENT_REMOVE = XT_RECENT_REMOVE, + IPT_RECENT_TTL = XT_RECENT_TTL, -#define IPT_RECENT_NAME_LEN 200 + IPT_RECENT_SOURCE = XT_RECENT_SOURCE, + IPT_RECENT_DEST = XT_RECENT_DEST, -struct ipt_recent_info { - u_int32_t seconds; - u_int32_t hit_count; - u_int8_t check_set; - u_int8_t invert; - char name[IPT_RECENT_NAME_LEN]; - u_int8_t side; + IPT_RECENT_NAME_LEN = XT_RECENT_NAME_LEN, }; #endif /*_IPT_RECENT_H*/ -- cgit v1.2.3 From 7e9c6eeb136a46dfd941852803b3a9dd78939b69 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:00 +0200 Subject: netfilter: Introduce NFPROTO_* constants The netfilter subsystem only supports a handful of protocols (much less than PF_*) and even non-PF protocols like ARP and pseudo-protocols like PF_BRIDGE. By creating NFPROTO_*, we can earn a few memory savings on arrays that previously were always PF_MAX-sized and keep the pseudo-protocols to ourselves. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 8c83d2e23bde..bf3afb0844f7 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -52,6 +52,16 @@ enum nf_inet_hooks { NF_INET_NUMHOOKS }; +enum { + NFPROTO_UNSPEC = 0, + NFPROTO_IPV4 = 2, + NFPROTO_ARP = 3, + NFPROTO_BRIDGE = 7, + NFPROTO_IPV6 = 10, + NFPROTO_DECNET = 12, + NFPROTO_NUMPROTO, +}; + union nf_inet_addr { __u32 all[4]; __be32 ip; @@ -138,7 +148,7 @@ extern struct ctl_path nf_net_netfilter_sysctl_path[]; extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; #endif /* CONFIG_SYSCTL */ -extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; +extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, @@ -247,7 +257,7 @@ struct nf_afinfo { int route_key_size; }; -extern const struct nf_afinfo *nf_afinfo[NPROTO]; +extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO]; static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family) { return rcu_dereference(nf_afinfo[family]); -- cgit v1.2.3 From 48dc7865aa3db9404aedc8677d9daf8f8f469ab0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:01 +0200 Subject: netfilter: netns: remove nf_*_net() wrappers Now that dev_net() exists, the usefullness of them is even less. Also they're a big problem in resolving circular header dependencies necessary for NOTRACK-in-netns patch. See below. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 53 ----------------------------------------------- 1 file changed, 53 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index bf3afb0844f7..48cfe51bfddc 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -5,13 +5,11 @@ #include #include #include -#include #include #include #include #include #include -#include #endif #include #include @@ -355,56 +353,5 @@ extern void (*nf_ct_destroy)(struct nf_conntrack *); static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} #endif -static inline struct net *nf_pre_routing_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return in->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_local_in_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return in->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_forward_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - BUG_ON(in->nd_net != out->nd_net); - return in->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_local_out_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return out->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_post_routing_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return out->nd_net; -#else - return &init_net; -#endif -} - #endif /*__KERNEL__*/ #endif /*__LINUX_NETFILTER_H*/ -- cgit v1.2.3 From dfdb8d791877052bbb527d9688d94a064721d8f7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:02 +0200 Subject: netfilter: netns nf_conntrack: add netns boilerplate One comment: #ifdefs around #include is necessary to overcome amazing compile breakages in NOTRACK-in-netns patch (see below). Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/net_namespace.h | 6 ++++++ include/net/netfilter/nf_conntrack_core.h | 4 ++-- include/net/netns/conntrack.h | 6 ++++++ 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 include/net/netns/conntrack.h (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index a8eb43cf0c7e..708009be88b6 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -16,6 +16,9 @@ #include #include #include +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#include +#endif struct proc_dir_entry; struct net_device; @@ -67,6 +70,9 @@ struct net { #endif #ifdef CONFIG_NETFILTER struct netns_xt xt; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + struct netns_ct ct; +#endif #endif struct net_generic *gen; }; diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 05760d6a706e..532aa200cbc9 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -24,8 +24,8 @@ extern unsigned int nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb); -extern int nf_conntrack_init(void); -extern void nf_conntrack_cleanup(void); +extern int nf_conntrack_init(struct net *net); +extern void nf_conntrack_cleanup(struct net *net); extern int nf_conntrack_proto_init(void); extern void nf_conntrack_proto_fini(void); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h new file mode 100644 index 000000000000..82d80b834779 --- /dev/null +++ b/include/net/netns/conntrack.h @@ -0,0 +1,6 @@ +#ifndef __NETNS_CONNTRACK_H +#define __NETNS_CONNTRACK_H + +struct netns_ct { +}; +#endif -- cgit v1.2.3 From 5a1fb391d881905e89623d78858d05b248cbc86a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:02 +0200 Subject: netfilter: netns nf_conntrack: add ->ct_net -- pointer from conntrack to netns Conntrack (struct nf_conn) gets pointer to netns: ->ct_net -- netns in which it was created. It comes from netdevice. ->ct_net is write-once field. Every conntrack in system has ->ct_net initialized, no exceptions. ->ct_net doesn't pin netns: conntracks are recycled after timeouts and pinning background traffic will prevent netns from even starting shutdown sequence. Right now every conntrack is created in init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 0741ad592da0..2b8d6efecf32 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -123,7 +123,9 @@ struct nf_conn /* Extensions */ struct nf_ct_ext *ext; - +#ifdef CONFIG_NET_NS + struct net *ct_net; +#endif struct rcu_head rcu; }; @@ -147,6 +149,17 @@ static inline u_int8_t nf_ct_protonum(const struct nf_conn *ct) /* get master conntrack via master expectation */ #define master_ct(conntr) (conntr->master) +extern struct net init_net; + +static inline struct net *nf_ct_net(const struct nf_conn *ct) +{ +#ifdef CONFIG_NET_NS + return ct->ct_net; +#else + return &init_net; +#endif +} + /* Alter reply tuple (maybe alter helper). */ extern void nf_conntrack_alter_reply(struct nf_conn *ct, @@ -251,7 +264,8 @@ extern void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data); extern void nf_conntrack_free(struct nf_conn *ct); extern struct nf_conn * -nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, +nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp); -- cgit v1.2.3 From 49ac8713b6d064adf7474080fdccebd7cce76be0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:03 +0200 Subject: netfilter: netns nf_conntrack: per-netns conntrack count Sysctls and proc files are stubbed to init_net's one. This is temporary. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 1 - include/net/netns/conntrack.h | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2b8d6efecf32..5999c5313d0b 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -288,7 +288,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb) extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; extern int nf_conntrack_checksum; -extern atomic_t nf_conntrack_count; extern int nf_conntrack_max; DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 82d80b834779..edf84714d7c7 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -1,6 +1,9 @@ #ifndef __NETNS_CONNTRACK_H #define __NETNS_CONNTRACK_H +#include + struct netns_ct { + atomic_t count; }; #endif -- cgit v1.2.3 From 400dad39d1c33fe797e47326d87a3f54d0ac5181 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:03 +0200 Subject: netfilter: netns nf_conntrack: per-netns conntrack hash * make per-netns conntrack hash Other solution is to add ->ct_net pointer to tuplehashes and still has one hash, I tried that it's ugly and requires more code deep down in protocol modules et al. * propagate netns pointer to where needed, e. g. to conntrack iterators. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 6 +++--- include/net/netfilter/nf_conntrack_core.h | 3 +-- include/net/netns/conntrack.h | 2 ++ 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5999c5313d0b..f5447f143047 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -195,11 +195,11 @@ extern void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size); extern struct nf_conntrack_tuple_hash * -__nf_conntrack_find(const struct nf_conntrack_tuple *tuple); +__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple); extern void nf_conntrack_hash_insert(struct nf_conn *ct); -extern void nf_conntrack_flush(void); +extern void nf_conntrack_flush(struct net *net); extern bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, @@ -261,7 +261,7 @@ extern struct nf_conn nf_conntrack_untracked; /* Iterate over all conntracks: if iter returns true, it's deleted. */ extern void -nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data); +nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data); extern void nf_conntrack_free(struct nf_conn *ct); extern struct nf_conn * nf_conntrack_alloc(struct net *net, diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 532aa200cbc9..1c373564396a 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -48,7 +48,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* Find a connection corresponding to a tuple. */ extern struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple); +nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); extern int __nf_conntrack_confirm(struct sk_buff *skb); @@ -71,7 +71,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *proto); -extern struct hlist_head *nf_conntrack_hash; extern spinlock_t nf_conntrack_lock ; extern struct hlist_head unconfirmed; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index edf84714d7c7..b767683f112b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -5,5 +5,7 @@ struct netns_ct { atomic_t count; + struct hlist_head *hash; + int hash_vmalloc; }; #endif -- cgit v1.2.3 From 9b03f38d0487f3908696242286d934c9b38f9d2a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:03 +0200 Subject: netfilter: netns nf_conntrack: per-netns expectations Make per-netns a) expectation hash and b) expectations count. Expectations always belongs to netns to which it's master conntrack belong. This is natural and doesn't bloat expectation. Proc files and leaf users are stubbed to init_net, this is temporary. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 20 ++++++++++++++------ include/net/netns/conntrack.h | 3 +++ 2 files changed, 17 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 4c4d894cb9b5..37a7fc1164b0 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -6,7 +6,6 @@ #define _NF_CONNTRACK_EXPECT_H #include -extern struct hlist_head *nf_ct_expect_hash; extern unsigned int nf_ct_expect_hsize; extern unsigned int nf_ct_expect_max; @@ -56,6 +55,15 @@ struct nf_conntrack_expect struct rcu_head rcu; }; +static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) +{ +#ifdef CONFIG_NET_NS + return exp->master->ct_net; /* by definition */ +#else + return &init_net; +#endif +} + struct nf_conntrack_expect_policy { unsigned int max_expected; @@ -67,17 +75,17 @@ struct nf_conntrack_expect_policy #define NF_CT_EXPECT_PERMANENT 0x1 #define NF_CT_EXPECT_INACTIVE 0x2 -int nf_conntrack_expect_init(void); -void nf_conntrack_expect_fini(void); +int nf_conntrack_expect_init(struct net *net); +void nf_conntrack_expect_fini(struct net *net); struct nf_conntrack_expect * -__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple); +__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple); +nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple); +nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index b767683f112b..e453a33f3e93 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -5,7 +5,10 @@ struct netns_ct { atomic_t count; + unsigned int expect_count; struct hlist_head *hash; + struct hlist_head *expect_hash; int hash_vmalloc; + int expect_vmalloc; }; #endif -- cgit v1.2.3 From 63c9a26264be108b52de087724673f8664570e34 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:04 +0200 Subject: netfilter: netns nf_conntrack: per-netns unconfirmed list What is confirmed connection in one netns can very well be unconfirmed in another one. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_core.h | 1 - include/net/netns/conntrack.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 1c373564396a..b4b45c541da6 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -72,6 +72,5 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *proto); extern spinlock_t nf_conntrack_lock ; -extern struct hlist_head unconfirmed; #endif /* _NF_CONNTRACK_CORE_H */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index e453a33f3e93..6ddf58e142a9 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -1,6 +1,7 @@ #ifndef __NETNS_CONNTRACK_H #define __NETNS_CONNTRACK_H +#include #include struct netns_ct { @@ -8,6 +9,7 @@ struct netns_ct { unsigned int expect_count; struct hlist_head *hash; struct hlist_head *expect_hash; + struct hlist_head unconfirmed; int hash_vmalloc; int expect_vmalloc; }; -- cgit v1.2.3 From a702a65fc1376fc1f6757ec2a6960348af3f1876 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:04 +0200 Subject: netfilter: netns nf_conntrack: pass netns pointer to nf_conntrack_in() It's deducible from skb->dev or skb->dst->dev, but we know netns at the moment of call, so pass it down and use for finding and creating conntracks. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_core.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index b4b45c541da6..e78afe7f28e3 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -20,7 +20,8 @@ /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ -extern unsigned int nf_conntrack_in(u_int8_t pf, +extern unsigned int nf_conntrack_in(struct net *net, + u_int8_t pf, unsigned int hooknum, struct sk_buff *skb); -- cgit v1.2.3 From 74c51a1497033e6ff7b8096797daca233a4a30df Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:05 +0200 Subject: netfilter: netns nf_conntrack: pass netns pointer to L4 protocol's ->error hook Again, it's deducible from skb, but we're going to use it for nf_conntrack_checksum and statistics, so just pass it from upper layer. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index d4376e97bae8..97723d33c950 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -50,7 +50,7 @@ struct nf_conntrack_l4proto /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); - int (*error)(struct sk_buff *skb, unsigned int dataoff, + int (*error)(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum); -- cgit v1.2.3 From a71996fccce4b2086a26036aa3c915365ca36926 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:07 +0200 Subject: netfilter: netns nf_conntrack: pass conntrack to nf_conntrack_event_cache() not skb This is cleaner, we already know conntrack to which event is relevant. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_ecache.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index f0b9078235c9..c1b406cecf9b 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -28,10 +28,8 @@ extern void __nf_ct_event_cache_init(struct nf_conn *ct); extern void nf_ct_event_cache_flush(void); static inline void -nf_conntrack_event_cache(enum ip_conntrack_events event, - const struct sk_buff *skb) +nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) { - struct nf_conn *ct = (struct nf_conn *)skb->nfct; struct nf_conntrack_ecache *ecache; local_bh_disable(); -- cgit v1.2.3 From 6058fa6bb96a5b6145cba10c5171f09c2783ca69 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:07 +0200 Subject: netfilter: netns nf_conntrack: per-netns event cache Heh, last minute proof-reading of this patch made me think, that this is actually unneeded, simply because "ct" pointers will be different for different conntracks in different netns, just like they are different in one netns. Not so sure anymore. [Patrick: pointers will be different, flushing can only be done while inactive though and thus it needs to be per netns] Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_ecache.h | 22 ++++++++++++++++------ include/net/netns/conntrack.h | 5 +++++ 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index c1b406cecf9b..35f814c1e2ca 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -8,6 +8,7 @@ #include #include +#include #include #ifdef CONFIG_NF_CONNTRACK_EVENTS @@ -15,9 +16,6 @@ struct nf_conntrack_ecache { struct nf_conn *ct; unsigned int events; }; -DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); - -#define CONNTRACK_ECACHE(x) (__get_cpu_var(nf_conntrack_ecache).x) extern struct atomic_notifier_head nf_conntrack_chain; extern int nf_conntrack_register_notifier(struct notifier_block *nb); @@ -25,15 +23,16 @@ extern int nf_conntrack_unregister_notifier(struct notifier_block *nb); extern void nf_ct_deliver_cached_events(const struct nf_conn *ct); extern void __nf_ct_event_cache_init(struct nf_conn *ct); -extern void nf_ct_event_cache_flush(void); +extern void nf_ct_event_cache_flush(struct net *net); static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache; local_bh_disable(); - ecache = &__get_cpu_var(nf_conntrack_ecache); + ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); if (ct != ecache->ct) __nf_ct_event_cache_init(ct); ecache->events |= event; @@ -58,6 +57,9 @@ nf_ct_expect_event(enum ip_conntrack_expect_events event, atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp); } +extern int nf_conntrack_ecache_init(struct net *net); +extern void nf_conntrack_ecache_fini(struct net *net); + #else /* CONFIG_NF_CONNTRACK_EVENTS */ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, @@ -67,7 +69,15 @@ static inline void nf_conntrack_event(enum ip_conntrack_events event, static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp) {} -static inline void nf_ct_event_cache_flush(void) {} +static inline void nf_ct_event_cache_flush(struct net *net) {} + +static inline int nf_conntrack_ecache_init(struct net *net) +{ + return 0; + +static inline void nf_conntrack_ecache_fini(struct net *net) +{ +} #endif /* CONFIG_NF_CONNTRACK_EVENTS */ #endif /*_NF_CONNTRACK_ECACHE_H*/ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 6ddf58e142a9..9d5c1623c51f 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -4,12 +4,17 @@ #include #include +struct nf_conntrack_ecache; + struct netns_ct { atomic_t count; unsigned int expect_count; struct hlist_head *hash; struct hlist_head *expect_hash; struct hlist_head unconfirmed; +#ifdef CONFIG_NF_CONNTRACK_EVENTS + struct nf_conntrack_ecache *ecache; +#endif int hash_vmalloc; int expect_vmalloc; }; -- cgit v1.2.3 From 0d55af8791bfb42e04cc456b348910582f230343 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:07 +0200 Subject: netfilter: netns nf_conntrack: per-netns statistics Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 8 ++++---- include/net/netns/conntrack.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f5447f143047..c95561050f78 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -290,12 +290,12 @@ extern unsigned int nf_conntrack_htable_size; extern int nf_conntrack_checksum; extern int nf_conntrack_max; -DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); -#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++) -#define NF_CT_STAT_INC_ATOMIC(count) \ +#define NF_CT_STAT_INC(net, count) \ + (per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++) +#define NF_CT_STAT_INC_ATOMIC(net, count) \ do { \ local_bh_disable(); \ - __get_cpu_var(nf_conntrack_stat).count++; \ + per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++; \ local_bh_enable(); \ } while (0) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 9d5c1623c51f..fc0a46d64cc9 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -12,6 +12,7 @@ struct netns_ct { struct hlist_head *hash; struct hlist_head *expect_hash; struct hlist_head unconfirmed; + struct ip_conntrack_stat *stat; #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; #endif -- cgit v1.2.3 From 802507071b72ed5025747126099cbc6d1542f596 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:08 +0200 Subject: netfilter: netns nf_conntrack: per-netns net.netfilter.nf_conntrack_count sysctl Note, sysctl table is always duplicated, this is simpler and less special-cased. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/conntrack.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index fc0a46d64cc9..2b50758df6a1 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -4,6 +4,7 @@ #include #include +struct ctl_table_header; struct nf_conntrack_ecache; struct netns_ct { @@ -15,6 +16,9 @@ struct netns_ct { struct ip_conntrack_stat *stat; #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; +#endif +#ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_header; #endif int hash_vmalloc; int expect_vmalloc; -- cgit v1.2.3 From c04d05529a6e0bf97183a2caf76a0c7f07f5b78c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:08 +0200 Subject: netfilter: netns nf_conntrack: per-netns net.netfilter.nf_conntrack_checksum sysctl Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 1 - include/net/netns/conntrack.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index c95561050f78..b76a8685b5b5 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -287,7 +287,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb) extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; -extern int nf_conntrack_checksum; extern int nf_conntrack_max; #define NF_CT_STAT_INC(net, count) \ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 2b50758df6a1..38b6dae4d3de 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -17,6 +17,7 @@ struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; #endif + int sysctl_checksum; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; #endif -- cgit v1.2.3 From c2a2c7e0cc39e7f9336cd67e8307a110bdba82f3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:08 +0200 Subject: netfilter: netns nf_conntrack: per-netns net.netfilter.nf_conntrack_log_invalid sysctl Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 15 +++++++-------- include/net/netns/conntrack.h | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 97723d33c950..7f2f43c77284 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -117,20 +117,19 @@ extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t); extern const struct nla_policy nf_ct_port_nla_policy[]; -/* Log invalid packets */ -extern unsigned int nf_ct_log_invalid; - #ifdef CONFIG_SYSCTL #ifdef DEBUG_INVALID_PACKETS -#define LOG_INVALID(proto) \ - (nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) +#define LOG_INVALID(net, proto) \ + ((net)->ct.sysctl_log_invalid == (proto) || \ + (net)->ct.sysctl_log_invalid == IPPROTO_RAW) #else -#define LOG_INVALID(proto) \ - ((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \ +#define LOG_INVALID(net, proto) \ + (((net)->ct.sysctl_log_invalid == (proto) || \ + (net)->ct.sysctl_log_invalid == IPPROTO_RAW) \ && net_ratelimit()) #endif #else -#define LOG_INVALID(proto) 0 +#define LOG_INVALID(net, proto) 0 #endif /* CONFIG_SYSCTL */ #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 38b6dae4d3de..503e37551b17 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -18,6 +18,7 @@ struct netns_ct { struct nf_conntrack_ecache *ecache; #endif int sysctl_checksum; + unsigned int sysctl_log_invalid; /* Log invalid packets */ #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; #endif -- cgit v1.2.3 From d716a4dfbbdf0d4731d596a96e5f4b0d892ac168 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:09 +0200 Subject: netfilter: netns nf_conntrack: per-netns conntrack accounting Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_acct.h | 10 +++++----- include/net/netns/conntrack.h | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 5d5ae55d54c4..03e218f0be43 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -8,6 +8,7 @@ #ifndef _NF_CONNTRACK_ACCT_H #define _NF_CONNTRACK_ACCT_H +#include #include #include #include @@ -18,8 +19,6 @@ struct nf_conn_counter { u_int64_t bytes; }; -extern int nf_ct_acct; - static inline struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct) { @@ -29,9 +28,10 @@ struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct) static inline struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) { + struct net *net = nf_ct_net(ct); struct nf_conn_counter *acct; - if (!nf_ct_acct) + if (!net->ct.sysctl_acct) return NULL; acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp); @@ -45,7 +45,7 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) extern unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir); -extern int nf_conntrack_acct_init(void); -extern void nf_conntrack_acct_fini(void); +extern int nf_conntrack_acct_init(struct net *net); +extern void nf_conntrack_acct_fini(struct net *net); #endif /* _NF_CONNTRACK_ACCT_H */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 503e37551b17..f4498a62881b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -17,10 +17,12 @@ struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; #endif + int sysctl_acct; int sysctl_checksum; unsigned int sysctl_log_invalid; /* Log invalid packets */ #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; + struct ctl_table_header *acct_sysctl_header; #endif int hash_vmalloc; int expect_vmalloc; -- cgit v1.2.3 From 3bb0d1c00f86b13bb184193a8f0189ddd6f0459f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:10 +0200 Subject: netfilter: netns nf_conntrack: GRE conntracking in netns * make keymap list per-netns * per-netns keymal lock (not strictly necessary) * flush keymap at netns stop and module unload. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_proto_gre.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 535e4219d2bb..2a10efda17fb 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -87,7 +87,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); -extern void nf_ct_gre_keymap_flush(void); +extern void nf_ct_gre_keymap_flush(struct net *net); extern void nf_nat_need_gre(void); #endif /* __KERNEL__ */ -- cgit v1.2.3 From e099a173573ce1ba171092aee7bb3c72ea686e59 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:10 +0200 Subject: netfilter: netns nat: per-netns NAT table Same story as with iptable_filter, iptables_raw tables. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/ipv4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a6ed83853dcc..b286b840493c 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -38,6 +38,7 @@ struct netns_ipv4 { struct xt_table *iptable_raw; struct xt_table *arptable_filter; struct xt_table *iptable_security; + struct xt_table *nat_table; #endif int sysctl_icmp_echo_ignore_all; -- cgit v1.2.3 From 0c4c9288ada0e6642d511ef872f10a4781a896ff Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:11 +0200 Subject: netfilter: netns nat: per-netns bysource hash Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/ipv4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b286b840493c..ece1c926b5d1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -39,6 +39,8 @@ struct netns_ipv4 { struct xt_table *arptable_filter; struct xt_table *iptable_security; struct xt_table *nat_table; + struct hlist_head *nat_bysource; + int nat_vmalloced; #endif int sysctl_icmp_echo_ignore_all; -- cgit v1.2.3 From 73e4022f78acdbe420e8c24a7afbd90f4c8f5077 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 8 Oct 2008 11:35:12 +0200 Subject: netfilter: split netfilter IPv4 defragmentation into a separate module Netfilter connection tracking requires all IPv4 packets to be defragmented. Both the socket match and the TPROXY target depend on this functionality, so this patch separates the Netfilter IPv4 defrag hooks into a separate module. Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/ipv4/nf_defrag_ipv4.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/net/netfilter/ipv4/nf_defrag_ipv4.h (limited to 'include') diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h new file mode 100644 index 000000000000..6b00ea38546b --- /dev/null +++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h @@ -0,0 +1,6 @@ +#ifndef _NF_DEFRAG_IPV4_H +#define _NF_DEFRAG_IPV4_H + +extern void nf_defrag_ipv4_enable(void); + +#endif /* _NF_DEFRAG_IPV4_H */ -- cgit v1.2.3 From 9ad2d745a23853927a19789b034d9eb2e62d78ee Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 8 Oct 2008 11:35:12 +0200 Subject: netfilter: iptables tproxy core The iptables tproxy core is a module that contains the common routines used by various tproxy related modules (TPROXY target and socket match) Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/net/netfilter/nf_tproxy_core.h (limited to 'include') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h new file mode 100644 index 000000000000..208b46f4d6d2 --- /dev/null +++ b/include/net/netfilter/nf_tproxy_core.h @@ -0,0 +1,32 @@ +#ifndef _NF_TPROXY_CORE_H +#define _NF_TPROXY_CORE_H + +#include +#include +#include +#include +#include +#include + +/* look up and get a reference to a matching socket */ +extern struct sock * +nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, bool listening); + +static inline void +nf_tproxy_put_sock(struct sock *sk) +{ + /* TIME_WAIT inet sockets have to be handled differently */ + if ((sk->sk_protocol == IPPROTO_TCP) && (sk->sk_state == TCP_TIME_WAIT)) + inet_twsk_put(inet_twsk(sk)); + else + sock_put(sk); +} + +/* assign a socket to the skb -- consumes sk */ +int +nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk); + +#endif -- cgit v1.2.3 From e84392707e10301b93121e1b74e2823db50cdf9e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 8 Oct 2008 11:35:12 +0200 Subject: netfilter: iptables TPROXY target The TPROXY target implements redirection of non-local TCP/UDP traffic to local sockets. Additionally, it's possible to manipulate the packet mark if and only if a socket has been found. (We need this because we cannot use multiple targets in the same iptables rule.) Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_TPROXY.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/netfilter/xt_TPROXY.h (limited to 'include') diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h new file mode 100644 index 000000000000..152e8f97132b --- /dev/null +++ b/include/linux/netfilter/xt_TPROXY.h @@ -0,0 +1,14 @@ +#ifndef _XT_TPROXY_H_target +#define _XT_TPROXY_H_target + +/* TPROXY target is capable of marking the packet to perform + * redirection. We can get rid of that whenever we get support for + * mutliple targets in the same rule. */ +struct xt_tproxy_target_info { + u_int32_t mark_mask; + u_int32_t mark_value; + __be32 laddr; + __be16 lport; +}; + +#endif /* _XT_TPROXY_H_target */ -- cgit v1.2.3 From 18219d3f7d6a5bc43825a41e0763158efbdb80d3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: ebtables: do centralized size checking Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 892f5b7771c7..fd085af8962d 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -215,6 +215,7 @@ struct ebt_match int (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *matchdata, unsigned int datalen); void (*destroy)(void *matchdata, unsigned int datalen); + unsigned int matchsize; struct module *me; }; @@ -229,6 +230,7 @@ struct ebt_watcher int (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *watcherdata, unsigned int datalen); void (*destroy)(void *watcherdata, unsigned int datalen); + unsigned int targetsize; struct module *me; }; @@ -244,6 +246,7 @@ struct ebt_target int (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *targetdata, unsigned int datalen); void (*destroy)(void *targetdata, unsigned int datalen); + unsigned int targetsize; struct module *me; }; -- cgit v1.2.3 From 19eda879a136889110c692dec4c2ab59e0e43cef Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: change return types of check functions for Ebtables extensions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index fd085af8962d..5f71719b7a27 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -211,8 +211,7 @@ struct ebt_match int (*match)(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *matchdata, unsigned int datalen); - /* 0 == let it in */ - int (*check)(const char *tablename, unsigned int hookmask, + bool (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *matchdata, unsigned int datalen); void (*destroy)(void *matchdata, unsigned int datalen); unsigned int matchsize; @@ -226,8 +225,7 @@ struct ebt_watcher void (*watcher)(const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *watcherdata, unsigned int datalen); - /* 0 == let it in */ - int (*check)(const char *tablename, unsigned int hookmask, + bool (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *watcherdata, unsigned int datalen); void (*destroy)(void *watcherdata, unsigned int datalen); unsigned int targetsize; @@ -242,8 +240,7 @@ struct ebt_target int (*target)(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *targetdata, unsigned int datalen); - /* 0 == let it in */ - int (*check)(const char *tablename, unsigned int hookmask, + bool (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *targetdata, unsigned int datalen); void (*destroy)(void *targetdata, unsigned int datalen); unsigned int targetsize; -- cgit v1.2.3 From 8cc784eec6676b58e7f60419c88179aaa97bf71c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: change return types of match functions for ebtables extensions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 5f71719b7a27..f9fda2c442a0 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -207,8 +207,7 @@ struct ebt_match { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - /* 0 == it matches */ - int (*match)(const struct sk_buff *skb, const struct net_device *in, + bool (*match)(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *matchdata, unsigned int datalen); bool (*check)(const char *tablename, unsigned int hookmask, -- cgit v1.2.3 From 0ac6ab1f7915fc820ca0cf8f597290dbb249edcc Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: Change return types of targets/watchers for Ebtables extensions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index f9fda2c442a0..097432b94c55 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -221,7 +221,7 @@ struct ebt_watcher { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - void (*watcher)(const struct sk_buff *skb, unsigned int hooknr, + unsigned int (*watcher)(const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *watcherdata, unsigned int datalen); bool (*check)(const char *tablename, unsigned int hookmask, @@ -235,8 +235,8 @@ struct ebt_target { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - /* returns one of the standard verdicts */ - int (*target)(struct sk_buff *skb, unsigned int hooknr, + /* returns one of the standard EBT_* verdicts */ + unsigned int (*target)(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *targetdata, unsigned int datalen); bool (*check)(const char *tablename, unsigned int hookmask, -- cgit v1.2.3 From 001a18d369f4813ed792629ff4a9a6ade2a4a031 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:14 +0200 Subject: netfilter: add dummy members to Ebtables code to ease transition to Xtables Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 097432b94c55..82f854bf37e7 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -214,6 +214,8 @@ struct ebt_match const struct ebt_entry *e, void *matchdata, unsigned int datalen); void (*destroy)(void *matchdata, unsigned int datalen); unsigned int matchsize; + u_int8_t revision; + u_int8_t family; struct module *me; }; @@ -228,6 +230,8 @@ struct ebt_watcher const struct ebt_entry *e, void *watcherdata, unsigned int datalen); void (*destroy)(void *watcherdata, unsigned int datalen); unsigned int targetsize; + u_int8_t revision; + u_int8_t family; struct module *me; }; @@ -243,6 +247,8 @@ struct ebt_target const struct ebt_entry *e, void *targetdata, unsigned int datalen); void (*destroy)(void *targetdata, unsigned int datalen); unsigned int targetsize; + u_int8_t revision; + u_int8_t family; struct module *me; }; -- cgit v1.2.3 From 2d06d4a5cc107046508d860a0b47dbc43b829b79 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:15 +0200 Subject: netfilter: change Ebtables function signatures to match Xtables's Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 43 +++++++++++++++++++------------ 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 82f854bf37e7..f20a57da7a25 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -31,6 +31,9 @@ * The 4 lsb are more than enough to store the verdict. */ #define EBT_VERDICT_BITS 0x0000000F +struct xt_match; +struct xt_target; + struct ebt_counter { uint64_t pcnt; @@ -208,11 +211,13 @@ struct ebt_match struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; bool (*match)(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *matchdata, - unsigned int datalen); - bool (*check)(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *matchdata, unsigned int datalen); - void (*destroy)(void *matchdata, unsigned int datalen); + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop); + bool (*checkentry)(const char *table, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask); + void (*destroy)(const struct xt_match *match, void *matchinfo); unsigned int matchsize; u_int8_t revision; u_int8_t family; @@ -223,12 +228,14 @@ struct ebt_watcher { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - unsigned int (*watcher)(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *watcherdata, unsigned int datalen); - bool (*check)(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *watcherdata, unsigned int datalen); - void (*destroy)(void *watcherdata, unsigned int datalen); + unsigned int (*target)(struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + unsigned int hook_num, const struct xt_target *target, + const void *targinfo); + bool (*checkentry)(const char *table, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask); + void (*destroy)(const struct xt_target *target, void *targinfo); unsigned int targetsize; u_int8_t revision; u_int8_t family; @@ -240,12 +247,14 @@ struct ebt_target struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; /* returns one of the standard EBT_* verdicts */ - unsigned int (*target)(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *targetdata, unsigned int datalen); - bool (*check)(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *targetdata, unsigned int datalen); - void (*destroy)(void *targetdata, unsigned int datalen); + unsigned int (*target)(struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + unsigned int hook_num, const struct xt_target *target, + const void *targinfo); + bool (*checkentry)(const char *table, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask); + void (*destroy)(const struct xt_target *target, void *targinfo); unsigned int targetsize; u_int8_t revision; u_int8_t family; -- cgit v1.2.3 From 043ef46c7690bfdbd5b012e15812a14a19ca5604 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:15 +0200 Subject: netfilter: move Ebtables to use Xtables Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index f20a57da7a25..d3f9243b9d9b 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -124,7 +124,7 @@ struct ebt_entry_match { union { char name[EBT_FUNCTION_MAXNAMELEN]; - struct ebt_match *match; + struct xt_match *match; } u; /* size of data */ unsigned int match_size; @@ -135,7 +135,7 @@ struct ebt_entry_watcher { union { char name[EBT_FUNCTION_MAXNAMELEN]; - struct ebt_watcher *watcher; + struct xt_target *watcher; } u; /* size of data */ unsigned int watcher_size; @@ -146,7 +146,7 @@ struct ebt_entry_target { union { char name[EBT_FUNCTION_MAXNAMELEN]; - struct ebt_target *target; + struct xt_target *target; } u; /* size of data */ unsigned int target_size; -- cgit v1.2.3 From 66bff35b722956cc2423f55fcf1b69cefa24ef8b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:16 +0200 Subject: netfilter: remove unused Ebtables functions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index d3f9243b9d9b..568a690f6a62 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -302,12 +302,6 @@ struct ebt_table ~(__alignof__(struct ebt_replace)-1)) extern int ebt_register_table(struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); -extern int ebt_register_match(struct ebt_match *match); -extern void ebt_unregister_match(struct ebt_match *match); -extern int ebt_register_watcher(struct ebt_watcher *watcher); -extern void ebt_unregister_watcher(struct ebt_watcher *watcher); -extern int ebt_register_target(struct ebt_target *target); -extern void ebt_unregister_target(struct ebt_target *target); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, struct ebt_table *table); -- cgit v1.2.3 From 367c679007fa4f990eb7ee381326ec59d8148b0e Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:17 +0200 Subject: netfilter: xtables: do centralized checkentry call (1/2) It used to be that {ip,ip6,etc}_tables called extension->checkentry themselves, but this can be moved into the xtables core. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 6989b22716e6..85aa42785a5e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -330,10 +330,12 @@ extern void xt_unregister_matches(struct xt_match *match, unsigned int n); extern int xt_check_match(const struct xt_match *match, unsigned short family, unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto); + unsigned short proto, int inv_proto, + const void *entry, void *matchinfo); extern int xt_check_target(const struct xt_target *target, unsigned short family, unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto); + unsigned short proto, int inv_proto, + const void *entry, void *targinfo); extern struct xt_table *xt_register_table(struct net *net, struct xt_table *table, -- cgit v1.2.3 From f7108a20dee44e5bb037f9e48f6a207b42e6ae1c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:18 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (1/6) The function signatures for Xtables extensions have grown over time. It involves a lot of typing/replication, and also a bit of stack space even if they are not used. Realize an NFWS2008 idea and pack them into structs. The skb remains outside of the struct so gcc can continue to apply its optimizations. This patch does this for match extensions' match functions. A few ambiguities have also been addressed. The "offset" parameter for example has been renamed to "fragoff" (there are so many different offsets already) and "protoff" to "thoff" (there is more than just one protocol here, so clarify). Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 85aa42785a5e..bcd40ec83257 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -173,6 +173,26 @@ struct xt_counters_info #include +/** + * struct xt_match_param - parameters for match extensions' match functions + * + * @in: input netdevice + * @out: output netdevice + * @match: struct xt_match through which this function was invoked + * @matchinfo: per-match data + * @fragoff: packet is a fragment, this is the data offset + * @thoff: position of transport header relative to skb->data + * @hotdrop: drop packet if we had inspection problems + */ +struct xt_match_param { + const struct net_device *in, *out; + const struct xt_match *match; + const void *matchinfo; + int fragoff; + unsigned int thoff; + bool *hotdrop; +}; + struct xt_match { struct list_head list; @@ -185,13 +205,7 @@ struct xt_match non-linear skb, using skb_header_pointer and skb_ip_make_writable. */ bool (*match)(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop); + const struct xt_match_param *); /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ -- cgit v1.2.3 From 9b4fce7a3508a9776534188b6065b206a9608ccf Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:18 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (2/6) This patch does this for match extensions' checkentry functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index bcd40ec83257..763a704ce83f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -193,6 +193,25 @@ struct xt_match_param { bool *hotdrop; }; +/** + * struct xt_mtchk_param - parameters for match extensions' + * checkentry functions + * + * @table: table the rule is tried to be inserted into + * @entryinfo: the family-specific rule data + * (struct ipt_ip, ip6t_ip, ebt_entry) + * @match: struct xt_match through which this function was invoked + * @matchinfo: per-match data + * @hook_mask: via which hooks the new rule is reachable + */ +struct xt_mtchk_param { + const char *table; + const void *entryinfo; + const struct xt_match *match; + void *matchinfo; + unsigned int hook_mask; +}; + struct xt_match { struct list_head list; @@ -208,12 +227,7 @@ struct xt_match const struct xt_match_param *); /* Called when user tries to insert an entry of this type. */ - /* Should return true or false. */ - bool (*checkentry)(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask); + bool (*checkentry)(const struct xt_mtchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_match *match, void *matchinfo); @@ -342,10 +356,8 @@ extern void xt_unregister_match(struct xt_match *target); extern int xt_register_matches(struct xt_match *match, unsigned int n); extern void xt_unregister_matches(struct xt_match *match, unsigned int n); -extern int xt_check_match(const struct xt_match *match, unsigned short family, - unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto, - const void *entry, void *matchinfo); +extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family, + unsigned int size, u_int8_t proto, bool inv_proto); extern int xt_check_target(const struct xt_target *target, unsigned short family, unsigned int size, const char *table, unsigned int hook, unsigned short proto, int inv_proto, -- cgit v1.2.3 From 6be3d8598e883fb632edf059ba2f8d1b9f4da138 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (3/6) This patch does this for match extensions' destroy functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 763a704ce83f..c79c88380149 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -212,6 +212,12 @@ struct xt_mtchk_param { unsigned int hook_mask; }; +/* Match destructor parameters */ +struct xt_mtdtor_param { + const struct xt_match *match; + void *matchinfo; +}; + struct xt_match { struct list_head list; @@ -230,7 +236,7 @@ struct xt_match bool (*checkentry)(const struct xt_mtchk_param *); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_match *match, void *matchinfo); + void (*destroy)(const struct xt_mtdtor_param *); /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, void *src); -- cgit v1.2.3 From 7eb3558655aaa87a3e71a0c065dfaddda521fa6d Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (4/6) This patch does this for target extensions' target functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c79c88380149..46d0cb1ad340 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -218,6 +218,22 @@ struct xt_mtdtor_param { void *matchinfo; }; +/** + * struct xt_target_param - parameters for target extensions' target functions + * + * @hooknum: hook through which this target was invoked + * @target: struct xt_target through which this function was invoked + * @targinfo: per-target data + * + * Other fields see above. + */ +struct xt_target_param { + const struct net_device *in, *out; + unsigned int hooknum; + const struct xt_target *target; + const void *targinfo; +}; + struct xt_match { struct list_head list; @@ -269,11 +285,7 @@ struct xt_target must now handle non-linear skbs, using skb_copy_bits and skb_ip_make_writable. */ unsigned int (*target)(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo); + const struct xt_target_param *); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be -- cgit v1.2.3 From af5d6dc200eb0fcc6fbd3df1ab4d8969004cb37f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (5/6) This patch does this for target extensions' checkentry functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 29 ++++++++++++++++++++--------- include/linux/netfilter_bridge/ebtables.h | 4 ++-- 2 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 46d0cb1ad340..8daeb496ba7a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -234,6 +234,23 @@ struct xt_target_param { const void *targinfo; }; +/** + * struct xt_tgchk_param - parameters for target extensions' + * checkentry functions + * + * @entryinfo: the family-specific rule data + * (struct ipt_entry, ip6t_entry, arpt_entry, ebt_entry) + * + * Other fields see above. + */ +struct xt_tgchk_param { + const char *table; + void *entryinfo; + const struct xt_target *target; + void *targinfo; + unsigned int hook_mask; +}; + struct xt_match { struct list_head list; @@ -291,11 +308,7 @@ struct xt_target hook_mask is a bitmask of hooks from which it can be called. */ /* Should return true or false. */ - bool (*checkentry)(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask); + bool (*checkentry)(const struct xt_tgchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_target *target, void *targinfo); @@ -376,10 +389,8 @@ extern void xt_unregister_matches(struct xt_match *match, unsigned int n); extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family, unsigned int size, u_int8_t proto, bool inv_proto); -extern int xt_check_target(const struct xt_target *target, unsigned short family, - unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto, - const void *entry, void *targinfo); +extern int xt_check_target(struct xt_tgchk_param *, u_int8_t family, + unsigned int size, u_int8_t proto, bool inv_proto); extern struct xt_table *xt_register_table(struct net *net, struct xt_table *table, diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 568a690f6a62..d45e29cd1cfb 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -310,9 +310,9 @@ extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, #define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) /* True if the hook mask denotes that the rule is in a base chain, * used in the check() functions */ -#define BASE_CHAIN (hookmask & (1 << NF_BR_NUMHOOKS)) +#define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS)) /* Clear the bit in the hook mask that tells if the rule is on a base chain */ -#define CLEAR_BASE_CHAIN_BIT (hookmask &= ~(1 << NF_BR_NUMHOOKS)) +#define CLEAR_BASE_CHAIN_BIT (par->hook_mask &= ~(1 << NF_BR_NUMHOOKS)) /* True if the target is not a standard target */ #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) -- cgit v1.2.3 From a2df1648ba615dd5908e9a1fa7b2f133fa302487 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (6/6) This patch does this for target extensions' destroy functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8daeb496ba7a..e3b3b669a143 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -251,6 +251,12 @@ struct xt_tgchk_param { unsigned int hook_mask; }; +/* Target destructor parameters */ +struct xt_tgdtor_param { + const struct xt_target *target; + void *targinfo; +}; + struct xt_match { struct list_head list; @@ -311,7 +317,7 @@ struct xt_target bool (*checkentry)(const struct xt_tgchk_param *); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_target *target, void *targinfo); + void (*destroy)(const struct xt_tgdtor_param *); /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, void *src); -- cgit v1.2.3 From 916a917dfec18535ff9e2afdafba82e6279eb4f4 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:20 +0200 Subject: netfilter: xtables: provide invoked family value to extensions By passing in the family through which extensions were invoked, a bit of data space can be reclaimed. The "family" member will be added to the parameter structures and the check functions be adjusted. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e3b3b669a143..be41b609c88f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -183,6 +183,8 @@ struct xt_counters_info * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data * @hotdrop: drop packet if we had inspection problems + * @family: Actual NFPROTO_* through which the function is invoked + * (helpful when match->family == NFPROTO_UNSPEC) */ struct xt_match_param { const struct net_device *in, *out; @@ -191,6 +193,7 @@ struct xt_match_param { int fragoff; unsigned int thoff; bool *hotdrop; + u_int8_t family; }; /** @@ -210,12 +213,14 @@ struct xt_mtchk_param { const struct xt_match *match; void *matchinfo; unsigned int hook_mask; + u_int8_t family; }; /* Match destructor parameters */ struct xt_mtdtor_param { const struct xt_match *match; void *matchinfo; + u_int8_t family; }; /** @@ -232,6 +237,7 @@ struct xt_target_param { unsigned int hooknum; const struct xt_target *target; const void *targinfo; + u_int8_t family; }; /** @@ -249,12 +255,14 @@ struct xt_tgchk_param { const struct xt_target *target; void *targinfo; unsigned int hook_mask; + u_int8_t family; }; /* Target destructor parameters */ struct xt_tgdtor_param { const struct xt_target *target; void *targinfo; + u_int8_t family; }; struct xt_match @@ -393,9 +401,9 @@ extern void xt_unregister_match(struct xt_match *target); extern int xt_register_matches(struct xt_match *match, unsigned int n); extern void xt_unregister_matches(struct xt_match *match, unsigned int n); -extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family, +extern int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); -extern int xt_check_target(struct xt_tgchk_param *, u_int8_t family, +extern int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); extern struct xt_table *xt_register_table(struct net *net, -- cgit v1.2.3 From 3bd653c8455bc7991bae77968702b31c8f5df883 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:54:51 -0700 Subject: netns: add net parameter to IP6_INC_STATS Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index dfa7ae3c5607..26c17988b905 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -129,7 +129,8 @@ extern struct ctl_path net_ipv6_ctl_path[]; /* MIBs */ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); -#define IP6_INC_STATS(idev,field) _DEVINC(ipv6, , idev, field) +#define IP6_INC_STATS(net, idev,field) ({ (void)(net); \ + _DEVINC(ipv6, , idev, field); }) #define IP6_INC_STATS_BH(idev,field) _DEVINC(ipv6, _BH, idev, field) #define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val) -- cgit v1.2.3 From 483a47d2fe794328d29950fe00ce26dd405d9437 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 11:09:27 -0700 Subject: ipv6: added net argument to IP6_INC_STATS_BH Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 26c17988b905..e7732d3a3f92 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -131,7 +131,8 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); #define IP6_INC_STATS(net, idev,field) ({ (void)(net); \ _DEVINC(ipv6, , idev, field); }) -#define IP6_INC_STATS_BH(idev,field) _DEVINC(ipv6, _BH, idev, field) +#define IP6_INC_STATS_BH(net, idev,field) ({ (void)(net); \ + _DEVINC(ipv6, _BH, idev, field); }) #define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val) DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); -- cgit v1.2.3 From 821d57776d4dda47ef5f0c33fdb3c761214b2f9f Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:32:43 -0700 Subject: ipv6: added net argument to IP6_ADD_STATS_BH Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e7732d3a3f92..ac0487bab8b1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -133,7 +133,8 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); _DEVINC(ipv6, , idev, field); }) #define IP6_INC_STATS_BH(net, idev,field) ({ (void)(net); \ _DEVINC(ipv6, _BH, idev, field); }) -#define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val) +#define IP6_ADD_STATS_BH(net, idev,field,val) ({ (void)(net); \ + _DEVADD(ipv6, _BH, idev, field, val); }) DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); -- cgit v1.2.3 From a862f6a6dc89c57dd3a959a1636b59f0c27169c2 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:33:06 -0700 Subject: ipv6: added net argument to ICMP6_INC_STATS Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ac0487bab8b1..744fe7443cb9 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -139,7 +139,8 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); -#define ICMP6_INC_STATS(idev, field) _DEVINC(icmpv6, , idev, field) +#define ICMP6_INC_STATS(net, idev, field) ({ (void)(net); \ + _DEVINC(icmpv6, , idev, field); }) #define ICMP6_INC_STATS_BH(idev, field) _DEVINC(icmpv6, _BH, idev, field) #define ICMP6MSGOUT_INC_STATS(idev, field) \ -- cgit v1.2.3 From e41b5368e029e79d11acb5952bc73284e5026c62 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:33:26 -0700 Subject: ipv6: added net argument to ICMP6_INC_STATS_BH Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 744fe7443cb9..5107cd92a467 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -141,7 +141,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); #define ICMP6_INC_STATS(net, idev, field) ({ (void)(net); \ _DEVINC(icmpv6, , idev, field); }) -#define ICMP6_INC_STATS_BH(idev, field) _DEVINC(icmpv6, _BH, idev, field) +#define ICMP6_INC_STATS_BH(net, idev, field) ({ (void)(net); \ + _DEVINC(icmpv6, _BH, idev, field); }) #define ICMP6MSGOUT_INC_STATS(idev, field) \ _DEVINC(icmpv6msg, , idev, field +256) -- cgit v1.2.3 From 5c5d244bd388fe498dd7f5f57cb7770aae40b9ab Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:33:50 -0700 Subject: ipv6: added net argument to ICMP6MSGOUT_INC_STATS Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5107cd92a467..7f5a8dec1ae9 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -144,8 +144,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); #define ICMP6_INC_STATS_BH(net, idev, field) ({ (void)(net); \ _DEVINC(icmpv6, _BH, idev, field); }) -#define ICMP6MSGOUT_INC_STATS(idev, field) \ - _DEVINC(icmpv6msg, , idev, field +256) +#define ICMP6MSGOUT_INC_STATS(net, idev, field) ({ (void)(net); \ + _DEVINC(icmpv6msg, , idev, field +256); }) #define ICMP6MSGOUT_INC_STATS_BH(idev, field) \ _DEVINC(icmpv6msg, _BH, idev, field +256) #define ICMP6MSGIN_INC_STATS(idev, field) \ -- cgit v1.2.3 From 5a57d4c7fdac0e227efe8c5739fcbb263d9ae993 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:34:14 -0700 Subject: ipv6: added net argument to ICMP6MSGOUT_INC_STATS_BH Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 7f5a8dec1ae9..4736d8f1f286 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -146,8 +146,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); #define ICMP6MSGOUT_INC_STATS(net, idev, field) ({ (void)(net); \ _DEVINC(icmpv6msg, , idev, field +256); }) -#define ICMP6MSGOUT_INC_STATS_BH(idev, field) \ - _DEVINC(icmpv6msg, _BH, idev, field +256) +#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) ({ (void)(net); \ + _DEVINC(icmpv6msg, _BH, idev, field +256); }) #define ICMP6MSGIN_INC_STATS(idev, field) \ _DEVINC(icmpv6msg, , idev, field) #define ICMP6MSGIN_INC_STATS_BH(idev, field) \ -- cgit v1.2.3 From a712d3e859b78edc44d5664d867626d3022bd18e Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:34:35 -0700 Subject: ipv6: ICMP6MSGIN_INC_STATS is not used Removed. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4736d8f1f286..01da23c061e3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -148,8 +148,6 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); _DEVINC(icmpv6msg, , idev, field +256); }) #define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) ({ (void)(net); \ _DEVINC(icmpv6msg, _BH, idev, field +256); }) -#define ICMP6MSGIN_INC_STATS(idev, field) \ - _DEVINC(icmpv6msg, , idev, field) #define ICMP6MSGIN_INC_STATS_BH(idev, field) \ _DEVINC(icmpv6msg, _BH, idev, field) -- cgit v1.2.3 From 55d43808eb26e689dacb95b11f956a3b1a56a5f3 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:34:54 -0700 Subject: ipv6: added net argument to ICMP6MSGIN_INC_STATS_BH Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 01da23c061e3..47a76bfbf763 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -148,8 +148,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); _DEVINC(icmpv6msg, , idev, field +256); }) #define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) ({ (void)(net); \ _DEVINC(icmpv6msg, _BH, idev, field +256); }) -#define ICMP6MSGIN_INC_STATS_BH(idev, field) \ - _DEVINC(icmpv6msg, _BH, idev, field) +#define ICMP6MSGIN_INC_STATS_BH(net, idev, field) ({ (void)(net); \ + _DEVINC(icmpv6msg, _BH, idev, field); }) struct ip6_ra_chain { -- cgit v1.2.3 From 087fe24033c4280a15b03cce41eaec844c92f8e5 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:35:11 -0700 Subject: ipv6: added net argument to _DEVINC/_DEVADD Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 47a76bfbf763..d0538dd2c44b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -110,17 +110,19 @@ struct frag_hdr { extern int sysctl_mld_max_msf; extern struct ctl_path net_ipv6_ctl_path[]; -#define _DEVINC(statname, modifier, idev, field) \ +#define _DEVINC(net, statname, modifier, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ + (void)(net); \ if (likely(_idev != NULL)) \ SNMP_INC_STATS##modifier((_idev)->stats.statname, (field)); \ SNMP_INC_STATS##modifier(statname##_statistics, (field)); \ }) -#define _DEVADD(statname, modifier, idev, field, val) \ +#define _DEVADD(net, statname, modifier, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ + (void)(net); \ if (likely(_idev != NULL)) \ SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \ SNMP_ADD_STATS##modifier(statname##_statistics, (field), (val));\ @@ -129,27 +131,27 @@ extern struct ctl_path net_ipv6_ctl_path[]; /* MIBs */ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); -#define IP6_INC_STATS(net, idev,field) ({ (void)(net); \ - _DEVINC(ipv6, , idev, field); }) -#define IP6_INC_STATS_BH(net, idev,field) ({ (void)(net); \ - _DEVINC(ipv6, _BH, idev, field); }) -#define IP6_ADD_STATS_BH(net, idev,field,val) ({ (void)(net); \ - _DEVADD(ipv6, _BH, idev, field, val); }) +#define IP6_INC_STATS(net, idev,field) \ + _DEVINC(net, ipv6, , idev, field) +#define IP6_INC_STATS_BH(net, idev,field) \ + _DEVINC(net, ipv6, _BH, idev, field) +#define IP6_ADD_STATS_BH(net, idev,field,val) \ + _DEVADD(net, ipv6, _BH, idev, field, val) DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); -#define ICMP6_INC_STATS(net, idev, field) ({ (void)(net); \ - _DEVINC(icmpv6, , idev, field); }) -#define ICMP6_INC_STATS_BH(net, idev, field) ({ (void)(net); \ - _DEVINC(icmpv6, _BH, idev, field); }) - -#define ICMP6MSGOUT_INC_STATS(net, idev, field) ({ (void)(net); \ - _DEVINC(icmpv6msg, , idev, field +256); }) -#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) ({ (void)(net); \ - _DEVINC(icmpv6msg, _BH, idev, field +256); }) -#define ICMP6MSGIN_INC_STATS_BH(net, idev, field) ({ (void)(net); \ - _DEVINC(icmpv6msg, _BH, idev, field); }) +#define ICMP6_INC_STATS(net, idev, field) \ + _DEVINC(net, icmpv6, , idev, field) +#define ICMP6_INC_STATS_BH(net, idev, field) \ + _DEVINC(net, icmpv6, _BH, idev, field) + +#define ICMP6MSGOUT_INC_STATS(net, idev, field) \ + _DEVINC(net, icmpv6msg, , idev, field +256) +#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) \ + _DEVINC(net, icmpv6msg, _BH, idev, field +256) +#define ICMP6MSGIN_INC_STATS_BH(net, idev, field) \ + _DEVINC(net, icmpv6msg, _BH, idev, field) struct ip6_ra_chain { -- cgit v1.2.3 From 9261e53701121f83eb9482347d68833e95315362 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:36:03 -0700 Subject: ipv6: making ip and icmp statistics per/namespace Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 10 ++-------- include/net/netns/mib.h | 3 +++ 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d0538dd2c44b..6d5b58a1c743 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -113,23 +113,20 @@ extern struct ctl_path net_ipv6_ctl_path[]; #define _DEVINC(net, statname, modifier, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ - (void)(net); \ if (likely(_idev != NULL)) \ SNMP_INC_STATS##modifier((_idev)->stats.statname, (field)); \ - SNMP_INC_STATS##modifier(statname##_statistics, (field)); \ + SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\ }) #define _DEVADD(net, statname, modifier, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ - (void)(net); \ if (likely(_idev != NULL)) \ SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \ - SNMP_ADD_STATS##modifier(statname##_statistics, (field), (val));\ + SNMP_ADD_STATS##modifier((net)->mib.statname##_statistics, (field), (val));\ }) /* MIBs */ -DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); #define IP6_INC_STATS(net, idev,field) \ _DEVINC(net, ipv6, , idev, field) @@ -138,9 +135,6 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); #define IP6_ADD_STATS_BH(net, idev,field,val) \ _DEVADD(net, ipv6, _BH, idev, field, val) -DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); -DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); - #define ICMP6_INC_STATS(net, idev, field) \ _DEVINC(net, icmpv6, , idev, field) #define ICMP6_INC_STATS_BH(net, idev, field) \ diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index 4e58f0519ce8..10cb7c336de5 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -16,6 +16,9 @@ struct netns_mib { struct proc_dir_entry *proc_net_devsnmp6; DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6); DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6); + DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); + DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); + DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); #endif }; -- cgit v1.2.3 From 3c689b7320ae6f20dba6a8b71806a6c6fd604ee8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Oct 2008 14:18:04 -0700 Subject: inet: cleanup of local_port_range I noticed sysctl_local_port_range[] and its associated seqlock sysctl_local_port_range_lock were on separate cache lines. Moreover, sysctl_local_port_range[] was close to unrelated variables, highly modified, leading to cache misses. Moving these two variables in a structure can help data locality and moving this structure to read_mostly section helps sharing of this data among cpus. Cleanup of extern declarations (moved in include file where they belong), and use of inet_get_local_port_range() accessor instead of direct access to ports values. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index d678ea3d474a..1cbccaf0de3f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -178,6 +178,10 @@ extern unsigned long snmp_fold_field(void *mib[], int offt); extern int snmp_mib_init(void *ptr[2], size_t mibsize); extern void snmp_mib_free(void *ptr[2]); +extern struct local_ports { + seqlock_t lock; + int range[2]; +} sysctl_local_ports; extern void inet_get_local_port_range(int *low, int *high); extern int sysctl_ip_default_ttl; -- cgit v1.2.3 From 8e1ee18c332e08bee9d8bd66e63cd564fbf17fc2 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 8 Oct 2008 14:18:39 -0700 Subject: sctp: Rework the tsn map to use generic bitmap. The tsn map currently use is 4K large and is stuck inside the sctp_association structure making memory references REALLY expensive. What we really need is at most 4K worth of bits so the biggest map we would have is 512 bytes. Also, the map is only really usefull when we have gaps to store and report. As such, starting with minimal map of say 32 TSNs (bits) should be enough for normal low-loss operations. We can grow the map by some multiple of 32 along with some extra room any time we receive the TSN which would put us outside of the map boundry. As we close gaps, we can shift the map to rebase it on the latest TSN we've seen. This saves 4088 bytes per association just in the map alone along savings from the now unnecessary structure members. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 +++- include/net/sctp/structs.h | 1 - include/net/sctp/tsnmap.h | 39 +++++++++------------------------------ 3 files changed, 12 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index c32ddf0279c8..b05b0557211f 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -261,7 +261,9 @@ enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 }; * must be less than 65535 (2^16 - 1), or we will have overflow * problems creating SACK's. */ -#define SCTP_TSN_MAP_SIZE 2048 +#define SCTP_TSN_MAP_INITIAL BITS_PER_LONG +#define SCTP_TSN_MAP_INCREMENT SCTP_TSN_MAP_INITIAL +#define SCTP_TSN_MAP_SIZE 4096 #define SCTP_TSN_MAX_GAP 65535 /* We will not record more than this many duplicate TSNs between two diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 94c62e4ddea9..9661d7b765f0 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1545,7 +1545,6 @@ struct sctp_association { * in tsn_map--we get it by calling sctp_tsnmap_get_ctsn(). */ struct sctp_tsnmap tsn_map; - __u8 _map[sctp_tsnmap_storage_size(SCTP_TSN_MAP_SIZE)]; /* Ack State : This flag indicates if the next received * : packet is to be responded to with a diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index 099211bf998d..6dabbee8bbd3 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -60,18 +60,7 @@ struct sctp_tsnmap { * It points at one of the two buffers with which we will * ping-pong between. */ - __u8 *tsn_map; - - /* This marks the tsn which overflows the tsn_map, when the - * cumulative ack point reaches this point we know we can switch - * maps (tsn_map and overflow_map swap). - */ - __u32 overflow_tsn; - - /* This is the overflow array for tsn_map. - * It points at one of the other ping-pong buffers. - */ - __u8 *overflow_map; + unsigned long *tsn_map; /* This is the TSN at tsn_map[0]. */ __u32 base_tsn; @@ -89,15 +78,15 @@ struct sctp_tsnmap { */ __u32 cumulative_tsn_ack_point; + /* This is the highest TSN we've marked. */ + __u32 max_tsn_seen; + /* This is the minimum number of TSNs we can track. This corresponds * to the size of tsn_map. Note: the overflow_map allows us to * potentially track more than this quantity. */ __u16 len; - /* This is the highest TSN we've marked. */ - __u32 max_tsn_seen; - /* Data chunks pending receipt. used by SCTP_STATUS sockopt */ __u16 pending_data; @@ -110,24 +99,17 @@ struct sctp_tsnmap { /* Record gap ack block information here. */ struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; - - int malloced; - - __u8 raw_map[0]; }; struct sctp_tsnmap_iter { __u32 start; }; -/* This macro assists in creation of external storage for variable length - * internal buffers. We double allocate so the overflow map works. - */ -#define sctp_tsnmap_storage_size(count) (sizeof(__u8) * (count) * 2) - /* Initialize a block of memory as a tsnmap. */ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *, __u16 len, - __u32 initial_tsn); + __u32 initial_tsn, gfp_t gfp); + +void sctp_tsnmap_free(struct sctp_tsnmap *map); /* Test the tracking state of this TSN. * Returns: @@ -138,7 +120,7 @@ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *, __u16 len, int sctp_tsnmap_check(const struct sctp_tsnmap *, __u32 tsn); /* Mark this TSN as seen. */ -void sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn); +int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn); /* Mark this TSN and all lower as seen. */ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn); @@ -183,10 +165,7 @@ static inline struct sctp_gap_ack_block *sctp_tsnmap_get_gabs(struct sctp_tsnmap /* Is there a gap in the TSN map? */ static inline int sctp_tsnmap_has_gap(const struct sctp_tsnmap *map) { - int has_gap; - - has_gap = (map->cumulative_tsn_ack_point != map->max_tsn_seen); - return has_gap; + return (map->cumulative_tsn_ack_point != map->max_tsn_seen); } /* Mark a duplicate TSN. Note: limit the storage of duplicate TSN -- cgit v1.2.3 From 02015180e2509afd2e3fe3790a333b30708a116b Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 8 Oct 2008 14:19:01 -0700 Subject: sctp: shrink sctp_tsnmap some more by removing gabs array The gabs array in the sctp_tsnmap structure is only used in one place, sctp_make_sack(). As such, carrying the array around in the sctp_tsnmap and thus directly in the sctp_association is rather pointless since most of the time it's just taking up space. Now, let sctp_make_sack create and populate it and then throw it away when it's done. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/tsnmap.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index 6dabbee8bbd3..4aabc5a96cf6 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -94,11 +94,8 @@ struct sctp_tsnmap { * every SACK. Store up to SCTP_MAX_DUP_TSNS worth of * information. */ - __be32 dup_tsns[SCTP_MAX_DUP_TSNS]; __u16 num_dup_tsns; - - /* Record gap ack block information here. */ - struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; + __be32 dup_tsns[SCTP_MAX_DUP_TSNS]; }; struct sctp_tsnmap_iter { @@ -151,17 +148,12 @@ static inline __be32 *sctp_tsnmap_get_dups(struct sctp_tsnmap *map) } /* How many gap ack blocks do we have recorded? */ -__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map); +__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, + struct sctp_gap_ack_block *gabs); /* Refresh the count on pending data. */ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map); -/* Return pointer to gap ack blocks as needed by SACK. */ -static inline struct sctp_gap_ack_block *sctp_tsnmap_get_gabs(struct sctp_tsnmap *map) -{ - return map->gabs; -} - /* Is there a gap in the TSN map? */ static inline int sctp_tsnmap_has_gap(const struct sctp_tsnmap *map) { -- cgit v1.2.3 From 18ee49ddb0d242ed1d0e273038d5e4f6de7379d3 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 1 Oct 2008 15:41:33 +0000 Subject: phylib: rename mii_bus::dev to mii_bus::parent In preparation of giving mii_bus objects a device tree presence of their own, rename struct mii_bus's ->dev argument to ->parent, since having a 'struct device *dev' that points to our parent device conflicts with introducing a 'struct device dev' representing our own device. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller Acked-by: Andy Fleming --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 5f170f5b1a30..87499bdedc6f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -99,7 +99,7 @@ struct mii_bus { */ struct mutex mdio_lock; - struct device *dev; + struct device *parent; /* list of all PHYs on bus */ struct phy_device *phy_map[PHY_MAX_ADDR]; -- cgit v1.2.3 From 298cf9beb9679522de995e249eccbd82f7c51999 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 8 Oct 2008 16:29:57 -0700 Subject: phylib: move to dynamic allocation of struct mii_bus This patch introduces mdiobus_alloc() and mdiobus_free(), and makes all mdio bus drivers use these functions to allocate their struct mii_bus'es dynamically. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller Acked-by: Andy Fleming --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 87499bdedc6f..ca4a83c4c537 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -408,8 +408,10 @@ void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); +struct mii_bus *mdiobus_alloc(void); int mdiobus_register(struct mii_bus *bus); void mdiobus_unregister(struct mii_bus *bus); +void mdiobus_free(struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); void phy_sanitize_settings(struct phy_device *phydev); -- cgit v1.2.3 From 46abc02175b3c246dd5141d878f565a8725060c9 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 8 Oct 2008 16:33:40 -0700 Subject: phylib: give mdio buses a device tree presence Introduce the mdio_bus class, and give each 'struct mii_bus' its own 'struct device', so that mii_bus objects are represented in the device tree and can be found by querying the device tree. Signed-off-by: Lennert Buytenhek Acked-by: Andy Fleming Signed-off-by: David S. Miller --- include/linux/phy.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index ca4a83c4c537..891f27f9137e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -100,6 +100,13 @@ struct mii_bus { struct mutex mdio_lock; struct device *parent; + enum { + MDIOBUS_ALLOCATED = 1, + MDIOBUS_REGISTERED, + MDIOBUS_UNREGISTERED, + MDIOBUS_RELEASED, + } state; + struct device dev; /* list of all PHYs on bus */ struct phy_device *phy_map[PHY_MAX_ADDR]; @@ -113,6 +120,7 @@ struct mii_bus { */ int *irq; }; +#define to_mii_bus(d) container_of(d, struct mii_bus, dev) #define PHY_INTERRUPT_DISABLED 0x0 #define PHY_INTERRUPT_ENABLED 0x80000000 -- cgit v1.2.3 From 2e888103295f47b8fcbf7e9bb8c5da97dd2ecd76 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 29 Sep 2008 17:12:35 +0000 Subject: phylib: add mdiobus_{read,write} Add mdiobus_{read,write} routines to allow direct reading/writing of registers on an mii bus without having to go through the PHY abstraction, and make phy_{read,write} use these primitives. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- include/linux/phy.h | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 891f27f9137e..77c4ed60b982 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -122,6 +122,15 @@ struct mii_bus { }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) +struct mii_bus *mdiobus_alloc(void); +int mdiobus_register(struct mii_bus *bus); +void mdiobus_unregister(struct mii_bus *bus); +void mdiobus_free(struct mii_bus *bus); +struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); +int mdiobus_read(struct mii_bus *bus, int addr, u16 regnum); +int mdiobus_write(struct mii_bus *bus, int addr, u16 regnum, u16 val); + + #define PHY_INTERRUPT_DISABLED 0x0 #define PHY_INTERRUPT_ENABLED 0x80000000 @@ -399,8 +408,35 @@ struct phy_fixup { int (*run)(struct phy_device *phydev); }; -int phy_read(struct phy_device *phydev, u16 regnum); -int phy_write(struct phy_device *phydev, u16 regnum, u16 val); +/** + * phy_read - Convenience function for reading a given PHY register + * @phydev: the phy_device struct + * @regnum: register number to read + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +static inline int phy_read(struct phy_device *phydev, u16 regnum) +{ + return mdiobus_read(phydev->bus, phydev->addr, regnum); +} + +/** + * phy_write - Convenience function for writing a given PHY register + * @phydev: the phy_device struct + * @regnum: register number to write + * @val: value to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +static inline int phy_write(struct phy_device *phydev, u16 regnum, u16 val) +{ + return mdiobus_write(phydev->bus, phydev->addr, regnum, val); +} + int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id); struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_clear_interrupt(struct phy_device *phydev); @@ -416,12 +452,6 @@ void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); -struct mii_bus *mdiobus_alloc(void); -int mdiobus_register(struct mii_bus *bus); -void mdiobus_unregister(struct mii_bus *bus); -void mdiobus_free(struct mii_bus *bus); -struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); - void phy_sanitize_settings(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); int phy_enable_interrupts(struct phy_device *phydev); -- cgit v1.2.3 From 91da11f870f00a3322b81c73042291d7f0be5a17 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:44:02 +0000 Subject: net: Distributed Switch Architecture protocol support Distributed Switch Architecture is a protocol for managing hardware switch chips. It consists of a set of MII management registers and commands to configure the switch, and an ethernet header format to signal which of the ports of the switch a packet was received from or is intended to be sent to. The switches that this driver supports are typically embedded in access points and routers, and a typical setup with a DSA switch looks something like this: +-----------+ +-----------+ | | RGMII | | | +-------+ +------ 1000baseT MDI ("WAN") | | | 6-port +------ 1000baseT MDI ("LAN1") | CPU | | ethernet +------ 1000baseT MDI ("LAN2") | |MIImgmt| switch +------ 1000baseT MDI ("LAN3") | +-------+ w/5 PHYs +------ 1000baseT MDI ("LAN4") | | | | +-----------+ +-----------+ The switch driver presents each port on the switch as a separate network interface to Linux, polls the switch to maintain software link state of those ports, forwards MII management interface accesses to those network interfaces (e.g. as done by ethtool) to the switch, and exposes the switch's hardware statistics counters via the appropriate Linux kernel interfaces. This initial patch supports the MII management interface register layout of the Marvell 88E6123, 88E6161 and 88E6165 switch chips, and supports the "Ethertype DSA" packet tagging format. (There is no officially registered ethertype for the Ethertype DSA packet format, so we just grab a random one. The ethertype to use is programmed into the switch, and the switch driver uses the value of ETH_P_EDSA for this, so this define can be changed at any time in the future if the one we chose is allocated to another protocol or if Ethertype DSA gets its own officially registered ethertype, and everything will continue to work.) Signed-off-by: Lennert Buytenhek Tested-by: Nicolas Pitre Tested-by: Byron Bradley Tested-by: Tim Ellis Tested-by: Peter van Valderen Tested-by: Dirk Teurlings Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 3 +++ include/net/dsa.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+) create mode 100644 include/net/dsa.h (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 723a1c5fbc6c..2140aacb6338 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -77,6 +77,7 @@ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_TIPC 0x88CA /* TIPC */ +#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ /* * Non DIX types. Won't clash for 1500 types. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9cfd20be8b7f..794eeb4b3462 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -607,6 +607,9 @@ struct net_device /* Protocol specific pointers */ +#ifdef CONFIG_NET_DSA + void *dsa_ptr; /* dsa specific data */ +#endif void *atalk_ptr; /* AppleTalk link */ void *ip_ptr; /* IPv4 specific data */ void *dn_ptr; /* DECnet specific data */ diff --git a/include/net/dsa.h b/include/net/dsa.h new file mode 100644 index 000000000000..dc4784f54520 --- /dev/null +++ b/include/net/dsa.h @@ -0,0 +1,34 @@ +/* + * include/net/dsa.h - Driver for Distributed Switch Architecture switch chips + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_NET_DSA_H +#define __LINUX_NET_DSA_H + +#define DSA_MAX_PORTS 12 + +struct dsa_platform_data { + /* + * Reference to a Linux network interface that connects + * to the switch chip. + */ + struct device *netdev; + + /* + * How to access the switch configuration registers, and + * the names of the switch ports (use "cpu" to designate + * the switch port that the cpu is connected to). + */ + struct device *mii_bus; + int sw_addr; + char *port_names[DSA_MAX_PORTS]; +}; + + +#endif -- cgit v1.2.3 From cf85d08fdf4548ee46657ccfb7f9949a85145db5 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:45:02 +0000 Subject: dsa: add support for original DSA tagging format Most of the DSA switches currently in the field do not support the Ethertype DSA tagging format that one of the previous patches added support for, but only the original DSA tagging format. The original DSA tagging format carries the same information as the Ethertype DSA tagging format, but with the difference that it does not have an ethertype field. In other words, when receiving a packet that is tagged with an original DSA tag, there is no way of telling in eth_type_trans() that this packet is in fact a DSA-tagged packet. This patch adds a hook into eth_type_trans() which is only compiled in if support for a switch chip that doesn't support Ethertype DSA is selected, and which checks whether there is a DSA switch driver instance attached to this network device which uses the old tag format. If so, it sets the protocol field to ETH_P_DSA without looking at the packet, so that the packet ends up in the right place. Signed-off-by: Lennert Buytenhek Tested-by: Nicolas Pitre Tested-by: Peter van Valderen Tested-by: Dirk Teurlings Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 11 +++++++++++ include/net/dsa.h | 2 ++ 3 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 2140aacb6338..32b9dcda68c7 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -101,6 +101,7 @@ #define ETH_P_ECONET 0x0018 /* Acorn Econet */ #define ETH_P_HDLC 0x0019 /* HDLC frames */ #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ +#define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 794eeb4b3462..97f0c64c152a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -42,6 +42,7 @@ #include #include +#include struct vlan_group; struct ethtool_ops; @@ -801,6 +802,16 @@ void dev_net_set(struct net_device *dev, struct net *net) #endif } +static inline bool netdev_uses_dsa_tags(struct net_device *dev) +{ +#ifdef CONFIG_NET_DSA_TAG_DSA + if (dev->dsa_ptr != NULL) + return dsa_uses_dsa_tags(dev->dsa_ptr); +#endif + + return 0; +} + /** * netdev_priv - access network device private data * @dev: network device diff --git a/include/net/dsa.h b/include/net/dsa.h index dc4784f54520..72e509b6a12e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -30,5 +30,7 @@ struct dsa_platform_data { char *port_names[DSA_MAX_PORTS]; }; +extern bool dsa_uses_dsa_tags(void *dsa_ptr); + #endif -- cgit v1.2.3 From 396138f03f4521c55ecc3a5dd75d4c56e6323244 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:46:07 +0000 Subject: dsa: add support for Trailer tagging format This adds support for the Trailer switch tagging format. This is another tagging that doesn't explicitly mark tagged packets with a distinct ethertype, so that we need to add a similar hack in the receive path as for the Original DSA tagging format. Signed-off-by: Lennert Buytenhek Tested-by: Byron Bradley Tested-by: Tim Ellis Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 10 ++++++++++ include/net/dsa.h | 1 + 3 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 32b9dcda68c7..a0099e98b5c4 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -102,6 +102,7 @@ #define ETH_P_HDLC 0x0019 /* HDLC frames */ #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ #define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ +#define ETH_P_TRAILER 0x001C /* Trailer switch tagging */ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97f0c64c152a..d3ea3de70a8a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -812,6 +812,16 @@ static inline bool netdev_uses_dsa_tags(struct net_device *dev) return 0; } +static inline bool netdev_uses_trailer_tags(struct net_device *dev) +{ +#ifdef CONFIG_NET_DSA_TAG_TRAILER + if (dev->dsa_ptr != NULL) + return dsa_uses_trailer_tags(dev->dsa_ptr); +#endif + + return 0; +} + /** * netdev_priv - access network device private data * @dev: network device diff --git a/include/net/dsa.h b/include/net/dsa.h index 72e509b6a12e..52e97bfca5a1 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -31,6 +31,7 @@ struct dsa_platform_data { }; extern bool dsa_uses_dsa_tags(void *dsa_ptr); +extern bool dsa_uses_trailer_tags(void *dsa_ptr); #endif -- cgit v1.2.3 From c19e654ddbe3831252f61e76a74d661e1a755530 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Oct 2008 11:59:55 -0700 Subject: gre: Add netlink interface This patch adds a netlink interface that will eventually displace the existing ioctl interface. It utilises the elegant rtnl_link_ops mechanism. This also means that user-space no longer needs to rely on the tunnel interface being of type GRE to identify GRE tunnels. The identification can now occur using rtnl_link_ops. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/if_tunnel.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index d4efe4014705..aeab2cb32a9c 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -2,6 +2,7 @@ #define _IF_TUNNEL_H_ #include +#include #define SIOCGETTUNNEL (SIOCDEVPRIVATE + 0) #define SIOCADDTUNNEL (SIOCDEVPRIVATE + 1) @@ -47,4 +48,22 @@ struct ip_tunnel_prl { /* PRL flags */ #define PRL_DEFAULT 0x0001 +enum +{ + IFLA_GRE_UNSPEC, + IFLA_GRE_LINK, + IFLA_GRE_IFLAGS, + IFLA_GRE_OFLAGS, + IFLA_GRE_IKEY, + IFLA_GRE_OKEY, + IFLA_GRE_LOCAL, + IFLA_GRE_REMOTE, + IFLA_GRE_TTL, + IFLA_GRE_TOS, + IFLA_GRE_PMTUDISC, + __IFLA_GRE_MAX, +}; + +#define IFLA_GRE_MAX (__IFLA_GRE_MAX - 1) + #endif /* _IF_TUNNEL_H_ */ -- cgit v1.2.3 From e1a8000228e16212c93b23cfbed4d622e2ec7a6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Oct 2008 12:00:17 -0700 Subject: gre: Add Transparent Ethernet Bridging This patch adds support for Ethernet over GRE encapsulation. This is exposed to user-space with a new link type of "gretap" instead of "gre". It will create an ARPHRD_ETHER device in lieu of the usual ARPHRD_IPGRE. Note that to preserver backwards compatibility all Transparent Ethernet Bridging packets are passed to an ARPHRD_IPGRE tunnel if its key matches and there is no ARPHRD_ETHER device whose key matches more closely. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index a0099e98b5c4..bf1a53b2682e 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -56,6 +56,7 @@ #define ETH_P_DIAG 0x6005 /* DEC Diagnostics */ #define ETH_P_CUST 0x6006 /* DEC Customer use */ #define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */ +#define ETH_P_TEB 0x6558 /* Trans Ether Bridging */ #define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */ #define ETH_P_ATALK 0x809B /* Appletalk DDP */ #define ETH_P_AARP 0x80F3 /* Appletalk AARP */ -- cgit v1.2.3 From 64194c31a0b6f5d84703b772113aafc400eeaad6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Oct 2008 12:03:17 -0700 Subject: inet: Make tunnel RX/TX byte counters more consistent This patch makes the RX/TX byte counters for IPIP, GRE and SIT more consistent. Previously we included the external IP headers on the way out but not when the packet is inbound. The new scheme is to count payload only in both directions. For IPIP and SIT this simply means the exclusion of the external IP header. For GRE this means that we exclude the GRE header as well. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/ipip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ipip.h b/include/net/ipip.h index a85bda64b852..fdf9bd743705 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -37,7 +37,7 @@ struct ip_tunnel_prl_entry #define IPTUNNEL_XMIT() do { \ int err; \ - int pkt_len = skb->len; \ + int pkt_len = skb->len - skb_transport_offset(skb); \ \ skb->ip_summed = CHECKSUM_NONE; \ ip_select_ident(iph, &rt->u.dst, NULL); \ -- cgit v1.2.3 From bb21c95e2d3325fcb53c591686dbbf4068a165bc Mon Sep 17 00:00:00 2001 From: Guo-Fu Tseng Date: Thu, 9 Oct 2008 21:10:36 -0700 Subject: nf_conntrack_ecache.h: Fix missing braces This patch add missing braces of today's net-next-2.6: include/net/netfilter/nf_conntrack_ecache.h Signed-off-by: Guo-Fu Tseng Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_ecache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 35f814c1e2ca..11480e633a9f 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -74,6 +74,7 @@ static inline void nf_ct_event_cache_flush(struct net *net) {} static inline int nf_conntrack_ecache_init(struct net *net) { return 0; +} static inline void nf_conntrack_ecache_fini(struct net *net) { -- cgit v1.2.3