From 0e60ebe04c51807db972d03665651ae6b5c26d7e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 18:17:21 +0100 Subject: netfilter: add __rcu annotations Add some __rcu annotations and use helpers to reduce number of sparse warnings (CONFIG_SPARSE_RCU_POINTER=y) Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 89341c32631a..928a35ec21c7 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -265,7 +265,7 @@ struct nf_afinfo { int route_key_size; }; -extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO]; +extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO]; static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family) { return rcu_dereference(nf_afinfo[family]); @@ -355,9 +355,9 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #endif /*CONFIG_NETFILTER*/ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) -extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); +extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu; extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); -extern void (*nf_ct_destroy)(struct nf_conntrack *); +extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu; #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} #endif -- cgit v1.2.3 From fe5e7a1efb664df0280f10377813d7099fb7eb0f Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Fri, 19 Nov 2010 14:25:12 +0100 Subject: IPVS: Backup, Adding Version 1 receive capability Functionality improvements * flags changed from 16 to 32 bits * fwmark added (32 bits) * timeout in sec. added (32 bits) * pe data added (Variable length) * IPv6 capabilities (3x16 bytes for addr.) * Version and type in every conn msg. ip_vs_process_message() now handles Version 1 messages and will call ip_vs_process_message_v0() for version 0 messages. ip_vs_proc_conn() is common for both version, and handles the update of connection hash. ip_vs_conn_fill_param_sync() - Version 1 messages only ip_vs_conn_fill_param_sync_v0() - Version 0 messages only Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/linux/ip_vs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 5f43a3b2e3ad..4deb3834d62c 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -89,6 +89,14 @@ #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ +#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \ + IP_VS_CONN_F_NOOUTPUT | \ + IP_VS_CONN_F_INACTIVE | \ + IP_VS_CONN_F_SEQ_MASK | \ + IP_VS_CONN_F_NO_CPORT | \ + IP_VS_CONN_F_TEMPLATE \ + ) + /* Flags that are not sent to backup server start from bit 16 */ #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ -- cgit v1.2.3 From f1c722295e029eace7960fc687efd5afd67dc555 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 15 Dec 2010 22:58:53 +0100 Subject: netfilter: xtables: use guarded types We are supposed to use the kernel's own types in userspace exports. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_CT.h | 10 +++++----- include/linux/netfilter/xt_TCPOPTSTRIP.h | 2 +- include/linux/netfilter/xt_TPROXY.h | 8 ++++---- include/linux/netfilter/xt_cluster.h | 8 ++++---- include/linux/netfilter/xt_quota.h | 6 +++--- include/linux/netfilter/xt_time.h | 14 +++++++------- include/linux/netfilter/xt_u32.h | 16 ++++++++-------- 7 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h index 1b564106891d..fbf4c5658554 100644 --- a/include/linux/netfilter/xt_CT.h +++ b/include/linux/netfilter/xt_CT.h @@ -4,11 +4,11 @@ #define XT_CT_NOTRACK 0x1 struct xt_ct_target_info { - u_int16_t flags; - u_int16_t zone; - u_int32_t ct_events; - u_int32_t exp_events; - char helper[16]; + __u16 flags; + __u16 zone; + __u32 ct_events; + __u32 exp_events; + char helper[16]; /* Used internally by the kernel */ struct nf_conn *ct __attribute__((aligned(8))); diff --git a/include/linux/netfilter/xt_TCPOPTSTRIP.h b/include/linux/netfilter/xt_TCPOPTSTRIP.h index 2db543214ff5..342ef14b1761 100644 --- a/include/linux/netfilter/xt_TCPOPTSTRIP.h +++ b/include/linux/netfilter/xt_TCPOPTSTRIP.h @@ -7,7 +7,7 @@ (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0) struct xt_tcpoptstrip_target_info { - u_int32_t strip_bmap[8]; + __u32 strip_bmap[8]; }; #endif /* _XT_TCPOPTSTRIP_H */ diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h index 3f3d69361289..8097e0b4c15e 100644 --- a/include/linux/netfilter/xt_TPROXY.h +++ b/include/linux/netfilter/xt_TPROXY.h @@ -5,15 +5,15 @@ * redirection. We can get rid of that whenever we get support for * mutliple targets in the same rule. */ struct xt_tproxy_target_info { - u_int32_t mark_mask; - u_int32_t mark_value; + __u32 mark_mask; + __u32 mark_value; __be32 laddr; __be16 lport; }; struct xt_tproxy_target_info_v1 { - u_int32_t mark_mask; - u_int32_t mark_value; + __u32 mark_mask; + __u32 mark_value; union nf_inet_addr laddr; __be16 lport; }; diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h index 886682656f09..66cfa3c782ac 100644 --- a/include/linux/netfilter/xt_cluster.h +++ b/include/linux/netfilter/xt_cluster.h @@ -6,10 +6,10 @@ enum xt_cluster_flags { }; struct xt_cluster_match_info { - u_int32_t total_nodes; - u_int32_t node_mask; - u_int32_t hash_seed; - u_int32_t flags; + __u32 total_nodes; + __u32 node_mask; + __u32 hash_seed; + __u32 flags; }; #define XT_CLUSTER_NODES_MAX 32 diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h index b0d28c659ab7..8bda65f0bc92 100644 --- a/include/linux/netfilter/xt_quota.h +++ b/include/linux/netfilter/xt_quota.h @@ -9,9 +9,9 @@ enum xt_quota_flags { struct xt_quota_priv; struct xt_quota_info { - u_int32_t flags; - u_int32_t pad; - aligned_u64 quota; + __u32 flags; + __u32 pad; + aligned_u64 quota; /* Used internally by the kernel */ struct xt_quota_priv *master; diff --git a/include/linux/netfilter/xt_time.h b/include/linux/netfilter/xt_time.h index 14b6df412c9f..b8bd4568efdb 100644 --- a/include/linux/netfilter/xt_time.h +++ b/include/linux/netfilter/xt_time.h @@ -2,13 +2,13 @@ #define _XT_TIME_H 1 struct xt_time_info { - u_int32_t date_start; - u_int32_t date_stop; - u_int32_t daytime_start; - u_int32_t daytime_stop; - u_int32_t monthdays_match; - u_int8_t weekdays_match; - u_int8_t flags; + __u32 date_start; + __u32 date_stop; + __u32 daytime_start; + __u32 daytime_stop; + __u32 monthdays_match; + __u8 weekdays_match; + __u8 flags; }; enum { diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h index 9947f56cdbdd..e8c3d8722bae 100644 --- a/include/linux/netfilter/xt_u32.h +++ b/include/linux/netfilter/xt_u32.h @@ -9,13 +9,13 @@ enum xt_u32_ops { }; struct xt_u32_location_element { - u_int32_t number; - u_int8_t nextop; + __u32 number; + __u8 nextop; }; struct xt_u32_value_element { - u_int32_t min; - u_int32_t max; + __u32 min; + __u32 max; }; /* @@ -27,14 +27,14 @@ struct xt_u32_value_element { struct xt_u32_test { struct xt_u32_location_element location[XT_U32_MAXSIZE+1]; struct xt_u32_value_element value[XT_U32_MAXSIZE+1]; - u_int8_t nnums; - u_int8_t nvalues; + __u8 nnums; + __u8 nvalues; }; struct xt_u32 { struct xt_u32_test tests[XT_U32_MAXSIZE+1]; - u_int8_t ntests; - u_int8_t invert; + __u8 ntests; + __u8 invert; }; #endif /* _XT_U32_H */ -- cgit v1.2.3 From ae90bdeaeac6b964b7a1e853a90a19f358a9ac20 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 15 Dec 2010 23:53:41 +0100 Subject: netfilter: fix compilation when conntrack is disabled but tproxy is enabled The IPv6 tproxy patches split IPv6 defragmentation off of conntrack, but failed to update the #ifdef stanzas guarding the defragmentation related fields and code in skbuff and conntrack related code in nf_defrag_ipv6.c. This patch adds the required #ifdefs so that IPv6 tproxy can truly be used without connection tracking. Original report: http://marc.info/?l=linux-netdev&m=129010118516341&w=2 Reported-by: Randy Dunlap Signed-off-by: KOVACS Krisztian Acked-by: Randy Dunlap Signed-off-by: Patrick McHardy --- include/linux/skbuff.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e6ba898de61c..4f2db79a2abb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -255,6 +255,11 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif +#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \ + defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE) +#define NET_SKBUFF_NF_DEFRAG_NEEDED 1 +#endif + /** * struct sk_buff - socket buffer * @next: Next buffer in list @@ -362,6 +367,8 @@ struct sk_buff { void (*destructor)(struct sk_buff *skb); #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack *nfct; +#endif +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED struct sk_buff *nfct_reasm; #endif #ifdef CONFIG_BRIDGE_NETFILTER @@ -2051,6 +2058,8 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct) if (nfct) atomic_inc(&nfct->use); } +#endif +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED static inline void nf_conntrack_get_reasm(struct sk_buff *skb) { if (skb) @@ -2079,6 +2088,8 @@ static inline void nf_reset(struct sk_buff *skb) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb->nfct); skb->nfct = NULL; +#endif +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED nf_conntrack_put_reasm(skb->nfct_reasm); skb->nfct_reasm = NULL; #endif @@ -2095,6 +2106,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) dst->nfct = src->nfct; nf_conntrack_get(src->nfct); dst->nfctinfo = src->nfctinfo; +#endif +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED dst->nfct_reasm = src->nfct_reasm; nf_conntrack_get_reasm(src->nfct_reasm); #endif @@ -2108,6 +2121,8 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(dst->nfct); +#endif +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED nf_conntrack_put_reasm(dst->nfct_reasm); #endif #ifdef CONFIG_BRIDGE_NETFILTER -- cgit v1.2.3 From 5df15196a2bbf16ca4c6a797ec00ff36d0d5c179 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 26 Dec 2010 10:22:22 +0100 Subject: netfilter: xt_comment: drop unneeded unsigned qualifier Since a string is stored, and not something like a MAC address that would rely on (un)signedness, drop the qualifier. Signed-off-by: Jan Engelhardt Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_comment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_comment.h b/include/linux/netfilter/xt_comment.h index eacfedc6b5d0..0ea5e79f5bd7 100644 --- a/include/linux/netfilter/xt_comment.h +++ b/include/linux/netfilter/xt_comment.h @@ -4,7 +4,7 @@ #define XT_MAX_COMMENT_LEN 256 struct xt_comment_info { - unsigned char comment[XT_MAX_COMMENT_LEN]; + char comment[XT_MAX_COMMENT_LEN]; }; #endif /* XT_COMMENT_H */ -- cgit v1.2.3 From b017900aac4a158b9bf7ffdcb8a369a91115b3e4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Dec 2010 09:46:26 +0100 Subject: netfilter: xt_conntrack: support matching on port ranges Add a new revision 3 that contains port ranges for all of origsrc, origdst, replsrc and repldst. The high ports are appended to the original v2 data structure to allow sharing most of the code with v1 and v2. Use of the revision specific port matching function is made dependant on par->match->revision. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_conntrack.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h index 54f47a2f6152..74b904d8f99c 100644 --- a/include/linux/netfilter/xt_conntrack.h +++ b/include/linux/netfilter/xt_conntrack.h @@ -58,4 +58,19 @@ struct xt_conntrack_mtinfo2 { __u16 state_mask, status_mask; }; +struct xt_conntrack_mtinfo3 { + union nf_inet_addr origsrc_addr, origsrc_mask; + union nf_inet_addr origdst_addr, origdst_mask; + union nf_inet_addr replsrc_addr, replsrc_mask; + union nf_inet_addr repldst_addr, repldst_mask; + __u32 expires_min, expires_max; + __u16 l4proto; + __u16 origsrc_port, origdst_port; + __u16 replsrc_port, repldst_port; + __u16 match_flags, invert_flags; + __u16 state_mask, status_mask; + __u16 origsrc_port_high, origdst_port_high; + __u16 replsrc_port_high, repldst_port_high; +}; + #endif /*_XT_CONNTRACK_H*/ -- cgit v1.2.3 From 255d0dc34068a976550ce555e153c0bfcfec7cc6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Dec 2010 18:35:15 +0100 Subject: netfilter: x_table: speedup compat operations One iptables invocation with 135000 rules takes 35 seconds of cpu time on a recent server, using a 32bit distro and a 64bit kernel. We eventually trigger NMI/RCU watchdog. INFO: rcu_sched_state detected stall on CPU 3 (t=6000 jiffies) COMPAT mode has quadratic behavior and consume 16 bytes of memory per rule. Switch the xt_compat algos to use an array instead of list, and use a binary search to locate an offset in the sorted array. This halves memory need (8 bytes per rule), and removes quadratic behavior [ O(N*N) -> O(N*log2(N)) ] Time of iptables goes from 35 s to 150 ms. Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 742bec051440..0f04d985b41c 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -611,8 +611,9 @@ struct _compat_xt_align { extern void xt_compat_lock(u_int8_t af); extern void xt_compat_unlock(u_int8_t af); -extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); +extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); extern void xt_compat_flush_offsets(u_int8_t af); +extern void xt_compat_init_offsets(u_int8_t af, unsigned int number); extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); extern int xt_compat_match_offset(const struct xt_match *match); -- cgit v1.2.3 From 6faee60a4e82075853a437831768cc9e2e563e4e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Dec 2010 15:57:47 +0100 Subject: netfilter: ebt_ip6: allow matching on ipv6-icmp types/codes To avoid adding a new match revision icmp type/code are stored in the sport/dport area. Signed-off-by: Florian Westphal Reviewed-by: Holger Eitzenberger Reviewed-by: Bart De Schuymer Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebt_ip6.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h index e5de98701519..22af18a3c16b 100644 --- a/include/linux/netfilter_bridge/ebt_ip6.h +++ b/include/linux/netfilter_bridge/ebt_ip6.h @@ -18,8 +18,11 @@ #define EBT_IP6_PROTO 0x08 #define EBT_IP6_SPORT 0x10 #define EBT_IP6_DPORT 0x20 +#define EBT_IP6_ICMP6 0x40 + #define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\ - EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT) + EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT | \ + EBT_IP6_ICMP6) #define EBT_IP6_MATCH "ip6" /* the same values are used for the invflags */ @@ -32,8 +35,14 @@ struct ebt_ip6_info { uint8_t protocol; uint8_t bitmask; uint8_t invflags; - uint16_t sport[2]; - uint16_t dport[2]; + union { + uint16_t sport[2]; + uint8_t icmpv6_type[2]; + }; + union { + uint16_t dport[2]; + uint8_t icmpv6_code[2]; + }; }; #endif -- cgit v1.2.3 From 43f393caec0362abe03c72799d3f342af3973070 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sun, 16 Jan 2011 18:10:28 +0100 Subject: netfilter: audit target to record accepted/dropped packets This patch adds a new netfilter target which creates audit records for packets traversing a certain chain. It can be used to record packets which are rejected administraively as follows: -N AUDIT_DROP -A AUDIT_DROP -j AUDIT --type DROP -A AUDIT_DROP -j DROP a rule which would typically drop or reject a packet would then invoke the new chain to record packets before dropping them. -j AUDIT_DROP The module is protocol independant and works for iptables, ip6tables and ebtables. The following information is logged: - netfilter hook - packet length - incomming/outgoing interface - MAC src/dst/proto for ethernet packets - src/dst/protocol address for IPv4/IPv6 - src/dst port for TCP/UDP/UDPLITE - icmp type/code Cc: Patrick McHardy Cc: Eric Paris Cc: Al Viro Signed-off-by: Thomas Graf Signed-off-by: Patrick McHardy --- include/linux/audit.h | 1 + include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_AUDIT.h | 30 ++++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 include/linux/netfilter/xt_AUDIT.h (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 8b5c0620abf9..ae227dfcf9c6 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -103,6 +103,7 @@ #define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ #define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ #define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ +#define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 9d40effe7ca7..9f11fbc377e2 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -9,6 +9,7 @@ header-y += nfnetlink_conntrack.h header-y += nfnetlink_log.h header-y += nfnetlink_queue.h header-y += x_tables.h +header-y += xt_AUDIT.h header-y += xt_CHECKSUM.h header-y += xt_CLASSIFY.h header-y += xt_CONNMARK.h diff --git a/include/linux/netfilter/xt_AUDIT.h b/include/linux/netfilter/xt_AUDIT.h new file mode 100644 index 000000000000..38751d2ea52b --- /dev/null +++ b/include/linux/netfilter/xt_AUDIT.h @@ -0,0 +1,30 @@ +/* + * Header file for iptables xt_AUDIT target + * + * (C) 2010-2011 Thomas Graf + * (C) 2010-2011 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _XT_AUDIT_TARGET_H +#define _XT_AUDIT_TARGET_H + +#include + +enum { + XT_AUDIT_TYPE_ACCEPT = 0, + XT_AUDIT_TYPE_DROP, + XT_AUDIT_TYPE_REJECT, + __XT_AUDIT_TYPE_MAX, +}; + +#define XT_AUDIT_TYPE_MAX (__XT_AUDIT_TYPE_MAX - 1) + +struct xt_audit_info { + __u8 type; /* XT_AUDIT_TYPE_* */ +}; + +#endif /* _XT_AUDIT_TARGET_H */ -- cgit v1.2.3 From fbabf31e4d482149b5e2704eb0287cf9117bdcf3 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sun, 16 Jan 2011 18:12:59 +0100 Subject: netfilter: create audit records for x_tables replaces The setsockopt() syscall to replace tables is already recorded in the audit logs. This patch stores additional information such as table name and netfilter protocol. Cc: Patrick McHardy Cc: Eric Paris Cc: Al Viro Signed-off-by: Thomas Graf Signed-off-by: Patrick McHardy --- include/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index ae227dfcf9c6..32b5c62a5042 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -104,6 +104,7 @@ #define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ #define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ #define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */ +#define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ -- cgit v1.2.3 From 0260c1dccc6a1018f8cf2c4778dffb47fc5d1c4c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 18 Jan 2011 07:33:09 +0100 Subject: netfilter: xtables: use __uXX guarded types for userspace exports Signed-off-by: Jan Engelhardt --- include/linux/netfilter_bridge/ebt_802_3.h | 24 ++++++++++++------------ include/linux/netfilter_bridge/ebt_among.h | 2 +- include/linux/netfilter_bridge/ebt_arp.h | 4 ++-- include/linux/netfilter_bridge/ebt_ip.h | 12 ++++++------ include/linux/netfilter_bridge/ebt_ip6.h | 16 ++++++++-------- include/linux/netfilter_bridge/ebt_limit.h | 8 ++++---- include/linux/netfilter_bridge/ebt_log.h | 6 +++--- include/linux/netfilter_bridge/ebt_mark_m.h | 4 ++-- include/linux/netfilter_bridge/ebt_nflog.h | 10 +++++----- include/linux/netfilter_bridge/ebt_pkttype.h | 4 ++-- include/linux/netfilter_bridge/ebt_stp.h | 24 ++++++++++++------------ include/linux/netfilter_bridge/ebt_ulog.h | 2 +- include/linux/netfilter_bridge/ebt_vlan.h | 8 ++++---- include/linux/netfilter_ipv4/ipt_CLUSTERIP.h | 14 +++++++------- include/linux/netfilter_ipv4/ipt_ECN.h | 6 +++--- include/linux/netfilter_ipv4/ipt_SAME.h | 6 +++--- include/linux/netfilter_ipv4/ipt_TTL.h | 4 ++-- include/linux/netfilter_ipv4/ipt_addrtype.h | 14 +++++++------- include/linux/netfilter_ipv4/ipt_ah.h | 4 ++-- include/linux/netfilter_ipv4/ipt_ecn.h | 8 ++++---- include/linux/netfilter_ipv4/ipt_ttl.h | 4 ++-- include/linux/netfilter_ipv6/ip6t_HL.h | 4 ++-- include/linux/netfilter_ipv6/ip6t_REJECT.h | 2 +- include/linux/netfilter_ipv6/ip6t_ah.h | 8 ++++---- include/linux/netfilter_ipv6/ip6t_frag.h | 8 ++++---- include/linux/netfilter_ipv6/ip6t_hl.h | 4 ++-- include/linux/netfilter_ipv6/ip6t_ipv6header.h | 6 +++--- include/linux/netfilter_ipv6/ip6t_mh.h | 4 ++-- include/linux/netfilter_ipv6/ip6t_opts.h | 10 +++++----- include/linux/netfilter_ipv6/ip6t_rt.h | 12 ++++++------ 30 files changed, 121 insertions(+), 121 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h index c73ef0b18bdc..c427764f4444 100644 --- a/include/linux/netfilter_bridge/ebt_802_3.h +++ b/include/linux/netfilter_bridge/ebt_802_3.h @@ -24,24 +24,24 @@ /* ui has one byte ctrl, ni has two */ struct hdr_ui { - uint8_t dsap; - uint8_t ssap; - uint8_t ctrl; - uint8_t orig[3]; + __u8 dsap; + __u8 ssap; + __u8 ctrl; + __u8 orig[3]; __be16 type; }; struct hdr_ni { - uint8_t dsap; - uint8_t ssap; + __u8 dsap; + __u8 ssap; __be16 ctrl; - uint8_t orig[3]; + __u8 orig[3]; __be16 type; }; struct ebt_802_3_hdr { - uint8_t daddr[6]; - uint8_t saddr[6]; + __u8 daddr[6]; + __u8 saddr[6]; __be16 len; union { struct hdr_ui ui; @@ -59,10 +59,10 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb) #endif struct ebt_802_3_info { - uint8_t sap; + __u8 sap; __be16 type; - uint8_t bitmask; - uint8_t invflags; + __u8 bitmask; + __u8 invflags; }; #endif diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/linux/netfilter_bridge/ebt_among.h index 0009558609a7..686c9619dbc0 100644 --- a/include/linux/netfilter_bridge/ebt_among.h +++ b/include/linux/netfilter_bridge/ebt_among.h @@ -30,7 +30,7 @@ */ struct ebt_mac_wormhash_tuple { - uint32_t cmp[2]; + __u32 cmp[2]; __be32 ip; }; diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/linux/netfilter_bridge/ebt_arp.h index cbf4843b6b0f..e62b5af95869 100644 --- a/include/linux/netfilter_bridge/ebt_arp.h +++ b/include/linux/netfilter_bridge/ebt_arp.h @@ -27,8 +27,8 @@ struct ebt_arp_info unsigned char smmsk[ETH_ALEN]; unsigned char dmaddr[ETH_ALEN]; unsigned char dmmsk[ETH_ALEN]; - uint8_t bitmask; - uint8_t invflags; + __u8 bitmask; + __u8 invflags; }; #endif diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/linux/netfilter_bridge/ebt_ip.h index 6a708fb92241..d99de58da2c7 100644 --- a/include/linux/netfilter_bridge/ebt_ip.h +++ b/include/linux/netfilter_bridge/ebt_ip.h @@ -31,12 +31,12 @@ struct ebt_ip_info { __be32 daddr; __be32 smsk; __be32 dmsk; - uint8_t tos; - uint8_t protocol; - uint8_t bitmask; - uint8_t invflags; - uint16_t sport[2]; - uint16_t dport[2]; + __u8 tos; + __u8 protocol; + __u8 bitmask; + __u8 invflags; + __u16 sport[2]; + __u16 dport[2]; }; #endif diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h index 22af18a3c16b..998e9d5a6b60 100644 --- a/include/linux/netfilter_bridge/ebt_ip6.h +++ b/include/linux/netfilter_bridge/ebt_ip6.h @@ -31,17 +31,17 @@ struct ebt_ip6_info { struct in6_addr daddr; struct in6_addr smsk; struct in6_addr dmsk; - uint8_t tclass; - uint8_t protocol; - uint8_t bitmask; - uint8_t invflags; + __u8 tclass; + __u8 protocol; + __u8 bitmask; + __u8 invflags; union { - uint16_t sport[2]; - uint8_t icmpv6_type[2]; + __u16 sport[2]; + __u8 icmpv6_type[2]; }; union { - uint16_t dport[2]; - uint8_t icmpv6_code[2]; + __u16 dport[2]; + __u8 icmpv6_code[2]; }; }; diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/linux/netfilter_bridge/ebt_limit.h index 4bf76b751676..721d51ffa513 100644 --- a/include/linux/netfilter_bridge/ebt_limit.h +++ b/include/linux/netfilter_bridge/ebt_limit.h @@ -10,13 +10,13 @@ seconds, or one every 59 hours. */ struct ebt_limit_info { - u_int32_t avg; /* Average secs between packets * scale */ - u_int32_t burst; /* Period multiplier for upper limit. */ + __u32 avg; /* Average secs between packets * scale */ + __u32 burst; /* Period multiplier for upper limit. */ /* Used internally by the kernel */ unsigned long prev; - u_int32_t credit; - u_int32_t credit_cap, cost; + __u32 credit; + __u32 credit_cap, cost; }; #endif diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h index cc2cdfb764bc..564beb4946ea 100644 --- a/include/linux/netfilter_bridge/ebt_log.h +++ b/include/linux/netfilter_bridge/ebt_log.h @@ -10,9 +10,9 @@ #define EBT_LOG_WATCHER "log" struct ebt_log_info { - uint8_t loglevel; - uint8_t prefix[EBT_LOG_PREFIX_SIZE]; - uint32_t bitmask; + __u8 loglevel; + __u8 prefix[EBT_LOG_PREFIX_SIZE]; + __u32 bitmask; }; #endif diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/linux/netfilter_bridge/ebt_mark_m.h index 9ceb10ec0ed6..97b96c4b8db4 100644 --- a/include/linux/netfilter_bridge/ebt_mark_m.h +++ b/include/linux/netfilter_bridge/ebt_mark_m.h @@ -6,8 +6,8 @@ #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR) struct ebt_mark_m_info { unsigned long mark, mask; - uint8_t invert; - uint8_t bitmask; + __u8 invert; + __u8 bitmask; }; #define EBT_MARK_MATCH "mark_m" diff --git a/include/linux/netfilter_bridge/ebt_nflog.h b/include/linux/netfilter_bridge/ebt_nflog.h index 052817849b83..477315bc3537 100644 --- a/include/linux/netfilter_bridge/ebt_nflog.h +++ b/include/linux/netfilter_bridge/ebt_nflog.h @@ -10,11 +10,11 @@ #define EBT_NFLOG_DEFAULT_THRESHOLD 1 struct ebt_nflog_info { - u_int32_t len; - u_int16_t group; - u_int16_t threshold; - u_int16_t flags; - u_int16_t pad; + __u32 len; + __u16 group; + __u16 threshold; + __u16 flags; + __u16 pad; char prefix[EBT_NFLOG_PREFIX_SIZE]; }; diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/linux/netfilter_bridge/ebt_pkttype.h index 51a799840931..7c0fb0fdcf14 100644 --- a/include/linux/netfilter_bridge/ebt_pkttype.h +++ b/include/linux/netfilter_bridge/ebt_pkttype.h @@ -2,8 +2,8 @@ #define __LINUX_BRIDGE_EBT_PKTTYPE_H struct ebt_pkttype_info { - uint8_t pkt_type; - uint8_t invert; + __u8 pkt_type; + __u8 invert; }; #define EBT_PKTTYPE_MATCH "pkttype" diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/linux/netfilter_bridge/ebt_stp.h index e503a0aa2728..13a0bd49a92a 100644 --- a/include/linux/netfilter_bridge/ebt_stp.h +++ b/include/linux/netfilter_bridge/ebt_stp.h @@ -21,24 +21,24 @@ #define EBT_STP_MATCH "stp" struct ebt_stp_config_info { - uint8_t flags; - uint16_t root_priol, root_priou; + __u8 flags; + __u16 root_priol, root_priou; char root_addr[6], root_addrmsk[6]; - uint32_t root_costl, root_costu; - uint16_t sender_priol, sender_priou; + __u32 root_costl, root_costu; + __u16 sender_priol, sender_priou; char sender_addr[6], sender_addrmsk[6]; - uint16_t portl, portu; - uint16_t msg_agel, msg_ageu; - uint16_t max_agel, max_ageu; - uint16_t hello_timel, hello_timeu; - uint16_t forward_delayl, forward_delayu; + __u16 portl, portu; + __u16 msg_agel, msg_ageu; + __u16 max_agel, max_ageu; + __u16 hello_timel, hello_timeu; + __u16 forward_delayl, forward_delayu; }; struct ebt_stp_info { - uint8_t type; + __u8 type; struct ebt_stp_config_info config; - uint16_t bitmask; - uint16_t invflags; + __u16 bitmask; + __u16 invflags; }; #endif diff --git a/include/linux/netfilter_bridge/ebt_ulog.h b/include/linux/netfilter_bridge/ebt_ulog.h index b677e2671541..de35a51a7e46 100644 --- a/include/linux/netfilter_bridge/ebt_ulog.h +++ b/include/linux/netfilter_bridge/ebt_ulog.h @@ -10,7 +10,7 @@ #define EBT_ULOG_VERSION 1 struct ebt_ulog_info { - uint32_t nlgroup; + __u32 nlgroup; unsigned int cprange; unsigned int qthreshold; char prefix[EBT_ULOG_PREFIX_LEN]; diff --git a/include/linux/netfilter_bridge/ebt_vlan.h b/include/linux/netfilter_bridge/ebt_vlan.h index 1d98be4031e7..48dffc1dad36 100644 --- a/include/linux/netfilter_bridge/ebt_vlan.h +++ b/include/linux/netfilter_bridge/ebt_vlan.h @@ -8,12 +8,12 @@ #define EBT_VLAN_MATCH "vlan" struct ebt_vlan_info { - uint16_t id; /* VLAN ID {1-4095} */ - uint8_t prio; /* VLAN User Priority {0-7} */ + __u16 id; /* VLAN ID {1-4095} */ + __u8 prio; /* VLAN User Priority {0-7} */ __be16 encap; /* VLAN Encapsulated frame code {0-65535} */ - uint8_t bitmask; /* Args bitmask bit 1=1 - ID arg, + __u8 bitmask; /* Args bitmask bit 1=1 - ID arg, bit 2=1 User-Priority arg, bit 3=1 encap*/ - uint8_t invflags; /* Inverse bitmask bit 1=1 - inversed ID arg, + __u8 invflags; /* Inverse bitmask bit 1=1 - inversed ID arg, bit 2=1 - inversed Pirority arg */ }; diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h index e5a3687c8a72..3114f06939ef 100644 --- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h +++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h @@ -17,15 +17,15 @@ struct clusterip_config; struct ipt_clusterip_tgt_info { - u_int32_t flags; + __u32 flags; /* only relevant for new ones */ - u_int8_t clustermac[6]; - u_int16_t num_total_nodes; - u_int16_t num_local_nodes; - u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; - u_int32_t hash_mode; - u_int32_t hash_initval; + __u8 clustermac[6]; + __u16 num_total_nodes; + __u16 num_local_nodes; + __u16 local_nodes[CLUSTERIP_MAX_NODES]; + __u32 hash_mode; + __u32 hash_initval; /* Used internally by the kernel */ struct clusterip_config *config; diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/linux/netfilter_ipv4/ipt_ECN.h index 7ca45918ab8e..c6e3e01b75e0 100644 --- a/include/linux/netfilter_ipv4/ipt_ECN.h +++ b/include/linux/netfilter_ipv4/ipt_ECN.h @@ -19,11 +19,11 @@ #define IPT_ECN_OP_MASK 0xce struct ipt_ECN_info { - u_int8_t operation; /* bitset of operations */ - u_int8_t ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ + __u8 operation; /* bitset of operations */ + __u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ union { struct { - u_int8_t ece:1, cwr:1; /* TCP ECT bits */ + __u8 ece:1, cwr:1; /* TCP ECT bits */ } tcp; } proto; }; diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h index 2529660c5b38..fa0ebeca5d95 100644 --- a/include/linux/netfilter_ipv4/ipt_SAME.h +++ b/include/linux/netfilter_ipv4/ipt_SAME.h @@ -7,9 +7,9 @@ struct ipt_same_info { unsigned char info; - u_int32_t rangesize; - u_int32_t ipnum; - u_int32_t *iparray; + __u32 rangesize; + __u32 ipnum; + __u32 *iparray; /* hangs off end. */ struct nf_nat_range range[IPT_SAME_MAX_RANGE]; diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h index ee6611edc112..f6250e422d5e 100644 --- a/include/linux/netfilter_ipv4/ipt_TTL.h +++ b/include/linux/netfilter_ipv4/ipt_TTL.h @@ -13,8 +13,8 @@ enum { #define IPT_TTL_MAXMODE IPT_TTL_DEC struct ipt_TTL_info { - u_int8_t mode; - u_int8_t ttl; + __u8 mode; + __u8 ttl; }; diff --git a/include/linux/netfilter_ipv4/ipt_addrtype.h b/include/linux/netfilter_ipv4/ipt_addrtype.h index 446de6aef983..f29c3cfcc240 100644 --- a/include/linux/netfilter_ipv4/ipt_addrtype.h +++ b/include/linux/netfilter_ipv4/ipt_addrtype.h @@ -9,17 +9,17 @@ enum { }; struct ipt_addrtype_info_v1 { - u_int16_t source; /* source-type mask */ - u_int16_t dest; /* dest-type mask */ - u_int32_t flags; + __u16 source; /* source-type mask */ + __u16 dest; /* dest-type mask */ + __u32 flags; }; /* revision 0 */ struct ipt_addrtype_info { - u_int16_t source; /* source-type mask */ - u_int16_t dest; /* dest-type mask */ - u_int32_t invert_source; - u_int32_t invert_dest; + __u16 source; /* source-type mask */ + __u16 dest; /* dest-type mask */ + __u32 invert_source; + __u32 invert_dest; }; #endif diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/linux/netfilter_ipv4/ipt_ah.h index 2e555b4d05e3..8fea283ee62a 100644 --- a/include/linux/netfilter_ipv4/ipt_ah.h +++ b/include/linux/netfilter_ipv4/ipt_ah.h @@ -2,8 +2,8 @@ #define _IPT_AH_H struct ipt_ah { - u_int32_t spis[2]; /* Security Parameter Index */ - u_int8_t invflags; /* Inverse flags */ + __u32 spis[2]; /* Security Parameter Index */ + __u8 invflags; /* Inverse flags */ }; diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h index 9945baa4ccd7..78b98aa8784d 100644 --- a/include/linux/netfilter_ipv4/ipt_ecn.h +++ b/include/linux/netfilter_ipv4/ipt_ecn.h @@ -20,12 +20,12 @@ /* match info */ struct ipt_ecn_info { - u_int8_t operation; - u_int8_t invert; - u_int8_t ip_ect; + __u8 operation; + __u8 invert; + __u8 ip_ect; union { struct { - u_int8_t ect; + __u8 ect; } tcp; } proto; }; diff --git a/include/linux/netfilter_ipv4/ipt_ttl.h b/include/linux/netfilter_ipv4/ipt_ttl.h index ee24fd86a3aa..93d9a06689a3 100644 --- a/include/linux/netfilter_ipv4/ipt_ttl.h +++ b/include/linux/netfilter_ipv4/ipt_ttl.h @@ -13,8 +13,8 @@ enum { struct ipt_ttl_info { - u_int8_t mode; - u_int8_t ttl; + __u8 mode; + __u8 ttl; }; diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h index afb7813d45ab..81cdaf0480e3 100644 --- a/include/linux/netfilter_ipv6/ip6t_HL.h +++ b/include/linux/netfilter_ipv6/ip6t_HL.h @@ -14,8 +14,8 @@ enum { #define IP6T_HL_MAXMODE IP6T_HL_DEC struct ip6t_HL_info { - u_int8_t mode; - u_int8_t hop_limit; + __u8 mode; + __u8 hop_limit; }; diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h index 6be6504162bb..b999aa4e5969 100644 --- a/include/linux/netfilter_ipv6/ip6t_REJECT.h +++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h @@ -12,7 +12,7 @@ enum ip6t_reject_with { }; struct ip6t_reject_info { - u_int32_t with; /* reject type */ + __u32 with; /* reject type */ }; #endif /*_IP6T_REJECT_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/linux/netfilter_ipv6/ip6t_ah.h index 17a745cfb2c7..a602c165edd1 100644 --- a/include/linux/netfilter_ipv6/ip6t_ah.h +++ b/include/linux/netfilter_ipv6/ip6t_ah.h @@ -2,10 +2,10 @@ #define _IP6T_AH_H struct ip6t_ah { - u_int32_t spis[2]; /* Security Parameter Index */ - u_int32_t hdrlen; /* Header Length */ - u_int8_t hdrres; /* Test of the Reserved Filed */ - u_int8_t invflags; /* Inverse flags */ + __u32 spis[2]; /* Security Parameter Index */ + __u32 hdrlen; /* Header Length */ + __u8 hdrres; /* Test of the Reserved Filed */ + __u8 invflags; /* Inverse flags */ }; #define IP6T_AH_SPI 0x01 diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/linux/netfilter_ipv6/ip6t_frag.h index 3724d0850920..538b31ef5e3d 100644 --- a/include/linux/netfilter_ipv6/ip6t_frag.h +++ b/include/linux/netfilter_ipv6/ip6t_frag.h @@ -2,10 +2,10 @@ #define _IP6T_FRAG_H struct ip6t_frag { - u_int32_t ids[2]; /* Security Parameter Index */ - u_int32_t hdrlen; /* Header Length */ - u_int8_t flags; /* */ - u_int8_t invflags; /* Inverse flags */ + __u32 ids[2]; /* Security Parameter Index */ + __u32 hdrlen; /* Header Length */ + __u8 flags; /* */ + __u8 invflags; /* Inverse flags */ }; #define IP6T_FRAG_IDS 0x01 diff --git a/include/linux/netfilter_ipv6/ip6t_hl.h b/include/linux/netfilter_ipv6/ip6t_hl.h index 5ef91b8319a8..c6fddcb971da 100644 --- a/include/linux/netfilter_ipv6/ip6t_hl.h +++ b/include/linux/netfilter_ipv6/ip6t_hl.h @@ -14,8 +14,8 @@ enum { struct ip6t_hl_info { - u_int8_t mode; - u_int8_t hop_limit; + __u8 mode; + __u8 hop_limit; }; diff --git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/linux/netfilter_ipv6/ip6t_ipv6header.h index 01dfd445596a..73d53bd3ff62 100644 --- a/include/linux/netfilter_ipv6/ip6t_ipv6header.h +++ b/include/linux/netfilter_ipv6/ip6t_ipv6header.h @@ -9,9 +9,9 @@ on whether they contain certain headers */ #define __IPV6HEADER_H struct ip6t_ipv6header_info { - u_int8_t matchflags; - u_int8_t invflags; - u_int8_t modeflag; + __u8 matchflags; + __u8 invflags; + __u8 modeflag; }; #define MASK_HOPOPTS 128 diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h index 18549bca2d1f..98c8cf685eea 100644 --- a/include/linux/netfilter_ipv6/ip6t_mh.h +++ b/include/linux/netfilter_ipv6/ip6t_mh.h @@ -3,8 +3,8 @@ /* MH matching stuff */ struct ip6t_mh { - u_int8_t types[2]; /* MH type range */ - u_int8_t invflags; /* Inverse flags */ + __u8 types[2]; /* MH type range */ + __u8 invflags; /* Inverse flags */ }; /* Values for "invflags" field in struct ip6t_mh. */ diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/linux/netfilter_ipv6/ip6t_opts.h index 62d89bcd9f9c..405d309cd741 100644 --- a/include/linux/netfilter_ipv6/ip6t_opts.h +++ b/include/linux/netfilter_ipv6/ip6t_opts.h @@ -4,11 +4,11 @@ #define IP6T_OPTS_OPTSNR 16 struct ip6t_opts { - u_int32_t hdrlen; /* Header Length */ - u_int8_t flags; /* */ - u_int8_t invflags; /* Inverse flags */ - u_int16_t opts[IP6T_OPTS_OPTSNR]; /* opts */ - u_int8_t optsnr; /* Nr of OPts */ + __u32 hdrlen; /* Header Length */ + __u8 flags; /* */ + __u8 invflags; /* Inverse flags */ + __u16 opts[IP6T_OPTS_OPTSNR]; /* opts */ + __u8 optsnr; /* Nr of OPts */ }; #define IP6T_OPTS_LEN 0x01 diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/linux/netfilter_ipv6/ip6t_rt.h index ab91bfd2cd00..e8dad20acd37 100644 --- a/include/linux/netfilter_ipv6/ip6t_rt.h +++ b/include/linux/netfilter_ipv6/ip6t_rt.h @@ -6,13 +6,13 @@ #define IP6T_RT_HOPS 16 struct ip6t_rt { - u_int32_t rt_type; /* Routing Type */ - u_int32_t segsleft[2]; /* Segments Left */ - u_int32_t hdrlen; /* Header Length */ - u_int8_t flags; /* */ - u_int8_t invflags; /* Inverse flags */ + __u32 rt_type; /* Routing Type */ + __u32 segsleft[2]; /* Segments Left */ + __u32 hdrlen; /* Header Length */ + __u8 flags; /* */ + __u8 invflags; /* Inverse flags */ struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */ - u_int8_t addrnr; /* Nr of Addresses */ + __u8 addrnr; /* Nr of Addresses */ }; #define IP6T_RT_TYP 0x01 -- cgit v1.2.3 From 0b8ad876275c74e4bfb6ec3150793f3c0ecfcee2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 18 Jan 2011 11:23:06 +0100 Subject: netfilter: xtables: add missing header files to export list Signed-off-by: Jan Engelhardt --- include/linux/netfilter/Kbuild | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 9f11fbc377e2..fc4e0aa805a2 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -56,6 +56,8 @@ header-y += xt_rateest.h header-y += xt_realm.h header-y += xt_recent.h header-y += xt_sctp.h +header-y += xt_secmark.h +header-y += xt_socket.h header-y += xt_state.h header-y += xt_statistic.h header-y += xt_string.h -- cgit v1.2.3 From f615df76ed862b7d3927ec5f55b805ca19be29d9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Jan 2011 15:52:14 +0100 Subject: netfilter: reduce NF_VERDICT_MASK to 0xff NF_VERDICT_MASK is currently 0xffff. This is because the upper 16 bits are used to store errno (for NF_DROP) or the queue number (NF_QUEUE verdict). As there are up to 0xffff different queues available, there is no more room to store additional flags. At the moment there are only 6 different verdicts, i.e. we can reduce NF_VERDICT_MASK to 0xff to allow storing additional flags in the 0xff00 space. NF_VERDICT_BITS would then be reduced to 8, but because the value is exported to userspace, this might cause breakage; e.g.: e.g. 'queuenr = (1 << NF_VERDICT_BITS) | NF_QUEUE' would now break. Thus, remove NF_VERDICT_BITS usage in the kernel and move the old value to the 'userspace compat' section. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0ab7ca787b22..78b73cc10216 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -24,16 +24,19 @@ #define NF_MAX_VERDICT NF_STOP /* we overload the higher bits for encoding auxiliary data such as the queue - * number. Not nice, but better than additional function arguments. */ -#define NF_VERDICT_MASK 0x0000ffff -#define NF_VERDICT_BITS 16 + * number or errno values. Not nice, but better than additional function + * arguments. */ +#define NF_VERDICT_MASK 0x000000ff + +/* extra verdict flags have mask 0x0000ff00 */ +/* queue number (NF_QUEUE) or errno (NF_DROP) */ #define NF_VERDICT_QMASK 0xffff0000 #define NF_VERDICT_QBITS 16 -#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE) +#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE) -#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP) +#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP) /* only for userspace compatibility */ #ifndef __KERNEL__ @@ -41,6 +44,9 @@ <= 0x2000 is used for protocol-flags. */ #define NFC_UNKNOWN 0x4000 #define NFC_ALTERED 0x8000 + +/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ +#define NF_VERDICT_BITS 16 #endif enum nf_inet_hooks { @@ -72,6 +78,10 @@ union nf_inet_addr { #ifdef __KERNEL__ #ifdef CONFIG_NETFILTER +static inline int NF_DROP_GETERR(int verdict) +{ + return -(verdict >> NF_VERDICT_QBITS); +} static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) -- cgit v1.2.3 From 94b27cc36123069966616670c3653cd6873babe9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Jan 2011 16:08:30 +0100 Subject: netfilter: allow NFQUEUE bypass if no listener is available If an skb is to be NF_QUEUE'd, but no program has opened the queue, the packet is dropped. This adds a v2 target revision of xt_NFQUEUE that allows packets to continue through the ruleset instead. Because the actual queueing happens outside of the target context, the 'bypass' flag has to be communicated back to the netfilter core. Unfortunately the only choice to do this without adding a new function argument is to use the target function return value (i.e. the verdict). In the NF_QUEUE case, the upper 16bit already contain the queue number to use. The previous patch reduced NF_VERDICT_MASK to 0xff, i.e. we now have extra room for a new flag. If a hook issued a NF_QUEUE verdict, then the netfilter core will continue packet processing if the queueing hook returns -ESRCH (== "this queue does not exist") and the new NF_VERDICT_FLAG_QUEUE_BYPASS flag is set in the verdict value. Note: If the queue exists, but userspace does not consume packets fast enough, the skb will still be dropped. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 1 + include/linux/netfilter/xt_NFQUEUE.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 78b73cc10216..eeec00abb664 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -29,6 +29,7 @@ #define NF_VERDICT_MASK 0x000000ff /* extra verdict flags have mask 0x0000ff00 */ +#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000 /* queue number (NF_QUEUE) or errno (NF_DROP) */ #define NF_VERDICT_QMASK 0xffff0000 diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/linux/netfilter/xt_NFQUEUE.h index 2584f4a777de..9eafdbbb401c 100644 --- a/include/linux/netfilter/xt_NFQUEUE.h +++ b/include/linux/netfilter/xt_NFQUEUE.h @@ -20,4 +20,10 @@ struct xt_NFQ_info_v1 { __u16 queues_total; }; +struct xt_NFQ_info_v2 { + __u16 queuenum; + __u16 queues_total; + __u16 bypass; +}; + #endif /* _XT_NFQ_TARGET_H */ -- cgit v1.2.3 From 93557f53e1fbd9e2b6574ab0a9b5852628fde9e3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 18 Jan 2011 18:12:24 +0100 Subject: netfilter: nf_conntrack: nf_conntrack snmp helper Adding support for SNMP broadcast connection tracking. The SNMP broadcast requests are now paired with the SNMP responses. Thus allowing using SNMP broadcasts with firewall enabled. Please refer to the following conversation: http://marc.info/?l=netfilter-devel&m=125992205006600&w=2 Patrick McHardy wrote: > > The best solution would be to add generic broadcast tracking, the > > use of expectations for this is a bit of abuse. > > The second best choice I guess would be to move the help() function > > to a shared module and generalize it so it can be used for both. This patch implements the "second best choice". Since the netbios-ns conntrack module uses the same helper functionality as the snmp, only one helper function is added for both snmp and netbios-ns modules into the new object - nf_conntrack_broadcast. Signed-off-by: Jiri Olsa Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_snmp.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/linux/netfilter/nf_conntrack_snmp.h (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h new file mode 100644 index 000000000000..064bc63a5346 --- /dev/null +++ b/include/linux/netfilter/nf_conntrack_snmp.h @@ -0,0 +1,9 @@ +#ifndef _NF_CONNTRACK_SNMP_H +#define _NF_CONNTRACK_SNMP_H + +extern int (*nf_nat_snmp_hook)(struct sk_buff *skb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo); + +#endif /* _NF_CONNTRACK_SNMP_H */ -- cgit v1.2.3 From a992ca2a0498edd22a88ac8c41570f536de29c9e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 19 Jan 2011 16:00:07 +0100 Subject: netfilter: nf_conntrack_tstamp: add flow-based timestamp extension This patch adds flow-based timestamping for conntracks. This conntrack extension is disabled by default. Basically, we use two 64-bits variables to store the creation timestamp once the conntrack has been confirmed and the other to store the deletion time. This extension is disabled by default, to enable it, you have to: echo 1 > /proc/sys/net/netfilter/nf_conntrack_timestamp This patch allows to save memory for user-space flow-based loogers such as ulogd2. In short, ulogd2 does not need to keep a hashtable with the conntrack in user-space to know when they were created and destroyed, instead we use the kernel timestamp. If we want to have a sane IPFIX implementation in user-space, this nanosecs resolution timestamps are also useful. Other custom user-space applications can benefit from this via libnetfilter_conntrack. This patch modifies the /proc output to display the delta time in seconds since the flow start. You can also obtain the flow-start date by means of the conntrack-tools. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink_conntrack.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 19711e3ffd42..debf1aefd753 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -42,6 +42,7 @@ enum ctattr_type { CTA_SECMARK, /* obsolete */ CTA_ZONE, CTA_SECCTX, + CTA_TIMESTAMP, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) @@ -127,6 +128,14 @@ enum ctattr_counters { }; #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) +enum ctattr_tstamp { + CTA_TIMESTAMP_UNSPEC, + CTA_TIMESTAMP_START, + CTA_TIMESTAMP_STOP, + __CTA_TIMESTAMP_MAX +}; +#define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1) + enum ctattr_nat { CTA_NAT_UNSPEC, CTA_NAT_MINIP, -- cgit v1.2.3 From cc4fc022571376412986e27e08b0765e9cb2aafb Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 18 Jan 2011 17:32:40 +0100 Subject: netfilter: xtables: connlimit revision 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds destination address-based selection. The old "inverse" member is overloaded (memory-wise) with a new "flags" variable, similar to how J.Park did it with xt_string rev 1. Since revision 0 userspace only sets flag 0x1, no great changes are made to explicitly test for different revisions. Signed-off-by: Jan Engelhardt --- include/linux/netfilter/xt_connlimit.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h index 7e3284bcbd2b..8884efc605c7 100644 --- a/include/linux/netfilter/xt_connlimit.h +++ b/include/linux/netfilter/xt_connlimit.h @@ -3,6 +3,11 @@ struct xt_connlimit_data; +enum { + XT_CONNLIMIT_INVERT = 1 << 0, + XT_CONNLIMIT_DADDR = 1 << 1, +}; + struct xt_connlimit_info { union { union nf_inet_addr mask; @@ -14,6 +19,13 @@ struct xt_connlimit_info { #endif }; unsigned int limit, inverse; + union { + /* revision 0 */ + unsigned int inverse; + + /* revision 1 */ + __u32 flags; + }; /* Used internally by the kernel */ struct xt_connlimit_data *data __attribute__((aligned(8))); -- cgit v1.2.3 From cbda10fa97d72c7a1923be4426171aa90e8c6dab Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Thu, 13 Jan 2011 23:38:30 +0000 Subject: net_device: add support for network device groups Net devices can now be grouped, enabling simpler manipulation from userspace. This patch adds a group field to the net_device structure, as well as rtnetlink support to query and modify it. Signed-off-by: Vlad Dogaru Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/if_link.h | 1 + include/linux/netdevice.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 6485d2a89bec..f4a2e6b1b864 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -135,6 +135,7 @@ enum { IFLA_VF_PORTS, IFLA_PORT_SELF, IFLA_AF_SPEC, + IFLA_GROUP, /* Group the device belongs to */ __IFLA_MAX }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d971346b0340..68a4627b74f5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -75,6 +75,9 @@ struct wireless_dev; #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ +/* Initial net device group. All devices belong to group 0 by default. */ +#define INIT_NETDEV_GROUP 0 + /* * Transmit return codes: transmit return codes originate from three different * namespaces: @@ -1153,6 +1156,9 @@ struct net_device { /* phy device may attach itself for hardware timestamping */ struct phy_device *phydev; + + /* group the device belongs to */ + int group; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -1844,6 +1850,7 @@ extern int dev_set_alias(struct net_device *, const char *, size_t); extern int dev_change_net_namespace(struct net_device *, struct net *, const char *); extern int dev_set_mtu(struct net_device *, int); +extern void dev_set_group(struct net_device *, int); extern int dev_set_mac_address(struct net_device *, struct sockaddr *); extern int dev_hard_start_xmit(struct sk_buff *skb, -- cgit v1.2.3 From 4f57c087de9b46182545676d2c594120a20f2e58 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jan 2011 08:06:04 +0000 Subject: net: implement mechanism for HW based QOS This patch provides a mechanism for lower layer devices to steer traffic using skb->priority to tx queues. This allows for hardware based QOS schemes to use the default qdisc without incurring the penalties related to global state and the qdisc lock. While reliably receiving skbs on the correct tx ring to avoid head of line blocking resulting from shuffling in the LLD. Finally, all the goodness from txq caching and xps/rps can still be leveraged. Many drivers and hardware exist with the ability to implement QOS schemes in the hardware but currently these drivers tend to rely on firmware to reroute specific traffic, a driver specific select_queue or the queue_mapping action in the qdisc. By using select_queue for this drivers need to be updated for each and every traffic type and we lose the goodness of much of the upstream work. Firmware solutions are inherently inflexible. And finally if admins are expected to build a qdisc and filter rules to steer traffic this requires knowledge of how the hardware is currently configured. The number of tx queues and the queue offsets may change depending on resources. Also this approach incurs all the overhead of a qdisc with filters. With the mechanism in this patch users can set skb priority using expected methods ie setsockopt() or the stack can set the priority directly. Then the skb will be steered to the correct tx queues aligned with hardware QOS traffic classes. In the normal case with single traffic class and all queues in this class everything works as is until the LLD enables multiple tcs. To steer the skb we mask out the lower 4 bits of the priority and allow the hardware to configure upto 15 distinct classes of traffic. This is expected to be sufficient for most applications at any rate it is more then the 8021Q spec designates and is equal to the number of prio bands currently implemented in the default qdisc. This in conjunction with a userspace application such as lldpad can be used to implement 8021Q transmission selection algorithms one of these algorithms being the extended transmission selection algorithm currently being used for DCB. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/netdevice.h | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 68a4627b74f5..371fa8839d51 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -646,6 +646,14 @@ struct xps_dev_maps { (nr_cpu_ids * sizeof(struct xps_map *))) #endif /* CONFIG_XPS */ +#define TC_MAX_QUEUE 16 +#define TC_BITMASK 15 +/* HW offloaded queuing disciplines txq count and offset maps */ +struct netdev_tc_txq { + u16 count; + u16 offset; +}; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -756,6 +764,11 @@ struct xps_dev_maps { * int (*ndo_set_vf_port)(struct net_device *dev, int vf, * struct nlattr *port[]); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); + * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) + * Called to setup 'tc' number of traffic classes in the net device. This + * is always called from the stack with the rtnl lock held and netif tx + * queues stopped. This allows the netdevice to perform queue management + * safely. */ #define HAVE_NET_DEVICE_OPS struct net_device_ops { @@ -814,6 +827,7 @@ struct net_device_ops { struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); + int (*ndo_setup_tc)(struct net_device *dev, u8 tc); #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); @@ -1146,6 +1160,9 @@ struct net_device { /* Data Center Bridging netlink ops */ const struct dcbnl_rtnl_ops *dcbnl_ops; #endif + u8 num_tc; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; + u8 prio_tc_map[TC_BITMASK + 1]; #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) /* max exchange id for FCoE LRO by ddp */ @@ -1164,6 +1181,57 @@ struct net_device { #define NETDEV_ALIGN 32 +static inline +int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio) +{ + return dev->prio_tc_map[prio & TC_BITMASK]; +} + +static inline +int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) +{ + if (tc >= dev->num_tc) + return -EINVAL; + + dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK; + return 0; +} + +static inline +void netdev_reset_tc(struct net_device *dev) +{ + dev->num_tc = 0; + memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); + memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); +} + +static inline +int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) +{ + if (tc >= dev->num_tc) + return -EINVAL; + + dev->tc_to_txq[tc].count = count; + dev->tc_to_txq[tc].offset = offset; + return 0; +} + +static inline +int netdev_set_num_tc(struct net_device *dev, u8 num_tc) +{ + if (num_tc > TC_MAX_QUEUE) + return -EINVAL; + + dev->num_tc = num_tc; + return 0; +} + +static inline +int netdev_get_num_tc(struct net_device *dev) +{ + return dev->num_tc; +} + static inline struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, unsigned int index) -- cgit v1.2.3 From b8970f0bfc78103cb74c66055de7379b15097840 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jan 2011 08:06:09 +0000 Subject: net_sched: implement a root container qdisc sch_mqprio This implements a mqprio queueing discipline that by default creates a pfifo_fast qdisc per tx queue and provides the needed configuration interface. Using the mqprio qdisc the number of tcs currently in use along with the range of queues alloted to each class can be configured. By default skbs are mapped to traffic classes using the skb priority. This mapping is configurable. Configurable parameters, struct tc_mqprio_qopt { __u8 num_tc; __u8 prio_tc_map[TC_BITMASK + 1]; __u8 hw; __u16 count[TC_MAX_QUEUE]; __u16 offset[TC_MAX_QUEUE]; }; Here the count/offset pairing give the queue alignment and the prio_tc_map gives the mapping from skb->priority to tc. The hw bit determines if the hardware should configure the count and offset values. If the hardware bit is set then the operation will fail if the hardware does not implement the ndo_setup_tc operation. This is to avoid undetermined states where the hardware may or may not control the queue mapping. Also minimal bounds checking is done on the count/offset to verify a queue does not exceed num_tx_queues and that queue ranges do not overlap. Otherwise it is left to user policy or hardware configuration to create useful mappings. It is expected that hardware QOS schemes can be implemented by creating appropriate mappings of queues in ndo_tc_setup(). One expected use case is drivers will use the ndo_setup_tc to map queue ranges onto 802.1Q traffic classes. This provides a generic mechanism to map network traffic onto these traffic classes and removes the need for lower layer drivers to know specifics about traffic types. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 2cfa4bc8dea6..776cd93d5f7b 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -481,4 +481,16 @@ struct tc_drr_stats { __u32 deficit; }; +/* MQPRIO */ +#define TC_QOPT_BITMASK 15 +#define TC_QOPT_MAX_QUEUE 16 + +struct tc_mqprio_qopt { + __u8 num_tc; + __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; + __u8 hw; + __u16 count[TC_QOPT_MAX_QUEUE]; + __u16 offset[TC_QOPT_MAX_QUEUE]; +}; + #endif -- cgit v1.2.3 From 5d8449286456659cdd0998e62d80df2d9e77e9e3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 20 Jan 2011 08:48:15 +0100 Subject: netfilter: xtables: remove extraneous header that slipped in Commit 0b8ad87 (netfilter: xtables: add missing header files to export list) erroneously added this. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index fc4e0aa805a2..89c0d1e20d72 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -56,7 +56,6 @@ header-y += xt_rateest.h header-y += xt_realm.h header-y += xt_recent.h header-y += xt_sctp.h -header-y += xt_secmark.h header-y += xt_socket.h header-y += xt_state.h header-y += xt_statistic.h -- cgit v1.2.3 From ba12b130a65840005770135a69199cb9adaf8c8f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 20 Jan 2011 14:01:12 +0100 Subject: netfilter: xtables: remove duplicate member Accidentally missed removing the old out-of-union "inverse" member, which caused the struct size to change which then gives size mismatch warnings when using an old iptables. It is interesting to see that gcc did not warn about this before. (Filed http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47376 ) Signed-off-by: Jan Engelhardt --- include/linux/netfilter/xt_connlimit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h index 8884efc605c7..ab1d3b57fff7 100644 --- a/include/linux/netfilter/xt_connlimit.h +++ b/include/linux/netfilter/xt_connlimit.h @@ -18,7 +18,7 @@ struct xt_connlimit_info { }; #endif }; - unsigned int limit, inverse; + unsigned int limit; union { /* revision 0 */ unsigned int inverse; -- cgit v1.2.3 From 06988b06935da7a210887e9d3f50f46f2faa4953 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 20 Jan 2011 17:50:17 +0100 Subject: netfilter: xtables: add missing header inclusions for headers_check Resolve these warnings on `make headers_check`: usr/include/linux/netfilter/xt_CT.h:7: found __[us]{8,16,32,64} type without #include ... Signed-off-by: Jan Engelhardt --- include/linux/netfilter/xt_CT.h | 2 ++ include/linux/netfilter/xt_TCPOPTSTRIP.h | 2 ++ include/linux/netfilter/xt_TPROXY.h | 2 ++ include/linux/netfilter/xt_cluster.h | 2 ++ include/linux/netfilter/xt_connlimit.h | 2 ++ include/linux/netfilter/xt_quota.h | 2 ++ include/linux/netfilter/xt_socket.h | 2 ++ include/linux/netfilter/xt_time.h | 2 ++ include/linux/netfilter/xt_u32.h | 2 ++ include/linux/netfilter_bridge/ebt_802_3.h | 2 ++ include/linux/netfilter_bridge/ebt_among.h | 2 ++ include/linux/netfilter_bridge/ebt_arp.h | 2 ++ include/linux/netfilter_bridge/ebt_ip.h | 2 ++ include/linux/netfilter_bridge/ebt_ip6.h | 2 ++ include/linux/netfilter_bridge/ebt_limit.h | 2 ++ include/linux/netfilter_bridge/ebt_log.h | 2 ++ include/linux/netfilter_bridge/ebt_mark_m.h | 2 ++ include/linux/netfilter_bridge/ebt_nflog.h | 2 ++ include/linux/netfilter_bridge/ebt_pkttype.h | 2 ++ include/linux/netfilter_bridge/ebt_stp.h | 2 ++ include/linux/netfilter_bridge/ebt_ulog.h | 2 ++ include/linux/netfilter_bridge/ebt_vlan.h | 2 ++ include/linux/netfilter_ipv4/ipt_CLUSTERIP.h | 2 ++ include/linux/netfilter_ipv4/ipt_ECN.h | 2 ++ include/linux/netfilter_ipv4/ipt_SAME.h | 2 ++ include/linux/netfilter_ipv4/ipt_TTL.h | 2 ++ include/linux/netfilter_ipv4/ipt_addrtype.h | 2 ++ include/linux/netfilter_ipv4/ipt_ah.h | 2 ++ include/linux/netfilter_ipv4/ipt_ecn.h | 2 ++ include/linux/netfilter_ipv4/ipt_ttl.h | 2 ++ include/linux/netfilter_ipv6/ip6t_HL.h | 2 ++ include/linux/netfilter_ipv6/ip6t_REJECT.h | 2 ++ include/linux/netfilter_ipv6/ip6t_ah.h | 2 ++ include/linux/netfilter_ipv6/ip6t_frag.h | 2 ++ include/linux/netfilter_ipv6/ip6t_hl.h | 2 ++ include/linux/netfilter_ipv6/ip6t_ipv6header.h | 2 ++ include/linux/netfilter_ipv6/ip6t_mh.h | 2 ++ include/linux/netfilter_ipv6/ip6t_opts.h | 2 ++ include/linux/netfilter_ipv6/ip6t_rt.h | 1 + 39 files changed, 77 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h index fbf4c5658554..b56e76811c04 100644 --- a/include/linux/netfilter/xt_CT.h +++ b/include/linux/netfilter/xt_CT.h @@ -1,6 +1,8 @@ #ifndef _XT_CT_H #define _XT_CT_H +#include + #define XT_CT_NOTRACK 0x1 struct xt_ct_target_info { diff --git a/include/linux/netfilter/xt_TCPOPTSTRIP.h b/include/linux/netfilter/xt_TCPOPTSTRIP.h index 342ef14b1761..7157318499c2 100644 --- a/include/linux/netfilter/xt_TCPOPTSTRIP.h +++ b/include/linux/netfilter/xt_TCPOPTSTRIP.h @@ -1,6 +1,8 @@ #ifndef _XT_TCPOPTSTRIP_H #define _XT_TCPOPTSTRIP_H +#include + #define tcpoptstrip_set_bit(bmap, idx) \ (bmap[(idx) >> 5] |= 1U << (idx & 31)) #define tcpoptstrip_test_bit(bmap, idx) \ diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h index 8097e0b4c15e..902043c2073f 100644 --- a/include/linux/netfilter/xt_TPROXY.h +++ b/include/linux/netfilter/xt_TPROXY.h @@ -1,6 +1,8 @@ #ifndef _XT_TPROXY_H #define _XT_TPROXY_H +#include + /* TPROXY target is capable of marking the packet to perform * redirection. We can get rid of that whenever we get support for * mutliple targets in the same rule. */ diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h index 66cfa3c782ac..9b883c8fbf54 100644 --- a/include/linux/netfilter/xt_cluster.h +++ b/include/linux/netfilter/xt_cluster.h @@ -1,6 +1,8 @@ #ifndef _XT_CLUSTER_MATCH_H #define _XT_CLUSTER_MATCH_H +#include + enum xt_cluster_flags { XT_CLUSTER_F_INV = (1 << 0) }; diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h index ab1d3b57fff7..0ca66e97acbc 100644 --- a/include/linux/netfilter/xt_connlimit.h +++ b/include/linux/netfilter/xt_connlimit.h @@ -1,6 +1,8 @@ #ifndef _XT_CONNLIMIT_H #define _XT_CONNLIMIT_H +#include + struct xt_connlimit_data; enum { diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h index 8bda65f0bc92..ca6e03e47a17 100644 --- a/include/linux/netfilter/xt_quota.h +++ b/include/linux/netfilter/xt_quota.h @@ -1,6 +1,8 @@ #ifndef _XT_QUOTA_H #define _XT_QUOTA_H +#include + enum xt_quota_flags { XT_QUOTA_INVERT = 0x1, }; diff --git a/include/linux/netfilter/xt_socket.h b/include/linux/netfilter/xt_socket.h index 6f475b8ff34b..26d7217bd4f1 100644 --- a/include/linux/netfilter/xt_socket.h +++ b/include/linux/netfilter/xt_socket.h @@ -1,6 +1,8 @@ #ifndef _XT_SOCKET_H #define _XT_SOCKET_H +#include + enum { XT_SOCKET_TRANSPARENT = 1 << 0, }; diff --git a/include/linux/netfilter/xt_time.h b/include/linux/netfilter/xt_time.h index b8bd4568efdb..7c37fac576c4 100644 --- a/include/linux/netfilter/xt_time.h +++ b/include/linux/netfilter/xt_time.h @@ -1,6 +1,8 @@ #ifndef _XT_TIME_H #define _XT_TIME_H 1 +#include + struct xt_time_info { __u32 date_start; __u32 date_stop; diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h index e8c3d8722bae..04d1bfea03c2 100644 --- a/include/linux/netfilter/xt_u32.h +++ b/include/linux/netfilter/xt_u32.h @@ -1,6 +1,8 @@ #ifndef _XT_U32_H #define _XT_U32_H 1 +#include + enum xt_u32_ops { XT_U32_AND, XT_U32_LEFTSH, diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h index c427764f4444..be5be1577a56 100644 --- a/include/linux/netfilter_bridge/ebt_802_3.h +++ b/include/linux/netfilter_bridge/ebt_802_3.h @@ -1,6 +1,8 @@ #ifndef __LINUX_BRIDGE_EBT_802_3_H #define __LINUX_BRIDGE_EBT_802_3_H +#include + #define EBT_802_3_SAP 0x01 #define EBT_802_3_TYPE 0x02 diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/linux/netfilter_bridge/ebt_among.h index 686c9619dbc0..bd4e3ad0b706 100644 --- a/include/linux/netfilter_bridge/ebt_among.h +++ b/include/linux/netfilter_bridge/ebt_among.h @@ -1,6 +1,8 @@ #ifndef __LINUX_BRIDGE_EBT_AMONG_H #define __LINUX_BRIDGE_EBT_AMONG_H +#include + #define EBT_AMONG_DST 0x01 #define EBT_AMONG_SRC 0x02 diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/linux/netfilter_bridge/ebt_arp.h index e62b5af95869..522f3e427f49 100644 --- a/include/linux/netfilter_bridge/ebt_arp.h +++ b/include/linux/netfilter_bridge/ebt_arp.h @@ -1,6 +1,8 @@ #ifndef __LINUX_BRIDGE_EBT_ARP_H #define __LINUX_BRIDGE_EBT_ARP_H +#include + #define EBT_ARP_OPCODE 0x01 #define EBT_ARP_HTYPE 0x02 #define EBT_ARP_PTYPE 0x04 diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/linux/netfilter_bridge/ebt_ip.h index d99de58da2c7..c4bbc41b0ea4 100644 --- a/include/linux/netfilter_bridge/ebt_ip.h +++ b/include/linux/netfilter_bridge/ebt_ip.h @@ -15,6 +15,8 @@ #ifndef __LINUX_BRIDGE_EBT_IP_H #define __LINUX_BRIDGE_EBT_IP_H +#include + #define EBT_IP_SOURCE 0x01 #define EBT_IP_DEST 0x02 #define EBT_IP_TOS 0x04 diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h index 998e9d5a6b60..42b889682721 100644 --- a/include/linux/netfilter_bridge/ebt_ip6.h +++ b/include/linux/netfilter_bridge/ebt_ip6.h @@ -12,6 +12,8 @@ #ifndef __LINUX_BRIDGE_EBT_IP6_H #define __LINUX_BRIDGE_EBT_IP6_H +#include + #define EBT_IP6_SOURCE 0x01 #define EBT_IP6_DEST 0x02 #define EBT_IP6_TCLASS 0x04 diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/linux/netfilter_bridge/ebt_limit.h index 721d51ffa513..66d80b30ba0e 100644 --- a/include/linux/netfilter_bridge/ebt_limit.h +++ b/include/linux/netfilter_bridge/ebt_limit.h @@ -1,6 +1,8 @@ #ifndef __LINUX_BRIDGE_EBT_LIMIT_H #define __LINUX_BRIDGE_EBT_LIMIT_H +#include + #define EBT_LIMIT_MATCH "limit" /* timings are in milliseconds. */ diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h index 564beb4946ea..7e7f1d1fe494 100644 --- a/include/linux/netfilter_bridge/ebt_log.h +++ b/include/linux/netfilter_bridge/ebt_log.h @@ -1,6 +1,8 @@ #ifndef __LINUX_BRIDGE_EBT_LOG_H #define __LINUX_BRIDGE_EBT_LOG_H +#include + #define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */ #define EBT_LOG_ARP 0x02 #define EBT_LOG_NFLOG 0x04 diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/linux/netfilter_bridge/ebt_mark_m.h index 97b96c4b8db4..410f9e5a71d4 100644 --- a/include/linux/netfilter_bridge/ebt_mark_m.h +++ b/include/linux/netfilter_bridge/ebt_mark_m.h @@ -1,6 +1,8 @@ #ifndef __LINUX_BRIDGE_EBT_MARK_M_H #define __LINUX_BRIDGE_EBT_MARK_M_H +#include + #define EBT_MARK_AND 0x01 #define EBT_MARK_OR 0x02 #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR) diff --git a/include/linux/netfilter_bridge/ebt_nflog.h b/include/linux/netfilter_bridge/ebt_nflog.h index 477315bc3537..df829fce9125 100644 --- a/include/linux/netfilter_bridge/ebt_nflog.h +++ b/include/linux/netfilter_bridge/ebt_nflog.h @@ -1,6 +1,8 @@ #ifndef __LINUX_BRIDGE_EBT_NFLOG_H #define __LINUX_BRIDGE_EBT_NFLOG_H +#include + #define EBT_NFLOG_MASK 0x0 #define EBT_NFLOG_PREFIX_SIZE 64 diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/linux/netfilter_bridge/ebt_pkttype.h index 7c0fb0fdcf14..c241badcd036 100644 --- a/include/linux/netfilter_bridge/ebt_pkttype.h +++ b/include/linux/netfilter_bridge/ebt_pkttype.h @@ -1,6 +1,8 @@ #ifndef __LINUX_BRIDGE_EBT_PKTTYPE_H #define __LINUX_BRIDGE_EBT_PKTTYPE_H +#include + struct ebt_pkttype_info { __u8 pkt_type; __u8 invert; diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/linux/netfilter_bridge/ebt_stp.h index 13a0bd49a92a..1025b9f5fb7d 100644 --- a/include/linux/netfilter_bridge/ebt_stp.h +++ b/include/linux/netfilter_bridge/ebt_stp.h @@ -1,6 +1,8 @@ #ifndef __LINUX_BRIDGE_EBT_STP_H #define __LINUX_BRIDGE_EBT_STP_H +#include + #define EBT_STP_TYPE 0x0001 #define EBT_STP_FLAGS 0x0002 diff --git a/include/linux/netfilter_bridge/ebt_ulog.h b/include/linux/netfilter_bridge/ebt_ulog.h index de35a51a7e46..89a6becb5269 100644 --- a/include/linux/netfilter_bridge/ebt_ulog.h +++ b/include/linux/netfilter_bridge/ebt_ulog.h @@ -1,6 +1,8 @@ #ifndef _EBT_ULOG_H #define _EBT_ULOG_H +#include + #define EBT_ULOG_DEFAULT_NLGROUP 0 #define EBT_ULOG_DEFAULT_QTHRESHOLD 1 #define EBT_ULOG_MAXNLGROUPS 32 /* hardcoded netlink max */ diff --git a/include/linux/netfilter_bridge/ebt_vlan.h b/include/linux/netfilter_bridge/ebt_vlan.h index 48dffc1dad36..967d1d5cf98d 100644 --- a/include/linux/netfilter_bridge/ebt_vlan.h +++ b/include/linux/netfilter_bridge/ebt_vlan.h @@ -1,6 +1,8 @@ #ifndef __LINUX_BRIDGE_EBT_VLAN_H #define __LINUX_BRIDGE_EBT_VLAN_H +#include + #define EBT_VLAN_ID 0x01 #define EBT_VLAN_PRIO 0x02 #define EBT_VLAN_ENCAP 0x04 diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h index 3114f06939ef..c6a204c97047 100644 --- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h +++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h @@ -1,6 +1,8 @@ #ifndef _IPT_CLUSTERIP_H_target #define _IPT_CLUSTERIP_H_target +#include + enum clusterip_hashmode { CLUSTERIP_HASHMODE_SIP = 0, CLUSTERIP_HASHMODE_SIP_SPT, diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/linux/netfilter_ipv4/ipt_ECN.h index c6e3e01b75e0..bb88d5315a4d 100644 --- a/include/linux/netfilter_ipv4/ipt_ECN.h +++ b/include/linux/netfilter_ipv4/ipt_ECN.h @@ -8,6 +8,8 @@ */ #ifndef _IPT_ECN_TARGET_H #define _IPT_ECN_TARGET_H + +#include #include #define IPT_ECN_IP_MASK (~XT_DSCP_MASK) diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h index fa0ebeca5d95..5bca78267afd 100644 --- a/include/linux/netfilter_ipv4/ipt_SAME.h +++ b/include/linux/netfilter_ipv4/ipt_SAME.h @@ -1,6 +1,8 @@ #ifndef _IPT_SAME_H #define _IPT_SAME_H +#include + #define IPT_SAME_MAX_RANGE 10 #define IPT_SAME_NODST 0x01 diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h index f6250e422d5e..f6ac169d92f9 100644 --- a/include/linux/netfilter_ipv4/ipt_TTL.h +++ b/include/linux/netfilter_ipv4/ipt_TTL.h @@ -4,6 +4,8 @@ #ifndef _IPT_TTL_H #define _IPT_TTL_H +#include + enum { IPT_TTL_SET = 0, IPT_TTL_INC, diff --git a/include/linux/netfilter_ipv4/ipt_addrtype.h b/include/linux/netfilter_ipv4/ipt_addrtype.h index f29c3cfcc240..0da42237c8da 100644 --- a/include/linux/netfilter_ipv4/ipt_addrtype.h +++ b/include/linux/netfilter_ipv4/ipt_addrtype.h @@ -1,6 +1,8 @@ #ifndef _IPT_ADDRTYPE_H #define _IPT_ADDRTYPE_H +#include + enum { IPT_ADDRTYPE_INVERT_SOURCE = 0x0001, IPT_ADDRTYPE_INVERT_DEST = 0x0002, diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/linux/netfilter_ipv4/ipt_ah.h index 8fea283ee62a..4e02bb0119e3 100644 --- a/include/linux/netfilter_ipv4/ipt_ah.h +++ b/include/linux/netfilter_ipv4/ipt_ah.h @@ -1,6 +1,8 @@ #ifndef _IPT_AH_H #define _IPT_AH_H +#include + struct ipt_ah { __u32 spis[2]; /* Security Parameter Index */ __u8 invflags; /* Inverse flags */ diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h index 78b98aa8784d..eabf95fb7d3e 100644 --- a/include/linux/netfilter_ipv4/ipt_ecn.h +++ b/include/linux/netfilter_ipv4/ipt_ecn.h @@ -8,6 +8,8 @@ */ #ifndef _IPT_ECN_H #define _IPT_ECN_H + +#include #include #define IPT_ECN_IP_MASK (~XT_DSCP_MASK) diff --git a/include/linux/netfilter_ipv4/ipt_ttl.h b/include/linux/netfilter_ipv4/ipt_ttl.h index 93d9a06689a3..37bee4442486 100644 --- a/include/linux/netfilter_ipv4/ipt_ttl.h +++ b/include/linux/netfilter_ipv4/ipt_ttl.h @@ -4,6 +4,8 @@ #ifndef _IPT_TTL_H #define _IPT_TTL_H +#include + enum { IPT_TTL_EQ = 0, /* equals */ IPT_TTL_NE, /* not equals */ diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h index 81cdaf0480e3..ebd8ead1bb63 100644 --- a/include/linux/netfilter_ipv6/ip6t_HL.h +++ b/include/linux/netfilter_ipv6/ip6t_HL.h @@ -5,6 +5,8 @@ #ifndef _IP6T_HL_H #define _IP6T_HL_H +#include + enum { IP6T_HL_SET = 0, IP6T_HL_INC, diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h index b999aa4e5969..205ed62e4605 100644 --- a/include/linux/netfilter_ipv6/ip6t_REJECT.h +++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h @@ -1,6 +1,8 @@ #ifndef _IP6T_REJECT_H #define _IP6T_REJECT_H +#include + enum ip6t_reject_with { IP6T_ICMP6_NO_ROUTE, IP6T_ICMP6_ADM_PROHIBITED, diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/linux/netfilter_ipv6/ip6t_ah.h index a602c165edd1..5da2b65cb3ad 100644 --- a/include/linux/netfilter_ipv6/ip6t_ah.h +++ b/include/linux/netfilter_ipv6/ip6t_ah.h @@ -1,6 +1,8 @@ #ifndef _IP6T_AH_H #define _IP6T_AH_H +#include + struct ip6t_ah { __u32 spis[2]; /* Security Parameter Index */ __u32 hdrlen; /* Header Length */ diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/linux/netfilter_ipv6/ip6t_frag.h index 538b31ef5e3d..b47f61b9e082 100644 --- a/include/linux/netfilter_ipv6/ip6t_frag.h +++ b/include/linux/netfilter_ipv6/ip6t_frag.h @@ -1,6 +1,8 @@ #ifndef _IP6T_FRAG_H #define _IP6T_FRAG_H +#include + struct ip6t_frag { __u32 ids[2]; /* Security Parameter Index */ __u32 hdrlen; /* Header Length */ diff --git a/include/linux/netfilter_ipv6/ip6t_hl.h b/include/linux/netfilter_ipv6/ip6t_hl.h index c6fddcb971da..6e76dbc6c19a 100644 --- a/include/linux/netfilter_ipv6/ip6t_hl.h +++ b/include/linux/netfilter_ipv6/ip6t_hl.h @@ -5,6 +5,8 @@ #ifndef _IP6T_HL_H #define _IP6T_HL_H +#include + enum { IP6T_HL_EQ = 0, /* equals */ IP6T_HL_NE, /* not equals */ diff --git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/linux/netfilter_ipv6/ip6t_ipv6header.h index 73d53bd3ff62..efae3a20c214 100644 --- a/include/linux/netfilter_ipv6/ip6t_ipv6header.h +++ b/include/linux/netfilter_ipv6/ip6t_ipv6header.h @@ -8,6 +8,8 @@ on whether they contain certain headers */ #ifndef __IPV6HEADER_H #define __IPV6HEADER_H +#include + struct ip6t_ipv6header_info { __u8 matchflags; __u8 invflags; diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h index 98c8cf685eea..a7729a5025cd 100644 --- a/include/linux/netfilter_ipv6/ip6t_mh.h +++ b/include/linux/netfilter_ipv6/ip6t_mh.h @@ -1,6 +1,8 @@ #ifndef _IP6T_MH_H #define _IP6T_MH_H +#include + /* MH matching stuff */ struct ip6t_mh { __u8 types[2]; /* MH type range */ diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/linux/netfilter_ipv6/ip6t_opts.h index 405d309cd741..17d419a811fd 100644 --- a/include/linux/netfilter_ipv6/ip6t_opts.h +++ b/include/linux/netfilter_ipv6/ip6t_opts.h @@ -1,6 +1,8 @@ #ifndef _IP6T_OPTS_H #define _IP6T_OPTS_H +#include + #define IP6T_OPTS_OPTSNR 16 struct ip6t_opts { diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/linux/netfilter_ipv6/ip6t_rt.h index e8dad20acd37..7605a5ff81cd 100644 --- a/include/linux/netfilter_ipv6/ip6t_rt.h +++ b/include/linux/netfilter_ipv6/ip6t_rt.h @@ -1,6 +1,7 @@ #ifndef _IP6T_RT_H #define _IP6T_RT_H +#include /*#include */ #define IP6T_RT_HOPS 16 -- cgit v1.2.3 From d18046b3cd989c06d2ad8d615e57c3cf63c63b67 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 19 Jan 2011 23:12:54 +0000 Subject: dccp: clean up unused DCCP_STATE_MASK definition Remove unused DCCP_STATE_MASK macro. Signed-off-by: Shan Wei Acked-by: Gerrit Renker Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 010e2d87ed75..d638e85dc501 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -279,8 +279,6 @@ enum dccp_state { DCCP_MAX_STATES }; -#define DCCP_STATE_MASK 0x1f - enum { DCCPF_OPEN = TCPF_ESTABLISHED, DCCPF_REQUESTING = TCPF_SYN_SENT, -- cgit v1.2.3 From 686a2955531312dab77bb6f1e8602787d85e47d8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 20 Jan 2011 22:47:32 -0800 Subject: net: Add safe reverse SKB queue walkers. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bf221d65d9ad..6e946da9d1d6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1801,6 +1801,15 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ skb = skb->prev) +#define skb_queue_reverse_walk_safe(queue, skb, tmp) \ + for (skb = (queue)->prev, tmp = skb->prev; \ + skb != (struct sk_buff *)(queue); \ + skb = tmp, tmp = skb->prev) + +#define skb_queue_reverse_walk_from_safe(queue, skb, tmp) \ + for (tmp = skb->prev; \ + skb != (struct sk_buff *)(queue); \ + skb = tmp, tmp = skb->prev) static inline bool skb_has_frag_list(const struct sk_buff *skb) { -- cgit v1.2.3 From c39649c331c70952700f99832b03f87e9d7f5b4b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 19 Jan 2011 11:03:25 +0000 Subject: lib: cpu_rmap: CPU affinity reverse-mapping When initiating I/O on a multiqueue and multi-IRQ device, we may want to select a queue for which the response will be handled on the same or a nearby CPU. This requires a reverse-map of IRQ affinity. Add library functions to support a generic reverse-mapping from CPUs to objects with affinity and the specific case where the objects are IRQs. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/cpu_rmap.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 include/linux/cpu_rmap.h (limited to 'include/linux') diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h new file mode 100644 index 000000000000..473771a528c0 --- /dev/null +++ b/include/linux/cpu_rmap.h @@ -0,0 +1,73 @@ +/* + * cpu_rmap.c: CPU affinity reverse-map support + * Copyright 2011 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include +#include +#include + +/** + * struct cpu_rmap - CPU affinity reverse-map + * @size: Number of objects to be reverse-mapped + * @used: Number of objects added + * @obj: Pointer to array of object pointers + * @near: For each CPU, the index and distance to the nearest object, + * based on affinity masks + */ +struct cpu_rmap { + u16 size, used; + void **obj; + struct { + u16 index; + u16 dist; + } near[0]; +}; +#define CPU_RMAP_DIST_INF 0xffff + +extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags); + +/** + * free_cpu_rmap - free CPU affinity reverse-map + * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL + */ +static inline void free_cpu_rmap(struct cpu_rmap *rmap) +{ + kfree(rmap); +} + +extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj); +extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, + const struct cpumask *affinity); + +static inline u16 cpu_rmap_lookup_index(struct cpu_rmap *rmap, unsigned int cpu) +{ + return rmap->near[cpu].index; +} + +static inline void *cpu_rmap_lookup_obj(struct cpu_rmap *rmap, unsigned int cpu) +{ + return rmap->obj[rmap->near[cpu].index]; +} + +#ifdef CONFIG_GENERIC_HARDIRQS + +/** + * alloc_irq_cpu_rmap - allocate CPU affinity reverse-map for IRQs + * @size: Number of objects to be mapped + * + * Must be called in process context. + */ +static inline struct cpu_rmap *alloc_irq_cpu_rmap(unsigned int size) +{ + return alloc_cpu_rmap(size, GFP_KERNEL); +} +extern void free_irq_cpu_rmap(struct cpu_rmap *rmap); + +extern int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq); + +#endif -- cgit v1.2.3 From c445477d74ab3779d1386ab797fbb9b628eb9f64 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 19 Jan 2011 11:03:53 +0000 Subject: net: RPS: Enable hardware acceleration of RFS Allow drivers for multiqueue hardware with flow filter tables to accelerate RFS. The driver must: 1. Set net_device::rx_cpu_rmap to a cpu_rmap of the RX completion IRQs (in queue order). This will provide a mapping from CPUs to the queues for which completions are handled nearest to them. 2. Implement net_device_ops::ndo_rx_flow_steer. This operation adds or replaces a filter steering the given flow to the given RX queue, if possible. 3. Periodically remove filters for which rps_may_expire_flow() returns true. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 371fa8839d51..a335f2022690 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -554,14 +554,16 @@ struct rps_map { #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16))) /* - * The rps_dev_flow structure contains the mapping of a flow to a CPU and the - * tail pointer for that CPU's input queue at the time of last enqueue. + * The rps_dev_flow structure contains the mapping of a flow to a CPU, the + * tail pointer for that CPU's input queue at the time of last enqueue, and + * a hardware filter index. */ struct rps_dev_flow { u16 cpu; - u16 fill; + u16 filter; unsigned int last_qtail; }; +#define RPS_NO_FILTER 0xffff /* * The rps_dev_flow_table structure contains a table of flow mappings. @@ -611,6 +613,11 @@ static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; +#ifdef CONFIG_RFS_ACCEL +extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, + u32 flow_id, u16 filter_id); +#endif + /* This structure contains an instance of an RX queue. */ struct netdev_rx_queue { struct rps_map __rcu *rps_map; @@ -769,6 +776,13 @@ struct netdev_tc_txq { * is always called from the stack with the rtnl lock held and netif tx * queues stopped. This allows the netdevice to perform queue management * safely. + * + * RFS acceleration. + * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, + * u16 rxq_index, u32 flow_id); + * Set hardware filter for RFS. rxq_index is the target queue index; + * flow_id is a flow ID to be passed to rps_may_expire_flow() later. + * Return the filter ID on success, or a negative error code. */ #define HAVE_NET_DEVICE_OPS struct net_device_ops { @@ -842,6 +856,12 @@ struct net_device_ops { int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); #endif +#ifdef CONFIG_RFS_ACCEL + int (*ndo_rx_flow_steer)(struct net_device *dev, + const struct sk_buff *skb, + u16 rxq_index, + u32 flow_id); +#endif }; /* @@ -1056,6 +1076,13 @@ struct net_device { /* Number of RX queues currently active in device */ unsigned int real_num_rx_queues; + +#ifdef CONFIG_RFS_ACCEL + /* CPU reverse-mapping for RX completion interrupts, indexed + * by RX queue number. Assigned by driver. This must only be + * set if the ndo_rx_flow_steer operation is defined. */ + struct cpu_rmap *rx_cpu_rmap; +#endif #endif rx_handler_func_t __rcu *rx_handler; -- cgit v1.2.3 From 04ed3e741d0f133e02bed7fa5c98edba128f90e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 24 Jan 2011 15:32:47 -0800 Subject: net: change netdev->features to u32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quoting Ben Hutchings: we presumably won't be defining features that can only be enabled on 64-bit architectures. Occurences found by `grep -r` on net/, drivers/net, include/ [ Move features and vlan_features next to each other in struct netdev, as per Eric Dumazet's suggestion -DaveM ] Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdevice.h | 24 ++++++++++++------------ include/linux/skbuff.h | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a335f2022690..0de3c59720fa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -914,7 +914,11 @@ struct net_device { struct list_head unreg_list; /* Net device features */ - unsigned long features; + u32 features; + + /* VLAN feature mask */ + u32 vlan_features; + #define NETIF_F_SG 1 /* Scatter/gather IO. */ #define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ @@ -1176,9 +1180,6 @@ struct net_device { /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; - /* VLAN feature mask */ - unsigned long vlan_features; - /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; @@ -1401,7 +1402,7 @@ struct packet_type { struct packet_type *, struct net_device *); struct sk_buff *(*gso_segment)(struct sk_buff *skb, - int features); + u32 features); int (*gso_send_check)(struct sk_buff *skb); struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb); @@ -2370,7 +2371,7 @@ extern int netdev_tstamp_prequeue; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); extern int skb_checksum_help(struct sk_buff *skb); -extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features); +extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features); #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); #else @@ -2397,22 +2398,21 @@ extern char *netdev_drivername(const struct net_device *dev, char *buffer, int l extern void linkwatch_run_queue(void); -unsigned long netdev_increment_features(unsigned long all, unsigned long one, - unsigned long mask); -unsigned long netdev_fix_features(unsigned long features, const char *name); +u32 netdev_increment_features(u32 all, u32 one, u32 mask); +u32 netdev_fix_features(u32 features, const char *name); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); -int netif_skb_features(struct sk_buff *skb); +u32 netif_skb_features(struct sk_buff *skb); -static inline int net_gso_ok(int features, int gso_type) +static inline int net_gso_ok(u32 features, int gso_type) { int feature = gso_type << NETIF_F_GSO_SHIFT; return (features & feature) == feature; } -static inline int skb_gso_ok(struct sk_buff *skb, int features) +static inline int skb_gso_ok(struct sk_buff *skb, u32 features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6e946da9d1d6..31f02d0b46a7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1877,7 +1877,7 @@ extern void skb_split(struct sk_buff *skb, extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); -extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); +extern struct sk_buff *skb_segment(struct sk_buff *skb, u32 features); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) -- cgit v1.2.3 From acd1130e8793fb150fb522da8ec51675839eb4b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 24 Jan 2011 15:45:15 -0800 Subject: net: reduce and unify printk level in netdev_fix_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce printk() levels to KERN_INFO in netdev_fix_features() as this will be used by ethtool and might spam dmesg unnecessarily. This converts the function to use netdev_info() instead of plain printk(). As a side effect, bonding and bridge devices will now log dropped features on every slave device change. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0de3c59720fa..8858422c5c5d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2399,7 +2399,7 @@ extern char *netdev_drivername(const struct net_device *dev, char *buffer, int l extern void linkwatch_run_queue(void); u32 netdev_increment_features(u32 all, u32 one, u32 mask); -u32 netdev_fix_features(u32 features, const char *name); +u32 netdev_fix_features(struct net_device *dev, u32 features); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); -- cgit v1.2.3 From ccf434380d1a67df2dcb9113206b77d0cb0a1cef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Jan 2011 18:08:02 +0000 Subject: net: fix dev_seq_next() Commit c6d14c84566d (net: Introduce for_each_netdev_rcu() iterator) added a race in dev_seq_next(). The rcu_dereference() call should be done _before_ testing the end of list, or we might return a wrong net_device if a concurrent thread changes net_device list under us. Note : discovered thanks to a sparse warning : net/core/dev.c:3919:9: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Eric Dumazet CC: Paul E. McKenney Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8858422c5c5d..c7d707452228 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1447,7 +1447,7 @@ static inline struct net_device *next_net_device_rcu(struct net_device *dev) struct net *net; net = dev_net(dev); - lh = rcu_dereference(dev->dev_list.next); + lh = rcu_dereference(list_next_rcu(&dev->dev_list)); return lh == &net->dev_base_head ? NULL : net_device_entry(lh); } @@ -1457,6 +1457,13 @@ static inline struct net_device *first_net_device(struct net *net) net_device_entry(net->dev_base_head.next); } +static inline struct net_device *first_net_device_rcu(struct net *net) +{ + struct list_head *lh = rcu_dereference(list_next_rcu(&net->dev_base_head)); + + return lh == &net->dev_base_head ? NULL : net_device_entry(lh); +} + extern int netdev_boot_setup_check(struct net_device *dev); extern unsigned long netdev_boot_base(const char *prefix, int unit); extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, -- cgit v1.2.3 From f703651ef870bd6b94ddc98ae07488b7d3fd9335 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:20:14 +0100 Subject: netfilter: NFNL_SUBSYS_IPSET id and NLA_PUT_NET* macros The patch adds the NFNL_SUBSYS_IPSET id and NLA_PUT_NET* macros to the vanilla kernel. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 361d6b5630ee..2b11fc1a86be 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -47,7 +47,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_QUEUE 3 #define NFNL_SUBSYS_ULOG 4 #define NFNL_SUBSYS_OSF 5 -#define NFNL_SUBSYS_COUNT 6 +#define NFNL_SUBSYS_IPSET 6 +#define NFNL_SUBSYS_COUNT 7 #ifdef __KERNEL__ -- cgit v1.2.3 From a7b4f989a629493bb4ec4a354def784d440b32c4 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:28:35 +0100 Subject: netfilter: ipset: IP set core support The patch adds the IP set core support to the kernel. The IP set core implements a netlink (nfnetlink) based protocol by which one can create, destroy, flush, rename, swap, list, save, restore sets, and add, delete, test elements from userspace. For simplicity (and backward compatibilty and for not to force ip(6)tables to be linked with a netlink library) reasons a small getsockopt-based protocol is also kept in order to communicate with the ip(6)tables match and target. The netlink protocol passes all u16, etc values in network order with NLA_F_NET_BYTEORDER flag. The protocol enforces the proper use of the NLA_F_NESTED and NLA_F_NET_BYTEORDER flags. For other kernel subsystems (netfilter match and target) the API contains the functions to add, delete and test elements in sets and the required calls to get/put refereces to the sets before those operations can be performed. The set types (which are implemented in independent modules) are stored in a simple RCU protected list. A set type may have variants: for example without timeout or with timeout support, for IPv4 or for IPv6. The sets (i.e. the pointers to the sets) are stored in an array. The sets are identified by their index in the array, which makes possible easy and fast swapping of sets. The array is protected indirectly by the nfnl mutex from nfnetlink. The content of the sets are protected by the rwlock of the set. There are functional differences between the add/del/test functions for the kernel and userspace: - kernel add/del/test: works on the current packet (i.e. one element) - kernel test: may trigger an "add" operation in order to fill out unspecified parts of the element from the packet (like MAC address) - userspace add/del: works on the netlink message and thus possibly on multiple elements from the IPSET_ATTR_ADT container attribute. - userspace add: may trigger resizing of a set Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set.h | 452 +++++++++++++++++++++++++ include/linux/netfilter/ipset/ip_set_getport.h | 11 + include/linux/netfilter/ipset/pfxlen.h | 35 ++ 3 files changed, 498 insertions(+) create mode 100644 include/linux/netfilter/ipset/ip_set.h create mode 100644 include/linux/netfilter/ipset/ip_set_getport.h create mode 100644 include/linux/netfilter/ipset/pfxlen.h (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h new file mode 100644 index 000000000000..ec333d83f3b4 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set.h @@ -0,0 +1,452 @@ +#ifndef _IP_SET_H +#define _IP_SET_H + +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson + * Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* The protocol version */ +#define IPSET_PROTOCOL 6 + +/* The max length of strings including NUL: set and type identifiers */ +#define IPSET_MAXNAMELEN 32 + +/* Message types and commands */ +enum ipset_cmd { + IPSET_CMD_NONE, + IPSET_CMD_PROTOCOL, /* 1: Return protocol version */ + IPSET_CMD_CREATE, /* 2: Create a new (empty) set */ + IPSET_CMD_DESTROY, /* 3: Destroy a (empty) set */ + IPSET_CMD_FLUSH, /* 4: Remove all elements from a set */ + IPSET_CMD_RENAME, /* 5: Rename a set */ + IPSET_CMD_SWAP, /* 6: Swap two sets */ + IPSET_CMD_LIST, /* 7: List sets */ + IPSET_CMD_SAVE, /* 8: Save sets */ + IPSET_CMD_ADD, /* 9: Add an element to a set */ + IPSET_CMD_DEL, /* 10: Delete an element from a set */ + IPSET_CMD_TEST, /* 11: Test an element in a set */ + IPSET_CMD_HEADER, /* 12: Get set header data only */ + IPSET_CMD_TYPE, /* 13: Get set type */ + IPSET_MSG_MAX, /* Netlink message commands */ + + /* Commands in userspace: */ + IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */ + IPSET_CMD_HELP, /* 15: Get help */ + IPSET_CMD_VERSION, /* 16: Get program version */ + IPSET_CMD_QUIT, /* 17: Quit from interactive mode */ + + IPSET_CMD_MAX, + + IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */ +}; + +/* Attributes at command level */ +enum { + IPSET_ATTR_UNSPEC, + IPSET_ATTR_PROTOCOL, /* 1: Protocol version */ + IPSET_ATTR_SETNAME, /* 2: Name of the set */ + IPSET_ATTR_TYPENAME, /* 3: Typename */ + IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME, /* Setname at rename/swap */ + IPSET_ATTR_REVISION, /* 4: Settype revision */ + IPSET_ATTR_FAMILY, /* 5: Settype family */ + IPSET_ATTR_FLAGS, /* 6: Flags at command level */ + IPSET_ATTR_DATA, /* 7: Nested attributes */ + IPSET_ATTR_ADT, /* 8: Multiple data containers */ + IPSET_ATTR_LINENO, /* 9: Restore lineno */ + IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */ + IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */ + __IPSET_ATTR_CMD_MAX, +}; +#define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1) + +/* CADT specific attributes */ +enum { + IPSET_ATTR_IP = IPSET_ATTR_UNSPEC + 1, + IPSET_ATTR_IP_FROM = IPSET_ATTR_IP, + IPSET_ATTR_IP_TO, /* 2 */ + IPSET_ATTR_CIDR, /* 3 */ + IPSET_ATTR_PORT, /* 4 */ + IPSET_ATTR_PORT_FROM = IPSET_ATTR_PORT, + IPSET_ATTR_PORT_TO, /* 5 */ + IPSET_ATTR_TIMEOUT, /* 6 */ + IPSET_ATTR_PROTO, /* 7 */ + IPSET_ATTR_CADT_FLAGS, /* 8 */ + IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */ + /* Reserve empty slots */ + IPSET_ATTR_CADT_MAX = 16, + /* Create-only specific attributes */ + IPSET_ATTR_GC, + IPSET_ATTR_HASHSIZE, + IPSET_ATTR_MAXELEM, + IPSET_ATTR_NETMASK, + IPSET_ATTR_PROBES, + IPSET_ATTR_RESIZE, + IPSET_ATTR_SIZE, + /* Kernel-only */ + IPSET_ATTR_ELEMENTS, + IPSET_ATTR_REFERENCES, + IPSET_ATTR_MEMSIZE, + + __IPSET_ATTR_CREATE_MAX, +}; +#define IPSET_ATTR_CREATE_MAX (__IPSET_ATTR_CREATE_MAX - 1) + +/* ADT specific attributes */ +enum { + IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + 1, + IPSET_ATTR_NAME, + IPSET_ATTR_NAMEREF, + IPSET_ATTR_IP2, + IPSET_ATTR_CIDR2, + __IPSET_ATTR_ADT_MAX, +}; +#define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) + +/* IP specific attributes */ +enum { + IPSET_ATTR_IPADDR_IPV4 = IPSET_ATTR_UNSPEC + 1, + IPSET_ATTR_IPADDR_IPV6, + __IPSET_ATTR_IPADDR_MAX, +}; +#define IPSET_ATTR_IPADDR_MAX (__IPSET_ATTR_IPADDR_MAX - 1) + +/* Error codes */ +enum ipset_errno { + IPSET_ERR_PRIVATE = 4096, + IPSET_ERR_PROTOCOL, + IPSET_ERR_FIND_TYPE, + IPSET_ERR_MAX_SETS, + IPSET_ERR_BUSY, + IPSET_ERR_EXIST_SETNAME2, + IPSET_ERR_TYPE_MISMATCH, + IPSET_ERR_EXIST, + IPSET_ERR_INVALID_CIDR, + IPSET_ERR_INVALID_NETMASK, + IPSET_ERR_INVALID_FAMILY, + IPSET_ERR_TIMEOUT, + IPSET_ERR_REFERENCED, + IPSET_ERR_IPADDR_IPV4, + IPSET_ERR_IPADDR_IPV6, + + /* Type specific error codes */ + IPSET_ERR_TYPE_SPECIFIC = 4352, +}; + +/* Flags at command level */ +enum ipset_cmd_flags { + IPSET_FLAG_BIT_EXIST = 0, + IPSET_FLAG_EXIST = (1 << IPSET_FLAG_BIT_EXIST), +}; + +/* Flags at CADT attribute level */ +enum ipset_cadt_flags { + IPSET_FLAG_BIT_BEFORE = 0, + IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE), +}; + +/* Commands with settype-specific attributes */ +enum ipset_adt { + IPSET_ADD, + IPSET_DEL, + IPSET_TEST, + IPSET_ADT_MAX, + IPSET_CREATE = IPSET_ADT_MAX, + IPSET_CADT_MAX, +}; + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include + +/* Sets are identified by an index in kernel space. Tweak with ip_set_id_t + * and IPSET_INVALID_ID if you want to increase the max number of sets. + */ +typedef u16 ip_set_id_t; + +#define IPSET_INVALID_ID 65535 + +enum ip_set_dim { + IPSET_DIM_ZERO = 0, + IPSET_DIM_ONE, + IPSET_DIM_TWO, + IPSET_DIM_THREE, + /* Max dimension in elements. + * If changed, new revision of iptables match/target is required. + */ + IPSET_DIM_MAX = 6, +}; + +/* Option flags for kernel operations */ +enum ip_set_kopt { + IPSET_INV_MATCH = (1 << IPSET_DIM_ZERO), + IPSET_DIM_ONE_SRC = (1 << IPSET_DIM_ONE), + IPSET_DIM_TWO_SRC = (1 << IPSET_DIM_TWO), + IPSET_DIM_THREE_SRC = (1 << IPSET_DIM_THREE), +}; + +/* Set features */ +enum ip_set_feature { + IPSET_TYPE_IP_FLAG = 0, + IPSET_TYPE_IP = (1 << IPSET_TYPE_IP_FLAG), + IPSET_TYPE_PORT_FLAG = 1, + IPSET_TYPE_PORT = (1 << IPSET_TYPE_PORT_FLAG), + IPSET_TYPE_MAC_FLAG = 2, + IPSET_TYPE_MAC = (1 << IPSET_TYPE_MAC_FLAG), + IPSET_TYPE_IP2_FLAG = 3, + IPSET_TYPE_IP2 = (1 << IPSET_TYPE_IP2_FLAG), + IPSET_TYPE_NAME_FLAG = 4, + IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG), + /* Strictly speaking not a feature, but a flag for dumping: + * this settype must be dumped last */ + IPSET_DUMP_LAST_FLAG = 7, + IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG), +}; + +struct ip_set; + +typedef int (*ipset_adtfn)(struct ip_set *set, void *value, u32 timeout); + +/* Set type, variant-specific part */ +struct ip_set_type_variant { + /* Kernelspace: test/add/del entries + * returns negative error code, + * zero for no match/success to add/delete + * positive for matching element */ + int (*kadt)(struct ip_set *set, const struct sk_buff * skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags); + + /* Userspace: test/add/del entries + * returns negative error code, + * zero for no match/success to add/delete + * positive for matching element */ + int (*uadt)(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags); + + /* Low level add/del/test functions */ + ipset_adtfn adt[IPSET_ADT_MAX]; + + /* When adding entries and set is full, try to resize the set */ + int (*resize)(struct ip_set *set, bool retried); + /* Destroy the set */ + void (*destroy)(struct ip_set *set); + /* Flush the elements */ + void (*flush)(struct ip_set *set); + /* Expire entries before listing */ + void (*expire)(struct ip_set *set); + /* List set header data */ + int (*head)(struct ip_set *set, struct sk_buff *skb); + /* List elements */ + int (*list)(const struct ip_set *set, struct sk_buff *skb, + struct netlink_callback *cb); + + /* Return true if "b" set is the same as "a" + * according to the create set parameters */ + bool (*same_set)(const struct ip_set *a, const struct ip_set *b); +}; + +/* The core set type structure */ +struct ip_set_type { + struct list_head list; + + /* Typename */ + char name[IPSET_MAXNAMELEN]; + /* Protocol version */ + u8 protocol; + /* Set features to control swapping */ + u8 features; + /* Set type dimension */ + u8 dimension; + /* Supported family: may be AF_UNSPEC for both AF_INET/AF_INET6 */ + u8 family; + /* Type revision */ + u8 revision; + + /* Create set */ + int (*create)(struct ip_set *set, struct nlattr *tb[], u32 flags); + + /* Attribute policies */ + const struct nla_policy create_policy[IPSET_ATTR_CREATE_MAX + 1]; + const struct nla_policy adt_policy[IPSET_ATTR_ADT_MAX + 1]; + + /* Set this to THIS_MODULE if you are a module, otherwise NULL */ + struct module *me; +}; + +/* register and unregister set type */ +extern int ip_set_type_register(struct ip_set_type *set_type); +extern void ip_set_type_unregister(struct ip_set_type *set_type); + +/* A generic IP set */ +struct ip_set { + /* The name of the set */ + char name[IPSET_MAXNAMELEN]; + /* Lock protecting the set data */ + rwlock_t lock; + /* References to the set */ + atomic_t ref; + /* The core set type */ + struct ip_set_type *type; + /* The type variant doing the real job */ + const struct ip_set_type_variant *variant; + /* The actual INET family of the set */ + u8 family; + /* The type specific data */ + void *data; +}; + +/* register and unregister set references */ +extern ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set); +extern void ip_set_put_byindex(ip_set_id_t index); +extern const char * ip_set_name_byindex(ip_set_id_t index); +extern ip_set_id_t ip_set_nfnl_get(const char *name); +extern ip_set_id_t ip_set_nfnl_get_byindex(ip_set_id_t index); +extern void ip_set_nfnl_put(ip_set_id_t index); + +/* API for iptables set match, and SET target */ +extern int ip_set_add(ip_set_id_t id, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags); +extern int ip_set_del(ip_set_id_t id, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags); +extern int ip_set_test(ip_set_id_t id, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags); + +/* Utility functions */ +extern void * ip_set_alloc(size_t size); +extern void ip_set_free(void *members); +extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr); +extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr); + +static inline int +ip_set_get_hostipaddr4(struct nlattr *nla, u32 *ipaddr) +{ + __be32 ip; + int ret = ip_set_get_ipaddr4(nla, &ip); + + if (ret) + return ret; + *ipaddr = ntohl(ip); + return 0; +} + +/* Ignore IPSET_ERR_EXIST errors if asked to do so? */ +static inline bool +ip_set_eexist(int ret, u32 flags) +{ + return ret == -IPSET_ERR_EXIST && (flags & IPSET_FLAG_EXIST); +} + +/* Check the NLA_F_NET_BYTEORDER flag */ +static inline bool +ip_set_attr_netorder(struct nlattr *tb[], int type) +{ + return tb[type] && (tb[type]->nla_type & NLA_F_NET_BYTEORDER); +} + +static inline bool +ip_set_optattr_netorder(struct nlattr *tb[], int type) +{ + return !tb[type] || (tb[type]->nla_type & NLA_F_NET_BYTEORDER); +} + +/* Useful converters */ +static inline u32 +ip_set_get_h32(const struct nlattr *attr) +{ + return ntohl(nla_get_be32(attr)); +} + +static inline u16 +ip_set_get_h16(const struct nlattr *attr) +{ + return ntohs(nla_get_be16(attr)); +} + +#define ipset_nest_start(skb, attr) nla_nest_start(skb, attr | NLA_F_NESTED) +#define ipset_nest_end(skb, start) nla_nest_end(skb, start) + +#define NLA_PUT_IPADDR4(skb, type, ipaddr) \ +do { \ + struct nlattr *__nested = ipset_nest_start(skb, type); \ + \ + if (!__nested) \ + goto nla_put_failure; \ + NLA_PUT_NET32(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr); \ + ipset_nest_end(skb, __nested); \ +} while (0) + +#define NLA_PUT_IPADDR6(skb, type, ipaddrptr) \ +do { \ + struct nlattr *__nested = ipset_nest_start(skb, type); \ + \ + if (!__nested) \ + goto nla_put_failure; \ + NLA_PUT(skb, IPSET_ATTR_IPADDR_IPV6, \ + sizeof(struct in6_addr), ipaddrptr); \ + ipset_nest_end(skb, __nested); \ +} while (0) + +/* Get address from skbuff */ +static inline __be32 +ip4addr(const struct sk_buff *skb, bool src) +{ + return src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr; +} + +static inline void +ip4addrptr(const struct sk_buff *skb, bool src, __be32 *addr) +{ + *addr = src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr; +} + +static inline void +ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr) +{ + memcpy(addr, src ? &ipv6_hdr(skb)->saddr : &ipv6_hdr(skb)->daddr, + sizeof(*addr)); +} + +/* Calculate the bytes required to store the inclusive range of a-b */ +static inline int +bitmap_bytes(u32 a, u32 b) +{ + return 4 * ((((b - a + 8) / 8) + 3) / 4); +} + +/* Interface to iptables/ip6tables */ + +#define SO_IP_SET 83 + +union ip_set_name_index { + char name[IPSET_MAXNAMELEN]; + ip_set_id_t index; +}; + +#define IP_SET_OP_GET_BYNAME 0x00000006 /* Get set index by name */ +struct ip_set_req_get_set { + unsigned op; + unsigned version; + union ip_set_name_index set; +}; + +#define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */ +/* Uses ip_set_req_get_set */ + +#define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */ +struct ip_set_req_version { + unsigned op; + unsigned version; +}; + +#endif /* __KERNEL__ */ + +#endif /*_IP_SET_H */ diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h new file mode 100644 index 000000000000..694c433298b8 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_getport.h @@ -0,0 +1,11 @@ +#ifndef _IP_SET_GETPORT_H +#define _IP_SET_GETPORT_H + +extern bool ip_set_get_ip4_port(const struct sk_buff *skb, bool src, + __be16 *port, u8 *proto); +extern bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src, + __be16 *port, u8 *proto); +extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, + __be16 *port); + +#endif /*_IP_SET_GETPORT_H*/ diff --git a/include/linux/netfilter/ipset/pfxlen.h b/include/linux/netfilter/ipset/pfxlen.h new file mode 100644 index 000000000000..0e1fb50da562 --- /dev/null +++ b/include/linux/netfilter/ipset/pfxlen.h @@ -0,0 +1,35 @@ +#ifndef _PFXLEN_H +#define _PFXLEN_H + +#include +#include + +/* Prefixlen maps, by Jan Engelhardt */ +extern const union nf_inet_addr ip_set_netmask_map[]; +extern const union nf_inet_addr ip_set_hostmask_map[]; + +static inline __be32 +ip_set_netmask(u8 pfxlen) +{ + return ip_set_netmask_map[pfxlen].ip; +} + +static inline const __be32 * +ip_set_netmask6(u8 pfxlen) +{ + return &ip_set_netmask_map[pfxlen].ip6[0]; +} + +static inline u32 +ip_set_hostmask(u8 pfxlen) +{ + return (__force u32) ip_set_hostmask_map[pfxlen].ip; +} + +static inline const __be32 * +ip_set_hostmask6(u8 pfxlen) +{ + return &ip_set_hostmask_map[pfxlen].ip6[0]; +} + +#endif /*_PFXLEN_H */ -- cgit v1.2.3 From 72205fc68bd13109576aa6c4c12c740962d28a6c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:33:17 +0100 Subject: netfilter: ipset: bitmap:ip set type support The module implements the bitmap:ip set type in two flavours, without and with timeout support. In this kind of set one can store IPv4 addresses (or network addresses) from a given range. In order not to waste memory, the timeout version does not rely on the kernel timer for every element to be timed out but on garbage collection. All set types use this mechanism. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set_bitmap.h | 31 ++++++ include/linux/netfilter/ipset/ip_set_timeout.h | 127 +++++++++++++++++++++++++ 2 files changed, 158 insertions(+) create mode 100644 include/linux/netfilter/ipset/ip_set_bitmap.h create mode 100644 include/linux/netfilter/ipset/ip_set_timeout.h (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_bitmap.h b/include/linux/netfilter/ipset/ip_set_bitmap.h new file mode 100644 index 000000000000..61a9e8746c83 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_bitmap.h @@ -0,0 +1,31 @@ +#ifndef __IP_SET_BITMAP_H +#define __IP_SET_BITMAP_H + +/* Bitmap type specific error codes */ +enum { + /* The element is out of the range of the set */ + IPSET_ERR_BITMAP_RANGE = IPSET_ERR_TYPE_SPECIFIC, + /* The range exceeds the size limit of the set type */ + IPSET_ERR_BITMAP_RANGE_SIZE, +}; + +#ifdef __KERNEL__ +#define IPSET_BITMAP_MAX_RANGE 0x0000FFFF + +/* Common functions */ + +static inline u32 +range_to_mask(u32 from, u32 to, u8 *bits) +{ + u32 mask = 0xFFFFFFFE; + + *bits = 32; + while (--(*bits) > 0 && mask && (to & mask) != from) + mask <<= 1; + + return mask; +} + +#endif /* __KERNEL__ */ + +#endif /* __IP_SET_BITMAP_H */ diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h new file mode 100644 index 000000000000..9f30c5f2ec1c --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -0,0 +1,127 @@ +#ifndef _IP_SET_TIMEOUT_H +#define _IP_SET_TIMEOUT_H + +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +/* How often should the gc be run by default */ +#define IPSET_GC_TIME (3 * 60) + +/* Timeout period depending on the timeout value of the given set */ +#define IPSET_GC_PERIOD(timeout) \ + ((timeout/3) ? min_t(u32, (timeout)/3, IPSET_GC_TIME) : 1) + +/* Set is defined without timeout support: timeout value may be 0 */ +#define IPSET_NO_TIMEOUT UINT_MAX + +#define with_timeout(timeout) ((timeout) != IPSET_NO_TIMEOUT) + +static inline unsigned int +ip_set_timeout_uget(struct nlattr *tb) +{ + unsigned int timeout = ip_set_get_h32(tb); + + /* Userspace supplied TIMEOUT parameter: adjust crazy size */ + return timeout == IPSET_NO_TIMEOUT ? IPSET_NO_TIMEOUT - 1 : timeout; +} + +#ifdef IP_SET_BITMAP_TIMEOUT + +/* Bitmap specific timeout constants and macros for the entries */ + +/* Bitmap entry is unset */ +#define IPSET_ELEM_UNSET 0 +/* Bitmap entry is set with no timeout value */ +#define IPSET_ELEM_PERMANENT (UINT_MAX/2) + +static inline bool +ip_set_timeout_test(unsigned long timeout) +{ + return timeout != IPSET_ELEM_UNSET && + (timeout == IPSET_ELEM_PERMANENT || + time_after(timeout, jiffies)); +} + +static inline bool +ip_set_timeout_expired(unsigned long timeout) +{ + return timeout != IPSET_ELEM_UNSET && + timeout != IPSET_ELEM_PERMANENT && + time_before(timeout, jiffies); +} + +static inline unsigned long +ip_set_timeout_set(u32 timeout) +{ + unsigned long t; + + if (!timeout) + return IPSET_ELEM_PERMANENT; + + t = timeout * HZ + jiffies; + if (t == IPSET_ELEM_UNSET || t == IPSET_ELEM_PERMANENT) + /* Bingo! */ + t++; + + return t; +} + +static inline u32 +ip_set_timeout_get(unsigned long timeout) +{ + return timeout == IPSET_ELEM_PERMANENT ? 0 : (timeout - jiffies)/HZ; +} + +#else + +/* Hash specific timeout constants and macros for the entries */ + +/* Hash entry is set with no timeout value */ +#define IPSET_ELEM_PERMANENT 0 + +static inline bool +ip_set_timeout_test(unsigned long timeout) +{ + return timeout == IPSET_ELEM_PERMANENT || + time_after(timeout, jiffies); +} + +static inline bool +ip_set_timeout_expired(unsigned long timeout) +{ + return timeout != IPSET_ELEM_PERMANENT && + time_before(timeout, jiffies); +} + +static inline unsigned long +ip_set_timeout_set(u32 timeout) +{ + unsigned long t; + + if (!timeout) + return IPSET_ELEM_PERMANENT; + + t = timeout * HZ + jiffies; + if (t == IPSET_ELEM_PERMANENT) + /* Bingo! :-) */ + t++; + + return t; +} + +static inline u32 +ip_set_timeout_get(unsigned long timeout) +{ + return timeout == IPSET_ELEM_PERMANENT ? 0 : (timeout - jiffies)/HZ; +} +#endif /* ! IP_SET_BITMAP_TIMEOUT */ + +#endif /* __KERNEL__ */ + +#endif /* _IP_SET_TIMEOUT_H */ -- cgit v1.2.3 From 6c027889696a7a694b0e2f6e3cabadefec7553b6 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:38:36 +0100 Subject: netfilter: ipset: hash:ip set type support The module implements the hash:ip type support in four flavours: for IPv4 or IPv6, both without and with timeout support. All the hash types are based on the "array hash" or ahash structure and functions as a good compromise between minimal memory footprint and speed. The hashing uses arrays to resolve clashes. The hash table is resized (doubled) when searching becomes too long. Resizing can be triggered by userspace add commands only and those are serialized by the nfnl mutex. During resizing the set is read-locked, so the only possible concurrent operations are the kernel side readers. Those are protected by RCU locking. Because of the four flavours and the other hash types, the functions are implemented in general forms in the ip_set_ahash.h header file and the real functions are generated before compiling by macro expansion. Thus the dereferencing of low-level functions and void pointer arguments could be avoided: the low-level functions are inlined, the function arguments are pointers of type-specific structures. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set_ahash.h | 1074 ++++++++++++++++++++++++++ include/linux/netfilter/ipset/ip_set_hash.h | 26 + 2 files changed, 1100 insertions(+) create mode 100644 include/linux/netfilter/ipset/ip_set_ahash.h create mode 100644 include/linux/netfilter/ipset/ip_set_hash.h (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h new file mode 100644 index 000000000000..ec9d9bea1e37 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_ahash.h @@ -0,0 +1,1074 @@ +#ifndef _IP_SET_AHASH_H +#define _IP_SET_AHASH_H + +#include +#include +#include + +/* Hashing which uses arrays to resolve clashing. The hash table is resized + * (doubled) when searching becomes too long. + * Internally jhash is used with the assumption that the size of the + * stored data is a multiple of sizeof(u32). If storage supports timeout, + * the timeout field must be the last one in the data structure - that field + * is ignored when computing the hash key. + * + * Readers and resizing + * + * Resizing can be triggered by userspace command only, and those + * are serialized by the nfnl mutex. During resizing the set is + * read-locked, so the only possible concurrent operations are + * the kernel side readers. Those must be protected by proper RCU locking. + */ + +/* Number of elements to store in an initial array block */ +#define AHASH_INIT_SIZE 4 +/* Max number of elements to store in an array block */ +#define AHASH_MAX_SIZE (3*4) + +/* A hash bucket */ +struct hbucket { + void *value; /* the array of the values */ + u8 size; /* size of the array */ + u8 pos; /* position of the first free entry */ +}; + +/* The hash table: the table size stored here in order to make resizing easy */ +struct htable { + u8 htable_bits; /* size of hash table == 2^htable_bits */ + struct hbucket bucket[0]; /* hashtable buckets */ +}; + +#define hbucket(h, i) &((h)->bucket[i]) + +/* Book-keeping of the prefixes added to the set */ +struct ip_set_hash_nets { + u8 cidr; /* the different cidr values in the set */ + u32 nets; /* number of elements per cidr */ +}; + +/* The generic ip_set hash structure */ +struct ip_set_hash { + struct htable *table; /* the hash table */ + u32 maxelem; /* max elements in the hash */ + u32 elements; /* current element (vs timeout) */ + u32 initval; /* random jhash init value */ + u32 timeout; /* timeout value, if enabled */ + struct timer_list gc; /* garbage collection when timeout enabled */ +#ifdef IP_SET_HASH_WITH_NETMASK + u8 netmask; /* netmask value for subnets to store */ +#endif +#ifdef IP_SET_HASH_WITH_NETS + struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */ +#endif +}; + +/* Compute htable_bits from the user input parameter hashsize */ +static u8 +htable_bits(u32 hashsize) +{ + /* Assume that hashsize == 2^htable_bits */ + u8 bits = fls(hashsize - 1); + if (jhash_size(bits) != hashsize) + /* Round up to the first 2^n value */ + bits = fls(hashsize); + + return bits; +} + +#ifdef IP_SET_HASH_WITH_NETS + +#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) + +/* Network cidr size book keeping when the hash stores different + * sized networks */ +static void +add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask) +{ + u8 i; + + ++h->nets[cidr-1].nets; + + pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets); + + if (h->nets[cidr-1].nets > 1) + return; + + /* New cidr size */ + for (i = 0; i < host_mask && h->nets[i].cidr; i++) { + /* Add in increasing prefix order, so larger cidr first */ + if (h->nets[i].cidr < cidr) + swap(h->nets[i].cidr, cidr); + } + if (i < host_mask) + h->nets[i].cidr = cidr; +} + +static void +del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask) +{ + u8 i; + + --h->nets[cidr-1].nets; + + pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets); + + if (h->nets[cidr-1].nets != 0) + return; + + /* All entries with this cidr size deleted, so cleanup h->cidr[] */ + for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) { + if (h->nets[i].cidr == cidr) + h->nets[i].cidr = cidr = h->nets[i+1].cidr; + } + h->nets[i - 1].cidr = 0; +} +#endif + +/* Destroy the hashtable part of the set */ +static void +ahash_destroy(struct htable *t) +{ + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) + /* FIXME: use slab cache */ + kfree(n->value); + } + + ip_set_free(t); +} + +/* Calculate the actual memory size of the set data */ +static size_t +ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask) +{ + u32 i; + struct htable *t = h->table; + size_t memsize = sizeof(*h) + + sizeof(*t) +#ifdef IP_SET_HASH_WITH_NETS + + sizeof(struct ip_set_hash_nets) * host_mask +#endif + + jhash_size(t->htable_bits) * sizeof(struct hbucket); + + for (i = 0; i < jhash_size(t->htable_bits); i++) + memsize += t->bucket[i].size * dsize; + + return memsize; +} + +/* Flush a hash type of set: destroy all elements */ +static void +ip_set_hash_flush(struct ip_set *set) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) { + n->size = n->pos = 0; + /* FIXME: use slab cache */ + kfree(n->value); + } + } +#ifdef IP_SET_HASH_WITH_NETS + memset(h->nets, 0, sizeof(struct ip_set_hash_nets) + * SET_HOST_MASK(set->family)); +#endif + h->elements = 0; +} + +/* Destroy a hash type of set */ +static void +ip_set_hash_destroy(struct ip_set *set) +{ + struct ip_set_hash *h = set->data; + + if (with_timeout(h->timeout)) + del_timer_sync(&h->gc); + + ahash_destroy(h->table); + kfree(h); + + set->data = NULL; +} + +#define HKEY(data, initval, htable_bits) \ +(jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \ + & jhash_mask(htable_bits)) + +#endif /* _IP_SET_AHASH_H */ + +#define CONCAT(a, b, c) a##b##c +#define TOKEN(a, b, c) CONCAT(a, b, c) + +/* Type/family dependent function prototypes */ + +#define type_pf_data_equal TOKEN(TYPE, PF, _data_equal) +#define type_pf_data_isnull TOKEN(TYPE, PF, _data_isnull) +#define type_pf_data_copy TOKEN(TYPE, PF, _data_copy) +#define type_pf_data_zero_out TOKEN(TYPE, PF, _data_zero_out) +#define type_pf_data_netmask TOKEN(TYPE, PF, _data_netmask) +#define type_pf_data_list TOKEN(TYPE, PF, _data_list) +#define type_pf_data_tlist TOKEN(TYPE, PF, _data_tlist) + +#define type_pf_elem TOKEN(TYPE, PF, _elem) +#define type_pf_telem TOKEN(TYPE, PF, _telem) +#define type_pf_data_timeout TOKEN(TYPE, PF, _data_timeout) +#define type_pf_data_expired TOKEN(TYPE, PF, _data_expired) +#define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set) + +#define type_pf_elem_add TOKEN(TYPE, PF, _elem_add) +#define type_pf_add TOKEN(TYPE, PF, _add) +#define type_pf_del TOKEN(TYPE, PF, _del) +#define type_pf_test_cidrs TOKEN(TYPE, PF, _test_cidrs) +#define type_pf_test TOKEN(TYPE, PF, _test) + +#define type_pf_elem_tadd TOKEN(TYPE, PF, _elem_tadd) +#define type_pf_del_telem TOKEN(TYPE, PF, _ahash_del_telem) +#define type_pf_expire TOKEN(TYPE, PF, _expire) +#define type_pf_tadd TOKEN(TYPE, PF, _tadd) +#define type_pf_tdel TOKEN(TYPE, PF, _tdel) +#define type_pf_ttest_cidrs TOKEN(TYPE, PF, _ahash_ttest_cidrs) +#define type_pf_ttest TOKEN(TYPE, PF, _ahash_ttest) + +#define type_pf_resize TOKEN(TYPE, PF, _resize) +#define type_pf_tresize TOKEN(TYPE, PF, _tresize) +#define type_pf_flush ip_set_hash_flush +#define type_pf_destroy ip_set_hash_destroy +#define type_pf_head TOKEN(TYPE, PF, _head) +#define type_pf_list TOKEN(TYPE, PF, _list) +#define type_pf_tlist TOKEN(TYPE, PF, _tlist) +#define type_pf_same_set TOKEN(TYPE, PF, _same_set) +#define type_pf_kadt TOKEN(TYPE, PF, _kadt) +#define type_pf_uadt TOKEN(TYPE, PF, _uadt) +#define type_pf_gc TOKEN(TYPE, PF, _gc) +#define type_pf_gc_init TOKEN(TYPE, PF, _gc_init) +#define type_pf_variant TOKEN(TYPE, PF, _variant) +#define type_pf_tvariant TOKEN(TYPE, PF, _tvariant) + +/* Flavour without timeout */ + +/* Get the ith element from the array block n */ +#define ahash_data(n, i) \ + ((struct type_pf_elem *)((n)->value) + (i)) + +/* Add an element to the hash table when resizing the set: + * we spare the maintenance of the internal counters. */ +static int +type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value) +{ + if (n->pos >= n->size) { + void *tmp; + + if (n->size >= AHASH_MAX_SIZE) + /* Trigger rehashing */ + return -EAGAIN; + + tmp = kzalloc((n->size + AHASH_INIT_SIZE) + * sizeof(struct type_pf_elem), + GFP_ATOMIC); + if (!tmp) + return -ENOMEM; + if (n->size) { + memcpy(tmp, n->value, + sizeof(struct type_pf_elem) * n->size); + kfree(n->value); + } + n->value = tmp; + n->size += AHASH_INIT_SIZE; + } + type_pf_data_copy(ahash_data(n, n->pos++), value); + return 0; +} + +/* Resize a hash: create a new hash table with doubling the hashsize + * and inserting the elements to it. Repeat until we succeed or + * fail due to memory pressures. */ +static int +type_pf_resize(struct ip_set *set, bool retried) +{ + struct ip_set_hash *h = set->data; + struct htable *t, *orig = h->table; + u8 htable_bits = orig->htable_bits; + const struct type_pf_elem *data; + struct hbucket *n, *m; + u32 i, j; + int ret; + +retry: + ret = 0; + htable_bits++; + pr_debug("attempt to resize set %s from %u to %u, t %p\n", + set->name, orig->htable_bits, htable_bits, orig); + if (!htable_bits) + /* In case we have plenty of memory :-) */ + return -IPSET_ERR_HASH_FULL; + t = ip_set_alloc(sizeof(*t) + + jhash_size(htable_bits) * sizeof(struct hbucket)); + if (!t) + return -ENOMEM; + t->htable_bits = htable_bits; + + read_lock_bh(&set->lock); + for (i = 0; i < jhash_size(orig->htable_bits); i++) { + n = hbucket(orig, i); + for (j = 0; j < n->pos; j++) { + data = ahash_data(n, j); + m = hbucket(t, HKEY(data, h->initval, htable_bits)); + ret = type_pf_elem_add(m, data); + if (ret < 0) { + read_unlock_bh(&set->lock); + ahash_destroy(t); + if (ret == -EAGAIN) + goto retry; + return ret; + } + } + } + + rcu_assign_pointer(h->table, t); + read_unlock_bh(&set->lock); + + /* Give time to other readers of the set */ + synchronize_rcu_bh(); + + pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, + orig->htable_bits, orig, t->htable_bits, t); + ahash_destroy(orig); + + return 0; +} + +/* Add an element to a hash and update the internal counters when succeeded, + * otherwise report the proper error code. */ +static int +type_pf_add(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t; + const struct type_pf_elem *d = value; + struct hbucket *n; + int i, ret = 0; + u32 key; + + if (h->elements >= h->maxelem) + return -IPSET_ERR_HASH_FULL; + + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) + if (type_pf_data_equal(ahash_data(n, i), d)) { + ret = -IPSET_ERR_EXIST; + goto out; + } + + ret = type_pf_elem_add(n, value); + if (ret != 0) + goto out; + +#ifdef IP_SET_HASH_WITH_NETS + add_cidr(h, d->cidr, HOST_MASK); +#endif + h->elements++; +out: + rcu_read_unlock_bh(); + return ret; +} + +/* Delete an element from the hash: swap it with the last element + * and free up space if possible. + */ +static int +type_pf_del(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + const struct type_pf_elem *d = value; + struct hbucket *n; + int i; + struct type_pf_elem *data; + u32 key; + + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i); + if (!type_pf_data_equal(data, d)) + continue; + if (i != n->pos - 1) + /* Not last one */ + type_pf_data_copy(data, ahash_data(n, n->pos - 1)); + + n->pos--; + h->elements--; +#ifdef IP_SET_HASH_WITH_NETS + del_cidr(h, d->cidr, HOST_MASK); +#endif + if (n->pos + AHASH_INIT_SIZE < n->size) { + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) + * sizeof(struct type_pf_elem), + GFP_ATOMIC); + if (!tmp) + return 0; + n->size -= AHASH_INIT_SIZE; + memcpy(tmp, n->value, + n->size * sizeof(struct type_pf_elem)); + kfree(n->value); + n->value = tmp; + } + return 0; + } + + return -IPSET_ERR_EXIST; +} + +#ifdef IP_SET_HASH_WITH_NETS + +/* Special test function which takes into account the different network + * sizes added to the set */ +static int +type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + struct hbucket *n; + const struct type_pf_elem *data; + int i, j = 0; + u32 key; + u8 host_mask = SET_HOST_MASK(set->family); + + pr_debug("test by nets\n"); + for (; j < host_mask && h->nets[j].cidr; j++) { + type_pf_data_netmask(d, h->nets[j].cidr); + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i); + if (type_pf_data_equal(data, d)) + return 1; + } + } + return 0; +} +#endif + +/* Test whether the element is added to the set */ +static int +type_pf_test(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + struct type_pf_elem *d = value; + struct hbucket *n; + const struct type_pf_elem *data; + int i; + u32 key; + +#ifdef IP_SET_HASH_WITH_NETS + /* If we test an IP address and not a network address, + * try all possible network sizes */ + if (d->cidr == SET_HOST_MASK(set->family)) + return type_pf_test_cidrs(set, d, timeout); +#endif + + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i); + if (type_pf_data_equal(data, d)) + return 1; + } + return 0; +} + +/* Reply a HEADER request: fill out the header part of the set */ +static int +type_pf_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct ip_set_hash *h = set->data; + struct nlattr *nested; + size_t memsize; + + read_lock_bh(&set->lock); + memsize = ahash_memsize(h, with_timeout(h->timeout) + ? sizeof(struct type_pf_telem) + : sizeof(struct type_pf_elem), + set->family == AF_INET ? 32 : 128); + read_unlock_bh(&set->lock); + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE, + htonl(jhash_size(h->table->htable_bits))); + NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)); +#ifdef IP_SET_HASH_WITH_NETMASK + if (h->netmask != HOST_MASK) + NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask); +#endif + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)); + if (with_timeout(h->timeout)) + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +/* Reply a LIST/SAVE request: dump the elements of the specified set */ +static int +type_pf_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct ip_set_hash *h = set->data; + const struct htable *t = h->table; + struct nlattr *atd, *nested; + const struct hbucket *n; + const struct type_pf_elem *data; + u32 first = cb->args[2]; + /* We assume that one hash bucket fills into one page */ + void *incomplete; + int i; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + pr_debug("list hash set %s\n", set->name); + for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { + incomplete = skb_tail_pointer(skb); + n = hbucket(t, cb->args[2]); + pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i); + pr_debug("list hash %lu hbucket %p i %u, data %p\n", + cb->args[2], n, i, data); + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (cb->args[2] == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + if (type_pf_data_list(skb, data)) + goto nla_put_failure; + ipset_nest_end(skb, nested); + } + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nlmsg_trim(skb, incomplete); + ipset_nest_end(skb, atd); + if (unlikely(first == cb->args[2])) { + pr_warning("Can't list set %s: one bucket does not fit into " + "a message. Please report it!\n", set->name); + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static int +type_pf_kadt(struct ip_set *set, const struct sk_buff * skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags); +static int +type_pf_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags); + +static const struct ip_set_type_variant type_pf_variant = { + .kadt = type_pf_kadt, + .uadt = type_pf_uadt, + .adt = { + [IPSET_ADD] = type_pf_add, + [IPSET_DEL] = type_pf_del, + [IPSET_TEST] = type_pf_test, + }, + .destroy = type_pf_destroy, + .flush = type_pf_flush, + .head = type_pf_head, + .list = type_pf_list, + .resize = type_pf_resize, + .same_set = type_pf_same_set, +}; + +/* Flavour with timeout support */ + +#define ahash_tdata(n, i) \ + (struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i)) + +static inline u32 +type_pf_data_timeout(const struct type_pf_elem *data) +{ + const struct type_pf_telem *tdata = + (const struct type_pf_telem *) data; + + return tdata->timeout; +} + +static inline bool +type_pf_data_expired(const struct type_pf_elem *data) +{ + const struct type_pf_telem *tdata = + (const struct type_pf_telem *) data; + + return ip_set_timeout_expired(tdata->timeout); +} + +static inline void +type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout) +{ + struct type_pf_telem *tdata = (struct type_pf_telem *) data; + + tdata->timeout = ip_set_timeout_set(timeout); +} + +static int +type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value, + u32 timeout) +{ + struct type_pf_elem *data; + + if (n->pos >= n->size) { + void *tmp; + + if (n->size >= AHASH_MAX_SIZE) + /* Trigger rehashing */ + return -EAGAIN; + + tmp = kzalloc((n->size + AHASH_INIT_SIZE) + * sizeof(struct type_pf_telem), + GFP_ATOMIC); + if (!tmp) + return -ENOMEM; + if (n->size) { + memcpy(tmp, n->value, + sizeof(struct type_pf_telem) * n->size); + kfree(n->value); + } + n->value = tmp; + n->size += AHASH_INIT_SIZE; + } + data = ahash_tdata(n, n->pos++); + type_pf_data_copy(data, value); + type_pf_data_timeout_set(data, timeout); + return 0; +} + +/* Delete expired elements from the hashtable */ +static void +type_pf_expire(struct ip_set_hash *h) +{ + struct htable *t = h->table; + struct hbucket *n; + struct type_pf_elem *data; + u32 i; + int j; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + for (j = 0; j < n->pos; j++) { + data = ahash_tdata(n, j); + if (type_pf_data_expired(data)) { + pr_debug("expired %u/%u\n", i, j); +#ifdef IP_SET_HASH_WITH_NETS + del_cidr(h, data->cidr, HOST_MASK); +#endif + if (j != n->pos - 1) + /* Not last one */ + type_pf_data_copy(data, + ahash_tdata(n, n->pos - 1)); + n->pos--; + h->elements--; + } + } + if (n->pos + AHASH_INIT_SIZE < n->size) { + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) + * sizeof(struct type_pf_telem), + GFP_ATOMIC); + if (!tmp) + /* Still try to delete expired elements */ + continue; + n->size -= AHASH_INIT_SIZE; + memcpy(tmp, n->value, + n->size * sizeof(struct type_pf_telem)); + kfree(n->value); + n->value = tmp; + } + } +} + +static int +type_pf_tresize(struct ip_set *set, bool retried) +{ + struct ip_set_hash *h = set->data; + struct htable *t, *orig = h->table; + u8 htable_bits = orig->htable_bits; + const struct type_pf_elem *data; + struct hbucket *n, *m; + u32 i, j; + int ret; + + /* Try to cleanup once */ + if (!retried) { + i = h->elements; + write_lock_bh(&set->lock); + type_pf_expire(set->data); + write_unlock_bh(&set->lock); + if (h->elements < i) + return 0; + } + +retry: + ret = 0; + htable_bits++; + if (!htable_bits) + /* In case we have plenty of memory :-) */ + return -IPSET_ERR_HASH_FULL; + t = ip_set_alloc(sizeof(*t) + + jhash_size(htable_bits) * sizeof(struct hbucket)); + if (!t) + return -ENOMEM; + t->htable_bits = htable_bits; + + read_lock_bh(&set->lock); + for (i = 0; i < jhash_size(orig->htable_bits); i++) { + n = hbucket(orig, i); + for (j = 0; j < n->pos; j++) { + data = ahash_tdata(n, j); + m = hbucket(t, HKEY(data, h->initval, htable_bits)); + ret = type_pf_elem_tadd(m, data, + type_pf_data_timeout(data)); + if (ret < 0) { + read_unlock_bh(&set->lock); + ahash_destroy(t); + if (ret == -EAGAIN) + goto retry; + return ret; + } + } + } + + rcu_assign_pointer(h->table, t); + read_unlock_bh(&set->lock); + + /* Give time to other readers of the set */ + synchronize_rcu_bh(); + + ahash_destroy(orig); + + return 0; +} + +static int +type_pf_tadd(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + const struct type_pf_elem *d = value; + struct hbucket *n; + struct type_pf_elem *data; + int ret = 0, i, j = AHASH_MAX_SIZE + 1; + u32 key; + + if (h->elements >= h->maxelem) + /* FIXME: when set is full, we slow down here */ + type_pf_expire(h); + if (h->elements >= h->maxelem) + return -IPSET_ERR_HASH_FULL; + + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_tdata(n, i); + if (type_pf_data_equal(data, d)) { + if (type_pf_data_expired(data)) + j = i; + else { + ret = -IPSET_ERR_EXIST; + goto out; + } + } else if (j == AHASH_MAX_SIZE + 1 && + type_pf_data_expired(data)) + j = i; + } + if (j != AHASH_MAX_SIZE + 1) { + data = ahash_tdata(n, j); +#ifdef IP_SET_HASH_WITH_NETS + del_cidr(h, data->cidr, HOST_MASK); + add_cidr(h, d->cidr, HOST_MASK); +#endif + type_pf_data_copy(data, d); + type_pf_data_timeout_set(data, timeout); + goto out; + } + ret = type_pf_elem_tadd(n, d, timeout); + if (ret != 0) + goto out; + +#ifdef IP_SET_HASH_WITH_NETS + add_cidr(h, d->cidr, HOST_MASK); +#endif + h->elements++; +out: + rcu_read_unlock_bh(); + return ret; +} + +static int +type_pf_tdel(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + const struct type_pf_elem *d = value; + struct hbucket *n; + int i, ret = 0; + struct type_pf_elem *data; + u32 key; + + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_tdata(n, i); + if (!type_pf_data_equal(data, d)) + continue; + if (type_pf_data_expired(data)) + ret = -IPSET_ERR_EXIST; + if (i != n->pos - 1) + /* Not last one */ + type_pf_data_copy(data, ahash_tdata(n, n->pos - 1)); + + n->pos--; + h->elements--; +#ifdef IP_SET_HASH_WITH_NETS + del_cidr(h, d->cidr, HOST_MASK); +#endif + if (n->pos + AHASH_INIT_SIZE < n->size) { + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) + * sizeof(struct type_pf_telem), + GFP_ATOMIC); + if (!tmp) + return 0; + n->size -= AHASH_INIT_SIZE; + memcpy(tmp, n->value, + n->size * sizeof(struct type_pf_telem)); + kfree(n->value); + n->value = tmp; + } + return 0; + } + + return -IPSET_ERR_EXIST; +} + +#ifdef IP_SET_HASH_WITH_NETS +static int +type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + struct type_pf_elem *data; + struct hbucket *n; + int i, j = 0; + u32 key; + u8 host_mask = SET_HOST_MASK(set->family); + + for (; j < host_mask && h->nets[j].cidr; j++) { + type_pf_data_netmask(d, h->nets[j].cidr); + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_tdata(n, i); + if (type_pf_data_equal(data, d)) + return !type_pf_data_expired(data); + } + } + return 0; +} +#endif + +static int +type_pf_ttest(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + struct type_pf_elem *data, *d = value; + struct hbucket *n; + int i; + u32 key; + +#ifdef IP_SET_HASH_WITH_NETS + if (d->cidr == SET_HOST_MASK(set->family)) + return type_pf_ttest_cidrs(set, d, timeout); +#endif + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_tdata(n, i); + if (type_pf_data_equal(data, d)) + return !type_pf_data_expired(data); + } + return 0; +} + +static int +type_pf_tlist(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct ip_set_hash *h = set->data; + const struct htable *t = h->table; + struct nlattr *atd, *nested; + const struct hbucket *n; + const struct type_pf_elem *data; + u32 first = cb->args[2]; + /* We assume that one hash bucket fills into one page */ + void *incomplete; + int i; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { + incomplete = skb_tail_pointer(skb); + n = hbucket(t, cb->args[2]); + for (i = 0; i < n->pos; i++) { + data = ahash_tdata(n, i); + pr_debug("list %p %u\n", n, i); + if (type_pf_data_expired(data)) + continue; + pr_debug("do list %p %u\n", n, i); + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (cb->args[2] == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + if (type_pf_data_tlist(skb, data)) + goto nla_put_failure; + ipset_nest_end(skb, nested); + } + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nlmsg_trim(skb, incomplete); + ipset_nest_end(skb, atd); + if (unlikely(first == cb->args[2])) { + pr_warning("Can't list set %s: one bucket does not fit into " + "a message. Please report it!\n", set->name); + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static const struct ip_set_type_variant type_pf_tvariant = { + .kadt = type_pf_kadt, + .uadt = type_pf_uadt, + .adt = { + [IPSET_ADD] = type_pf_tadd, + [IPSET_DEL] = type_pf_tdel, + [IPSET_TEST] = type_pf_ttest, + }, + .destroy = type_pf_destroy, + .flush = type_pf_flush, + .head = type_pf_head, + .list = type_pf_tlist, + .resize = type_pf_tresize, + .same_set = type_pf_same_set, +}; + +static void +type_pf_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set_hash *h = set->data; + + pr_debug("called\n"); + write_lock_bh(&set->lock); + type_pf_expire(h); + write_unlock_bh(&set->lock); + + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + add_timer(&h->gc); +} + +static void +type_pf_gc_init(struct ip_set *set) +{ + struct ip_set_hash *h = set->data; + + init_timer(&h->gc); + h->gc.data = (unsigned long) set; + h->gc.function = type_pf_gc; + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + add_timer(&h->gc); + pr_debug("gc initialized, run in every %u\n", + IPSET_GC_PERIOD(h->timeout)); +} + +#undef type_pf_data_equal +#undef type_pf_data_isnull +#undef type_pf_data_copy +#undef type_pf_data_zero_out +#undef type_pf_data_list +#undef type_pf_data_tlist + +#undef type_pf_elem +#undef type_pf_telem +#undef type_pf_data_timeout +#undef type_pf_data_expired +#undef type_pf_data_netmask +#undef type_pf_data_timeout_set + +#undef type_pf_elem_add +#undef type_pf_add +#undef type_pf_del +#undef type_pf_test_cidrs +#undef type_pf_test + +#undef type_pf_elem_tadd +#undef type_pf_expire +#undef type_pf_tadd +#undef type_pf_tdel +#undef type_pf_ttest_cidrs +#undef type_pf_ttest + +#undef type_pf_resize +#undef type_pf_tresize +#undef type_pf_flush +#undef type_pf_destroy +#undef type_pf_head +#undef type_pf_list +#undef type_pf_tlist +#undef type_pf_same_set +#undef type_pf_kadt +#undef type_pf_uadt +#undef type_pf_gc +#undef type_pf_gc_init +#undef type_pf_variant +#undef type_pf_tvariant diff --git a/include/linux/netfilter/ipset/ip_set_hash.h b/include/linux/netfilter/ipset/ip_set_hash.h new file mode 100644 index 000000000000..b86f15c04524 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_hash.h @@ -0,0 +1,26 @@ +#ifndef __IP_SET_HASH_H +#define __IP_SET_HASH_H + +/* Hash type specific error codes */ +enum { + /* Hash is full */ + IPSET_ERR_HASH_FULL = IPSET_ERR_TYPE_SPECIFIC, + /* Null-valued element */ + IPSET_ERR_HASH_ELEM, + /* Invalid protocol */ + IPSET_ERR_INVALID_PROTO, + /* Protocol missing but must be specified */ + IPSET_ERR_MISSING_PROTO, +}; + +#ifdef __KERNEL__ + +#define IPSET_DEFAULT_HASHSIZE 1024 +#define IPSET_MIMINAL_HASHSIZE 64 +#define IPSET_DEFAULT_MAXELEM 65536 +#define IPSET_DEFAULT_PROBES 4 +#define IPSET_DEFAULT_RESIZE 100 + +#endif /* __KERNEL__ */ + +#endif /* __IP_SET_HASH_H */ -- cgit v1.2.3 From f830837f0eed0f9e371b8fd65169365780814bb1 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:54:59 +0100 Subject: netfilter: ipset: list:set set type support The module implements the list:set type support in two flavours: without and with timeout. The sets has two sides: for the userspace, they store the names of other (non list:set type of) sets: one can add, delete and test set names. For the kernel, it forms an ordered union of the member sets: the members sets are tried in order when elements are added, deleted and tested and the process stops at the first success. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set_list.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/netfilter/ipset/ip_set_list.h (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_list.h b/include/linux/netfilter/ipset/ip_set_list.h new file mode 100644 index 000000000000..40a63f302613 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_list.h @@ -0,0 +1,27 @@ +#ifndef __IP_SET_LIST_H +#define __IP_SET_LIST_H + +/* List type specific error codes */ +enum { + /* Set name to be added/deleted/tested does not exist. */ + IPSET_ERR_NAME = IPSET_ERR_TYPE_SPECIFIC, + /* list:set type is not permitted to add */ + IPSET_ERR_LOOP, + /* Missing reference set */ + IPSET_ERR_BEFORE, + /* Reference set does not exist */ + IPSET_ERR_NAMEREF, + /* Set is full */ + IPSET_ERR_LIST_FULL, + /* Reference set is not added to the set */ + IPSET_ERR_REF_EXIST, +}; + +#ifdef __KERNEL__ + +#define IP_SET_LIST_DEFAULT_SIZE 8 +#define IP_SET_LIST_MIN_SIZE 4 + +#endif /* __KERNEL__ */ + +#endif /* __IP_SET_LIST_H */ -- cgit v1.2.3 From d956798d82d2d331c031301965d69e17a1a48a2b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:56:00 +0100 Subject: netfilter: xtables: "set" match and "SET" target support The patch adds the combined module of the "SET" target and "set" match to netfilter. Both the previous and the current revisions are supported. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_set.h | 55 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 include/linux/netfilter/xt_set.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_set.h b/include/linux/netfilter/xt_set.h new file mode 100644 index 000000000000..69b2bd1fb818 --- /dev/null +++ b/include/linux/netfilter/xt_set.h @@ -0,0 +1,55 @@ +#ifndef _XT_SET_H +#define _XT_SET_H + +#include + +/* Revision 0 interface: backward compatible with netfilter/iptables */ + +/* + * Option flags for kernel operations (xt_set_info_v0) + */ +#define IPSET_SRC 0x01 /* Source match/add */ +#define IPSET_DST 0x02 /* Destination match/add */ +#define IPSET_MATCH_INV 0x04 /* Inverse matching */ + +struct xt_set_info_v0 { + ip_set_id_t index; + union { + __u32 flags[IPSET_DIM_MAX + 1]; + struct { + __u32 __flags[IPSET_DIM_MAX]; + __u8 dim; + __u8 flags; + } compat; + } u; +}; + +/* match and target infos */ +struct xt_set_info_match_v0 { + struct xt_set_info_v0 match_set; +}; + +struct xt_set_info_target_v0 { + struct xt_set_info_v0 add_set; + struct xt_set_info_v0 del_set; +}; + +/* Revision 1: current interface to netfilter/iptables */ + +struct xt_set_info { + ip_set_id_t index; + __u8 dim; + __u8 flags; +}; + +/* match and target infos */ +struct xt_set_info_match { + struct xt_set_info match_set; +}; + +struct xt_set_info_target { + struct xt_set_info add_set; + struct xt_set_info del_set; +}; + +#endif /*_XT_SET_H*/ -- cgit v1.2.3 From e3e241b2769b27669d05f0a05083acd21b4faa2c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Feb 2011 18:52:42 +0100 Subject: netfilter: ipset: install ipset related header files Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 3 +++ include/linux/netfilter/ipset/Kbuild | 4 ++++ include/linux/netfilter/xt_set.h | 1 + 3 files changed, 8 insertions(+) create mode 100644 include/linux/netfilter/ipset/Kbuild (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 89c0d1e20d72..ba19544cce94 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -1,3 +1,5 @@ +header-y += ipset/ + header-y += nf_conntrack_common.h header-y += nf_conntrack_ftp.h header-y += nf_conntrack_sctp.h @@ -55,6 +57,7 @@ header-y += xt_quota.h header-y += xt_rateest.h header-y += xt_realm.h header-y += xt_recent.h +header-y += xt_set.h header-y += xt_sctp.h header-y += xt_socket.h header-y += xt_state.h diff --git a/include/linux/netfilter/ipset/Kbuild b/include/linux/netfilter/ipset/Kbuild new file mode 100644 index 000000000000..601fe71d34d5 --- /dev/null +++ b/include/linux/netfilter/ipset/Kbuild @@ -0,0 +1,4 @@ +header-y += ip_set.h +header-y += ip_set_bitmap.h +header-y += ip_set_hash.h +header-y += ip_set_list.h diff --git a/include/linux/netfilter/xt_set.h b/include/linux/netfilter/xt_set.h index 69b2bd1fb818..081f1ded2842 100644 --- a/include/linux/netfilter/xt_set.h +++ b/include/linux/netfilter/xt_set.h @@ -1,6 +1,7 @@ #ifndef _XT_SET_H #define _XT_SET_H +#include #include /* Revision 0 interface: backward compatible with netfilter/iptables */ -- cgit v1.2.3 From 724bab476bcac9f7d0b5204cb06e346216d42166 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 2 Feb 2011 23:50:01 +0100 Subject: netfilter: ipset: fix linking with CONFIG_IPV6=n Add a dummy ip_set_get_ip6_port function that unconditionally returns false for CONFIG_IPV6=n and convert the real function to ipv6_skip_exthdr() to avoid pulling in the ip6_tables module when loading ipset. Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set_getport.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h index 694c433298b8..3882a81a3b3c 100644 --- a/include/linux/netfilter/ipset/ip_set_getport.h +++ b/include/linux/netfilter/ipset/ip_set_getport.h @@ -3,8 +3,18 @@ extern bool ip_set_get_ip4_port(const struct sk_buff *skb, bool src, __be16 *port, u8 *proto); + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) extern bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src, __be16 *port, u8 *proto); +#else +static inline bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src, + __be16 *port, u8 *proto) +{ + return false; +} +#endif + extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port); -- cgit v1.2.3 From 9291747f118d6404e509747b85ff5f6dfec368d2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Feb 2011 00:05:43 +0100 Subject: netfilter: xtables: add device group match Add a new 'devgroup' match to match on the device group of the incoming and outgoing network device of a packet. Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_devgroup.h | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 include/linux/netfilter/xt_devgroup.h (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index ba19544cce94..15e83bf3dd58 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -37,6 +37,7 @@ header-y += xt_connmark.h header-y += xt_conntrack.h header-y += xt_cpu.h header-y += xt_dccp.h +header-y += xt_devgroup.h header-y += xt_dscp.h header-y += xt_esp.h header-y += xt_hashlimit.h diff --git a/include/linux/netfilter/xt_devgroup.h b/include/linux/netfilter/xt_devgroup.h new file mode 100644 index 000000000000..1babde0ec900 --- /dev/null +++ b/include/linux/netfilter/xt_devgroup.h @@ -0,0 +1,21 @@ +#ifndef _XT_DEVGROUP_H +#define _XT_DEVGROUP_H + +#include + +enum xt_devgroup_flags { + XT_DEVGROUP_MATCH_SRC = 0x1, + XT_DEVGROUP_INVERT_SRC = 0x2, + XT_DEVGROUP_MATCH_DST = 0x4, + XT_DEVGROUP_INVERT_DST = 0x8, +}; + +struct xt_devgroup_info { + __u32 flags; + __u32 src_group; + __u32 src_mask; + __u32 dst_group; + __u32 dst_mask; +}; + +#endif /* _XT_DEVGROUP_H */ -- cgit v1.2.3 From 45e144339ac59971eb44be32e1282760aaabe861 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 2 Feb 2011 15:21:10 +0000 Subject: sched: CHOKe flow scheduler CHOKe ("CHOose and Kill" or "CHOose and Keep") is an alternative packet scheduler based on the Random Exponential Drop (RED) algorithm. The core idea is: For every packet arrival: Calculate Qave if (Qave < minth) Queue the new packet else Select randomly a packet from the queue if (both packets from same flow) then Drop both the packets else if (Qave > maxth) Drop packet else Admit packet with proability p (same as RED) See also: Rong Pan, Balaji Prabhakar, Konstantinos Psounis, "CHOKe: a stateless active queue management scheme for approximating fair bandwidth allocation", Proceeding of INFOCOM'2000, March 2000. Help from: Eric Dumazet Patrick McHardy Signed-off-by: Stephen Hemminger Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 776cd93d5f7b..d4bb6f58c90c 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -247,6 +247,35 @@ struct tc_gred_sopt { __u16 pad1; }; +/* CHOKe section */ + +enum { + TCA_CHOKE_UNSPEC, + TCA_CHOKE_PARMS, + TCA_CHOKE_STAB, + __TCA_CHOKE_MAX, +}; + +#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1) + +struct tc_choke_qopt { + __u32 limit; /* Hard queue length (packets) */ + __u32 qth_min; /* Min average threshold (packets) */ + __u32 qth_max; /* Max average threshold (packets) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; /* see RED flags */ +}; + +struct tc_choke_xstats { + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ + __u32 matched; /* Drops due to flow match */ +}; + /* HTB section */ #define TC_HTB_NUMPRIO 8 #define TC_HTB_MAXDEPTH 8 -- cgit v1.2.3 From fa9921e46fd52b78070dc67ce0d27ec301a90410 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 2 Feb 2011 06:29:02 +0000 Subject: ipsec: allow to align IPv4 AH on 32 bits The Linux IPv4 AH stack aligns the AH header on a 64 bit boundary (like in IPv6). This is not RFC compliant (see RFC4302, Section 3.3.3.2.1), it should be aligned on 32 bits. For most of the authentication algorithms, the ICV size is 96 bits. The AH header alignment on 32 or 64 bits gives the same results. However for SHA-256-128 for instance, the wrong 64 bit alignment results in adding useless padding in IPv4 AH, which is forbidden by the RFC. To avoid breaking backward compatibility, we use a new flag (XFRM_STATE_ALIGN4) do change original behavior. Initial patch from Dang Hongwu and Christophe Gouault . Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 930fdd2de79c..b93d6f598085 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -350,6 +350,7 @@ struct xfrm_usersa_info { #define XFRM_STATE_WILDRECV 8 #define XFRM_STATE_ICMP 16 #define XFRM_STATE_AF_UNSPEC 32 +#define XFRM_STATE_ALIGN4 64 }; struct xfrm_usersa_id { -- cgit v1.2.3 From 6d1d4ea4a82f8c17a3ff7c2f677bc3d41ea7484b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 8 Feb 2011 23:32:17 +0100 Subject: ssb: extract boardflags2 for SPROMs rev 4 and 5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- include/linux/ssb/ssb_regs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index 489f7b6d61c5..df9211a84634 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -268,6 +268,8 @@ /* SPROM Revision 4 */ #define SSB_SPROM4_BFLLO 0x0044 /* Boardflags (low 16 bits) */ #define SSB_SPROM4_BFLHI 0x0046 /* Board Flags Hi */ +#define SSB_SPROM4_BFL2LO 0x0048 /* Board flags 2 (low 16 bits) */ +#define SSB_SPROM4_BFL2HI 0x004A /* Board flags 2 Hi */ #define SSB_SPROM4_IL0MAC 0x004C /* 6 byte MAC address for a/b/g/n */ #define SSB_SPROM4_CCODE 0x0052 /* Country Code (2 bytes) */ #define SSB_SPROM4_GPIOA 0x0056 /* Gen. Purpose IO # 0 and 1 */ @@ -358,6 +360,8 @@ #define SSB_SPROM5_CCODE 0x0044 /* Country Code (2 bytes) */ #define SSB_SPROM5_BFLLO 0x004A /* Boardflags (low 16 bits) */ #define SSB_SPROM5_BFLHI 0x004C /* Board Flags Hi */ +#define SSB_SPROM5_BFL2LO 0x004E /* Board flags 2 (low 16 bits) */ +#define SSB_SPROM5_BFL2HI 0x0050 /* Board flags 2 Hi */ #define SSB_SPROM5_IL0MAC 0x0052 /* 6 byte MAC address for a/b/g/n */ #define SSB_SPROM5_GPIOA 0x0076 /* Gen. Purpose IO # 0 and 1 */ #define SSB_SPROM5_GPIOA_P0 0x00FF /* Pin 0 */ -- cgit v1.2.3 From d033d526a465c4bb8a499a0b5df65b3e7cf4da6f Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 10 Feb 2011 14:40:01 +0000 Subject: ixgbe: DCB, implement 802.1Qaz routines Implements 802.1Qaz support for ixgbe driver. Additionally, this adds IEEE_8021QAZ_TSA_{} defines to dcbnl.h this is to avoid having to use cryptic numeric codes for the TSA type. Signed-off-by: John Fastabend Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher --- include/linux/dcbnl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 68cd248f6d3e..cd8d518efa3b 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -25,6 +25,11 @@ /* IEEE 802.1Qaz std supported values */ #define IEEE_8021QAZ_MAX_TCS 8 +#define IEEE_8021QAZ_TSA_STRICT 0 +#define IEEE_8021QAZ_TSA_CB_SHABER 1 +#define IEEE_8021QAZ_TSA_ETS 2 +#define IEEE_8021QAZ_TSA_VENDOR 255 + /* This structure contains the IEEE 802.1Qaz ETS managed object * * @willing: willing bit in ETS configuratin TLV -- cgit v1.2.3 From d59cfde2fb960b5970ccb5a38cea25d38b37a8e8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 12 Feb 2011 00:46:06 +0000 Subject: net: remove the unnecessary dance around skb_bond_should_drop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to check (master) twice and to drive in and out the header file. Signed-off-by: Jiri Pirko Reviewed-by: Nicolas de Pesloüan Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c7d707452228..5a5baeaaa50f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2437,17 +2437,6 @@ static inline void netif_set_gso_max_size(struct net_device *dev, dev->gso_max_size = size; } -extern int __skb_bond_should_drop(struct sk_buff *skb, - struct net_device *master); - -static inline int skb_bond_should_drop(struct sk_buff *skb, - struct net_device *master) -{ - if (master) - return __skb_bond_should_drop(skb, master); - return 0; -} - extern struct pernet_operations __net_initdata loopback_net_ops; static inline int dev_ethtool_get_settings(struct net_device *dev, -- cgit v1.2.3 From 1765a575334f1a232c1478accdee5c7d19f4b3e3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 12 Feb 2011 06:48:36 +0000 Subject: net: make dev->master general dev->master is now tightly connected to bonding driver. This patch makes this pointer more general and ready to be used by others. - netdev_set_master() - bond specifics moved to new function netdev_set_bond_master() - introduced netif_is_bond_slave() to check if device is a bonding slave Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5a5baeaaa50f..5a42b1003767 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2377,6 +2377,8 @@ extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); +extern int netdev_set_bond_master(struct net_device *dev, + struct net_device *master); extern int skb_checksum_help(struct sk_buff *skb); extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features); #ifdef CONFIG_BUG @@ -2437,6 +2439,11 @@ static inline void netif_set_gso_max_size(struct net_device *dev, dev->gso_max_size = size; } +static inline int netif_is_bond_slave(struct net_device *dev) +{ + return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; +} + extern struct pernet_operations __net_initdata loopback_net_ops; static inline int dev_ethtool_get_settings(struct net_device *dev, -- cgit v1.2.3 From fbaec0ea54f7d9131891ff98744e82c073ce03b1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 13 Feb 2011 10:15:37 +0000 Subject: rtnetlink: implement setting of master device This patch allows userspace to enslave/release slave devices via netlink interface using IFLA_MASTER. This introduces generic way to add/remove underling devices. Signed-off-by: Jiri Pirko Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5a42b1003767..d08ef6538579 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -783,6 +783,14 @@ struct netdev_tc_txq { * Set hardware filter for RFS. rxq_index is the target queue index; * flow_id is a flow ID to be passed to rps_may_expire_flow() later. * Return the filter ID on success, or a negative error code. + * + * Slave management functions (for bridge, bonding, etc). User should + * call netdev_set_master() to set dev->master properly. + * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev); + * Called to make another netdev an underling. + * + * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); + * Called to release previously enslaved netdev. */ #define HAVE_NET_DEVICE_OPS struct net_device_ops { @@ -862,6 +870,10 @@ struct net_device_ops { u16 rxq_index, u32 flow_id); #endif + int (*ndo_add_slave)(struct net_device *dev, + struct net_device *slave_dev); + int (*ndo_del_slave)(struct net_device *dev, + struct net_device *slave_dev); }; /* -- cgit v1.2.3 From d606ef3fe0c57504b8e534c58498f73a6abc049a Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 14 Feb 2011 02:05:33 +0000 Subject: phy/micrel: add ability to support 50MHz RMII clock on KZS8051RNL Platform code can now set the MICREL_PHY_50MHZ_CLK bit of dev_flags in a fixup routine (registered with phy_register_fixup_for_uid()), to make the KZS8051RNL PHY work with 50MHz RMII reference clock. Cc: David J. Choi Signed-off-by: Baruch Siach Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/micrel_phy.h (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h new file mode 100644 index 000000000000..dd8da342a991 --- /dev/null +++ b/include/linux/micrel_phy.h @@ -0,0 +1,16 @@ +#ifndef _MICREL_PHY_H +#define _MICREL_PHY_H + +#define MICREL_PHY_ID_MASK 0x00fffff0 + +#define PHY_ID_KSZ9021 0x00221611 +#define PHY_ID_KS8737 0x00221720 +#define PHY_ID_KS8041 0x00221510 +#define PHY_ID_KS8051 0x00221550 +/* both for ks8001 Rev. A/B, and for ks8721 Rev 3. */ +#define PHY_ID_KS8001 0x0022161A + +/* struct phy_device dev_flags definitions */ +#define MICREL_PHY_50MHZ_CLK 0x00000001 + +#endif /* _MICREL_PHY_H */ -- cgit v1.2.3 From 212b573f5552c60265da721ff9ce32e3462a2cdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:16 +0000 Subject: ethtool: enable GSO and GRO by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d08ef6538579..168e3ad14daf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -984,6 +984,9 @@ struct net_device { NETIF_F_SG | NETIF_F_HIGHDMA | \ NETIF_F_FRAGLIST) + /* changeable features with no special hardware requirements */ +#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) + /* Interface index. Unique device identifier */ int ifindex; int iflink; -- cgit v1.2.3 From 0a417704777ed29d0e8c72b7274a328e61248e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:17 +0000 Subject: ethtool: factorize get/set_one_feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows to enable GRO even if RX csum is disabled. GRO will not be used for packets without hardware checksum anyway. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 168e3ad14daf..dede3fdbb4be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -976,6 +976,12 @@ struct net_device { #define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) #define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) +#define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) + +#define NETIF_F_ALL_TX_OFFLOADS (NETIF_F_ALL_CSUM | NETIF_F_SG | \ + NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ + NETIF_F_SCTP_CSUM | NETIF_F_FCOE_CRC) + /* * If one device supports one of these features, then enable them * for all in netdev_increment_features. -- cgit v1.2.3 From 5455c6998d34dc983a8693500e4dffefc3682dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:17 +0000 Subject: net: Introduce new feature setting ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduces a new framework to handle device features setting. It consists of: - new fields in struct net_device: + hw_features - features that hw/driver supports toggling + wanted_features - features that user wants enabled, when possible - new netdev_ops: + feat = ndo_fix_features(dev, feat) - API checking constraints for enabling features or their combinations + ndo_set_features(dev) - API updating hardware state to match changed dev->features - new ethtool commands: + ETHTOOL_GFEATURES/ETHTOOL_SFEATURES: get/set dev->wanted_features and trigger device reconfiguration if resulting dev->features changed + ETHTOOL_GSTRINGS(ETH_SS_FEATURES): get feature bits names (meaning) Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/ethtool.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/netdevice.h | 37 +++++++++++++++++++-- 2 files changed, 120 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 1908929204a9..806e716bb4fb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -251,6 +251,7 @@ enum ethtool_stringset { ETH_SS_STATS, ETH_SS_PRIV_FLAGS, ETH_SS_NTUPLE_FILTERS, + ETH_SS_FEATURES, }; /* for passing string sets for data tagging */ @@ -523,6 +524,87 @@ struct ethtool_flash { char data[ETHTOOL_FLASH_MAX_FILENAME]; }; +/* for returning and changing feature sets */ + +/** + * struct ethtool_get_features_block - block with state of 32 features + * @available: mask of changeable features + * @requested: mask of features requested to be enabled if possible + * @active: mask of currently enabled features + * @never_changed: mask of features not changeable for any device + */ +struct ethtool_get_features_block { + __u32 available; + __u32 requested; + __u32 active; + __u32 never_changed; +}; + +/** + * struct ethtool_gfeatures - command to get state of device's features + * @cmd: command number = %ETHTOOL_GFEATURES + * @size: in: number of elements in the features[] array; + * out: number of elements in features[] needed to hold all features + * @features: state of features + */ +struct ethtool_gfeatures { + __u32 cmd; + __u32 size; + struct ethtool_get_features_block features[0]; +}; + +/** + * struct ethtool_set_features_block - block with request for 32 features + * @valid: mask of features to be changed + * @requested: values of features to be changed + */ +struct ethtool_set_features_block { + __u32 valid; + __u32 requested; +}; + +/** + * struct ethtool_sfeatures - command to request change in device's features + * @cmd: command number = %ETHTOOL_SFEATURES + * @size: array size of the features[] array + * @features: feature change masks + */ +struct ethtool_sfeatures { + __u32 cmd; + __u32 size; + struct ethtool_set_features_block features[0]; +}; + +/* + * %ETHTOOL_SFEATURES changes features present in features[].valid to the + * values of corresponding bits in features[].requested. Bits in .requested + * not set in .valid or not changeable are ignored. + * + * Returns %EINVAL when .valid contains undefined or never-changable bits + * or size is not equal to required number of features words (32-bit blocks). + * Returns >= 0 if request was completed; bits set in the value mean: + * %ETHTOOL_F_UNSUPPORTED - there were bits set in .valid that are not + * changeable (not present in %ETHTOOL_GFEATURES' features[].available) + * those bits were ignored. + * %ETHTOOL_F_WISH - some or all changes requested were recorded but the + * resulting state of bits masked by .valid is not equal to .requested. + * Probably there are other device-specific constraints on some features + * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered + * here as though ignored bits were cleared. + * + * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of + * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands + * for ETH_SS_FEATURES string set. First entry in the table corresponds to least + * significant bit in features[0] fields. Empty strings mark undefined features. + */ +enum ethtool_sfeatures_retval_bits { + ETHTOOL_F_UNSUPPORTED__BIT, + ETHTOOL_F_WISH__BIT, +}; + +#define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT) +#define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT) + #ifdef __KERNEL__ #include @@ -744,6 +826,9 @@ struct ethtool_ops { #define ETHTOOL_GRXFHINDIR 0x00000038 /* Get RX flow hash indir'n table */ #define ETHTOOL_SRXFHINDIR 0x00000039 /* Set RX flow hash indir'n table */ +#define ETHTOOL_GFEATURES 0x0000003a /* Get device offload settings */ +#define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */ + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dede3fdbb4be..85f67e225f60 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -791,6 +791,18 @@ struct netdev_tc_txq { * * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to release previously enslaved netdev. + * + * Feature/offload setting functions. + * u32 (*ndo_fix_features)(struct net_device *dev, u32 features); + * Adjusts the requested feature flags according to device-specific + * constraints, and returns the resulting flags. Must not modify + * the device state. + * + * int (*ndo_set_features)(struct net_device *dev, u32 features); + * Called to update device configuration to new features. Passed + * feature set might be less than what was returned by ndo_fix_features()). + * Must return >0 or -errno if it changed dev->features itself. + * */ #define HAVE_NET_DEVICE_OPS struct net_device_ops { @@ -874,6 +886,10 @@ struct net_device_ops { struct net_device *slave_dev); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); + u32 (*ndo_fix_features)(struct net_device *dev, + u32 features); + int (*ndo_set_features)(struct net_device *dev, + u32 features); }; /* @@ -925,12 +941,18 @@ struct net_device { struct list_head napi_list; struct list_head unreg_list; - /* Net device features */ + /* currently active device features */ u32 features; - + /* user-changeable features */ + u32 hw_features; + /* user-requested features */ + u32 wanted_features; /* VLAN feature mask */ u32 vlan_features; + /* Net device feature bits; if you change something, + * also update netdev_features_strings[] in ethtool.c */ + #define NETIF_F_SG 1 /* Scatter/gather IO. */ #define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ @@ -966,6 +988,12 @@ struct net_device { #define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT) #define NETIF_F_FSO (SKB_GSO_FCOE << NETIF_F_GSO_SHIFT) + /* Features valid for ethtool to change */ + /* = all defined minus driver/device-class-related */ +#define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ + NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) +#define NETIF_F_ETHTOOL_BITS (0x1f3fffff & ~NETIF_F_NEVER_CHANGE) + /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ NETIF_F_TSO6 | NETIF_F_UFO) @@ -2428,8 +2456,13 @@ extern char *netdev_drivername(const struct net_device *dev, char *buffer, int l extern void linkwatch_run_queue(void); +static inline u32 netdev_get_wanted_features(struct net_device *dev) +{ + return (dev->features & ~dev->hw_features) | dev->wanted_features; +} u32 netdev_increment_features(u32 all, u32 one, u32 mask); u32 netdev_fix_features(struct net_device *dev, u32 features); +void netdev_update_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); -- cgit v1.2.3 From e83d360d9a7e5d71d55c13e96b19109a2ea23bf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:18 +0000 Subject: net: introduce NETIF_F_RXCSUM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce NETIF_F_RXCSUM to replace device-private flags for RX checksum offload. Integrate it with ndo_fix_features. ethtool_op_get_rx_csum() is removed altogether as nothing in-tree uses it. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/ethtool.h | 1 - include/linux/netdevice.h | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 806e716bb4fb..54d776c2c1b5 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -625,7 +625,6 @@ struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ u32 ethtool_op_get_link(struct net_device *dev); -u32 ethtool_op_get_rx_csum(struct net_device *dev); u32 ethtool_op_get_tx_csum(struct net_device *dev); int ethtool_op_set_tx_csum(struct net_device *dev, u32 data); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 85f67e225f60..ffe56c16df8a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -977,6 +977,7 @@ struct net_device { #define NETIF_F_FCOE_MTU (1 << 26) /* Supports max FCoE MTU, 2158 bytes*/ #define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */ #define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ +#define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -992,7 +993,7 @@ struct net_device { /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) -#define NETIF_F_ETHTOOL_BITS (0x1f3fffff & ~NETIF_F_NEVER_CHANGE) +#define NETIF_F_ETHTOOL_BITS (0x3f3fffff & ~NETIF_F_NEVER_CHANGE) /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ @@ -2510,6 +2511,8 @@ static inline int dev_ethtool_get_settings(struct net_device *dev, static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev) { + if (dev->hw_features & NETIF_F_RXCSUM) + return !!(dev->features & NETIF_F_RXCSUM); if (!dev->ethtool_ops || !dev->ethtool_ops->get_rx_csum) return 0; return dev->ethtool_ops->get_rx_csum(dev); -- cgit v1.2.3 From fd23c3b31107e2fc483301ee923d8a1db14e53f4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Feb 2011 12:42:28 -0800 Subject: ipv4: Add hash table of interface addresses. This will be used to optimize __ip_dev_find() and friends. With help from Eric Dumazet. Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ae8fdc54e0c0..5f8146695b7f 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -144,6 +144,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) struct in_ifaddr { + struct hlist_node hash; struct in_ifaddr *ifa_next; struct in_device *ifa_dev; struct rcu_head rcu_head; -- cgit v1.2.3 From 8b2988c13da00ac9d03f1764fdb26180c188f9e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 16 Feb 2011 13:58:26 +0100 Subject: ssb: remove invalid define SSB_TMSLOW_PHYCLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was incorrectly introduced in d2730b2a6a019d14455556019d744ab051e6554b. We have already fixed function to use correct define, but forgot remove old one. Signed-off-by: Rafał Miłecki Cc: Gábor Stefanik Signed-off-by: John W. Linville --- include/linux/ssb/ssb_regs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index df9211a84634..9b1125bea1fc 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -97,7 +97,6 @@ #define SSB_TMSLOW_RESET 0x00000001 /* Reset */ #define SSB_TMSLOW_REJECT_22 0x00000002 /* Reject (Backplane rev 2.2) */ #define SSB_TMSLOW_REJECT_23 0x00000004 /* Reject (Backplane rev 2.3) */ -#define SSB_TMSLOW_PHYCLK 0x00000010 /* MAC PHY Clock Control Enable */ #define SSB_TMSLOW_CLOCK 0x00010000 /* Clock Enable */ #define SSB_TMSLOW_FGC 0x00020000 /* Force Gated Clocks On */ #define SSB_TMSLOW_PE 0x40000000 /* Power Management Enable */ -- cgit v1.2.3 From b1a1bcf714c4d79f7872a34138d100941ebb0a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 17 Feb 2011 01:50:50 +0100 Subject: ssb: when needed, reject IM input while disabling device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- include/linux/ssb/ssb_regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index 9b1125bea1fc..402955ae48ce 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -85,6 +85,8 @@ #define SSB_IMSTATE_AP_RSV 0x00000030 /* Reserved */ #define SSB_IMSTATE_IBE 0x00020000 /* In Band Error */ #define SSB_IMSTATE_TO 0x00040000 /* Timeout */ +#define SSB_IMSTATE_BUSY 0x01800000 /* Busy (Backplane rev >= 2.3 only) */ +#define SSB_IMSTATE_REJECT 0x02000000 /* Reject (Backplane rev >= 2.3 only) */ #define SSB_INTVEC 0x0F94 /* SB Interrupt Mask */ #define SSB_INTVEC_PCI 0x00000001 /* Enable interrupts for PCI */ #define SSB_INTVEC_ENET0 0x00000002 /* Enable interrupts for enet 0 */ -- cgit v1.2.3 From eaefd1105bc431ef329599e307a07f2a36ae7872 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Feb 2011 03:26:36 +0000 Subject: net: add __rcu annotations to sk_wq and wq Add proper RCU annotations/verbs to sk_wq and wq members Fix __sctp_write_space() sk_sleep() abuse (and sock->wq access) Fix sunrpc sk_sleep() abuse too Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/net.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 16faa130088c..94de83c0f877 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -118,6 +118,7 @@ enum sock_shutdown_cmd { }; struct socket_wq { + /* Note: wait MUST be first field of socket_wq */ wait_queue_head_t wait; struct fasync_struct *fasync_list; struct rcu_head rcu; @@ -142,7 +143,7 @@ struct socket { unsigned long flags; - struct socket_wq *wq; + struct socket_wq __rcu *wq; struct file *file; struct sock *sk; -- cgit v1.2.3 From e33f770426674a565a188042caf3f974f8b3722d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:13:15 -0800 Subject: xfrm: Mark flowi arg to security_xfrm_state_pol_flow_match() const. Signed-off-by: David S. Miller --- include/linux/security.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index b2b7f9749f5e..9b5f184a7f65 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1623,7 +1623,7 @@ struct security_operations { int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int (*xfrm_state_pol_flow_match) (struct xfrm_state *x, struct xfrm_policy *xp, - struct flowi *fl); + const struct flowi *fl); int (*xfrm_decode_session) (struct sk_buff *skb, u32 *secid, int ckall); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -2761,7 +2761,8 @@ int security_xfrm_state_delete(struct xfrm_state *x); void security_xfrm_state_free(struct xfrm_state *x); int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int security_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, struct flowi *fl); + struct xfrm_policy *xp, + const struct flowi *fl); int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid); void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl); @@ -2813,7 +2814,7 @@ static inline int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_s } static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, struct flowi *fl) + struct xfrm_policy *xp, const struct flowi *fl) { return 1; } -- cgit v1.2.3 From e13e02a3c68d899169c78d9a18689bd73491d59a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Feb 2011 10:56:17 +0000 Subject: net_sched: SFB flow scheduler This is the Stochastic Fair Blue scheduler, based on work from : W. Feng, D. Kandlur, D. Saha, K. Shin. Blue: A New Class of Active Queue Management Algorithms. U. Michigan CSE-TR-387-99, April 1999. http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf This implementation is based on work done by Juliusz Chroboczek General SFB algorithm can be found in figure 14, page 15: B[l][n] : L x N array of bins (L levels, N bins per level) enqueue() Calculate hash function values h{0}, h{1}, .. h{L-1} Update bins at each level for i = 0 to L - 1 if (B[i][h{i}].qlen > bin_size) B[i][h{i}].p_mark += p_increment; else if (B[i][h{i}].qlen == 0) B[i][h{i}].p_mark -= p_decrement; p_min = min(B[0][h{0}].p_mark ... B[L-1][h{L-1}].p_mark); if (p_min == 1.0) ratelimit(); else mark/drop with probabilty p_min; I did the adaptation of Juliusz code to meet current kernel standards, and various changes to address previous comments : http://thread.gmane.org/gmane.linux.network/90225 http://thread.gmane.org/gmane.linux.network/90375 Default flow classifier is the rxhash introduced by RPS in 2.6.35, but we can use an external flow classifier if wanted. tc qdisc add dev $DEV parent 1:11 handle 11: \ est 0.5sec 2sec sfb limit 128 tc filter add dev $DEV protocol ip parent 11: handle 3 \ flow hash keys dst divisor 1024 Notes: 1) SFB default child qdisc is pfifo_fast. It can be changed by another qdisc but a child qdisc MUST not drop a packet previously queued. This is because SFB needs to handle a dequeued packet in order to maintain its virtual queue states. pfifo_head_drop or CHOKe should not be used. 2) ECN is enabled by default, unlike RED/CHOKe/GRED With help from Patrick McHardy & Andi Kleen Signed-off-by: Eric Dumazet CC: Juliusz Chroboczek CC: Stephen Hemminger CC: Patrick McHardy CC: Andi Kleen CC: John W. Linville Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index d4bb6f58c90c..5afee2b238bd 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -522,4 +522,43 @@ struct tc_mqprio_qopt { __u16 offset[TC_QOPT_MAX_QUEUE]; }; +/* SFB */ + +enum { + TCA_SFB_UNSPEC, + TCA_SFB_PARMS, + __TCA_SFB_MAX, +}; + +#define TCA_SFB_MAX (__TCA_SFB_MAX - 1) + +/* + * Note: increment, decrement are Q0.16 fixed-point values. + */ +struct tc_sfb_qopt { + __u32 rehash_interval; /* delay between hash move, in ms */ + __u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */ + __u32 max; /* max len of qlen_min */ + __u32 bin_size; /* maximum queue length per bin */ + __u32 increment; /* probability increment, (d1 in Blue) */ + __u32 decrement; /* probability decrement, (d2 in Blue) */ + __u32 limit; /* max SFB queue length */ + __u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */ + __u32 penalty_burst; +}; + +struct tc_sfb_xstats { + __u32 earlydrop; + __u32 penaltydrop; + __u32 bucketdrop; + __u32 queuedrop; + __u32 childdrop; /* drops in child qdisc */ + __u32 marked; + __u32 maxqlen; + __u32 maxprob; + __u32 avgprob; +}; + +#define SFB_MAX_PROB 0xFFFF + #endif -- cgit v1.2.3 From 39fc0ce5710c53bad14aaba1a789eec810c556f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 22 Feb 2011 16:52:29 +0000 Subject: net: Implement SFEATURES compatibility for not updated drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use discrete setting ops for not updated drivers. This will not make them conform to full G/SFEATURES semantics, though. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/ethtool.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 54d776c2c1b5..aac3e2eeb4fd 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -591,6 +591,9 @@ struct ethtool_sfeatures { * Probably there are other device-specific constraints on some features * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered * here as though ignored bits were cleared. + * %ETHTOOL_F_COMPAT - some or all changes requested were made by calling + * compatibility functions. Requested offload state cannot be properly + * managed by kernel. * * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands @@ -600,10 +603,12 @@ struct ethtool_sfeatures { enum ethtool_sfeatures_retval_bits { ETHTOOL_F_UNSUPPORTED__BIT, ETHTOOL_F_WISH__BIT, + ETHTOOL_F_COMPAT__BIT, }; #define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT) #define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT) +#define ETHTOOL_F_COMPAT (1 << ETHTOOL_F_COMPAT__BIT) #ifdef __KERNEL__ -- cgit v1.2.3 From 5413b4c6c07b659e52c84a4e40d897b32b89834f Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 18 Jan 2011 13:24:55 -0500 Subject: tipc: Improve handling of invalid link tolerance values Enhances TIPC link code to ignore an invalid link tolerance value contained in an incoming LINK_PROTOCOL message, rather than processing the value and potentially causing a divide-by-zero error. Also add a compile-time check that catches attempts to redefine TIPC's minimum link tolerance value in a manner that might result in the same divide-by-zero error at run-time. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- include/linux/tipc_config.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h index 7d42460a5e3c..c14102dee22e 100644 --- a/include/linux/tipc_config.h +++ b/include/linux/tipc_config.h @@ -193,6 +193,10 @@ #define TIPC_DEF_LINK_TOL 1500 #define TIPC_MAX_LINK_TOL 30000 +#if (TIPC_MIN_LINK_TOL < 16) +#error "TIPC_MIN_LINK_TOL is too small (abort limit may be NaN)" +#endif + /* * Link window limits (min, default, max), in packets */ -- cgit v1.2.3 From 77c81e0bb8af3f1a0e5d84dd0346fe57dfe3da27 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 18 Jan 2011 13:37:09 -0500 Subject: tipc: Clean out all remaining instances of #if 0'd unused code Remove all instances of legacy or proposed-but-not-implemented code that lives within an #if 0 ... #endif block. If some of it is needed in the future it can recovered out of history, but there is no need for it to clutter up the active code base. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- include/linux/tipc.h | 8 +------- include/linux/tipc_config.h | 28 +--------------------------- 2 files changed, 2 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tipc.h b/include/linux/tipc.h index 1eefa3f6d1f4..a5b994a204d2 100644 --- a/include/linux/tipc.h +++ b/include/linux/tipc.h @@ -2,7 +2,7 @@ * include/linux/tipc.h: Include file for TIPC socket interface * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -130,12 +130,6 @@ static inline unsigned int tipc_node(__u32 addr) #define TIPC_SUB_PORTS 0x01 /* filter for port availability */ #define TIPC_SUB_SERVICE 0x02 /* filter for service availability */ #define TIPC_SUB_CANCEL 0x04 /* cancel a subscription */ -#if 0 -/* The following filter options are not currently implemented */ -#define TIPC_SUB_NO_BIND_EVTS 0x04 /* filter out "publish" events */ -#define TIPC_SUB_NO_UNBIND_EVTS 0x08 /* filter out "withdraw" events */ -#define TIPC_SUB_SINGLE_EVT 0x10 /* expire after first event */ -#endif #define TIPC_WAIT_FOREVER (~0) /* timeout for permanent subscription */ diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h index c14102dee22e..011556fcef04 100644 --- a/include/linux/tipc_config.h +++ b/include/linux/tipc_config.h @@ -2,7 +2,7 @@ * include/linux/tipc_config.h: Include file for TIPC configuration interface * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2007, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -76,13 +76,6 @@ #define TIPC_CMD_SHOW_LINK_STATS 0x000B /* tx link_name, rx ultra_string */ #define TIPC_CMD_SHOW_STATS 0x000F /* tx unsigned, rx ultra_string */ -#if 0 -#define TIPC_CMD_SHOW_PORT_STATS 0x0008 /* tx port_ref, rx ultra_string */ -#define TIPC_CMD_RESET_PORT_STATS 0x0009 /* tx port_ref, rx none */ -#define TIPC_CMD_GET_ROUTES 0x000A /* tx ?, rx ? */ -#define TIPC_CMD_GET_LINK_PEER 0x000D /* tx link_name, rx ? */ -#endif - /* * Protected commands: * May only be issued by "network administration capable" process. @@ -109,13 +102,6 @@ #define TIPC_CMD_DUMP_LOG 0x410B /* tx none, rx ultra_string */ #define TIPC_CMD_RESET_LINK_STATS 0x410C /* tx link_name, rx none */ -#if 0 -#define TIPC_CMD_CREATE_LINK 0x4103 /* tx link_create, rx none */ -#define TIPC_CMD_REMOVE_LINK 0x4104 /* tx link_name, rx none */ -#define TIPC_CMD_BLOCK_LINK 0x4105 /* tx link_name, rx none */ -#define TIPC_CMD_UNBLOCK_LINK 0x4106 /* tx link_name, rx none */ -#endif - /* * Private commands: * May only be issued by "network administration capable" process. @@ -123,9 +109,6 @@ */ #define TIPC_CMD_SET_NODE_ADDR 0x8001 /* tx net_addr, rx none */ -#if 0 -#define TIPC_CMD_SET_ZONE_MASTER 0x8002 /* tx none, rx none */ -#endif #define TIPC_CMD_SET_REMOTE_MNG 0x8003 /* tx unsigned, rx none */ #define TIPC_CMD_SET_MAX_PORTS 0x8004 /* tx unsigned, rx none */ #define TIPC_CMD_SET_MAX_PUBL 0x8005 /* tx unsigned, rx none */ @@ -251,15 +234,6 @@ struct tipc_name_table_query { #define TIPC_CFG_NOT_SUPPORTED "\x84" /* request is not supported by TIPC */ #define TIPC_CFG_INVALID_VALUE "\x85" /* request has invalid argument value */ -#if 0 -/* prototypes TLV structures for proposed commands */ -struct tipc_link_create { - __u32 domain; - struct tipc_media_addr peer_addr; - char bearer_name[TIPC_MAX_BEARER_NAME]; -}; -#endif - /* * A TLV consists of a descriptor, followed by the TLV value. * TLV descriptor fields are stored in network byte order; -- cgit v1.2.3 From df173bda2639ac744ccf596ec1f8f7e66fe4c343 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 13:04:19 +0000 Subject: netem: define NETEM_DIST_MAX Rather than magic constant in code, expose the maximum size of packet distribution table in API. In iproute2, q_netem defines MAX_DIST as 16K already. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 5afee2b238bd..891382268fe1 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -495,6 +495,7 @@ struct tc_netem_corrupt { }; #define NETEM_DIST_SCALE 8192 +#define NETEM_DIST_MAX 16384 /* DRR */ -- cgit v1.2.3 From 661b79725fea030803a89a16cda506bac8eeca78 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 13:04:21 +0000 Subject: netem: revised correlated loss generator This is a patch originated with Stefano Salsano and Fabio Ludovici. It provides several alternative loss models for use with netem. This patch adds two state machine based loss models. See: http://netgroup.uniroma2.it/twiki/bin/view.cgi/Main/NetemCLG Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 891382268fe1..b1032a3fafdc 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -464,6 +464,7 @@ enum { TCA_NETEM_DELAY_DIST, TCA_NETEM_REORDER, TCA_NETEM_CORRUPT, + TCA_NETEM_LOSS, __TCA_NETEM_MAX, }; @@ -494,6 +495,31 @@ struct tc_netem_corrupt { __u32 correlation; }; +enum { + NETEM_LOSS_UNSPEC, + NETEM_LOSS_GI, /* General Intuitive - 4 state model */ + NETEM_LOSS_GE, /* Gilbert Elliot models */ + __NETEM_LOSS_MAX +}; +#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1) + +/* State transition probablities for 4 state model */ +struct tc_netem_gimodel { + __u32 p13; + __u32 p31; + __u32 p32; + __u32 p14; + __u32 p23; +}; + +/* Gilbert-Elliot models */ +struct tc_netem_gemodel { + __u32 p; + __u32 r; + __u32 h; + __u32 k1; +}; + #define NETEM_DIST_SCALE 8192 #define NETEM_DIST_MAX 16384 -- cgit v1.2.3 From 080e4130b1fb6a02e75149a1cccc8192e734713d Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 17 Feb 2011 23:43:33 +0000 Subject: netpoll: remove IFF_IN_NETPOLL flag V4: rebase to net-next-2.6 This patch removes the flag IFF_IN_NETPOLL, we don't need it any more since we have netpoll_tx_running() now. Signed-off-by: WANG Cong Acked-by: Neil Horman Cc: Herbert Xu Signed-off-by: David S. Miller --- include/linux/if.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if.h b/include/linux/if.h index 123959927745..3bc63e6a02f7 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -71,11 +71,10 @@ * release skb->dst */ #define IFF_DONT_BRIDGE 0x800 /* disallow bridging this ether dev */ -#define IFF_IN_NETPOLL 0x1000 /* whether we are processing netpoll */ -#define IFF_DISABLE_NETPOLL 0x2000 /* disable netpoll at run-time */ -#define IFF_MACVLAN_PORT 0x4000 /* device used as macvlan port */ -#define IFF_BRIDGE_PORT 0x8000 /* device used as bridge port */ -#define IFF_OVS_DATAPATH 0x10000 /* device used as Open vSwitch +#define IFF_DISABLE_NETPOLL 0x1000 /* disable netpoll at run-time */ +#define IFF_MACVLAN_PORT 0x2000 /* device used as macvlan port */ +#define IFF_BRIDGE_PORT 0x4000 /* device used as bridge port */ +#define IFF_OVS_DATAPATH 0x8000 /* device used as Open vSwitch * datapath port */ #define IF_GET_IFACE 0x0001 /* for querying only */ -- cgit v1.2.3 From c8dcfd8a046c1f49af0c15726761af17b957962d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 27 Feb 2011 22:08:00 +0100 Subject: cfg80211: add a field for the bitrate of the last rx data packet from a station Also fix a typo in the STATION_INFO_TX_BITRATE description Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 821ffb954f14..30022189104d 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1243,6 +1243,8 @@ enum nl80211_rate_info { * @NL80211_STA_INFO_LLID: the station's mesh LLID * @NL80211_STA_INFO_PLID: the station's mesh PLID * @NL80211_STA_INFO_PLINK_STATE: peer link state for the station + * @NL80211_STA_INFO_RX_BITRATE: last unicast data frame rx rate, nested + * attribute, like NL80211_STA_INFO_TX_BITRATE. * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -1261,6 +1263,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_TX_RETRIES, NL80211_STA_INFO_TX_FAILED, NL80211_STA_INFO_SIGNAL_AVG, + NL80211_STA_INFO_RX_BITRATE, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, -- cgit v1.2.3 From 23b41168fc942a4a041325a04ecc1bd17d031a3e Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Sat, 26 Feb 2011 22:39:12 +0000 Subject: netdevice: make initial group visible to userspace INIT_NETDEV_GROUP is needed by userspace, move it outside __KERNEL__ guards. Signed-off-by: Vlad Dogaru Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ffe56c16df8a..8be4056ad251 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -75,9 +75,6 @@ struct wireless_dev; #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ -/* Initial net device group. All devices belong to group 0 by default. */ -#define INIT_NETDEV_GROUP 0 - /* * Transmit return codes: transmit return codes originate from three different * namespaces: @@ -141,6 +138,9 @@ static inline bool dev_xmit_complete(int rc) #define MAX_ADDR_LEN 32 /* Largest hardware address length */ +/* Initial net device group. All devices belong to group 0 by default. */ +#define INIT_NETDEV_GROUP 0 + #ifdef __KERNEL__ /* * Compute the worst case header length according to the protocols -- cgit v1.2.3 From eed84713bc47ce2f7d675914f297ad9b6227a587 Mon Sep 17 00:00:00 2001 From: Shmulik Ravid Date: Sun, 27 Feb 2011 05:04:31 +0000 Subject: dcbnl: add support for retrieving peer configuration - ieee These 2 patches add the support for retrieving the remote or peer DCBX configuration via dcbnl for embedded DCBX stacks. The peer configuration is part of the DCBX MIB and is useful for debugging and diagnostics of the overall DCB configuration. The first patch add this support for IEEE 802.1Qaz standard the second patch add the same support for the older CEE standard. Diff for v2 - the peer-app-info is CEE specific. Signed-off-by: Shmulik Ravid Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 4c5b26e0cc48..2542685f8b3e 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -110,6 +110,20 @@ struct dcb_app { __u16 protocol; }; +/** + * struct dcb_peer_app_info - APP feature information sent by the peer + * + * @willing: willing bit in the peer APP tlv + * @error: error bit in the peer APP tlv + * + * In addition to this information the full peer APP tlv also contains + * a table of 'app_count' APP objects defined above. + */ +struct dcb_peer_app_info { + __u8 willing; + __u8 error; +}; + struct dcbmsg { __u8 dcb_family; __u8 cmd; @@ -235,11 +249,25 @@ enum dcbnl_attrs { DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, }; +/** + * enum ieee_attrs - IEEE 802.1Qaz get/set attributes + * + * @DCB_ATTR_IEEE_UNSPEC: unspecified + * @DCB_ATTR_IEEE_ETS: negotiated ETS configuration + * @DCB_ATTR_IEEE_PFC: negotiated PFC configuration + * @DCB_ATTR_IEEE_APP_TABLE: negotiated APP configuration + * @DCB_ATTR_IEEE_PEER_ETS: peer ETS configuration - get only + * @DCB_ATTR_IEEE_PEER_PFC: peer PFC configuration - get only + * @DCB_ATTR_IEEE_PEER_APP: peer APP tlv - get only + */ enum ieee_attrs { DCB_ATTR_IEEE_UNSPEC, DCB_ATTR_IEEE_ETS, DCB_ATTR_IEEE_PFC, DCB_ATTR_IEEE_APP_TABLE, + DCB_ATTR_IEEE_PEER_ETS, + DCB_ATTR_IEEE_PEER_PFC, + DCB_ATTR_IEEE_PEER_APP, __DCB_ATTR_IEEE_MAX }; #define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1) -- cgit v1.2.3 From dc6ed1df5a5f84e45e77e2acb6fd99b995414956 Mon Sep 17 00:00:00 2001 From: Shmulik Ravid Date: Sun, 27 Feb 2011 05:04:38 +0000 Subject: dcbnl: add support for retrieving peer configuration - cee This patch adds the support for retrieving the remote or peer DCBX configuration via dcbnl for embedded DCBX stacks supporting the CEE DCBX standard. Signed-off-by: Shmulik Ravid Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 2542685f8b3e..a3680a16718f 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -87,6 +87,45 @@ struct ieee_pfc { __u64 indications[IEEE_8021QAZ_MAX_TCS]; }; +/* CEE DCBX std supported values */ +#define CEE_DCBX_MAX_PGS 8 +#define CEE_DCBX_MAX_PRIO 8 + +/** + * struct cee_pg - CEE Prioity-Group managed object + * + * @willing: willing bit in the PG tlv + * @error: error bit in the PG tlv + * @pg_en: enable bit of the PG feature + * @tcs_supported: number of traffic classes supported + * @pg_bw: bandwidth percentage for each priority group + * @prio_pg: priority to PG mapping indexed by priority + */ +struct cee_pg { + __u8 willing; + __u8 error; + __u8 pg_en; + __u8 tcs_supported; + __u8 pg_bw[CEE_DCBX_MAX_PGS]; + __u8 prio_pg[CEE_DCBX_MAX_PGS]; +}; + +/** + * struct cee_pfc - CEE PFC managed object + * + * @willing: willing bit in the PFC tlv + * @error: error bit in the PFC tlv + * @pfc_en: bitmap indicating pfc enabled traffic classes + * @tcs_supported: number of traffic classes supported + */ +struct cee_pfc { + __u8 willing; + __u8 error; + __u8 pfc_en; + __u8 tcs_supported; +}; + + /* This structure contains the IEEE 802.1Qaz APP managed object. This * object is also used for the CEE std as well. There is no difference * between the objects. @@ -158,6 +197,7 @@ struct dcbmsg { * @DCB_CMD_SDCBX: set DCBX engine configuration * @DCB_CMD_GFEATCFG: get DCBX features flags * @DCB_CMD_SFEATCFG: set DCBX features negotiation flags + * @DCB_CMD_CEE_GET: get CEE aggregated configuration */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -200,6 +240,8 @@ enum dcbnl_commands { DCB_CMD_GFEATCFG, DCB_CMD_SFEATCFG, + DCB_CMD_CEE_GET, + __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, }; @@ -222,6 +264,7 @@ enum dcbnl_commands { * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED) * @DCB_ATTR_DCBX: DCBX engine configuration in the device (NLA_U8) * @DCB_ATTR_FEATCFG: DCBX features flags (NLA_NESTED) + * @DCB_ATTR_CEE: CEE std supported attributes (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -245,6 +288,9 @@ enum dcbnl_attrs { DCB_ATTR_DCBX, DCB_ATTR_FEATCFG, + /* CEE nested attributes */ + DCB_ATTR_CEE, + __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, }; @@ -279,6 +325,31 @@ enum ieee_attrs_app { }; #define DCB_ATTR_IEEE_APP_MAX (__DCB_ATTR_IEEE_APP_MAX - 1) +/** + * enum cee_attrs - CEE DCBX get attributes + * + * @DCB_ATTR_CEE_UNSPEC: unspecified + * @DCB_ATTR_CEE_PEER_PG: peer PG configuration - get only + * @DCB_ATTR_CEE_PEER_PFC: peer PFC configuration - get only + * @DCB_ATTR_CEE_PEER_APP: peer APP tlv - get only + */ +enum cee_attrs { + DCB_ATTR_CEE_UNSPEC, + DCB_ATTR_CEE_PEER_PG, + DCB_ATTR_CEE_PEER_PFC, + DCB_ATTR_CEE_PEER_APP_TABLE, + __DCB_ATTR_CEE_MAX +}; +#define DCB_ATTR_CEE_MAX (__DCB_ATTR_CEE_MAX - 1) + +enum peer_app_attr { + DCB_ATTR_CEE_PEER_APP_UNSPEC, + DCB_ATTR_CEE_PEER_APP_INFO, + DCB_ATTR_CEE_PEER_APP, + __DCB_ATTR_CEE_PEER_APP_MAX +}; +#define DCB_ATTR_CEE_PEER_APP_MAX (__DCB_ATTR_CEE_PEER_APP_MAX - 1) + /** * enum dcbnl_pfc_attrs - DCB Priority Flow Control user priority nested attrs * -- cgit v1.2.3 From c53fa1ed92cd671a1dfb1e7569e9ab672612ddc6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Mar 2011 10:55:40 -0800 Subject: netlink: kill loginuid/sessionid/sid members from struct netlink_skb_parms Netlink message processing in the kernel is synchronous these days, the session information can be collected when needed. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index e2b9e63afa68..66823b862022 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -161,9 +161,6 @@ struct netlink_skb_parms { __u32 pid; __u32 dst_group; kernel_cap_t eff_cap; - __u32 loginuid; /* Login (audit) uid */ - __u32 sessionid; /* Session id (audit) */ - __u32 sid; /* SELinux security id */ }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) -- cgit v1.2.3 From 01a16b21d6adf992aa863186c3c4e561a57c1714 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Mar 2011 13:32:07 -0800 Subject: netlink: kill eff_cap from struct netlink_skb_parms Netlink message processing in the kernel is synchronous these days, capabilities can be checked directly in security_netlink_recv() from the current process. Signed-off-by: Patrick McHardy Reviewed-by: James Morris [chrisw: update to include pohmelfs and uvesafb] Signed-off-by: Chris Wright Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 66823b862022..4c4ac3f3ce5a 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -160,7 +160,6 @@ struct netlink_skb_parms { struct ucred creds; /* Skb credentials */ __u32 pid; __u32 dst_group; - kernel_cap_t eff_cap; }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) -- cgit v1.2.3 From 256ee435b9a9ee9cca69602fe8046b27ca99fbee Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 1 Mar 2011 07:06:12 +0000 Subject: netdevice: Convert printk to pr_info in netif_tx_stop_queue This allows any caller to be prefaced by any specific pr_fmt to better identify which device driver is using this function inappropriately. Add terminating newline. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8be4056ad251..71563e7b2bfb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1765,8 +1765,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { if (WARN_ON(!dev_queue)) { - printk(KERN_INFO "netif_stop_queue() cannot be called before " - "register_netdev()"); + pr_info("netif_stop_queue() cannot be called before register_netdev()\n"); return; } set_bit(__QUEUE_STATE_XOFF, &dev_queue->state); -- cgit v1.2.3 From 6247e086188dd2ba5bfd64f9a876fe42b0cf39fb Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Tue, 1 Feb 2011 07:22:06 +0000 Subject: net: add ndo_fcoe_ddp_target() to support FCoE DDP in target mode The Fiber Channel over Ethernet (FCoE) Direct Data Placement (DDP) can also be used for FCoE target, where the DDP used for read I/O on an initiator can be used on an FCoE target to speed up the write I/O to the target from the initiator. The added ndo_fcoe_ddp_target() works in the similar way as the existing ndo_fcoe_ddp_setup() to allow the underlying hardware set up the DDP context accordingly when it gets called from the FCoE target implementation on top the existing Open-FCoE fcoe/libfc protocol stack so without losing the ability to provide DDP for read I/O as an initiator, it can also provide DDP offload to the write I/O coming from the initiator as a target. Signed-off-by: Yi Zou Signed-off-by: Kiran Patil Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 71563e7b2bfb..6bd5d460b7c1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -871,6 +871,10 @@ struct net_device_ops { unsigned int sgc); int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); + int (*ndo_fcoe_ddp_target)(struct net_device *dev, + u16 xid, + struct scatterlist *sgl, + unsigned int sgc); #define NETDEV_FCOE_WWNN 0 #define NETDEV_FCOE_WWPN 1 int (*ndo_fcoe_get_wwn)(struct net_device *dev, -- cgit v1.2.3 From acaf7df610ff3faf1778ce40d601fc3dd4a41b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 8 Mar 2011 22:44:11 +0000 Subject: Phonet: provide pipe socket option to retrieve the pipe identifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-space sometimes needs this information. In particular, the GPRS context or the AT commands pipe setups may use the pipe handle as a reference. This removes the settable pipe handle with CONFIG_PHONET_PIPECTRLR. It did not handle error cases correctly. Furthermore, the kernel *could* implement a smart scheme for allocating handles (if ever needed), but userspace really cannot. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 26c8df786918..32a0965da953 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -36,7 +36,7 @@ /* Socket options for SOL_PNPIPE level */ #define PNPIPE_ENCAP 1 #define PNPIPE_IFINDEX 2 -#define PNPIPE_PIPE_HANDLE 3 +#define PNPIPE_HANDLE 3 #define PNPIPE_ENABLE 4 /* unused slot */ -- cgit v1.2.3 From a015f6f49968c330b236ca2f6c2170820414f922 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 8 Mar 2011 22:44:13 +0000 Subject: Phonet: kill the ST-Ericsson pipe controller Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is now a run-time choice so that a single kernel can support both old and new generation ISI modems. Support for manually enabling the pipe flow is removed as it did not work properly, does not fit well with the socket API, and I am not aware of any use at the moment. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 32a0965da953..6fb13841db45 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -37,8 +37,6 @@ #define PNPIPE_ENCAP 1 #define PNPIPE_IFINDEX 2 #define PNPIPE_HANDLE 3 -#define PNPIPE_ENABLE 4 -/* unused slot */ #define PNADDR_ANY 0 #define PNADDR_BROADCAST 0xFC -- cgit v1.2.3 From 80751e2b8ffcbbe065e850d943301aa1ab219599 Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Mon, 7 Mar 2011 11:14:23 -0800 Subject: ieee80211: add IEEE80211_COUNTRY_STRING_LEN definition and make use of it in wireless drivers Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 294169e31364..2d1c6117d92c 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1325,6 +1325,9 @@ enum { /* Although the spec says 8 I'm seeing 6 in practice */ #define IEEE80211_COUNTRY_IE_MIN_LEN 6 +/* The Country String field of the element shall be 3 octets in length */ +#define IEEE80211_COUNTRY_STRING_LEN 3 + /* * For regulatory extension stuff see IEEE 802.11-2007 * Annex I (page 1141) and Annex J (page 1147). Also -- cgit v1.2.3 From 2f4e1b3970973bbb57cc3a3b9d67e67c1c648c37 Mon Sep 17 00:00:00 2001 From: Mario Schuknecht Date: Wed, 9 Mar 2011 14:08:09 -0800 Subject: tcp: ioctl type SIOCOUTQNSD returns amount of data not sent In contrast to SIOCOUTQ which returns the amount of data sent but not yet acknowledged plus data not yet sent this patch only returns the data not sent. For various methods of live streaming bitrate control it may be helpful to know how much data are in the tcp outqueue are not sent yet. Signed-off-by: Mario Schuknecht Signed-off-by: Steffen Sledz Signed-off-by: David S. Miller --- include/linux/sockios.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sockios.h b/include/linux/sockios.h index 241f179347d9..7997a506ad41 100644 --- a/include/linux/sockios.h +++ b/include/linux/sockios.h @@ -22,7 +22,7 @@ /* Linux-specific socket ioctls */ #define SIOCINQ FIONREAD -#define SIOCOUTQ TIOCOUTQ +#define SIOCOUTQ TIOCOUTQ /* output queue size (not sent + not acked) */ /* Routing table calls. */ #define SIOCADDRT 0x890B /* add routing table entry */ @@ -83,6 +83,8 @@ #define SIOCWANDEV 0x894A /* get/set netdev parameters */ +#define SIOCOUTQNSD 0x894B /* output queue size (not sent only) */ + /* ARP cache control calls. */ /* 0x8950 - 0x8952 * obsolete calls, don't re-use */ #define SIOCDARP 0x8953 /* delete ARP table entry */ -- cgit v1.2.3 From d4894f3ea7375dd9492b5d3d2ecb0b6e4bdb604e Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Wed, 9 Mar 2011 16:58:21 +0000 Subject: tg3: Add code to verify RODATA checksum of VPD This patch adds code to verify the checksum stored in the "RV" info keyword of the RODATA VPD section. Signed-off-by: Matt Carlson Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 559d02897075..ff5bccb87136 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1479,6 +1479,7 @@ void pci_request_acs(void); #define PCI_VPD_RO_KEYWORD_PARTNO "PN" #define PCI_VPD_RO_KEYWORD_MFR_ID "MN" #define PCI_VPD_RO_KEYWORD_VENDOR0 "V0" +#define PCI_VPD_RO_KEYWORD_CHKSUM "RV" /** * pci_vpd_lrdt_size - Extracts the Large Resource Data Type length -- cgit v1.2.3 From dbdd9a52e38a4a93adfa4d0278801cce4fad98eb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Mar 2011 16:34:38 -0800 Subject: ipv4: Remove redundant RCU locking in ip_check_mc(). All callers are under rcu_read_lock() protection already. Rename to ip_check_mc_rcu() to make it even more clear. Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 74cfcff0148b..82de336b8155 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -217,7 +217,7 @@ struct ip_mc_list { #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value) -extern int ip_check_mc(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); +extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); extern int igmp_rcv(struct sk_buff *); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); -- cgit v1.2.3 From e9bce845c0cee1a492e5cee6a827ae71140fe8b3 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Wed, 9 Mar 2011 08:48:03 +0000 Subject: net: add proper documentation for previously added net_device_ops for FCoE Add proper documentation for previously added net_device_ops ops for FCoE. Signed-off-by: Yi Zou Signed-off-by: Jeff Kirsher --- include/linux/netdevice.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7a071535c4c0..604dbf5051d3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -777,6 +777,42 @@ struct netdev_tc_txq { * queues stopped. This allows the netdevice to perform queue management * safely. * + * Fiber Channel over Ethernet (FCoE) offload functions. + * int (*ndo_fcoe_enable)(struct net_device *dev); + * Called when the FCoE protocol stack wants to start using LLD for FCoE + * so the underlying device can perform whatever needed configuration or + * initialization to support acceleration of FCoE traffic. + * + * int (*ndo_fcoe_disable)(struct net_device *dev); + * Called when the FCoE protocol stack wants to stop using LLD for FCoE + * so the underlying device can perform whatever needed clean-ups to + * stop supporting acceleration of FCoE traffic. + * + * int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid, + * struct scatterlist *sgl, unsigned int sgc); + * Called when the FCoE Initiator wants to initialize an I/O that + * is a possible candidate for Direct Data Placement (DDP). The LLD can + * perform necessary setup and returns 1 to indicate the device is set up + * successfully to perform DDP on this I/O, otherwise this returns 0. + * + * int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); + * Called when the FCoE Initiator/Target is done with the DDPed I/O as + * indicated by the FC exchange id 'xid', so the underlying device can + * clean up and reuse resources for later DDP requests. + * + * int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid, + * struct scatterlist *sgl, unsigned int sgc); + * Called when the FCoE Target wants to initialize an I/O that + * is a possible candidate for Direct Data Placement (DDP). The LLD can + * perform necessary setup and returns 1 to indicate the device is set up + * successfully to perform DDP on this I/O, otherwise this returns 0. + * + * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); + * Called when the underlying device wants to override default World Wide + * Name (WWN) generation mechanism in FCoE protocol stack to pass its own + * World Wide Port Name (WWPN) or World Wide Node Name (WWNN) to the FCoE + * protocol stack to use. + * * RFS acceleration. * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, * u16 rxq_index, u32 flow_id); -- cgit v1.2.3 From 4c9483b2fb5d2548c3cc1fe03cdd4484ceeb5d1c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 16:22:43 -0500 Subject: ipv6: Convert to use flowi6 where applicable. Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 4c4c74ec5987..ba45e6bc0764 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -183,10 +183,10 @@ extern void icmpv6_cleanup(void); extern void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos); -struct flowi; +struct flowi6; struct in6_addr; extern void icmpv6_flow_init(struct sock *sk, - struct flowi *fl, + struct flowi6 *fl6, u8 type, const struct in6_addr *saddr, const struct in6_addr *daddr, -- cgit v1.2.3 From f831c963b5c20bec230edce89e25f369996be5db Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Fri, 25 Feb 2011 14:22:11 -0500 Subject: tipc: Eliminate configuration for maximum number of cluster nodes Gets rid of the need for users to specify the maximum number of cluster nodes supported by TIPC. TIPC now automatically provides support for all 4K nodes allowed by its addressing scheme. Note: This change sets TIPC's memory usage to the amount used by a maximum size node table with 4K entries. An upcoming patch that converts the node table from a linear array to a hash table will compact the node table to a more efficient design, but for clarity it is nice to have all the Kconfig infrastruture go away separately. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- include/linux/tipc_config.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h index 011556fcef04..64aba5a0e9c7 100644 --- a/include/linux/tipc_config.h +++ b/include/linux/tipc_config.h @@ -89,7 +89,7 @@ #define TIPC_CMD_GET_MAX_SUBSCR 0x4006 /* tx none, rx unsigned */ #define TIPC_CMD_GET_MAX_ZONES 0x4007 /* obsoleted */ #define TIPC_CMD_GET_MAX_CLUSTERS 0x4008 /* obsoleted */ -#define TIPC_CMD_GET_MAX_NODES 0x4009 /* tx none, rx unsigned */ +#define TIPC_CMD_GET_MAX_NODES 0x4009 /* obsoleted */ #define TIPC_CMD_GET_MAX_SLAVES 0x400A /* obsoleted */ #define TIPC_CMD_GET_NETID 0x400B /* tx none, rx unsigned */ @@ -115,7 +115,7 @@ #define TIPC_CMD_SET_MAX_SUBSCR 0x8006 /* tx unsigned, rx none */ #define TIPC_CMD_SET_MAX_ZONES 0x8007 /* obsoleted */ #define TIPC_CMD_SET_MAX_CLUSTERS 0x8008 /* obsoleted */ -#define TIPC_CMD_SET_MAX_NODES 0x8009 /* tx unsigned, rx none */ +#define TIPC_CMD_SET_MAX_NODES 0x8009 /* obsoleted */ #define TIPC_CMD_SET_MAX_SLAVES 0x800A /* obsoleted */ #define TIPC_CMD_SET_NETID 0x800B /* tx unsigned, rx none */ -- cgit v1.2.3 From 50d3e6399a61fca53c5c440a79f71299db66b803 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 28 Feb 2011 14:56:15 -0500 Subject: tipc: Correct misnamed references to neighbor discovery domain Renames items that are improperly labelled as "network scope" items (which are represented by simple integer values) rather than "network domain" items (which are represented by -type network addresses). This change is purely cosmetic, and does not affect the operation of TIPC. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- include/linux/tipc_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h index 64aba5a0e9c7..0db239590b4d 100644 --- a/include/linux/tipc_config.h +++ b/include/linux/tipc_config.h @@ -202,7 +202,7 @@ struct tipc_link_info { struct tipc_bearer_config { __be32 priority; /* Range [1,31]. Override per link */ - __be32 detect_scope; + __be32 disc_domain; /* describing desired nodes */ char name[TIPC_MAX_BEARER_NAME]; }; -- cgit v1.2.3 From 9736acf395d3608583a7be70f62800b494fa103c Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 8 Mar 2011 00:05:43 +0000 Subject: xfrm: Add basic infrastructure to support IPsec extended sequence numbers This patch adds the struct xfrm_replay_state_esn which will be used to support IPsec extended sequence numbers and anti replay windows bigger than 32 packets. Also we add a function that returns the actual size of the xfrm_replay_state_esn, a xfrm netlink atribute and a xfrm state flag for the use of extended sequence numbers. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/xfrm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index b93d6f598085..22e61fdf75a2 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -84,6 +84,16 @@ struct xfrm_replay_state { __u32 bitmap; }; +struct xfrm_replay_state_esn { + unsigned int bmp_len; + __u32 oseq; + __u32 seq; + __u32 oseq_hi; + __u32 seq_hi; + __u32 replay_window; + __u32 bmp[0]; +}; + struct xfrm_algo { char alg_name[64]; unsigned int alg_key_len; /* in bits */ @@ -284,6 +294,7 @@ enum xfrm_attr_type_t { XFRMA_ALG_AUTH_TRUNC, /* struct xfrm_algo_auth */ XFRMA_MARK, /* struct xfrm_mark */ XFRMA_TFCPAD, /* __u32 */ + XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_esn */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -351,6 +362,7 @@ struct xfrm_usersa_info { #define XFRM_STATE_ICMP 16 #define XFRM_STATE_AF_UNSPEC 32 #define XFRM_STATE_ALIGN4 64 +#define XFRM_STATE_ESN 128 }; struct xfrm_usersa_id { -- cgit v1.2.3 From 698e1d23cfc15312be2e7665014afd98c49ae9a1 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Mon, 14 Mar 2011 09:01:02 +0000 Subject: net: dcbnl: Update copyright dates Signed-off-by: Mark Rustad Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index a3680a16718f..97c90b953c31 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, Intel Corporation. + * Copyright (c) 2008-2011, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, -- cgit v1.2.3 From 171f20e93b9d44a846bd5a41a583fdae13d26cd7 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Mon, 14 Mar 2011 09:01:08 +0000 Subject: net: dcbnl: Fix misspellings Fix a few spelling errors in dcbnl.h. Signed-off-by: Mark Rustad Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 97c90b953c31..eedf79b7ef7e 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -26,13 +26,13 @@ #define IEEE_8021QAZ_MAX_TCS 8 #define IEEE_8021QAZ_TSA_STRICT 0 -#define IEEE_8021QAZ_TSA_CB_SHABER 1 +#define IEEE_8021QAZ_TSA_CB_SHAPER 1 #define IEEE_8021QAZ_TSA_ETS 2 #define IEEE_8021QAZ_TSA_VENDOR 255 /* This structure contains the IEEE 802.1Qaz ETS managed object * - * @willing: willing bit in ETS configuratin TLV + * @willing: willing bit in ETS configuration TLV * @ets_cap: indicates supported capacity of ets feature * @cbs: credit based shaper ets algorithm supported * @tc_tx_bw: tc tx bandwidth indexed by traffic class @@ -92,7 +92,7 @@ struct ieee_pfc { #define CEE_DCBX_MAX_PRIO 8 /** - * struct cee_pg - CEE Prioity-Group managed object + * struct cee_pg - CEE Priority-Group managed object * * @willing: willing bit in the PG tlv * @error: error bit in the PG tlv -- cgit v1.2.3 From 0c0217b016ba8a970a6f6ab62ad0d858f39881ca Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Mon, 14 Mar 2011 09:01:15 +0000 Subject: net: dcbnl: Add IEEE app selector value definitions This adds defines for the app selector values currently defined in the IEEE 802.1Qaz specification. Signed-off-by: Mark Rustad Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index eedf79b7ef7e..c52280047e2c 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -125,6 +125,11 @@ struct cee_pfc { __u8 tcs_supported; }; +/* IEEE 802.1Qaz std supported values */ +#define IEEE_8021QAZ_APP_SEL_ETHERTYPE 1 +#define IEEE_8021QAZ_APP_SEL_STREAM 2 +#define IEEE_8021QAZ_APP_SEL_DGRAM 3 +#define IEEE_8021QAZ_APP_SEL_ANY 4 /* This structure contains the IEEE 802.1Qaz APP managed object. This * object is also used for the CEE std as well. There is no difference -- cgit v1.2.3 From de81bbea17650769882bc625d6b5df11ee7c4b24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Mar 2011 20:16:20 +0100 Subject: netfilter: ipt_addrtype: rename to xt_addrtype Followup patch will add ipv6 support. ipt_addrtype.h is retained for compatibility reasons, but no longer used by the kernel. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_addrtype.h | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 include/linux/netfilter/xt_addrtype.h (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 15e83bf3dd58..a1b410c76fc3 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -29,6 +29,7 @@ header-y += xt_TCPMSS.h header-y += xt_TCPOPTSTRIP.h header-y += xt_TEE.h header-y += xt_TPROXY.h +header-y += xt_addrtype.h header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h diff --git a/include/linux/netfilter/xt_addrtype.h b/include/linux/netfilter/xt_addrtype.h new file mode 100644 index 000000000000..b492fc84f737 --- /dev/null +++ b/include/linux/netfilter/xt_addrtype.h @@ -0,0 +1,27 @@ +#ifndef _XT_ADDRTYPE_H +#define _XT_ADDRTYPE_H + +#include + +enum { + XT_ADDRTYPE_INVERT_SOURCE = 0x0001, + XT_ADDRTYPE_INVERT_DEST = 0x0002, + XT_ADDRTYPE_LIMIT_IFACE_IN = 0x0004, + XT_ADDRTYPE_LIMIT_IFACE_OUT = 0x0008, +}; + +struct xt_addrtype_info_v1 { + __u16 source; /* source-type mask */ + __u16 dest; /* dest-type mask */ + __u32 flags; +}; + +/* revision 0 */ +struct xt_addrtype_info { + __u16 source; /* source-type mask */ + __u16 dest; /* dest-type mask */ + __u32 invert_source; + __u32 invert_dest; +}; + +#endif -- cgit v1.2.3 From 2f5dc63123905a89d4260ab8ee08d19ec104db04 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Mar 2011 20:17:44 +0100 Subject: netfilter: xt_addrtype: ipv6 support The kernel will refuse certain types that do not work in ipv6 mode. We can then add these features incrementally without risk of userspace breakage. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_addrtype.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_addrtype.h b/include/linux/netfilter/xt_addrtype.h index b492fc84f737..b156baa9d55e 100644 --- a/include/linux/netfilter/xt_addrtype.h +++ b/include/linux/netfilter/xt_addrtype.h @@ -10,6 +10,23 @@ enum { XT_ADDRTYPE_LIMIT_IFACE_OUT = 0x0008, }; + +/* rtn_type enum values from rtnetlink.h, but shifted */ +enum { + XT_ADDRTYPE_UNSPEC = 1 << 0, + XT_ADDRTYPE_UNICAST = 1 << 1, /* 1 << RTN_UNICAST */ + XT_ADDRTYPE_LOCAL = 1 << 2, /* 1 << RTN_LOCAL, etc */ + XT_ADDRTYPE_BROADCAST = 1 << 3, + XT_ADDRTYPE_ANYCAST = 1 << 4, + XT_ADDRTYPE_MULTICAST = 1 << 5, + XT_ADDRTYPE_BLACKHOLE = 1 << 6, + XT_ADDRTYPE_UNREACHABLE = 1 << 7, + XT_ADDRTYPE_PROHIBIT = 1 << 8, + XT_ADDRTYPE_THROW = 1 << 9, + XT_ADDRTYPE_NAT = 1 << 10, + XT_ADDRTYPE_XRESOLVE = 1 << 11, +}; + struct xt_addrtype_info_v1 { __u16 source; /* source-type mask */ __u16 dest; /* dest-type mask */ -- cgit v1.2.3 From 8a4eb5734e8d1dc60a8c28576bbbdfdcc643626d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 12 Mar 2011 03:14:39 +0000 Subject: net: introduce rx_handler results and logic around that MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch allows rx_handlers to better signalize what to do next to it's caller. That makes skb->deliver_no_wcard no longer needed. kernel-doc for rx_handler_result is taken from Nicolas' patch. Signed-off-by: Jiri Pirko Reviewed-by: Nicolas de Pesloüan Signed-off-by: David S. Miller --- include/linux/netdevice.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++- include/linux/skbuff.h | 5 +---- 2 files changed, 50 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 604dbf5051d3..5eeb2cd3631c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -390,7 +390,55 @@ enum gro_result { }; typedef enum gro_result gro_result_t; -typedef struct sk_buff *rx_handler_func_t(struct sk_buff *skb); +/* + * enum rx_handler_result - Possible return values for rx_handlers. + * @RX_HANDLER_CONSUMED: skb was consumed by rx_handler, do not process it + * further. + * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in + * case skb->dev was changed by rx_handler. + * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. + * @RX_HANDLER_PASS: Do nothing, passe the skb as if no rx_handler was called. + * + * rx_handlers are functions called from inside __netif_receive_skb(), to do + * special processing of the skb, prior to delivery to protocol handlers. + * + * Currently, a net_device can only have a single rx_handler registered. Trying + * to register a second rx_handler will return -EBUSY. + * + * To register a rx_handler on a net_device, use netdev_rx_handler_register(). + * To unregister a rx_handler on a net_device, use + * netdev_rx_handler_unregister(). + * + * Upon return, rx_handler is expected to tell __netif_receive_skb() what to + * do with the skb. + * + * If the rx_handler consumed to skb in some way, it should return + * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for + * the skb to be delivered in some other ways. + * + * If the rx_handler changed skb->dev, to divert the skb to another + * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the + * new device will be called if it exists. + * + * If the rx_handler consider the skb should be ignored, it should return + * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that + * are registred on exact device (ptype->dev == skb->dev). + * + * If the rx_handler didn't changed skb->dev, but want the skb to be normally + * delivered, it should return RX_HANDLER_PASS. + * + * A device without a registered rx_handler will behave as if rx_handler + * returned RX_HANDLER_PASS. + */ + +enum rx_handler_result { + RX_HANDLER_CONSUMED, + RX_HANDLER_ANOTHER, + RX_HANDLER_EXACT, + RX_HANDLER_PASS, +}; +typedef enum rx_handler_result rx_handler_result_t; +typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); extern void __napi_schedule(struct napi_struct *n); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 31f02d0b46a7..24cfa626931e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -388,10 +388,7 @@ struct sk_buff { kmemcheck_bitfield_begin(flags2); __u16 queue_mapping:16; #ifdef CONFIG_IPV6_NDISC_NODETYPE - __u8 ndisc_nodetype:2, - deliver_no_wcard:1; -#else - __u8 deliver_no_wcard:1; + __u8 ndisc_nodetype:2; #endif __u8 ooo_okay:1; kmemcheck_bitfield_end(flags2); -- cgit v1.2.3