From 72b31f7271df34c6aab36c01305287924826678f Mon Sep 17 00:00:00 2001
From: Bernhard Thaler <bernhard.thaler@wvnet.at>
Date: Sat, 30 May 2015 15:27:40 +0200
Subject: netfilter: bridge: detect NAT66 correctly and change MAC address

IPv4 iptables allows to REDIRECT/DNAT/SNAT any traffic over a bridge.

e.g. REDIRECT
$ sysctl -w net.bridge.bridge-nf-call-iptables=1
$ iptables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \
  -j REDIRECT --to-ports 81

This does not work with ip6tables on a bridge in NAT66 scenario
because the REDIRECT/DNAT/SNAT is not correctly detected.

The bridge pre-routing (finish) netfilter hook has to check for a possible
redirect and then fix the destination mac address. This allows to use the
ip6tables rules for local REDIRECT/DNAT/SNAT REDIRECT similar to the IPv4
iptables version.

e.g. REDIRECT
$ sysctl -w net.bridge.bridge-nf-call-ip6tables=1
$ ip6tables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \
  -j REDIRECT --to-ports 81

This patch makes it possible to use IPv6 NAT66 on a bridge. It was tested
on a bridge with two interfaces using SNAT/DNAT NAT66 rules.

Reported-by: Artie Hamilton <artiemhamilton@yahoo.com>
Signed-off-by: Sven Eckelmann <sven@open-mesh.com>
[bernhard.thaler@wvnet.at: rebased, add indirect call to ip6_route_input()]
[bernhard.thaler@wvnet.at: rebased, split into separate patches]
Signed-off-by: Bernhard Thaler <bernhard.thaler@wvnet.at>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h | 1 +
 include/linux/skbuff.h         | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 64dad1cc1a4b..e2d19694ee8f 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -25,6 +25,7 @@ void ipv6_netfilter_fini(void);
 struct nf_ipv6_ops {
 	int (*chk_addr)(struct net *net, const struct in6_addr *addr,
 			const struct net_device *dev, int strict);
+	void (*route_input)(struct sk_buff *skb);
 };
 
 extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cc612fc0a894..f70fc0e6bf7b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -36,6 +36,7 @@
 #include <linux/sched.h>
 #include <net/flow_dissector.h>
 #include <linux/splice.h>
+#include <linux/in6.h>
 
 /* A. Checksumming of received packets by device.
  *
@@ -179,7 +180,10 @@ struct nf_bridge_info {
 		struct net_device *physoutdev;
 		char neigh_header[8];
 	};
-	__be32			ipv4_daddr;
+	union {
+		__be32          ipv4_daddr;
+		struct in6_addr ipv6_daddr;
+	};
 };
 #endif
 
-- 
cgit v1.2.3


From 411ffb4fde80705a9a8db4c2d38dbeef6f5bd689 Mon Sep 17 00:00:00 2001
From: Bernhard Thaler <bernhard.thaler@wvnet.at>
Date: Sat, 30 May 2015 15:28:28 +0200
Subject: netfilter: bridge: refactor frag_max_size

Currently frag_max_size is member of br_input_skb_cb and copied back and
forth using IPCB(skb) and BR_INPUT_SKB_CB(skb) each time it is changed or
used.

Attach frag_max_size to nf_bridge_info and set value in pre_routing and
forward functions. Use its value in forward and xmit functions.

Signed-off-by: Bernhard Thaler <bernhard.thaler@wvnet.at>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/skbuff.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f70fc0e6bf7b..32b105e682b3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -174,6 +174,7 @@ struct nf_bridge_info {
 		BRNF_PROTO_PPPOE
 	} orig_proto:8;
 	bool			pkt_otherhost;
+	__u16			frag_max_size;
 	unsigned int		mask;
 	struct net_device	*physindev;
 	union {
-- 
cgit v1.2.3


From efb6de9b4ba0092b2c55f6a52d16294a8a698edd Mon Sep 17 00:00:00 2001
From: Bernhard Thaler <bernhard.thaler@wvnet.at>
Date: Sat, 30 May 2015 15:30:16 +0200
Subject: netfilter: bridge: forward IPv6 fragmented packets

IPv6 fragmented packets are not forwarded on an ethernet bridge
with netfilter ip6_tables loaded. e.g. steps to reproduce

1) create a simple bridge like this

        modprobe br_netfilter
        brctl addbr br0
        brctl addif br0 eth0
        brctl addif br0 eth2
        ifconfig eth0 up
        ifconfig eth2 up
        ifconfig br0 up

2) place a host with an IPv6 address on each side of the bridge

        set IPv6 address on host A:
        ip -6 addr add fd01:2345:6789:1::1/64 dev eth0

        set IPv6 address on host B:
        ip -6 addr add fd01:2345:6789:1::2/64 dev eth0

3) run a simple ping command on host A with packets > MTU

        ping6 -s 4000 fd01:2345:6789:1::2

4) wait some time and run e.g. "ip6tables -t nat -nvL" on the bridge

IPv6 fragmented packets traverse the bridge cleanly until somebody runs.
"ip6tables -t nat -nvL". As soon as it is run (and netfilter modules are
loaded) IPv6 fragmented packets do not traverse the bridge any more (you
see no more responses in ping's output).

After applying this patch IPv6 fragmented packets traverse the bridge
cleanly in above scenario.

Signed-off-by: Bernhard Thaler <bernhard.thaler@wvnet.at>
[pablo@netfilter.org: small changes to br_nf_dev_queue_xmit]
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index e2d19694ee8f..8b7d28f3aada 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -26,6 +26,8 @@ struct nf_ipv6_ops {
 	int (*chk_addr)(struct net *net, const struct in6_addr *addr,
 			const struct net_device *dev, int strict);
 	void (*route_input)(struct sk_buff *skb);
+	int (*fragment)(struct sock *sk, struct sk_buff *skb,
+			int (*output)(struct sock *, struct sk_buff *));
 };
 
 extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
-- 
cgit v1.2.3


From 33b1f31392861947fa2a2a57c3a39ab63b8c9f9d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 5 Jun 2015 13:28:38 +0200
Subject: net: ip_fragment: remove BRIDGE_NETFILTER mtu special handling

since commit d6b915e29f4adea9
("ip_fragment: don't forward defragmented DF packet") the largest
fragment size is available in the IPCB.

Therefore we no longer need to care about 'encapsulation'
overhead of stripped PPPOE/VLAN headers since ip_do_fragment
doesn't use device mtu in such cases.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index f2fdb5a52070..6d80fc686323 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -20,13 +20,6 @@ enum nf_br_hook_priorities {
 #define BRNF_BRIDGED_DNAT		0x02
 #define BRNF_NF_BRIDGE_PREROUTING	0x08
 
-static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
-{
-	if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
-		return PPPOE_SES_HLEN;
-	return 0;
-}
-
 int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
 
 static inline void br_drop_fake_rtable(struct sk_buff *skb)
-- 
cgit v1.2.3


From 71ae0dff02d756e4d2ca710b79f2ff5390029a5f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 11 Jun 2015 01:34:54 +0200
Subject: netfilter: xtables: use percpu rule counters

The binary arp/ip/ip6tables ruleset is stored per cpu.

The only reason left as to why we need percpu duplication are the rule
counters embedded into ipt_entry et al -- since each cpu has its own copy
of the rules, all counters can be lockless.

The downside is that the more cpus are supported, the more memory is
required.  Rules are not just duplicated per online cpu but for each
possible cpu, i.e. if maxcpu is 144, then rule is duplicated 144 times,
not for the e.g. 64 cores present.

To save some memory and also improve utilization of shared caches it
would be preferable to only store the rule blob once.

So we first need to separate counters and the rule blob.

Instead of using entry->counters, allocate this percpu and store the
percpu address in entry->counters.pcnt on CONFIG_SMP.

This change makes no sense as-is; it is merely an intermediate step to
remove the percpu duplication of the rule set in a followup patch.

Suggested-by: Eric Dumazet <edumazet@google.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 49 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 09f38206c18f..b77ab9f17641 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -353,6 +353,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
 	return ret;
 }
 
+
+/* On SMP, ip(6)t_entry->counters.pcnt holds address of the
+ * real (percpu) counter.  On !SMP, its just the packet count,
+ * so nothing needs to be done there.
+ *
+ * xt_percpu_counter_alloc returns the address of the percpu
+ * counter, or 0 on !SMP.
+ *
+ * Hence caller must use IS_ERR_VALUE to check for error, this
+ * allows us to return 0 for single core systems without forcing
+ * callers to deal with SMP vs. NONSMP issues.
+ */
+static inline u64 xt_percpu_counter_alloc(void)
+{
+	if (nr_cpu_ids > 1) {
+		void __percpu *res = alloc_percpu(struct xt_counters);
+
+		if (res == NULL)
+			return (u64) -ENOMEM;
+
+		return (__force u64) res;
+	}
+
+	return 0;
+}
+static inline void xt_percpu_counter_free(u64 pcnt)
+{
+	if (nr_cpu_ids > 1)
+		free_percpu((void __percpu *) pcnt);
+}
+
+static inline struct xt_counters *
+xt_get_this_cpu_counter(struct xt_counters *cnt)
+{
+	if (nr_cpu_ids > 1)
+		return this_cpu_ptr((void __percpu *) cnt->pcnt);
+
+	return cnt;
+}
+
+static inline struct xt_counters *
+xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
+{
+	if (nr_cpu_ids > 1)
+		return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu);
+
+	return cnt;
+}
+
 struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
 void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
 
-- 
cgit v1.2.3


From 482cfc318559e2527dfd8513582d2fdb276e47c2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 11 Jun 2015 01:34:55 +0200
Subject: netfilter: xtables: avoid percpu ruleset duplication

We store the rule blob per (possible) cpu.  Unfortunately this means we can
waste lot of memory on big smp machines. ipt_entry structure ('rule head')
is 112 byte, so e.g. with maxcpu=64 one single rule eats
close to 8k RAM.

Since previous patch made counters percpu it appears there is nothing
left in the rule blob that needs to be percpu.

On my test system (144 possible cpus, 400k dummy rules) this
change saves close to 9 Gigabyte of RAM.

Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index b77ab9f17641..9969d79dcde1 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -224,9 +224,9 @@ struct xt_table_info {
 	unsigned int stacksize;
 	unsigned int __percpu *stackptr;
 	void ***jumpstack;
-	/* ipt_entry tables: one per CPU */
+
 	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
-	void *entries[1];
+	void *entries;
 };
 
 #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
-- 
cgit v1.2.3


From aaeb6e24f5b6cb6a664fbdec6e08b65c3173c1b3 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 12 Jun 2015 21:07:54 +0200
Subject: netfilter: ipset: Use MSEC_PER_SEC consistently

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set_timeout.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 83c2f9e0886c..3c8842bcedaa 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -61,7 +61,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 t)
 		return;
 	}
 
-	*timeout = msecs_to_jiffies(t * 1000) + jiffies;
+	*timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies;
 	if (*timeout == IPSET_ELEM_PERMANENT)
 		/* Bingo! :-) */
 		(*timeout)--;
@@ -71,7 +71,7 @@ static inline u32
 ip_set_timeout_get(unsigned long *timeout)
 {
 	return *timeout == IPSET_ELEM_PERMANENT ? 0 :
-		jiffies_to_msecs(*timeout - jiffies)/1000;
+		jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
 }
 
 #endif	/* __KERNEL__ */
-- 
cgit v1.2.3


From f690cbaed9fe4d77592e24139db7ad790641c4fd Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 12 Jun 2015 22:11:00 +0200
Subject: netfilter: ipset: Fix cidr handling for hash:*net* types

Commit "Simplify cidr handling for hash:*net* types" broke the cidr
handling for the hash:*net* types when the sets were used by the SET
target: entries with invalid cidr values were added to the sets.
Reported by Jonathan Johnson.

Testsuite entry is added to verify the fix.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index ffdfdc24952a..a6fe1ce96437 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -545,8 +545,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
 	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
 	  .timeout = (set)->timeout }
 
-#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))
-
 #define IPSET_CONCAT(a, b)		a##b
 #define IPSET_TOKEN(a, b)		IPSET_CONCAT(a, b)
 
-- 
cgit v1.2.3


From c4c997839cf92cb1037e43a85cdb4cbf44ed39a5 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 13 Jun 2015 11:59:45 +0200
Subject: netfilter: ipset: Fix parallel resizing and listing of the same set

When elements added to a hash:* type of set and resizing triggered,
parallel listing could start to list the original set (before resizing)
and "continue" with listing the new set. Fix it by references and
using the original hash table for listing. Therefore the destroying of
the original hash table may happen from the resizing or listing functions.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index a6fe1ce96437..5674b6ac6646 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -176,6 +176,9 @@ struct ip_set_type_variant {
 	/* List elements */
 	int (*list)(const struct ip_set *set, struct sk_buff *skb,
 		    struct netlink_callback *cb);
+	/* Keep listing private when resizing runs parallel */
+	void (*uref)(struct ip_set *set, struct netlink_callback *cb,
+		     bool start);
 
 	/* Return true if "b" set is the same as "a"
 	 * according to the create set parameters */
@@ -380,12 +383,12 @@ ip_set_init_counter(struct ip_set_counter *counter,
 
 /* Netlink CB args */
 enum {
-	IPSET_CB_NET = 0,
-	IPSET_CB_DUMP,
-	IPSET_CB_INDEX,
-	IPSET_CB_ARG0,
+	IPSET_CB_NET = 0,	/* net namespace */
+	IPSET_CB_DUMP,		/* dump single set/all sets */
+	IPSET_CB_INDEX,		/* set index */
+	IPSET_CB_PRIVATE,	/* set private data */
+	IPSET_CB_ARG0,		/* type specific */
 	IPSET_CB_ARG1,
-	IPSET_CB_ARG2,
 };
 
 /* register and unregister set references */
-- 
cgit v1.2.3


From b57b2d1fa53fe8563bdfc66a33b844463b9af285 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 13 Jun 2015 14:22:25 +0200
Subject: netfilter: ipset: Prepare the ipset core to use RCU at set level

Replace rwlock_t with spinlock_t in "struct ip_set" and change the locking
accordingly. Convert the comment extension into an rcu-avare object. Also,
simplify the timeout routines.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h         |  9 ++++--
 include/linux/netfilter/ipset/ip_set_comment.h | 38 ++++++++++++++++++--------
 include/linux/netfilter/ipset/ip_set_timeout.h | 25 +++++++----------
 3 files changed, 44 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 5674b6ac6646..19b4969a25fe 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -108,8 +108,13 @@ struct ip_set_counter {
 	atomic64_t packets;
 };
 
+struct ip_set_comment_rcu {
+	struct rcu_head rcu;
+	char str[0];
+};
+
 struct ip_set_comment {
-	char *str;
+	struct ip_set_comment_rcu __rcu *c;
 };
 
 struct ip_set_skbinfo {
@@ -226,7 +231,7 @@ struct ip_set {
 	/* The name of the set */
 	char name[IPSET_MAXNAMELEN];
 	/* Lock protecting the set data */
-	rwlock_t lock;
+	spinlock_t lock;
 	/* References to the set */
 	u32 ref;
 	/* The core set type */
diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
index 21217ea008d7..8d0248525957 100644
--- a/include/linux/netfilter/ipset/ip_set_comment.h
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
 	return nla_data(tb);
 }
 
+/* Called from uadd only, protected by the set spinlock.
+ * The kadt functions don't use the comment extensions in any way.
+ */
 static inline void
 ip_set_init_comment(struct ip_set_comment *comment,
 		    const struct ip_set_ext *ext)
 {
+	struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
 	size_t len = ext->comment ? strlen(ext->comment) : 0;
 
-	if (unlikely(comment->str)) {
-		kfree(comment->str);
-		comment->str = NULL;
+	if (unlikely(c)) {
+		kfree_rcu(c, rcu);
+		rcu_assign_pointer(comment->c, NULL);
 	}
 	if (!len)
 		return;
 	if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
 		len = IPSET_MAX_COMMENT_SIZE;
-	comment->str = kzalloc(len + 1, GFP_ATOMIC);
-	if (unlikely(!comment->str))
+	c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+	if (unlikely(!c))
 		return;
-	strlcpy(comment->str, ext->comment, len + 1);
+	strlcpy(c->str, ext->comment, len + 1);
+	rcu_assign_pointer(comment->c, c);
 }
 
+/* Used only when dumping a set, protected by rcu_read_lock_bh() */
 static inline int
 ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
 {
-	if (!comment->str)
+	struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
+
+	if (!c)
 		return 0;
-	return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
+	return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
 }
 
+/* Called from uadd/udel, flush or the garbage collectors protected
+ * by the set spinlock.
+ * Called when the set is destroyed and when there can't be any user
+ * of the set data anymore.
+ */
 static inline void
 ip_set_comment_free(struct ip_set_comment *comment)
 {
-	if (unlikely(!comment->str))
+	struct ip_set_comment_rcu *c;
+
+	c = rcu_dereference_protected(comment->c, 1);
+	if (unlikely(!c))
 		return;
-	kfree(comment->str);
-	comment->str = NULL;
+	kfree_rcu(c, rcu);
+	rcu_assign_pointer(comment->c, NULL);
 }
 
 #endif
diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 3c8842bcedaa..1d6a935c1ac5 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -40,31 +40,26 @@ ip_set_timeout_uget(struct nlattr *tb)
 }
 
 static inline bool
-ip_set_timeout_test(unsigned long timeout)
+ip_set_timeout_expired(unsigned long *t)
 {
-	return timeout == IPSET_ELEM_PERMANENT ||
-	       time_is_after_jiffies(timeout);
-}
-
-static inline bool
-ip_set_timeout_expired(unsigned long *timeout)
-{
-	return *timeout != IPSET_ELEM_PERMANENT &&
-	       time_is_before_jiffies(*timeout);
+	return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
 }
 
 static inline void
-ip_set_timeout_set(unsigned long *timeout, u32 t)
+ip_set_timeout_set(unsigned long *timeout, u32 value)
 {
-	if (!t) {
+	unsigned long t;
+
+	if (!value) {
 		*timeout = IPSET_ELEM_PERMANENT;
 		return;
 	}
 
-	*timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies;
-	if (*timeout == IPSET_ELEM_PERMANENT)
+	t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
+	if (t == IPSET_ELEM_PERMANENT)
 		/* Bingo! :-) */
-		(*timeout)--;
+		t--;
+	*timeout = t;
 }
 
 static inline u32
-- 
cgit v1.2.3


From ca0f6a5cd99e0c6ba4bb78dc402817f636370f26 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 13 Jun 2015 19:45:33 +0200
Subject: netfilter: ipset: Fix coding styles reported by checkpatch.pl

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 19b4969a25fe..48bb01edcf30 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -349,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
 			      cpu_to_be64((u64)skbinfo->skbmark << 32 |
 					  skbinfo->skbmarkmask))) ||
 	       (skbinfo->skbprio &&
-	        nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+		nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
 			      cpu_to_be32(skbinfo->skbprio))) ||
 	       (skbinfo->skbqueue &&
-	        nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+		nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
 			     cpu_to_be16(skbinfo->skbqueue)));
-
 }
 
 static inline void
-- 
cgit v1.2.3


From 711bdde6a884354ddae8da2fcb495b2a9364cc90 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Jun 2015 09:57:30 -0700
Subject: netfilter: x_tables: remove XT_TABLE_INFO_SZ and a dereference.

After Florian patches, there is no need for XT_TABLE_INFO_SZ anymore :
Only one copy of table is kept, instead of one copy per cpu.

We also can avoid a dereference if we put table data right after
xt_table_info. It reduces register pressure and helps compiler.

Then, we attempt a kmalloc() if total size is under order-3 allocation,
to reduce TLB pressure, as in many cases, rules fit in 32 KB.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 9969d79dcde1..95693c4cebdd 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -225,12 +225,9 @@ struct xt_table_info {
 	unsigned int __percpu *stackptr;
 	void ***jumpstack;
 
-	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
-	void *entries;
+	unsigned char entries[0] __aligned(8);
 };
 
-#define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
-			  + nr_cpu_ids * sizeof(char *))
 int xt_register_target(struct xt_target *target);
 void xt_unregister_target(struct xt_target *target);
 int xt_register_targets(struct xt_target *target, unsigned int n);
-- 
cgit v1.2.3