From 761da2935d6e18d178582dbdf315a3a458555505 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 26 Mar 2015 12:39:36 +0000 Subject: netfilter: nf_tables: add set timeout API support Add set timeout support to the netlink API. Sets with timeout support enabled can have a default timeout value and garbage collection interval specified. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b8cd60dcb4e1..8936803a2ad5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -258,6 +258,8 @@ void nft_unregister_set(struct nft_set_ops *ops); * @dtype: data type (verdict or numeric type defined by userspace) * @size: maximum set size * @nelems: number of elements + * @timeout: default timeout value in msecs + * @gc_int: garbage collection interval in msecs * @policy: set parameterization (see enum nft_set_policies) * @ops: set ops * @pnet: network namespace @@ -274,6 +276,8 @@ struct nft_set { u32 dtype; u32 size; u32 nelems; + u64 timeout; + u32 gc_int; u16 policy; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; @@ -295,6 +299,11 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, struct nft_set *nf_tables_set_lookup_byid(const struct net *net, const struct nlattr *nla); +static inline unsigned long nft_set_gc_interval(const struct nft_set *set) +{ + return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ; +} + /** * struct nft_set_binding - nf_tables set binding * -- cgit v1.2.3 From c3e1b005ed1cc068fc9d454a6e745830d55d251d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 26 Mar 2015 12:39:37 +0000 Subject: netfilter: nf_tables: add set element timeout support Add API support for set element timeouts. Elements can have a individual timeout value specified, overriding the sets' default. Two new extension types are used for timeouts - the timeout value and the expiration time. The timeout value only exists if it differs from the default value. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8936803a2ad5..f2726c537248 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -329,12 +329,16 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @NFT_SET_EXT_KEY: element key * @NFT_SET_EXT_DATA: mapping data * @NFT_SET_EXT_FLAGS: element flags + * @NFT_SET_EXT_TIMEOUT: element timeout + * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_NUM: number of extension types */ enum nft_set_extensions { NFT_SET_EXT_KEY, NFT_SET_EXT_DATA, NFT_SET_EXT_FLAGS, + NFT_SET_EXT_TIMEOUT, + NFT_SET_EXT_EXPIRATION, NFT_SET_EXT_NUM }; @@ -431,6 +435,22 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_FLAGS); } +static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); +} + +static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION); +} + +static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +{ + return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && + time_is_before_eq_jiffies(*nft_set_ext_expiration(ext)); +} + static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, void *elem) { -- cgit v1.2.3 From cfed7e1b1f8ed9b3d81ab12203cfb69c3ef24ac6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 26 Mar 2015 12:39:38 +0000 Subject: netfilter: nf_tables: add set garbage collection helpers Add helpers for GC batch destruction: since element destruction needs a RCU grace period for all set implementations, add some helper functions for asynchronous batch destruction. Elements are collected in a batch structure, which is asynchronously released using RCU once its full. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 56 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f2726c537248..6fd44959bf87 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -459,6 +459,62 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, void nft_set_elem_destroy(const struct nft_set *set, void *elem); +/** + * struct nft_set_gc_batch_head - nf_tables set garbage collection batch + * + * @rcu: rcu head + * @set: set the elements belong to + * @cnt: count of elements + */ +struct nft_set_gc_batch_head { + struct rcu_head rcu; + const struct nft_set *set; + unsigned int cnt; +}; + +#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \ + sizeof(struct nft_set_gc_batch_head)) / \ + sizeof(void *)) + +/** + * struct nft_set_gc_batch - nf_tables set garbage collection batch + * + * @head: GC batch head + * @elems: garbage collection elements + */ +struct nft_set_gc_batch { + struct nft_set_gc_batch_head head; + void *elems[NFT_SET_GC_BATCH_SIZE]; +}; + +struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, + gfp_t gfp); +void nft_set_gc_batch_release(struct rcu_head *rcu); + +static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb) +{ + if (gcb != NULL) + call_rcu(&gcb->head.rcu, nft_set_gc_batch_release); +} + +static inline struct nft_set_gc_batch * +nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb, + gfp_t gfp) +{ + if (gcb != NULL) { + if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems)) + return gcb; + nft_set_gc_batch_complete(gcb); + } + return nft_set_gc_batch_alloc(set, gfp); +} + +static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, + void *elem) +{ + gcb->elems[gcb->head.cnt++] = elem; +} + /** * struct nft_expr_type - nf_tables expression type * -- cgit v1.2.3 From 6908665826d56ddd024f3e131a9ee36f0d140943 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 26 Mar 2015 12:39:39 +0000 Subject: netfilter: nf_tables: add GC synchronization helpers GC is expected to happen asynchrously to the netlink interface. In the netlink path, both insertion and removal of elements consist of two steps, insertion followed by activation or deactivation followed by removal, during which the element must not be freed by GC. The synchronization helpers use an unused bit in the genmask field to atomically mark an element as "busy", meaning it is either currently being handled through the netlink API or by GC. Elements being processed by GC will never survive, netlink will simply ignore them. Elements being currently processed through netlink will be skipped by GC and reprocessed during the next run. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 6fd44959bf87..1ea13fcd388e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -852,6 +852,41 @@ static inline void nft_set_elem_change_active(const struct nft_set *set, ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); } +/* + * We use a free bit in the genmask field to indicate the element + * is busy, meaning it is currently being processed either by + * the netlink API or GC. + * + * Even though the genmask is only a single byte wide, this works + * because the extension structure if fully constant once initialized, + * so there are no non-atomic write accesses unless it is already + * marked busy. + */ +#define NFT_SET_ELEM_BUSY_MASK (1 << 2) + +#if defined(__LITTLE_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_BUSY_BIT 2 +#elif defined(__BIG_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) +#else +#error +#endif + +static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word); +} + +static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + clear_bit(NFT_SET_ELEM_BUSY_BIT, word); +} + /** * struct nft_trans - nf_tables object update in transaction * -- cgit v1.2.3 From 9d0982927e79049675cb6c6c04a0ebb3dad5a434 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 26 Mar 2015 12:39:40 +0000 Subject: netfilter: nft_hash: add support for timeouts Add support for element timeouts to nft_hash. The lookup and walking functions are changed to ignore timed out elements, a periodic garbage collection task cleans out expired entries. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1ea13fcd388e..a785699329c9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -294,6 +294,11 @@ static inline void *nft_set_priv(const struct nft_set *set) return (void *)set->data; } +static inline struct nft_set *nft_set_container_of(const void *priv) +{ + return (void *)priv - offsetof(struct nft_set, data); +} + struct nft_set *nf_tables_set_lookup(const struct nft_table *table, const struct nlattr *nla); struct nft_set *nf_tables_set_lookup_byid(const struct net *net, -- cgit v1.2.3 From 3dd0673ac3cd7d05cde103396ec7ec410a901de2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 5 Apr 2015 14:41:06 +0200 Subject: netfilter: nf_tables: prepare set element accounting for async updates Use atomic operations for the element count to avoid races with async updates. To properly handle the transactional semantics during netlink updates, deleted but not yet committed elements are accounted for seperately and are treated as being already removed. This means for the duration of a netlink transaction, the limit might be exceeded by the amount of elements deleted. Set implementations must be prepared to handle this. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a785699329c9..746423332fcb 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -258,6 +258,7 @@ void nft_unregister_set(struct nft_set_ops *ops); * @dtype: data type (verdict or numeric type defined by userspace) * @size: maximum set size * @nelems: number of elements + * @ndeact: number of deactivated elements queued for removal * @timeout: default timeout value in msecs * @gc_int: garbage collection interval in msecs * @policy: set parameterization (see enum nft_set_policies) @@ -275,7 +276,8 @@ struct nft_set { u32 ktype; u32 dtype; u32 size; - u32 nelems; + atomic_t nelems; + u32 ndeact; u64 timeout; u32 gc_int; u16 policy; -- cgit v1.2.3 From 11113e190bf0ad73086884f87efccc994ff28b3d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 5 Apr 2015 14:41:07 +0200 Subject: netfilter: nf_tables: support different set binding types Currently a set binding is assumed to be related to a lookup and, in case of maps, a data load. In order to use bindings for set updates, the loop detection checks must be restricted to map operations only. Add a flags member to the binding struct to hold the set "action" flags such as NFT_SET_MAP, and perform loop detection based on these. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 746423332fcb..e7e6365c248f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -316,6 +316,7 @@ static inline unsigned long nft_set_gc_interval(const struct nft_set *set) * * @list: set bindings list node * @chain: chain containing the rule bound to the set + * @flags: set action flags * * A set binding contains all information necessary for validation * of new elements added to a bound set. @@ -323,6 +324,7 @@ static inline unsigned long nft_set_gc_interval(const struct nft_set *set) struct nft_set_binding { struct list_head list; const struct nft_chain *chain; + u32 flags; }; int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, -- cgit v1.2.3 From 22fe54d5fefcfa98c58cc2f4607dd26d9648b3f5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 5 Apr 2015 14:41:08 +0200 Subject: netfilter: nf_tables: add support for dynamic set updates Add a new "dynset" expression for dynamic set updates. A new set op ->update() is added which, for non existant elements, invokes an initialization callback and inserts the new element. For both new or existing elements the extenstion pointer is returned to the caller to optionally perform timer updates or other actions. Element removal is not supported so far, however that seems to be a rather exotic need and can be added later on. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 17 +++++++++++++++++ include/net/netfilter/nf_tables_core.h | 3 +++ 2 files changed, 20 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e7e6365c248f..38c3496f7bf2 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -196,6 +196,7 @@ struct nft_set_estimate { }; struct nft_set_ext; +struct nft_expr; /** * struct nft_set_ops - nf_tables set operations @@ -218,6 +219,15 @@ struct nft_set_ops { bool (*lookup)(const struct nft_set *set, const struct nft_data *key, const struct nft_set_ext **ext); + bool (*update)(struct nft_set *set, + const struct nft_data *key, + void *(*new)(struct nft_set *, + const struct nft_expr *, + struct nft_data []), + const struct nft_expr *expr, + struct nft_data data[], + const struct nft_set_ext **ext); + int (*insert)(const struct nft_set *set, const struct nft_set_elem *elem); void (*activate)(const struct nft_set *set, @@ -466,6 +476,11 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, return elem + set->ops->elemsize; } +void *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const struct nft_data *key, + const struct nft_data *data, + u64 timeout, gfp_t gfp); void nft_set_elem_destroy(const struct nft_set *set, void *elem); /** @@ -845,6 +860,8 @@ static inline u8 nft_genmask_cur(const struct net *net) return 1 << ACCESS_ONCE(net->nft.gencursor); } +#define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) + /* * Set element transaction helpers */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index a75fc8e27cd6..c6f400cfaac8 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -31,6 +31,9 @@ void nft_cmp_module_exit(void); int nft_lookup_module_init(void); void nft_lookup_module_exit(void); +int nft_dynset_module_init(void); +void nft_dynset_module_exit(void); + int nft_bitwise_module_init(void); void nft_bitwise_module_exit(void); -- cgit v1.2.3 From 68e942e88add0ac8576fc8397e86495edf3dcea7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 5 Apr 2015 14:43:38 +0200 Subject: netfilter: nf_tables: support optional userdata for set elements Add an userdata set extension and allow the user to attach arbitrary data to set elements. This is intended to hold TLV encoded data like comments or DNS annotations that have no meaning to the kernel. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 38c3496f7bf2..63c44bdfdd3b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -350,6 +350,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @NFT_SET_EXT_FLAGS: element flags * @NFT_SET_EXT_TIMEOUT: element timeout * @NFT_SET_EXT_EXPIRATION: element expiration time + * @NFT_SET_EXT_USERDATA: user data associated with the element * @NFT_SET_EXT_NUM: number of extension types */ enum nft_set_extensions { @@ -358,6 +359,7 @@ enum nft_set_extensions { NFT_SET_EXT_FLAGS, NFT_SET_EXT_TIMEOUT, NFT_SET_EXT_EXPIRATION, + NFT_SET_EXT_USERDATA, NFT_SET_EXT_NUM }; @@ -464,6 +466,11 @@ static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ex return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION); } +static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_USERDATA); +} + static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) { return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && -- cgit v1.2.3