From a42b99a6e329654d376b330de057eff87686d890 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 19 Dec 2013 18:25:14 +0100 Subject: netfilter: avoid get_random_bytes calls All these users need an initial seed value for jhash, prandom is perfectly fine. This avoids draining the entropy pool where its not strictly required. nfnetlink_log did not use the random value at all. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 8 -------- net/netfilter/nft_hash.c | 2 +- net/netfilter/xt_RATEEST.c | 2 +- net/netfilter/xt_connlimit.c | 2 +- net/netfilter/xt_hashlimit.c | 2 +- net/netfilter/xt_recent.c | 2 +- 6 files changed, 5 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 3c4b69e5fe17..7d4254b0dc6b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -28,8 +28,6 @@ #include #include #include -#include -#include #include #include #include @@ -75,7 +73,6 @@ struct nfulnl_instance { }; #define INSTANCE_BUCKETS 16 -static unsigned int hash_init; static int nfnl_log_net_id __read_mostly; @@ -1066,11 +1063,6 @@ static int __init nfnetlink_log_init(void) { int status = -ENOMEM; - /* it's not really all that important to have a random value, so - * we can do this from the init function, even if there hasn't - * been that much entropy yet */ - get_random_bytes(&hash_init, sizeof(hash_init)); - netlink_register_notifier(&nfulnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfulnl_subsys); if (status < 0) { diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 3d3f8fce10a5..6aae699aeb46 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -164,7 +164,7 @@ static int nft_hash_init(const struct nft_set *set, unsigned int cnt, i; if (unlikely(!nft_hash_rnd_initted)) { - get_random_bytes(&nft_hash_rnd, 4); + nft_hash_rnd = prandom_u32(); nft_hash_rnd_initted = true; } diff 
--git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 370adf622cef..190854be7629 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -100,7 +100,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) int ret; if (unlikely(!rnd_inited)) { - get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); + jhash_rnd = prandom_u32(); rnd_inited = true; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index c40b2695633b..7671e8214919 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -229,7 +229,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) u_int32_t rand; do { - get_random_bytes(&rand, sizeof(rand)); + rand = prandom_u32(); } while (!rand); cmpxchg(&connlimit_rnd, 0, rand); } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9ff035c71403..a83a35c81150 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -177,7 +177,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, /* initialize hash with random val at the time we allocate * the first hashtable entry */ if (unlikely(!ht->rnd_initialized)) { - get_random_bytes(&ht->rnd, sizeof(ht->rnd)); + ht->rnd = prandom_u32(); ht->rnd_initialized = true; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 1e657cf715c4..bfdc29f1a04a 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -334,7 +334,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, size_t sz; if (unlikely(!hash_rnd_inited)) { - get_random_bytes(&hash_rnd, sizeof(hash_rnd)); + hash_rnd = prandom_u32(); hash_rnd_inited = true; } if (info->check_set & ~XT_RECENT_VALID_FLAGS) { -- cgit v1.2.3 From 534473c6080e01395058445135df29a8eb638c77 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 19 Dec 2013 18:25:15 +0100 Subject: netfilter: ctnetlink: honor CTA_MARK_MASK when setting ctmark Useful to only set a particular range of 
the conntrack mark while leaving existing parts of the value alone, e.g. when setting conntrack marks via NFQUEUE. Follows same scheme as MARK/CONNMARK targets, i.e. the mask defines those bits that should be altered. No mask is equal to '~0', i.e. the old value is replaced by the new one. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 08870b859046..bb322d0beb48 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2118,8 +2118,16 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) return err; } #if defined(CONFIG_NF_CONNTRACK_MARK) - if (cda[CTA_MARK]) - ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); + if (cda[CTA_MARK]) { + u32 mask = 0, mark, newmark; + if (cda[CTA_MARK_MASK]) + mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + + mark = ntohl(nla_get_be32(cda[CTA_MARK])); + newmark = (ct->mark & mask) ^ mark; + if (newmark != ct->mark) + ct->mark = newmark; + } #endif return 0; } -- cgit v1.2.3 From 08c0cad69f32ad1e881fa3fb7f5e0a25db5b07ce Mon Sep 17 00:00:00 2001 From: Valentina Giusti Date: Fri, 20 Dec 2013 17:28:53 +0100 Subject: netfilter: nfnetlink_queue: enable UID/GID socket info retrieval Thanks to commits 41063e9 (ipv4: Early TCP socket demux) and 421b388 (udp: ipv4: Add udp early demux) it is now possible to parse UID and GID socket info also for incoming TCP and UDP connections. Having this info available, it is convenient to let NFQUEUE parse it in order to improve and refine the traffic analysis in userspace.
Signed-off-by: Valentina Giusti Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 21258cf70091..d3cf12b83174 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -297,6 +297,31 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0; } +static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk) +{ + const struct cred *cred; + + if (sk->sk_state == TCP_TIME_WAIT) + return 0; + + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) { + cred = sk->sk_socket->file->f_cred; + if (nla_put_be32(skb, NFQA_UID, + htonl(from_kuid_munged(&init_user_ns, cred->fsuid)))) + goto nla_put_failure; + if (nla_put_be32(skb, NFQA_GID, + htonl(from_kgid_munged(&init_user_ns, cred->fsgid)))) + goto nla_put_failure; + } + read_unlock_bh(&sk->sk_callback_lock); + return 0; + +nla_put_failure: + read_unlock_bh(&sk->sk_callback_lock); + return -1; +} + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -372,6 +397,11 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (queue->flags & NFQA_CFG_F_CONNTRACK) ct = nfqnl_ct_get(entskb, &size, &ctinfo); + if (queue->flags & NFQA_CFG_F_UID_GID) { + size += (nla_total_size(sizeof(u_int32_t)) /* uid */ + + nla_total_size(sizeof(u_int32_t))); /* gid */ + } + skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); if (!skb) @@ -484,6 +514,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nla_put_failure; } + if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk && + nfqnl_put_sk_uidgid(skb, entskb->sk) < 0) + goto 
nla_put_failure; + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; -- cgit v1.2.3 From 6a649f339802f104549e1fb211e381036661e244 Mon Sep 17 00:00:00 2001 From: "fan.du" Date: Wed, 18 Dec 2013 11:27:02 +0800 Subject: netfilter: add IPv4/6 IPComp extension match support With this plugin, a user can specify that IPComp packets tagged with a certain CPI that the host is not interested in will be DROPped or subjected to any other action. For example: iptables -A INPUT -p 108 -m ipcomp --ipcompspi 0x87 -j DROP ip6tables -A INPUT -p 108 -m ipcomp --ipcompspi 0x87 -j DROP Then an incoming IPComp packet whose CPI equals 0x87 will not reach the upper layer anymore. Signed-off-by: Fan Du Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 9 ++++ net/netfilter/Makefile | 1 + net/netfilter/xt_ipcomp.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+) create mode 100644 net/netfilter/xt_ipcomp.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c3398cd99b94..6d8e48b376fc 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1035,6 +1035,15 @@ config NETFILTER_XT_MATCH_HL in the IPv6 header, or the time-to-live field in the IPv4 header of the packet. +config NETFILTER_XT_MATCH_IPCOMP + tristate '"ipcomp" match support' + depends on NETFILTER_ADVANCED + help + This match extension allows you to match a range of CPIs(16 bits) + inside IPComp header of IPSec packets. + + To compile it as a module, choose M here. If unsure, say N.
+ config NETFILTER_XT_MATCH_IPRANGE tristate '"iprange" address range match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 394483b2c193..398cd709aa09 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -133,6 +133,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o +obj-$(CONFIG_NETFILTER_XT_MATCH_IPCOMP) += xt_ipcomp.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c new file mode 100644 index 000000000000..a4c7561698c5 --- /dev/null +++ b/net/netfilter/xt_ipcomp.c @@ -0,0 +1,111 @@ +/* Kernel module to match IPComp parameters for IPv4 and IPv6 + * + * Copyright (C) 2013 WindRiver + * + * Author: + * Fan Du + * + * Based on: + * net/netfilter/xt_esp.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Fan Du "); +MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match"); + +/* Returns 1 if the spi is matched by the range, 0 otherwise */ +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) +{ + bool r; + pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + invert ? '!' : ' ', min, spi, max); + r = (spi >= min && spi <= max) ^ invert; + pr_debug(" result %s\n", r ? 
"PASS" : "FAILED"); + return r; +} + +static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct ip_comp_hdr _comphdr; + const struct ip_comp_hdr *chdr; + const struct xt_ipcomp *compinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + chdr = skb_header_pointer(skb, par->thoff, sizeof(_comphdr), &_comphdr); + if (chdr == NULL) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + pr_debug("Dropping evil IPComp tinygram.\n"); + par->hotdrop = true; + return 0; + } + + return spi_match(compinfo->spis[0], compinfo->spis[1], + ntohl(chdr->cpi << 16), + !!(compinfo->invflags & XT_IPCOMP_INV_SPI)); +} + +static int comp_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_ipcomp *compinfo = par->matchinfo; + + /* Must specify no unknown invflags */ + if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) { + pr_err("unknown flags %X\n", compinfo->invflags); + return -EINVAL; + } + return 0; +} + +static struct xt_match comp_mt_reg[] __read_mostly = { + { + .name = "ipcomp", + .family = NFPROTO_IPV4, + .match = comp_mt, + .matchsize = sizeof(struct xt_ipcomp), + .proto = IPPROTO_COMP, + .checkentry = comp_mt_check, + .me = THIS_MODULE, + }, + { + .name = "ipcomp", + .family = NFPROTO_IPV6, + .match = comp_mt, + .matchsize = sizeof(struct xt_ipcomp), + .proto = IPPROTO_COMP, + .checkentry = comp_mt_check, + .me = THIS_MODULE, + }, +}; + +static int __init comp_mt_init(void) +{ + return xt_register_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg)); +} + +static void __exit comp_mt_exit(void) +{ + xt_unregister_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg)); +} + +module_init(comp_mt_init); +module_exit(comp_mt_exit); -- cgit v1.2.3 From 9dcbe1b87c4a8e3ed62e95369c18709541a3dc8f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 12 Nov 2013 15:34:24 +0100 Subject: ipvs: Remove unused variable ret from sync_thread_master() 
net/netfilter/ipvs/ip_vs_sync.c: In function 'sync_thread_master': net/netfilter/ipvs/ip_vs_sync.c:1640:8: warning: unused variable 'ret' [-Wunused-variable] Commit 35a2af94c7ce7130ca292c68b1d27fcfdb648f6b ("sched/wait: Make the __wait_event*() interface more friendly") changed how the interruption state is returned. However, sync_thread_master() ignores this state, now causing a compile warning. According to Julian Anastasov , this behavior is OK: "Yes, your patch looks ok to me. In the past we used ssleep() but IPVS users were confused why IPVS threads increase the load average. So, we switched to _interruptible calls and later the socket polling was added." Document this, as requested by Peter Zijlstra, to avoid precious developers disappearing in this pitfall in the future. Signed-off-by: Geert Uytterhoeven Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sync.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f63c2388f38d..db801263ee9f 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1637,7 +1637,10 @@ static int sync_thread_master(void *data) continue; } while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) { - int ret = __wait_event_interruptible(*sk_sleep(sk), + /* (Ab)use interruptible sleep to avoid increasing + * the load avg. + */ + __wait_event_interruptible(*sk_sleep(sk), sock_writeable(sk) || kthread_should_stop()); if (unlikely(kthread_should_stop())) -- cgit v1.2.3 From 34ce324019e76f6d93768d68343a0e78f464d754 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Dec 2013 22:40:29 +0100 Subject: netfilter: nf_nat: add full port randomization support We currently use prandom_u32() for allocation of ports in tcp bind(0) and udp code. In case of plain SNAT we try to keep the ports as is or increment on collision. 
SNAT --random mode does use per-destination incrementing port allocation. A recent paper [1] pointed out that this mode of port allocation makes it possible for an attacker to find the randomly allocated ports through a timing side-channel in a socket overloading attack conducted by an off-path attacker. So, NF_NAT_RANGE_PROTO_RANDOM actually weakens the port randomization in regard to the attack described in this paper. As we need to keep compatibility, add another flag called NF_NAT_RANGE_PROTO_RANDOM_FULLY that would replace the NF_NAT_RANGE_PROTO_RANDOM hash-based port selection algorithm with a simple prandom_u32() in order to mitigate this attack vector. Note that the lfsr113's internal state is periodically reseeded by the kernel through a local secure entropy source. More details can be found in [1]; the basic idea is to send bursts of packets to a socket to overflow its receive queue and measure the latency to detect a possible retransmit when the port is found. Because ports are incremented for a given destination and port, further allocations can be predicted. This information could then be used by an attacker e.g. for cache-poisoning, NS pinning, and degradation of service attacks against DNS servers [1]: The best defense against the poisoning attacks is to properly deploy and validate DNSSEC; DNSSEC provides security not only against off-path attacker but even against MitM attacker. We hope that our results will help motivate administrators to adopt DNSSEC. However, full DNSSEC deployment may take significant time, and until that happens, we recommend short-term, non-cryptographic defenses. We recommend to support full port randomisation, according to practices recommended in [2], and to avoid per-destination sequential port allocation, which we show may be vulnerable to derandomisation attacks. Joint work between Hannes Frederic Sowa and Daniel Borkmann.
[1] https://sites.google.com/site/hayashulman/files/NIC-derandomisation.pdf [2] http://arxiv.org/pdf/1205.5190v1.pdf Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 4 ++-- net/netfilter/nf_nat_proto_common.c | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 63a815402211..d3f5cd6dd962 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -315,7 +315,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, * manips not an issue. */ if (maniptype == NF_NAT_MANIP_SRC && - !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ if (in_range(l3proto, l4proto, orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { @@ -339,7 +339,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, */ /* Only bother mapping if it's not already in range and unique */ - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (l4proto->in_range(tuple, maniptype, &range->min_proto, diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c index 9baaf734c142..83a72a235cae 100644 --- a/net/netfilter/nf_nat_proto_common.c +++ b/net/netfilter/nf_nat_proto_common.c @@ -74,22 +74,24 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, range_size = ntohs(range->max_proto.all) - min + 1; } - if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) + if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) { off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC ? 
tuple->dst.u.all : tuple->src.u.all); - else + } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) { + off = prandom_u32(); + } else { off = *rover; + } for (i = 0; ; ++off) { *portptr = htons(min + off % range_size); if (++i != range_size && nf_nat_used_tuple(tuple, ct)) continue; - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) *rover = off; return; } - return; } EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); -- cgit v1.2.3 From 02eca9d2cc541806e8f03b4131c7ee9120246df7 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 30 Dec 2013 17:13:10 -0800 Subject: netfilter: ipset: remove unused code Function never used in current upstream code. Signed-off-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index bac7e01df67f..de770ec39e51 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -624,34 +624,6 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex); * call nfnl_lock for us. */ -/* - * Find set by name, reference it once. The reference makes sure the - * thing pointed to, does not go away under our feet. - * - * The nfnl mutex is used in the function. - */ -ip_set_id_t -ip_set_nfnl_get(struct net *net, const char *name) -{ - ip_set_id_t i, index = IPSET_INVALID_ID; - struct ip_set *s; - struct ip_set_net *inst = ip_set_pernet(net); - - nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < inst->ip_set_max; i++) { - s = nfnl_set(inst, i); - if (s != NULL && STREQ(s->name, name)) { - __ip_set_get(s); - index = i; - break; - } - } - nfnl_unlock(NFNL_SUBSYS_IPSET); - - return index; -} -EXPORT_SYMBOL_GPL(ip_set_nfnl_get); - /* * Find set by index, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. 
-- cgit v1.2.3 From dcd93ed4cd1669b2c1510e801fe5f1132390761c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 30 Dec 2013 17:16:08 -0800 Subject: netfilter: nf_conntrack: remove dead code The following code is not used in current upstream code. Some of this seems to be old hooks, other might be used by some out of tree module (which I don't care about breaking), and the need_ipv4_conntrack was used by old NAT code but no longer called. Signed-off-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 ------ net/netfilter/nf_conntrack_core.c | 15 --------------- net/netfilter/nf_conntrack_proto.c | 6 ------ 3 files changed, 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ecd8bec411c9..8127dc802865 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -548,9 +548,3 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) module_init(nf_conntrack_l3proto_ipv4_init); module_exit(nf_conntrack_l3proto_ipv4_fini); - -void need_ipv4_conntrack(void) -{ - return; -} -EXPORT_SYMBOL_GPL(need_ipv4_conntrack); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 43549eb7a7be..8824ed0ccc9c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -60,12 +60,6 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, const struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); -int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff); -EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); - DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); @@ -361,15 +355,6 @@ begin: return NULL; } -struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, u16 zone, - 
const struct nf_conntrack_tuple *tuple) -{ - return ____nf_conntrack_find(net, zone, tuple, - hash_conntrack_raw(tuple, zone)); -} -EXPORT_SYMBOL_GPL(__nf_conntrack_find); - /* Find a connection corresponding to a tuple. */ static struct nf_conntrack_tuple_hash * __nf_conntrack_find_get(struct net *net, u16 zone, diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index ce3004156eeb..b65d5864b6d9 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -92,12 +92,6 @@ nf_ct_l3proto_find_get(u_int16_t l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get); -void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p) -{ - module_put(p->me); -} -EXPORT_SYMBOL_GPL(nf_ct_l3proto_put); - int nf_ct_l3proto_try_module_get(unsigned short l3proto) { -- cgit v1.2.3 From 14abfa161d256c60f3ea6ba494704ac634b94f63 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 2 Jan 2014 10:03:45 +0100 Subject: netfilter: xt_CT: fix error value in xt_ct_tg_check() If setting event mask fails then we were returning 0 for success. This patch updates return code to -EINVAL in case of problem. 
Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_CT.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index da35ac06a975..5929be622c5c 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -211,8 +211,10 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, ret = 0; if ((info->ct_events || info->exp_events) && !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, - GFP_KERNEL)) + GFP_KERNEL)) { + ret = -EINVAL; goto err3; + } if (info->helper[0]) { ret = xt_ct_set_helper(ct, info->helper, par); -- cgit v1.2.3 From fe1217c4f3f7d7cbf8efdd8dd5fdc7204a1d65a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 29 Dec 2013 18:27:10 +0100 Subject: net: net_cls: move cgroupfs classid handling into core Zefan Li requested [1] to perform the following cleanup/refactoring: - Split cgroupfs classid handling into net core to better express a possible more generic use. - Disable module support for cgroupfs bits as the majority of other cgroupfs subsystems do not have that, and seems to be not wished from cgroup side. Zefan probably might want to follow-up for netprio later on. - By this, code can be further reduced which previously took care of functionality built when compiled as module. cgroupfs bits are being placed under net/core/netclassid_cgroup.c, so that we are consistent with {netclassid,netprio}_cgroup naming that is under net/core/ as suggested by Zefan. No change in functionality, but only code refactoring that is being done here. 
[1] http://patchwork.ozlabs.org/patch/304825/ Suggested-by: Li Zefan Signed-off-by: Daniel Borkmann Cc: Zefan Li Cc: Thomas Graf Cc: cgroups@vger.kernel.org Acked-by: Li Zefan Signed-off-by: Pablo Neira Ayuso --- net/Kconfig | 7 +++ net/core/Makefile | 1 + net/core/netclassid_cgroup.c | 120 +++++++++++++++++++++++++++++++++++++++++++ net/core/sock.c | 12 ----- net/sched/Kconfig | 1 + net/sched/cls_cgroup.c | 111 +-------------------------------------- 6 files changed, 130 insertions(+), 122 deletions(-) create mode 100644 net/core/netclassid_cgroup.c (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index d334678c0bd8..7da10b830d70 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -245,6 +245,13 @@ config NETPRIO_CGROUP Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis +config CGROUP_NET_CLASSID + boolean "Network classid cgroup" + depends on CGROUPS + ---help--- + Cgroup subsystem for use as general purpose socket classid marker that is + being used in cls_cgroup and for netfilter matching. 
+ config NET_RX_BUSY_POLL boolean default y diff --git a/net/core/Makefile b/net/core/Makefile index b33b996f5dd6..4839a2796964 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o +obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c new file mode 100644 index 000000000000..719efd541668 --- /dev/null +++ b/net/core/netclassid_cgroup.c @@ -0,0 +1,120 @@ +/* + * net/core/netclassid_cgroup.c Classid Cgroupfs Handling + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf + */ + +#include +#include +#include +#include +#include +#include + +static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css) +{ + return css ? 
container_of(css, struct cgroup_cls_state, css) : NULL; +} + +struct cgroup_cls_state *task_cls_state(struct task_struct *p) +{ + return css_cls_state(task_css(p, net_cls_subsys_id)); +} +EXPORT_SYMBOL_GPL(task_cls_state); + +static struct cgroup_subsys_state * +cgrp_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct cgroup_cls_state *cs; + + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) + return ERR_PTR(-ENOMEM); + + return &cs->css; +} + +static int cgrp_css_online(struct cgroup_subsys_state *css) +{ + struct cgroup_cls_state *cs = css_cls_state(css); + struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); + + if (parent) + cs->classid = parent->classid; + + return 0; +} + +static void cgrp_css_free(struct cgroup_subsys_state *css) +{ + kfree(css_cls_state(css)); +} + +static int update_classid(const void *v, struct file *file, unsigned n) +{ + int err; + struct socket *sock = sock_from_file(file, &err); + + if (sock) + sock->sk->sk_classid = (u32)(unsigned long)v; + + return 0; +} + +static void cgrp_attach(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset) +{ + struct cgroup_cls_state *cs = css_cls_state(css); + void *v = (void *)(unsigned long)cs->classid; + struct task_struct *p; + + cgroup_taskset_for_each(p, css, tset) { + task_lock(p); + iterate_fd(p->files, 0, update_classid, v); + task_unlock(p); + } +} + +static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) +{ + return css_cls_state(css)->classid; +} + +static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, + u64 value) +{ + css_cls_state(css)->classid = (u32) value; + + return 0; +} + +static struct cftype ss_files[] = { + { + .name = "classid", + .read_u64 = read_classid, + .write_u64 = write_classid, + }, + { } /* terminate */ +}; + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .css_alloc = cgrp_css_alloc, + .css_online = cgrp_css_online, + .css_free = cgrp_css_free, + .attach = cgrp_attach, + 
.subsys_id = net_cls_subsys_id, + .base_cftypes = ss_files, + .module = THIS_MODULE, +}; + +static int __init init_netclassid_cgroup(void) +{ + return cgroup_load_subsys(&net_cls_subsys); +} +__initcall(init_netclassid_cgroup); diff --git a/net/core/sock.c b/net/core/sock.c index ab20ed9b0f31..3f150729fb15 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1308,18 +1308,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk) -{ - u32 classid; - - classid = task_cls_classid(current); - if (classid != sk->sk_classid) - sk->sk_classid = classid; -} -EXPORT_SYMBOL(sock_update_classid); -#endif - #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) void sock_update_netprioidx(struct sock *sk) { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index ad1f1d819203..f711a471d0b7 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -435,6 +435,7 @@ config NET_CLS_FLOW config NET_CLS_CGROUP tristate "Control Group Classifier" select NET_CLS + select CGROUP_NET_CLASSID depends on CGROUPS ---help--- Say Y here if you want to classify packets based on the control diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 16006c92c3fd..838fa40abad1 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -11,109 +11,13 @@ #include #include -#include -#include -#include #include -#include #include -#include #include #include #include #include -static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css) -{ - return css ? 
container_of(css, struct cgroup_cls_state, css) : NULL; -} - -static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) -{ - return css_cls_state(task_css(p, net_cls_subsys_id)); -} - -static struct cgroup_subsys_state * -cgrp_css_alloc(struct cgroup_subsys_state *parent_css) -{ - struct cgroup_cls_state *cs; - - cs = kzalloc(sizeof(*cs), GFP_KERNEL); - if (!cs) - return ERR_PTR(-ENOMEM); - return &cs->css; -} - -static int cgrp_css_online(struct cgroup_subsys_state *css) -{ - struct cgroup_cls_state *cs = css_cls_state(css); - struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); - - if (parent) - cs->classid = parent->classid; - return 0; -} - -static void cgrp_css_free(struct cgroup_subsys_state *css) -{ - kfree(css_cls_state(css)); -} - -static int update_classid(const void *v, struct file *file, unsigned n) -{ - int err; - struct socket *sock = sock_from_file(file, &err); - if (sock) - sock->sk->sk_classid = (u32)(unsigned long)v; - return 0; -} - -static void cgrp_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) -{ - struct task_struct *p; - struct cgroup_cls_state *cs = css_cls_state(css); - void *v = (void *)(unsigned long)cs->classid; - - cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid, v); - task_unlock(p); - } -} - -static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) -{ - return css_cls_state(css)->classid; -} - -static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, - u64 value) -{ - css_cls_state(css)->classid = (u32) value; - return 0; -} - -static struct cftype ss_files[] = { - { - .name = "classid", - .read_u64 = read_classid, - .write_u64 = write_classid, - }, - { } /* terminate */ -}; - -struct cgroup_subsys net_cls_subsys = { - .name = "net_cls", - .css_alloc = cgrp_css_alloc, - .css_online = cgrp_css_online, - .css_free = cgrp_css_free, - .attach = cgrp_attach, - .subsys_id = net_cls_subsys_id, - 
.base_cftypes = ss_files, - .module = THIS_MODULE, -}; - struct cls_cgroup_head { u32 handle; struct tcf_exts exts; @@ -309,25 +213,12 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { static int __init init_cgroup_cls(void) { - int ret; - - ret = cgroup_load_subsys(&net_cls_subsys); - if (ret) - goto out; - - ret = register_tcf_proto_ops(&cls_cgroup_ops); - if (ret) - cgroup_unload_subsys(&net_cls_subsys); - -out: - return ret; + return register_tcf_proto_ops(&cls_cgroup_ops); } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); - - cgroup_unload_subsys(&net_cls_subsys); } module_init(init_cgroup_cls); -- cgit v1.2.3 From 86f8515f9721fa171483f0fe0391968fbb949cc9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 29 Dec 2013 17:27:11 +0100 Subject: net: netprio: rename config to be more consistent with cgroup configs While we're at it and introduced CGROUP_NET_CLASSID, lets also make NETPRIO_CGROUP more consistent with the rest of cgroups and rename it into CONFIG_CGROUP_NET_PRIO so that for networking, we now have CONFIG_CGROUP_NET_{PRIO,CLASSID}. This not only makes the CONFIG option consistent among networking cgroups, but also among cgroups CONFIG conventions in general as the vast majority has a prefix of CONFIG_CGROUP_. Signed-off-by: Daniel Borkmann Cc: Zefan Li Cc: cgroups@vger.kernel.org Acked-by: Li Zefan Signed-off-by: Pablo Neira Ayuso --- net/Kconfig | 4 ++-- net/core/Makefile | 2 +- net/core/dev.c | 2 +- net/core/sock.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 7da10b830d70..e411046a62e3 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -238,12 +238,12 @@ config XPS depends on SMP default y -config NETPRIO_CGROUP +config CGROUP_NET_PRIO tristate "Network priority cgroup" depends on CGROUPS ---help--- Cgroup subsystem for use in assigning processes to network priorities on - a per-interface basis + a per-interface basis. 
config CGROUP_NET_CLASSID boolean "Network classid cgroup" diff --git a/net/core/Makefile b/net/core/Makefile index 4839a2796964..9628c20acff6 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -21,5 +21,5 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o -obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o +obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o diff --git a/net/core/dev.c b/net/core/dev.c index c95d664b2b42..888a79b2b8b9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2747,7 +2747,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) static void skb_update_prio(struct sk_buff *skb) { struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); diff --git a/net/core/sock.c b/net/core/sock.c index 3f150729fb15..a29735c9a05d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1308,7 +1308,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) void sock_update_netprioidx(struct sock *sk) { if (in_interrupt()) -- cgit v1.2.3 From 82a37132f300ea53bdcd812917af5a6329ec80c3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 29 Dec 2013 18:27:12 +0100 Subject: netfilter: x_tables: lightweight process control group matching It would be useful e.g. in a server or desktop environment to have a facility in the notion of fine-grained "per application" or "per application group" firewall policies. Probably, users in the mobile, embedded area (e.g. Android based) with different security policy requirements for application groups could have great benefit from that as well. 
For example, with a little bit of configuration effort, an admin could whitelist well-known applications, and thus block otherwise unwanted "hard-to-track" applications like [1] from a user's machine. Blocking is just one example, but it is not limited to that, meaning we can have many different scenarios/policies that netfilter allows us than just blocking, e.g. fine-grained settings where applications are allowed to connect/send traffic to, application traffic marking/conntracking, application-specific packet mangling, and so on. Implementation of PID-based matching would not be appropriate as they frequently change, and child tracking would make that even more complex and ugly. Cgroups would be a perfect candidate for accomplishing that as they associate a set of tasks with a set of parameters for one or more subsystems, in our case the netfilter subsystem, which, of course, can be combined with other cgroup subsystems into something more complex if needed. As mentioned, to overcome this constraint, such processes could be placed into one or multiple cgroups where different fine-grained rules can be defined depending on the application scenario, while e.g. everything else that is not part of that could be dropped (or vice versa), thus making life harder for unwanted processes to communicate to the outside world. So, we make use of cgroups here to track jobs and limit their resources in terms of iptables policies; in other words, limiting, tracking, etc., what they are allowed to communicate. In our case we're working on outgoing traffic based on the local socket it originated from. Also, one doesn't even need to have a priori knowledge of the application internals regarding their particular use of ports or protocols. Matching is *extremely* lightweight as we just test for the sk_classid marker of sockets, originating from net_cls.
net_cls and netfilter do not contradict each other; in fact, each construct can live as standalone or they can be used in combination with each other, which is perfectly fine, plus it serves Tejun's requirement to not introduce a new cgroups subsystem. Through this, we result in a very minimal and efficient module, and don't add anything except netfilter code. One possible, minimal usage example (many other iptables options can be applied obviously): 1) Configuring cgroups if not already done, e.g.: mkdir /sys/fs/cgroup/net_cls mount -t cgroup -o net_cls net_cls /sys/fs/cgroup/net_cls mkdir /sys/fs/cgroup/net_cls/0 echo 1 > /sys/fs/cgroup/net_cls/0/net_cls.classid (resp. a real flow handle id for tc) 2) Configuring netfilter (iptables-nftables), e.g.: iptables -A OUTPUT -m cgroup ! --cgroup 1 -j DROP 3) Running applications, e.g.: ping 208.67.222.222 echo 1799 > /sys/fs/cgroup/net_cls/0/tasks 64 bytes from 208.67.222.222: icmp_seq=44 ttl=49 time=11.9 ms [...] ping 208.67.220.220 ping: sendmsg: Operation not permitted [...] echo 1804 > /sys/fs/cgroup/net_cls/0/tasks 64 bytes from 208.67.220.220: icmp_seq=89 ttl=56 time=19.0 ms [...] Of course, real-world deployments would make use of cgroups user space toolsuite, or own custom policy daemons dynamically moving applications from/to various cgroups. 
[1] http://www.blackhat.com/presentations/bh-europe-06/bh-eu-06-biondi/bh-eu-06-biondi-up.pdf Signed-off-by: Daniel Borkmann Cc: Tejun Heo Cc: cgroups@vger.kernel.org Acked-by: Li Zefan Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 10 +++++++ net/netfilter/Makefile | 1 + net/netfilter/xt_cgroup.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 net/netfilter/xt_cgroup.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6d8e48b376fc..c17902cb5df9 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -858,6 +858,16 @@ config NETFILTER_XT_MATCH_BPF To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_CGROUP + tristate '"control group" match support' + depends on NETFILTER_ADVANCED + depends on CGROUPS + select CGROUP_NET_CLASSID + ---help--- + Socket/process control group matching allows you to match locally + generated packets based on which net_cls control group processes + belong to. 
+ config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 398cd709aa09..407fc232f625 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -143,6 +143,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CGROUP) += xt_cgroup.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c new file mode 100644 index 000000000000..9a8e77e7f8d4 --- /dev/null +++ b/net/netfilter/xt_cgroup.c @@ -0,0 +1,71 @@ +/* + * Xtables module to match the process control group. + * + * Might be used to implement individual "per-application" firewall + * policies in contrast to global policies based on control groups. + * Matching is based upon processes tagged to net_cls' classid marker. + * + * (C) 2013 Daniel Borkmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Borkmann "); +MODULE_DESCRIPTION("Xtables: process control group matching"); +MODULE_ALIAS("ipt_cgroup"); +MODULE_ALIAS("ip6t_cgroup"); + +static int cgroup_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_cgroup_info *info = par->matchinfo; + + if (info->invert & ~1) + return -EINVAL; + + return info->id ? 
0 : -EINVAL; +} + +static bool +cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cgroup_info *info = par->matchinfo; + + if (skb->sk == NULL) + return false; + + return (info->id == skb->sk->sk_classid) ^ info->invert; +} + +static struct xt_match cgroup_mt_reg __read_mostly = { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check, + .match = cgroup_mt, + .matchsize = sizeof(struct xt_cgroup_info), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), +}; + +static int __init cgroup_mt_init(void) +{ + return xt_register_match(&cgroup_mt_reg); +} + +static void __exit cgroup_mt_exit(void) +{ + xt_unregister_match(&cgroup_mt_reg); +} + +module_init(cgroup_mt_init); +module_exit(cgroup_mt_exit); -- cgit v1.2.3