diff options
Diffstat (limited to 'net')
43 files changed, 5119 insertions, 354 deletions
diff --git a/net/Kconfig b/net/Kconfig index e07272d0bb2d..0b005b95206a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -10,7 +10,7 @@ menuconfig NET The reason is that some programs need kernel networking support even when running on a stand-alone machine that isn't connected to any other computer. - + If you are upgrading from an older kernel, you should consider updating your networking tools too because changes in the kernel and the tools often go hand in hand. The tools are @@ -79,6 +79,20 @@ source "net/netlabel/Kconfig" endif # if INET +config ANDROID_PARANOID_NETWORK + bool "Only allow certain groups to create sockets" + default y + help + none + +config NET_ACTIVITY_STATS + bool "Network activity statistics tracking" + default y + help + Network activity statistics are useful for tracking wireless + modem activity on 2G, 3G, 4G wireless networks. Counts number of + transmissions and groups them in specified time buckets. + config NETWORK_SECMARK bool "Security Marking" help @@ -218,7 +232,7 @@ source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" config RPS - boolean + boolean "RPS" depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y @@ -288,7 +302,7 @@ config NET_TCPPROBE Documentation on how to use TCP connection probing can be found at: - + http://www.linuxfoundation.org/collaborate/workgroups/networking/tcpprobe To compile this code as a module, choose M here: the diff --git a/net/Makefile b/net/Makefile index ad432fa4d934..6865dab6af4c 100644 --- a/net/Makefile +++ b/net/Makefile @@ -70,3 +70,4 @@ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ +obj-$(CONFIG_NET_ACTIVITY_STATS) += activity_stats.o diff --git a/net/activity_stats.c b/net/activity_stats.c new file mode 100644 index 000000000000..8a3e93470069 --- /dev/null +++ b/net/activity_stats.c @@ -0,0 +1,115 @@ +/* net/activity_stats.c + * + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Mike Chan (mike@android.com) + */ + +#include <linux/proc_fs.h> +#include <linux/suspend.h> +#include <net/net_namespace.h> + +/* + * Track transmission rates in buckets (power of 2). + * 1,2,4,8...512 seconds. + * + * Buckets represent the count of network transmissions at least + * N seconds apart, where N is 1 << bucket index. + */ +#define BUCKET_MAX 10 + +/* Track network activity frequency */ +static unsigned long activity_stats[BUCKET_MAX]; +static ktime_t last_transmit; +static ktime_t suspend_time; +static DEFINE_SPINLOCK(activity_lock); + +void activity_stats_update(void) +{ + int i; + unsigned long flags; + ktime_t now; + s64 delta; + + spin_lock_irqsave(&activity_lock, flags); + now = ktime_get(); + delta = ktime_to_ns(ktime_sub(now, last_transmit)); + + for (i = BUCKET_MAX - 1; i >= 0; i--) { + /* + * Check if the time delta between network activity is within the + * minimum bucket range. + */ + if (delta < (1000000000ULL << i)) + continue; + + activity_stats[i]++; + last_transmit = now; + break; + } + spin_unlock_irqrestore(&activity_lock, flags); +} + +static int activity_stats_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int i; + int len; + char *p = page; + + /* Only print if offset is 0, or we have enough buffer space */ + if (off || count < (30 * BUCKET_MAX + 22)) + return -ENOMEM; + + len = snprintf(p, count, "Min Bucket(sec) Count\n"); + count -= len; + p += len; + + for (i = 0; i < BUCKET_MAX; i++) { + len = snprintf(p, count, "%15d %lu\n", 1 << i, activity_stats[i]); + count -= len; + p += len; + } + *eof = 1; + + return p - page; +} + +static int activity_stats_notifier(struct notifier_block *nb, + unsigned long event, void *dummy) +{ + switch (event) { + case PM_SUSPEND_PREPARE: + suspend_time = ktime_get_real(); + break; + + case PM_POST_SUSPEND: + suspend_time = ktime_sub(ktime_get_real(), suspend_time); + last_transmit = ktime_sub(last_transmit, suspend_time); + } + + return 0; +} + +static struct notifier_block activity_stats_notifier_block = { + .notifier_call = activity_stats_notifier, +}; + +static int __init activity_stats_init(void) +{ + create_proc_read_entry("activity", S_IRUGO, + init_net.proc_net_stat, activity_stats_read_proc, NULL); + return register_pm_notifier(&activity_stats_notifier_block); +} + +subsys_initcall(activity_stats_init); + diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 6fb68a9743af..b9af14e8a9ee 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -40,6 +40,15 @@ #include <net/bluetooth/bluetooth.h> +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include <linux/android_aid.h> +#endif + +#ifndef CONFIG_BT_SOCK_DEBUG +#undef BT_DBG +#define BT_DBG(D...) +#endif + #define VERSION "2.16" /* Bluetooth sockets */ @@ -122,11 +131,40 @@ int bt_sock_unregister(int proto) } EXPORT_SYMBOL(bt_sock_unregister); +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +static inline int current_has_bt_admin(void) +{ + return (!current_euid() || in_egroup_p(AID_NET_BT_ADMIN)); +} + +static inline int current_has_bt(void) +{ + return (current_has_bt_admin() || in_egroup_p(AID_NET_BT)); +} +# else +static inline int current_has_bt_admin(void) +{ + return 1; +} + +static inline int current_has_bt(void) +{ + return 1; +} +#endif + static int bt_sock_create(struct net *net, struct socket *sock, int proto, int kern) { int err; + if (proto == BTPROTO_RFCOMM || proto == BTPROTO_SCO || + proto == BTPROTO_L2CAP) { + if (!current_has_bt()) + return -EPERM; + } else if (!current_has_bt_admin()) + return -EPERM; + if (net != &init_net) return -EAFNOSUPPORT; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 5238b6b3ea6a..a5f93a9a0133 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -360,7 +360,8 @@ static void hci_conn_auto_accept(unsigned long arg) &conn->dst); } -struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) +struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, + __u16 pkt_type, bdaddr_t *dst) { struct hci_conn *conn; @@ -388,14 +389,22 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK; break; case SCO_LINK: - if (lmp_esco_capable(hdev)) - conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | - (hdev->esco_type & EDR_ESCO_MASK); - else - conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK; - break; + if (!pkt_type) + pkt_type = SCO_ESCO_MASK; case ESCO_LINK: - conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK; + if (!pkt_type) + pkt_type = ALL_ESCO_MASK; + if (lmp_esco_capable(hdev)) { + /* HCI Setup Synchronous Connection Command uses + reverse logic on the EDR_ESCO_MASK bits */ + conn->pkt_type = (pkt_type ^ EDR_ESCO_MASK) & + hdev->esco_type; + } else { + /* Legacy HCI Add Sco Connection Command uses a + shifted bitmask */ + conn->pkt_type = (pkt_type << 5) & hdev->pkt_type & + SCO_PTYPE_MASK; + } break; } @@ -513,7 +522,9 @@ EXPORT_SYMBOL(hci_get_route); /* Create SCO, ACL or LE connection. * Device _must_ be locked */ -struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type) +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, + __u16 pkt_type, bdaddr_t *dst, + __u8 sec_level, __u8 auth_type) { struct hci_conn *acl; struct hci_conn *sco; @@ -532,7 +543,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 if (!entry) return ERR_PTR(-EHOSTUNREACH); - le = hci_conn_add(hdev, LE_LINK, dst); + le = hci_conn_add(hdev, LE_LINK, 0, dst); if (!le) return ERR_PTR(-ENOMEM); @@ -547,7 +558,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { - acl = hci_conn_add(hdev, ACL_LINK, dst); + acl = hci_conn_add(hdev, ACL_LINK, 0, dst); if (!acl) return ERR_PTR(-ENOMEM); } @@ -566,7 +577,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sco = hci_conn_hash_lookup_ba(hdev, type, dst); if (!sco) { - sco = hci_conn_add(hdev, type, dst); + sco = hci_conn_add(hdev, type, pkt_type, dst); if (!sco) { hci_conn_put(acl); return ERR_PTR(-ENOMEM); @@ -870,6 +881,15 @@ int hci_get_conn_list(void __user *arg) (ci + n)->out = c->out; (ci + n)->state = c->state; (ci + n)->link_mode = c->link_mode; + if (c->type == SCO_LINK) { + (ci + n)->mtu = hdev->sco_mtu; + (ci + n)->cnt = hdev->sco_cnt; + (ci + n)->pkts = hdev->sco_pkts; + } else { + (ci + n)->mtu = hdev->acl_mtu; + (ci + n)->cnt = hdev->acl_cnt; + (ci + n)->pkts = hdev->acl_pkts; + } if (++n >= req.conn_num) break; } @@ -906,6 +926,15 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg) ci.out = conn->out; ci.state = conn->state; ci.link_mode = conn->link_mode; + if (req.type == SCO_LINK) { + ci.mtu = hdev->sco_mtu; + ci.cnt = hdev->sco_cnt; + ci.pkts = hdev->sco_pkts; + } else { + ci.mtu = hdev->acl_mtu; + ci.cnt = hdev->acl_cnt; + ci.pkts = hdev->acl_pkts; + } } hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d6dc44cd15b0..70a398ed4011 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2155,6 +2155,9 @@ static int hci_send_frame(struct sk_buff *skb) /* Get rid of skb owner, prior to sending to the driver. */ skb_orphan(skb); + /* Notify the registered devices about a new send */ + hci_notify(hdev, HCI_DEV_WRITE); + return hdev->send(skb); } @@ -2167,6 +2170,11 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) BT_DBG("%s opcode 0x%x plen %d", hdev->name, opcode, plen); + if (!hdev->workqueue) { + WARN_ON("hci_send_cmd: workqueue not initialised"); + return -ENOMEM; + } + skb = bt_skb_alloc(len, GFP_ATOMIC); if (!skb) { BT_ERR("%s no memory for command", hdev->name); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1266f78fa8e3..626318c122f4 100644..100755 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1213,7 +1213,7 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) } } else { if (!conn) { - conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr); + conn = hci_conn_add(hdev, ACL_LINK, 0, &cp->bdaddr); if (conn) { conn->out = true; conn->link_mode |= HCI_LM_MASTER; @@ -1630,7 +1630,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status) } } else { if (!conn) { - conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr); + conn = hci_conn_add(hdev, LE_LINK, 0, &cp->peer_addr); if (conn) { conn->dst_type = cp->peer_addr_type; conn->out = true; @@ -1800,6 +1800,15 @@ unlock: hci_conn_check_pending(hdev); } +static inline bool is_sco_active(struct hci_dev *hdev) +{ + if (hci_conn_hash_lookup_state(hdev, SCO_LINK, BT_CONNECTED) || + (hci_conn_hash_lookup_state(hdev, ESCO_LINK, + BT_CONNECTED))) + return true; + return false; +} + static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_conn_request *ev = (void *) skb->data; @@ -1824,7 +1833,8 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { - conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr); + /* pkt_type not yet used for incoming connections */ + conn = hci_conn_add(hdev, ev->link_type, 0, &ev->bdaddr); if (!conn) { BT_ERR("No memory for new connection"); hci_dev_unlock(hdev); @@ -1842,7 +1852,8 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk bacpy(&cp.bdaddr, &ev->bdaddr); - if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) + if (lmp_rswitch_capable(hdev) && ((mask & HCI_LM_MASTER) + || is_sco_active(hdev))) cp.role = 0x00; /* Become master */ else cp.role = 0x01; /* Remain slave */ @@ -2929,6 +2940,7 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu hci_conn_add_sysfs(conn); break; + case 0x10: /* Connection Accept Timeout */ case 0x11: /* Unsupported Feature or Parameter Value */ case 0x1c: /* SCO interval rejected */ case 0x1a: /* Unsupported Remote Feature */ @@ -3274,7 +3286,7 @@ static inline void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr); if (!conn) { - conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); + conn = hci_conn_add(hdev, LE_LINK, 0, &ev->bdaddr); if (!conn) { BT_ERR("No memory for new connection"); hci_dev_unlock(hdev); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 6f9c25b633a6..0939c7295a64 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1218,10 +1218,10 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *d auth_type = l2cap_get_auth_type(chan); if (chan->dcid == L2CAP_CID_LE_DATA) - hcon = hci_connect(hdev, LE_LINK, dst, + hcon = hci_connect(hdev, LE_LINK, 0, dst, chan->sec_level, auth_type); else - hcon = hci_connect(hdev, ACL_LINK, dst, + hcon = hci_connect(hdev, ACL_LINK, 0, dst, chan->sec_level, auth_type); if (IS_ERR(hcon)) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4bb03b111122..c05c3a69c372 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1891,11 +1891,11 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, auth_type = HCI_AT_DEDICATED_BONDING_MITM; if (cp->addr.type == MGMT_ADDR_BREDR) - conn = hci_connect(hdev, ACL_LINK, &cp->addr.bdaddr, sec_level, - auth_type); + conn = hci_connect(hdev, ACL_LINK, 0, &cp->addr.bdaddr, + sec_level, auth_type); else - conn = hci_connect(hdev, LE_LINK, &cp->addr.bdaddr, sec_level, - auth_type); + conn = hci_connect(hdev, LE_LINK, 0, &cp->addr.bdaddr, + sec_level, auth_type); memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 8a602388f1e7..9652707b1a05 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -462,7 +462,6 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) switch (d->state) { case BT_CONNECT: - case BT_CONFIG: if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { set_bit(RFCOMM_AUTH_REJECT, &d->flags); rfcomm_schedule(); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index f6ab12907963..458cec074114 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -177,6 +177,7 @@ static int sco_connect(struct sock *sk) { bdaddr_t *src = &bt_sk(sk)->src; bdaddr_t *dst = &bt_sk(sk)->dst; + __u16 pkt_type = sco_pi(sk)->pkt_type; struct sco_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; @@ -192,10 +193,12 @@ static int sco_connect(struct sock *sk) if (lmp_esco_capable(hdev) && !disable_esco) type = ESCO_LINK; - else + else { type = SCO_LINK; + pkt_type &= SCO_ESCO_MASK; + } - hcon = hci_connect(hdev, type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING); + hcon = hci_connect(hdev, type, pkt_type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto done; @@ -462,18 +465,22 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol, return 0; } -static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) { - struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; + struct sockaddr_sco sa; struct sock *sk = sock->sk; - bdaddr_t *src = &sa->sco_bdaddr; - int err = 0; + bdaddr_t *src = &sa.sco_bdaddr; + int len, err = 0; - BT_DBG("sk %p %s", sk, batostr(&sa->sco_bdaddr)); + BT_DBG("sk %p %s", sk, batostr(&sa.sco_bdaddr)); if (!addr || addr->sa_family != AF_BLUETOOTH) return -EINVAL; + memset(&sa, 0, sizeof(sa)); + len = min_t(unsigned int, sizeof(sa), alen); + memcpy(&sa, addr, len); + lock_sock(sk); if (sk->sk_state != BT_OPEN) { @@ -487,7 +494,8 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le err = -EADDRINUSE; } else { /* Save source address */ - bacpy(&bt_sk(sk)->src, &sa->sco_bdaddr); + bacpy(&bt_sk(sk)->src, &sa.sco_bdaddr); + sco_pi(sk)->pkt_type = sa.sco_pkt_type; sk->sk_state = BT_BOUND; } @@ -500,27 +508,34 @@ done: static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { - struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; - int err = 0; - + struct sockaddr_sco sa; + int len, err = 0; BT_DBG("sk %p", sk); - if (alen < sizeof(struct sockaddr_sco) || - addr->sa_family != AF_BLUETOOTH) + if (!addr || addr->sa_family != AF_BLUETOOTH) return -EINVAL; - if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) - return -EBADFD; - - if (sk->sk_type != SOCK_SEQPACKET) - return -EINVAL; + memset(&sa, 0, sizeof(sa)); + len = min_t(unsigned int, sizeof(sa), alen); + memcpy(&sa, addr, len); lock_sock(sk); + if (sk->sk_type != SOCK_SEQPACKET) { + err = -EINVAL; + goto done; + } + + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + err = -EBADFD; + goto done; + } + /* Set destination address and psm */ - bacpy(&bt_sk(sk)->dst, &sa->sco_bdaddr); + bacpy(&bt_sk(sk)->dst, &sa.sco_bdaddr); + sco_pi(sk)->pkt_type = sa.sco_pkt_type; err = sco_connect(sk); if (err) @@ -627,6 +642,7 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len bacpy(&sa->sco_bdaddr, &bt_sk(sk)->dst); else bacpy(&sa->sco_bdaddr, &bt_sk(sk)->src); + sa->sco_pkt_type = sco_pi(sk)->pkt_type; return 0; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ba829de84423..9ec0822f440b 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -38,16 +38,17 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) } #endif - u64_stats_update_begin(&brstats->syncp); - brstats->tx_packets++; - brstats->tx_bytes += skb->len; - u64_stats_update_end(&brstats->syncp); - BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); + u64_stats_update_begin(&brstats->syncp); + brstats->tx_packets++; + /* Exclude ETH_HLEN from byte stats for consistency with Rx chain */ + brstats->tx_bytes += skb->len; + u64_stats_update_end(&brstats->syncp); + rcu_read_lock(); if (is_broadcast_ether_addr(dest)) br_flood_deliver(br, skb); diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ff75d3bbcd6a..c6f177cf7130 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -14,6 +14,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_fragment.o ping.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o +obj-$(CONFIG_SYSFS) += sysfs_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 10e3751466b5..0b711659ac74 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -119,6 +119,19 @@ #include <linux/mroute.h> #endif +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include <linux/android_aid.h> + +static inline int current_has_network(void) +{ + return in_egroup_p(AID_INET) || capable(CAP_NET_RAW); +} +#else +static inline int current_has_network(void) +{ + return 1; +} +#endif /* The inetsw table contains everything that inet_create needs to * build a new socket. @@ -259,6 +272,7 @@ static inline int inet_netns_ok(struct net *net, int protocol) return ipprot->netns_ok; } + /* * Create an inet socket. */ @@ -275,6 +289,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; + if (!current_has_network()) + return -EACCES; + if (unlikely(!inet_ehash_secret)) if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) build_ehash_secret(); @@ -881,6 +898,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFPFLAGS: case SIOCGIFPFLAGS: case SIOCSIFFLAGS: + case SIOCKILLADDR: err = devinet_ioctl(net, cmd, (void __user *)arg); break; default: diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6e447ff94dfa..8a9aab37f0a7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -58,6 +58,7 @@ #include <net/arp.h> #include <net/ip.h> +#include <net/tcp.h> #include <net/route.h> #include <net/ip_fib.h> #include <net/rtnetlink.h> @@ -734,6 +735,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSIFBRDADDR: /* Set the broadcast address */ case SIOCSIFDSTADDR: /* Set the destination address */ case SIOCSIFNETMASK: /* Set the netmask for the interface */ + case SIOCKILLADDR: /* Nuke all sockets on this address */ ret = -EACCES; if (!capable(CAP_NET_ADMIN)) goto out; @@ -785,7 +787,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) } ret = -EADDRNOTAVAIL; - if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) + if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS + && cmd != SIOCKILLADDR) goto done; switch (cmd) { @@ -911,6 +914,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) inet_insert_ifa(ifa); } break; + case SIOCKILLADDR: /* Nuke all connections on this address */ + ret = tcp_nuke_addr(net, (struct sockaddr *) sin); + break; } done: rtnl_unlock(); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index fcc543cd987a..55ba5fee92cf 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -123,6 +123,18 @@ config IP_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_REJECT_SKERR + bool "Force socket error when rejecting with icmp*" + depends on IP_NF_TARGET_REJECT + default n + help + This option enables turning a "--reject-with icmp*" into a matching + socket error also. + The REJECT target normally allows sending an ICMP message. But it + leaves the local socket unaware of any ingress rejects. + + If unsure, say N. + config IP_NF_TARGET_ULOG tristate "ULOG target support" default m if NETFILTER_ADVANCED=n @@ -296,7 +308,7 @@ config IP_NF_TARGET_CLUSTERIP The CLUSTERIP target allows you to build load-balancing clusters of network servers without having a dedicated load-balancing router/server/switch. - + To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_ECN @@ -305,7 +317,7 @@ config IP_NF_TARGET_ECN depends on NETFILTER_ADVANCED ---help--- This option adds a `ECN' target, which can be used in the iptables mangle - table. + table. You can use this target to remove the ECN bits from the IPv4 header of an IP packet. This is particularly useful, if you need to work around @@ -330,7 +342,7 @@ config IP_NF_RAW This option adds a `raw' table to iptables. This table is the very first in the netfilter framework and hooks in at the PREROUTING and OUTPUT chains. - + If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. @@ -342,7 +354,7 @@ config IP_NF_SECURITY help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. - + If unsure, say N. endif # IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 51f13f8ec724..9dd754c7f2b6 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -128,6 +128,14 @@ static void send_reset(struct sk_buff *oldskb, int hook) static inline void send_unreach(struct sk_buff *skb_in, int code) { icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); +#ifdef CONFIG_IP_NF_TARGET_REJECT_SKERR + if (skb_in->sk) { + skb_in->sk->sk_err = icmp_err_convert[code].errno; + skb_in->sk->sk_error_report(skb_in->sk); + pr_debug("ipt_REJECT: sk_err=%d for skb=%p sk=%p\n", + skb_in->sk->sk_err, skb_in, skb_in->sk); + } +#endif } static unsigned int diff --git a/net/ipv4/sysfs_net_ipv4.c b/net/ipv4/sysfs_net_ipv4.c new file mode 100644 index 000000000000..0cbbf10026a6 --- /dev/null +++ b/net/ipv4/sysfs_net_ipv4.c @@ -0,0 +1,88 @@ +/* + * net/ipv4/sysfs_net_ipv4.c + * + * sysfs-based networking knobs (so we can, unlike with sysctl, control perms) + * + * Copyright (C) 2008 Google, Inc. + * + * Robert Love <rlove@google.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/kobject.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/init.h> +#include <net/tcp.h> + +#define CREATE_IPV4_FILE(_name, _var) \ +static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%d\n", _var); \ +} \ +static ssize_t _name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + int val, ret; \ + ret = sscanf(buf, "%d", &val); \ + if (ret != 1) \ + return -EINVAL; \ + if (val < 0) \ + return -EINVAL; \ + _var = val; \ + return count; \ +} \ +static struct kobj_attribute _name##_attr = \ + __ATTR(_name, 0644, _name##_show, _name##_store) + +CREATE_IPV4_FILE(tcp_wmem_min, sysctl_tcp_wmem[0]); +CREATE_IPV4_FILE(tcp_wmem_def, sysctl_tcp_wmem[1]); +CREATE_IPV4_FILE(tcp_wmem_max, sysctl_tcp_wmem[2]); + +CREATE_IPV4_FILE(tcp_rmem_min, sysctl_tcp_rmem[0]); +CREATE_IPV4_FILE(tcp_rmem_def, sysctl_tcp_rmem[1]); +CREATE_IPV4_FILE(tcp_rmem_max, sysctl_tcp_rmem[2]); + +static struct attribute *ipv4_attrs[] = { + &tcp_wmem_min_attr.attr, + &tcp_wmem_def_attr.attr, + &tcp_wmem_max_attr.attr, + &tcp_rmem_min_attr.attr, + &tcp_rmem_def_attr.attr, + &tcp_rmem_max_attr.attr, + NULL +}; + +static struct attribute_group ipv4_attr_group = { + .attrs = ipv4_attrs, +}; + +static __init int sysfs_ipv4_init(void) +{ + struct kobject *ipv4_kobject; + int ret; + + ipv4_kobject = kobject_create_and_add("ipv4", kernel_kobj); + if (!ipv4_kobject) + return -ENOMEM; + + ret = sysfs_create_group(ipv4_kobject, &ipv4_attr_group); + if (ret) { + kobject_put(ipv4_kobject); + return ret; + } + + return 0; +} + +subsys_initcall(sysfs_ipv4_init); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6589e11d57b6..cf54e10b2587 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -268,11 +268,15 @@ #include <linux/crypto.h> #include <linux/time.h> #include <linux/slab.h> +#include <linux/uid_stat.h> #include <net/icmp.h> #include <net/tcp.h> #include <net/xfrm.h> #include <net/ip.h> +#include <net/ip6_route.h> +#include <net/ipv6.h> +#include <net/transp_v6.h> #include <net/netdma.h> #include <net/sock.h> @@ -1115,6 +1119,9 @@ out: if (copied) tcp_push(sk, flags, mss_now, tp->nonagle); release_sock(sk); + + if (copied > 0) + uid_stat_tcp_snd(current_uid(), copied); return copied; do_fault: @@ -1389,8 +1396,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. */ - if (copied > 0) + if (copied > 0) { tcp_cleanup_rbuf(sk, copied); + uid_stat_tcp_rcv(current_uid(), copied); + } + return copied; } EXPORT_SYMBOL(tcp_read_sock); @@ -1773,6 +1783,9 @@ skip_copy: tcp_cleanup_rbuf(sk, copied); release_sock(sk); + + if (copied > 0) + uid_stat_tcp_rcv(current_uid(), copied); return copied; out: @@ -1781,6 +1794,8 @@ out: recv_urg: err = tcp_recv_urg(sk, msg, len, flags); + if (err > 0) + uid_stat_tcp_rcv(current_uid(), err); goto out; } EXPORT_SYMBOL(tcp_recvmsg); @@ -3327,3 +3342,107 @@ void __init tcp_init(void) tcp_secret_retiring = &tcp_secret_two; tcp_secret_secondary = &tcp_secret_two; } + +static int tcp_is_local(struct net *net, __be32 addr) { + struct rtable *rt; + struct flowi4 fl4 = { .daddr = addr }; + rt = ip_route_output_key(net, &fl4); + if (IS_ERR_OR_NULL(rt)) + return 0; + return rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int tcp_is_local6(struct net *net, struct in6_addr *addr) { + struct rt6_info *rt6 = rt6_lookup(net, addr, addr, 0, 0); + return rt6 && rt6->dst.dev && (rt6->dst.dev->flags & IFF_LOOPBACK); +} +#endif + +/* + * tcp_nuke_addr - destroy all sockets on the given local address + * if local address is the unspecified address (0.0.0.0 or ::), destroy all + * sockets with local addresses that are not configured. + */ +int tcp_nuke_addr(struct net *net, struct sockaddr *addr) +{ + int family = addr->sa_family; + unsigned int bucket; + + struct in_addr *in; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct in6_addr *in6; +#endif + if (family == AF_INET) { + in = &((struct sockaddr_in *)addr)->sin_addr; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (family == AF_INET6) { + in6 = &((struct sockaddr_in6 *)addr)->sin6_addr; +#endif + } else { + return -EAFNOSUPPORT; + } + + for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) { + struct hlist_nulls_node *node; + struct sock *sk; + spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket); + +restart: + spin_lock_bh(lock); + sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) { + struct inet_sock *inet = inet_sk(sk); + + if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT) + continue; + if (sock_flag(sk, SOCK_DEAD)) + continue; + + if (family == AF_INET) { + __be32 s4 = inet->inet_rcv_saddr; + if (s4 == LOOPBACK4_IPV6) + continue; + + if (in->s_addr != s4 && + !(in->s_addr == INADDR_ANY && + !tcp_is_local(net, s4))) + continue; + } + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if (family == AF_INET6) { + struct in6_addr *s6; + if (!inet->pinet6) + continue; + + s6 = &inet->pinet6->rcv_saddr; + if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED) + continue; + + if (!ipv6_addr_equal(in6, s6) && + !(ipv6_addr_equal(in6, &in6addr_any) && + !tcp_is_local6(net, s6))) + continue; + } +#endif + + sock_hold(sk); + spin_unlock_bh(lock); + + local_bh_disable(); + bh_lock_sock(sk); + sk->sk_err = ETIMEDOUT; + sk->sk_error_report(sk); + + tcp_done(sk); + bh_unlock_sock(sk); + local_bh_enable(); + sock_put(sk); + + goto restart; + } + spin_unlock_bh(lock); + } + + return 0; +} diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8ed1b930e75f..29625e9a51a6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -62,6 +62,20 @@ #include <asm/uaccess.h> #include <linux/mroute6.h> +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include <linux/android_aid.h> + +static inline int current_has_network(void) +{ + return in_egroup_p(AID_INET) || capable(CAP_NET_RAW); +} +#else +static inline int current_has_network(void) +{ + return 1; +} +#endif + MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); @@ -108,6 +122,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; + if (!current_has_network()) + return -EACCES; + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && !inet_ehash_secret) @@ -477,6 +494,21 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, EXPORT_SYMBOL(inet6_getname); +int inet6_killaddr_ioctl(struct net *net, void __user *arg) { + struct in6_ifreq ireq; + struct sockaddr_in6 sin6; + + if (!capable(CAP_NET_ADMIN)) + return -EACCES; + + if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) + return -EFAULT; + + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = ireq.ifr6_addr; + return tcp_nuke_addr(net, (struct sockaddr *) &sin6); +} + int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; @@ -501,6 +533,8 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return addrconf_del_ifaddr(net, (void __user *) arg); case SIOCSIFDSTADDR: return addrconf_set_dstaddr(net, (void __user *) arg); + case SIOCKILLADDR: + return inet6_killaddr_ioctl(net, (void __user *) arg); default: if (!sk->sk_prot->ioctl) return -ENOIOCTLCMD; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index d33cddd16fbb..acd7c9e1da43 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -175,6 +175,18 @@ config IP6_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_REJECT_SKERR + bool "Force socket error when rejecting with icmp*" + depends on IP6_NF_TARGET_REJECT + default n + help + This option enables turning a "--reject-with icmp*" into a matching + socket error also. + The REJECT target normally allows sending an ICMP message. But it + leaves the local socket unaware of any ingress rejects. + + If unsure, say N. + config IP6_NF_MANGLE tristate "Packet mangling" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9d4e15559319..e641f8fa7489 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2279,16 +2279,15 @@ static void __exit ip6_tables_fini(void) * "No next header". * * If target header is found, its offset is set in *offset and return protocol - * number. Otherwise, return -1. + * number. Otherwise, return -ENOENT or -EBADMSG. * * If the first fragment doesn't contain the final protocol header or * NEXTHDR_NONE it is considered invalid. * * Note that non-1st fragment is special case that "the protocol number * of last header" is "next header" field in Fragment header. In this case, - * *offset is meaningless and fragment offset is stored in *fragoff if fragoff - * isn't NULL. - * + * *offset is meaningless. If fragoff is not NULL, the fragment offset is + * stored in *fragoff; if it is NULL, return -EINVAL. */ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff) @@ -2329,9 +2328,12 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, if (target < 0 && ((!ipv6_ext_hdr(hp->nexthdr)) || hp->nexthdr == NEXTHDR_NONE)) { - if (fragoff) + if (fragoff) { *fragoff = _frag_off; - return hp->nexthdr; + return hp->nexthdr; + } else { + return -EINVAL; + } } return -ENOENT; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index aad2fa41cf46..09155e344587 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -178,6 +178,15 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, skb_in->dev = net->loopback_dev; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); +#ifdef CONFIG_IP6_NF_TARGET_REJECT_SKERR + if (skb_in->sk) { + icmpv6_err_convert(ICMPV6_DEST_UNREACH, code, + &skb_in->sk->sk_err); + skb_in->sk->sk_error_report(skb_in->sk); + pr_debug("ip6t_REJECT: sk_err=%d for skb=%p sk=%p\n", + skb_in->sk->sk_err, skb_in, skb_in->sk); + } +#endif } static unsigned int diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 677d65929780..52c1edf36b75 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -496,13 +496,27 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, return ret; } +static void ieee80211_config_ap_ssid(struct ieee80211_sub_if_data *sdata, + struct beacon_parameters *params) +{ + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; + + bss_conf->ssid_len = params->ssid_len; + + if (params->ssid_len) + memcpy(bss_conf->ssid, params->ssid, params->ssid_len); + + bss_conf->hidden_ssid = + (params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); +} + static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, - const u8 *resp, size_t resp_len) + u8 *resp, size_t resp_len) { struct sk_buff *new, *old; if (!resp || !resp_len) - return 1; + return -EINVAL; old = rtnl_dereference(sdata->u.ap.probe_resp); @@ -513,28 +527,50 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, memcpy(skb_put(new, resp_len), resp, resp_len); rcu_assign_pointer(sdata->u.ap.probe_resp, new); - if (old) { - /* TODO: use call_rcu() */ - synchronize_rcu(); + synchronize_rcu(); + + if (old) dev_kfree_skb(old); - } return 0; } -static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_beacon_data *params) +/* + * This handles both adding a beacon and setting new beacon info + */ +static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, + struct beacon_parameters *params) { struct beacon_data *new, *old; int new_head_len, new_tail_len; - int size, err; - u32 changed = BSS_CHANGED_BEACON; + int size; + int err = -EINVAL; + u32 changed = 0; old = rtnl_dereference(sdata->u.ap.beacon); + /* head must not be zero-length */ + if (params->head && !params->head_len) + return -EINVAL; + + /* + * This is a kludge. beacon interval should really be part + * of the beacon information. + */ + if (params->interval && + (sdata->vif.bss_conf.beacon_int != params->interval)) { + sdata->vif.bss_conf.beacon_int = params->interval; + ieee80211_bss_info_change_notify(sdata, + BSS_CHANGED_BEACON_INT); + } + /* Need to have a beacon head if we don't have one yet */ if (!params->head && !old) - return -EINVAL; + return err; + + /* sorry, no way to start beaconing without dtim period */ + if (!params->dtim_period && !old) + return err; /* new or old head? */ if (params->head) @@ -557,6 +593,12 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, /* start filling the new info now */ + /* new or old dtim period? */ + if (params->dtim_period) + new->dtim_period = params->dtim_period; + else + new->dtim_period = old->dtim_period; + /* * pointers go into the block we allocated, * memory is | beacon_data | head | tail | @@ -579,37 +621,46 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, if (old) memcpy(new->tail, old->tail, new_tail_len); + sdata->vif.bss_conf.dtim_period = new->dtim_period; + + rcu_assign_pointer(sdata->u.ap.beacon, new); + + synchronize_rcu(); + + kfree(old); + err = ieee80211_set_probe_resp(sdata, params->probe_resp, params->probe_resp_len); - if (err < 0) - return err; - if (err == 0) + if (!err) changed |= BSS_CHANGED_AP_PROBE_RESP; - rcu_assign_pointer(sdata->u.ap.beacon, new); - - if (old) - kfree_rcu(old, rcu_head); + ieee80211_config_ap_ssid(sdata, params); + changed |= BSS_CHANGED_BEACON_ENABLED | + BSS_CHANGED_BEACON | + BSS_CHANGED_SSID; - return changed; + ieee80211_bss_info_change_notify(sdata, changed); + return 0; } -static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_ap_settings *params) +static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, + struct beacon_parameters *params) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata; struct beacon_data *old; struct ieee80211_sub_if_data *vlan; - u32 changed = BSS_CHANGED_BEACON_INT | - BSS_CHANGED_BEACON_ENABLED | - BSS_CHANGED_BEACON | - BSS_CHANGED_SSID; - int err; + int ret; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); old = rtnl_dereference(sdata->u.ap.beacon); if (old) return -EALREADY; + ret = ieee80211_config_beacon(sdata, params); + if (ret) + return ret; + /* * Apply control port protocol, this allows us to * not encrypt dynamic WEP control frames. @@ -623,32 +674,14 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, params->crypto.control_port_no_encrypt; } - sdata->vif.bss_conf.beacon_int = params->beacon_interval; - sdata->vif.bss_conf.dtim_period = params->dtim_period; - - sdata->vif.bss_conf.ssid_len = params->ssid_len; - if (params->ssid_len) - memcpy(sdata->vif.bss_conf.ssid, params->ssid, - params->ssid_len); - sdata->vif.bss_conf.hidden_ssid = - (params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); - - err = ieee80211_assign_beacon(sdata, ¶ms->beacon); - if (err < 0) - return err; - changed |= err; - - ieee80211_bss_info_change_notify(sdata, changed); - return 0; } -static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_beacon_data *params) +static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, + struct beacon_parameters *params) { struct ieee80211_sub_if_data *sdata; struct beacon_data *old; - int err; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -656,14 +689,10 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, if (!old) return -ENOENT; - err = ieee80211_assign_beacon(sdata, params); - if (err < 0) - return err; - ieee80211_bss_info_change_notify(sdata, err); - return 0; + return ieee80211_config_beacon(sdata, params); } -static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) +static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata; struct beacon_data *old; @@ -675,11 +704,10 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) return -ENOENT; RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); - - kfree_rcu(old, rcu_head); + synchronize_rcu(); + kfree(old); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - return 0; } @@ -2682,9 +2710,9 @@ struct cfg80211_ops mac80211_config_ops = { .get_key = ieee80211_get_key, .set_default_key = ieee80211_config_default_key, .set_default_mgmt_key = ieee80211_config_default_mgmt_key, - .start_ap = ieee80211_start_ap, - .change_beacon = ieee80211_change_beacon, - .stop_ap = ieee80211_stop_ap, + .add_beacon = ieee80211_add_beacon, + .set_beacon = ieee80211_set_beacon, + .del_beacon = ieee80211_del_beacon, .add_station = ieee80211_add_station, .del_station = ieee80211_del_station, .change_station = ieee80211_change_station, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index db8fae51714c..5aec7bb2d9ff 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -266,7 +266,7 @@ struct ieee80211_rx_data { struct beacon_data { u8 *head, *tail; int head_len, tail_len; - struct rcu_head rcu_head; + int dtim_period; }; struct ieee80211_if_ap { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e76facc69e95..f9104c37dba0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2218,8 +2218,7 @@ void ieee80211_tx_pending(unsigned long data) /* functions for drivers to get certain frames */ -static void ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_ap *bss, +static void ieee80211_beacon_add_tim(struct ieee80211_if_ap *bss, struct sk_buff *skb, struct beacon_data *beacon) { @@ -2236,7 +2235,7 @@ static void ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, IEEE80211_MAX_AID+1); if (bss->dtim_count == 0) - bss->dtim_count = sdata->vif.bss_conf.dtim_period - 1; + bss->dtim_count = beacon->dtim_period - 1; else bss->dtim_count--; @@ -2244,7 +2243,7 @@ static void ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, *pos++ = WLAN_EID_TIM; *pos++ = 4; *pos++ = bss->dtim_count; - *pos++ = sdata->vif.bss_conf.dtim_period; + *pos++ = beacon->dtim_period; if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) aid0 = 1; @@ -2337,14 +2336,12 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, * of the tim bitmap in mac80211 and the driver. */ if (local->tim_in_locked_section) { - ieee80211_beacon_add_tim(sdata, ap, skb, - beacon); + ieee80211_beacon_add_tim(ap, skb, beacon); } else { unsigned long flags; spin_lock_irqsave(&local->tim_lock, flags); - ieee80211_beacon_add_tim(sdata, ap, skb, - beacon); + ieee80211_beacon_add_tim(ap, skb, beacon); spin_unlock_irqrestore(&local->tim_lock, flags); } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0c6f67e8f2e5..ce2976c0ce70 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -967,6 +967,8 @@ config NETFILTER_XT_MATCH_OWNER based on who created the socket: the user or group. It is also possible to check whether a socket actually exists. + Conflicts with '"quota, tag, uid" match' + config NETFILTER_XT_MATCH_POLICY tristate 'IPsec "policy" match support' depends on XFRM @@ -1000,6 +1002,22 @@ config NETFILTER_XT_MATCH_PKTTYPE To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_QTAGUID + bool '"quota, tag, owner" match and stats support' + depends on NETFILTER_XT_MATCH_SOCKET + depends on NETFILTER_XT_MATCH_OWNER=n + help + This option replaces the `owner' match. In addition to matching + on uid, it keeps stats based on a tag assigned to a socket. + The full tag is comprised of a UID and an accounting tag. + The tags are assignable to sockets from user space (e.g. a download + manager can assign the socket to another UID for accounting). + Stats and control are done via /proc/net/xt_qtaguid/. + It replaces owner as it takes the same arguments, but should + really be recognized by the iptables tool. + + If unsure, say `N'. + config NETFILTER_XT_MATCH_QUOTA tristate '"quota" match support' depends on NETFILTER_ADVANCED @@ -1010,6 +1028,30 @@ config NETFILTER_XT_MATCH_QUOTA If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_QUOTA2 + tristate '"quota2" match support' + depends on NETFILTER_ADVANCED + help + This option adds a `quota2' match, which allows to match on a + byte counter correctly and not per CPU. + It allows naming the quotas. + This is based on http://xtables-addons.git.sourceforge.net + + If you want to compile it as a module, say M here and read + <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. + +config NETFILTER_XT_MATCH_QUOTA2_LOG + bool '"quota2" Netfilter LOG support' + depends on NETFILTER_XT_MATCH_QUOTA2 + depends on IP_NF_TARGET_ULOG=n # not yes, not module, just no + default n + help + This option allows `quota2' to log ONCE when a quota limit + is passed. It logs via NETLINK using the NETLINK_NFLOG family. + It logs similarly to how ipt_ULOG would without data. + + If unsure, say `N'. + config NETFILTER_XT_MATCH_RATEEST tristate '"rateest" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ca3676586f51..452e84de7e7c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -101,7 +101,9 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o +obj-$(CONFIG_NETFILTER_XT_MATCH_QTAGUID) += xt_qtaguid_print.o xt_qtaguid.o obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o +obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA2) += xt_quota2.o obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c new file mode 100644 index 000000000000..08086d680c2c --- /dev/null +++ b/net/netfilter/xt_qtaguid.c @@ -0,0 +1,2785 @@ +/* + * Kernel iptables module to track stats for packets based on user tags. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * There are run-time debug flags enabled via the debug_mask module param, or + * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h. + */ +#define DEBUG + +#include <linux/file.h> +#include <linux/inetdevice.h> +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_qtaguid.h> +#include <linux/skbuff.h> +#include <linux/workqueue.h> +#include <net/addrconf.h> +#include <net/sock.h> +#include <net/tcp.h> +#include <net/udp.h> + +#include <linux/netfilter/xt_socket.h> +#include "xt_qtaguid_internal.h" +#include "xt_qtaguid_print.h" + +/* + * We only use the xt_socket funcs within a similar context to avoid unexpected + * return values. + */ +#define XT_SOCKET_SUPPORTED_HOOKS \ + ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) + + +static const char *module_procdirname = "xt_qtaguid"; +static struct proc_dir_entry *xt_qtaguid_procdir; + +static unsigned int proc_iface_perms = S_IRUGO; +module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR); + +static struct proc_dir_entry *xt_qtaguid_stats_file; +static unsigned int proc_stats_perms = S_IRUGO; +module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR); + +static struct proc_dir_entry *xt_qtaguid_ctrl_file; +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO; +#else +static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR; +#endif +module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR); + +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include <linux/android_aid.h> +static gid_t proc_stats_readall_gid = AID_NET_BW_STATS; +static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT; +#else +/* 0 means, don't limit anybody */ +static gid_t proc_stats_readall_gid; +static gid_t proc_ctrl_write_gid; +#endif +module_param_named(stats_readall_gid, proc_stats_readall_gid, uint, + S_IRUGO | S_IWUSR); +module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint, + S_IRUGO | S_IWUSR); + +/* + * Limit the number of active tags (via socket tags) for a given UID. + * Multiple processes could share the UID. + */ +static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS; +module_param(max_sock_tags, int, S_IRUGO | S_IWUSR); + +/* + * After the kernel has initiallized this module, it is still possible + * to make it passive. + * Setting passive to Y: + * - the iface stats handling will not act on notifications. + * - iptables matches will never match. + * - ctrl commands silently succeed. + * - stats are always empty. + * This is mostly usefull when a bug is suspected. + */ +static bool module_passive; +module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR); + +/* + * Control how qtaguid data is tracked per proc/uid. + * Setting tag_tracking_passive to Y: + * - don't create proc specific structs to track tags + * - don't check that active tag stats exceed some limits. + * - don't clean up socket tags on process exits. + * This is mostly usefull when a bug is suspected. + */ +static bool qtu_proc_handling_passive; +module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool, + S_IRUGO | S_IWUSR); + +#define QTU_DEV_NAME "xt_qtaguid" + +uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK; +module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR); + +/*---------------------------------------------------------------------------*/ +static const char *iface_stat_procdirname = "iface_stat"; +static struct proc_dir_entry *iface_stat_procdir; +static const char *iface_stat_all_procfilename = "iface_stat_all"; +static struct proc_dir_entry *iface_stat_all_procfile; + +/* + * Ordering of locks: + * outer locks: + * iface_stat_list_lock + * sock_tag_list_lock + * inner locks: + * uid_tag_data_tree_lock + * tag_counter_set_list_lock + * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock + * is acquired. + * + * Call tree with all lock holders as of 2011-09-25: + * + * iface_stat_all_proc_read() + * iface_stat_list_lock + * (struct iface_stat) + * + * qtaguid_ctrl_proc_read() + * sock_tag_list_lock + * (sock_tag_tree) + * (struct proc_qtu_data->sock_tag_list) + * prdebug_full_state() + * sock_tag_list_lock + * (sock_tag_tree) + * uid_tag_data_tree_lock + * (uid_tag_data_tree) + * (proc_qtu_data_tree) + * iface_stat_list_lock + * + * qtaguid_stats_proc_read() + * iface_stat_list_lock + * struct iface_stat->tag_stat_list_lock + * + * qtudev_open() + * uid_tag_data_tree_lock + * + * qtudev_release() + * sock_tag_data_list_lock + * uid_tag_data_tree_lock + * prdebug_full_state() + * sock_tag_list_lock + * uid_tag_data_tree_lock + * iface_stat_list_lock + * + * iface_netdev_event_handler() + * iface_stat_create() + * iface_stat_list_lock + * iface_stat_update() + * iface_stat_list_lock + * + * iface_inetaddr_event_handler() + * iface_stat_create() + * iface_stat_list_lock + * iface_stat_update() + * iface_stat_list_lock + * + * iface_inet6addr_event_handler() + * iface_stat_create_ipv6() + * iface_stat_list_lock + * iface_stat_update() + * iface_stat_list_lock + * + * qtaguid_mt() + * account_for_uid() + * if_tag_stat_update() + * get_sock_stat() + * sock_tag_list_lock + * struct iface_stat->tag_stat_list_lock + * tag_stat_update() + * get_active_counter_set() + * tag_counter_set_list_lock + * tag_stat_update() + * get_active_counter_set() + * tag_counter_set_list_lock + * + * + * qtaguid_ctrl_parse() + * ctrl_cmd_delete() + * sock_tag_list_lock + * tag_counter_set_list_lock + * iface_stat_list_lock + * struct iface_stat->tag_stat_list_lock + * uid_tag_data_tree_lock + * ctrl_cmd_counter_set() + * tag_counter_set_list_lock + * ctrl_cmd_tag() + * sock_tag_list_lock + * (sock_tag_tree) + * get_tag_ref() + * uid_tag_data_tree_lock + * (uid_tag_data_tree) + * uid_tag_data_tree_lock + * (proc_qtu_data_tree) + * ctrl_cmd_untag() + * sock_tag_list_lock + * uid_tag_data_tree_lock + * + */ +static LIST_HEAD(iface_stat_list); +static DEFINE_SPINLOCK(iface_stat_list_lock); + +static struct rb_root sock_tag_tree = RB_ROOT; +static DEFINE_SPINLOCK(sock_tag_list_lock); + +static struct rb_root tag_counter_set_tree = RB_ROOT; +static DEFINE_SPINLOCK(tag_counter_set_list_lock); + +static struct rb_root uid_tag_data_tree = RB_ROOT; +static DEFINE_SPINLOCK(uid_tag_data_tree_lock); + +static struct rb_root proc_qtu_data_tree = RB_ROOT; +/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */ + +static struct qtaguid_event_counts qtu_events; +/*----------------------------------------------*/ +static bool can_manipulate_uids(void) +{ + /* root pwnd */ + return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid) + || in_egroup_p(proc_ctrl_write_gid); +} + +static bool can_impersonate_uid(uid_t uid) +{ + return uid == current_fsuid() || can_manipulate_uids(); +} + +static bool can_read_other_uid_stats(uid_t uid) +{ + /* root pwnd */ + return unlikely(!current_fsuid()) || uid == current_fsuid() + || unlikely(!proc_stats_readall_gid) + || in_egroup_p(proc_stats_readall_gid); +} + +static inline void dc_add_byte_packets(struct data_counters *counters, int set, + enum ifs_tx_rx direction, + enum ifs_proto ifs_proto, + int bytes, + int packets) +{ + counters->bpc[set][direction][ifs_proto].bytes += bytes; + counters->bpc[set][direction][ifs_proto].packets += packets; +} + +static inline uint64_t dc_sum_bytes(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return counters->bpc[set][direction][IFS_TCP].bytes + + counters->bpc[set][direction][IFS_UDP].bytes + + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; +} + +static inline uint64_t dc_sum_packets(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return counters->bpc[set][direction][IFS_TCP].packets + + counters->bpc[set][direction][IFS_UDP].packets + + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; +} + +static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct tag_node *data = rb_entry(node, struct tag_node, node); + int result; + RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " + " node=%p data=%p\n", tag, node, data); + result = tag_compare(tag, data->tag); + RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " + " data.tag=0x%llx (uid=%u) res=%d\n", + tag, data->tag, get_uid_from_tag(data->tag), result); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct tag_node *this = rb_entry(*new, struct tag_node, + node); + int result = tag_compare(data->tag, this->tag); + RB_DEBUG("qtaguid: %s(): tag=0x%llx" + " (uid=%u)\n", __func__, + this->tag, + get_uid_from_tag(this->tag)); + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root) +{ + tag_node_tree_insert(&data->tn, root); +} + +static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag) +{ + struct tag_node *node = tag_node_tree_search(root, tag); + if (!node) + return NULL; + return rb_entry(&node->node, struct tag_stat, tn.node); +} + +static void tag_counter_set_tree_insert(struct tag_counter_set *data, + struct rb_root *root) +{ + tag_node_tree_insert(&data->tn, root); +} + +static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root, + tag_t tag) +{ + struct tag_node *node = tag_node_tree_search(root, tag); + if (!node) + return NULL; + return rb_entry(&node->node, struct tag_counter_set, tn.node); + +} + +static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root) +{ + tag_node_tree_insert(&data->tn, root); +} + +static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag) +{ + struct tag_node *node = tag_node_tree_search(root, tag); + if (!node) + return NULL; + return rb_entry(&node->node, struct tag_ref, tn.node); +} + +static struct sock_tag *sock_tag_tree_search(struct rb_root *root, + const struct sock *sk) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct sock_tag *data = rb_entry(node, struct sock_tag, + sock_node); + if (sk < data->sk) + node = node->rb_left; + else if (sk > data->sk) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct sock_tag *this = rb_entry(*new, struct sock_tag, + sock_node); + parent = *new; + if (data->sk < this->sk) + new = &((*new)->rb_left); + else if (data->sk > this->sk) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->sock_node, parent, new); + rb_insert_color(&data->sock_node, root); +} + +static void sock_tag_tree_erase(struct rb_root *st_to_free_tree) +{ + struct rb_node *node; + struct sock_tag *st_entry; + + node = rb_first(st_to_free_tree); + while (node) { + st_entry = rb_entry(node, struct sock_tag, sock_node); + node = rb_next(node); + CT_DEBUG("qtaguid: %s(): " + "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__, + st_entry->sk, + st_entry->tag, + get_uid_from_tag(st_entry->tag)); + rb_erase(&st_entry->sock_node, st_to_free_tree); + sockfd_put(st_entry->socket); + kfree(st_entry); + } +} + +static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root, + const pid_t pid) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct proc_qtu_data *data = rb_entry(node, + struct proc_qtu_data, + node); + if (pid < data->pid) + node = node->rb_left; + else if (pid > data->pid) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void proc_qtu_data_tree_insert(struct proc_qtu_data *data, + struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct proc_qtu_data *this = rb_entry(*new, + struct proc_qtu_data, + node); + parent = *new; + if (data->pid < this->pid) + new = &((*new)->rb_left); + else if (data->pid > this->pid) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static void uid_tag_data_tree_insert(struct uid_tag_data *data, + struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct uid_tag_data *this = rb_entry(*new, + struct uid_tag_data, + node); + parent = *new; + if (data->uid < this->uid) + new = &((*new)->rb_left); + else if (data->uid > this->uid) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root, + uid_t uid) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct uid_tag_data *data = rb_entry(node, + struct uid_tag_data, + node); + if (uid < data->uid) + node = node->rb_left; + else if (uid > data->uid) + node = node->rb_right; + else + return data; + } + return NULL; +} + +/* + * Allocates a new uid_tag_data struct if needed. + * Returns a pointer to the found or allocated uid_tag_data. + * Returns a PTR_ERR on failures, and lock is not held. + * If found is not NULL: + * sets *found to true if not allocated. + * sets *found to false if allocated. + */ +struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res) +{ + struct uid_tag_data *utd_entry; + + /* Look for top level uid_tag_data for the UID */ + utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid); + DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry); + + if (found_res) + *found_res = utd_entry; + if (utd_entry) + return utd_entry; + + utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC); + if (!utd_entry) { + pr_err("qtaguid: get_uid_data(%u): " + "tag data alloc failed\n", uid); + return ERR_PTR(-ENOMEM); + } + + utd_entry->uid = uid; + utd_entry->tag_ref_tree = RB_ROOT; + uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree); + DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry); + return utd_entry; +} + +/* Never returns NULL. Either PTR_ERR or a valid ptr. */ +static struct tag_ref *new_tag_ref(tag_t new_tag, + struct uid_tag_data *utd_entry) +{ + struct tag_ref *tr_entry; + int res; + + if (utd_entry->num_active_tags + 1 > max_sock_tags) { + pr_info("qtaguid: new_tag_ref(0x%llx): " + "tag ref alloc quota exceeded. max=%d\n", + new_tag, max_sock_tags); + res = -EMFILE; + goto err_res; + + } + + tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC); + if (!tr_entry) { + pr_err("qtaguid: new_tag_ref(0x%llx): " + "tag ref alloc failed\n", + new_tag); + res = -ENOMEM; + goto err_res; + } + tr_entry->tn.tag = new_tag; + /* tr_entry->num_sock_tags handled by caller */ + utd_entry->num_active_tags++; + tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree); + DR_DEBUG("qtaguid: new_tag_ref(0x%llx): " + " inserted new tag ref %p\n", + new_tag, tr_entry); + return tr_entry; + +err_res: + return ERR_PTR(res); +} + +static struct tag_ref *lookup_tag_ref(tag_t full_tag, + struct uid_tag_data **utd_res) +{ + struct uid_tag_data *utd_entry; + struct tag_ref *tr_entry; + bool found_utd; + uid_t uid = get_uid_from_tag(full_tag); + + DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n", + full_tag, uid); + + utd_entry = get_uid_data(uid, &found_utd); + if (IS_ERR_OR_NULL(utd_entry)) { + if (utd_res) + *utd_res = utd_entry; + return NULL; + } + + tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag); + if (utd_res) + *utd_res = utd_entry; + DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n", + full_tag, utd_entry, tr_entry); + return tr_entry; +} + +/* Never returns NULL. Either PTR_ERR or a valid ptr. */ +static struct tag_ref *get_tag_ref(tag_t full_tag, + struct uid_tag_data **utd_res) +{ + struct uid_tag_data *utd_entry; + struct tag_ref *tr_entry; + + DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n", + full_tag); + spin_lock_bh(&uid_tag_data_tree_lock); + tr_entry = lookup_tag_ref(full_tag, &utd_entry); + BUG_ON(IS_ERR_OR_NULL(utd_entry)); + if (!tr_entry) + tr_entry = new_tag_ref(full_tag, utd_entry); + + spin_unlock_bh(&uid_tag_data_tree_lock); + if (utd_res) + *utd_res = utd_entry; + DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n", + full_tag, utd_entry, tr_entry); + return tr_entry; +} + +/* Checks and maybe frees the UID Tag Data entry */ +static void put_utd_entry(struct uid_tag_data *utd_entry) +{ + /* Are we done with the UID tag data entry? */ + if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) && + !utd_entry->num_pqd) { + DR_DEBUG("qtaguid: %s(): " + "erase utd_entry=%p uid=%u " + "by pid=%u tgid=%u uid=%u\n", __func__, + utd_entry, utd_entry->uid, + current->pid, current->tgid, current_fsuid()); + BUG_ON(utd_entry->num_active_tags); + rb_erase(&utd_entry->node, &uid_tag_data_tree); + kfree(utd_entry); + } else { + DR_DEBUG("qtaguid: %s(): " + "utd_entry=%p still has %d tags %d proc_qtu_data\n", + __func__, utd_entry, utd_entry->num_active_tags, + utd_entry->num_pqd); + BUG_ON(!(utd_entry->num_active_tags || + utd_entry->num_pqd)); + } +} + +/* + * If no sock_tags are using this tag_ref, + * decrements refcount of utd_entry, removes tr_entry + * from utd_entry->tag_ref_tree and frees. + */ +static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry, + struct uid_tag_data *utd_entry) +{ + DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__, + tr_entry, tr_entry->tn.tag, + get_uid_from_tag(tr_entry->tn.tag)); + if (!tr_entry->num_sock_tags) { + BUG_ON(!utd_entry->num_active_tags); + utd_entry->num_active_tags--; + rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree); + DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry); + kfree(tr_entry); + } +} + +static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry) +{ + struct rb_node *node; + struct tag_ref *tr_entry; + tag_t acct_tag; + + DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__, + full_tag, get_uid_from_tag(full_tag)); + acct_tag = get_atag_from_tag(full_tag); + node = rb_first(&utd_entry->tag_ref_tree); + while (node) { + tr_entry = rb_entry(node, struct tag_ref, tn.node); + node = rb_next(node); + if (!acct_tag || tr_entry->tn.tag == full_tag) + free_tag_ref_from_utd_entry(tr_entry, utd_entry); + } +} + +static int read_proc_u64(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + uint64_t value; + char *p = page; + uint64_t *iface_entry = data; + + if (!data) + return 0; + + value = *iface_entry; + p += sprintf(p, "%llu\n", value); + len = (p - page) - off; + *eof = (len <= count) ? 1 : 0; + *start = page + off; + return len; +} + +static int read_proc_bool(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + bool value; + char *p = page; + bool *bool_entry = data; + + if (!data) + return 0; + + value = *bool_entry; + p += sprintf(p, "%u\n", value); + len = (p - page) - off; + *eof = (len <= count) ? 1 : 0; + *start = page + off; + return len; +} + +static int get_active_counter_set(tag_t tag) +{ + int active_set = 0; + struct tag_counter_set *tcs; + + MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)" + " (uid=%u)\n", + tag, get_uid_from_tag(tag)); + /* For now we only handle UID tags for active sets */ + tag = get_utag_from_tag(tag); + spin_lock_bh(&tag_counter_set_list_lock); + tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); + if (tcs) + active_set = tcs->active_set; + spin_unlock_bh(&tag_counter_set_list_lock); + return active_set; +} + +/* + * Find the entry for tracking the specified interface. + * Caller must hold iface_stat_list_lock + */ +static struct iface_stat *get_iface_entry(const char *ifname) +{ + struct iface_stat *iface_entry; + + /* Find the entry for tracking the specified tag within the interface */ + if (ifname == NULL) { + pr_info("qtaguid: iface_stat: get() NULL device name\n"); + return NULL; + } + + /* Iterate over interfaces */ + list_for_each_entry(iface_entry, &iface_stat_list, list) { + if (!strcmp(ifname, iface_entry->ifname)) + goto done; + } + iface_entry = NULL; +done: + return iface_entry; +} + +static int iface_stat_all_proc_read(char *page, char **num_items_returned, + off_t items_to_skip, int char_count, + int *eof, void *data) +{ + char *outp = page; + int item_index = 0; + int len; + struct iface_stat *iface_entry; + struct rtnl_link_stats64 dev_stats, *stats; + struct rtnl_link_stats64 no_dev_stats = {0}; + + if (unlikely(module_passive)) { + *eof = 1; + return 0; + } + + CT_DEBUG("qtaguid:proc iface_stat_all " + "page=%p *num_items_returned=%p off=%ld " + "char_count=%d *eof=%d\n", page, *num_items_returned, + items_to_skip, char_count, *eof); + + if (*eof) + return 0; + + /* + * This lock will prevent iface_stat_update() from changing active, + * and in turn prevent an interface from unregistering itself. + */ + spin_lock_bh(&iface_stat_list_lock); + list_for_each_entry(iface_entry, &iface_stat_list, list) { + if (item_index++ < items_to_skip) + continue; + + if (iface_entry->active) { + stats = dev_get_stats(iface_entry->net_dev, + &dev_stats); + } else { + stats = &no_dev_stats; + } + len = snprintf(outp, char_count, + "%s %d " + "%llu %llu %llu %llu " + "%llu %llu %llu %llu\n", + iface_entry->ifname, + iface_entry->active, + iface_entry->totals[IFS_RX].bytes, + iface_entry->totals[IFS_RX].packets, + iface_entry->totals[IFS_TX].bytes, + iface_entry->totals[IFS_TX].packets, + stats->rx_bytes, stats->rx_packets, + stats->tx_bytes, stats->tx_packets); + if (len >= char_count) { + spin_unlock_bh(&iface_stat_list_lock); + *outp = '\0'; + return outp - page; + } + outp += len; + char_count -= len; + (*num_items_returned)++; + } + spin_unlock_bh(&iface_stat_list_lock); + + *eof = 1; + return outp - page; +} + +static void iface_create_proc_worker(struct work_struct *work) +{ + struct proc_dir_entry *proc_entry; + struct iface_stat_work *isw = container_of(work, struct iface_stat_work, + iface_work); + struct iface_stat *new_iface = isw->iface_entry; + + /* iface_entries are not deleted, so safe to manipulate. */ + proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir); + if (IS_ERR_OR_NULL(proc_entry)) { + pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n"); + kfree(isw); + return; + } + + new_iface->proc_ptr = proc_entry; + + create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_TX].bytes); + create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_RX].bytes); + create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_TX].packets); + create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_RX].packets); + create_proc_read_entry("active", proc_iface_perms, proc_entry, + read_proc_bool, &new_iface->active); + + IF_DEBUG("qtaguid: iface_stat: create_proc(): done " + "entry=%p dev=%s\n", new_iface, new_iface->ifname); + kfree(isw); +} + +/* + * Will set the entry's active state, and + * update the net_dev accordingly also. + */ +static void _iface_stat_set_active(struct iface_stat *entry, + struct net_device *net_dev, + bool activate) +{ + if (activate) { + entry->net_dev = net_dev; + entry->active = true; + IF_DEBUG("qtaguid: %s(%s): " + "enable tracking. rfcnt=%d\n", __func__, + entry->ifname, + percpu_read(*net_dev->pcpu_refcnt)); + } else { + entry->active = false; + entry->net_dev = NULL; + IF_DEBUG("qtaguid: %s(%s): " + "disable tracking. rfcnt=%d\n", __func__, + entry->ifname, + percpu_read(*net_dev->pcpu_refcnt)); + + } +} + +/* Caller must hold iface_stat_list_lock */ +static struct iface_stat *iface_alloc(struct net_device *net_dev) +{ + struct iface_stat *new_iface; + struct iface_stat_work *isw; + + new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC); + if (new_iface == NULL) { + pr_err("qtaguid: iface_stat: create(%s): " + "iface_stat alloc failed\n", net_dev->name); + return NULL; + } + new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC); + if (new_iface->ifname == NULL) { + pr_err("qtaguid: iface_stat: create(%s): " + "ifname alloc failed\n", net_dev->name); + kfree(new_iface); + return NULL; + } + spin_lock_init(&new_iface->tag_stat_list_lock); + new_iface->tag_stat_tree = RB_ROOT; + _iface_stat_set_active(new_iface, net_dev, true); + + /* + * ipv6 notifier chains are atomic :( + * No create_proc_read_entry() for you! + */ + isw = kmalloc(sizeof(*isw), GFP_ATOMIC); + if (!isw) { + pr_err("qtaguid: iface_stat: create(%s): " + "work alloc failed\n", new_iface->ifname); + _iface_stat_set_active(new_iface, net_dev, false); + kfree(new_iface->ifname); + kfree(new_iface); + return NULL; + } + isw->iface_entry = new_iface; + INIT_WORK(&isw->iface_work, iface_create_proc_worker); + schedule_work(&isw->iface_work); + list_add(&new_iface->list, &iface_stat_list); + return new_iface; +} + +static void iface_check_stats_reset_and_adjust(struct net_device *net_dev, + struct iface_stat *iface) +{ + struct rtnl_link_stats64 dev_stats, *stats; + bool stats_rewound; + + stats = dev_get_stats(net_dev, &dev_stats); + /* No empty packets */ + stats_rewound = + (stats->rx_bytes < iface->last_known[IFS_RX].bytes) + || (stats->tx_bytes < iface->last_known[IFS_TX].bytes); + + IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p " + "bytes rx/tx=%llu/%llu " + "active=%d last_known=%d " + "stats_rewound=%d\n", __func__, + net_dev ? net_dev->name : "?", + iface, net_dev, + stats->rx_bytes, stats->tx_bytes, + iface->active, iface->last_known_valid, stats_rewound); + + if (iface->active && iface->last_known_valid && stats_rewound) { + pr_warn_once("qtaguid: iface_stat: %s(%s): " + "iface reset its stats unexpectedly\n", __func__, + net_dev->name); + + iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes; + iface->totals[IFS_TX].packets += + iface->last_known[IFS_TX].packets; + iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes; + iface->totals[IFS_RX].packets += + iface->last_known[IFS_RX].packets; + iface->last_known_valid = false; + IF_DEBUG("qtaguid: %s(%s): iface=%p " + "used last known bytes rx/tx=%llu/%llu\n", __func__, + iface->ifname, iface, iface->last_known[IFS_RX].bytes, + iface->last_known[IFS_TX].bytes); + } +} + +/* + * Create a new entry for tracking the specified interface. + * Do nothing if the entry already exists. + * Called when an interface is configured with a valid IP address. + */ +static void iface_stat_create(struct net_device *net_dev, + struct in_ifaddr *ifa) +{ + struct in_device *in_dev = NULL; + const char *ifname; + struct iface_stat *entry; + __be32 ipaddr = 0; + struct iface_stat *new_iface; + + IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n", + net_dev ? net_dev->name : "?", + ifa, net_dev); + if (!net_dev) { + pr_err("qtaguid: iface_stat: create(): no net dev\n"); + return; + } + + ifname = net_dev->name; + if (!ifa) { + in_dev = in_dev_get(net_dev); + if (!in_dev) { + pr_err("qtaguid: iface_stat: create(%s): no inet dev\n", + ifname); + return; + } + IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n", + ifname, in_dev); + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + IF_DEBUG("qtaguid: iface_stat: create(%s): " + "ifa=%p ifa_label=%s\n", + ifname, ifa, + ifa->ifa_label ? ifa->ifa_label : "(null)"); + if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label)) + break; + } + } + + if (!ifa) { + IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n", + ifname); + goto done_put; + } + ipaddr = ifa->ifa_local; + + spin_lock_bh(&iface_stat_list_lock); + entry = get_iface_entry(ifname); + if (entry != NULL) { + bool activate = !ipv4_is_loopback(ipaddr); + IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", + ifname, entry); + iface_check_stats_reset_and_adjust(net_dev, entry); + _iface_stat_set_active(entry, net_dev, activate); + IF_DEBUG("qtaguid: %s(%s): " + "tracking now %d on ip=%pI4\n", __func__, + entry->ifname, activate, &ipaddr); + goto done_unlock_put; + } else if (ipv4_is_loopback(ipaddr)) { + IF_DEBUG("qtaguid: iface_stat: create(%s): " + "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr); + goto done_unlock_put; + } + + new_iface = iface_alloc(net_dev); + IF_DEBUG("qtaguid: iface_stat: create(%s): done " + "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr); +done_unlock_put: + spin_unlock_bh(&iface_stat_list_lock); +done_put: + if (in_dev) + in_dev_put(in_dev); +} + +static void iface_stat_create_ipv6(struct net_device *net_dev, + struct inet6_ifaddr *ifa) +{ + struct in_device *in_dev; + const char *ifname; + struct iface_stat *entry; + struct iface_stat *new_iface; + int addr_type; + + IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n", + ifa, net_dev, net_dev ? net_dev->name : ""); + if (!net_dev) { + pr_err("qtaguid: iface_stat: create6(): no net dev!\n"); + return; + } + ifname = net_dev->name; + + in_dev = in_dev_get(net_dev); + if (!in_dev) { + pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n", + ifname); + return; + } + + IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n", + ifname, in_dev); + + if (!ifa) { + IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n", + ifname); + goto done_put; + } + addr_type = ipv6_addr_type(&ifa->addr); + + spin_lock_bh(&iface_stat_list_lock); + entry = get_iface_entry(ifname); + if (entry != NULL) { + bool activate = !(addr_type & IPV6_ADDR_LOOPBACK); + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, + ifname, entry); + iface_check_stats_reset_and_adjust(net_dev, entry); + _iface_stat_set_active(entry, net_dev, activate); + IF_DEBUG("qtaguid: %s(%s): " + "tracking now %d on ip=%pI6c\n", __func__, + entry->ifname, activate, &ifa->addr); + goto done_unlock_put; + } else if (addr_type & IPV6_ADDR_LOOPBACK) { + IF_DEBUG("qtaguid: %s(%s): " + "ignore loopback dev. ip=%pI6c\n", __func__, + ifname, &ifa->addr); + goto done_unlock_put; + } + + new_iface = iface_alloc(net_dev); + IF_DEBUG("qtaguid: iface_stat: create6(%s): done " + "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr); + +done_unlock_put: + spin_unlock_bh(&iface_stat_list_lock); +done_put: + in_dev_put(in_dev); +} + +static struct sock_tag *get_sock_stat_nl(const struct sock *sk) +{ + MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk); + return sock_tag_tree_search(&sock_tag_tree, sk); +} + +static struct sock_tag *get_sock_stat(const struct sock *sk) +{ + struct sock_tag *sock_tag_entry; + MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk); + if (!sk) + return NULL; + spin_lock_bh(&sock_tag_list_lock); + sock_tag_entry = get_sock_stat_nl(sk); + spin_unlock_bh(&sock_tag_list_lock); + return sock_tag_entry; +} + +static void +data_counters_update(struct data_counters *dc, int set, + enum ifs_tx_rx direction, int proto, int bytes) +{ + switch (proto) { + case IPPROTO_TCP: + dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1); + break; + case IPPROTO_UDP: + dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1); + break; + case IPPROTO_IP: + default: + dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes, + 1); + break; + } +} + +/* + * Update stats for the specified interface. Do nothing if the entry + * does not exist (when a device was never configured with an IP address). + * Called when an device is being unregistered. + */ +static void iface_stat_update(struct net_device *net_dev, bool stash_only) +{ + struct rtnl_link_stats64 dev_stats, *stats; + struct iface_stat *entry; + + stats = dev_get_stats(net_dev, &dev_stats); + spin_lock_bh(&iface_stat_list_lock); + entry = get_iface_entry(net_dev->name); + if (entry == NULL) { + IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n", + net_dev->name); + spin_unlock_bh(&iface_stat_list_lock); + return; + } + + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, + net_dev->name, entry); + if (!entry->active) { + IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__, + net_dev->name); + spin_unlock_bh(&iface_stat_list_lock); + return; + } + + if (stash_only) { + entry->last_known[IFS_TX].bytes = stats->tx_bytes; + entry->last_known[IFS_TX].packets = stats->tx_packets; + entry->last_known[IFS_RX].bytes = stats->rx_bytes; + entry->last_known[IFS_RX].packets = stats->rx_packets; + entry->last_known_valid = true; + IF_DEBUG("qtaguid: %s(%s): " + "dev stats stashed rx/tx=%llu/%llu\n", __func__, + net_dev->name, stats->rx_bytes, stats->tx_bytes); + spin_unlock_bh(&iface_stat_list_lock); + return; + } + entry->totals[IFS_TX].bytes += stats->tx_bytes; + entry->totals[IFS_TX].packets += stats->tx_packets; + entry->totals[IFS_RX].bytes += stats->rx_bytes; + entry->totals[IFS_RX].packets += stats->rx_packets; + /* We don't need the last_known[] anymore */ + entry->last_known_valid = false; + _iface_stat_set_active(entry, net_dev, false); + IF_DEBUG("qtaguid: %s(%s): " + "disable tracking. rx/tx=%llu/%llu\n", __func__, + net_dev->name, stats->rx_bytes, stats->tx_bytes); + spin_unlock_bh(&iface_stat_list_lock); +} + +static void tag_stat_update(struct tag_stat *tag_entry, + enum ifs_tx_rx direction, int proto, int bytes) +{ + int active_set; + active_set = get_active_counter_set(tag_entry->tn.tag); + MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d " + "dir=%d proto=%d bytes=%d)\n", + tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag), + active_set, direction, proto, bytes); + data_counters_update(&tag_entry->counters, active_set, direction, + proto, bytes); + if (tag_entry->parent_counters) + data_counters_update(tag_entry->parent_counters, active_set, + direction, proto, bytes); +} + +/* + * Create a new entry for tracking the specified {acct_tag,uid_tag} within + * the interface. + * iface_entry->tag_stat_list_lock should be held. + */ +static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry, + tag_t tag) +{ + struct tag_stat *new_tag_stat_entry = NULL; + IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx" + " (uid=%u)\n", __func__, + iface_entry, tag, get_uid_from_tag(tag)); + new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC); + if (!new_tag_stat_entry) { + pr_err("qtaguid: iface_stat: tag stat alloc failed\n"); + goto done; + } + new_tag_stat_entry->tn.tag = tag; + tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree); +done: + return new_tag_stat_entry; +} + +static void if_tag_stat_update(const char *ifname, uid_t uid, + const struct sock *sk, enum ifs_tx_rx direction, + int proto, int bytes) +{ + struct tag_stat *tag_stat_entry; + tag_t tag, acct_tag; + tag_t uid_tag; + struct data_counters *uid_tag_counters; + struct sock_tag *sock_tag_entry; + struct iface_stat *iface_entry; + struct tag_stat *new_tag_stat; + MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s " + "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", + ifname, uid, sk, direction, proto, bytes); + + + iface_entry = get_iface_entry(ifname); + if (!iface_entry) { + pr_err("qtaguid: iface_stat: stat_update() %s not found\n", + ifname); + return; + } + /* It is ok to process data when an iface_entry is inactive */ + + MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", + ifname, iface_entry); + + /* + * Look for a tagged sock. + * It will have an acct_uid. + */ + sock_tag_entry = get_sock_stat(sk); + if (sock_tag_entry) { + tag = sock_tag_entry->tag; + acct_tag = get_atag_from_tag(tag); + uid_tag = get_utag_from_tag(tag); + } else { + acct_tag = make_atag_from_value(0); + tag = combine_atag_with_uid(acct_tag, uid); + uid_tag = make_tag_from_uid(uid); + } + MT_DEBUG("qtaguid: iface_stat: stat_update(): " + " looking for tag=0x%llx (uid=%u) in ife=%p\n", + tag, get_uid_from_tag(tag), iface_entry); + /* Loop over tag list under this interface for {acct_tag,uid_tag} */ + spin_lock_bh(&iface_entry->tag_stat_list_lock); + + tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, + tag); + if (tag_stat_entry) { + /* + * Updating the {acct_tag, uid_tag} entry handles both stats: + * {0, uid_tag} will also get updated. + */ + tag_stat_update(tag_stat_entry, direction, proto, bytes); + spin_unlock_bh(&iface_entry->tag_stat_list_lock); + return; + } + + /* Loop over tag list under this interface for {0,uid_tag} */ + tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, + uid_tag); + if (!tag_stat_entry) { + /* Here: the base uid_tag did not exist */ + /* + * No parent counters. So + * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats. + */ + new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); + uid_tag_counters = &new_tag_stat->counters; + } else { + uid_tag_counters = &tag_stat_entry->counters; + } + + if (acct_tag) { + new_tag_stat = create_if_tag_stat(iface_entry, tag); + new_tag_stat->parent_counters = uid_tag_counters; + } + tag_stat_update(new_tag_stat, direction, proto, bytes); + spin_unlock_bh(&iface_entry->tag_stat_list_lock); +} + +static int iface_netdev_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) { + struct net_device *dev = ptr; + + if (unlikely(module_passive)) + return NOTIFY_DONE; + + IF_DEBUG("qtaguid: iface_stat: netdev_event(): " + "ev=0x%lx/%s netdev=%p->name=%s\n", + event, netdev_evt_str(event), dev, dev ? dev->name : ""); + + switch (event) { + case NETDEV_UP: + iface_stat_create(dev, NULL); + atomic64_inc(&qtu_events.iface_events); + break; + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + iface_stat_update(dev, event == NETDEV_DOWN); + atomic64_inc(&qtu_events.iface_events); + break; + } + return NOTIFY_DONE; +} + +static int iface_inet6addr_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = ptr; + struct net_device *dev; + + if (unlikely(module_passive)) + return NOTIFY_DONE; + + IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): " + "ev=0x%lx/%s ifa=%p\n", + event, netdev_evt_str(event), ifa); + + switch (event) { + case NETDEV_UP: + BUG_ON(!ifa || !ifa->idev); + dev = (struct net_device *)ifa->idev->dev; + iface_stat_create_ipv6(dev, ifa); + atomic64_inc(&qtu_events.iface_events); + break; + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + BUG_ON(!ifa || !ifa->idev); + dev = (struct net_device *)ifa->idev->dev; + iface_stat_update(dev, event == NETDEV_DOWN); + atomic64_inc(&qtu_events.iface_events); + break; + } + return NOTIFY_DONE; +} + +static int iface_inetaddr_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct net_device *dev; + + if (unlikely(module_passive)) + return NOTIFY_DONE; + + IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): " + "ev=0x%lx/%s ifa=%p\n", + event, netdev_evt_str(event), ifa); + + switch (event) { + case NETDEV_UP: + BUG_ON(!ifa || !ifa->ifa_dev); + dev = ifa->ifa_dev->dev; + iface_stat_create(dev, ifa); + atomic64_inc(&qtu_events.iface_events); + break; + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + BUG_ON(!ifa || !ifa->ifa_dev); + dev = ifa->ifa_dev->dev; + iface_stat_update(dev, event == NETDEV_DOWN); + atomic64_inc(&qtu_events.iface_events); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block iface_netdev_notifier_blk = { + .notifier_call = iface_netdev_event_handler, +}; + +static struct notifier_block iface_inetaddr_notifier_blk = { + .notifier_call = iface_inetaddr_event_handler, +}; + +static struct notifier_block iface_inet6addr_notifier_blk = { + .notifier_call = iface_inet6addr_event_handler, +}; + +static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) +{ + int err; + + iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir); + if (!iface_stat_procdir) { + pr_err("qtaguid: iface_stat: init failed to create proc entry\n"); + err = -1; + goto err; + } + + iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename, + proc_iface_perms, + parent_procdir); + if (!iface_stat_all_procfile) { + pr_err("qtaguid: iface_stat: init " + " failed to create stat_all proc entry\n"); + err = -1; + goto err_zap_entry; + } + iface_stat_all_procfile->read_proc = iface_stat_all_proc_read; + + + err = register_netdevice_notifier(&iface_netdev_notifier_blk); + if (err) { + pr_err("qtaguid: iface_stat: init " + "failed to register dev event handler\n"); + goto err_zap_all_stats_entry; + } + err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk); + if (err) { + pr_err("qtaguid: iface_stat: init " + "failed to register ipv4 dev event handler\n"); + goto err_unreg_nd; + } + + err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk); + if (err) { + pr_err("qtaguid: iface_stat: init " + "failed to register ipv6 dev event handler\n"); + goto err_unreg_ip4_addr; + } + return 0; + +err_unreg_ip4_addr: + unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk); +err_unreg_nd: + unregister_netdevice_notifier(&iface_netdev_notifier_blk); +err_zap_all_stats_entry: + remove_proc_entry(iface_stat_all_procfilename, parent_procdir); +err_zap_entry: + remove_proc_entry(iface_stat_procdirname, parent_procdir); +err: + return err; +} + +static struct sock *qtaguid_find_sk(const struct sk_buff *skb, + struct xt_action_param *par) +{ + struct sock *sk; + unsigned int hook_mask = (1 << par->hooknum); + + MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, + par->hooknum, par->family); + + /* + * Let's not abuse the the xt_socket_get*_sk(), or else it will + * return garbage SKs. + */ + if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS)) + return NULL; + + switch (par->family) { + case NFPROTO_IPV6: + sk = xt_socket_get6_sk(skb, par); + break; + case NFPROTO_IPV4: + sk = xt_socket_get4_sk(skb, par); + break; + default: + return NULL; + } + + /* + * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs. + * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959 + * Not fixed in 3.0-r3 :( + */ + if (sk) { + MT_DEBUG("qtaguid: %p->sk_proto=%u " + "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); + if (sk->sk_state == TCP_TIME_WAIT) { + xt_socket_put_sk(sk); + sk = NULL; + } + } + return sk; +} + +static void account_for_uid(const struct sk_buff *skb, + const struct sock *alternate_sk, uid_t uid, + struct xt_action_param *par) +{ + const struct net_device *el_dev; + + if (!skb->dev) { + MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); + el_dev = par->in ? : par->out; + } else { + const struct net_device *other_dev; + el_dev = skb->dev; + other_dev = par->in ? : par->out; + if (el_dev != other_dev) { + MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " + "par->(in/out)=%p %s\n", + par->hooknum, el_dev, el_dev->name, other_dev, + other_dev->name); + } + } + + if (unlikely(!el_dev)) { + pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum); + } else if (unlikely(!el_dev->name)) { + pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); + } else { + MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n", + par->hooknum, + el_dev->name, + el_dev->type); + + if_tag_stat_update(el_dev->name, uid, + skb->sk ? skb->sk : alternate_sk, + par->in ? IFS_RX : IFS_TX, + ip_hdr(skb)->protocol, skb->len); + } +} + +static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_qtaguid_match_info *info = par->matchinfo; + const struct file *filp; + bool got_sock = false; + struct sock *sk; + uid_t sock_uid; + bool res; + + if (unlikely(module_passive)) + return (info->match ^ info->invert) == 0; + + MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n", + par->hooknum, skb, par->in, par->out, par->family); + + atomic64_inc(&qtu_events.match_calls); + if (skb == NULL) { + res = (info->match ^ info->invert) == 0; + goto ret_res; + } + + sk = skb->sk; + + if (sk == NULL) { + /* + * A missing sk->sk_socket happens when packets are in-flight + * and the matching socket is already closed and gone. + */ + sk = qtaguid_find_sk(skb, par); + /* + * If we got the socket from the find_sk(), we will need to put + * it back, as nf_tproxy_get_sock_v4() got it. + */ + got_sock = sk; + if (sk) + atomic64_inc(&qtu_events.match_found_sk_in_ct); + else + atomic64_inc(&qtu_events.match_found_no_sk_in_ct); + } else { + atomic64_inc(&qtu_events.match_found_sk); + } + MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n", + par->hooknum, sk, got_sock, ip_hdr(skb)->protocol); + if (sk != NULL) { + MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", + par->hooknum, sk, sk->sk_socket, + sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); + filp = sk->sk_socket ? sk->sk_socket->file : NULL; + MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", + par->hooknum, filp ? filp->f_cred->fsuid : -1); + } + + if (sk == NULL || sk->sk_socket == NULL) { + /* + * Here, the qtaguid_find_sk() using connection tracking + * couldn't find the owner, so for now we just count them + * against the system. + */ + /* + * TODO: unhack how to force just accounting. + * For now we only do iface stats when the uid-owner is not + * requested. + */ + if (!(info->match & XT_QTAGUID_UID)) + account_for_uid(skb, sk, 0, par); + MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", + par->hooknum, + sk ? sk->sk_socket : NULL); + res = (info->match ^ info->invert) == 0; + atomic64_inc(&qtu_events.match_no_sk); + goto put_sock_ret_res; + } else if (info->match & info->invert & XT_QTAGUID_SOCKET) { + res = false; + goto put_sock_ret_res; + } + filp = sk->sk_socket->file; + if (filp == NULL) { + MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); + account_for_uid(skb, sk, 0, par); + res = ((info->match ^ info->invert) & + (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; + atomic64_inc(&qtu_events.match_no_sk_file); + goto put_sock_ret_res; + } + sock_uid = filp->f_cred->fsuid; + /* + * TODO: unhack how to force just accounting. + * For now we only do iface stats when the uid-owner is not requested + */ + if (!(info->match & XT_QTAGUID_UID)) + account_for_uid(skb, sk, sock_uid, par); + + /* + * The following two tests fail the match when: + * id not in range AND no inverted condition requested + * or id in range AND inverted condition requested + * Thus (!a && b) || (a && !b) == a ^ b + */ + if (info->match & XT_QTAGUID_UID) + if ((filp->f_cred->fsuid >= info->uid_min && + filp->f_cred->fsuid <= info->uid_max) ^ + !(info->invert & XT_QTAGUID_UID)) { + MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", + par->hooknum); + res = false; + goto put_sock_ret_res; + } + if (info->match & XT_QTAGUID_GID) + if ((filp->f_cred->fsgid >= info->gid_min && + filp->f_cred->fsgid <= info->gid_max) ^ + !(info->invert & XT_QTAGUID_GID)) { + MT_DEBUG("qtaguid[%d]: leaving gid not matching\n", + par->hooknum); + res = false; + goto put_sock_ret_res; + } + + MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum); + res = true; + +put_sock_ret_res: + if (got_sock) + xt_socket_put_sk(sk); +ret_res: + MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res); + return res; +} + +#ifdef DDEBUG +/* This function is not in xt_qtaguid_print.c because of locks visibility */ +static void prdebug_full_state(int indent_level, const char *fmt, ...) +{ + va_list args; + char *fmt_buff; + char *buff; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + fmt_buff = kasprintf(GFP_ATOMIC, + "qtaguid: %s(): %s {\n", __func__, fmt); + BUG_ON(!fmt_buff); + va_start(args, fmt); + buff = kvasprintf(GFP_ATOMIC, + fmt_buff, args); + BUG_ON(!buff); + pr_debug("%s", buff); + kfree(fmt_buff); + kfree(buff); + va_end(args); + + spin_lock_bh(&sock_tag_list_lock); + prdebug_sock_tag_tree(indent_level, &sock_tag_tree); + spin_unlock_bh(&sock_tag_list_lock); + + spin_lock_bh(&sock_tag_list_lock); + spin_lock_bh(&uid_tag_data_tree_lock); + prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree); + prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree); + spin_unlock_bh(&uid_tag_data_tree_lock); + spin_unlock_bh(&sock_tag_list_lock); + + spin_lock_bh(&iface_stat_list_lock); + prdebug_iface_stat_list(indent_level, &iface_stat_list); + spin_unlock_bh(&iface_stat_list_lock); + + pr_debug("qtaguid: %s(): }\n", __func__); +} +#else +static void prdebug_full_state(int indent_level, const char *fmt, ...) {} +#endif + +/* + * Procfs reader to get all active socket tags using style "1)" as described in + * fs/proc/generic.c + */ +static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, + off_t items_to_skip, int char_count, int *eof, + void *data) +{ + char *outp = page; + int len; + uid_t uid; + struct rb_node *node; + struct sock_tag *sock_tag_entry; + int item_index = 0; + int indent_level = 0; + long f_count; + + if (unlikely(module_passive)) { + *eof = 1; + return 0; + } + + if (*eof) + return 0; + + CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n", + page, items_to_skip, char_count, *eof); + + spin_lock_bh(&sock_tag_list_lock); + for (node = rb_first(&sock_tag_tree); + node; + node = rb_next(node)) { + if (item_index++ < items_to_skip) + continue; + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); + uid = get_uid_from_tag(sock_tag_entry->tag); + CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) " + "pid=%u\n", + sock_tag_entry->sk, + sock_tag_entry->tag, + uid, + sock_tag_entry->pid + ); + f_count = atomic_long_read( + &sock_tag_entry->socket->file->f_count); + len = snprintf(outp, char_count, + "sock=%p tag=0x%llx (uid=%u) pid=%u " + "f_count=%lu\n", + sock_tag_entry->sk, + sock_tag_entry->tag, uid, + sock_tag_entry->pid, f_count); + if (len >= char_count) { + spin_unlock_bh(&sock_tag_list_lock); + *outp = '\0'; + return outp - page; + } + outp += len; + char_count -= len; + (*num_items_returned)++; + } + spin_unlock_bh(&sock_tag_list_lock); + + if (item_index++ >= items_to_skip) { + len = snprintf(outp, char_count, + "events: sockets_tagged=%llu " + "sockets_untagged=%llu " + "counter_set_changes=%llu " + "delete_cmds=%llu " + "iface_events=%llu " + "match_calls=%llu " + "match_found_sk=%llu " + "match_found_sk_in_ct=%llu " + "match_found_no_sk_in_ct=%llu " + "match_no_sk=%llu " + "match_no_sk_file=%llu\n", + atomic64_read(&qtu_events.sockets_tagged), + atomic64_read(&qtu_events.sockets_untagged), + atomic64_read(&qtu_events.counter_set_changes), + atomic64_read(&qtu_events.delete_cmds), + atomic64_read(&qtu_events.iface_events), + atomic64_read(&qtu_events.match_calls), + atomic64_read(&qtu_events.match_found_sk), + atomic64_read(&qtu_events.match_found_sk_in_ct), + atomic64_read( + &qtu_events.match_found_no_sk_in_ct), + atomic64_read(&qtu_events.match_no_sk), + atomic64_read(&qtu_events.match_no_sk_file)); + if (len >= char_count) { + *outp = '\0'; + return outp - page; + } + outp += len; + char_count -= len; + (*num_items_returned)++; + } + + /* Count the following as part of the last item_index */ + if (item_index > items_to_skip) { + prdebug_full_state(indent_level, "proc ctrl"); + } + + *eof = 1; + return outp - page; +} + +/* + * Delete socket tags, and stat tags associated with a given + * accouting tag and uid. + */ +static int ctrl_cmd_delete(const char *input) +{ + char cmd; + uid_t uid; + uid_t entry_uid; + tag_t acct_tag; + tag_t tag; + int res, argc; + struct iface_stat *iface_entry; + struct rb_node *node; + struct sock_tag *st_entry; + struct rb_root st_to_free_tree = RB_ROOT; + struct tag_stat *ts_entry; + struct tag_counter_set *tcs_entry; + struct tag_ref *tr_entry; + struct uid_tag_data *utd_entry; + + argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid); + CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c " + "user_tag=0x%llx uid=%u\n", input, argc, cmd, + acct_tag, uid); + if (argc < 2) { + res = -EINVAL; + goto err; + } + if (!valid_atag(acct_tag)) { + pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input); + res = -EINVAL; + goto err; + } + if (argc < 3) { + uid = current_fsuid(); + } else if (!can_impersonate_uid(uid)) { + pr_info("qtaguid: ctrl_delete(%s): " + "insufficient priv from pid=%u tgid=%u uid=%u\n", + input, current->pid, current->tgid, current_fsuid()); + res = -EPERM; + goto err; + } + + tag = combine_atag_with_uid(acct_tag, uid); + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "looking for tag=0x%llx (uid=%u)\n", + input, tag, uid); + + /* Delete socket tags */ + spin_lock_bh(&sock_tag_list_lock); + node = rb_first(&sock_tag_tree); + while (node) { + st_entry = rb_entry(node, struct sock_tag, sock_node); + entry_uid = get_uid_from_tag(st_entry->tag); + node = rb_next(node); + if (entry_uid != uid) + continue; + + CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n", + input, st_entry->tag, entry_uid); + + if (!acct_tag || st_entry->tag == tag) { + rb_erase(&st_entry->sock_node, &sock_tag_tree); + /* Can't sockfd_put() within spinlock, do it later. */ + sock_tag_tree_insert(st_entry, &st_to_free_tree); + tr_entry = lookup_tag_ref(st_entry->tag, NULL); + BUG_ON(tr_entry->num_sock_tags <= 0); + tr_entry->num_sock_tags--; + /* + * TODO: remove if, and start failing. + * This is a hack to work around the fact that in some + * places we have "if (IS_ERR_OR_NULL(pqd_entry))" + * and are trying to work around apps + * that didn't open the /dev/xt_qtaguid. + */ + if (st_entry->list.next && st_entry->list.prev) + list_del(&st_entry->list); + } + } + spin_unlock_bh(&sock_tag_list_lock); + + sock_tag_tree_erase(&st_to_free_tree); + + /* Delete tag counter-sets */ + spin_lock_bh(&tag_counter_set_list_lock); + /* Counter sets are only on the uid tag, not full tag */ + tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag); + if (tcs_entry) { + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "erase tcs: tag=0x%llx (uid=%u) set=%d\n", + input, + tcs_entry->tn.tag, + get_uid_from_tag(tcs_entry->tn.tag), + tcs_entry->active_set); + rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree); + kfree(tcs_entry); + } + spin_unlock_bh(&tag_counter_set_list_lock); + + /* + * If acct_tag is 0, then all entries belonging to uid are + * erased. + */ + spin_lock_bh(&iface_stat_list_lock); + list_for_each_entry(iface_entry, &iface_stat_list, list) { + spin_lock_bh(&iface_entry->tag_stat_list_lock); + node = rb_first(&iface_entry->tag_stat_tree); + while (node) { + ts_entry = rb_entry(node, struct tag_stat, tn.node); + entry_uid = get_uid_from_tag(ts_entry->tn.tag); + node = rb_next(node); + + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "ts tag=0x%llx (uid=%u)\n", + input, ts_entry->tn.tag, entry_uid); + + if (entry_uid != uid) + continue; + if (!acct_tag || ts_entry->tn.tag == tag) { + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "erase ts: %s 0x%llx %u\n", + input, iface_entry->ifname, + get_atag_from_tag(ts_entry->tn.tag), + entry_uid); + rb_erase(&ts_entry->tn.node, + &iface_entry->tag_stat_tree); + kfree(ts_entry); + } + } + spin_unlock_bh(&iface_entry->tag_stat_list_lock); + } + spin_unlock_bh(&iface_stat_list_lock); + + /* Cleanup the uid_tag_data */ + spin_lock_bh(&uid_tag_data_tree_lock); + node = rb_first(&uid_tag_data_tree); + while (node) { + utd_entry = rb_entry(node, struct uid_tag_data, node); + entry_uid = utd_entry->uid; + node = rb_next(node); + + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "utd uid=%u\n", + input, entry_uid); + + if (entry_uid != uid) + continue; + /* + * Go over the tag_refs, and those that don't have + * sock_tags using them are freed. + */ + put_tag_ref_tree(tag, utd_entry); + put_utd_entry(utd_entry); + } + spin_unlock_bh(&uid_tag_data_tree_lock); + + atomic64_inc(&qtu_events.delete_cmds); + res = 0; + +err: + return res; +} + +static int ctrl_cmd_counter_set(const char *input) +{ + char cmd; + uid_t uid = 0; + tag_t tag; + int res, argc; + struct tag_counter_set *tcs; + int counter_set; + + argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid); + CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c " + "set=%d uid=%u\n", input, argc, cmd, + counter_set, uid); + if (argc != 3) { + res = -EINVAL; + goto err; + } + if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) { + pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n", + input); + res = -EINVAL; + goto err; + } + if (!can_manipulate_uids()) { + pr_info("qtaguid: ctrl_counterset(%s): " + "insufficient priv from pid=%u tgid=%u uid=%u\n", + input, current->pid, current->tgid, current_fsuid()); + res = -EPERM; + goto err; + } + + tag = make_tag_from_uid(uid); + spin_lock_bh(&tag_counter_set_list_lock); + tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); + if (!tcs) { + tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC); + if (!tcs) { + spin_unlock_bh(&tag_counter_set_list_lock); + pr_err("qtaguid: ctrl_counterset(%s): " + "failed to alloc counter set\n", + input); + res = -ENOMEM; + goto err; + } + tcs->tn.tag = tag; + tag_counter_set_tree_insert(tcs, &tag_counter_set_tree); + CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx " + "(uid=%u) set=%d\n", + input, tag, get_uid_from_tag(tag), counter_set); + } + tcs->active_set = counter_set; + spin_unlock_bh(&tag_counter_set_list_lock); + atomic64_inc(&qtu_events.counter_set_changes); + res = 0; + +err: + return res; +} + +static int ctrl_cmd_tag(const char *input) +{ + char cmd; + int sock_fd = 0; + uid_t uid = 0; + tag_t acct_tag = make_atag_from_value(0); + tag_t full_tag; + struct socket *el_socket; + int res, argc; + struct sock_tag *sock_tag_entry; + struct tag_ref *tag_ref_entry; + struct uid_tag_data *uid_tag_data_entry; + struct proc_qtu_data *pqd_entry; + + /* Unassigned args will get defaulted later. */ + argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid); + CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d " + "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd, + acct_tag, uid); + if (argc < 2) { + res = -EINVAL; + goto err; + } + el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ + if (!el_socket) { + pr_info("qtaguid: ctrl_tag(%s): failed to lookup" + " sock_fd=%d err=%d\n", input, sock_fd, res); + goto err; + } + CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", + input, atomic_long_read(&el_socket->file->f_count), + el_socket->sk); + if (argc < 3) { + acct_tag = make_atag_from_value(0); + } else if (!valid_atag(acct_tag)) { + pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input); + res = -EINVAL; + goto err_put; + } + CT_DEBUG("qtaguid: ctrl_tag(%s): " + "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " + "in_group=%d in_egroup=%d\n", + input, current->pid, current->tgid, current_uid(), + current_euid(), current_fsuid(), + in_group_p(proc_ctrl_write_gid), + in_egroup_p(proc_ctrl_write_gid)); + if (argc < 4) { + uid = current_fsuid(); + } else if (!can_impersonate_uid(uid)) { + pr_info("qtaguid: ctrl_tag(%s): " + "insufficient priv from pid=%u tgid=%u uid=%u\n", + input, current->pid, current->tgid, current_fsuid()); + res = -EPERM; + goto err_put; + } + full_tag = combine_atag_with_uid(acct_tag, uid); + + spin_lock_bh(&sock_tag_list_lock); + sock_tag_entry = get_sock_stat_nl(el_socket->sk); + tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry); + if (IS_ERR(tag_ref_entry)) { + res = PTR_ERR(tag_ref_entry); + spin_unlock_bh(&sock_tag_list_lock); + goto err_put; + } + tag_ref_entry->num_sock_tags++; + if (sock_tag_entry) { + struct tag_ref *prev_tag_ref_entry; + + CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p " + "st@%p ...->f_count=%ld\n", + input, el_socket->sk, sock_tag_entry, + atomic_long_read(&el_socket->file->f_count)); + /* + * This is a re-tagging, so release the sock_fd that was + * locked at the time of the 1st tagging. + * There is still the ref from this call's sockfd_lookup() so + * it can be done within the spinlock. + */ + sockfd_put(sock_tag_entry->socket); + prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, + &uid_tag_data_entry); + BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry)); + BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0); + prev_tag_ref_entry->num_sock_tags--; + sock_tag_entry->tag = full_tag; + } else { + CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n", + input, el_socket->sk); + sock_tag_entry = kzalloc(sizeof(*sock_tag_entry), + GFP_ATOMIC); + if (!sock_tag_entry) { + pr_err("qtaguid: ctrl_tag(%s): " + "socket tag alloc failed\n", + input); + spin_unlock_bh(&sock_tag_list_lock); + res = -ENOMEM; + goto err_tag_unref_put; + } + sock_tag_entry->sk = el_socket->sk; + sock_tag_entry->socket = el_socket; + sock_tag_entry->pid = current->tgid; + sock_tag_entry->tag = combine_atag_with_uid(acct_tag, + uid); + spin_lock_bh(&uid_tag_data_tree_lock); + pqd_entry = proc_qtu_data_tree_search( + &proc_qtu_data_tree, current->tgid); + /* + * TODO: remove if, and start failing. + * At first, we want to catch user-space code that is not + * opening the /dev/xt_qtaguid. + */ + if (IS_ERR_OR_NULL(pqd_entry)) + pr_warn_once( + "qtaguid: %s(): " + "User space forgot to open /dev/xt_qtaguid? " + "pid=%u tgid=%u uid=%u\n", __func__, + current->pid, current->tgid, + current_fsuid()); + else + list_add(&sock_tag_entry->list, + &pqd_entry->sock_tag_list); + spin_unlock_bh(&uid_tag_data_tree_lock); + + sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree); + atomic64_inc(&qtu_events.sockets_tagged); + } + spin_unlock_bh(&sock_tag_list_lock); + /* We keep the ref to the socket (file) until it is untagged */ + CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n", + input, sock_tag_entry, + atomic_long_read(&el_socket->file->f_count)); + return 0; + +err_tag_unref_put: + BUG_ON(tag_ref_entry->num_sock_tags <= 0); + tag_ref_entry->num_sock_tags--; + free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry); +err_put: + CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n", + input, atomic_long_read(&el_socket->file->f_count) - 1); + /* Release the sock_fd that was grabbed by sockfd_lookup(). */ + sockfd_put(el_socket); + return res; + +err: + CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input); + return res; +} + +static int ctrl_cmd_untag(const char *input) +{ + char cmd; + int sock_fd = 0; + struct socket *el_socket; + int res, argc; + struct sock_tag *sock_tag_entry; + struct tag_ref *tag_ref_entry; + struct uid_tag_data *utd_entry; + struct proc_qtu_data *pqd_entry; + + argc = sscanf(input, "%c %d", &cmd, &sock_fd); + CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n", + input, argc, cmd, sock_fd); + if (argc < 2) { + res = -EINVAL; + goto err; + } + el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ + if (!el_socket) { + pr_info("qtaguid: ctrl_untag(%s): failed to lookup" + " sock_fd=%d err=%d\n", input, sock_fd, res); + goto err; + } + CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", + input, atomic_long_read(&el_socket->file->f_count), + el_socket->sk); + spin_lock_bh(&sock_tag_list_lock); + sock_tag_entry = get_sock_stat_nl(el_socket->sk); + if (!sock_tag_entry) { + spin_unlock_bh(&sock_tag_list_lock); + res = -EINVAL; + goto err_put; + } + /* + * The socket already belongs to the current process + * so it can do whatever it wants to it. + */ + rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree); + + tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry); + BUG_ON(!tag_ref_entry); + BUG_ON(tag_ref_entry->num_sock_tags <= 0); + spin_lock_bh(&uid_tag_data_tree_lock); + pqd_entry = proc_qtu_data_tree_search( + &proc_qtu_data_tree, current->tgid); + /* + * TODO: remove if, and start failing. + * At first, we want to catch user-space code that is not + * opening the /dev/xt_qtaguid. + */ + if (IS_ERR_OR_NULL(pqd_entry)) + pr_warn_once("qtaguid: %s(): " + "User space forgot to open /dev/xt_qtaguid? " + "pid=%u tgid=%u uid=%u\n", __func__, + current->pid, current->tgid, current_fsuid()); + else + list_del(&sock_tag_entry->list); + spin_unlock_bh(&uid_tag_data_tree_lock); + /* + * We don't free tag_ref from the utd_entry here, + * only during a cmd_delete(). + */ + tag_ref_entry->num_sock_tags--; + spin_unlock_bh(&sock_tag_list_lock); + /* + * Release the sock_fd that was grabbed at tag time, + * and once more for the sockfd_lookup() here. + */ + sockfd_put(sock_tag_entry->socket); + CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n", + input, sock_tag_entry, + atomic_long_read(&el_socket->file->f_count) - 1); + sockfd_put(el_socket); + + kfree(sock_tag_entry); + atomic64_inc(&qtu_events.sockets_untagged); + + return 0; + +err_put: + CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n", + input, atomic_long_read(&el_socket->file->f_count) - 1); + /* Release the sock_fd that was grabbed by sockfd_lookup(). */ + sockfd_put(el_socket); + return res; + +err: + CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input); + return res; +} + +static int qtaguid_ctrl_parse(const char *input, int count) +{ + char cmd; + int res; + + cmd = input[0]; + /* Collect params for commands */ + switch (cmd) { + case 'd': + res = ctrl_cmd_delete(input); + break; + + case 's': + res = ctrl_cmd_counter_set(input); + break; + + case 't': + res = ctrl_cmd_tag(input); + break; + + case 'u': + res = ctrl_cmd_untag(input); + break; + + default: + res = -EINVAL; + goto err; + } + if (!res) + res = count; +err: + CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res); + return res; +} + +#define MAX_QTAGUID_CTRL_INPUT_LEN 255 +static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN]; + + if (unlikely(module_passive)) + return count; + + if (count >= MAX_QTAGUID_CTRL_INPUT_LEN) + return -EINVAL; + + if (copy_from_user(input_buf, buffer, count)) + return -EFAULT; + + input_buf[count] = '\0'; + return qtaguid_ctrl_parse(input_buf, count); +} + +struct proc_print_info { + char *outp; + char **num_items_returned; + struct iface_stat *iface_entry; + struct tag_stat *ts_entry; + int item_index; + int items_to_skip; + int char_count; +}; + +static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) +{ + int len; + struct data_counters *cnts; + + if (!ppi->item_index) { + if (ppi->item_index++ < ppi->items_to_skip) + return 0; + len = snprintf(ppi->outp, ppi->char_count, + "idx iface acct_tag_hex uid_tag_int cnt_set " + "rx_bytes rx_packets " + "tx_bytes tx_packets " + "rx_tcp_bytes rx_tcp_packets " + "rx_udp_bytes rx_udp_packets " + "rx_other_bytes rx_other_packets " + "tx_tcp_bytes tx_tcp_packets " + "tx_udp_bytes tx_udp_packets " + "tx_other_bytes tx_other_packets\n"); + } else { + tag_t tag = ppi->ts_entry->tn.tag; + uid_t stat_uid = get_uid_from_tag(tag); + + if (!can_read_other_uid_stats(stat_uid)) { + CT_DEBUG("qtaguid: stats line: " + "%s 0x%llx %u: insufficient priv " + "from pid=%u tgid=%u uid=%u\n", + ppi->iface_entry->ifname, + get_atag_from_tag(tag), stat_uid, + current->pid, current->tgid, current_fsuid()); + return 0; + } + if (ppi->item_index++ < ppi->items_to_skip) + return 0; + cnts = &ppi->ts_entry->counters; + len = snprintf( + ppi->outp, ppi->char_count, + "%d %s 0x%llx %u %u " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu\n", + ppi->item_index, + ppi->iface_entry->ifname, + get_atag_from_tag(tag), + stat_uid, + cnt_set, + dc_sum_bytes(cnts, cnt_set, IFS_RX), + dc_sum_packets(cnts, cnt_set, IFS_RX), + dc_sum_bytes(cnts, cnt_set, IFS_TX), + dc_sum_packets(cnts, cnt_set, IFS_TX), + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); + } + return len; +} + +static bool pp_sets(struct proc_print_info *ppi) +{ + int len; + int counter_set; + for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS; + counter_set++) { + len = pp_stats_line(ppi, counter_set); + if (len >= ppi->char_count) { + *ppi->outp = '\0'; + return false; + } + if (len) { + ppi->outp += len; + ppi->char_count -= len; + (*ppi->num_items_returned)++; + } + } + return true; +} + +/* + * Procfs reader to get all tag stats using style "1)" as described in + * fs/proc/generic.c + * Groups all protocols tx/rx bytes. + */ +static int qtaguid_stats_proc_read(char *page, char **num_items_returned, + off_t items_to_skip, int char_count, int *eof, + void *data) +{ + struct proc_print_info ppi; + int len; + + ppi.outp = page; + ppi.item_index = 0; + ppi.char_count = char_count; + ppi.num_items_returned = num_items_returned; + ppi.items_to_skip = items_to_skip; + + if (unlikely(module_passive)) { + len = pp_stats_line(&ppi, 0); + /* The header should always be shorter than the buffer. */ + BUG_ON(len >= ppi.char_count); + (*num_items_returned)++; + *eof = 1; + return len; + } + + CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld " + "char_count=%d *eof=%d\n", page, *num_items_returned, + items_to_skip, char_count, *eof); + + if (*eof) + return 0; + + /* The idx is there to help debug when things go belly up. */ + len = pp_stats_line(&ppi, 0); + /* Don't advance the outp unless the whole line was printed */ + if (len >= ppi.char_count) { + *ppi.outp = '\0'; + return ppi.outp - page; + } + if (len) { + ppi.outp += len; + ppi.char_count -= len; + (*num_items_returned)++; + } + + spin_lock_bh(&iface_stat_list_lock); + list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) { + struct rb_node *node; + spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock); + for (node = rb_first(&ppi.iface_entry->tag_stat_tree); + node; + node = rb_next(node)) { + ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node); + if (!pp_sets(&ppi)) { + spin_unlock_bh( + &ppi.iface_entry->tag_stat_list_lock); + spin_unlock_bh(&iface_stat_list_lock); + return ppi.outp - page; + } + } + spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock); + } + spin_unlock_bh(&iface_stat_list_lock); + + *eof = 1; + return ppi.outp - page; +} + +/*------------------------------------------*/ +static int qtudev_open(struct inode *inode, struct file *file) +{ + struct uid_tag_data *utd_entry; + struct proc_qtu_data *pqd_entry; + struct proc_qtu_data *new_pqd_entry; + int res; + bool utd_entry_found; + + if (unlikely(qtu_proc_handling_passive)) + return 0; + + DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n", + current->pid, current->tgid, current_fsuid()); + + spin_lock_bh(&uid_tag_data_tree_lock); + + /* Look for existing uid data, or alloc one. */ + utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); + if (IS_ERR_OR_NULL(utd_entry)) { + res = PTR_ERR(utd_entry); + goto err; + } + + /* Look for existing PID based proc_data */ + pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree, + current->tgid); + if (pqd_entry) { + pr_err("qtaguid: qtudev_open(): %u/%u %u " + "%s already opened\n", + current->pid, current->tgid, current_fsuid(), + QTU_DEV_NAME); + res = -EBUSY; + goto err_unlock_free_utd; + } + + new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC); + if (!new_pqd_entry) { + pr_err("qtaguid: qtudev_open(): %u/%u %u: " + "proc data alloc failed\n", + current->pid, current->tgid, current_fsuid()); + res = -ENOMEM; + goto err_unlock_free_utd; + } + new_pqd_entry->pid = current->tgid; + INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list); + new_pqd_entry->parent_tag_data = utd_entry; + utd_entry->num_pqd++; + + proc_qtu_data_tree_insert(new_pqd_entry, + &proc_qtu_data_tree); + + spin_unlock_bh(&uid_tag_data_tree_lock); + DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n", + current_fsuid(), new_pqd_entry); + file->private_data = new_pqd_entry; + return 0; + +err_unlock_free_utd: + if (!utd_entry_found) { + rb_erase(&utd_entry->node, &uid_tag_data_tree); + kfree(utd_entry); + } + spin_unlock_bh(&uid_tag_data_tree_lock); +err: + return res; +} + +static int qtudev_release(struct inode *inode, struct file *file) +{ + struct proc_qtu_data *pqd_entry = file->private_data; + struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data; + struct sock_tag *st_entry; + struct rb_root st_to_free_tree = RB_ROOT; + struct list_head *entry, *next; + struct tag_ref *tr; + + if (unlikely(qtu_proc_handling_passive)) + return 0; + + /* + * Do not trust the current->pid, it might just be a kworker cleaning + * up after a dead proc. + */ + DR_DEBUG("qtaguid: qtudev_release(): " + "pid=%u tgid=%u uid=%u " + "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n", + current->pid, current->tgid, pqd_entry->parent_tag_data->uid, + pqd_entry, pqd_entry->pid, utd_entry, + utd_entry->num_active_tags); + + spin_lock_bh(&sock_tag_list_lock); + spin_lock_bh(&uid_tag_data_tree_lock); + + list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) { + st_entry = list_entry(entry, struct sock_tag, list); + DR_DEBUG("qtaguid: %s(): " + "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n", + __func__, + st_entry, st_entry->sk, + current->pid, current->tgid, + pqd_entry->parent_tag_data->uid); + + utd_entry = uid_tag_data_tree_search( + &uid_tag_data_tree, + get_uid_from_tag(st_entry->tag)); + BUG_ON(IS_ERR_OR_NULL(utd_entry)); + DR_DEBUG("qtaguid: %s(): " + "looking for tag=0x%llx in utd_entry=%p\n", __func__, + st_entry->tag, utd_entry); + tr = tag_ref_tree_search(&utd_entry->tag_ref_tree, + st_entry->tag); + BUG_ON(!tr); + BUG_ON(tr->num_sock_tags <= 0); + tr->num_sock_tags--; + free_tag_ref_from_utd_entry(tr, utd_entry); + + rb_erase(&st_entry->sock_node, &sock_tag_tree); + list_del(&st_entry->list); + /* Can't sockfd_put() within spinlock, do it later. */ + sock_tag_tree_insert(st_entry, &st_to_free_tree); + + /* + * Try to free the utd_entry if no other proc_qtu_data is + * using it (num_pqd is 0) and it doesn't have active tags + * (num_active_tags is 0). + */ + put_utd_entry(utd_entry); + } + + rb_erase(&pqd_entry->node, &proc_qtu_data_tree); + BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1); + pqd_entry->parent_tag_data->num_pqd--; + put_utd_entry(pqd_entry->parent_tag_data); + kfree(pqd_entry); + file->private_data = NULL; + + spin_unlock_bh(&uid_tag_data_tree_lock); + spin_unlock_bh(&sock_tag_list_lock); + + + sock_tag_tree_erase(&st_to_free_tree); + + prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__, + current->pid, current->tgid); + return 0; +} + +/*------------------------------------------*/ +static const struct file_operations qtudev_fops = { + .owner = THIS_MODULE, + .open = qtudev_open, + .release = qtudev_release, +}; + +static struct miscdevice qtu_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = QTU_DEV_NAME, + .fops = &qtudev_fops, + /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */ +}; + +/*------------------------------------------*/ +static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir) +{ + int ret; + *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net); + if (!*res_procdir) { + pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n"); + ret = -ENOMEM; + goto no_dir; + } + + xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms, + *res_procdir); + if (!xt_qtaguid_ctrl_file) { + pr_err("qtaguid: failed to create xt_qtaguid/ctrl " + " file\n"); + ret = -ENOMEM; + goto no_ctrl_entry; + } + xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read; + xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write; + + xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms, + *res_procdir); + if (!xt_qtaguid_stats_file) { + pr_err("qtaguid: failed to create xt_qtaguid/stats " + "file\n"); + ret = -ENOMEM; + goto no_stats_entry; + } + xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read; + /* + * TODO: add support counter hacking + * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write; + */ + return 0; + +no_stats_entry: + remove_proc_entry("ctrl", *res_procdir); +no_ctrl_entry: + remove_proc_entry("xt_qtaguid", NULL); +no_dir: + return ret; +} + +static struct xt_match qtaguid_mt_reg __read_mostly = { + /* + * This module masquerades as the "owner" module so that iptables + * tools can deal with it. + */ + .name = "owner", + .revision = 1, + .family = NFPROTO_UNSPEC, + .match = qtaguid_mt, + .matchsize = sizeof(struct xt_qtaguid_match_info), + .me = THIS_MODULE, +}; + +static int __init qtaguid_mt_init(void) +{ + if (qtaguid_proc_register(&xt_qtaguid_procdir) + || iface_stat_init(xt_qtaguid_procdir) + || xt_register_match(&qtaguid_mt_reg) + || misc_register(&qtu_device)) + return -1; + return 0; +} + +/* + * TODO: allow unloading of the module. + * For now stats are permanent. + * Kconfig forces'y/n' and never an 'm'. + */ + +module_init(qtaguid_mt_init); +MODULE_AUTHOR("jpa <jpa@google.com>"); +MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_owner"); +MODULE_ALIAS("ip6t_owner"); +MODULE_ALIAS("ipt_qtaguid"); +MODULE_ALIAS("ip6t_qtaguid"); diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h new file mode 100644 index 000000000000..02479d6d317d --- /dev/null +++ b/net/netfilter/xt_qtaguid_internal.h @@ -0,0 +1,330 @@ +/* + * Kernel iptables module to track stats for packets based on user tags. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __XT_QTAGUID_INTERNAL_H__ +#define __XT_QTAGUID_INTERNAL_H__ + +#include <linux/types.h> +#include <linux/rbtree.h> +#include <linux/spinlock_types.h> +#include <linux/workqueue.h> + +/* Iface handling */ +#define IDEBUG_MASK (1<<0) +/* Iptable Matching. Per packet. */ +#define MDEBUG_MASK (1<<1) +/* Red-black tree handling. Per packet. */ +#define RDEBUG_MASK (1<<2) +/* procfs ctrl/stats handling */ +#define CDEBUG_MASK (1<<3) +/* dev and resource tracking */ +#define DDEBUG_MASK (1<<4) + +/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */ +#define DEFAULT_DEBUG_MASK 0 + +/* + * (Un)Define these *DEBUG to compile out/in the pr_debug calls. + * All undef: text size ~ 0x3030; all def: ~ 0x4404. + */ +#define IDEBUG +#define MDEBUG +#define RDEBUG +#define CDEBUG +#define DDEBUG + +#define MSK_DEBUG(mask, ...) do { \ + if (unlikely(qtaguid_debug_mask & (mask))) \ + pr_debug(__VA_ARGS__); \ + } while (0) +#ifdef IDEBUG +#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__) +#else +#define IF_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef MDEBUG +#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__) +#else +#define MT_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef RDEBUG +#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__) +#else +#define RB_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef CDEBUG +#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__) +#else +#define CT_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef DDEBUG +#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__) +#else +#define DR_DEBUG(...) no_printk(__VA_ARGS__) +#endif + +extern uint qtaguid_debug_mask; + +/*---------------------------------------------------------------------------*/ +/* + * Tags: + * + * They represent what the data usage counters will be tracked against. + * By default a tag is just based on the UID. + * The UID is used as the base for policing, and can not be ignored. + * So a tag will always at least represent a UID (uid_tag). + * + * A tag can be augmented with an "accounting tag" which is associated + * with a UID. + * User space can set the acct_tag portion of the tag which is then used + * with sockets: all data belonging to that socket will be counted against the + * tag. The policing is then based on the tag's uid_tag portion, + * and stats are collected for the acct_tag portion separately. + * + * There could be + * a: {acct_tag=1, uid_tag=10003} + * b: {acct_tag=2, uid_tag=10003} + * c: {acct_tag=3, uid_tag=10003} + * d: {acct_tag=0, uid_tag=10003} + * a, b, and c represent tags associated with specific sockets. + * d is for the totals for that uid, including all untagged traffic. + * Typically d is used with policing/quota rules. + * + * We want tag_t big enough to distinguish uid_t and acct_tag. + * It might become a struct if needed. + * Nothing should be using it as an int. + */ +typedef uint64_t tag_t; /* Only used via accessors */ + +#define TAG_UID_MASK 0xFFFFFFFFULL +#define TAG_ACCT_MASK (~0xFFFFFFFFULL) + +static inline int tag_compare(tag_t t1, tag_t t2) +{ + return t1 < t2 ? -1 : t1 == t2 ? 0 : 1; +} + +static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid) +{ + return acct_tag | uid; +} +static inline tag_t make_tag_from_uid(uid_t uid) +{ + return uid; +} +static inline uid_t get_uid_from_tag(tag_t tag) +{ + return tag & TAG_UID_MASK; +} +static inline tag_t get_utag_from_tag(tag_t tag) +{ + return tag & TAG_UID_MASK; +} +static inline tag_t get_atag_from_tag(tag_t tag) +{ + return tag & TAG_ACCT_MASK; +} + +static inline bool valid_atag(tag_t tag) +{ + return !(tag & TAG_UID_MASK); +} +static inline tag_t make_atag_from_value(uint32_t value) +{ + return (uint64_t)value << 32; +} +/*---------------------------------------------------------------------------*/ + +/* + * Maximum number of socket tags that a UID is allowed to have active. + * Multiple processes belonging to the same UID contribute towards this limit. + * Special UIDs that can impersonate a UID also contribute (e.g. download + * manager, ...) + */ +#define DEFAULT_MAX_SOCK_TAGS 1024 + +/* + * For now we only track 2 sets of counters. + * The default set is 0. + * Userspace can activate another set for a given uid being tracked. + */ +#define IFS_MAX_COUNTER_SETS 2 + +enum ifs_tx_rx { + IFS_TX, + IFS_RX, + IFS_MAX_DIRECTIONS +}; + +/* For now, TCP, UDP, the rest */ +enum ifs_proto { + IFS_TCP, + IFS_UDP, + IFS_PROTO_OTHER, + IFS_MAX_PROTOS +}; + +struct byte_packet_counters { + uint64_t bytes; + uint64_t packets; +}; + +struct data_counters { + struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; +}; + +/* Generic X based nodes used as a base for rb_tree ops */ +struct tag_node { + struct rb_node node; + tag_t tag; +}; + +struct tag_stat { + struct tag_node tn; + struct data_counters counters; + /* + * If this tag is acct_tag based, we need to count against the + * matching parent uid_tag. + */ + struct data_counters *parent_counters; +}; + +struct iface_stat { + struct list_head list; /* in iface_stat_list */ + char *ifname; + bool active; + /* net_dev is only valid for active iface_stat */ + struct net_device *net_dev; + + struct byte_packet_counters totals[IFS_MAX_DIRECTIONS]; + /* + * We keep the last_known, because some devices reset their counters + * just before NETDEV_UP, while some will reset just before + * NETDEV_REGISTER (which is more normal). + * So now, if the device didn't do a NETDEV_UNREGISTER and we see + * its current dev stats smaller that what was previously known, we + * assume an UNREGISTER and just use the last_known. + */ + struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS]; + /* last_known is usable when last_known_valid is true */ + bool last_known_valid; + + struct proc_dir_entry *proc_ptr; + + struct rb_root tag_stat_tree; + spinlock_t tag_stat_list_lock; +}; + +/* This is needed to create proc_dir_entries from atomic context. */ +struct iface_stat_work { + struct work_struct iface_work; + struct iface_stat *iface_entry; +}; + +/* + * Track tag that this socket is transferring data for, and not necessarily + * the uid that owns the socket. + * This is the tag against which tag_stat.counters will be billed. + * These structs need to be looked up by sock and pid. + */ +struct sock_tag { + struct rb_node sock_node; + struct sock *sk; /* Only used as a number, never dereferenced */ + /* The socket is needed for sockfd_put() */ + struct socket *socket; + /* Used to associate with a given pid */ + struct list_head list; /* in proc_qtu_data.sock_tag_list */ + pid_t pid; + + tag_t tag; +}; + +struct qtaguid_event_counts { + /* Various successful events */ + atomic64_t sockets_tagged; + atomic64_t sockets_untagged; + atomic64_t counter_set_changes; + atomic64_t delete_cmds; + atomic64_t iface_events; /* Number of NETDEV_* events handled */ + + atomic64_t match_calls; /* Number of times iptables called mt */ + /* + * match_found_sk_*: numbers related to the netfilter matching + * function finding a sock for the sk_buff. + * Total skbs processed is sum(match_found*). + */ + atomic64_t match_found_sk; /* An sk was already in the sk_buff. */ + /* The connection tracker had or didn't have the sk. */ + atomic64_t match_found_sk_in_ct; + atomic64_t match_found_no_sk_in_ct; + /* + * No sk could be found. No apparent owner. Could happen with + * unsolicited traffic. + */ + atomic64_t match_no_sk; + /* + * The file ptr in the sk_socket wasn't there. + * This might happen for traffic while the socket is being closed. + */ + atomic64_t match_no_sk_file; +}; + +/* Track the set active_set for the given tag. */ +struct tag_counter_set { + struct tag_node tn; + int active_set; +}; + +/*----------------------------------------------*/ +/* + * The qtu uid data is used to track resources that are created directly or + * indirectly by processes (uid tracked). + * It is shared by the processes with the same uid. + * Some of the resource will be counted to prevent further rogue allocations, + * some will need freeing once the owner process (uid) exits. + */ +struct uid_tag_data { + struct rb_node node; + uid_t uid; + + /* + * For the uid, how many accounting tags have been set. + */ + int num_active_tags; + /* Track the number of proc_qtu_data that reference it */ + int num_pqd; + struct rb_root tag_ref_tree; + /* No tag_node_tree_lock; use uid_tag_data_tree_lock */ +}; + +struct tag_ref { + struct tag_node tn; + + /* + * This tracks the number of active sockets that have a tag on them + * which matches this tag_ref.tn.tag. + * A tag ref can live on after the sockets are untagged. + * A tag ref can only be removed during a tag delete command. + */ + int num_sock_tags; +}; + +struct proc_qtu_data { + struct rb_node node; + pid_t pid; + + struct uid_tag_data *parent_tag_data; + + /* Tracks the sock_tags that need freeing upon this proc's death */ + struct list_head sock_tag_list; + /* No spinlock_t sock_tag_list_lock; use the global one. */ +}; + +/*----------------------------------------------*/ +#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */ diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c new file mode 100644 index 000000000000..39176785c91f --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.c @@ -0,0 +1,556 @@ +/* + * Pretty printing Support for iptables xt_qtaguid module. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Most of the functions in this file just waste time if DEBUG is not defined. + * The matching xt_qtaguid_print.h will static inline empty funcs if the needed + * debug flags ore not defined. + * Those funcs that fail to allocate memory will panic as there is no need to + * hobble allong just pretending to do the requested work. + */ + +#define DEBUG + +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/net.h> +#include <linux/rbtree.h> +#include <linux/slab.h> +#include <linux/spinlock_types.h> + + +#include "xt_qtaguid_internal.h" +#include "xt_qtaguid_print.h" + +#ifdef DDEBUG + +static void _bug_on_err_or_null(void *ptr) +{ + if (IS_ERR_OR_NULL(ptr)) { + pr_err("qtaguid: kmalloc failed\n"); + BUG(); + } +} + +char *pp_tag_t(tag_t *tag) +{ + char *res; + + if (!tag) + res = kasprintf(GFP_ATOMIC, "tag_t@null{}"); + else + res = kasprintf(GFP_ATOMIC, + "tag_t@%p{tag=0x%llx, uid=%u}", + tag, *tag, get_uid_from_tag(*tag)); + _bug_on_err_or_null(res); + return res; +} + +char *pp_data_counters(struct data_counters *dc, bool showValues) +{ + char *res; + + if (!dc) + res = kasprintf(GFP_ATOMIC, "data_counters@null{}"); + else if (showValues) + res = kasprintf( + GFP_ATOMIC, "data_counters@%p{" + "set0{" + "rx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}, " + "tx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}}, " + "set1{" + "rx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}, " + "tx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}}}", + dc, + dc->bpc[0][IFS_RX][IFS_TCP].bytes, + dc->bpc[0][IFS_RX][IFS_TCP].packets, + dc->bpc[0][IFS_RX][IFS_UDP].bytes, + dc->bpc[0][IFS_RX][IFS_UDP].packets, + dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes, + dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets, + dc->bpc[0][IFS_TX][IFS_TCP].bytes, + dc->bpc[0][IFS_TX][IFS_TCP].packets, + dc->bpc[0][IFS_TX][IFS_UDP].bytes, + dc->bpc[0][IFS_TX][IFS_UDP].packets, + dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes, + dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets, + dc->bpc[1][IFS_RX][IFS_TCP].bytes, + dc->bpc[1][IFS_RX][IFS_TCP].packets, + dc->bpc[1][IFS_RX][IFS_UDP].bytes, + dc->bpc[1][IFS_RX][IFS_UDP].packets, + dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes, + dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets, + dc->bpc[1][IFS_TX][IFS_TCP].bytes, + dc->bpc[1][IFS_TX][IFS_TCP].packets, + dc->bpc[1][IFS_TX][IFS_UDP].bytes, + dc->bpc[1][IFS_TX][IFS_UDP].packets, + dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes, + dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets); + else + res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc); + _bug_on_err_or_null(res); + return res; +} + +char *pp_tag_node(struct tag_node *tn) +{ + char *tag_str; + char *res; + + if (!tn) { + res = kasprintf(GFP_ATOMIC, "tag_node@null{}"); + _bug_on_err_or_null(res); + return res; + } + tag_str = pp_tag_t(&tn->tag); + res = kasprintf(GFP_ATOMIC, + "tag_node@%p{tag=%s}", + tn, tag_str); + _bug_on_err_or_null(res); + kfree(tag_str); + return res; +} + +char *pp_tag_ref(struct tag_ref *tr) +{ + char *tn_str; + char *res; + + if (!tr) { + res = kasprintf(GFP_ATOMIC, "tag_ref@null{}"); + _bug_on_err_or_null(res); + return res; + } + tn_str = pp_tag_node(&tr->tn); + res = kasprintf(GFP_ATOMIC, + "tag_ref@%p{%s, num_sock_tags=%d}", + tr, tn_str, tr->num_sock_tags); + _bug_on_err_or_null(res); + kfree(tn_str); + return res; +} + +char *pp_tag_stat(struct tag_stat *ts) +{ + char *tn_str; + char *counters_str; + char *parent_counters_str; + char *res; + + if (!ts) { + res = kasprintf(GFP_ATOMIC, "tag_stat@null{}"); + _bug_on_err_or_null(res); + return res; + } + tn_str = pp_tag_node(&ts->tn); + counters_str = pp_data_counters(&ts->counters, true); + parent_counters_str = pp_data_counters(ts->parent_counters, false); + res = kasprintf(GFP_ATOMIC, + "tag_stat@%p{%s, counters=%s, parent_counters=%s}", + ts, tn_str, counters_str, parent_counters_str); + _bug_on_err_or_null(res); + kfree(tn_str); + kfree(counters_str); + kfree(parent_counters_str); + return res; +} + +char *pp_iface_stat(struct iface_stat *is) +{ + char *res; + if (!is) + res = kasprintf(GFP_ATOMIC, "iface_stat@null{}"); + else + res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" + "list=list_head{...}, " + "ifname=%s, " + "total={rx={bytes=%llu, " + "packets=%llu}, " + "tx={bytes=%llu, " + "packets=%llu}}, " + "last_known_valid=%d, " + "last_known={rx={bytes=%llu, " + "packets=%llu}, " + "tx={bytes=%llu, " + "packets=%llu}}, " + "active=%d, " + "net_dev=%p, " + "proc_ptr=%p, " + "tag_stat_tree=rb_root{...}}", + is, + is->ifname, + is->totals[IFS_RX].bytes, + is->totals[IFS_RX].packets, + is->totals[IFS_TX].bytes, + is->totals[IFS_TX].packets, + is->last_known_valid, + is->last_known[IFS_RX].bytes, + is->last_known[IFS_RX].packets, + is->last_known[IFS_TX].bytes, + is->last_known[IFS_TX].packets, + is->active, + is->net_dev, + is->proc_ptr); + _bug_on_err_or_null(res); + return res; +} + +char *pp_sock_tag(struct sock_tag *st) +{ + char *tag_str; + char *res; + + if (!st) { + res = kasprintf(GFP_ATOMIC, "sock_tag@null{}"); + _bug_on_err_or_null(res); + return res; + } + tag_str = pp_tag_t(&st->tag); + res = kasprintf(GFP_ATOMIC, "sock_tag@%p{" + "sock_node=rb_node{...}, " + "sk=%p socket=%p (f_count=%lu), list=list_head{...}, " + "pid=%u, tag=%s}", + st, st->sk, st->socket, atomic_long_read( + &st->socket->file->f_count), + st->pid, tag_str); + _bug_on_err_or_null(res); + kfree(tag_str); + return res; +} + +char *pp_uid_tag_data(struct uid_tag_data *utd) +{ + char *res; + + if (!utd) + res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}"); + else + res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{" + "uid=%u, num_active_acct_tags=%d, " + "num_pqd=%d, " + "tag_node_tree=rb_root{...}, " + "proc_qtu_data_tree=rb_root{...}}", + utd, utd->uid, + utd->num_active_tags, utd->num_pqd); + _bug_on_err_or_null(res); + return res; +} + +char *pp_proc_qtu_data(struct proc_qtu_data *pqd) +{ + char *parent_tag_data_str; + char *res; + + if (!pqd) { + res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}"); + _bug_on_err_or_null(res); + return res; + } + parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data); + res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{" + "node=rb_node{...}, pid=%u, " + "parent_tag_data=%s, " + "sock_tag_list=list_head{...}}", + pqd, pqd->pid, parent_tag_data_str + ); + _bug_on_err_or_null(res); + kfree(parent_tag_data_str); + return res; +} + +/*------------------------------------------*/ +void prdebug_sock_tag_tree(int indent_level, + struct rb_root *sock_tag_tree) +{ + struct rb_node *node; + struct sock_tag *sock_tag_entry; + char *str; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(sock_tag_tree)) { + str = "sock_tag_tree=rb_root{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "sock_tag_tree=rb_root{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(sock_tag_tree); + node; + node = rb_next(node)) { + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); + str = pp_sock_tag(sock_tag_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_sock_tag_list(int indent_level, + struct list_head *sock_tag_list) +{ + struct sock_tag *sock_tag_entry; + char *str; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (list_empty(sock_tag_list)) { + str = "sock_tag_list=list_head{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "sock_tag_list=list_head{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + list_for_each_entry(sock_tag_entry, sock_tag_list, list) { + str = pp_sock_tag(sock_tag_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_proc_qtu_data_tree(int indent_level, + struct rb_root *proc_qtu_data_tree) +{ + char *str; + struct rb_node *node; + struct proc_qtu_data *proc_qtu_data_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(proc_qtu_data_tree)) { + str = "proc_qtu_data_tree=rb_root{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "proc_qtu_data_tree=rb_root{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(proc_qtu_data_tree); + node; + node = rb_next(node)) { + proc_qtu_data_entry = rb_entry(node, + struct proc_qtu_data, + node); + str = pp_proc_qtu_data(proc_qtu_data_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, + str); + kfree(str); + indent_level++; + prdebug_sock_tag_list(indent_level, + &proc_qtu_data_entry->sock_tag_list); + indent_level--; + + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) +{ + char *str; + struct rb_node *node; + struct tag_ref *tag_ref_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(tag_ref_tree)) { + str = "tag_ref_tree{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "tag_ref_tree{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(tag_ref_tree); + node; + node = rb_next(node)) { + tag_ref_entry = rb_entry(node, + struct tag_ref, + tn.node); + str = pp_tag_ref(tag_ref_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, + str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_uid_tag_data_tree(int indent_level, + struct rb_root *uid_tag_data_tree) +{ + char *str; + struct rb_node *node; + struct uid_tag_data *uid_tag_data_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(uid_tag_data_tree)) { + str = "uid_tag_data_tree=rb_root{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "uid_tag_data_tree=rb_root{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(uid_tag_data_tree); + node; + node = rb_next(node)) { + uid_tag_data_entry = rb_entry(node, struct uid_tag_data, + node); + str = pp_uid_tag_data(uid_tag_data_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); + kfree(str); + if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) { + indent_level++; + prdebug_tag_ref_tree(indent_level, + &uid_tag_data_entry->tag_ref_tree); + indent_level--; + } + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_tag_stat_tree(int indent_level, + struct rb_root *tag_stat_tree) +{ + char *str; + struct rb_node *node; + struct tag_stat *ts_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(tag_stat_tree)) { + str = "tag_stat_tree{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "tag_stat_tree{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(tag_stat_tree); + node; + node = rb_next(node)) { + ts_entry = rb_entry(node, struct tag_stat, tn.node); + str = pp_tag_stat(ts_entry); + pr_debug("%*d: %s\n", indent_level*2, indent_level, + str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_iface_stat_list(int indent_level, + struct list_head *iface_stat_list) +{ + char *str; + struct iface_stat *iface_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (list_empty(iface_stat_list)) { + str = "iface_stat_list=list_head{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "iface_stat_list=list_head{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + list_for_each_entry(iface_entry, iface_stat_list, list) { + str = pp_iface_stat(iface_entry); + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + kfree(str); + + spin_lock_bh(&iface_entry->tag_stat_list_lock); + if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) { + indent_level++; + prdebug_tag_stat_tree(indent_level, + &iface_entry->tag_stat_tree); + indent_level--; + } + spin_unlock_bh(&iface_entry->tag_stat_list_lock); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +#endif /* ifdef DDEBUG */ +/*------------------------------------------*/ +static const char * const netdev_event_strings[] = { + "netdev_unknown", + "NETDEV_UP", + "NETDEV_DOWN", + "NETDEV_REBOOT", + "NETDEV_CHANGE", + "NETDEV_REGISTER", + "NETDEV_UNREGISTER", + "NETDEV_CHANGEMTU", + "NETDEV_CHANGEADDR", + "NETDEV_GOING_DOWN", + "NETDEV_CHANGENAME", + "NETDEV_FEAT_CHANGE", + "NETDEV_BONDING_FAILOVER", + "NETDEV_PRE_UP", + "NETDEV_PRE_TYPE_CHANGE", + "NETDEV_POST_TYPE_CHANGE", + "NETDEV_POST_INIT", + "NETDEV_UNREGISTER_BATCH", + "NETDEV_RELEASE", + "NETDEV_NOTIFY_PEERS", + "NETDEV_JOIN", +}; + +const char *netdev_evt_str(int netdev_event) +{ + if (netdev_event < 0 + || netdev_event >= ARRAY_SIZE(netdev_event_strings)) + return "bad event num"; + return netdev_event_strings[netdev_event]; +} diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h new file mode 100644 index 000000000000..b63871a0be5a --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.h @@ -0,0 +1,120 @@ +/* + * Pretty printing Support for iptables xt_qtaguid module. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __XT_QTAGUID_PRINT_H__ +#define __XT_QTAGUID_PRINT_H__ + +#include "xt_qtaguid_internal.h" + +#ifdef DDEBUG + +char *pp_tag_t(tag_t *tag); +char *pp_data_counters(struct data_counters *dc, bool showValues); +char *pp_tag_node(struct tag_node *tn); +char *pp_tag_ref(struct tag_ref *tr); +char *pp_tag_stat(struct tag_stat *ts); +char *pp_iface_stat(struct iface_stat *is); +char *pp_sock_tag(struct sock_tag *st); +char *pp_uid_tag_data(struct uid_tag_data *qtd); +char *pp_proc_qtu_data(struct proc_qtu_data *pqd); + +/*------------------------------------------*/ +void prdebug_sock_tag_list(int indent_level, + struct list_head *sock_tag_list); +void prdebug_sock_tag_tree(int indent_level, + struct rb_root *sock_tag_tree); +void prdebug_proc_qtu_data_tree(int indent_level, + struct rb_root *proc_qtu_data_tree); +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree); +void prdebug_uid_tag_data_tree(int indent_level, + struct rb_root *uid_tag_data_tree); +void prdebug_tag_stat_tree(int indent_level, + struct rb_root *tag_stat_tree); +void prdebug_iface_stat_list(int indent_level, + struct list_head *iface_stat_list); + +#else + +/*------------------------------------------*/ +static inline char *pp_tag_t(tag_t *tag) +{ + return NULL; +} +static inline char *pp_data_counters(struct data_counters *dc, bool showValues) +{ + return NULL; +} +static inline char *pp_tag_node(struct tag_node *tn) +{ + return NULL; +} +static inline char *pp_tag_ref(struct tag_ref *tr) +{ + return NULL; +} +static inline char *pp_tag_stat(struct tag_stat *ts) +{ + return NULL; +} +static inline char *pp_iface_stat(struct iface_stat *is) +{ + return NULL; +} +static inline char *pp_sock_tag(struct sock_tag *st) +{ + return NULL; +} +static inline char *pp_uid_tag_data(struct uid_tag_data *qtd) +{ + return NULL; +} +static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd) +{ + return NULL; +} + +/*------------------------------------------*/ +static inline +void prdebug_sock_tag_list(int indent_level, + struct list_head *sock_tag_list) +{ +} +static inline +void prdebug_sock_tag_tree(int indent_level, + struct rb_root *sock_tag_tree) +{ +} +static inline +void prdebug_proc_qtu_data_tree(int indent_level, + struct rb_root *proc_qtu_data_tree) +{ +} +static inline +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) +{ +} +static inline +void prdebug_uid_tag_data_tree(int indent_level, + struct rb_root *uid_tag_data_tree) +{ +} +static inline +void prdebug_tag_stat_tree(int indent_level, + struct rb_root *tag_stat_tree) +{ +} +static inline +void prdebug_iface_stat_list(int indent_level, + struct list_head *iface_stat_list) +{ +} +#endif +/*------------------------------------------*/ +const char *netdev_evt_str(int netdev_event); +#endif /* ifndef __XT_QTAGUID_PRINT_H__ */ diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c new file mode 100644 index 000000000000..fb2ef46b2b84 --- /dev/null +++ b/net/netfilter/xt_quota2.c @@ -0,0 +1,382 @@ +/* + * xt_quota2 - enhanced xt_quota that can count upwards and in packets + * as a minimal accounting match. + * by Jan Engelhardt <jengelh@medozas.de>, 2008 + * + * Originally based on xt_quota.c: + * netfilter module to enforce network quotas + * Sam Johnston <samj@samj.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License; either + * version 2 of the License, as published by the Free Software Foundation. + */ +#include <linux/list.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <asm/atomic.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_quota2.h> +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG +#include <linux/netfilter_ipv4/ipt_ULOG.h> +#endif + +/** + * @lock: lock to protect quota writers from each other + */ +struct xt_quota_counter { + u_int64_t quota; + spinlock_t lock; + struct list_head list; + atomic_t ref; + char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)]; + struct proc_dir_entry *procfs_entry; +}; + +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG +/* Harald's favorite number +1 :D From ipt_ULOG.C */ +static int qlog_nl_event = 112; +module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(event_num, + "Event number for NETLINK_NFLOG message. 0 disables log." + "111 is what ipt_ULOG uses."); +static struct sock *nflognl; +#endif + +static LIST_HEAD(counter_list); +static DEFINE_SPINLOCK(counter_list_lock); + +static struct proc_dir_entry *proc_xt_quota; +static unsigned int quota_list_perms = S_IRUGO | S_IWUSR; +static unsigned int quota_list_uid = 0; +static unsigned int quota_list_gid = 0; +module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR); +module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR); +module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR); + + +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG +static void quota2_log(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ + ulog_packet_msg_t *pm; + struct sk_buff *log_skb; + size_t size; + struct nlmsghdr *nlh; + + if (!qlog_nl_event) + return; + + size = NLMSG_SPACE(sizeof(*pm)); + size = max(size, (size_t)NLMSG_GOODSIZE); + log_skb = alloc_skb(size, GFP_ATOMIC); + if (!log_skb) { + pr_err("xt_quota2: cannot alloc skb for logging\n"); + return; + } + + /* NLMSG_PUT() uses "goto nlmsg_failure" */ + nlh = NLMSG_PUT(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event, + sizeof(*pm)); + pm = NLMSG_DATA(nlh); + if (skb->tstamp.tv64 == 0) + __net_timestamp((struct sk_buff *)skb); + pm->data_len = 0; + pm->hook = hooknum; + if (prefix != NULL) + strlcpy(pm->prefix, prefix, sizeof(pm->prefix)); + else + *(pm->prefix) = '\0'; + if (in) + strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name)); + else + pm->indev_name[0] = '\0'; + + if (out) + strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); + else + pm->outdev_name[0] = '\0'; + + NETLINK_CB(log_skb).dst_group = 1; + pr_debug("throwing 1 packets to netlink group 1\n"); + netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC); + +nlmsg_failure: /* Used within NLMSG_PUT() */ + pr_debug("xt_quota2: error during NLMSG_PUT\n"); +} +#else +static void quota2_log(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ +} +#endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */ + +static int quota_proc_read(char *page, char **start, off_t offset, + int count, int *eof, void *data) +{ + struct xt_quota_counter *e = data; + int ret; + + spin_lock_bh(&e->lock); + ret = snprintf(page, PAGE_SIZE, "%llu\n", e->quota); + spin_unlock_bh(&e->lock); + return ret; +} + +static int quota_proc_write(struct file *file, const char __user *input, + unsigned long size, void *data) +{ + struct xt_quota_counter *e = data; + char buf[sizeof("18446744073709551616")]; + + if (size > sizeof(buf)) + size = sizeof(buf); + if (copy_from_user(buf, input, size) != 0) + return -EFAULT; + buf[sizeof(buf)-1] = '\0'; + + spin_lock_bh(&e->lock); + e->quota = simple_strtoull(buf, NULL, 0); + spin_unlock_bh(&e->lock); + return size; +} + +static struct xt_quota_counter * +q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon) +{ + struct xt_quota_counter *e; + unsigned int size; + + /* Do not need all the procfs things for anonymous counters. */ + size = anon ? offsetof(typeof(*e), list) : sizeof(*e); + e = kmalloc(size, GFP_KERNEL); + if (e == NULL) + return NULL; + + e->quota = q->quota; + spin_lock_init(&e->lock); + if (!anon) { + INIT_LIST_HEAD(&e->list); + atomic_set(&e->ref, 1); + strlcpy(e->name, q->name, sizeof(e->name)); + } + return e; +} + +/** + * q2_get_counter - get ref to counter or create new + * @name: name of counter + */ +static struct xt_quota_counter * +q2_get_counter(const struct xt_quota_mtinfo2 *q) +{ + struct proc_dir_entry *p; + struct xt_quota_counter *e = NULL; + struct xt_quota_counter *new_e; + + if (*q->name == '\0') + return q2_new_counter(q, true); + + /* No need to hold a lock while getting a new counter */ + new_e = q2_new_counter(q, false); + if (new_e == NULL) + goto out; + + spin_lock_bh(&counter_list_lock); + list_for_each_entry(e, &counter_list, list) + if (strcmp(e->name, q->name) == 0) { + atomic_inc(&e->ref); + spin_unlock_bh(&counter_list_lock); + kfree(new_e); + pr_debug("xt_quota2: old counter name=%s", e->name); + return e; + } + e = new_e; + pr_debug("xt_quota2: new_counter name=%s", e->name); + list_add_tail(&e->list, &counter_list); + /* The entry having a refcount of 1 is not directly destructible. + * This func has not yet returned the new entry, thus iptables + * has not references for destroying this entry. + * For another rule to try to destroy it, it would 1st need for this + * func* to be re-invoked, acquire a new ref for the same named quota. + * Nobody will access the e->procfs_entry either. + * So release the lock. */ + spin_unlock_bh(&counter_list_lock); + + /* create_proc_entry() is not spin_lock happy */ + p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms, + proc_xt_quota); + + if (IS_ERR_OR_NULL(p)) { + spin_lock_bh(&counter_list_lock); + list_del(&e->list); + spin_unlock_bh(&counter_list_lock); + goto out; + } + p->data = e; + p->read_proc = quota_proc_read; + p->write_proc = quota_proc_write; + p->uid = quota_list_uid; + p->gid = quota_list_gid; + return e; + + out: + kfree(e); + return NULL; +} + +static int quota_mt2_check(const struct xt_mtchk_param *par) +{ + struct xt_quota_mtinfo2 *q = par->matchinfo; + + pr_debug("xt_quota2: check() flags=0x%04x", q->flags); + + if (q->flags & ~XT_QUOTA_MASK) + return -EINVAL; + + q->name[sizeof(q->name)-1] = '\0'; + if (*q->name == '.' || strchr(q->name, '/') != NULL) { + printk(KERN_ERR "xt_quota.3: illegal name\n"); + return -EINVAL; + } + + q->master = q2_get_counter(q); + if (q->master == NULL) { + printk(KERN_ERR "xt_quota.3: memory alloc failure\n"); + return -ENOMEM; + } + + return 0; +} + +static void quota_mt2_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_quota_mtinfo2 *q = par->matchinfo; + struct xt_quota_counter *e = q->master; + + if (*q->name == '\0') { + kfree(e); + return; + } + + spin_lock_bh(&counter_list_lock); + if (!atomic_dec_and_test(&e->ref)) { + spin_unlock_bh(&counter_list_lock); + return; + } + + list_del(&e->list); + remove_proc_entry(e->name, proc_xt_quota); + spin_unlock_bh(&counter_list_lock); + kfree(e); +} + +static bool +quota_mt2(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct xt_quota_mtinfo2 *q = (void *)par->matchinfo; + struct xt_quota_counter *e = q->master; + bool ret = q->flags & XT_QUOTA_INVERT; + + spin_lock_bh(&e->lock); + if (q->flags & XT_QUOTA_GROW) { + /* + * While no_change is pointless in "grow" mode, we will + * implement it here simply to have a consistent behavior. + */ + if (!(q->flags & XT_QUOTA_NO_CHANGE)) { + e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; + } + ret = true; + } else { + if (e->quota >= skb->len) { + if (!(q->flags & XT_QUOTA_NO_CHANGE)) + e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; + ret = !ret; + } else { + /* We are transitioning, log that fact. */ + if (e->quota) { + quota2_log(par->hooknum, + skb, + par->in, + par->out, + q->name); + } + /* we do not allow even small packets from now on */ + e->quota = 0; + } + } + spin_unlock_bh(&e->lock); + return ret; +} + +static struct xt_match quota_mt2_reg[] __read_mostly = { + { + .name = "quota2", + .revision = 3, + .family = NFPROTO_IPV4, + .checkentry = quota_mt2_check, + .match = quota_mt2, + .destroy = quota_mt2_destroy, + .matchsize = sizeof(struct xt_quota_mtinfo2), + .me = THIS_MODULE, + }, + { + .name = "quota2", + .revision = 3, + .family = NFPROTO_IPV6, + .checkentry = quota_mt2_check, + .match = quota_mt2, + .destroy = quota_mt2_destroy, + .matchsize = sizeof(struct xt_quota_mtinfo2), + .me = THIS_MODULE, + }, +}; + +static int __init quota_mt2_init(void) +{ + int ret; + pr_debug("xt_quota2: init()"); + +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG + nflognl = netlink_kernel_create(&init_net, + NETLINK_NFLOG, 1, NULL, + NULL, THIS_MODULE); + if (!nflognl) + return -ENOMEM; +#endif + + proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net); + if (proc_xt_quota == NULL) + return -EACCES; + + ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); + if (ret < 0) + remove_proc_entry("xt_quota", init_net.proc_net); + pr_debug("xt_quota2: init() %d", ret); + return ret; +} + +static void __exit quota_mt2_exit(void) +{ + xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); + remove_proc_entry("xt_quota", init_net.proc_net); +} + +module_init(quota_mt2_init); +module_exit(quota_mt2_exit); +MODULE_DESCRIPTION("Xtables: countdown quota match; up counter"); +MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_quota2"); +MODULE_ALIAS("ip6t_quota2"); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 72bb07f57f97..1e48fcf29203 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -35,7 +35,7 @@ #include <net/netfilter/nf_conntrack.h> #endif -static void +void xt_socket_put_sk(struct sock *sk) { if (sk->sk_state == TCP_TIME_WAIT) @@ -43,6 +43,7 @@ xt_socket_put_sk(struct sock *sk) else sock_put(sk); } +EXPORT_SYMBOL(xt_socket_put_sk); static int extract_icmp4_fields(const struct sk_buff *skb, @@ -101,9 +102,8 @@ extract_icmp4_fields(const struct sk_buff *skb, return 0; } -static bool -socket_match(const struct sk_buff *skb, struct xt_action_param *par, - const struct xt_socket_mtinfo1 *info) +struct sock* +xt_socket_get4_sk(const struct sk_buff *skb, struct xt_action_param *par) { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; @@ -120,7 +120,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); if (hp == NULL) - return false; + return NULL; protocol = iph->protocol; saddr = iph->saddr; @@ -131,9 +131,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, } else if (iph->protocol == IPPROTO_ICMP) { if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, &sport, &dport)) - return false; + return NULL; } else { - return false; + return NULL; } #ifdef XT_SOCKET_HAVE_CONNTRACK @@ -157,6 +157,23 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); + + pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n", + protocol, &saddr, ntohs(sport), + &daddr, ntohs(dport), + &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); + + return sk; +} +EXPORT_SYMBOL(xt_socket_get4_sk); + +static bool +socket_match(const struct sk_buff *skb, struct xt_action_param *par, + const struct xt_socket_mtinfo1 *info) +{ + struct sock *sk; + + sk = xt_socket_get4_sk(skb, par); if (sk != NULL) { bool wildcard; bool transparent = true; @@ -179,11 +196,6 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, sk = NULL; } - pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n", - protocol, &saddr, ntohs(sport), - &daddr, ntohs(dport), - &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); - return (sk != NULL); } @@ -255,8 +267,8 @@ extract_icmp6_fields(const struct sk_buff *skb, return 0; } -static bool -socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) +struct sock* +xt_socket_get6_sk(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; @@ -264,7 +276,6 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) struct in6_addr *daddr, *saddr; __be16 dport, sport; int thoff, tproto; - const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; tproto = ipv6_find_hdr(skb, &thoff, -1, NULL); if (tproto < 0) { @@ -276,7 +287,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); if (hp == NULL) - return false; + return NULL; saddr = &iph->saddr; sport = hp->source; @@ -286,13 +297,30 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) } else if (tproto == IPPROTO_ICMPV6) { if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, &sport, &dport)) - return false; + return NULL; } else { - return false; + return NULL; } sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); + pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu " + "(orig %pI6:%hu) sock %p\n", + tproto, saddr, ntohs(sport), + daddr, ntohs(dport), + &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); + return sk; +} +EXPORT_SYMBOL(xt_socket_get6_sk); + +static bool +socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct sock *sk; + const struct xt_socket_mtinfo1 *info; + + info = (struct xt_socket_mtinfo1 *) par->matchinfo; + sk = xt_socket_get6_sk(skb, par); if (sk != NULL) { bool wildcard; bool transparent = true; @@ -315,12 +343,6 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) sk = NULL; } - pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu " - "(orig %pI6:%hu) sock %p\n", - tproto, saddr, ntohs(sport), - daddr, ntohs(dport), - &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); - return (sk != NULL); } #endif diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 78efe895b663..8e12c8a2b82b 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -10,6 +10,11 @@ menuconfig RFKILL To compile this driver as a module, choose M here: the module will be called rfkill. +config RFKILL_PM + bool "Power off on suspend" + depends on RFKILL && PM + default y + # LED trigger support config RFKILL_LEDS bool diff --git a/net/rfkill/core.c b/net/rfkill/core.c index f974961754ca..04afd89a1583 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -770,6 +770,7 @@ void rfkill_pause_polling(struct rfkill *rfkill) } EXPORT_SYMBOL(rfkill_pause_polling); +#ifdef CONFIG_RFKILL_PM void rfkill_resume_polling(struct rfkill *rfkill) { BUG_ON(!rfkill); @@ -804,14 +805,17 @@ static int rfkill_resume(struct device *dev) return 0; } +#endif static struct class rfkill_class = { .name = "rfkill", .dev_release = rfkill_release, .dev_attrs = rfkill_dev_attrs, .dev_uevent = rfkill_dev_uevent, +#ifdef CONFIG_RFKILL_PM .suspend = rfkill_suspend, .resume = rfkill_resume, +#endif }; bool rfkill_blocked(struct rfkill *rfkill) diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index ce7bd449173d..cb3b8ddebc29 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -9,7 +9,6 @@ struct cache_detail; struct sunrpc_net { struct proc_dir_entry *proc_net_rpc; struct cache_detail *ip_map_cache; - struct cache_detail *unix_gid_cache; struct cache_detail *rsc_cache; struct cache_detail *rsi_cache; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 3d6498af9adc..d87a32575134 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -40,18 +40,12 @@ static __net_init int sunrpc_init_net(struct net *net) if (err) goto err_ipmap; - err = unix_gid_cache_create(net); - if (err) - goto err_unixgid; - rpc_pipefs_init_net(net); INIT_LIST_HEAD(&sn->all_clients); spin_lock_init(&sn->rpc_client_lock); spin_lock_init(&sn->rpcb_clnt_lock); return 0; -err_unixgid: - ip_map_cache_destroy(net); err_ipmap: rpc_proc_exit(net); err_proc: @@ -60,7 +54,6 @@ err_proc: static __net_exit void sunrpc_exit_net(struct net *net) { - unix_gid_cache_destroy(net); ip_map_cache_destroy(net); rpc_proc_exit(net); } @@ -72,6 +65,8 @@ static struct pernet_operations sunrpc_net_ops = { .size = sizeof(struct sunrpc_net), }; +extern struct cache_detail unix_gid_cache; + static int __init init_sunrpc(void) { @@ -94,6 +89,7 @@ init_sunrpc(void) #ifdef RPC_DEBUG rpc_register_sysctl(); #endif + cache_register_net(&unix_gid_cache, &init_net); svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ return 0; @@ -116,6 +112,7 @@ cleanup_sunrpc(void) svc_cleanup_xprt_sock(); unregister_rpc_pipefs(); rpc_destroy_mempool(); + cache_unregister_net(&unix_gid_cache, &init_net); unregister_pernet_subsys(&sunrpc_net_ops); #ifdef RPC_DEBUG rpc_unregister_sysctl(); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 521d8f7dc833..8896a1404211 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -436,6 +436,7 @@ struct unix_gid { uid_t uid; struct group_info *gi; }; +static struct cache_head *gid_table[GID_HASHMAX]; static void unix_gid_put(struct kref *kref) { @@ -493,7 +494,8 @@ static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); } -static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid); +static struct unix_gid *unix_gid_lookup(uid_t uid); +extern struct cache_detail unix_gid_cache; static int unix_gid_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -537,19 +539,19 @@ static int unix_gid_parse(struct cache_detail *cd, GROUP_AT(ug.gi, i) = gid; } - ugp = unix_gid_lookup(cd, uid); + ugp = unix_gid_lookup(uid); if (ugp) { struct cache_head *ch; ug.h.flags = 0; ug.h.expiry_time = expiry; - ch = sunrpc_cache_update(cd, + ch = sunrpc_cache_update(&unix_gid_cache, &ug.h, &ugp->h, hash_long(uid, GID_HASHBITS)); if (!ch) err = -ENOMEM; else { err = 0; - cache_put(ch, cd); + cache_put(ch, &unix_gid_cache); } } else err = -ENOMEM; @@ -585,9 +587,10 @@ static int unix_gid_show(struct seq_file *m, return 0; } -static struct cache_detail unix_gid_cache_template = { +struct cache_detail unix_gid_cache = { .owner = THIS_MODULE, .hash_size = GID_HASHMAX, + .hash_table = gid_table, .name = "auth.unix.gid", .cache_put = unix_gid_put, .cache_upcall = unix_gid_upcall, @@ -599,42 +602,14 @@ static struct cache_detail unix_gid_cache_template = { .alloc = unix_gid_alloc, }; -int unix_gid_cache_create(struct net *net) -{ - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - struct cache_detail *cd; - int err; - - cd = cache_create_net(&unix_gid_cache_template, net); - if (IS_ERR(cd)) - return PTR_ERR(cd); - err = cache_register_net(cd, net); - if (err) { - cache_destroy_net(cd, net); - return err; - } - sn->unix_gid_cache = cd; - return 0; -} - -void unix_gid_cache_destroy(struct net *net) -{ - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - struct cache_detail *cd = sn->unix_gid_cache; - - sn->unix_gid_cache = NULL; - cache_purge(cd); - cache_unregister_net(cd, net); - cache_destroy_net(cd, net); -} - -static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid) +static struct unix_gid *unix_gid_lookup(uid_t uid) { struct unix_gid ug; struct cache_head *ch; ug.uid = uid; - ch = sunrpc_cache_lookup(cd, &ug.h, hash_long(uid, GID_HASHBITS)); + ch = sunrpc_cache_lookup(&unix_gid_cache, &ug.h, + hash_long(uid, GID_HASHBITS)); if (ch) return container_of(ch, struct unix_gid, h); else @@ -646,13 +621,11 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) struct unix_gid *ug; struct group_info *gi; int ret; - struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, - sunrpc_net_id); - ug = unix_gid_lookup(sn->unix_gid_cache, uid); + ug = unix_gid_lookup(uid); if (!ug) return ERR_PTR(-EAGAIN); - ret = cache_check(sn->unix_gid_cache, &ug->h, &rqstp->rq_chandle); + ret = cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle); switch (ret) { case -ENOENT: return ERR_PTR(-ENOENT); @@ -660,7 +633,7 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) return ERR_PTR(-ESHUTDOWN); case 0: gi = get_group_info(ug->gi); - cache_put(&ug->h, sn->unix_gid_cache); + cache_put(&ug->h, &unix_gid_cache); return gi; default: return ERR_PTR(-EAGAIN); diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 2e4444fedbe0..8db24da2e2cf 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -160,3 +160,14 @@ config LIB80211_DEBUG from lib80211. If unsure, say N. + +config CFG80211_ALLOW_RECONNECT + bool "Allow reconnect while already connected" + depends on CFG80211 + default n + help + cfg80211 stack doesn't allow to connect if you are already + connected. This option allows to make a connection in this case. + + Select this option ONLY for wlan drivers that are specifically + built for such purposes. diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f432c57af05d..c5b572cdb489 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -873,7 +873,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(add_virtual_intf, NEW_INTERFACE); CMD(change_virtual_intf, SET_INTERFACE); CMD(add_key, NEW_KEY); - CMD(start_ap, START_AP); + CMD(add_beacon, NEW_BEACON); CMD(add_station, NEW_STATION); CMD(add_mpath, NEW_MPATH); CMD(update_mesh_config, SET_MESH_CONFIG); @@ -2082,10 +2082,15 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) return err; } -static int nl80211_parse_beacon(struct genl_info *info, - struct cfg80211_beacon_data *bcn) +static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info) { - bool haveinfo = false; + int (*call)(struct wiphy *wiphy, struct net_device *dev, + struct beacon_parameters *info); + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct beacon_parameters params; + int haveinfo = 0, err; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL]) || !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]) || @@ -2093,190 +2098,149 @@ static int nl80211_parse_beacon(struct genl_info *info, !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE_ASSOC_RESP])) return -EINVAL; - memset(bcn, 0, sizeof(*bcn)); + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; - if (info->attrs[NL80211_ATTR_BEACON_HEAD]) { - bcn->head = nla_data(info->attrs[NL80211_ATTR_BEACON_HEAD]); - bcn->head_len = nla_len(info->attrs[NL80211_ATTR_BEACON_HEAD]); - if (!bcn->head_len) + memset(¶ms, 0, sizeof(params)); + + switch (info->genlhdr->cmd) { + case NL80211_CMD_NEW_BEACON: + /* these are required for NEW_BEACON */ + if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] || + !info->attrs[NL80211_ATTR_DTIM_PERIOD] || + !info->attrs[NL80211_ATTR_BEACON_HEAD]) return -EINVAL; - haveinfo = true; + + params.interval = + nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); + params.dtim_period = + nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); + + err = cfg80211_validate_beacon_int(rdev, params.interval); + if (err) + return err; + + /* + * In theory, some of these attributes could be required for + * NEW_BEACON, but since they were not used when the command was + * originally added, keep them optional for old user space + * programs to work with drivers that do not need the additional + * information. + */ + if (info->attrs[NL80211_ATTR_SSID]) { + params.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + params.ssid_len = + nla_len(info->attrs[NL80211_ATTR_SSID]); + if (params.ssid_len == 0 || + params.ssid_len > IEEE80211_MAX_SSID_LEN) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_HIDDEN_SSID]) { + params.hidden_ssid = nla_get_u32( + info->attrs[NL80211_ATTR_HIDDEN_SSID]); + if (params.hidden_ssid != + NL80211_HIDDEN_SSID_NOT_IN_USE && + params.hidden_ssid != + NL80211_HIDDEN_SSID_ZERO_LEN && + params.hidden_ssid != + NL80211_HIDDEN_SSID_ZERO_CONTENTS) + return -EINVAL; + } + + params.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; + + if (info->attrs[NL80211_ATTR_AUTH_TYPE]) { + params.auth_type = nla_get_u32( + info->attrs[NL80211_ATTR_AUTH_TYPE]); + if (!nl80211_valid_auth_type(params.auth_type)) + return -EINVAL; + } else + params.auth_type = NL80211_AUTHTYPE_AUTOMATIC; + + err = nl80211_crypto_settings(rdev, info, ¶ms.crypto, + NL80211_MAX_NR_CIPHER_SUITES); + if (err) + return err; + + call = rdev->ops->add_beacon; + break; + case NL80211_CMD_SET_BEACON: + call = rdev->ops->set_beacon; + break; + default: + WARN_ON(1); + return -EOPNOTSUPP; + } + + if (!call) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_BEACON_HEAD]) { + params.head = nla_data(info->attrs[NL80211_ATTR_BEACON_HEAD]); + params.head_len = + nla_len(info->attrs[NL80211_ATTR_BEACON_HEAD]); + haveinfo = 1; } if (info->attrs[NL80211_ATTR_BEACON_TAIL]) { - bcn->tail = nla_data(info->attrs[NL80211_ATTR_BEACON_TAIL]); - bcn->tail_len = + params.tail = nla_data(info->attrs[NL80211_ATTR_BEACON_TAIL]); + params.tail_len = nla_len(info->attrs[NL80211_ATTR_BEACON_TAIL]); - haveinfo = true; + haveinfo = 1; } if (!haveinfo) return -EINVAL; if (info->attrs[NL80211_ATTR_IE]) { - bcn->beacon_ies = nla_data(info->attrs[NL80211_ATTR_IE]); - bcn->beacon_ies_len = nla_len(info->attrs[NL80211_ATTR_IE]); + params.beacon_ies = nla_data(info->attrs[NL80211_ATTR_IE]); + params.beacon_ies_len = nla_len(info->attrs[NL80211_ATTR_IE]); } if (info->attrs[NL80211_ATTR_IE_PROBE_RESP]) { - bcn->proberesp_ies = + params.proberesp_ies = nla_data(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); - bcn->proberesp_ies_len = + params.proberesp_ies_len = nla_len(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); } if (info->attrs[NL80211_ATTR_IE_ASSOC_RESP]) { - bcn->assocresp_ies = + params.assocresp_ies = nla_data(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); - bcn->assocresp_ies_len = + params.assocresp_ies_len = nla_len(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); } if (info->attrs[NL80211_ATTR_PROBE_RESP]) { - bcn->probe_resp = + params.probe_resp = nla_data(info->attrs[NL80211_ATTR_PROBE_RESP]); - bcn->probe_resp_len = + params.probe_resp_len = nla_len(info->attrs[NL80211_ATTR_PROBE_RESP]); } - return 0; -} - -static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) -{ - struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_ap_settings params; - int err; - - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) - return -EOPNOTSUPP; - - if (!rdev->ops->start_ap) - return -EOPNOTSUPP; - - if (wdev->beacon_interval) - return -EALREADY; - - memset(¶ms, 0, sizeof(params)); - - /* these are required for START_AP */ - if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] || - !info->attrs[NL80211_ATTR_DTIM_PERIOD] || - !info->attrs[NL80211_ATTR_BEACON_HEAD]) - return -EINVAL; - - err = nl80211_parse_beacon(info, ¶ms.beacon); - if (err) - return err; - - params.beacon_interval = - nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - params.dtim_period = - nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); - - err = cfg80211_validate_beacon_int(rdev, params.beacon_interval); - if (err) - return err; - - /* - * In theory, some of these attributes should be required here - * but since they were not used when the command was originally - * added, keep them optional for old user space programs to let - * them continue to work with drivers that do not need the - * additional information -- drivers must check! - */ - if (info->attrs[NL80211_ATTR_SSID]) { - params.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); - params.ssid_len = - nla_len(info->attrs[NL80211_ATTR_SSID]); - if (params.ssid_len == 0 || - params.ssid_len > IEEE80211_MAX_SSID_LEN) - return -EINVAL; - } - - if (info->attrs[NL80211_ATTR_HIDDEN_SSID]) { - params.hidden_ssid = nla_get_u32( - info->attrs[NL80211_ATTR_HIDDEN_SSID]); - if (params.hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE && - params.hidden_ssid != NL80211_HIDDEN_SSID_ZERO_LEN && - params.hidden_ssid != NL80211_HIDDEN_SSID_ZERO_CONTENTS) - return -EINVAL; - } - - params.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; - - if (info->attrs[NL80211_ATTR_AUTH_TYPE]) { - params.auth_type = nla_get_u32( - info->attrs[NL80211_ATTR_AUTH_TYPE]); - if (!nl80211_valid_auth_type(params.auth_type)) - return -EINVAL; - } else - params.auth_type = NL80211_AUTHTYPE_AUTOMATIC; - - err = nl80211_crypto_settings(rdev, info, ¶ms.crypto, - NL80211_MAX_NR_CIPHER_SUITES); - if (err) - return err; - - if (info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]) { - if (!(rdev->wiphy.features & NL80211_FEATURE_INACTIVITY_TIMER)) - return -EOPNOTSUPP; - params.inactivity_timeout = nla_get_u16( - info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]); - } - - err = rdev->ops->start_ap(&rdev->wiphy, dev, ¶ms); - if (!err) - wdev->beacon_interval = params.beacon_interval; + err = call(&rdev->wiphy, dev, ¶ms); + if (!err && params.interval) + wdev->beacon_interval = params.interval; return err; } -static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) +static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_beacon_data params; int err; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) - return -EOPNOTSUPP; - - if (!rdev->ops->change_beacon) - return -EOPNOTSUPP; - - if (!wdev->beacon_interval) - return -EINVAL; - - err = nl80211_parse_beacon(info, ¶ms); - if (err) - return err; - - return rdev->ops->change_beacon(&rdev->wiphy, dev, ¶ms); -} - -static int nl80211_stop_ap(struct sk_buff *skb, struct genl_info *info) -{ - struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - if (!rdev->ops->stop_ap) + if (!rdev->ops->del_beacon) return -EOPNOTSUPP; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; - if (!wdev->beacon_interval) - return -ENOENT; - - err = rdev->ops->stop_ap(&rdev->wiphy, dev); + err = rdev->ops->del_beacon(&rdev->wiphy, dev); if (!err) wdev->beacon_interval = 0; return err; @@ -6420,23 +6384,23 @@ static struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_SET_BEACON, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .doit = nl80211_set_beacon, + .doit = nl80211_addset_beacon, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { - .cmd = NL80211_CMD_START_AP, + .cmd = NL80211_CMD_NEW_BEACON, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .doit = nl80211_start_ap, + .doit = nl80211_addset_beacon, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { - .cmd = NL80211_CMD_STOP_AP, + .cmd = NL80211_CMD_DEL_BEACON, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .doit = nl80211_stop_ap, + .doit = nl80211_del_beacon, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 70faadf16a32..e40104f562ba 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -18,7 +18,7 @@ #include "nl80211.h" #include "wext-compat.h" -#define IEEE80211_SCAN_RESULT_EXPIRE (15 * HZ) +#define IEEE80211_SCAN_RESULT_EXPIRE (3 * HZ) void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) { diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f7e937ff8978..bbbed736e5ef 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -689,8 +689,10 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; +#ifndef CONFIG_CFG80211_ALLOW_RECONNECT if (wdev->sme_state != CFG80211_SME_CONNECTED) return; +#endif if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); @@ -767,10 +769,14 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); +#ifndef CONFIG_CFG80211_ALLOW_RECONNECT if (wdev->sme_state != CFG80211_SME_IDLE) return -EALREADY; if (WARN_ON(wdev->connect_keys)) { +#else + if (wdev->connect_keys) { +#endif kfree(wdev->connect_keys); wdev->connect_keys = NULL; } |