diff options
author | Anton Vorontsov <cbouatmailru@gmail.com> | 2008-07-30 02:05:23 +0400 |
---|---|---|
committer | Anton Vorontsov <cbouatmailru@gmail.com> | 2008-07-30 02:05:23 +0400 |
commit | 9fec6060d9e48ed7db0dac0e16d0f0f0e615b7f6 (patch) | |
tree | 74b41f31a08f6500ff3dfcf64ba21e2d9a8e87e5 /net | |
parent | fece418418f51e92dd7e67e17c5e3fe5a28d3279 (diff) | |
parent | 6e86841d05f371b5b9b86ce76c02aaee83352298 (diff) |
Merge branch 'master' of /home/cbou/linux-2.6
Conflicts:
drivers/power/Kconfig
drivers/power/Makefile
Diffstat (limited to 'net')
476 files changed, 16863 insertions, 12773 deletions
diff --git a/net/802/Kconfig b/net/802/Kconfig new file mode 100644 index 000000000000..be33d27c8e69 --- /dev/null +++ b/net/802/Kconfig @@ -0,0 +1,7 @@ +config STP + tristate + select LLC + +config GARP + tristate + select STP diff --git a/net/802/Makefile b/net/802/Makefile index 68569ffddea1..7893d679910c 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -10,3 +10,5 @@ obj-$(CONFIG_FDDI) += fddi.o obj-$(CONFIG_HIPPI) += hippi.o obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o +obj-$(CONFIG_STP) += stp.o +obj-$(CONFIG_GARP) += garp.o diff --git a/net/802/garp.c b/net/802/garp.c new file mode 100644 index 000000000000..1dcb0660c49d --- /dev/null +++ b/net/802/garp.c @@ -0,0 +1,636 @@ +/* + * IEEE 802.1D Generic Attribute Registration Protocol (GARP) + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/llc.h> +#include <net/llc.h> +#include <net/llc_pdu.h> +#include <net/garp.h> +#include <asm/unaligned.h> + +static unsigned int garp_join_time __read_mostly = 200; +module_param(garp_join_time, uint, 0644); +MODULE_PARM_DESC(garp_join_time, "Join time in ms (default 200ms)"); +MODULE_LICENSE("GPL"); + +static const struct garp_state_trans { + u8 state; + u8 action; +} garp_applicant_state_table[GARP_APPLICANT_MAX + 1][GARP_EVENT_MAX + 1] = { + [GARP_APPLICANT_VA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_AA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA }, + }, + [GARP_APPLICANT_AA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_QA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA }, + }, + [GARP_APPLICANT_QA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA }, + }, + [GARP_APPLICANT_LA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_VO, + .action = GARP_ACTION_S_LEAVE_EMPTY }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_LA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_LA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_LA }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, + [GARP_APPLICANT_VP] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_AA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AP }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_VO }, + }, + [GARP_APPLICANT_AP] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_QA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QP }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_AO }, + }, + [GARP_APPLICANT_QP] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QP }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_QO }, + }, + [GARP_APPLICANT_VO] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AO }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, + [GARP_APPLICANT_AO] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QO }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_AP }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, + [GARP_APPLICANT_QO] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QO }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_QP }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, +}; + +static int garp_attr_cmp(const struct garp_attr *attr, + const void *data, u8 len, u8 type) +{ + if (attr->type != type) + return attr->type - type; + if (attr->dlen != len) + return attr->dlen - len; + return memcmp(attr->data, data, len); +} + +static struct garp_attr *garp_attr_lookup(const struct garp_applicant *app, + const void *data, u8 len, u8 type) +{ + struct rb_node *parent = app->gid.rb_node; + struct garp_attr *attr; + int d; + + while (parent) { + attr = rb_entry(parent, struct garp_attr, node); + d = garp_attr_cmp(attr, data, len, type); + if (d < 0) + parent = parent->rb_left; + else if (d > 0) + parent = parent->rb_right; + else + return attr; + } + return NULL; +} + +static void garp_attr_insert(struct garp_applicant *app, struct garp_attr *new) +{ + struct rb_node *parent = NULL, **p = &app->gid.rb_node; + struct garp_attr *attr; + int d; + + while (*p) { + parent = *p; + attr = rb_entry(parent, struct garp_attr, node); + d = garp_attr_cmp(attr, new->data, new->dlen, new->type); + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &app->gid); +} + +static struct garp_attr *garp_attr_create(struct garp_applicant *app, + const void *data, u8 len, u8 type) +{ + struct garp_attr *attr; + + attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC); + if (!attr) + return attr; + attr->state = GARP_APPLICANT_VO; + attr->type = type; + attr->dlen = len; + memcpy(attr->data, data, len); + garp_attr_insert(app, attr); + return attr; +} + +static void garp_attr_destroy(struct garp_applicant *app, struct garp_attr *attr) +{ + rb_erase(&attr->node, &app->gid); + kfree(attr); +} + +static int garp_pdu_init(struct garp_applicant *app) +{ + struct sk_buff *skb; + struct garp_pdu_hdr *gp; + +#define LLC_RESERVE sizeof(struct llc_pdu_un) + skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev), + GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + skb->dev = app->dev; + skb->protocol = htons(ETH_P_802_2); + skb_reserve(skb, LL_RESERVED_SPACE(app->dev) + LLC_RESERVE); + + gp = (struct garp_pdu_hdr *)__skb_put(skb, sizeof(*gp)); + put_unaligned(htons(GARP_PROTOCOL_ID), &gp->protocol); + + app->pdu = skb; + return 0; +} + +static int garp_pdu_append_end_mark(struct garp_applicant *app) +{ + if (skb_tailroom(app->pdu) < sizeof(u8)) + return -1; + *(u8 *)__skb_put(app->pdu, sizeof(u8)) = GARP_END_MARK; + return 0; +} + +static void garp_pdu_queue(struct garp_applicant *app) +{ + if (!app->pdu) + return; + + garp_pdu_append_end_mark(app); + garp_pdu_append_end_mark(app); + + llc_pdu_header_init(app->pdu, LLC_PDU_TYPE_U, LLC_SAP_BSPAN, + LLC_SAP_BSPAN, LLC_PDU_CMD); + llc_pdu_init_as_ui_cmd(app->pdu); + llc_mac_hdr_init(app->pdu, app->dev->dev_addr, + app->app->proto.group_address); + + skb_queue_tail(&app->queue, app->pdu); + app->pdu = NULL; +} + +static void garp_queue_xmit(struct garp_applicant *app) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&app->queue))) + dev_queue_xmit(skb); +} + +static int garp_pdu_append_msg(struct garp_applicant *app, u8 attrtype) +{ + struct garp_msg_hdr *gm; + + if (skb_tailroom(app->pdu) < sizeof(*gm)) + return -1; + gm = (struct garp_msg_hdr *)__skb_put(app->pdu, sizeof(*gm)); + gm->attrtype = attrtype; + garp_cb(app->pdu)->cur_type = attrtype; + return 0; +} + +static int garp_pdu_append_attr(struct garp_applicant *app, + const struct garp_attr *attr, + enum garp_attr_event event) +{ + struct garp_attr_hdr *ga; + unsigned int len; + int err; +again: + if (!app->pdu) { + err = garp_pdu_init(app); + if (err < 0) + return err; + } + + if (garp_cb(app->pdu)->cur_type != attr->type) { + if (garp_cb(app->pdu)->cur_type && + garp_pdu_append_end_mark(app) < 0) + goto queue; + if (garp_pdu_append_msg(app, attr->type) < 0) + goto queue; + } + + len = sizeof(*ga) + attr->dlen; + if (skb_tailroom(app->pdu) < len) + goto queue; + ga = (struct garp_attr_hdr *)__skb_put(app->pdu, len); + ga->len = len; + ga->event = event; + memcpy(ga->data, attr->data, attr->dlen); + return 0; + +queue: + garp_pdu_queue(app); + goto again; +} + +static void garp_attr_event(struct garp_applicant *app, + struct garp_attr *attr, enum garp_event event) +{ + enum garp_applicant_state state; + + state = garp_applicant_state_table[attr->state][event].state; + if (state == GARP_APPLICANT_INVALID) + return; + + switch (garp_applicant_state_table[attr->state][event].action) { + case GARP_ACTION_NONE: + break; + case GARP_ACTION_S_JOIN_IN: + /* When appending the attribute fails, don't update state in + * order to retry on next TRANSMIT_PDU event. */ + if (garp_pdu_append_attr(app, attr, GARP_JOIN_IN) < 0) + return; + break; + case GARP_ACTION_S_LEAVE_EMPTY: + garp_pdu_append_attr(app, attr, GARP_LEAVE_EMPTY); + /* As a pure applicant, sending a leave message implies that + * the attribute was unregistered and can be destroyed. */ + garp_attr_destroy(app, attr); + return; + default: + WARN_ON(1); + } + + attr->state = state; +} + +int garp_request_join(const struct net_device *dev, + const struct garp_application *appl, + const void *data, u8 len, u8 type) +{ + struct garp_port *port = dev->garp_port; + struct garp_applicant *app = port->applicants[appl->type]; + struct garp_attr *attr; + + spin_lock_bh(&app->lock); + attr = garp_attr_create(app, data, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return -ENOMEM; + } + garp_attr_event(app, attr, GARP_EVENT_REQ_JOIN); + spin_unlock_bh(&app->lock); + return 0; +} +EXPORT_SYMBOL_GPL(garp_request_join); + +void garp_request_leave(const struct net_device *dev, + const struct garp_application *appl, + const void *data, u8 len, u8 type) +{ + struct garp_port *port = dev->garp_port; + struct garp_applicant *app = port->applicants[appl->type]; + struct garp_attr *attr; + + spin_lock_bh(&app->lock); + attr = garp_attr_lookup(app, data, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return; + } + garp_attr_event(app, attr, GARP_EVENT_REQ_LEAVE); + spin_unlock_bh(&app->lock); +} +EXPORT_SYMBOL_GPL(garp_request_leave); + +static void garp_gid_event(struct garp_applicant *app, enum garp_event event) +{ + struct rb_node *node, *next; + struct garp_attr *attr; + + for (node = rb_first(&app->gid); + next = node ? rb_next(node) : NULL, node != NULL; + node = next) { + attr = rb_entry(node, struct garp_attr, node); + garp_attr_event(app, attr, event); + } +} + +static void garp_join_timer_arm(struct garp_applicant *app) +{ + unsigned long delay; + + delay = (u64)msecs_to_jiffies(garp_join_time) * net_random() >> 32; + mod_timer(&app->join_timer, jiffies + delay); +} + +static void garp_join_timer(unsigned long data) +{ + struct garp_applicant *app = (struct garp_applicant *)data; + + spin_lock(&app->lock); + garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); + garp_pdu_queue(app); + spin_unlock(&app->lock); + + garp_queue_xmit(app); + garp_join_timer_arm(app); +} + +static int garp_pdu_parse_end_mark(struct sk_buff *skb) +{ + if (!pskb_may_pull(skb, sizeof(u8))) + return -1; + if (*skb->data == GARP_END_MARK) { + skb_pull(skb, sizeof(u8)); + return -1; + } + return 0; +} + +static int garp_pdu_parse_attr(struct garp_applicant *app, struct sk_buff *skb, + u8 attrtype) +{ + const struct garp_attr_hdr *ga; + struct garp_attr *attr; + enum garp_event event; + unsigned int dlen; + + if (!pskb_may_pull(skb, sizeof(*ga))) + return -1; + ga = (struct garp_attr_hdr *)skb->data; + if (ga->len < sizeof(*ga)) + return -1; + + if (!pskb_may_pull(skb, ga->len)) + return -1; + skb_pull(skb, ga->len); + dlen = sizeof(*ga) - ga->len; + + if (attrtype > app->app->maxattr) + return 0; + + switch (ga->event) { + case GARP_LEAVE_ALL: + if (dlen != 0) + return -1; + garp_gid_event(app, GARP_EVENT_R_LEAVE_EMPTY); + return 0; + case GARP_JOIN_EMPTY: + event = GARP_EVENT_R_JOIN_EMPTY; + break; + case GARP_JOIN_IN: + event = GARP_EVENT_R_JOIN_IN; + break; + case GARP_LEAVE_EMPTY: + event = GARP_EVENT_R_LEAVE_EMPTY; + break; + case GARP_EMPTY: + event = GARP_EVENT_R_EMPTY; + break; + default: + return 0; + } + + if (dlen == 0) + return -1; + attr = garp_attr_lookup(app, ga->data, dlen, attrtype); + if (attr == NULL) + return 0; + garp_attr_event(app, attr, event); + return 0; +} + +static int garp_pdu_parse_msg(struct garp_applicant *app, struct sk_buff *skb) +{ + const struct garp_msg_hdr *gm; + + if (!pskb_may_pull(skb, sizeof(*gm))) + return -1; + gm = (struct garp_msg_hdr *)skb->data; + if (gm->attrtype == 0) + return -1; + skb_pull(skb, sizeof(*gm)); + + while (skb->len > 0) { + if (garp_pdu_parse_attr(app, skb, gm->attrtype) < 0) + return -1; + if (garp_pdu_parse_end_mark(skb) < 0) + break; + } + return 0; +} + +static void garp_pdu_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev) +{ + struct garp_application *appl = proto->data; + struct garp_port *port; + struct garp_applicant *app; + const struct garp_pdu_hdr *gp; + + port = rcu_dereference(dev->garp_port); + if (!port) + goto err; + app = rcu_dereference(port->applicants[appl->type]); + if (!app) + goto err; + + if (!pskb_may_pull(skb, sizeof(*gp))) + goto err; + gp = (struct garp_pdu_hdr *)skb->data; + if (get_unaligned(&gp->protocol) != htons(GARP_PROTOCOL_ID)) + goto err; + skb_pull(skb, sizeof(*gp)); + + spin_lock(&app->lock); + while (skb->len > 0) { + if (garp_pdu_parse_msg(app, skb) < 0) + break; + if (garp_pdu_parse_end_mark(skb) < 0) + break; + } + spin_unlock(&app->lock); +err: + kfree_skb(skb); +} + +static int garp_init_port(struct net_device *dev) +{ + struct garp_port *port; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + rcu_assign_pointer(dev->garp_port, port); + return 0; +} + +static void garp_release_port(struct net_device *dev) +{ + struct garp_port *port = dev->garp_port; + unsigned int i; + + for (i = 0; i <= GARP_APPLICATION_MAX; i++) { + if (port->applicants[i]) + return; + } + rcu_assign_pointer(dev->garp_port, NULL); + synchronize_rcu(); + kfree(port); +} + +int garp_init_applicant(struct net_device *dev, struct garp_application *appl) +{ + struct garp_applicant *app; + int err; + + ASSERT_RTNL(); + + if (!dev->garp_port) { + err = garp_init_port(dev); + if (err < 0) + goto err1; + } + + err = -ENOMEM; + app = kzalloc(sizeof(*app), GFP_KERNEL); + if (!app) + goto err2; + + err = dev_mc_add(dev, appl->proto.group_address, ETH_ALEN, 0); + if (err < 0) + goto err3; + + app->dev = dev; + app->app = appl; + app->gid = RB_ROOT; + spin_lock_init(&app->lock); + skb_queue_head_init(&app->queue); + rcu_assign_pointer(dev->garp_port->applicants[appl->type], app); + setup_timer(&app->join_timer, garp_join_timer, (unsigned long)app); + garp_join_timer_arm(app); + return 0; + +err3: + kfree(app); +err2: + garp_release_port(dev); +err1: + return err; +} +EXPORT_SYMBOL_GPL(garp_init_applicant); + +void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl) +{ + struct garp_port *port = dev->garp_port; + struct garp_applicant *app = port->applicants[appl->type]; + + ASSERT_RTNL(); + + rcu_assign_pointer(port->applicants[appl->type], NULL); + synchronize_rcu(); + + /* Delete timer and generate a final TRANSMIT_PDU event to flush out + * all pending messages before the applicant is gone. */ + del_timer_sync(&app->join_timer); + garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); + garp_pdu_queue(app); + garp_queue_xmit(app); + + dev_mc_delete(dev, appl->proto.group_address, ETH_ALEN, 0); + kfree(app); + garp_release_port(dev); +} +EXPORT_SYMBOL_GPL(garp_uninit_applicant); + +int garp_register_application(struct garp_application *appl) +{ + appl->proto.rcv = garp_pdu_rcv; + appl->proto.data = appl; + return stp_proto_register(&appl->proto); +} +EXPORT_SYMBOL_GPL(garp_register_application); + +void garp_unregister_application(struct garp_application *appl) +{ + stp_proto_unregister(&appl->proto); +} +EXPORT_SYMBOL_GPL(garp_unregister_application); diff --git a/net/802/psnap.c b/net/802/psnap.c index 31128cb92a23..b3cfe5a14fca 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -20,6 +20,7 @@ #include <linux/mm.h> #include <linux/in.h> #include <linux/init.h> +#include <linux/rculist.h> static LIST_HEAD(snap_list); static DEFINE_SPINLOCK(snap_lock); @@ -30,11 +31,9 @@ static struct llc_sap *snap_sap; */ static struct datalink_proto *find_snap_client(unsigned char *desc) { - struct list_head *entry; struct datalink_proto *proto = NULL, *p; - list_for_each_rcu(entry, &snap_list) { - p = list_entry(entry, struct datalink_proto, node); + list_for_each_entry_rcu(p, &snap_list, node) { if (!memcmp(p->type, desc, 5)) { proto = p; break; diff --git a/net/802/stp.c b/net/802/stp.c new file mode 100644 index 000000000000..0b7a24452d11 --- /dev/null +++ b/net/802/stp.c @@ -0,0 +1,102 @@ +/* + * STP SAP demux + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/mutex.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/llc.h> +#include <net/llc.h> +#include <net/llc_pdu.h> +#include <net/stp.h> + +/* 01:80:c2:00:00:20 - 01:80:c2:00:00:2F */ +#define GARP_ADDR_MIN 0x20 +#define GARP_ADDR_MAX 0x2F +#define GARP_ADDR_RANGE (GARP_ADDR_MAX - GARP_ADDR_MIN) + +static const struct stp_proto *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly; +static const struct stp_proto *stp_proto __read_mostly; + +static struct llc_sap *sap __read_mostly; +static unsigned int sap_registered; +static DEFINE_MUTEX(stp_proto_mutex); + +/* Called under rcu_read_lock from LLC */ +static int stp_pdu_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + const struct ethhdr *eh = eth_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct stp_proto *proto; + + if (pdu->ssap != LLC_SAP_BSPAN || + pdu->dsap != LLC_SAP_BSPAN || + pdu->ctrl_1 != LLC_PDU_TYPE_U) + goto err; + + if (eh->h_dest[5] >= GARP_ADDR_MIN && eh->h_dest[5] <= GARP_ADDR_MAX) { + proto = rcu_dereference(garp_protos[eh->h_dest[5] - + GARP_ADDR_MIN]); + if (proto && + compare_ether_addr(eh->h_dest, proto->group_address)) + goto err; + } else + proto = rcu_dereference(stp_proto); + + if (!proto) + goto err; + + proto->rcv(proto, skb, dev); + return 0; + +err: + kfree_skb(skb); + return 0; +} + +int stp_proto_register(const struct stp_proto *proto) +{ + int err = 0; + + mutex_lock(&stp_proto_mutex); + if (sap_registered++ == 0) { + sap = llc_sap_open(LLC_SAP_BSPAN, stp_pdu_rcv); + if (!sap) { + err = -ENOMEM; + goto out; + } + } + if (is_zero_ether_addr(proto->group_address)) + rcu_assign_pointer(stp_proto, proto); + else + rcu_assign_pointer(garp_protos[proto->group_address[5] - + GARP_ADDR_MIN], proto); +out: + mutex_unlock(&stp_proto_mutex); + return err; +} +EXPORT_SYMBOL_GPL(stp_proto_register); + +void stp_proto_unregister(const struct stp_proto *proto) +{ + mutex_lock(&stp_proto_mutex); + if (is_zero_ether_addr(proto->group_address)) + rcu_assign_pointer(stp_proto, NULL); + else + rcu_assign_pointer(garp_protos[proto->group_address[5] - + GARP_ADDR_MIN], NULL); + synchronize_rcu(); + + if (--sap_registered == 0) + llc_sap_put(sap); + mutex_unlock(&stp_proto_mutex); +} +EXPORT_SYMBOL_GPL(stp_proto_unregister); + +MODULE_LICENSE("GPL"); diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index c4a382e450e2..fa073a54963e 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -17,3 +17,13 @@ config VLAN_8021Q will be called 8021q. If unsure, say N. + +config VLAN_8021Q_GVRP + bool "GVRP (GARP VLAN Registration Protocol) support" + depends on VLAN_8021Q + select GARP + help + Select this to enable GVRP end-system support. GVRP is used for + automatic propagation of registered VLANs to switches. + + If unsure, say N. diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 10ca7f486c3a..9f4f174ead1c 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -1,12 +1,10 @@ # # Makefile for the Linux VLAN layer. # +obj-$(subst m,y,$(CONFIG_VLAN_8021Q)) += vlan_core.o +obj-$(CONFIG_VLAN_8021Q) += 8021q.o -obj-$(CONFIG_VLAN_8021Q) += 8021q.o - -8021q-objs := vlan.o vlan_dev.o vlan_netlink.o - -ifeq ($(CONFIG_PROC_FS),y) -8021q-objs += vlanproc.o -endif +8021q-y := vlan.o vlan_dev.o vlan_netlink.o +8021q-$(CONFIG_VLAN_8021Q_GVRP) += vlan_gvrp.o +8021q-$(CONFIG_PROC_FS) += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2a739adaa92b..b661f47bf10a 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -18,21 +18,20 @@ * 2 of the License, or (at your option) any later version. */ -#include <asm/uaccess.h> /* for copy_from_user */ #include <linux/capability.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> -#include <net/datalink.h> -#include <linux/mm.h> -#include <linux/in.h> #include <linux/init.h> +#include <linux/rculist.h> #include <net/p8022.h> #include <net/arp.h> #include <linux/rtnetlink.h> #include <linux/notifier.h> +#include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <asm/uaccess.h> #include <linux/if_vlan.h> #include "vlan.h" @@ -83,13 +82,12 @@ static struct vlan_group *__vlan_find_group(struct net_device *real_dev) * * Must be invoked with RCU read lock (no preempt) */ -struct net_device *__find_vlan_dev(struct net_device *real_dev, - unsigned short VID) +struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id) { struct vlan_group *grp = __vlan_find_group(real_dev); if (grp) - return vlan_group_get_device(grp, VID); + return vlan_group_get_device(grp, vlan_id); return NULL; } @@ -117,14 +115,14 @@ static struct vlan_group *vlan_group_alloc(struct net_device *real_dev) return grp; } -static int vlan_group_prealloc_vid(struct vlan_group *vg, int vid) +static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id) { struct net_device **array; unsigned int size; ASSERT_RTNL(); - array = vg->vlan_devices_arrays[vid / VLAN_GROUP_ARRAY_PART_LEN]; + array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; if (array != NULL) return 0; @@ -133,7 +131,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, int vid) if (array == NULL) return -ENOBUFS; - vg->vlan_devices_arrays[vid / VLAN_GROUP_ARRAY_PART_LEN] = array; + vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array; return 0; } @@ -147,7 +145,7 @@ void unregister_vlan_dev(struct net_device *dev) struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; struct vlan_group *grp; - unsigned short vlan_id = vlan->vlan_id; + u16 vlan_id = vlan->vlan_id; ASSERT_RTNL(); @@ -165,8 +163,12 @@ void unregister_vlan_dev(struct net_device *dev) synchronize_net(); + unregister_netdevice(dev); + /* If the group is now empty, kill off the group. */ if (grp->nr_vlans == 0) { + vlan_gvrp_uninit_applicant(real_dev); + if (real_dev->features & NETIF_F_HW_VLAN_RX) real_dev->vlan_rx_register(real_dev, NULL); @@ -178,8 +180,6 @@ void unregister_vlan_dev(struct net_device *dev) /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); - - unregister_netdevice(dev); } static void vlan_transfer_operstate(const struct net_device *dev, @@ -203,7 +203,7 @@ static void vlan_transfer_operstate(const struct net_device *dev, } } -int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id) +int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) { char *name = real_dev->name; @@ -240,7 +240,7 @@ int register_vlan_dev(struct net_device *dev) { struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; - unsigned short vlan_id = vlan->vlan_id; + u16 vlan_id = vlan->vlan_id; struct vlan_group *grp, *ngrp = NULL; int err; @@ -249,15 +249,18 @@ int register_vlan_dev(struct net_device *dev) ngrp = grp = vlan_group_alloc(real_dev); if (!grp) return -ENOBUFS; + err = vlan_gvrp_init_applicant(real_dev); + if (err < 0) + goto out_free_group; } err = vlan_group_prealloc_vid(grp, vlan_id); if (err < 0) - goto out_free_group; + goto out_uninit_applicant; err = register_netdevice(dev); if (err < 0) - goto out_free_group; + goto out_uninit_applicant; /* Account for reference in struct vlan_dev_info */ dev_hold(real_dev); @@ -278,6 +281,9 @@ int register_vlan_dev(struct net_device *dev) return 0; +out_uninit_applicant: + if (ngrp) + vlan_gvrp_uninit_applicant(real_dev); out_free_group: if (ngrp) vlan_group_free(ngrp); @@ -287,8 +293,7 @@ out_free_group: /* Attach a VLAN device to a mac address (ie Ethernet Card). * Returns 0 if the device was created or a negative error code otherwise. */ -static int register_vlan_device(struct net_device *real_dev, - unsigned short VLAN_ID) +static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) { struct net_device *new_dev; struct net *net = dev_net(real_dev); @@ -296,10 +301,10 @@ static int register_vlan_device(struct net_device *real_dev, char name[IFNAMSIZ]; int err; - if (VLAN_ID >= VLAN_VID_MASK) + if (vlan_id >= VLAN_VID_MASK) return -ERANGE; - err = vlan_check_real_dev(real_dev, VLAN_ID); + err = vlan_check_real_dev(real_dev, vlan_id); if (err < 0) return err; @@ -307,26 +312,26 @@ static int register_vlan_device(struct net_device *real_dev, switch (vn->name_type) { case VLAN_NAME_TYPE_RAW_PLUS_VID: /* name will look like: eth1.0005 */ - snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, VLAN_ID); + snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, vlan_id); break; case VLAN_NAME_TYPE_PLUS_VID_NO_PAD: /* Put our vlan.VID in the name. * Name will look like: vlan5 */ - snprintf(name, IFNAMSIZ, "vlan%i", VLAN_ID); + snprintf(name, IFNAMSIZ, "vlan%i", vlan_id); break; case VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD: /* Put our vlan.VID in the name. * Name will look like: eth0.5 */ - snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, VLAN_ID); + snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, vlan_id); break; case VLAN_NAME_TYPE_PLUS_VID: /* Put our vlan.VID in the name. * Name will look like: vlan0005 */ default: - snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID); + snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id); } new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, @@ -341,7 +346,7 @@ static int register_vlan_device(struct net_device *real_dev, */ new_dev->mtu = real_dev->mtu; - vlan_dev_info(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */ + vlan_dev_info(new_dev)->vlan_id = vlan_id; vlan_dev_info(new_dev)->real_dev = real_dev; vlan_dev_info(new_dev)->dent = NULL; vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR; @@ -382,6 +387,18 @@ static void vlan_sync_address(struct net_device *dev, memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN); } +static void vlan_transfer_features(struct net_device *dev, + struct net_device *vlandev) +{ + unsigned long old_features = vlandev->features; + + vlandev->features &= ~dev->vlan_features; + vlandev->features |= dev->features & dev->vlan_features; + + if (old_features != vlandev->features) + netdev_features_change(vlandev); +} + static void __vlan_device_event(struct net_device *dev, unsigned long event) { switch (event) { @@ -410,10 +427,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, int i, flgs; struct net_device *vlandev; - if (is_vlan_dev(dev)) { + if (is_vlan_dev(dev)) __vlan_device_event(dev, event); - goto out; - } grp = __vlan_find_group(dev); if (!grp) @@ -450,6 +465,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, } break; + case NETDEV_FEAT_CHANGE: + /* Propagate device features to underlying device */ + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + vlandev = vlan_group_get_device(grp, i); + if (!vlandev) + continue; + + vlan_transfer_features(dev, vlandev); + } + + break; + case NETDEV_DOWN: /* Put all VLANs for this dev in the down state too. */ for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { @@ -513,7 +540,6 @@ static struct notifier_block vlan_notifier_block __read_mostly = { static int vlan_ioctl_handler(struct net *net, void __user *arg) { int err; - unsigned short vid = 0; struct vlan_ioctl_args args; struct net_device *dev = NULL; @@ -540,8 +566,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) goto out; err = -EINVAL; - if (args.cmd != ADD_VLAN_CMD && - !(dev->priv_flags & IFF_802_1Q_VLAN)) + if (args.cmd != ADD_VLAN_CMD && !is_vlan_dev(dev)) goto out; } @@ -569,9 +594,9 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) err = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - err = vlan_dev_set_vlan_flag(dev, - args.u.flag, - args.vlan_qos); + err = vlan_dev_change_flags(dev, + args.vlan_qos ? args.u.flag : 0, + args.u.flag); break; case SET_VLAN_NAME_TYPE_CMD: @@ -615,8 +640,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) case GET_VLAN_VID_CMD: err = 0; - vlan_dev_get_vid(dev, &vid); - args.u.VID = vid; + args.u.VID = vlan_dev_vlan_id(dev); if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) err = -EFAULT; @@ -691,14 +715,20 @@ static int __init vlan_proto_init(void) if (err < 0) goto err2; - err = vlan_netlink_init(); + err = vlan_gvrp_init(); if (err < 0) goto err3; + err = vlan_netlink_init(); + if (err < 0) + goto err4; + dev_add_pack(&vlan_packet_type); vlan_ioctl_set(vlan_ioctl_handler); return 0; +err4: + vlan_gvrp_uninit(); err3: unregister_netdevice_notifier(&vlan_notifier_block); err2: @@ -723,8 +753,9 @@ static void __exit vlan_cleanup_module(void) BUG_ON(!hlist_empty(&vlan_group_hash[i])); unregister_pernet_gen_device(vlan_net_id, &vlan_net_ops); - synchronize_net(); + + vlan_gvrp_uninit(); } module_init(vlan_proto_init); diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 5229a72c7ea1..a6603a4d917f 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -3,6 +3,55 @@ #include <linux/if_vlan.h> + +/** + * struct vlan_priority_tci_mapping - vlan egress priority mappings + * @priority: skb priority + * @vlan_qos: vlan priority: (skb->priority << 13) & 0xE000 + * @next: pointer to next struct + */ +struct vlan_priority_tci_mapping { + u32 priority; + u16 vlan_qos; + struct vlan_priority_tci_mapping *next; +}; + +/** + * struct vlan_dev_info - VLAN private device data + * @nr_ingress_mappings: number of ingress priority mappings + * @ingress_priority_map: ingress priority mappings + * @nr_egress_mappings: number of egress priority mappings + * @egress_priority_map: hash of egress priority mappings + * @vlan_id: VLAN identifier + * @flags: device flags + * @real_dev: underlying netdevice + * @real_dev_addr: address of underlying netdevice + * @dent: proc dir entry + * @cnt_inc_headroom_on_tx: statistic - number of skb expansions on TX + * @cnt_encap_on_xmit: statistic - number of skb encapsulations on TX + */ +struct vlan_dev_info { + unsigned int nr_ingress_mappings; + u32 ingress_priority_map[8]; + unsigned int nr_egress_mappings; + struct vlan_priority_tci_mapping *egress_priority_map[16]; + + u16 vlan_id; + u16 flags; + + struct net_device *real_dev; + unsigned char real_dev_addr[ETH_ALEN]; + + struct proc_dir_entry *dent; + unsigned long cnt_inc_headroom_on_tx; + unsigned long cnt_encap_on_xmit; +}; + +static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) +{ + return netdev_priv(dev); +} + #define VLAN_GRP_HASH_SHIFT 5 #define VLAN_GRP_HASH_SIZE (1 << VLAN_GRP_HASH_SHIFT) #define VLAN_GRP_HASH_MASK (VLAN_GRP_HASH_SIZE - 1) @@ -18,26 +67,47 @@ * Must be invoked with rcu_read_lock (ie preempt disabled) * or with RTNL. */ -struct net_device *__find_vlan_dev(struct net_device *real_dev, - unsigned short VID); /* vlan.c */ +struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id); /* found in vlan_dev.c */ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev); void vlan_dev_set_ingress_priority(const struct net_device *dev, - u32 skb_prio, short vlan_prio); + u32 skb_prio, u16 vlan_prio); int vlan_dev_set_egress_priority(const struct net_device *dev, - u32 skb_prio, short vlan_prio); -int vlan_dev_set_vlan_flag(const struct net_device *dev, - u32 flag, short flag_val); + u32 skb_prio, u16 vlan_prio); +int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask); void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); -void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result); -int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id); +int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id); void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); void unregister_vlan_dev(struct net_device *dev); +static inline u32 vlan_get_ingress_priority(struct net_device *dev, + u16 vlan_tci) +{ + struct vlan_dev_info *vip = vlan_dev_info(dev); + + return vip->ingress_priority_map[(vlan_tci >> 13) & 0x7]; +} + +#ifdef CONFIG_VLAN_8021Q_GVRP +extern int vlan_gvrp_request_join(const struct net_device *dev); +extern void vlan_gvrp_request_leave(const struct net_device *dev); +extern int vlan_gvrp_init_applicant(struct net_device *dev); +extern void vlan_gvrp_uninit_applicant(struct net_device *dev); +extern int vlan_gvrp_init(void); +extern void vlan_gvrp_uninit(void); +#else +static inline int vlan_gvrp_request_join(const struct net_device *dev) { return 0; } +static inline void vlan_gvrp_request_leave(const struct net_device *dev) {} +static inline int vlan_gvrp_init_applicant(struct net_device *dev) { return 0; } +static inline void vlan_gvrp_uninit_applicant(struct net_device *dev) {} +static inline int vlan_gvrp_init(void) { return 0; } +static inline void vlan_gvrp_uninit(void) {} +#endif + int vlan_netlink_init(void); void vlan_netlink_fini(void); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c new file mode 100644 index 000000000000..916061f681b6 --- /dev/null +++ b/net/8021q/vlan_core.c @@ -0,0 +1,64 @@ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include "vlan.h" + +/* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ +int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, + u16 vlan_tci, int polling) +{ + struct net_device_stats *stats; + + if (skb_bond_should_drop(skb)) { + dev_kfree_skb_any(skb); + return NET_RX_DROP; + } + + skb->vlan_tci = vlan_tci; + netif_nit_deliver(skb); + + skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + if (skb->dev == NULL) { + dev_kfree_skb_any(skb); + /* Not NET_RX_DROP, this is not being dropped + * due to congestion. */ + return NET_RX_SUCCESS; + } + skb->dev->last_rx = jiffies; + skb->vlan_tci = 0; + + stats = &skb->dev->stats; + stats->rx_packets++; + stats->rx_bytes += skb->len; + + skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); + switch (skb->pkt_type) { + case PACKET_BROADCAST: + break; + case PACKET_MULTICAST: + stats->multicast++; + break; + case PACKET_OTHERHOST: + /* Our lower layer thinks this is not local, let's make sure. + * This allows the VLAN to have a different MAC than the + * underlying device, and still route correctly. */ + if (!compare_ether_addr(eth_hdr(skb)->h_dest, + skb->dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + break; + }; + return (polling ? netif_receive_skb(skb) : netif_rx(skb)); +} +EXPORT_SYMBOL(__vlan_hwaccel_rx); + +struct net_device *vlan_dev_real_dev(const struct net_device *dev) +{ + return vlan_dev_info(dev)->real_dev; +} +EXPORT_SYMBOL_GPL(vlan_dev_real_dev); + +u16 vlan_dev_vlan_id(const struct net_device *dev) +{ + return vlan_dev_info(dev)->vlan_id; +} +EXPORT_SYMBOL_GPL(vlan_dev_vlan_id); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index c961f0826005..4bf014e51f8c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -21,21 +21,15 @@ */ #include <linux/module.h> -#include <linux/mm.h> -#include <linux/in.h> -#include <linux/init.h> -#include <asm/uaccess.h> /* for copy_from_user */ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include <net/datalink.h> -#include <net/p8022.h> +#include <linux/ethtool.h> #include <net/arp.h> #include "vlan.h" #include "vlanproc.h" #include <linux/if_vlan.h> -#include <net/ip.h> /* * Rebuild the Ethernet MAC header. This is called after an ARP @@ -73,11 +67,8 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) { if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { - if (skb_shared(skb) || skb_cloned(skb)) { - struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); - kfree_skb(skb); - skb = nskb; - } + if (skb_cow(skb, skb_headroom(skb)) < 0) + skb = NULL; if (skb) { /* Lifted from Gleb's VLAN code... */ memmove(skb->data - ETH_HLEN, @@ -149,9 +140,9 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { struct vlan_hdr *vhdr; - unsigned short vid; struct net_device_stats *stats; - unsigned short vlan_TCI; + u16 vlan_id; + u16 vlan_tci; skb = skb_share_check(skb, GFP_ATOMIC); if (skb == NULL) @@ -161,14 +152,14 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, goto err_free; vhdr = (struct vlan_hdr *)skb->data; - vlan_TCI = ntohs(vhdr->h_vlan_TCI); - vid = (vlan_TCI & VLAN_VID_MASK); + vlan_tci = ntohs(vhdr->h_vlan_TCI); + vlan_id = vlan_tci & VLAN_VID_MASK; rcu_read_lock(); - skb->dev = __find_vlan_dev(dev, vid); + skb->dev = __find_vlan_dev(dev, vlan_id); if (!skb->dev) { pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n", - __func__, (unsigned int)vid, dev->name); + __func__, vlan_id, dev->name); goto err_unlock; } @@ -180,11 +171,10 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, skb_pull_rcsum(skb, VLAN_HLEN); - skb->priority = vlan_get_ingress_priority(skb->dev, - ntohs(vhdr->h_vlan_TCI)); + skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); pr_debug("%s: priority: %u for TCI: %hu\n", - __func__, skb->priority, ntohs(vhdr->h_vlan_TCI)); + __func__, skb->priority, vlan_tci); switch (skb->pkt_type) { case PACKET_BROADCAST: /* Yeah, stats collect these together.. */ @@ -227,7 +217,7 @@ err_free: return NET_RX_DROP; } -static inline unsigned short +static inline u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) { struct vlan_priority_tci_mapping *mp; @@ -259,103 +249,44 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned int len) { struct vlan_hdr *vhdr; - unsigned short veth_TCI = 0; - int rc = 0; - int build_vlan_header = 0; - struct net_device *vdev = dev; - - pr_debug("%s: skb: %p type: %hx len: %u vlan_id: %hx, daddr: %p\n", - __func__, skb, type, len, vlan_dev_info(dev)->vlan_id, - daddr); - - /* build vlan header only if re_order_header flag is NOT set. This - * fixes some programs that get confused when they see a VLAN device - * sending a frame that is VLAN encoded (the consensus is that the VLAN - * device should look completely like an Ethernet device when the - * REORDER_HEADER flag is set) The drawback to this is some extra - * header shuffling in the hard_start_xmit. Users can turn off this - * REORDER behaviour with the vconfig tool. - */ - if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) - build_vlan_header = 1; + unsigned int vhdrlen = 0; + u16 vlan_tci = 0; + int rc; - if (build_vlan_header) { - vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); + if (WARN_ON(skb_headroom(skb) < dev->hard_header_len)) + return -ENOSPC; - /* build the four bytes that make this a VLAN header. */ - - /* Now, construct the second two bytes. This field looks - * something like: - * usr_priority: 3 bits (high bits) - * CFI 1 bit - * VLAN ID 12 bits (low bits) - * - */ - veth_TCI = vlan_dev_info(dev)->vlan_id; - veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); + if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) { + vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); - vhdr->h_vlan_TCI = htons(veth_TCI); + vlan_tci = vlan_dev_info(dev)->vlan_id; + vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); + vhdr->h_vlan_TCI = htons(vlan_tci); /* * Set the protocol type. For a packet of type ETH_P_802_3 we * put the length in here instead. It is up to the 802.2 * layer to carry protocol information. */ - if (type != ETH_P_802_3) vhdr->h_vlan_encapsulated_proto = htons(type); else vhdr->h_vlan_encapsulated_proto = htons(len); skb->protocol = htons(ETH_P_8021Q); - skb_reset_network_header(skb); + type = ETH_P_8021Q; + vhdrlen = VLAN_HLEN; } /* Before delegating work to the lower layer, enter our MAC-address */ if (saddr == NULL) saddr = dev->dev_addr; + /* Now make the underlying real hard header */ dev = vlan_dev_info(dev)->real_dev; - - /* MPLS can send us skbuffs w/out enough space. This check will grow - * the skb if it doesn't have enough headroom. Not a beautiful solution, - * so I'll tick a counter so that users can know it's happening... - * If they care... - */ - - /* NOTE: This may still break if the underlying device is not the final - * device (and thus there are more headers to add...) It should work for - * good-ole-ethernet though. - */ - if (skb_headroom(skb) < dev->hard_header_len) { - struct sk_buff *sk_tmp = skb; - skb = skb_realloc_headroom(sk_tmp, dev->hard_header_len); - kfree_skb(sk_tmp); - if (skb == NULL) { - struct net_device_stats *stats = &vdev->stats; - stats->tx_dropped++; - return -ENOMEM; - } - vlan_dev_info(vdev)->cnt_inc_headroom_on_tx++; - pr_debug("%s: %s: had to grow skb\n", __func__, vdev->name); - } - - if (build_vlan_header) { - /* Now make the underlying real hard header */ - rc = dev_hard_header(skb, dev, ETH_P_8021Q, daddr, saddr, - len + VLAN_HLEN); - if (rc > 0) - rc += VLAN_HLEN; - else if (rc < 0) - rc -= VLAN_HLEN; - } else - /* If here, then we'll just make a normal looking ethernet - * frame, but, the hard_start_xmit method will insert the tag - * (it has to be able to do this for bridged and other skbs - * that don't come down the protocol stack in an orderly manner. - */ - rc = dev_hard_header(skb, dev, type, daddr, saddr, len); - + rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen); + if (rc > 0) + rc += vhdrlen; return rc; } @@ -369,78 +300,49 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... */ - if (veth->h_vlan_proto != htons(ETH_P_8021Q) || - vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) { - int orig_headroom = skb_headroom(skb); - unsigned short veth_TCI; + vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) { + unsigned int orig_headroom = skb_headroom(skb); + u16 vlan_tci; - /* This is not a VLAN frame...but we can fix that! */ vlan_dev_info(dev)->cnt_encap_on_xmit++; - pr_debug("%s: proto to encap: 0x%hx\n", - __func__, ntohs(veth->h_vlan_proto)); - /* Construct the second two bytes. This field looks something - * like: - * usr_priority: 3 bits (high bits) - * CFI 1 bit - * VLAN ID 12 bits (low bits) - */ - veth_TCI = vlan_dev_info(dev)->vlan_id; - veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); - - skb = __vlan_put_tag(skb, veth_TCI); + vlan_tci = vlan_dev_info(dev)->vlan_id; + vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); + skb = __vlan_put_tag(skb, vlan_tci); if (!skb) { stats->tx_dropped++; - return 0; + return NETDEV_TX_OK; } if (orig_headroom < VLAN_HLEN) vlan_dev_info(dev)->cnt_inc_headroom_on_tx++; } - pr_debug("%s: about to send skb: %p to dev: %s\n", - __func__, skb, skb->dev->name); - pr_debug(" " MAC_FMT " " MAC_FMT " %4hx %4hx %4hx\n", - veth->h_dest[0], veth->h_dest[1], veth->h_dest[2], - veth->h_dest[3], veth->h_dest[4], veth->h_dest[5], - veth->h_source[0], veth->h_source[1], veth->h_source[2], - veth->h_source[3], veth->h_source[4], veth->h_source[5], - veth->h_vlan_proto, veth->h_vlan_TCI, - veth->h_vlan_encapsulated_proto); - - stats->tx_packets++; /* for statics only */ + stats->tx_packets++; stats->tx_bytes += skb->len; skb->dev = vlan_dev_info(dev)->real_dev; dev_queue_xmit(skb); - - return 0; + return NETDEV_TX_OK; } static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_device_stats *stats = &dev->stats; - unsigned short veth_TCI; + u16 vlan_tci; - /* Construct the second two bytes. This field looks something - * like: - * usr_priority: 3 bits (high bits) - * CFI 1 bit - * VLAN ID 12 bits (low bits) - */ - veth_TCI = vlan_dev_info(dev)->vlan_id; - veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); - skb = __vlan_hwaccel_put_tag(skb, veth_TCI); + vlan_tci = vlan_dev_info(dev)->vlan_id; + vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); + skb = __vlan_hwaccel_put_tag(skb, vlan_tci); stats->tx_packets++; stats->tx_bytes += skb->len; skb->dev = vlan_dev_info(dev)->real_dev; dev_queue_xmit(skb); - - return 0; + return NETDEV_TX_OK; } static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) @@ -457,7 +359,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) } void vlan_dev_set_ingress_priority(const struct net_device *dev, - u32 skb_prio, short vlan_prio) + u32 skb_prio, u16 vlan_prio) { struct vlan_dev_info *vlan = vlan_dev_info(dev); @@ -470,7 +372,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev, } int vlan_dev_set_egress_priority(const struct net_device *dev, - u32 skb_prio, short vlan_prio) + u32 skb_prio, u16 vlan_prio) { struct vlan_dev_info *vlan = vlan_dev_info(dev); struct vlan_priority_tci_mapping *mp = NULL; @@ -507,18 +409,23 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, } /* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */ -int vlan_dev_set_vlan_flag(const struct net_device *dev, - u32 flag, short flag_val) +int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) { - /* verify flag is supported */ - if (flag == VLAN_FLAG_REORDER_HDR) { - if (flag_val) - vlan_dev_info(dev)->flags |= VLAN_FLAG_REORDER_HDR; + struct vlan_dev_info *vlan = vlan_dev_info(dev); + u32 old_flags = vlan->flags; + + if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP)) + return -EINVAL; + + vlan->flags = (old_flags & ~mask) | (flags & mask); + + if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_GVRP) { + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_join(dev); else - vlan_dev_info(dev)->flags &= ~VLAN_FLAG_REORDER_HDR; - return 0; + vlan_gvrp_request_leave(dev); } - return -EINVAL; + return 0; } void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) @@ -526,11 +433,6 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) strncpy(result, vlan_dev_info(dev)->real_dev->name, 23); } -void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result) -{ - *result = vlan_dev_info(dev)->vlan_id; -} - static int vlan_dev_open(struct net_device *dev) { struct vlan_dev_info *vlan = vlan_dev_info(dev); @@ -543,21 +445,44 @@ static int vlan_dev_open(struct net_device *dev) if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) { err = dev_unicast_add(real_dev, dev->dev_addr, ETH_ALEN); if (err < 0) - return err; + goto out; } + + if (dev->flags & IFF_ALLMULTI) { + err = dev_set_allmulti(real_dev, 1); + if (err < 0) + goto del_unicast; + } + if (dev->flags & IFF_PROMISC) { + err = dev_set_promiscuity(real_dev, 1); + if (err < 0) + goto clear_allmulti; + } + memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN); - if (dev->flags & IFF_ALLMULTI) - dev_set_allmulti(real_dev, 1); - if (dev->flags & IFF_PROMISC) - dev_set_promiscuity(real_dev, 1); + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_join(dev); return 0; + +clear_allmulti: + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(real_dev, -1); +del_unicast: + if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) + dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN); +out: + return err; } static int vlan_dev_stop(struct net_device *dev) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct net_device *real_dev = vlan->real_dev; + + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_leave(dev); dev_mc_unsync(real_dev, dev); dev_unicast_unsync(real_dev, dev); @@ -644,6 +569,24 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) * separate class since they always nest. */ static struct lock_class_key vlan_netdev_xmit_lock_key; +static struct lock_class_key vlan_netdev_addr_lock_key; + +static void vlan_dev_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_subclass) +{ + lockdep_set_class_and_subclass(&txq->_xmit_lock, + &vlan_netdev_xmit_lock_key, + *(int *)_subclass); +} + +static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) +{ + lockdep_set_class_and_subclass(&dev->addr_list_lock, + &vlan_netdev_addr_lock_key, + subclass); + netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass); +} static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, @@ -663,6 +606,8 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); + dev->features |= real_dev->features & real_dev->vlan_features; + /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; @@ -681,11 +626,10 @@ static int vlan_dev_init(struct net_device *dev) dev->hard_start_xmit = vlan_dev_hard_start_xmit; } - if (real_dev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(real_dev)) subclass = 1; - lockdep_set_class_and_subclass(&dev->_xmit_lock, - &vlan_netdev_xmit_lock_key, subclass); + vlan_dev_set_lockdep_class(dev, subclass); return 0; } @@ -703,6 +647,35 @@ static void vlan_dev_uninit(struct net_device *dev) } } +static u32 vlan_ethtool_get_rx_csum(struct net_device *dev) +{ + const struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct net_device *real_dev = vlan->real_dev; + + if (real_dev->ethtool_ops == NULL || + real_dev->ethtool_ops->get_rx_csum == NULL) + return 0; + return real_dev->ethtool_ops->get_rx_csum(real_dev); +} + +static u32 vlan_ethtool_get_flags(struct net_device *dev) +{ + const struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct net_device *real_dev = vlan->real_dev; + + if (!(real_dev->features & NETIF_F_HW_VLAN_RX) || + real_dev->ethtool_ops == NULL || + real_dev->ethtool_ops->get_flags == NULL) + return 0; + return real_dev->ethtool_ops->get_flags(real_dev); +} + +static const struct ethtool_ops vlan_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_rx_csum = vlan_ethtool_get_rx_csum, + .get_flags = vlan_ethtool_get_flags, +}; + void vlan_setup(struct net_device *dev) { ether_setup(dev); @@ -721,6 +694,7 @@ void vlan_setup(struct net_device *dev) dev->change_rx_flags = vlan_dev_change_rx_flags; dev->do_ioctl = vlan_dev_ioctl; dev->destructor = free_netdev; + dev->ethtool_ops = &vlan_ethtool_ops; memset(dev->broadcast, 0, ETH_ALEN); } diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c new file mode 100644 index 000000000000..061ceceeef12 --- /dev/null +++ b/net/8021q/vlan_gvrp.c @@ -0,0 +1,66 @@ +/* + * IEEE 802.1Q GARP VLAN Registration Protocol (GVRP) + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/types.h> +#include <linux/if_vlan.h> +#include <net/garp.h> +#include "vlan.h" + +#define GARP_GVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 } + +enum gvrp_attributes { + GVRP_ATTR_INVALID, + GVRP_ATTR_VID, + __GVRP_ATTR_MAX +}; +#define GVRP_ATTR_MAX (__GVRP_ATTR_MAX - 1) + +static struct garp_application vlan_gvrp_app __read_mostly = { + .proto.group_address = GARP_GVRP_ADDRESS, + .maxattr = GVRP_ATTR_MAX, + .type = GARP_APPLICATION_GVRP, +}; + +int vlan_gvrp_request_join(const struct net_device *dev) +{ + const struct vlan_dev_info *vlan = vlan_dev_info(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + return garp_request_join(vlan->real_dev, &vlan_gvrp_app, + &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); +} + +void vlan_gvrp_request_leave(const struct net_device *dev) +{ + const struct vlan_dev_info *vlan = vlan_dev_info(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + garp_request_leave(vlan->real_dev, &vlan_gvrp_app, + &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); +} + +int vlan_gvrp_init_applicant(struct net_device *dev) +{ + return garp_init_applicant(dev, &vlan_gvrp_app); +} + +void vlan_gvrp_uninit_applicant(struct net_device *dev) +{ + garp_uninit_applicant(dev, &vlan_gvrp_app); +} + +int __init vlan_gvrp_init(void) +{ + return garp_register_application(&vlan_gvrp_app); +} + +void vlan_gvrp_uninit(void) +{ + garp_unregister_application(&vlan_gvrp_app); +} diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index c93e69ec28ed..e9c91dcecc9b 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -59,7 +59,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) } if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); - if ((flags->flags & flags->mask) & ~VLAN_FLAG_REORDER_HDR) + if ((flags->flags & flags->mask) & + ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP)) return -EINVAL; } @@ -75,7 +76,6 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); struct ifla_vlan_flags *flags; struct ifla_vlan_qos_mapping *m; struct nlattr *attr; @@ -83,8 +83,7 @@ static int vlan_changelink(struct net_device *dev, if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); - vlan->flags = (vlan->flags & ~flags->mask) | - (flags->flags & flags->mask); + vlan_dev_change_flags(dev, flags->flags, flags->mask); } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 08b54b593d56..0feefa4e1a4b 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -18,16 +18,9 @@ *****************************************************************************/ #include <linux/module.h> -#include <linux/stddef.h> /* offsetof(), etc. */ -#include <linux/errno.h> /* return codes */ +#include <linux/errno.h> #include <linux/kernel.h> -#include <linux/slab.h> /* kmalloc(), kfree() */ -#include <linux/mm.h> -#include <linux/string.h> /* inline mem*, str* functions */ -#include <linux/init.h> /* __initfunc et al. */ -#include <asm/byteorder.h> /* htons(), etc. */ -#include <asm/uaccess.h> /* copy_to_user */ -#include <asm/io.h> +#include <linux/string.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/fs.h> @@ -290,7 +283,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) static const char fmt[] = "%30s %12lu\n"; int i; - if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) + if (!is_vlan_dev(vlandev)) return 0; seq_printf(seq, diff --git a/net/9p/Kconfig b/net/9p/Kconfig index bafc50c9e6ff..ff34c5acc130 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -13,16 +13,6 @@ menuconfig NET_9P If unsure, say N. -config NET_9P_FD - depends on NET_9P - default y if NET_9P - tristate "9P File Descriptor Transports (Experimental)" - help - This builds support for file descriptor transports for 9p - which includes support for TCP/IP, named pipes, or passed - file descriptors. TCP/IP is the default transport for 9p, - so if you are going to use 9p, you'll likely want this. - config NET_9P_VIRTIO depends on NET_9P && EXPERIMENTAL && VIRTIO tristate "9P Virtio Transport (Experimental)" diff --git a/net/9p/Makefile b/net/9p/Makefile index 8a1051101898..519219480db1 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,5 +1,4 @@ obj-$(CONFIG_NET_9P) := 9pnet.o -obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o 9pnet-objs := \ @@ -9,8 +8,6 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o error.o \ fcprint.o \ util.o \ - -9pnet_fd-objs := \ trans_fd.o \ 9pnet_virtio-objs := \ diff --git a/net/9p/client.c b/net/9p/client.c index 84e087e24146..2ffe40cf2f01 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -64,21 +64,30 @@ static match_table_t tokens = { * @options: options string passed from mount * @v9ses: existing v9fs session information * + * Return 0 upon success, -ERRNO upon failure */ -static void parse_opts(char *options, struct p9_client *clnt) +static int parse_opts(char *opts, struct p9_client *clnt) { + char *options; char *p; substring_t args[MAX_OPT_ARGS]; int option; - int ret; + int ret = 0; clnt->trans_mod = v9fs_default_trans(); clnt->dotu = 1; clnt->msize = 8192; - if (!options) - return; + if (!opts) + return 0; + + options = kstrdup(opts, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } while ((p = strsep(&options, ",")) != NULL) { int token; @@ -86,10 +95,11 @@ static void parse_opts(char *options, struct p9_client *clnt) continue; token = match_token(p, tokens, args); if (token < Opt_trans) { - ret = match_int(&args[0], &option); - if (ret < 0) { + int r = match_int(&args[0], &option); + if (r < 0) { P9_DPRINTK(P9_DEBUG_ERROR, "integer field, but no integer?\n"); + ret = r; continue; } } @@ -107,6 +117,8 @@ static void parse_opts(char *options, struct p9_client *clnt) continue; } } + kfree(options); + return ret; } @@ -138,16 +150,20 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (!clnt) return ERR_PTR(-ENOMEM); + clnt->trans = NULL; spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); clnt->fidpool = p9_idpool_create(); - if (!clnt->fidpool) { + if (IS_ERR(clnt->fidpool)) { err = PTR_ERR(clnt->fidpool); clnt->fidpool = NULL; goto error; } - parse_opts(options, clnt); + err = parse_opts(options, clnt); + if (err < 0) + goto error; + if (clnt->trans_mod == NULL) { err = -EPROTONOSUPPORT; P9_DPRINTK(P9_DEBUG_ERROR, diff --git a/net/9p/conv.c b/net/9p/conv.c index 3fe35d532c87..44547201f5bc 100644 --- a/net/9p/conv.c +++ b/net/9p/conv.c @@ -197,7 +197,7 @@ static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid) /** * p9_size_wstat - calculate the size of a variable length stat struct - * @stat: metadata (stat) structure + * @wstat: metadata (stat) structure * @dotu: non-zero if 9P2000.u * */ @@ -511,6 +511,12 @@ p9_create_common(struct cbuf *bufp, u32 size, u8 id) return fc; } +/** + * p9_set_tag - set the tag field of an &p9_fcall structure + * @fc: fcall structure to set tag within + * @tag: tag id to set + */ + void p9_set_tag(struct p9_fcall *fc, u16 tag) { fc->tag = tag; @@ -518,6 +524,12 @@ void p9_set_tag(struct p9_fcall *fc, u16 tag) } EXPORT_SYMBOL(p9_set_tag); +/** + * p9_create_tversion - allocates and creates a T_VERSION request + * @msize: requested maximum data size + * @version: version string to negotiate + * + */ struct p9_fcall *p9_create_tversion(u32 msize, char *version) { int size; @@ -542,6 +554,16 @@ error: } EXPORT_SYMBOL(p9_create_tversion); +/** + * p9_create_tauth - allocates and creates a T_AUTH request + * @afid: handle to use for authentication protocol + * @uname: user name attempting to authenticate + * @aname: mount specifier for remote server + * @n_uname: numeric id for user attempting to authneticate + * @dotu: 9P2000.u extension flag + * + */ + struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname, u32 n_uname, int dotu) { @@ -580,6 +602,18 @@ error: } EXPORT_SYMBOL(p9_create_tauth); +/** + * p9_create_tattach - allocates and creates a T_ATTACH request + * @fid: handle to use for the new mount point + * @afid: handle to use for authentication protocol + * @uname: user name attempting to attach + * @aname: mount specifier for remote server + * @n_uname: numeric id for user attempting to attach + * @n_uname: numeric id for user attempting to attach + * @dotu: 9P2000.u extension flag + * + */ + struct p9_fcall * p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname, u32 n_uname, int dotu) @@ -616,6 +650,12 @@ error: } EXPORT_SYMBOL(p9_create_tattach); +/** + * p9_create_tflush - allocates and creates a T_FLUSH request + * @oldtag: tag id for the transaction we are attempting to cancel + * + */ + struct p9_fcall *p9_create_tflush(u16 oldtag) { int size; @@ -639,6 +679,15 @@ error: } EXPORT_SYMBOL(p9_create_tflush); +/** + * p9_create_twalk - allocates and creates a T_FLUSH request + * @fid: handle we are traversing from + * @newfid: a new handle for this transaction + * @nwname: number of path elements to traverse + * @wnames: array of path elements + * + */ + struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname, char **wnames) { @@ -677,6 +726,13 @@ error: } EXPORT_SYMBOL(p9_create_twalk); +/** + * p9_create_topen - allocates and creates a T_OPEN request + * @fid: handle we are trying to open + * @mode: what mode we are trying to open the file in + * + */ + struct p9_fcall *p9_create_topen(u32 fid, u8 mode) { int size; @@ -701,6 +757,19 @@ error: } EXPORT_SYMBOL(p9_create_topen); +/** + * p9_create_tcreate - allocates and creates a T_CREATE request + * @fid: handle of directory we are trying to create in + * @name: name of the file we are trying to create + * @perm: permissions for the file we are trying to create + * @mode: what mode we are trying to open the file in + * @extension: 9p2000.u extension string (for special files) + * @dotu: 9p2000.u enabled flag + * + * Note: Plan 9 create semantics include opening the resulting file + * which is why mode is included. + */ + struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, char *extension, int dotu) { @@ -736,6 +805,13 @@ error: } EXPORT_SYMBOL(p9_create_tcreate); +/** + * p9_create_tread - allocates and creates a T_READ request + * @fid: handle of the file we are trying to read + * @offset: offset to start reading from + * @count: how many bytes to read + */ + struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count) { int size; @@ -761,6 +837,17 @@ error: } EXPORT_SYMBOL(p9_create_tread); +/** + * p9_create_twrite - allocates and creates a T_WRITE request from the kernel + * @fid: handle of the file we are trying to write + * @offset: offset to start writing at + * @count: how many bytes to write + * @data: data to write + * + * This function will create a requst with data buffers from the kernel + * such as the page cache. + */ + struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count, const char *data) { @@ -794,6 +881,16 @@ error: } EXPORT_SYMBOL(p9_create_twrite); +/** + * p9_create_twrite_u - allocates and creates a T_WRITE request from userspace + * @fid: handle of the file we are trying to write + * @offset: offset to start writing at + * @count: how many bytes to write + * @data: data to write + * + * This function will create a request with data buffers from userspace + */ + struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count, const char __user *data) { @@ -827,6 +924,14 @@ error: } EXPORT_SYMBOL(p9_create_twrite_u); +/** + * p9_create_tclunk - allocate a request to forget about a file handle + * @fid: handle of the file we closing or forgetting about + * + * clunk is used both to close open files and to discard transient handles + * which may be created during meta-data operations and hierarchy traversal. + */ + struct p9_fcall *p9_create_tclunk(u32 fid) { int size; @@ -850,6 +955,12 @@ error: } EXPORT_SYMBOL(p9_create_tclunk); +/** + * p9_create_tremove - allocate and create a request to remove a file + * @fid: handle of the file or directory we are removing + * + */ + struct p9_fcall *p9_create_tremove(u32 fid) { int size; @@ -873,6 +984,12 @@ error: } EXPORT_SYMBOL(p9_create_tremove); +/** + * p9_create_tstat - allocate and populate a request for attributes + * @fid: handle of the file or directory we are trying to get the attributes of + * + */ + struct p9_fcall *p9_create_tstat(u32 fid) { int size; @@ -896,6 +1013,14 @@ error: } EXPORT_SYMBOL(p9_create_tstat); +/** + * p9_create_tstat - allocate and populate a request to change attributes + * @fid: handle of the file or directory we are trying to change + * @wstat: &p9_stat structure with attributes we wish to set + * @dotu: 9p2000.u enabled flag + * + */ + struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat, int dotu) { @@ -922,3 +1047,4 @@ error: return fc; } EXPORT_SYMBOL(p9_create_twstat); + diff --git a/net/9p/error.c b/net/9p/error.c index 64104b9cb422..fdebe4314062 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -33,6 +33,13 @@ #include <linux/errno.h> #include <net/9p/9p.h> +/** + * struct errormap - map string errors from Plan 9 to Linux numeric ids + * @name: string sent over 9P + * @val: numeric id most closely representing @name + * @namelen: length of string + * @list: hash-table list for string lookup + */ struct errormap { char *name; int val; @@ -177,8 +184,7 @@ static struct errormap errmap[] = { }; /** - * p9_error_init - preload - * @errstr: error string + * p9_error_init - preload mappings into hash list * */ @@ -206,6 +212,7 @@ EXPORT_SYMBOL(p9_error_init); /** * errstr2errno - convert error string to error number * @errstr: error string + * @len: length of error string * */ @@ -230,8 +237,8 @@ int p9_errstr2errno(char *errstr, int len) if (errno == 0) { /* TODO: if error isn't found, add it dynamically */ errstr[len] = 0; - printk(KERN_ERR "%s: errstr :%s: not found\n", __func__, - errstr); + printk(KERN_ERR "%s: server reported unknown error %s\n", + __func__, errstr); errno = 1; } diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c index 40244fbd9b0d..53dd8e28dd8a 100644 --- a/net/9p/fcprint.c +++ b/net/9p/fcprint.c @@ -142,6 +142,14 @@ p9_printdata(char *buf, int buflen, u8 *data, int datalen) return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16); } +/** + * p9_printfcall - decode and print a protocol structure into a buffer + * @buf: buffer to deposit decoded structure into + * @buflen: available space in buffer + * @fc: protocol rpc structure of type &p9_fcall + * @extended: whether or not session is operating with extended protocol + */ + int p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) { diff --git a/net/9p/mod.c b/net/9p/mod.c index c285aab2af04..bdee1fb7cc62 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -39,9 +39,6 @@ module_param_named(debug, p9_debug_level, uint, 0); MODULE_PARM_DESC(debug, "9P debugging level"); #endif -extern int p9_mux_global_init(void); -extern void p9_mux_global_exit(void); - /* * Dynamic Transport Registration Routines * @@ -52,7 +49,7 @@ static struct p9_trans_module *v9fs_default_transport; /** * v9fs_register_trans - register a new transport with 9p - * @m - structure describing the transport module and entry points + * @m: structure describing the transport module and entry points * */ void v9fs_register_trans(struct p9_trans_module *m) @@ -65,7 +62,7 @@ EXPORT_SYMBOL(v9fs_register_trans); /** * v9fs_match_trans - match transport versus registered transports - * @arg: string identifying transport + * @name: string identifying transport * */ struct p9_trans_module *v9fs_match_trans(const substring_t *name) @@ -110,6 +107,7 @@ static int __init init_p9(void) p9_error_init(); printk(KERN_INFO "Installing 9P2000 support\n"); + p9_trans_fd_init(); return ret; } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index f624dff76852..cdf137af7adc 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -47,12 +47,29 @@ #define SCHED_TIMEOUT 10 #define MAXPOLLWADDR 2 +/** + * struct p9_fd_opts - per-transport options + * @rfd: file descriptor for reading (trans=fd) + * @wfd: file descriptor for writing (trans=fd) + * @port: port to connect to (trans=tcp) + * + */ + struct p9_fd_opts { int rfd; int wfd; u16 port; }; + +/** + * struct p9_trans_fd - transport state + * @rd: reference to file to read from + * @wr: reference of file to write to + * @conn: connection state reference + * + */ + struct p9_trans_fd { struct file *rd; struct file *wr; @@ -90,10 +107,24 @@ enum { }; struct p9_req; - typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a); + +/** + * struct p9_req - fd mux encoding of an rpc transaction + * @lock: protects req_list + * @tag: numeric tag for rpc transaction + * @tcall: request &p9_fcall structure + * @rcall: response &p9_fcall structure + * @err: error state + * @cb: callback for when response is received + * @cba: argument to pass to callback + * @flush: flag to indicate RPC has been flushed + * @req_list: list link for higher level objects to chain requests + * + */ + struct p9_req { - spinlock_t lock; /* protect request structure */ + spinlock_t lock; int tag; struct p9_fcall *tcall; struct p9_fcall *rcall; @@ -104,7 +135,39 @@ struct p9_req { struct list_head req_list; }; -struct p9_mux_poll_task; +struct p9_mux_poll_task { + struct task_struct *task; + struct list_head mux_list; + int muxnum; +}; + +/** + * struct p9_conn - fd mux connection state information + * @lock: protects mux_list (?) + * @mux_list: list link for mux to manage multiple connections (?) + * @poll_task: task polling on this connection + * @msize: maximum size for connection (dup) + * @extended: 9p2000.u flag (dup) + * @trans: reference to transport instance for this connection + * @tagpool: id accounting for transactions + * @err: error state + * @equeue: event wait_q (?) + * @req_list: accounting for requests which have been sent + * @unsent_req_list: accounting for requests that haven't been sent + * @rcall: current response &p9_fcall structure + * @rpos: read position in current frame + * @rbuf: current read buffer + * @wpos: write position for current frame + * @wsize: amount of data to write for current frame + * @wbuf: current write buffer + * @poll_wait: array of wait_q's for various worker threads + * @poll_waddr: ???? + * @pt: poll state + * @rq: current read work + * @wq: current write work + * @wsched: ???? + * + */ struct p9_conn { spinlock_t lock; /* protect lock structure */ @@ -132,11 +195,16 @@ struct p9_conn { unsigned long wsched; }; -struct p9_mux_poll_task { - struct task_struct *task; - struct list_head mux_list; - int muxnum; -}; +/** + * struct p9_mux_rpc - fd mux rpc accounting structure + * @m: connection this request was issued on + * @err: error state + * @tcall: request &p9_fcall + * @rcall: response &p9_fcall + * @wqueue: wait queue that client is blocked on for this rpc + * + * Bug: isn't this information duplicated elsewhere like &p9_req + */ struct p9_mux_rpc { struct p9_conn *m; @@ -207,10 +275,12 @@ static void p9_mux_put_tag(struct p9_conn *m, u16 tag) /** * p9_mux_calc_poll_procs - calculates the number of polling procs - * based on the number of mounted v9fs filesystems. + * @muxnum: number of mounts * + * Calculation is based on the number of mounted v9fs filesystems. * The current implementation returns sqrt of the number of mounts. */ + static int p9_mux_calc_poll_procs(int muxnum) { int n; @@ -331,12 +401,11 @@ static void p9_mux_poll_stop(struct p9_conn *m) /** * p9_conn_create - allocate and initialize the per-session mux data - * Creates the polling task if this is the first session. + * @trans: transport structure * - * @trans - transport structure - * @msize - maximum message size - * @extended - extended flag + * Note: Creates the polling task if this is the first session. */ + static struct p9_conn *p9_conn_create(struct p9_trans *trans) { int i, n; @@ -406,7 +475,10 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) /** * p9_mux_destroy - cancels all pending requests and frees mux resources + * @m: mux to destroy + * */ + static void p9_conn_destroy(struct p9_conn *m) { P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, @@ -429,9 +501,14 @@ static void p9_conn_destroy(struct p9_conn *m) } /** - * p9_pollwait - called by files poll operation to add v9fs-poll task - * to files wait queue + * p9_pollwait - add poll task to the wait queue + * @filp: file pointer being polled + * @wait_address: wait_q to block on + * @p: poll state + * + * called by files poll operation to add v9fs-poll task to files wait queue */ + static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { @@ -462,7 +539,10 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) /** * p9_poll_mux - polls a mux and schedules read or write works if necessary + * @m: connection to poll + * */ + static void p9_poll_mux(struct p9_conn *m) { int n; @@ -499,9 +579,14 @@ static void p9_poll_mux(struct p9_conn *m) } /** - * p9_poll_proc - polls all v9fs transports for new events and queues - * the appropriate work to the work queue + * p9_poll_proc - poll worker thread + * @a: thread state and arguments + * + * polls all v9fs transports for new events and queues the appropriate + * work to the work queue + * */ + static int p9_poll_proc(void *a) { struct p9_conn *m, *mtmp; @@ -527,7 +612,10 @@ static int p9_poll_proc(void *a) /** * p9_write_work - called when a transport can send some data + * @work: container for work to be done + * */ + static void p9_write_work(struct work_struct *work) { int n, err; @@ -638,7 +726,10 @@ static void process_request(struct p9_conn *m, struct p9_req *req) /** * p9_read_work - called when there is some data to be read from a transport + * @work: container of work to be done + * */ + static void p9_read_work(struct work_struct *work) { int n, err; @@ -793,7 +884,9 @@ error: * @tc: request to be sent * @cb: callback function to call when response is received * @cba: parameter to pass to the callback function + * */ + static struct p9_req *p9_send_request(struct p9_conn *m, struct p9_fcall *tc, p9_conn_req_callback cb, void *cba) @@ -961,10 +1054,12 @@ p9_conn_rpc_cb(struct p9_req *req, void *a) /** * p9_fd_rpc- sends 9P request and waits until a response is available. * The function can be interrupted. - * @m: mux data + * @t: transport data * @tc: request to be sent * @rc: pointer where a pointer to the response is stored + * */ + int p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) { @@ -1041,8 +1136,10 @@ p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) * @m: mux data * @tc: request to be sent * @cb: callback function to be called when response arrives - * @cba: value to pass to the callback function + * @a: value to pass to the callback function + * */ + int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, p9_conn_req_callback cb, void *a) { @@ -1065,7 +1162,9 @@ int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, * p9_conn_cancel - cancel all pending requests with error * @m: mux data * @err: error code + * */ + void p9_conn_cancel(struct p9_conn *m, int err) { struct p9_req *req, *rtmp; @@ -1097,35 +1196,46 @@ void p9_conn_cancel(struct p9_conn *m, int err) } /** - * v9fs_parse_options - parse mount options into session structure + * parse_options - parse mount options into session structure * @options: options string passed from mount - * @v9ses: existing v9fs session information + * @opts: transport-specific structure to parse options into * + * Returns 0 upon success, -ERRNO upon failure */ -static void parse_opts(char *options, struct p9_fd_opts *opts) +static int parse_opts(char *params, struct p9_fd_opts *opts) { char *p; substring_t args[MAX_OPT_ARGS]; int option; + char *options; int ret; opts->port = P9_PORT; opts->rfd = ~0; opts->wfd = ~0; - if (!options) - return; + if (!params) + return 0; + + options = kstrdup(params, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } while ((p = strsep(&options, ",")) != NULL) { int token; + int r; if (!*p) continue; token = match_token(p, tokens, args); - ret = match_int(&args[0], &option); - if (ret < 0) { + r = match_int(&args[0], &option); + if (r < 0) { P9_DPRINTK(P9_DEBUG_ERROR, "integer field, but no integer?\n"); + ret = r; continue; } switch (token) { @@ -1142,6 +1252,8 @@ static void parse_opts(char *options, struct p9_fd_opts *opts) continue; } } + kfree(options); + return 0; } static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd) @@ -1173,7 +1285,7 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) int fd, ret; csocket->sk->sk_allocation = GFP_NOIO; - fd = sock_map_fd(csocket); + fd = sock_map_fd(csocket, 0); if (fd < 0) { P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); return fd; @@ -1193,11 +1305,12 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) /** * p9_fd_read- read from a fd - * @v9ses: session information + * @trans: transport instance state * @v: buffer to receive data into * @len: size of receive buffer * */ + static int p9_fd_read(struct p9_trans *trans, void *v, int len) { int ret; @@ -1220,11 +1333,12 @@ static int p9_fd_read(struct p9_trans *trans, void *v, int len) /** * p9_fd_write - write to a socket - * @v9ses: session information + * @trans: transport instance state * @v: buffer to send data from * @len: size of send buffer * */ + static int p9_fd_write(struct p9_trans *trans, void *v, int len) { int ret; @@ -1296,6 +1410,7 @@ end: * @trans: private socket structure * */ + static void p9_fd_close(struct p9_trans *trans) { struct p9_trans_fd *ts; @@ -1318,6 +1433,23 @@ static void p9_fd_close(struct p9_trans *trans) kfree(ts); } +/* + * stolen from NFS - maybe should be made a generic function? + */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} + static struct p9_trans * p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) { @@ -1328,7 +1460,12 @@ p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) struct p9_fd_opts opts; struct p9_trans_fd *p; - parse_opts(args, &opts); + err = parse_opts(args, &opts); + if (err < 0) + return ERR_PTR(err); + + if (valid_ipaddr4(addr) < 0) + return ERR_PTR(-EINVAL); csocket = NULL; trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); @@ -1508,7 +1645,7 @@ static struct p9_trans_module p9_fd_trans = { .create = p9_trans_create_fd, }; -static int __init p9_trans_fd_init(void) +int p9_trans_fd_init(void) { int ret = p9_mux_global_init(); if (ret) { @@ -1522,9 +1659,4 @@ static int __init p9_trans_fd_init(void) return 0; } - -module_init(p9_trans_fd_init); - -MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); -MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); -MODULE_LICENSE("GPL"); +EXPORT_SYMBOL(p9_trans_fd_init); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index de7a9f532edc..42adc052b149 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -49,29 +49,75 @@ #define VIRTQUEUE_NUM 128 /* a single mutex to manage channel initialization and attachment */ -static DECLARE_MUTEX(virtio_9p_lock); +static DEFINE_MUTEX(virtio_9p_lock); /* global which tracks highest initialized channel */ static int chan_index; #define P9_INIT_MAXTAG 16 -#define REQ_STATUS_IDLE 0 -#define REQ_STATUS_SENT 1 -#define REQ_STATUS_RCVD 2 -#define REQ_STATUS_FLSH 3 + +/** + * enum p9_req_status_t - virtio request status + * @REQ_STATUS_IDLE: request slot unused + * @REQ_STATUS_SENT: request sent to server + * @REQ_STATUS_RCVD: response received from server + * @REQ_STATUS_FLSH: request has been flushed + * + * The @REQ_STATUS_IDLE state is used to mark a request slot as unused + * but use is actually tracked by the idpool structure which handles tag + * id allocation. + * + */ + +enum p9_req_status_t { + REQ_STATUS_IDLE, + REQ_STATUS_SENT, + REQ_STATUS_RCVD, + REQ_STATUS_FLSH, +}; + +/** + * struct p9_req_t - virtio request slots + * @status: status of this request slot + * @wq: wait_queue for the client to block on for this request + * + * The virtio transport uses an array to track outstanding requests + * instead of a list. While this may incurr overhead during initial + * allocation or expansion, it makes request lookup much easier as the + * tag id is a index into an array. (We use tag+1 so that we can accomodate + * the -1 tag for the T_VERSION request). + * This also has the nice effect of only having to allocate wait_queues + * once, instead of constantly allocating and freeing them. Its possible + * other resources could benefit from this scheme as well. + * + */ struct p9_req_t { int status; wait_queue_head_t *wq; }; -/* We keep all per-channel information in a structure. +/** + * struct virtio_chan - per-instance transport information + * @initialized: whether the channel is initialized + * @inuse: whether the channel is in use + * @lock: protects multiple elements within this structure + * @vdev: virtio dev associated with this channel + * @vq: virtio queue associated with this channel + * @tagpool: accounting for tag ids (and request slots) + * @reqs: array of request slots + * @max_tag: current number of request_slots allocated + * @sg: scatter gather list which is used to pack a request (protected?) + * + * We keep all per-channel information in a structure. * This structure is allocated within the devices dev->mem space. * A pointer to the structure will get put in the transport private. + * */ + static struct virtio_chan { - bool initialized; /* channel is initialized */ - bool inuse; /* channel is in use */ + bool initialized; + bool inuse; spinlock_t lock; @@ -86,7 +132,19 @@ static struct virtio_chan { struct scatterlist sg[VIRTQUEUE_NUM]; } channels[MAX_9P_CHAN]; -/* Lookup requests by tag */ +/** + * p9_lookup_tag - Lookup requests by tag + * @c: virtio channel to lookup tag within + * @tag: numeric id for transaction + * + * this is a simple array lookup, but will grow the + * request_slots as necessary to accomodate transaction + * ids which did not previously have a slot. + * + * Bugs: there is currently no upper limit on request slots set + * here, but that should be constrained by the id accounting. + */ + static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag) { /* This looks up the original request by tag so we know which @@ -130,11 +188,20 @@ static unsigned int rest_of_page(void *data) return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); } +/** + * p9_virtio_close - reclaim resources of a channel + * @trans: transport state + * + * This reclaims a channel by freeing its resources and + * reseting its inuse flag. + * + */ + static void p9_virtio_close(struct p9_trans *trans) { struct virtio_chan *chan = trans->priv; int count; - unsigned int flags; + unsigned long flags; spin_lock_irqsave(&chan->lock, flags); p9_idpool_destroy(chan->tagpool); @@ -144,13 +211,26 @@ static void p9_virtio_close(struct p9_trans *trans) chan->max_tag = 0; spin_unlock_irqrestore(&chan->lock, flags); - down(&virtio_9p_lock); + mutex_lock(&virtio_9p_lock); chan->inuse = false; - up(&virtio_9p_lock); + mutex_unlock(&virtio_9p_lock); kfree(trans); } +/** + * req_done - callback which signals activity from the server + * @vq: virtio queue activity was received on + * + * This notifies us that the server has triggered some activity + * on the virtio channel - most likely a response to request we + * sent. Figure out which requests now have responses and wake up + * those threads. + * + * Bugs: could do with some additional sanity checking, but appears to work. + * + */ + static void req_done(struct virtqueue *vq) { struct virtio_chan *chan = vq->vdev->priv; @@ -169,6 +249,20 @@ static void req_done(struct virtqueue *vq) spin_unlock_irqrestore(&chan->lock, flags); } +/** + * pack_sg_list - pack a scatter gather list from a linear buffer + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @limit: maximum segment to pack data to + * @data: data to pack into scatter/gather list + * @count: amount of data to pack into the scatter/gather list + * + * sg_lists have multiple segments of various sizes. This will pack + * arbitrary data into an existing scatter gather list, segmenting the + * data as necessary within constraints. + * + */ + static int pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, int count) @@ -189,6 +283,14 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, return index-start; } +/** + * p9_virtio_rpc - issue a request and wait for a response + * @t: transport state + * @tc: &p9_fcall request to transmit + * @rc: &p9_fcall to put reponse into + * + */ + static int p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) { @@ -263,16 +365,26 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) return 0; } +/** + * p9_virtio_probe - probe for existence of 9P virtio channels + * @vdev: virtio device to probe + * + * This probes for existing virtio channels. At present only + * a single channel is in use, so in the future more work may need + * to be done here. + * + */ + static int p9_virtio_probe(struct virtio_device *vdev) { int err; struct virtio_chan *chan; int index; - down(&virtio_9p_lock); + mutex_lock(&virtio_9p_lock); index = chan_index++; chan = &channels[index]; - up(&virtio_9p_lock); + mutex_unlock(&virtio_9p_lock); if (chan_index > MAX_9P_CHAN) { printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n"); @@ -301,17 +413,34 @@ static int p9_virtio_probe(struct virtio_device *vdev) out_free_vq: vdev->config->del_vq(chan->vq); fail: - down(&virtio_9p_lock); + mutex_lock(&virtio_9p_lock); chan_index--; - up(&virtio_9p_lock); + mutex_unlock(&virtio_9p_lock); return err; } -/* This sets up a transport channel for 9p communication. Right now + +/** + * p9_virtio_create - allocate a new virtio channel + * @devname: string identifying the channel to connect to (unused) + * @args: args passed from sys_mount() for per-transport options (unused) + * @msize: requested maximum packet size + * @extended: 9p2000.u enabled flag + * + * This sets up a transport channel for 9p communication. Right now * we only match the first available channel, but eventually we couldlook up * alternate channels by matching devname versus a virtio_config entry. * We use a simple reference count mechanism to ensure that only a single - * mount has a channel open at a time. */ + * mount has a channel open at a time. + * + * Bugs: doesn't allow identification of a specific channel + * to allocate, channels are allocated sequentially. This was + * a pragmatic decision to get things rolling, but ideally some + * way of identifying the channel to attach to would be nice + * if we are going to support multiple channels. + * + */ + static struct p9_trans * p9_virtio_create(const char *devname, char *args, int msize, unsigned char extended) @@ -320,7 +449,7 @@ p9_virtio_create(const char *devname, char *args, int msize, struct virtio_chan *chan = channels; int index = 0; - down(&virtio_9p_lock); + mutex_lock(&virtio_9p_lock); while (index < MAX_9P_CHAN) { if (chan->initialized && !chan->inuse) { chan->inuse = true; @@ -330,7 +459,7 @@ p9_virtio_create(const char *devname, char *args, int msize, chan = &channels[index]; } } - up(&virtio_9p_lock); + mutex_unlock(&virtio_9p_lock); if (index >= MAX_9P_CHAN) { printk(KERN_ERR "9p: no channels available\n"); @@ -360,6 +489,12 @@ p9_virtio_create(const char *devname, char *args, int msize, return trans; } +/** + * p9_virtio_remove - clean up resources associated with a virtio device + * @vdev: virtio device to remove + * + */ + static void p9_virtio_remove(struct virtio_device *vdev) { struct virtio_chan *chan = vdev->priv; diff --git a/net/9p/util.c b/net/9p/util.c index ef7215565d88..958fc58cd1ff 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -32,11 +32,23 @@ #include <linux/idr.h> #include <net/9p/9p.h> +/** + * struct p9_idpool - per-connection accounting for tag idpool + * @lock: protects the pool + * @pool: idr to allocate tag id from + * + */ + struct p9_idpool { spinlock_t lock; struct idr pool; }; +/** + * p9_idpool_create - create a new per-connection id pool + * + */ + struct p9_idpool *p9_idpool_create(void) { struct p9_idpool *p; @@ -52,6 +64,11 @@ struct p9_idpool *p9_idpool_create(void) } EXPORT_SYMBOL(p9_idpool_create); +/** + * p9_idpool_destroy - create a new per-connection id pool + * @p: idpool to destory + */ + void p9_idpool_destroy(struct p9_idpool *p) { idr_destroy(&p->pool); @@ -61,9 +78,9 @@ EXPORT_SYMBOL(p9_idpool_destroy); /** * p9_idpool_get - allocate numeric id from pool - * @p - pool to allocate from + * @p: pool to allocate from * - * XXX - This seems to be an awful generic function, should it be in idr.c with + * Bugs: This seems to be an awful generic function, should it be in idr.c with * the lock included in struct idr? */ @@ -71,7 +88,7 @@ int p9_idpool_get(struct p9_idpool *p) { int i = 0; int error; - unsigned int flags; + unsigned long flags; retry: if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) @@ -94,15 +111,16 @@ EXPORT_SYMBOL(p9_idpool_get); /** * p9_idpool_put - release numeric id from pool - * @p - pool to allocate from + * @id: numeric id which is being released + * @p: pool to release id into * - * XXX - This seems to be an awful generic function, should it be in idr.c with + * Bugs: This seems to be an awful generic function, should it be in idr.c with * the lock included in struct idr? */ void p9_idpool_put(int id, struct p9_idpool *p) { - unsigned int flags; + unsigned long flags; spin_lock_irqsave(&p->lock, flags); idr_remove(&p->pool, id); spin_unlock_irqrestore(&p->lock, flags); @@ -111,11 +129,13 @@ EXPORT_SYMBOL(p9_idpool_put); /** * p9_idpool_check - check if the specified id is available - * @id - id to check - * @p - pool + * @id: id to check + * @p: pool to check */ + int p9_idpool_check(int id, struct p9_idpool *p) { return idr_find(&p->pool, id) != NULL; } EXPORT_SYMBOL(p9_idpool_check); + diff --git a/net/Kconfig b/net/Kconfig index acbf7c60e89b..b98668751749 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -181,6 +181,7 @@ source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/tipc/Kconfig" source "net/atm/Kconfig" +source "net/802/Kconfig" source "net/bridge/Kconfig" source "net/8021q/Kconfig" source "net/decnet/Kconfig" diff --git a/net/Makefile b/net/Makefile index b7a13643b549..4f43e7f874f3 100644 --- a/net/Makefile +++ b/net/Makefile @@ -42,7 +42,9 @@ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_ECONET) += econet/ -obj-$(CONFIG_VLAN_8021Q) += 8021q/ +ifneq ($(CONFIG_VLAN_8021Q),) +obj-y += 8021q/ +endif obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-y += wireless/ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 25aa37ce9430..b25c1e909d14 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -333,7 +333,7 @@ static int aarp_device_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; int ct; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event == NETDEV_DOWN) { @@ -716,7 +716,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, struct atalk_addr sa, *ma, da; struct atalk_iface *ifa; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto out0; /* We only do Ethernet SNAP AARP. */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 44cd42f7786b..0c850427a85b 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -648,7 +648,7 @@ static int ddp_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event == NETDEV_DOWN) @@ -959,7 +959,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -986,7 +986,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -1405,7 +1405,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, int origlen; __u16 len_hops; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto freeit; /* Don't mangle buffer if shared */ @@ -1493,7 +1493,7 @@ freeit: static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto freeit; /* Expand any short form frames */ diff --git a/net/atm/addr.c b/net/atm/addr.c index 6afa77d63bb5..82e85abc303d 100644 --- a/net/atm/addr.c +++ b/net/atm/addr.c @@ -9,7 +9,7 @@ #include "signaling.h" #include "addr.h" -static int check_addr(struct sockaddr_atmsvc *addr) +static int check_addr(const struct sockaddr_atmsvc *addr) { int i; @@ -23,7 +23,7 @@ static int check_addr(struct sockaddr_atmsvc *addr) return -EINVAL; } -static int identical(struct sockaddr_atmsvc *a, struct sockaddr_atmsvc *b) +static int identical(const struct sockaddr_atmsvc *a, const struct sockaddr_atmsvc *b) { if (*a->sas_addr.prv) if (memcmp(a->sas_addr.prv, b->sas_addr.prv, ATM_ESA_LEN)) @@ -35,7 +35,7 @@ static int identical(struct sockaddr_atmsvc *a, struct sockaddr_atmsvc *b) return !strcmp(a->sas_addr.pub, b->sas_addr.pub); } -static void notify_sigd(struct atm_dev *dev) +static void notify_sigd(const struct atm_dev *dev) { struct sockaddr_atmpvc pvc; @@ -63,7 +63,7 @@ void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t atype) notify_sigd(dev); } -int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, +int atm_add_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr, enum atm_addr_type_t atype) { unsigned long flags; @@ -98,7 +98,7 @@ int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, return 0; } -int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, +int atm_del_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr, enum atm_addr_type_t atype) { unsigned long flags; diff --git a/net/atm/addr.h b/net/atm/addr.h index f39433ad45da..6837e9e7eb13 100644 --- a/net/atm/addr.h +++ b/net/atm/addr.h @@ -10,9 +10,9 @@ #include <linux/atmdev.h> void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t type); -int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, +int atm_add_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr, enum atm_addr_type_t type); -int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, +int atm_del_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr, enum atm_addr_type_t type); int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user *buf, size_t size, enum atm_addr_type_t type); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 1b228065e745..8d9a6f158880 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -52,12 +52,12 @@ static void skb_debug(const struct sk_buff *skb) #define ETHERTYPE_IPV6 0x86, 0xdd #define PAD_BRIDGED 0x00, 0x00 -static unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 }; -static unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 }; -static unsigned char llc_oui_pid_pad[] = +static const unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 }; +static const unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 }; +static const unsigned char llc_oui_pid_pad[] = { LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED }; -static unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 }; -static unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 }; +static const unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 }; +static const unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 }; enum br2684_encaps { e_vc = BR2684_ENCAPS_VC, @@ -188,10 +188,13 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, return 0; } } - } else { - skb_push(skb, 2); - if (brdev->payload == p_bridged) + } else { /* e_vc */ + if (brdev->payload == p_bridged) { + skb_push(skb, 2); memset(skb->data, 0, 2); + } else { /* p_routed */ + skb_pull(skb, ETH_HLEN); + } } skb_debug(skb); @@ -214,8 +217,8 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, return 1; } -static inline struct br2684_vcc *pick_outgoing_vcc(struct sk_buff *skb, - struct br2684_dev *brdev) +static inline struct br2684_vcc *pick_outgoing_vcc(const struct sk_buff *skb, + const struct br2684_dev *brdev) { return list_empty(&brdev->brvccs) ? NULL : list_entry_brvcc(brdev->brvccs.next); /* 1 vcc/dev right now */ } @@ -346,9 +349,9 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) /* skb==NULL means VCC is being destroyed */ br2684_close_vcc(brvcc); if (list_empty(&brdev->brvccs)) { - read_lock(&devs_lock); + write_lock_irq(&devs_lock); list_del(&brdev->br2684_devs); - read_unlock(&devs_lock); + write_unlock_irq(&devs_lock); unregister_netdev(net_dev); free_netdev(net_dev); } @@ -377,11 +380,8 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) (skb->data + 6, ethertype_ipv4, sizeof(ethertype_ipv4)) == 0) skb->protocol = __constant_htons(ETH_P_IP); - else { - brdev->stats.rx_errors++; - dev_kfree_skb(skb); - return; - } + else + goto error; skb_pull(skb, sizeof(llc_oui_ipv4)); skb_reset_network_header(skb); skb->pkt_type = PACKET_HOST; @@ -394,44 +394,56 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) (memcmp(skb->data, llc_oui_pid_pad, 7) == 0)) { skb_pull(skb, sizeof(llc_oui_pid_pad)); skb->protocol = eth_type_trans(skb, net_dev); - } else { - brdev->stats.rx_errors++; - dev_kfree_skb(skb); - return; - } + } else + goto error; - } else { - /* first 2 chars should be 0 */ - if (*((u16 *) (skb->data)) != 0) { - brdev->stats.rx_errors++; - dev_kfree_skb(skb); - return; + } else { /* e_vc */ + if (brdev->payload == p_routed) { + struct iphdr *iph; + + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->version == 4) + skb->protocol = __constant_htons(ETH_P_IP); + else if (iph->version == 6) + skb->protocol = __constant_htons(ETH_P_IPV6); + else + goto error; + skb->pkt_type = PACKET_HOST; + } else { /* p_bridged */ + /* first 2 chars should be 0 */ + if (*((u16 *) (skb->data)) != 0) + goto error; + skb_pull(skb, BR2684_PAD_LEN); + skb->protocol = eth_type_trans(skb, net_dev); } - skb_pull(skb, BR2684_PAD_LEN + ETH_HLEN); /* pad, dstmac, srcmac, ethtype */ - skb->protocol = eth_type_trans(skb, net_dev); } #ifdef CONFIG_ATM_BR2684_IPFILTER - if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) { - brdev->stats.rx_dropped++; - dev_kfree_skb(skb); - return; - } + if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) + goto dropped; #endif /* CONFIG_ATM_BR2684_IPFILTER */ skb->dev = net_dev; ATM_SKB(skb)->vcc = atmvcc; /* needed ? */ pr_debug("received packet's protocol: %x\n", ntohs(skb->protocol)); skb_debug(skb); - if (unlikely(!(net_dev->flags & IFF_UP))) { - /* sigh, interface is down */ - brdev->stats.rx_dropped++; - dev_kfree_skb(skb); - return; - } + /* sigh, interface is down? */ + if (unlikely(!(net_dev->flags & IFF_UP))) + goto dropped; brdev->stats.rx_packets++; brdev->stats.rx_bytes += skb->len; memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); netif_rx(skb); + return; + +dropped: + brdev->stats.rx_dropped++; + goto free_skb; +error: + brdev->stats.rx_errors++; +free_skb: + dev_kfree_skb(skb); + return; } /* @@ -518,9 +530,9 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) struct sk_buff *next = skb->next; skb->next = skb->prev = NULL; + br2684_push(atmvcc, skb); BRPRIV(skb->dev)->stats.rx_bytes -= skb->len; BRPRIV(skb->dev)->stats.rx_packets--; - br2684_push(atmvcc, skb); skb = next; } diff --git a/net/atm/clip.c b/net/atm/clip.c index 6f8223ebf551..5b5b96344ce6 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -612,7 +612,7 @@ static int clip_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = arg; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event == NETDEV_UNREGISTER) { diff --git a/net/atm/common.c b/net/atm/common.c index c865517ba449..d34edbe754c8 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -262,7 +262,7 @@ static int adjust_tp(struct atm_trafprm *tp,unsigned char aal) } -static int check_ci(struct atm_vcc *vcc, short vpi, int vci) +static int check_ci(const struct atm_vcc *vcc, short vpi, int vci) { struct hlist_head *head = &vcc_hash[vci & (VCC_HTABLE_SIZE - 1)]; @@ -290,7 +290,7 @@ static int check_ci(struct atm_vcc *vcc, short vpi, int vci) } -static int find_ci(struct atm_vcc *vcc, short *vpi, int *vci) +static int find_ci(const struct atm_vcc *vcc, short *vpi, int *vci) { static short p; /* poor man's per-device cache */ static int c; @@ -646,7 +646,7 @@ static int atm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos) } -static int check_tp(struct atm_trafprm *tp) +static int check_tp(const struct atm_trafprm *tp) { /* @@@ Should be merged with adjust_tp */ if (!tp->traffic_class || tp->traffic_class == ATM_ANYCLASS) return 0; @@ -663,7 +663,7 @@ static int check_tp(struct atm_trafprm *tp) } -static int check_qos(struct atm_qos *qos) +static int check_qos(const struct atm_qos *qos) { int error; diff --git a/net/atm/lec.c b/net/atm/lec.c index 653aca3573ac..5799fb52365a 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -65,36 +65,36 @@ static int lec_close(struct net_device *dev); static struct net_device_stats *lec_get_stats(struct net_device *dev); static void lec_init(struct net_device *dev); static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, - unsigned char *mac_addr); + const unsigned char *mac_addr); static int lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove); /* LANE2 functions */ -static void lane2_associate_ind(struct net_device *dev, u8 *mac_address, - u8 *tlvs, u32 sizeoftlvs); -static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, +static void lane2_associate_ind(struct net_device *dev, const u8 *mac_address, + const u8 *tlvs, u32 sizeoftlvs); +static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force, u8 **tlvs, u32 *sizeoftlvs); -static int lane2_associate_req(struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs); +static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst, + const u8 *tlvs, u32 sizeoftlvs); -static int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, +static int lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long permanent); static void lec_arp_check_empties(struct lec_priv *priv, struct atm_vcc *vcc, struct sk_buff *skb); static void lec_arp_destroy(struct lec_priv *priv); static void lec_arp_init(struct lec_priv *priv); static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, - unsigned char *mac_to_find, + const unsigned char *mac_to_find, int is_rdesc, struct lec_arp_table **ret_entry); -static void lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, - unsigned char *atm_addr, unsigned long remoteflag, +static void lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, + const unsigned char *atm_addr, unsigned long remoteflag, unsigned int targetless_le_arp); static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id); static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc); static void lec_set_flush_tran_id(struct lec_priv *priv, - unsigned char *atm_addr, + const unsigned char *atm_addr, unsigned long tran_id); -static void lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, +static void lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, struct atm_vcc *vcc, void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb)); @@ -634,7 +634,7 @@ static struct atm_dev lecatm_dev = { */ static int send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, - unsigned char *mac_addr, unsigned char *atm_addr, + const unsigned char *mac_addr, const unsigned char *atm_addr, struct sk_buff *data) { struct sock *sk; @@ -705,10 +705,9 @@ static void lec_init(struct net_device *dev) dev->set_multicast_list = lec_set_multicast_list; dev->do_ioctl = NULL; printk("%s: Initialized!\n", dev->name); - return; } -static unsigned char lec_ctrl_magic[] = { +static const unsigned char lec_ctrl_magic[] = { 0xff, 0x00, 0x01, @@ -1276,7 +1275,7 @@ module_exit(lane_module_cleanup); * lec will be used. * If dst_mac == NULL, targetless LE_ARP will be sent */ -static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, +static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force, u8 **tlvs, u32 *sizeoftlvs) { unsigned long flags; @@ -1322,8 +1321,8 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, * Returns 1 for success, 0 for failure (out of memory) * */ -static int lane2_associate_req(struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs) +static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst, + const u8 *tlvs, u32 sizeoftlvs) { int retval; struct sk_buff *skb; @@ -1358,8 +1357,8 @@ static int lane2_associate_req(struct net_device *dev, u8 *lan_dst, * LANE2: 3.1.5, LE_ASSOCIATE.indication * */ -static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr, - u8 *tlvs, u32 sizeoftlvs) +static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, + const u8 *tlvs, u32 sizeoftlvs) { #if 0 int i = 0; @@ -1744,7 +1743,7 @@ static void lec_arp_destroy(struct lec_priv *priv) * Find entry by mac_address */ static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, - unsigned char *mac_addr) + const unsigned char *mac_addr) { struct hlist_node *node; struct hlist_head *head; @@ -1764,7 +1763,7 @@ static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, } static struct lec_arp_table *make_entry(struct lec_priv *priv, - unsigned char *mac_addr) + const unsigned char *mac_addr) { struct lec_arp_table *to_return; @@ -1921,7 +1920,7 @@ restart: * */ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, - unsigned char *mac_to_find, int is_rdesc, + const unsigned char *mac_to_find, int is_rdesc, struct lec_arp_table **ret_entry) { unsigned long flags; @@ -2017,7 +2016,7 @@ out: } static int -lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, +lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long permanent) { unsigned long flags; @@ -2047,8 +2046,8 @@ lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, * Notifies: Response to arp_request (atm_addr != NULL) */ static void -lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, - unsigned char *atm_addr, unsigned long remoteflag, +lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, + const unsigned char *atm_addr, unsigned long remoteflag, unsigned int targetless_le_arp) { unsigned long flags; @@ -2148,7 +2147,7 @@ out: * Notifies: Vcc setup ready */ static void -lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, +lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, struct atm_vcc *vcc, void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb)) { @@ -2336,7 +2335,7 @@ restart: static void lec_set_flush_tran_id(struct lec_priv *priv, - unsigned char *atm_addr, unsigned long tran_id) + const unsigned char *atm_addr, unsigned long tran_id) { unsigned long flags; struct hlist_node *node; diff --git a/net/atm/lec.h b/net/atm/lec.h index b41cda7ea1e1..0d376682c1a3 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -42,12 +42,12 @@ struct lecdatahdr_8025 { * */ struct lane2_ops { - int (*resolve) (struct net_device *dev, u8 *dst_mac, int force, + int (*resolve) (struct net_device *dev, const u8 *dst_mac, int force, u8 **tlvs, u32 *sizeoftlvs); - int (*associate_req) (struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs); - void (*associate_indicator) (struct net_device *dev, u8 *mac_addr, - u8 *tlvs, u32 sizeoftlvs); + int (*associate_req) (struct net_device *dev, const u8 *lan_dst, + const u8 *tlvs, u32 sizeoftlvs); + void (*associate_indicator) (struct net_device *dev, const u8 *mac_addr, + const u8 *tlvs, u32 sizeoftlvs); }; /* diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 9db332e7a6c0..4fccaa1e07be 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -964,7 +964,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo dev = (struct net_device *)dev_ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->name == NULL || strncmp(dev->name, "lec", 3)) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 2712544cf0ca..01c83e2a4c19 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -116,7 +116,7 @@ static int ax25_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* Reject non AX.25 devices */ @@ -893,13 +893,11 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) sk->sk_destruct = ax25_free_sock; sk->sk_type = osk->sk_type; - sk->sk_socket = osk->sk_socket; sk->sk_priority = osk->sk_priority; sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; - sk->sk_sleep = osk->sk_sleep; sock_copy_flags(sk, osk); oax25 = ax25_sk(osk); @@ -1361,13 +1359,11 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) goto out; newsk = skb->sk; - newsk->sk_socket = newsock; - newsk->sk_sleep = &newsock->wait; + sock_graft(newsk, newsock); /* Now attach up the new socket */ kfree_skb(skb); sk->sk_ack_backlog--; - newsock->sk = newsk; newsock->state = SS_CONNECTED; out: diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 33790a8efbc8..4a5ba978a804 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -451,7 +451,7 @@ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! */ - if (dev_net(dev) != &init_net) { + if (!net_eq(dev_net(dev), &init_net)) { kfree_skb(skb); return 0; } diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index 96e4b9273250..cdc7e751ef36 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -39,11 +39,9 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: - /* Magic here: If we listen() and a new link dies before it - is accepted() it isn't 'dead' so doesn't get removed. */ - if (!sk || sock_flag(sk, SOCK_DESTROY) || - (sk->sk_state == TCP_LISTEN && - sock_flag(sk, SOCK_DEAD))) { + if (!sk || + sock_flag(sk, SOCK_DESTROY) || + sock_flag(sk, SOCK_DEAD)) { if (sk) { sock_hold(sk); ax25_destroy_socket(ax25); diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index d8f215733175..034aa10a5198 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -64,20 +64,15 @@ void ax25_frames_acked(ax25_cb *ax25, unsigned short nr) void ax25_requeue_frames(ax25_cb *ax25) { - struct sk_buff *skb, *skb_prev = NULL; + struct sk_buff *skb; /* * Requeue all the un-ack-ed frames on the output queue to be picked * up by ax25_kick called from the timer. This arrangement handles the * possibility of an empty output queue. */ - while ((skb = skb_dequeue(&ax25->ack_queue)) != NULL) { - if (skb_prev == NULL) - skb_queue_head(&ax25->write_queue, skb); - else - skb_append(skb_prev, skb, &ax25->write_queue); - skb_prev = skb; - } + while ((skb = skb_dequeue_tail(&ax25->ack_queue)) != NULL) + skb_queue_head(&ax25->write_queue, skb); } /* diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index d366423c8392..4e59df5f8e05 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -36,6 +36,7 @@ #include <linux/init.h> #include <linux/poll.h> #include <net/sock.h> +#include <asm/ioctls.h> #if defined(CONFIG_KMOD) #include <linux/kmod.h> @@ -48,7 +49,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.11" +#define VERSION "2.12" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 @@ -266,6 +267,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (err == 0) + sock_recv_timestamp(msg, sk, skb); skb_free_datagram(sk, skb); @@ -329,6 +332,54 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w } EXPORT_SYMBOL(bt_sock_poll); +int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + struct sk_buff *skb; + long amount; + int err; + + BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg); + + switch (cmd) { + case TIOCOUTQ: + if (sk->sk_state == BT_LISTEN) + return -EINVAL; + + amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + if (amount < 0) + amount = 0; + err = put_user(amount, (int __user *) arg); + break; + + case TIOCINQ: + if (sk->sk_state == BT_LISTEN) + return -EINVAL; + + lock_sock(sk); + skb = skb_peek(&sk->sk_receive_queue); + amount = skb ? skb->len : 0; + release_sock(sk); + err = put_user(amount, (int __user *) arg); + break; + + case SIOCGSTAMP: + err = sock_get_timestamp(sk, (struct timeval __user *) arg); + break; + + case SIOCGSTAMPNS: + err = sock_get_timestampns(sk, (struct timespec __user *) arg); + break; + + default: + err = -ENOIOCTLCMD; + break; + } + + return err; +} +EXPORT_SYMBOL(bt_sock_ioctl); + int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) { DECLARE_WAITQUEUE(wait, current); diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index e69244dd8de8..b69bf4e7c48b 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -16,10 +16,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* - * $Id: bnep.h,v 1.5 2002/08/04 21:23:58 maxk Exp $ - */ - #ifndef _BNEP_H #define _BNEP_H diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index f85d94643aaf..021172c0e666 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -25,10 +25,6 @@ SOFTWARE IS DISCLAIMED. */ -/* - * $Id: core.c,v 1.20 2002/08/04 21:23:58 maxk Exp $ - */ - #include <linux/module.h> #include <linux/kernel.h> @@ -507,6 +503,11 @@ static int bnep_session(void *arg) /* Delete network device */ unregister_netdev(dev); + /* Wakeup user-space polling for socket errors */ + s->sock->sk->sk_err = EUNATCH; + + wake_up_interruptible(s->sock->sk->sk_sleep); + /* Release the socket */ fput(s->sock->file); diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 95e3837e4312..d9fa0ab2c87f 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -25,10 +25,6 @@ SOFTWARE IS DISCLAIMED. */ -/* - * $Id: netdev.c,v 1.8 2002/08/04 21:23:58 maxk Exp $ - */ - #include <linux/module.h> #include <linux/socket.h> diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 201e5b1ce473..8ffb57f2303a 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -24,10 +24,6 @@ SOFTWARE IS DISCLAIMED. */ -/* - * $Id: sock.c,v 1.4 2002/08/04 21:23:58 maxk Exp $ - */ - #include <linux/module.h> #include <linux/types.h> diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f8880261da0e..ca8d05245ca0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -59,24 +59,31 @@ void hci_acl_connect(struct hci_conn *conn) BT_DBG("%p", conn); conn->state = BT_CONNECT; - conn->out = 1; + conn->out = 1; + conn->link_mode = HCI_LM_MASTER; conn->attempt++; + conn->link_policy = hdev->link_policy; + memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); cp.pscan_rep_mode = 0x02; - if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst)) && - inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { - cp.pscan_rep_mode = ie->data.pscan_rep_mode; - cp.pscan_mode = ie->data.pscan_mode; - cp.clock_offset = ie->data.clock_offset | cpu_to_le16(0x8000); + if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) { + if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { + cp.pscan_rep_mode = ie->data.pscan_rep_mode; + cp.pscan_mode = ie->data.pscan_mode; + cp.clock_offset = ie->data.clock_offset | + cpu_to_le16(0x8000); + } + memcpy(conn->dev_class, ie->data.dev_class, 3); + conn->ssp_mode = ie->data.ssp_mode; } - cp.pkt_type = cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK); + cp.pkt_type = cpu_to_le16(conn->pkt_type); if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER)) cp.role_switch = 0x01; else @@ -122,7 +129,7 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle) conn->out = 1; cp.handle = cpu_to_le16(handle); - cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); + cp.pkt_type = cpu_to_le16(conn->pkt_type); hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp); } @@ -138,7 +145,7 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) conn->out = 1; cp.handle = cpu_to_le16(handle); - cp.pkt_type = cpu_to_le16(hdev->esco_type); + cp.pkt_type = cpu_to_le16(conn->pkt_type); cp.tx_bandwidth = cpu_to_le32(0x00001f40); cp.rx_bandwidth = cpu_to_le32(0x00001f40); @@ -163,11 +170,13 @@ static void hci_conn_timeout(unsigned long arg) switch (conn->state) { case BT_CONNECT: + case BT_CONNECT2: if (conn->type == ACL_LINK) hci_acl_connect_cancel(conn); else hci_acl_disconn(conn, 0x13); break; + case BT_CONFIG: case BT_CONNECTED: hci_acl_disconn(conn, 0x13); break; @@ -199,13 +208,28 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) return NULL; bacpy(&conn->dst, dst); - conn->hdev = hdev; - conn->type = type; - conn->mode = HCI_CM_ACTIVE; - conn->state = BT_OPEN; + conn->hdev = hdev; + conn->type = type; + conn->mode = HCI_CM_ACTIVE; + conn->state = BT_OPEN; conn->power_save = 1; + switch (type) { + case ACL_LINK: + conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK; + break; + case SCO_LINK: + if (lmp_esco_capable(hdev)) + conn->pkt_type = hdev->esco_type & SCO_ESCO_MASK; + else + conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK; + break; + case ESCO_LINK: + conn->pkt_type = hdev->esco_type; + break; + } + skb_queue_head_init(&conn->data_q); setup_timer(&conn->disc_timer, hci_conn_timeout, (unsigned long)conn); @@ -221,8 +245,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); - hci_conn_add_sysfs(conn); - tasklet_enable(&hdev->tx_task); return conn; @@ -254,12 +276,14 @@ int hci_conn_del(struct hci_conn *conn) } tasklet_disable(&hdev->tx_task); + hci_conn_hash_del(hdev, conn); if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); + tasklet_enable(&hdev->tx_task); + skb_queue_purge(&conn->data_q); - hci_conn_del_sysfs(conn); return 0; } @@ -355,13 +379,21 @@ int hci_conn_auth(struct hci_conn *conn) { BT_DBG("conn %p", conn); + if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) { + if (!(conn->auth_type & 0x01)) { + conn->auth_type = HCI_AT_GENERAL_BONDING_MITM; + conn->link_mode &= ~HCI_LM_AUTH; + } + } + if (conn->link_mode & HCI_LM_AUTH) return 1; if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { struct hci_cp_auth_requested cp; cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, + sizeof(cp), &cp); } return 0; } @@ -373,7 +405,7 @@ int hci_conn_encrypt(struct hci_conn *conn) BT_DBG("conn %p", conn); if (conn->link_mode & HCI_LM_ENCRYPT) - return 1; + return hci_conn_auth(conn); if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) return 0; @@ -382,7 +414,8 @@ int hci_conn_encrypt(struct hci_conn *conn) struct hci_cp_set_conn_encrypt cp; cp.handle = cpu_to_le16(conn->handle); cp.encrypt = 1; - hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT, + sizeof(cp), &cp); } return 0; } @@ -396,7 +429,8 @@ int hci_conn_change_link_key(struct hci_conn *conn) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { struct hci_cp_change_conn_link_key cp; cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, + sizeof(cp), &cp); } return 0; } @@ -498,6 +532,8 @@ void hci_conn_hash_flush(struct hci_dev *hdev) c->state = BT_CLOSED; + hci_conn_del_sysfs(c); + hci_proto_disconn_ind(c, 0x16); hci_conn_del(c); } @@ -600,3 +636,23 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg) return copy_to_user(ptr, &ci, sizeof(ci)) ? -EFAULT : 0; } + +int hci_get_auth_info(struct hci_dev *hdev, void __user *arg) +{ + struct hci_auth_info_req req; + struct hci_conn *conn; + + if (copy_from_user(&req, arg, sizeof(req))) + return -EFAULT; + + hci_dev_lock_bh(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &req.bdaddr); + if (conn) + req.type = conn->auth_type; + hci_dev_unlock_bh(hdev); + + if (!conn) + return -ENOENT; + + return copy_to_user(arg, &req, sizeof(req)) ? -EFAULT : 0; +} diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index aec6929f5c16..f5b21cb93699 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -279,10 +279,20 @@ static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt) BT_DBG("%s %x", hdev->name, encrypt); - /* Authentication */ + /* Encryption */ hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); } +static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) +{ + __le16 policy = cpu_to_le16(opt); + + BT_DBG("%s %x", hdev->name, opt); + + /* Default link policy */ + hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); +} + /* Get HCI device by index. * Device is held on return. */ struct hci_dev *hci_dev_get(int index) @@ -694,32 +704,35 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) msecs_to_jiffies(HCI_INIT_TIMEOUT)); break; - case HCISETPTYPE: - hdev->pkt_type = (__u16) dr.dev_opt; - break; - case HCISETLINKPOL: - hdev->link_policy = (__u16) dr.dev_opt; + err = hci_request(hdev, hci_linkpol_req, dr.dev_opt, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); break; case HCISETLINKMODE: - hdev->link_mode = ((__u16) dr.dev_opt) & (HCI_LM_MASTER | HCI_LM_ACCEPT); + hdev->link_mode = ((__u16) dr.dev_opt) & + (HCI_LM_MASTER | HCI_LM_ACCEPT); + break; + + case HCISETPTYPE: + hdev->pkt_type = (__u16) dr.dev_opt; break; case HCISETACLMTU: - hdev->acl_mtu = *((__u16 *)&dr.dev_opt + 1); - hdev->acl_pkts = *((__u16 *)&dr.dev_opt + 0); + hdev->acl_mtu = *((__u16 *) &dr.dev_opt + 1); + hdev->acl_pkts = *((__u16 *) &dr.dev_opt + 0); break; case HCISETSCOMTU: - hdev->sco_mtu = *((__u16 *)&dr.dev_opt + 1); - hdev->sco_pkts = *((__u16 *)&dr.dev_opt + 0); + hdev->sco_mtu = *((__u16 *) &dr.dev_opt + 1); + hdev->sco_pkts = *((__u16 *) &dr.dev_opt + 0); break; default: err = -EINVAL; break; } + hci_dev_put(hdev); return err; } @@ -1270,9 +1283,12 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int struct hci_conn *c; c = list_entry(p, struct hci_conn, list); - if (c->type != type || c->state != BT_CONNECTED - || skb_queue_empty(&c->data_q)) + if (c->type != type || skb_queue_empty(&c->data_q)) + continue; + + if (c->state != BT_CONNECTED && c->state != BT_CONFIG) continue; + num++; if (c->sent < min) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6aef8f24e581..0e3db289f4be 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -110,6 +110,25 @@ static void hci_cc_role_discovery(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); } +static void hci_cc_read_link_policy(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_link_policy *rp = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (conn) + conn->link_policy = __le16_to_cpu(rp->policy); + + hci_dev_unlock(hdev); +} + static void hci_cc_write_link_policy(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_write_link_policy *rp = (void *) skb->data; @@ -128,13 +147,41 @@ static void hci_cc_write_link_policy(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); - if (conn) { + if (conn) conn->link_policy = get_unaligned_le16(sent + 2); - } hci_dev_unlock(hdev); } +static void hci_cc_read_def_link_policy(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_def_link_policy *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->link_policy = __le16_to_cpu(rp->policy); +} + +static void hci_cc_write_def_link_policy(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; + + BT_DBG("%s status 0x%x", hdev->name, status); + + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_DEF_LINK_POLICY); + if (!sent) + return; + + if (!status) + hdev->link_policy = get_unaligned_le16(sent); + + hci_req_complete(hdev, status); +} + static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -151,12 +198,14 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LOCAL_NAME); if (!sent) return; - if (!status) - memcpy(hdev->dev_name, sent, 248); + memcpy(hdev->dev_name, sent, 248); } static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -266,12 +315,14 @@ static void hci_cc_write_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_CLASS_OF_DEV); if (!sent) return; - if (!status) - memcpy(hdev->dev_class, sent, 3); + memcpy(hdev->dev_class, sent, 3); } static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) @@ -286,7 +337,7 @@ static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) setting = __le16_to_cpu(rp->voice_setting); - if (hdev->voice_setting == setting ) + if (hdev->voice_setting == setting) return; hdev->voice_setting = setting; @@ -303,28 +354,31 @@ static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) static void hci_cc_write_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); + __u16 setting; void *sent; BT_DBG("%s status 0x%x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_VOICE_SETTING); if (!sent) return; - if (!status) { - __u16 setting = get_unaligned_le16(sent); + setting = get_unaligned_le16(sent); - if (hdev->voice_setting != setting) { - hdev->voice_setting = setting; + if (hdev->voice_setting == setting) + return; - BT_DBG("%s voice setting 0x%04x", hdev->name, setting); + hdev->voice_setting = setting; - if (hdev->notify) { - tasklet_disable(&hdev->tx_task); - hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); - tasklet_enable(&hdev->tx_task); - } - } + BT_DBG("%s voice setting 0x%04x", hdev->name, setting); + + if (hdev->notify) { + tasklet_disable(&hdev->tx_task); + hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); + tasklet_enable(&hdev->tx_task); } } @@ -337,6 +391,35 @@ static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) hci_req_complete(hdev, status); } +static void hci_cc_read_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_ssp_mode *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->ssp_mode = rp->mode; +} + +static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; + + BT_DBG("%s status 0x%x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SSP_MODE); + if (!sent) + return; + + hdev->ssp_mode = *((__u8 *) sent); +} + static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_read_local_version *rp = (void *) skb->data; @@ -347,8 +430,8 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) return; hdev->hci_ver = rp->hci_ver; - hdev->hci_rev = btohs(rp->hci_rev); - hdev->manufacturer = btohs(rp->manufacturer); + hdev->hci_rev = __le16_to_cpu(rp->hci_rev); + hdev->manufacturer = __le16_to_cpu(rp->manufacturer); BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name, hdev->manufacturer, @@ -536,11 +619,119 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status) hci_dev_unlock(hdev); } +static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_auth_requested *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_AUTH_REQUESTED); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + if (conn->state == BT_CONFIG) { + hci_proto_connect_cfm(conn, status); + hci_conn_put(conn); + } + } + + hci_dev_unlock(hdev); +} + +static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_set_conn_encrypt *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_SET_CONN_ENCRYPT); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + if (conn->state == BT_CONFIG) { + hci_proto_connect_cfm(conn, status); + hci_conn_put(conn); + } + } + + hci_dev_unlock(hdev); +} + static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%x", hdev->name, status); } +static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_read_remote_features *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_REMOTE_FEATURES); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + if (conn->state == BT_CONFIG) { + hci_proto_connect_cfm(conn, status); + hci_conn_put(conn); + } + } + + hci_dev_unlock(hdev); +} + +static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_read_remote_ext_features *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_REMOTE_EXT_FEATURES); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + if (conn->state == BT_CONFIG) { + hci_proto_connect_cfm(conn, status); + hci_conn_put(conn); + } + } + + hci_dev_unlock(hdev); +} + static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status) { struct hci_cp_setup_sync_conn *cp; @@ -653,6 +844,7 @@ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff * memcpy(data.dev_class, info->dev_class, 3); data.clock_offset = info->clock_offset; data.rssi = 0x00; + data.ssp_mode = 0x00; info++; hci_inquiry_cache_update(hdev, &data); } @@ -675,7 +867,14 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); - conn->state = BT_CONNECTED; + + if (conn->type == ACL_LINK) { + conn->state = BT_CONFIG; + hci_conn_hold(conn); + } else + conn->state = BT_CONNECTED; + + hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) conn->link_mode |= HCI_LM_AUTH; @@ -687,30 +886,17 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn->type == ACL_LINK) { struct hci_cp_read_remote_features cp; cp.handle = ev->handle; - hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); - } - - /* Set link policy */ - if (conn->type == ACL_LINK && hdev->link_policy) { - struct hci_cp_write_link_policy cp; - cp.handle = ev->handle; - cp.policy = cpu_to_le16(hdev->link_policy); - hci_send_cmd(hdev, HCI_OP_WRITE_LINK_POLICY, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, + sizeof(cp), &cp); } /* Set packet type for incoming connection */ - if (!conn->out) { + if (!conn->out && hdev->hci_ver < 3) { struct hci_cp_change_conn_ptype cp; cp.handle = ev->handle; - cp.pkt_type = (conn->type == ACL_LINK) ? - cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK): - cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); - - hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp), &cp); - } else { - /* Update disconnect timer */ - hci_conn_hold(conn); - hci_conn_put(conn); + cp.pkt_type = cpu_to_le16(conn->pkt_type); + hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, + sizeof(cp), &cp); } } else conn->state = BT_CLOSED; @@ -730,9 +916,10 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s } } - hci_proto_connect_cfm(conn, ev->status); - if (ev->status) + if (ev->status) { + hci_proto_connect_cfm(conn, ev->status); hci_conn_del(conn); + } unlock: hci_dev_unlock(hdev); @@ -752,10 +939,14 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk if (mask & HCI_LM_ACCEPT) { /* Connection accepted */ + struct inquiry_entry *ie; struct hci_conn *conn; hci_dev_lock(hdev); + if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) + memcpy(ie->data.dev_class, ev->dev_class, 3); + conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) { @@ -786,7 +977,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk struct hci_cp_accept_sync_conn_req cp; bacpy(&cp.bdaddr, &ev->bdaddr); - cp.pkt_type = cpu_to_le16(hdev->esco_type); + cp.pkt_type = cpu_to_le16(conn->pkt_type); cp.tx_bandwidth = cpu_to_le32(0x00001f40); cp.rx_bandwidth = cpu_to_le32(0x00001f40); @@ -822,6 +1013,9 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { conn->state = BT_CLOSED; + + hci_conn_del_sysfs(conn); + hci_proto_disconn_ind(conn, ev->reason); hci_conn_del(conn); } @@ -845,15 +1039,29 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); - hci_auth_cfm(conn, ev->status); + if (conn->state == BT_CONFIG) { + if (!ev->status && hdev->ssp_mode > 0 && + conn->ssp_mode > 0) { + struct hci_cp_set_conn_encrypt cp; + cp.handle = ev->handle; + cp.encrypt = 0x01; + hci_send_cmd(hdev, HCI_OP_SET_CONN_ENCRYPT, + sizeof(cp), &cp); + } else { + conn->state = BT_CONNECTED; + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); + } + } else + hci_auth_cfm(conn, ev->status); if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) { if (!ev->status) { struct hci_cp_set_conn_encrypt cp; - cp.handle = cpu_to_le16(conn->handle); - cp.encrypt = 1; - hci_send_cmd(conn->hdev, - HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp); + cp.handle = ev->handle; + cp.encrypt = 0x01; + hci_send_cmd(hdev, HCI_OP_SET_CONN_ENCRYPT, + sizeof(cp), &cp); } else { clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend); hci_encrypt_cfm(conn, ev->status, 0x00); @@ -883,15 +1091,24 @@ static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff * conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { if (!ev->status) { - if (ev->encrypt) + if (ev->encrypt) { + /* Encryption implies authentication */ + conn->link_mode |= HCI_LM_AUTH; conn->link_mode |= HCI_LM_ENCRYPT; - else + } else conn->link_mode &= ~HCI_LM_ENCRYPT; } clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend); - hci_encrypt_cfm(conn, ev->status, ev->encrypt); + if (conn->state == BT_CONFIG) { + if (!ev->status) + conn->state = BT_CONNECTED; + + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); + } else + hci_encrypt_cfm(conn, ev->status, ev->encrypt); } hci_dev_unlock(hdev); @@ -926,14 +1143,29 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff BT_DBG("%s status %d", hdev->name, ev->status); - if (ev->status) - return; - hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn) - memcpy(conn->features, ev->features, 8); + if (conn) { + if (!ev->status) + memcpy(conn->features, ev->features, 8); + + if (conn->state == BT_CONFIG) { + if (!ev->status && lmp_ssp_capable(hdev) && + lmp_ssp_capable(conn)) { + struct hci_cp_read_remote_ext_features cp; + cp.handle = ev->handle; + cp.page = 0x01; + hci_send_cmd(hdev, + HCI_OP_READ_REMOTE_EXT_FEATURES, + sizeof(cp), &cp); + } else { + conn->state = BT_CONNECTED; + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); + } + } + } hci_dev_unlock(hdev); } @@ -974,10 +1206,22 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_role_discovery(hdev, skb); break; + case HCI_OP_READ_LINK_POLICY: + hci_cc_read_link_policy(hdev, skb); + break; + case HCI_OP_WRITE_LINK_POLICY: hci_cc_write_link_policy(hdev, skb); break; + case HCI_OP_READ_DEF_LINK_POLICY: + hci_cc_read_def_link_policy(hdev, skb); + break; + + case HCI_OP_WRITE_DEF_LINK_POLICY: + hci_cc_write_def_link_policy(hdev, skb); + break; + case HCI_OP_RESET: hci_cc_reset(hdev, skb); break; @@ -1022,6 +1266,14 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_host_buffer_size(hdev, skb); break; + case HCI_OP_READ_SSP_MODE: + hci_cc_read_ssp_mode(hdev, skb); + break; + + case HCI_OP_WRITE_SSP_MODE: + hci_cc_write_ssp_mode(hdev, skb); + break; + case HCI_OP_READ_LOCAL_VERSION: hci_cc_read_local_version(hdev, skb); break; @@ -1076,10 +1328,26 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cs_add_sco(hdev, ev->status); break; + case HCI_OP_AUTH_REQUESTED: + hci_cs_auth_requested(hdev, ev->status); + break; + + case HCI_OP_SET_CONN_ENCRYPT: + hci_cs_set_conn_encrypt(hdev, ev->status); + break; + case HCI_OP_REMOTE_NAME_REQ: hci_cs_remote_name_req(hdev, ev->status); break; + case HCI_OP_READ_REMOTE_FEATURES: + hci_cs_read_remote_features(hdev, ev->status); + break; + + case HCI_OP_READ_REMOTE_EXT_FEATURES: + hci_cs_read_remote_ext_features(hdev, ev->status); + break; + case HCI_OP_SETUP_SYNC_CONN: hci_cs_setup_sync_conn(hdev, ev->status); break; @@ -1235,6 +1503,22 @@ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *sk hci_dev_unlock(hdev); } +static inline void hci_pkt_type_change_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_pkt_type_change *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn && !ev->status) + conn->pkt_type = __le16_to_cpu(ev->pkt_type); + + hci_dev_unlock(hdev); +} + static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_pscan_rep_mode *ev = (void *) skb->data; @@ -1275,6 +1559,7 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct memcpy(data.dev_class, info->dev_class, 3); data.clock_offset = info->clock_offset; data.rssi = info->rssi; + data.ssp_mode = 0x00; info++; hci_inquiry_cache_update(hdev, &data); } @@ -1289,6 +1574,7 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct memcpy(data.dev_class, info->dev_class, 3); data.clock_offset = info->clock_offset; data.rssi = info->rssi; + data.ssp_mode = 0x00; info++; hci_inquiry_cache_update(hdev, &data); } @@ -1299,7 +1585,43 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_remote_ext_features *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn) { + if (!ev->status && ev->page == 0x01) { + struct inquiry_entry *ie; + + if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) + ie->data.ssp_mode = (ev->features[0] & 0x01); + + conn->ssp_mode = (ev->features[0] & 0x01); + } + + if (conn->state == BT_CONFIG) { + if (!ev->status && hdev->ssp_mode > 0 && + conn->ssp_mode > 0) { + if (conn->out) { + struct hci_cp_auth_requested cp; + cp.handle = ev->handle; + hci_send_cmd(hdev, + HCI_OP_AUTH_REQUESTED, + sizeof(cp), &cp); + } + } else { + conn->state = BT_CONNECTED; + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); + } + } + } + + hci_dev_unlock(hdev); } static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1312,12 +1634,22 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); - if (!conn) - goto unlock; + if (!conn) { + if (ev->link_type == ESCO_LINK) + goto unlock; + + conn = hci_conn_hash_lookup_ba(hdev, ESCO_LINK, &ev->bdaddr); + if (!conn) + goto unlock; + + conn->type = SCO_LINK; + } if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; + + hci_conn_add_sysfs(conn); } else conn->state = BT_CLOSED; @@ -1371,6 +1703,7 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct memcpy(data.dev_class, info->dev_class, 3); data.clock_offset = info->clock_offset; data.rssi = info->rssi; + data.ssp_mode = 0x01; info++; hci_inquiry_cache_update(hdev, &data); } @@ -1378,6 +1711,53 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct hci_dev_unlock(hdev); } +static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_io_capa_request *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) + hci_conn_hold(conn); + + hci_dev_unlock(hdev); +} + +static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_simple_pair_complete *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) + hci_conn_put(conn); + + hci_dev_unlock(hdev); +} + +static inline void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_remote_host_features *ev = (void *) skb->data; + struct inquiry_entry *ie; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) + ie->data.ssp_mode = (ev->features[0] & 0x01); + + hci_dev_unlock(hdev); +} + void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_event_hdr *hdr = (void *) skb->data; @@ -1470,6 +1850,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_clock_offset_evt(hdev, skb); break; + case HCI_EV_PKT_TYPE_CHANGE: + hci_pkt_type_change_evt(hdev, skb); + break; + case HCI_EV_PSCAN_REP_MODE: hci_pscan_rep_mode_evt(hdev, skb); break; @@ -1498,6 +1882,18 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_extended_inquiry_result_evt(hdev, skb); break; + case HCI_EV_IO_CAPA_REQUEST: + hci_io_capa_request_evt(hdev, skb); + break; + + case HCI_EV_SIMPLE_PAIR_COMPLETE: + hci_simple_pair_complete_evt(hdev, skb); + break; + + case HCI_EV_REMOTE_HOST_FEATURES: + hci_remote_host_features_evt(hdev, skb); + break; + default: BT_DBG("%s event 0x%x", hdev->name, event); break; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 747fabd735d2..d62579b67959 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -193,19 +193,11 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign return 0; - case HCISETSECMGR: - if (!capable(CAP_NET_ADMIN)) - return -EACCES; - - if (arg) - set_bit(HCI_SECMGR, &hdev->flags); - else - clear_bit(HCI_SECMGR, &hdev->flags); - - return 0; - case HCIGETCONNINFO: - return hci_get_conn_info(hdev, (void __user *)arg); + return hci_get_conn_info(hdev, (void __user *) arg); + + case HCIGETAUTHINFO: + return hci_get_auth_info(hdev, (void __user *) arg); default: if (hdev->ioctl) @@ -217,7 +209,7 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; - void __user *argp = (void __user *)arg; + void __user *argp = (void __user *) arg; int err; BT_DBG("cmd %x arg %lx", cmd, arg); diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 84360c117d4e..c85bf8f678dc 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -113,11 +113,13 @@ static ssize_t show_inquiry_cache(struct device *dev, struct device_attribute *a struct inquiry_data *data = &e->data; bdaddr_t bdaddr; baswap(&bdaddr, &data->bdaddr); - n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %u\n", + n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", batostr(&bdaddr), - data->pscan_rep_mode, data->pscan_period_mode, data->pscan_mode, - data->dev_class[2], data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), data->rssi, e->timestamp); + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); } hci_dev_unlock_bh(hdev); @@ -249,15 +251,28 @@ static ssize_t show_conn_address(struct device *dev, struct device_attribute *at return sprintf(buf, "%s\n", batostr(&bdaddr)); } +static ssize_t show_conn_features(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_conn *conn = dev_get_drvdata(dev); + + return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", + conn->features[0], conn->features[1], + conn->features[2], conn->features[3], + conn->features[4], conn->features[5], + conn->features[6], conn->features[7]); +} + #define CONN_ATTR(_name,_mode,_show,_store) \ struct device_attribute conn_attr_##_name = __ATTR(_name,_mode,_show,_store) static CONN_ATTR(type, S_IRUGO, show_conn_type, NULL); static CONN_ATTR(address, S_IRUGO, show_conn_address, NULL); +static CONN_ATTR(features, S_IRUGO, show_conn_features, NULL); static struct device_attribute *conn_attrs[] = { &conn_attr_type, &conn_attr_address, + &conn_attr_features, NULL }; @@ -296,7 +311,6 @@ static void add_conn(struct work_struct *work) void hci_conn_add_sysfs(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - bdaddr_t *ba = &conn->dst; BT_DBG("conn %p", conn); @@ -305,11 +319,8 @@ void hci_conn_add_sysfs(struct hci_conn *conn) conn->dev.release = bt_release; - snprintf(conn->dev.bus_id, BUS_ID_SIZE, - "%s%2.2X%2.2X%2.2X%2.2X%2.2X%2.2X", - conn->type == ACL_LINK ? "acl" : "sco", - ba->b[5], ba->b[4], ba->b[3], - ba->b[2], ba->b[1], ba->b[0]); + snprintf(conn->dev.bus_id, BUS_ID_SIZE, "%s:%d", + hdev->name, conn->handle); dev_set_drvdata(&conn->dev, conn); @@ -387,10 +398,6 @@ int hci_register_sysfs(struct hci_dev *hdev) if (device_create_file(dev, bt_attrs[i]) < 0) BT_ERR("Failed to create device attribute"); - if (sysfs_create_link(&bt_class->subsys.kobj, - &dev->kobj, kobject_name(&dev->kobj)) < 0) - BT_ERR("Failed to create class symlink"); - return 0; } @@ -398,9 +405,6 @@ void hci_unregister_sysfs(struct hci_dev *hdev) { BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); - sysfs_remove_link(&bt_class->subsys.kobj, - kobject_name(&hdev->dev.kobj)); - device_del(&hdev->dev); } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 519cdb920f93..96434d774c84 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -581,6 +581,12 @@ static int hidp_session(void *arg) hid_free_device(session->hid); } + /* Wakeup user-space polling for socket errors */ + session->intr_sock->sk->sk_err = EUNATCH; + session->ctrl_sock->sk->sk_err = EUNATCH; + + hidp_schedule(session); + fput(session->intr_sock->file); wait_event_timeout(*(ctrl_sk->sk_sleep), @@ -879,6 +885,10 @@ int hidp_del_connection(struct hidp_conndel_req *req) skb_queue_purge(&session->ctrl_transmit); skb_queue_purge(&session->intr_transmit); + /* Wakeup user-space polling for socket errors */ + session->intr_sock->sk->sk_err = EUNATCH; + session->ctrl_sock->sk->sk_err = EUNATCH; + /* Kill session thread */ atomic_inc(&session->terminate); hidp_schedule(session); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 6e180d255505..c1239852834a 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -55,7 +55,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.9" +#define VERSION "2.10" static u32 l2cap_feat_mask = 0x0000; @@ -76,11 +76,21 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, static void l2cap_sock_timeout(unsigned long arg) { struct sock *sk = (struct sock *) arg; + int reason; BT_DBG("sock %p state %d", sk, sk->sk_state); bh_lock_sock(sk); - __l2cap_sock_close(sk, ETIMEDOUT); + + if (sk->sk_state == BT_CONNECT && + (l2cap_pi(sk)->link_mode & (L2CAP_LM_AUTH | + L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE))) + reason = ECONNREFUSED; + else + reason = ETIMEDOUT; + + __l2cap_sock_close(sk, reason); + bh_unlock_sock(sk); l2cap_sock_kill(sk); @@ -240,7 +250,7 @@ static void l2cap_chan_del(struct sock *sk, int err) hci_conn_put(conn->hcon); } - sk->sk_state = BT_CLOSED; + sk->sk_state = BT_CLOSED; sock_set_flag(sk, SOCK_ZAPPED); if (err) @@ -253,6 +263,21 @@ static void l2cap_chan_del(struct sock *sk, int err) sk->sk_state_change(sk); } +/* Service level security */ +static inline int l2cap_check_link_mode(struct sock *sk) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + + if ((l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) || + (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) + return hci_conn_encrypt(conn->hcon); + + if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH) + return hci_conn_auth(conn->hcon); + + return 1; +} + static inline u8 l2cap_get_ident(struct l2cap_conn *conn) { u8 id; @@ -287,6 +312,36 @@ static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 return hci_send_acl(conn->hcon, skb, 0); } +static void l2cap_do_start(struct sock *sk) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + + if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { + if (l2cap_check_link_mode(sk)) { + struct l2cap_conn_req req; + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } + } else { + struct l2cap_info_req req; + req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; + conn->info_ident = l2cap_get_ident(conn); + + mod_timer(&conn->info_timer, jiffies + + msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); + + l2cap_send_cmd(conn, conn->info_ident, + L2CAP_INFO_REQ, sizeof(req), &req); + } +} + /* ---- L2CAP connections ---- */ static void l2cap_conn_start(struct l2cap_conn *conn) { @@ -301,16 +356,37 @@ static void l2cap_conn_start(struct l2cap_conn *conn) bh_lock_sock(sk); if (sk->sk_type != SOCK_SEQPACKET) { - l2cap_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; - sk->sk_state_change(sk); - } else if (sk->sk_state == BT_CONNECT) { - struct l2cap_conn_req req; - l2cap_pi(sk)->ident = l2cap_get_ident(conn); - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + bh_unlock_sock(sk); + continue; + } + + if (sk->sk_state == BT_CONNECT) { + if (l2cap_check_link_mode(sk)) { + struct l2cap_conn_req req; + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); + } + } else if (sk->sk_state == BT_CONNECT2) { + struct l2cap_conn_rsp rsp; + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + + if (l2cap_check_link_mode(sk)) { + sk->sk_state = BT_CONFIG; + rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + } else { + rsp.result = cpu_to_le16(L2CAP_CR_PEND); + rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND); + } + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); } bh_unlock_sock(sk); @@ -321,22 +397,27 @@ static void l2cap_conn_start(struct l2cap_conn *conn) static void l2cap_conn_ready(struct l2cap_conn *conn) { - BT_DBG("conn %p", conn); + struct l2cap_chan_list *l = &conn->chan_list; + struct sock *sk; - if (conn->chan_list.head || !hlist_empty(&l2cap_sk_list.head)) { - struct l2cap_info_req req; + BT_DBG("conn %p", conn); - req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + read_lock(&l->lock); - conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; - conn->info_ident = l2cap_get_ident(conn); + for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + bh_lock_sock(sk); - mod_timer(&conn->info_timer, - jiffies + msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); + if (sk->sk_type != SOCK_SEQPACKET) { + l2cap_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + } else if (sk->sk_state == BT_CONNECT) + l2cap_do_start(sk); - l2cap_send_cmd(conn, conn->info_ident, - L2CAP_INFO_REQ, sizeof(req), &req); + bh_unlock_sock(sk); } + + read_unlock(&l->lock); } /* Notify sockets that we cannot guaranty reliability anymore */ @@ -388,7 +469,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) conn->feat_mask = 0; - setup_timer(&conn->info_timer, l2cap_info_timeout, (unsigned long)conn); + setup_timer(&conn->info_timer, l2cap_info_timeout, + (unsigned long) conn); spin_lock_init(&conn->lock); rwlock_init(&conn->chan_list.lock); @@ -500,7 +582,7 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) while ((sk = bt_accept_dequeue(parent, NULL))) l2cap_sock_close(sk); - parent->sk_state = BT_CLOSED; + parent->sk_state = BT_CLOSED; sock_set_flag(parent, SOCK_ZAPPED); } @@ -543,9 +625,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason) req.scid = cpu_to_le16(l2cap_pi(sk)->scid); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_DISCONN_REQ, sizeof(req), &req); - } else { + } else l2cap_chan_del(sk, reason); - } break; case BT_CONNECT: @@ -614,9 +695,9 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p sock_reset_flag(sk, SOCK_ZAPPED); sk->sk_protocol = proto; - sk->sk_state = BT_OPEN; + sk->sk_state = BT_OPEN; - setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long)sk); + setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk); bt_sock_link(&l2cap_sk_list, sk); return sk; @@ -729,22 +810,11 @@ static int l2cap_do_connect(struct sock *sk) l2cap_sock_set_timer(sk, sk->sk_sndtimeo); if (hcon->state == BT_CONNECTED) { - if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)) { - l2cap_conn_ready(conn); - goto done; - } - - if (sk->sk_type == SOCK_SEQPACKET) { - struct l2cap_conn_req req; - l2cap_pi(sk)->ident = l2cap_get_ident(conn); - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_REQ, sizeof(req), &req); - } else { + if (sk->sk_type != SOCK_SEQPACKET) { l2cap_sock_clear_timer(sk); sk->sk_state = BT_CONNECTED; - } + } else + l2cap_do_start(sk); } done: @@ -1145,7 +1215,8 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) __l2cap_sock_close(sk, 0); if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) - err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); + err = bt_sock_wait_state(sk, BT_CLOSED, + sk->sk_lingertime); } release_sock(sk); return err; @@ -1189,6 +1260,11 @@ static void l2cap_chan_ready(struct sock *sk) */ parent->sk_data_ready(parent, 0); } + + if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + hci_conn_change_link_key(conn->hcon); + } } /* Copy frame to all raw sockets on that connection */ @@ -1477,7 +1553,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; struct sock *sk, *parent; - int result = 0, status = 0; + int result, status = 0; u16 dcid = 0, scid = __le16_to_cpu(req->scid); __le16 psm = req->psm; @@ -1526,25 +1602,24 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd l2cap_sock_set_timer(sk, sk->sk_sndtimeo); - /* Service level security */ - result = L2CAP_CR_PEND; - status = L2CAP_CS_AUTHEN_PEND; - sk->sk_state = BT_CONNECT2; l2cap_pi(sk)->ident = cmd->ident; - if ((l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) || - (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) { - if (!hci_conn_encrypt(conn->hcon)) - goto done; - } else if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH) { - if (!hci_conn_auth(conn->hcon)) - goto done; + if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { + if (l2cap_check_link_mode(sk)) { + sk->sk_state = BT_CONFIG; + result = L2CAP_CR_SUCCESS; + status = L2CAP_CS_NO_INFO; + } else { + sk->sk_state = BT_CONNECT2; + result = L2CAP_CR_PEND; + status = L2CAP_CS_AUTHEN_PEND; + } + } else { + sk->sk_state = BT_CONNECT2; + result = L2CAP_CR_PEND; + status = L2CAP_CS_NO_INFO; } - sk->sk_state = BT_CONFIG; - result = status = 0; - -done: write_unlock_bh(&list->lock); response: @@ -1556,6 +1631,21 @@ sendresp: rsp.result = cpu_to_le16(result); rsp.status = cpu_to_le16(status); l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); + + if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) { + struct l2cap_info_req info; + info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; + conn->info_ident = l2cap_get_ident(conn); + + mod_timer(&conn->info_timer, jiffies + + msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); + + l2cap_send_cmd(conn, conn->info_ident, + L2CAP_INFO_REQ, sizeof(info), &info); + } + return 0; } @@ -1664,9 +1754,9 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr } if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) { - u8 req[64]; + u8 buf[64]; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(sk, req), req); + l2cap_build_conf_req(sk, buf), buf); } unlock: @@ -1708,7 +1798,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr default: sk->sk_state = BT_DISCONN; - sk->sk_err = ECONNRESET; + sk->sk_err = ECONNRESET; l2cap_sock_set_timer(sk, HZ * 5); { struct l2cap_disconn_req req; @@ -2080,10 +2170,8 @@ static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason) static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) { struct l2cap_chan_list *l; - struct l2cap_conn *conn = conn = hcon->l2cap_data; - struct l2cap_conn_rsp rsp; + struct l2cap_conn *conn = hcon->l2cap_data; struct sock *sk; - int result; if (!conn) return 0; @@ -2095,45 +2183,65 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) read_lock(&l->lock); for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + struct l2cap_pinfo *pi = l2cap_pi(sk); + bh_lock_sock(sk); - if (sk->sk_state != BT_CONNECT2 || - (l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) || - (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) { + if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) && + !(hcon->link_mode & HCI_LM_ENCRYPT) && + !status) { bh_unlock_sock(sk); continue; } - if (!status) { - sk->sk_state = BT_CONFIG; - result = 0; - } else { - sk->sk_state = BT_DISCONN; - l2cap_sock_set_timer(sk, HZ/10); - result = L2CAP_CR_SEC_BLOCK; - } + if (sk->sk_state == BT_CONNECT) { + if (!status) { + struct l2cap_conn_req req; + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(0); - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } else { + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ / 10); + } + } else if (sk->sk_state == BT_CONNECT2) { + struct l2cap_conn_rsp rsp; + __u16 result; + + if (!status) { + sk->sk_state = BT_CONFIG; + result = L2CAP_CR_SUCCESS; + } else { + sk->sk_state = BT_DISCONN; + l2cap_sock_set_timer(sk, HZ / 10); + result = L2CAP_CR_SEC_BLOCK; + } + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(result); + rsp.status = cpu_to_le16(0); + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + } bh_unlock_sock(sk); } read_unlock(&l->lock); + return 0; } -static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status) +static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) { struct l2cap_chan_list *l; struct l2cap_conn *conn = hcon->l2cap_data; - struct l2cap_conn_rsp rsp; struct sock *sk; - int result; if (!conn) return 0; @@ -2145,36 +2253,59 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status) read_lock(&l->lock); for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + struct l2cap_pinfo *pi = l2cap_pi(sk); + bh_lock_sock(sk); - if (sk->sk_state != BT_CONNECT2) { + if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) && + (sk->sk_state == BT_CONNECTED || + sk->sk_state == BT_CONFIG) && + !status && encrypt == 0x00) { + __l2cap_sock_close(sk, ECONNREFUSED); bh_unlock_sock(sk); continue; } - if (!status) { - sk->sk_state = BT_CONFIG; - result = 0; - } else { - sk->sk_state = BT_DISCONN; - l2cap_sock_set_timer(sk, HZ/10); - result = L2CAP_CR_SEC_BLOCK; - } + if (sk->sk_state == BT_CONNECT) { + if (!status) { + struct l2cap_conn_req req; + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + + l2cap_pi(sk)->ident = l2cap_get_ident(conn); - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(0); - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } else { + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ / 10); + } + } else if (sk->sk_state == BT_CONNECT2) { + struct l2cap_conn_rsp rsp; + __u16 result; + + if (!status) { + sk->sk_state = BT_CONFIG; + result = L2CAP_CR_SUCCESS; + } else { + sk->sk_state = BT_DISCONN; + l2cap_sock_set_timer(sk, HZ / 10); + result = L2CAP_CR_SEC_BLOCK; + } - if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) - hci_conn_change_link_key(hcon); + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(result); + rsp.status = cpu_to_le16(0); + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + } bh_unlock_sock(sk); } read_unlock(&l->lock); + return 0; } @@ -2301,9 +2432,9 @@ static const struct proto_ops l2cap_sock_ops = { .sendmsg = l2cap_sock_sendmsg, .recvmsg = bt_sock_recvmsg, .poll = bt_sock_poll, + .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, - .ioctl = sock_no_ioctl, .shutdown = l2cap_sock_shutdown, .setsockopt = l2cap_sock_setsockopt, .getsockopt = l2cap_sock_getsockopt diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index eb62558e9b09..6cfc7ba611b3 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -23,8 +23,6 @@ /* * Bluetooth RFCOMM core. - * - * $Id: core.c,v 1.42 2002/10/01 23:26:25 maxk Exp $ */ #include <linux/module.h> @@ -53,7 +51,7 @@ #define BT_DBG(D...) #endif -#define VERSION "1.8" +#define VERSION "1.10" static int disable_cfc = 0; static int channel_mtu = -1; @@ -230,6 +228,21 @@ static int rfcomm_l2sock_create(struct socket **sock) return err; } +static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d) +{ + struct sock *sk = d->session->sock->sk; + + if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) { + if (!hci_conn_encrypt(l2cap_pi(sk)->conn->hcon)) + return 1; + } else if (d->link_mode & RFCOMM_LM_AUTH) { + if (!hci_conn_auth(l2cap_pi(sk)->conn->hcon)) + return 1; + } + + return 0; +} + /* ---- RFCOMM DLCs ---- */ static void rfcomm_dlc_timeout(unsigned long arg) { @@ -371,15 +384,23 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, d->addr = __addr(s->initiator, dlci); d->priority = 7; - d->state = BT_CONFIG; + d->state = BT_CONFIG; rfcomm_dlc_link(s, d); + d->out = 1; + d->mtu = s->mtu; d->cfc = (s->cfc == RFCOMM_CFC_UNKNOWN) ? 0 : s->cfc; - if (s->state == BT_CONNECTED) - rfcomm_send_pn(s, 1, d); + if (s->state == BT_CONNECTED) { + if (rfcomm_check_link_mode(d)) + set_bit(RFCOMM_AUTH_PENDING, &d->flags); + else + rfcomm_send_pn(s, 1, d); + } + rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); + return 0; } @@ -423,8 +444,8 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) rfcomm_dlc_lock(d); d->state = BT_CLOSED; - rfcomm_dlc_unlock(d); d->state_change(d, err); + rfcomm_dlc_unlock(d); skb_queue_purge(&d->tx_queue); rfcomm_dlc_unlink(d); @@ -1146,21 +1167,6 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) return 0; } -static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d) -{ - struct sock *sk = d->session->sock->sk; - - if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) { - if (!hci_conn_encrypt(l2cap_pi(sk)->conn->hcon)) - return 1; - } else if (d->link_mode & RFCOMM_LM_AUTH) { - if (!hci_conn_auth(l2cap_pi(sk)->conn->hcon)) - return 1; - } - - return 0; -} - static void rfcomm_dlc_accept(struct rfcomm_dlc *d) { struct sock *sk = d->session->sock->sk; @@ -1205,10 +1211,8 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) if (rfcomm_check_link_mode(d)) { set_bit(RFCOMM_AUTH_PENDING, &d->flags); rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); - return 0; - } - - rfcomm_dlc_accept(d); + } else + rfcomm_dlc_accept(d); } return 0; } @@ -1223,10 +1227,8 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) if (rfcomm_check_link_mode(d)) { set_bit(RFCOMM_AUTH_PENDING, &d->flags); rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); - return 0; - } - - rfcomm_dlc_accept(d); + } else + rfcomm_dlc_accept(d); } else { rfcomm_send_dm(s, dlci); } @@ -1459,8 +1461,12 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb clear_bit(RFCOMM_TX_THROTTLED, &d->flags); rfcomm_dlc_lock(d); + + d->remote_v24_sig = msc->v24_sig; + if (d->modem_status) d->modem_status(d, msc->v24_sig); + rfcomm_dlc_unlock(d); rfcomm_send_msc(s, 0, dlci, msc->v24_sig); @@ -1636,7 +1642,11 @@ static void rfcomm_process_connect(struct rfcomm_session *s) d = list_entry(p, struct rfcomm_dlc, list); if (d->state == BT_CONFIG) { d->mtu = s->mtu; - rfcomm_send_pn(s, 1, d); + if (rfcomm_check_link_mode(d)) { + set_bit(RFCOMM_AUTH_PENDING, &d->flags); + rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); + } else + rfcomm_send_pn(s, 1, d); } } } @@ -1709,7 +1719,11 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) if (test_and_clear_bit(RFCOMM_AUTH_ACCEPT, &d->flags)) { rfcomm_dlc_clear_timer(d); - rfcomm_dlc_accept(d); + if (d->out) { + rfcomm_send_pn(s, 1, d); + rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); + } else + rfcomm_dlc_accept(d); if (d->link_mode & RFCOMM_LM_SECURE) { struct sock *sk = s->sock->sk; hci_conn_change_link_key(l2cap_pi(sk)->conn->hcon); @@ -1717,7 +1731,10 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) continue; } else if (test_and_clear_bit(RFCOMM_AUTH_REJECT, &d->flags)) { rfcomm_dlc_clear_timer(d); - rfcomm_send_dm(s, d->dlci); + if (!d->out) + rfcomm_send_dm(s, d->dlci); + else + d->state = BT_CLOSED; __rfcomm_dlc_close(d, ECONNREFUSED); continue; } @@ -1726,7 +1743,7 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) continue; if ((d->state == BT_CONNECTED || d->state == BT_DISCONN) && - d->mscex == RFCOMM_MSCEX_OK) + d->mscex == RFCOMM_MSCEX_OK) rfcomm_process_tx(d); } } @@ -1954,7 +1971,8 @@ static void rfcomm_auth_cfm(struct hci_conn *conn, u8 status) list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); - if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) + if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) && + !(conn->link_mode & HCI_LM_ENCRYPT) && !status) continue; if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) @@ -1988,6 +2006,14 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt) list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); + if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) && + (d->state == BT_CONNECTED || + d->state == BT_CONFIG) && + !status && encrypt == 0x00) { + __rfcomm_dlc_close(d, ECONNREFUSED); + continue; + } + if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) continue; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 5083adcbfae5..8a972b6ba85f 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -23,8 +23,6 @@ /* * RFCOMM sockets. - * - * $Id: sock.c,v 1.24 2002/10/03 01:00:34 maxk Exp $ */ #include <linux/module.h> @@ -309,13 +307,13 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int sk->sk_destruct = rfcomm_sock_destruct; sk->sk_sndtimeo = RFCOMM_CONN_TIMEOUT; - sk->sk_sndbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; - sk->sk_rcvbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; + sk->sk_sndbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; + sk->sk_rcvbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; sock_reset_flag(sk, SOCK_ZAPPED); sk->sk_protocol = proto; - sk->sk_state = BT_OPEN; + sk->sk_state = BT_OPEN; bt_sock_link(&rfcomm_sk_list, sk); @@ -413,6 +411,8 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr); rfcomm_pi(sk)->channel = sa->rc_channel; + d->link_mode = rfcomm_pi(sk)->link_mode; + err = rfcomm_dlc_open(d, &bt_sk(sk)->src, &sa->rc_bdaddr, sa->rc_channel); if (!err) err = bt_sock_wait_state(sk, BT_CONNECTED, @@ -688,6 +688,8 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied += chunk; size -= chunk; + sock_recv_timestamp(msg, sk, skb); + if (!(flags & MSG_PEEK)) { atomic_sub(chunk, &sk->sk_rmem_alloc); @@ -793,15 +795,20 @@ static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned lon struct sock *sk = sock->sk; int err; - lock_sock(sk); + BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg); + + err = bt_sock_ioctl(sock, cmd, arg); + if (err == -ENOIOCTLCMD) { #ifdef CONFIG_BT_RFCOMM_TTY - err = rfcomm_dev_ioctl(sk, cmd, (void __user *)arg); + lock_sock(sk); + err = rfcomm_dev_ioctl(sk, cmd, (void __user *) arg); + release_sock(sk); #else - err = -EOPNOTSUPP; + err = -EOPNOTSUPP; #endif + } - release_sock(sk); return err; } diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index c3f749abb2d0..d3340dd52bcf 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -23,8 +23,6 @@ /* * RFCOMM TTY. - * - * $Id: tty.c,v 1.24 2002/10/03 01:54:38 holtmann Exp $ */ #include <linux/module.h> @@ -77,6 +75,8 @@ struct rfcomm_dev { struct device *tty_dev; atomic_t wmem_alloc; + + struct sk_buff_head pending; }; static LIST_HEAD(rfcomm_dev_list); @@ -264,13 +264,34 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) init_waitqueue_head(&dev->wait); tasklet_init(&dev->wakeup_task, rfcomm_tty_wakeup, (unsigned long) dev); + skb_queue_head_init(&dev->pending); + rfcomm_dlc_lock(dlc); + + if (req->flags & (1 << RFCOMM_REUSE_DLC)) { + struct sock *sk = dlc->owner; + struct sk_buff *skb; + + BUG_ON(!sk); + + rfcomm_dlc_throttle(dlc); + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) { + skb_orphan(skb); + skb_queue_tail(&dev->pending, skb); + atomic_sub(skb->len, &sk->sk_rmem_alloc); + } + } + dlc->data_ready = rfcomm_dev_data_ready; dlc->state_change = rfcomm_dev_state_change; dlc->modem_status = rfcomm_dev_modem_status; dlc->owner = dev; dev->dlc = dlc; + + rfcomm_dev_modem_status(dlc, dlc->remote_v24_sig); + rfcomm_dlc_unlock(dlc); /* It's safe to call __module_get() here because socket already @@ -539,11 +560,16 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) struct rfcomm_dev *dev = dlc->owner; struct tty_struct *tty; - if (!dev || !(tty = dev->tty)) { + if (!dev) { kfree_skb(skb); return; } + if (!(tty = dev->tty) || !skb_queue_empty(&dev->pending)) { + skb_queue_tail(&dev->pending, skb); + return; + } + BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len); tty_insert_flip_string(tty, skb->data, skb->len); @@ -566,11 +592,22 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) if (dlc->state == BT_CLOSED) { if (!dev->tty) { if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) { - if (rfcomm_dev_get(dev->id) == NULL) + /* Drop DLC lock here to avoid deadlock + * 1. rfcomm_dev_get will take rfcomm_dev_lock + * but in rfcomm_dev_add there's lock order: + * rfcomm_dev_lock -> dlc lock + * 2. rfcomm_dev_put will deadlock if it's + * the last reference + */ + rfcomm_dlc_unlock(dlc); + if (rfcomm_dev_get(dev->id) == NULL) { + rfcomm_dlc_lock(dlc); return; + } rfcomm_dev_del(dev); rfcomm_dev_put(dev); + rfcomm_dlc_lock(dlc); } } else tty_hangup(dev->tty); @@ -606,14 +643,31 @@ static void rfcomm_tty_wakeup(unsigned long arg) return; BT_DBG("dev %p tty %p", dev, tty); + tty_wakeup(tty); +} - if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && tty->ldisc.write_wakeup) - (tty->ldisc.write_wakeup)(tty); +static void rfcomm_tty_copy_pending(struct rfcomm_dev *dev) +{ + struct tty_struct *tty = dev->tty; + struct sk_buff *skb; + int inserted = 0; - wake_up_interruptible(&tty->write_wait); -#ifdef SERIAL_HAVE_POLL_WAIT - wake_up_interruptible(&tty->poll_wait); -#endif + if (!tty) + return; + + BT_DBG("dev %p tty %p", dev, tty); + + rfcomm_dlc_lock(dev->dlc); + + while ((skb = skb_dequeue(&dev->pending))) { + inserted += tty_insert_flip_string(tty, skb->data, skb->len); + kfree_skb(skb); + } + + rfcomm_dlc_unlock(dev->dlc); + + if (inserted > 0) + tty_flip_buffer_push(tty); } static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) @@ -680,6 +734,10 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) if (err == 0) device_move(dev->tty_dev, rfcomm_get_device(dev)); + rfcomm_tty_copy_pending(dev); + + rfcomm_dlc_unthrottle(dev->dlc); + return err; } @@ -994,9 +1052,7 @@ static void rfcomm_tty_flush_buffer(struct tty_struct *tty) return; skb_queue_purge(&dev->dlc->tx_queue); - - if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && tty->ldisc.write_wakeup) - tty->ldisc.write_wakeup(tty); + tty_wakeup(tty); } static void rfcomm_tty_send_xchar(struct tty_struct *tty, char ch) @@ -1112,6 +1168,7 @@ int rfcomm_init_ttys(void) rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; rfcomm_tty_driver->init_termios = tty_std_termios; rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; + rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON; tty_set_operations(rfcomm_tty_driver, &rfcomm_ops); if (tty_register_driver(rfcomm_tty_driver)) { diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b0d487e2db20..8cda49874868 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -53,7 +53,9 @@ #define BT_DBG(D...) #endif -#define VERSION "0.5" +#define VERSION "0.6" + +static int disable_esco = 0; static const struct proto_ops sco_sock_ops; @@ -193,7 +195,10 @@ static int sco_connect(struct sock *sk) err = -ENOMEM; - type = lmp_esco_capable(hdev) ? ESCO_LINK : SCO_LINK; + if (lmp_esco_capable(hdev) && !disable_esco) + type = ESCO_LINK; + else + type = SCO_LINK; hcon = hci_connect(hdev, type, dst); if (!hcon) @@ -921,7 +926,7 @@ static const struct proto_ops sco_sock_ops = { .sendmsg = sco_sock_sendmsg, .recvmsg = bt_sock_recvmsg, .poll = bt_sock_poll, - .ioctl = sock_no_ioctl, + .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, .shutdown = sock_no_shutdown, @@ -994,6 +999,9 @@ static void __exit sco_exit(void) module_init(sco_init); module_exit(sco_exit); +module_param(disable_esco, bool, 0644); +MODULE_PARM_DESC(disable_esco, "Disable eSCO connection creation"); + MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth SCO ver " VERSION); MODULE_VERSION(VERSION); diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 12265aff7099..e143ca678881 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -5,6 +5,7 @@ config BRIDGE tristate "802.1d Ethernet Bridging" select LLC + select STP ---help--- If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it diff --git a/net/bridge/br.c b/net/bridge/br.c index 8f3c58e5f7a5..573acdf6f9ff 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br.c,v 1.47 2001/12/24 00:56:41 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -20,21 +18,24 @@ #include <linux/init.h> #include <linux/llc.h> #include <net/llc.h> +#include <net/stp.h> #include "br_private.h" int (*br_should_route_hook)(struct sk_buff *skb); -static struct llc_sap *br_stp_sap; +static const struct stp_proto br_stp_proto = { + .rcv = br_stp_rcv, +}; static int __init br_init(void) { int err; - br_stp_sap = llc_sap_open(LLC_SAP_BSPAN, br_stp_rcv); - if (!br_stp_sap) { + err = stp_proto_register(&br_stp_proto); + if (err < 0) { printk(KERN_ERR "bridge: can't register sap for STP\n"); - return -EADDRINUSE; + return err; } err = br_fdb_init(); @@ -67,13 +68,13 @@ err_out2: err_out1: br_fdb_fini(); err_out: - llc_sap_put(br_stp_sap); + stp_proto_unregister(&br_stp_proto); return err; } static void __exit br_deinit(void) { - rcu_assign_pointer(br_stp_sap->rcv_func, NULL); + stp_proto_unregister(&br_stp_proto); br_netlink_fini(); unregister_netdevice_notifier(&br_device_notifier); @@ -84,7 +85,6 @@ static void __exit br_deinit(void) synchronize_net(); br_netfilter_fini(); - llc_sap_put(br_stp_sap); br_fdb_get_hook = NULL; br_fdb_put_hook = NULL; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index bf7787395fe0..d9449df7cad5 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_device.c,v 1.6 2001/12/24 00:59:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -21,12 +19,6 @@ #include <asm/uaccess.h> #include "br_private.h" -static struct net_device_stats *br_dev_get_stats(struct net_device *dev) -{ - struct net_bridge *br = netdev_priv(dev); - return &br->statistics; -} - /* net device transmit always called with no BH (preempt_disabled) */ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -34,8 +26,8 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; - br->statistics.tx_packets++; - br->statistics.tx_bytes += skb->len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); @@ -95,6 +87,7 @@ static int br_set_mac_address(struct net_device *dev, void *p) spin_lock_bh(&br->lock); memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); br_stp_change_bridge_id(br, addr->sa_data); + br->flags |= BR_SET_MAC_ADDR; spin_unlock_bh(&br->lock); return 0; @@ -161,7 +154,6 @@ void br_dev_setup(struct net_device *dev) ether_setup(dev); dev->do_ioctl = br_dev_ioctl; - dev->get_stats = br_dev_get_stats; dev->hard_start_xmit = br_dev_xmit; dev->open = br_dev_open; dev->set_multicast_list = br_dev_set_multicast_list; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 72c5976a5ce3..a48f5efdb6bf 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_fdb.c,v 1.6 2002/01/17 00:57:07 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -15,6 +13,7 @@ #include <linux/kernel.h> #include <linux/init.h> +#include <linux/rculist.h> #include <linux/spinlock.h> #include <linux/times.h> #include <linux/netdevice.h> diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index bdd7c35c3c7b..bdd9ccea17ce 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_forward.c,v 1.4 2001/08/14 22:05:57 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -91,7 +89,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) { - if (should_deliver(to, skb)) { + if (!skb_warn_if_lro(skb) && should_deliver(to, skb)) { __br_forward(to, skb); return; } @@ -115,7 +113,7 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb2; if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - br->statistics.tx_dropped++; + br->dev->stats.tx_dropped++; kfree_skb(skb); return; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 77a981a1ee52..a072ea5ca6f5 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_if.c,v 1.7 2001/12/24 00:59:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -273,15 +271,13 @@ int br_add_bridge(const char *name) rtnl_lock(); if (strchr(dev->name, '%')) { ret = dev_alloc_name(dev, dev->name); - if (ret < 0) { - free_netdev(dev); - goto out; - } + if (ret < 0) + goto out_free; } ret = register_netdevice(dev); if (ret) - goto out; + goto out_free; ret = br_sysfs_addbr(dev); if (ret) @@ -289,6 +285,10 @@ int br_add_bridge(const char *name) out: rtnl_unlock(); return ret; + +out_free: + free_netdev(dev); + goto out; } int br_del_bridge(const char *name) @@ -373,6 +373,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (IS_ERR(p)) return PTR_ERR(p); + err = dev_set_promiscuity(dev, 1); + if (err) + goto put_back; + err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj), SYSFS_BRIDGE_PORT_ATTR); if (err) @@ -387,7 +391,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) goto err2; rcu_assign_pointer(dev->br_port, p); - dev_set_promiscuity(dev, 1); + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -411,12 +415,12 @@ err2: br_fdb_delete_by_port(br, p, 1); err1: kobject_del(&p->kobj); - goto put_back; err0: kobject_put(&p->kobj); - + dev_set_promiscuity(dev, -1); put_back: dev_put(dev); + kfree(p); return err; } @@ -440,12 +444,16 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) void __exit br_cleanup_bridges(void) { - struct net_device *dev, *nxt; + struct net_device *dev; rtnl_lock(); - for_each_netdev_safe(&init_net, dev, nxt) - if (dev->priv_flags & IFF_EBRIDGE) +restart: + for_each_netdev(&init_net, dev) { + if (dev->priv_flags & IFF_EBRIDGE) { del_br(dev->priv); + goto restart; + } + } rtnl_unlock(); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 255c00f60ce7..30b88777c3df 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_input.c,v 1.10 2001/12/24 04:50:20 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -24,13 +22,13 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb) { - struct net_device *indev; + struct net_device *indev, *brdev = br->dev; - br->statistics.rx_packets++; - br->statistics.rx_bytes += skb->len; + brdev->stats.rx_packets++; + brdev->stats.rx_bytes += skb->len; indev = skb->dev; - skb->dev = br->dev; + skb->dev = brdev; NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, netif_receive_skb); @@ -64,7 +62,7 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; if (is_multicast_ether_addr(dest)) { - br->statistics.multicast++; + br->dev->stats.multicast++; skb2 = skb; } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { skb2 = skb; @@ -136,14 +134,11 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_PAUSE)) goto drop; - /* Process STP BPDU's through normal netif_receive_skb() path */ - if (p->br->stp_enabled != BR_NO_STP) { - if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, - NULL, br_handle_local_finish)) - return NULL; - else - return skb; - } + if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, + NULL, br_handle_local_finish)) + return NULL; /* frame consumed by filter */ + else + return skb; /* continue processing */ } switch (p->state) { diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 0655a5f07f58..eeee218eed80 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_ioctl.c,v 1.4 2000/11/08 05:16:40 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 00644a544e3c..76340bdd052e 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_notify.c,v 1.2 2000/02/21 15:51:34 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -37,7 +35,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_bridge_port *p = dev->br_port; struct net_bridge *br; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* not a port of a bridge */ diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c11b554fd109..815ed38925b2 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -4,8 +4,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_private.h,v 1.7 2001/12/24 00:59:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -90,11 +88,12 @@ struct net_bridge spinlock_t lock; struct list_head port_list; struct net_device *dev; - struct net_device_stats statistics; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; struct list_head age_list; unsigned long feature_mask; + unsigned long flags; +#define BR_SET_MAC_ADDR 0x00000001 /* STP */ bridge_id designated_root; @@ -227,8 +226,9 @@ extern void br_stp_set_path_cost(struct net_bridge_port *p, extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); /* br_stp_bpdu.c */ -extern int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev); +struct stp_proto; +extern void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev); /* br_stp_timer.c */ extern void br_stp_timer_init(struct net_bridge *br); diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h index e29f01ac1adf..8b650f7fbfa0 100644 --- a/net/bridge/br_private_stp.h +++ b/net/bridge/br_private_stp.h @@ -4,8 +4,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_private_stp.h,v 1.3 2001/02/05 06:03:47 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index e38034aa56f5..921bbe5cb94a 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -5,14 +5,13 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp.c,v 1.4 2000/06/19 10:13:35 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> +#include <linux/rculist.h> #include "br_private.h" #include "br_private_stp.h" diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index ddeb6e5d45d6..8b200f96f722 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp_bpdu.c,v 1.3 2001/11/10 02:35:25 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -20,6 +18,7 @@ #include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> +#include <net/stp.h> #include <asm/unaligned.h> #include "br_private.h" @@ -133,26 +132,20 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) * * NO locks, but rcu_read_lock (preempt_disabled) */ -int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev) { - const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = rcu_dereference(dev->br_port); struct net_bridge *br; const unsigned char *buf; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto err; if (!p) goto err; - if (pdu->ssap != LLC_SAP_BSPAN - || pdu->dsap != LLC_SAP_BSPAN - || pdu->ctrl_1 != LLC_PDU_TYPE_U) - goto err; - if (!pskb_may_pull(skb, 4)) goto err; @@ -226,5 +219,4 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, spin_unlock(&br->lock); err: kfree_skb(skb); - return 0; } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 1a430eccec9b..9a52ac5b4525 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp_if.c,v 1.4 2001/04/14 21:14:39 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -216,6 +214,10 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br) const unsigned char *addr = br_mac_zero; struct net_bridge_port *p; + /* user has chosen a value so keep it */ + if (br->flags & BR_SET_MAC_ADDR) + return; + list_for_each_entry(p, &br->port_list, list) { if (addr == br_mac_zero || memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 77f5255e6915..772a140bfdf0 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp_timer.c,v 1.3 2000/05/05 02:17:17 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 7beeefa0f9c0..909479794999 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -83,6 +83,15 @@ config BRIDGE_EBT_IP To compile it as a module, choose M here. If unsure, say N. +config BRIDGE_EBT_IP6 + tristate "ebt: IP6 filter support" + depends on BRIDGE_NF_EBTABLES && IPV6 + help + This option adds the IP6 match, which allows basic IPV6 header field + filtering. + + To compile it as a module, choose M here. If unsure, say N. + config BRIDGE_EBT_LIMIT tristate "ebt: limit match support" depends on BRIDGE_NF_EBTABLES @@ -221,7 +230,7 @@ config BRIDGE_EBT_NFLOG either the old LOG target, the old ULOG target or nfnetlink_log as backend. - This option adds the ulog watcher, that you can use in any rule + This option adds the nflog watcher, that you can use in any rule in any ebtables table. To compile it as a module, choose M here. If unsure, say N. diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 83715d73a503..0718699540b0 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_BRIDGE_EBT_802_3) += ebt_802_3.o obj-$(CONFIG_BRIDGE_EBT_AMONG) += ebt_among.o obj-$(CONFIG_BRIDGE_EBT_ARP) += ebt_arp.o obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip.o +obj-$(CONFIG_BRIDGE_EBT_IP6) += ebt_ip6.o obj-$(CONFIG_BRIDGE_EBT_LIMIT) += ebt_limit.o obj-$(CONFIG_BRIDGE_EBT_MARK) += ebt_mark_m.o obj-$(CONFIG_BRIDGE_EBT_PKTTYPE) += ebt_pkttype.o diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c new file mode 100644 index 000000000000..36efb3a75249 --- /dev/null +++ b/net/bridge/netfilter/ebt_ip6.c @@ -0,0 +1,144 @@ +/* + * ebt_ip6 + * + * Authors: + * Manohar Castelino <manohar.r.castelino@intel.com> + * Kuo-Lang Tseng <kuo-lang.tseng@intel.com> + * Jan Engelhardt <jengelh@computergmbh.de> + * + * Summary: + * This is just a modification of the IPv4 code written by + * Bart De Schuymer <bdschuym@pandora.be> + * with the changes required to support IPv6 + * + * Jan, 2008 + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip6.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <linux/in.h> +#include <linux/module.h> +#include <net/dsfield.h> + +struct tcpudphdr { + __be16 src; + __be16 dst; +}; + +static int ebt_filter_ip6(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, const void *data, + unsigned int datalen) +{ + const struct ebt_ip6_info *info = (struct ebt_ip6_info *)data; + const struct ipv6hdr *ih6; + struct ipv6hdr _ip6h; + const struct tcpudphdr *pptr; + struct tcpudphdr _ports; + struct in6_addr tmp_addr; + int i; + + ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); + if (ih6 == NULL) + return EBT_NOMATCH; + if (info->bitmask & EBT_IP6_TCLASS && + FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) + return EBT_NOMATCH; + for (i = 0; i < 4; i++) + tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] & + info->smsk.in6_u.u6_addr32[i]; + if (info->bitmask & EBT_IP6_SOURCE && + FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0), + EBT_IP6_SOURCE)) + return EBT_NOMATCH; + for (i = 0; i < 4; i++) + tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] & + info->dmsk.in6_u.u6_addr32[i]; + if (info->bitmask & EBT_IP6_DEST && + FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST)) + return EBT_NOMATCH; + if (info->bitmask & EBT_IP6_PROTO) { + uint8_t nexthdr = ih6->nexthdr; + int offset_ph; + + offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr); + if (offset_ph == -1) + return EBT_NOMATCH; + if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) + return EBT_NOMATCH; + if (!(info->bitmask & EBT_IP6_DPORT) && + !(info->bitmask & EBT_IP6_SPORT)) + return EBT_MATCH; + pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), + &_ports); + if (pptr == NULL) + return EBT_NOMATCH; + if (info->bitmask & EBT_IP6_DPORT) { + u32 dst = ntohs(pptr->dst); + if (FWINV(dst < info->dport[0] || + dst > info->dport[1], EBT_IP6_DPORT)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_IP6_SPORT) { + u32 src = ntohs(pptr->src); + if (FWINV(src < info->sport[0] || + src > info->sport[1], EBT_IP6_SPORT)) + return EBT_NOMATCH; + } + return EBT_MATCH; + } + return EBT_MATCH; +} + +static int ebt_ip6_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_ip6_info *info = (struct ebt_ip6_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_ip6_info))) + return -EINVAL; + if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO) + return -EINVAL; + if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK) + return -EINVAL; + if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) { + if (info->invflags & EBT_IP6_PROTO) + return -EINVAL; + if (info->protocol != IPPROTO_TCP && + info->protocol != IPPROTO_UDP && + info->protocol != IPPROTO_UDPLITE && + info->protocol != IPPROTO_SCTP && + info->protocol != IPPROTO_DCCP) + return -EINVAL; + } + if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1]) + return -EINVAL; + if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) + return -EINVAL; + return 0; +} + +static struct ebt_match filter_ip6 = +{ + .name = EBT_IP6_MATCH, + .match = ebt_filter_ip6, + .check = ebt_ip6_check, + .me = THIS_MODULE, +}; + +static int __init ebt_ip6_init(void) +{ + return ebt_register_match(&filter_ip6); +} + +static void __exit ebt_ip6_fini(void) +{ + ebt_unregister_match(&filter_ip6); +} + +module_init(ebt_ip6_init); +module_exit(ebt_ip6_fini); +MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 0b209e4aad0a..2f430d4ae911 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -18,6 +18,9 @@ #include <linux/if_arp.h> #include <linux/spinlock.h> #include <net/netfilter/nf_log.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <linux/in6.h> static DEFINE_SPINLOCK(ebt_log_lock); @@ -58,6 +61,27 @@ static void print_MAC(const unsigned char *p) printk("%02x%c", *p, i == ETH_ALEN - 1 ? ' ':':'); } +static void +print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) +{ + if (protocol == IPPROTO_TCP || + protocol == IPPROTO_UDP || + protocol == IPPROTO_UDPLITE || + protocol == IPPROTO_SCTP || + protocol == IPPROTO_DCCP) { + const struct tcpudphdr *pptr; + struct tcpudphdr _ports; + + pptr = skb_header_pointer(skb, offset, + sizeof(_ports), &_ports); + if (pptr == NULL) { + printk(" INCOMPLETE TCP/UDP header"); + return; + } + printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst)); + } +} + #define myNIPQUAD(a) a[0], a[1], a[2], a[3] static void ebt_log_packet(unsigned int pf, unsigned int hooknum, @@ -95,25 +119,35 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u, IP " "tos=0x%02X, IP proto=%d", NIPQUAD(ih->saddr), NIPQUAD(ih->daddr), ih->tos, ih->protocol); - if (ih->protocol == IPPROTO_TCP || - ih->protocol == IPPROTO_UDP || - ih->protocol == IPPROTO_UDPLITE || - ih->protocol == IPPROTO_SCTP || - ih->protocol == IPPROTO_DCCP) { - const struct tcpudphdr *pptr; - struct tcpudphdr _ports; - - pptr = skb_header_pointer(skb, ih->ihl*4, - sizeof(_ports), &_ports); - if (pptr == NULL) { - printk(" INCOMPLETE TCP/UDP header"); - goto out; - } - printk(" SPT=%u DPT=%u", ntohs(pptr->src), - ntohs(pptr->dst)); + print_ports(skb, ih->protocol, ih->ihl*4); + goto out; + } + +#if defined(CONFIG_BRIDGE_EBT_IP6) || defined(CONFIG_BRIDGE_EBT_IP6_MODULE) + if ((bitmask & EBT_LOG_IP6) && eth_hdr(skb)->h_proto == + htons(ETH_P_IPV6)) { + const struct ipv6hdr *ih; + struct ipv6hdr _iph; + uint8_t nexthdr; + int offset_ph; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) { + printk(" INCOMPLETE IPv6 header"); + goto out; } + printk(" IPv6 SRC=%x:%x:%x:%x:%x:%x:%x:%x " + "IPv6 DST=%x:%x:%x:%x:%x:%x:%x:%x, IPv6 " + "priority=0x%01X, Next Header=%d", NIP6(ih->saddr), + NIP6(ih->daddr), ih->priority, ih->nexthdr); + nexthdr = ih->nexthdr; + offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr); + if (offset_ph == -1) + goto out; + print_ports(skb, nexthdr, offset_ph); goto out; } +#endif if ((bitmask & EBT_LOG_ARP) && ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) || diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 690bc3ab186c..1a58af51a2e2 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,28 +93,20 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { static int __init ebtable_filter_init(void) { - int i, j, ret; + int ret; ret = ebt_register_table(&frame_filter); if (ret < 0) return ret; - for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++) - if ((ret = nf_register_hook(&ebt_ops_filter[i])) < 0) - goto cleanup; - return ret; -cleanup: - for (j = 0; j < i; j++) - nf_unregister_hook(&ebt_ops_filter[j]); - ebt_unregister_table(&frame_filter); + ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); + if (ret < 0) + ebt_unregister_table(&frame_filter); return ret; } static void __exit ebtable_filter_fini(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++) - nf_unregister_hook(&ebt_ops_filter[i]); + nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); ebt_unregister_table(&frame_filter); } diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 5b495fe2d0b6..f60c1e78e575 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -100,28 +100,20 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { static int __init ebtable_nat_init(void) { - int i, ret, j; + int ret; ret = ebt_register_table(&frame_nat); if (ret < 0) return ret; - for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++) - if ((ret = nf_register_hook(&ebt_ops_nat[i])) < 0) - goto cleanup; - return ret; -cleanup: - for (j = 0; j < i; j++) - nf_unregister_hook(&ebt_ops_nat[j]); - ebt_unregister_table(&frame_nat); + ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); + if (ret < 0) + ebt_unregister_table(&frame_nat); return ret; } static void __exit ebtable_nat_fini(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++) - nf_unregister_hook(&ebt_ops_nat[i]); + nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); ebt_unregister_table(&frame_nat); } diff --git a/net/can/af_can.c b/net/can/af_can.c index 2759b76f731c..8035fbf526ae 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -205,11 +205,19 @@ static int can_create(struct net *net, struct socket *sock, int protocol) * -ENOBUFS on full driver queue (see net_xmit_errno()) * -ENOMEM when local loopback failed at calling skb_clone() * -EPERM when trying to send on a non-CAN interface + * -EINVAL when the skb->data does not contain a valid CAN frame */ int can_send(struct sk_buff *skb, int loop) { + struct sk_buff *newskb = NULL; + struct can_frame *cf = (struct can_frame *)skb->data; int err; + if (skb->len != sizeof(struct can_frame) || cf->can_dlc > 8) { + kfree_skb(skb); + return -EINVAL; + } + if (skb->dev->type != ARPHRD_CAN) { kfree_skb(skb); return -EPERM; @@ -244,8 +252,7 @@ int can_send(struct sk_buff *skb, int loop) * If the interface is not capable to do loopback * itself, we do it here. */ - struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); - + newskb = skb_clone(skb, GFP_ATOMIC); if (!newskb) { kfree_skb(skb); return -ENOMEM; @@ -254,7 +261,6 @@ int can_send(struct sk_buff *skb, int loop) newskb->sk = skb->sk; newskb->ip_summed = CHECKSUM_UNNECESSARY; newskb->pkt_type = PACKET_BROADCAST; - netif_rx(newskb); } } else { /* indication for the CAN driver: no loopback required */ @@ -266,11 +272,20 @@ int can_send(struct sk_buff *skb, int loop) if (err > 0) err = net_xmit_errno(err); + if (err) { + if (newskb) + kfree_skb(newskb); + return err; + } + + if (newskb) + netif_rx(newskb); + /* update statistics */ can_stats.tx_frames++; can_stats.tx_frames_delta++; - return err; + return 0; } EXPORT_SYMBOL(can_send); @@ -597,13 +612,16 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct dev_rcv_lists *d; + struct can_frame *cf = (struct can_frame *)skb->data; int matches; - if (dev->type != ARPHRD_CAN || dev_net(dev) != &init_net) { + if (dev->type != ARPHRD_CAN || !net_eq(dev_net(dev), &init_net)) { kfree_skb(skb); return 0; } + BUG_ON(skb->len != sizeof(struct can_frame) || cf->can_dlc > 8); + /* update statistics */ can_stats.rx_frames++; can_stats.rx_frames_delta++; @@ -710,7 +728,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, struct net_device *dev = (struct net_device *)data; struct dev_rcv_lists *d; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) diff --git a/net/can/bcm.c b/net/can/bcm.c index d9a3a9d13bed..d0dd382001e2 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -298,7 +298,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, if (head->nframes) { /* can_frames starting here */ - firstframe = (struct can_frame *) skb_tail_pointer(skb); + firstframe = (struct can_frame *)skb_tail_pointer(skb); memcpy(skb_put(skb, datalen), frames, datalen); @@ -826,6 +826,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, for (i = 0; i < msg_head->nframes; i++) { err = memcpy_fromiovec((u8 *)&op->frames[i], msg->msg_iov, CFSIZ); + + if (op->frames[i].can_dlc > 8) + err = -EINVAL; + if (err < 0) return err; @@ -858,6 +862,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, for (i = 0; i < msg_head->nframes; i++) { err = memcpy_fromiovec((u8 *)&op->frames[i], msg->msg_iov, CFSIZ); + + if (op->frames[i].can_dlc > 8) + err = -EINVAL; + if (err < 0) { if (op->frames != &op->sframe) kfree(op->frames); @@ -1164,9 +1172,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) skb->dev = dev; skb->sk = sk; - can_send(skb, 1); /* send with loopback */ + err = can_send(skb, 1); /* send with loopback */ dev_put(dev); + if (err) + return err; + return CFSIZ + MHSIZ; } @@ -1185,6 +1196,10 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, if (!bo->bound) return -ENOTCONN; + /* check for valid message length from userspace */ + if (size < MHSIZ || (size - MHSIZ) % CFSIZ) + return -EINVAL; + /* check for alternative ifindex for this bcm_op */ if (!ifindex && msg->msg_name) { @@ -1259,8 +1274,8 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, break; case TX_SEND: - /* we need at least one can_frame */ - if (msg_head.nframes < 1) + /* we need exactly one can_frame behind the msg head */ + if ((msg_head.nframes != 1) || (size != CFSIZ + MHSIZ)) ret = -EINVAL; else ret = bcm_tx_send(msg, ifindex, sk); @@ -1288,7 +1303,7 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg, struct bcm_op *op; int notify_enodev = 0; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) diff --git a/net/can/raw.c b/net/can/raw.c index 69877b8e7e9c..6e0663faaf9f 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -210,7 +210,7 @@ static int raw_notifier(struct notifier_block *nb, struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) @@ -632,6 +632,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, } else ifindex = ro->ifindex; + if (size != sizeof(struct can_frame)) + return -EINVAL; + dev = dev_get_by_index(&init_net, ifindex); if (!dev) return -ENXIO; diff --git a/net/compat.c b/net/compat.c index c823f6f290cb..67fb6a3834a3 100644 --- a/net/compat.c +++ b/net/compat.c @@ -75,7 +75,7 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) /* I've named the args so it is easy to tell whose space the pointers are in. */ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, - char *kern_address, int mode) + struct sockaddr *kern_address, int mode) { int tot_len; @@ -722,9 +722,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), +static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), + AL(6)}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -737,13 +738,52 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } +asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize, int flags) +{ + compat_sigset_t ss32; + sigset_t ksigmask, sigsaved; + int ret; + + if (sigmask) { + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + if (copy_from_user(&ss32, sigmask, sizeof(ss32))) + return -EFAULT; + sigset_from_compat(&ksigmask, &ss32); + + sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); + + if (ret == -ERESTARTNOHAND) { + /* + * Don't restore the signal mask yet. Let do_signal() deliver + * the signal on the way back to userspace, before the signal + * mask is restored. + */ + if (sigmask) { + memcpy(¤t->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_restore_sigmask(); + } + } else if (sigmask) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + return ret; +} + asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_RECVMSG) + if (call < SYS_SOCKET || call > SYS_PACCEPT) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -764,7 +804,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ret = sys_listen(a0, a1); break; case SYS_ACCEPT: - ret = sys_accept(a0, compat_ptr(a1), compat_ptr(a[2])); + ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); @@ -804,6 +844,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; + case SYS_PACCEPT: + ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]), + compat_ptr(a[3]), a[4], a[5]); + break; default: ret = -EINVAL; break; diff --git a/net/core/datagram.c b/net/core/datagram.c index 8a28fc93b724..dd61dcad6019 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -285,7 +285,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -315,7 +315,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -366,7 +366,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -402,7 +402,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, for (; list; list=list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { diff --git a/net/core/dev.c b/net/core/dev.c index d334446a8eaf..8d13a9b9f1df 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -90,6 +90,7 @@ #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/ethtool.h> #include <linux/notifier.h> #include <linux/skbuff.h> #include <net/net_namespace.h> @@ -119,6 +120,12 @@ #include <linux/err.h> #include <linux/ctype.h> #include <linux/if_arp.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/jhash.h> +#include <linux/random.h> #include "net-sysfs.h" @@ -254,9 +261,9 @@ static RAW_NOTIFIER_HEAD(netdev_chain); DEFINE_PER_CPU(struct softnet_data, softnet_data); -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#ifdef CONFIG_LOCKDEP /* - * register_netdevice() inits dev->_xmit_lock and sets lockdep class + * register_netdevice() inits txq->_xmit_lock and sets lockdep class * according to dev->type */ static const unsigned short netdev_lock_type[] = @@ -294,6 +301,7 @@ static const char *netdev_lock_name[] = "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; +static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; static inline unsigned short netdev_lock_pos(unsigned short dev_type) { @@ -306,8 +314,8 @@ static inline unsigned short netdev_lock_pos(unsigned short dev_type) return ARRAY_SIZE(netdev_lock_type) - 1; } -static inline void netdev_set_lockdep_class(spinlock_t *lock, - unsigned short dev_type) +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) { int i; @@ -315,9 +323,22 @@ static inline void netdev_set_lockdep_class(spinlock_t *lock, lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], netdev_lock_name[i]); } + +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) +{ + int i; + + i = netdev_lock_pos(dev->type); + lockdep_set_class_and_name(&dev->addr_list_lock, + &netdev_addr_lock_key[i], + netdev_lock_name[i]); +} #else -static inline void netdev_set_lockdep_class(spinlock_t *lock, - unsigned short dev_type) +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ +} +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) { } #endif @@ -453,7 +474,7 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { memset(s[i].name, 0, sizeof(s[i].name)); - strcpy(s[i].name, name); + strlcpy(s[i].name, name, IFNAMSIZ); memcpy(&s[i].map, map, sizeof(s[i].map)); break; } @@ -478,7 +499,7 @@ int netdev_boot_setup_check(struct net_device *dev) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strncmp(dev->name, s[i].name, strlen(s[i].name))) { + !strcmp(dev->name, s[i].name)) { dev->irq = s[i].map.irq; dev->base_addr = s[i].map.base_addr; dev->mem_start = s[i].map.mem_start; @@ -903,7 +924,11 @@ int dev_change_name(struct net_device *dev, char *newname) strlcpy(dev->name, newname, IFNAMSIZ); rollback: - device_rename(&dev->dev, dev->name); + err = device_rename(&dev->dev, dev->name); + if (err) { + memcpy(dev->name, oldname, IFNAMSIZ); + return err; + } write_lock_bh(&dev_base_lock); hlist_del(&dev->name_hlist); @@ -956,6 +981,12 @@ void netdev_state_change(struct net_device *dev) } } +void netdev_bonding_change(struct net_device *dev) +{ + call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev); +} +EXPORT_SYMBOL(netdev_bonding_change); + /** * dev_load - load a network module * @net: the applicable net namespace @@ -994,6 +1025,8 @@ int dev_open(struct net_device *dev) { int ret = 0; + ASSERT_RTNL(); + /* * Is it already up? */ @@ -1060,6 +1093,8 @@ int dev_open(struct net_device *dev) */ int dev_close(struct net_device *dev) { + ASSERT_RTNL(); + might_sleep(); if (!(dev->flags & IFF_UP)) @@ -1108,6 +1143,29 @@ int dev_close(struct net_device *dev) } +/** + * dev_disable_lro - disable Large Receive Offload on a device + * @dev: device + * + * Disable Large Receive Offload (LRO) on a net device. Must be + * called under RTNL. This is needed if received packets may be + * forwarded to another interface. + */ +void dev_disable_lro(struct net_device *dev) +{ + if (dev->ethtool_ops && dev->ethtool_ops->get_flags && + dev->ethtool_ops->set_flags) { + u32 flags = dev->ethtool_ops->get_flags(dev); + if (flags & ETH_FLAG_LRO) { + flags &= ~ETH_FLAG_LRO; + dev->ethtool_ops->set_flags(dev, flags); + } + } + WARN_ON(dev->features & NETIF_F_LRO); +} +EXPORT_SYMBOL(dev_disable_lro); + + static int dev_boot_phase = 1; /* @@ -1281,16 +1339,16 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) } -void __netif_schedule(struct net_device *dev) +void __netif_schedule(struct Qdisc *q) { - if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { - unsigned long flags; + if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) { struct softnet_data *sd; + unsigned long flags; local_irq_save(flags); sd = &__get_cpu_var(softnet_data); - dev->next_sched = sd->output_queue; - sd->output_queue = dev; + q->next_sched = sd->output_queue; + sd->output_queue = q; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } @@ -1354,6 +1412,29 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); +static bool can_checksum_protocol(unsigned long features, __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_IP_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_IPV6_CSUM) && + protocol == htons(ETH_P_IPV6))); +} + +static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) +{ + if (can_checksum_protocol(dev->features, skb->protocol)) + return true; + + if (skb->protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + if (can_checksum_protocol(dev->features & dev->vlan_features, + veh->h_vlan_encapsulated_proto)) + return true; + } + + return false; +} /* * Invalidate hardware checksum when packet is to be mangled, and @@ -1534,7 +1615,8 @@ static int dev_gso_segment(struct sk_buff *skb) return 0; } -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq) { if (likely(!skb->next)) { if (!list_empty(&ptype_all)) @@ -1563,9 +1645,7 @@ gso: skb->next = nskb; return rc; } - if (unlikely((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb)) && - skb->next)) + if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1576,6 +1656,73 @@ out_kfree_skb: return 0; } +static u32 simple_tx_hashrnd; +static int simple_tx_hashrnd_initialized = 0; + +static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) +{ + u32 addr1, addr2, ports; + u32 hash, ihl; + u8 ip_proto; + + if (unlikely(!simple_tx_hashrnd_initialized)) { + get_random_bytes(&simple_tx_hashrnd, 4); + simple_tx_hashrnd_initialized = 1; + } + + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + ip_proto = ip_hdr(skb)->protocol; + addr1 = ip_hdr(skb)->saddr; + addr2 = ip_hdr(skb)->daddr; + ihl = ip_hdr(skb)->ihl; + break; + case __constant_htons(ETH_P_IPV6): + ip_proto = ipv6_hdr(skb)->nexthdr; + addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; + addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; + ihl = (40 >> 2); + break; + default: + return 0; + } + + + switch (ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_AH: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); + break; + + default: + ports = 0; + break; + } + + hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); + + return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); +} + +static struct netdev_queue *dev_pick_tx(struct net_device *dev, + struct sk_buff *skb) +{ + u16 queue_index = 0; + + if (dev->select_queue) + queue_index = dev->select_queue(dev, skb); + else if (dev->real_num_tx_queues > 1) + queue_index = simple_tx_hash(dev, skb); + + skb_set_queue_mapping(skb, queue_index); + return netdev_get_tx_queue(dev, queue_index); +} + /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -1601,10 +1748,10 @@ out_kfree_skb: * the BH enable code must have IRQs enabled so that it will not deadlock. * --BLG */ - int dev_queue_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; + struct netdev_queue *txq; struct Qdisc *q; int rc = -ENOMEM; @@ -1632,55 +1779,34 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_set_transport_header(skb, skb->csum_start - skb_headroom(skb)); - - if (!(dev->features & NETIF_F_GEN_CSUM) && - !((dev->features & NETIF_F_IP_CSUM) && - skb->protocol == htons(ETH_P_IP)) && - !((dev->features & NETIF_F_IPV6_CSUM) && - skb->protocol == htons(ETH_P_IPV6))) - if (skb_checksum_help(skb)) - goto out_kfree_skb; + if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) + goto out_kfree_skb; } gso: - spin_lock_prefetch(&dev->queue_lock); - /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ rcu_read_lock_bh(); - /* Updates of qdisc are serialized by queue_lock. - * The struct Qdisc which is pointed to by qdisc is now a - * rcu structure - it may be accessed without acquiring - * a lock (but the structure may be stale.) The freeing of the - * qdisc will be deferred until it's known that there are no - * more references to it. - * - * If the qdisc has an enqueue function, we still need to - * hold the queue_lock before calling it, since queue_lock - * also serializes access to the device queue. - */ + txq = dev_pick_tx(dev, skb); + q = rcu_dereference(txq->qdisc); - q = rcu_dereference(dev->qdisc); #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); #endif if (q->enqueue) { - /* Grab device queue */ - spin_lock(&dev->queue_lock); - q = dev->qdisc; - if (q->enqueue) { - /* reset queue_mapping to zero */ - skb_set_queue_mapping(skb, 0); - rc = q->enqueue(skb, q); - qdisc_run(dev); - spin_unlock(&dev->queue_lock); - - rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; - goto out; - } - spin_unlock(&dev->queue_lock); + spinlock_t *root_lock = qdisc_root_lock(q); + + spin_lock(root_lock); + + rc = qdisc_enqueue_root(skb, q); + qdisc_run(q); + + spin_unlock(root_lock); + + rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; + goto out; } /* The device has no queue. Common case for software devices: @@ -1698,19 +1824,18 @@ gso: if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ - if (dev->xmit_lock_owner != cpu) { + if (txq->xmit_lock_owner != cpu) { - HARD_TX_LOCK(dev, cpu); + HARD_TX_LOCK(dev, txq, cpu); - if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb)) { + if (!netif_tx_queue_stopped(txq)) { rc = 0; - if (!dev_hard_start_xmit(skb, dev)) { - HARD_TX_UNLOCK(dev); + if (!dev_hard_start_xmit(skb, dev, txq)) { + HARD_TX_UNLOCK(dev, txq); goto out; } } - HARD_TX_UNLOCK(dev); + HARD_TX_UNLOCK(dev, txq); if (net_ratelimit()) printk(KERN_CRIT "Virtual device %s asks to " "queue packet!\n", dev->name); @@ -1848,13 +1973,13 @@ static void net_tx_action(struct softirq_action *h) struct sk_buff *skb = clist; clist = clist->next; - BUG_TRAP(!atomic_read(&skb->users)); + WARN_ON(atomic_read(&skb->users)); __kfree_skb(skb); } } if (sd->output_queue) { - struct net_device *head; + struct Qdisc *head; local_irq_disable(); head = sd->output_queue; @@ -1862,17 +1987,20 @@ static void net_tx_action(struct softirq_action *h) local_irq_enable(); while (head) { - struct net_device *dev = head; + struct Qdisc *q = head; + spinlock_t *root_lock; + head = head->next_sched; smp_mb__before_clear_bit(); - clear_bit(__LINK_STATE_SCHED, &dev->state); + clear_bit(__QDISC_STATE_SCHED, &q->state); - if (spin_trylock(&dev->queue_lock)) { - qdisc_run(dev); - spin_unlock(&dev->queue_lock); + root_lock = qdisc_root_lock(q); + if (spin_trylock(root_lock)) { + qdisc_run(q); + spin_unlock(root_lock); } else { - netif_schedule(dev); + __netif_schedule(q); } } } @@ -1953,10 +2081,11 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, */ static int ing_filter(struct sk_buff *skb) { - struct Qdisc *q; struct net_device *dev = skb->dev; - int result = TC_ACT_OK; u32 ttl = G_TC_RTTL(skb->tc_verd); + struct netdev_queue *rxq; + int result = TC_ACT_OK; + struct Qdisc *q; if (MAX_RED_LOOP < ttl++) { printk(KERN_WARNING @@ -1968,10 +2097,14 @@ static int ing_filter(struct sk_buff *skb) skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - spin_lock(&dev->ingress_lock); - if ((q = dev->qdisc_ingress) != NULL) - result = q->enqueue(skb, q); - spin_unlock(&dev->ingress_lock); + rxq = &dev->rx_queue; + + q = rxq->qdisc; + if (q) { + spin_lock(qdisc_lock(q)); + result = qdisc_enqueue_root(skb, q); + spin_unlock(qdisc_lock(q)); + } return result; } @@ -1980,7 +2113,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - if (!skb->dev->qdisc_ingress) + if (!skb->dev->rx_queue.qdisc) goto out; if (*pt_prev) { @@ -2004,6 +2137,33 @@ out: } #endif +/* + * netif_nit_deliver - deliver received packets to network taps + * @skb: buffer + * + * This function is used to deliver incoming packets to network + * taps. It should be used when the normal netif_receive_skb path + * is bypassed, for example because of VLAN acceleration. + */ +void netif_nit_deliver(struct sk_buff *skb) +{ + struct packet_type *ptype; + + if (list_empty(&ptype_all)) + return; + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->mac_len = skb->network_header - skb->mac_header; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &ptype_all, list) { + if (!ptype->dev || ptype->dev == skb->dev) + deliver_skb(skb, ptype, skb->dev); + } + rcu_read_unlock(); +} + /** * netif_receive_skb - process receive buffer from network * @skb: buffer to process @@ -2051,6 +2211,10 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); + /* Don't receive packets in an exiting network namespace */ + if (!net_alive(dev_net(skb->dev))) + goto out; + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -2231,7 +2395,7 @@ out: */ if (!cpus_empty(net_dma.channel_mask)) { int chan_idx; - for_each_cpu_mask(chan_idx, net_dma.channel_mask) { + for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) { struct dma_chan *chan = net_dma.channels[chan_idx]; if (chan) dma_async_memcpy_issue_pending(chan); @@ -2739,16 +2903,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } -static void __dev_set_promiscuity(struct net_device *dev, int inc) +static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; ASSERT_RTNL(); - if ((dev->promiscuity += inc) == 0) - dev->flags &= ~IFF_PROMISC; - else - dev->flags |= IFF_PROMISC; + dev->flags |= IFF_PROMISC; + dev->promiscuity += inc; + if (dev->promiscuity == 0) { + /* + * Avoid overflow. + * If inc causes overflow, untouch promisc and return error. + */ + if (inc < 0) + dev->flags &= ~IFF_PROMISC; + else { + dev->promiscuity -= inc; + printk(KERN_WARNING "%s: promiscuity touches roof, " + "set promiscuity failed, promiscuity feature " + "of device might be broken.\n", dev->name); + return -EOVERFLOW; + } + } if (dev->flags != old_flags) { printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : @@ -2766,6 +2943,7 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) if (dev->change_rx_flags) dev->change_rx_flags(dev, IFF_PROMISC); } + return 0; } /** @@ -2777,14 +2955,19 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) * remains above zero the interface remains promiscuous. Once it hits zero * the device reverts back to normal filtering operation. A negative inc * value is used to drop promiscuity on the device. + * Return 0 if successful or a negative errno code on error. */ -void dev_set_promiscuity(struct net_device *dev, int inc) +int dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + int err; - __dev_set_promiscuity(dev, inc); + err = __dev_set_promiscuity(dev, inc); + if (err < 0) + return err; if (dev->flags != old_flags) dev_set_rx_mode(dev); + return err; } /** @@ -2797,22 +2980,38 @@ void dev_set_promiscuity(struct net_device *dev, int inc) * to all interfaces. Once it hits zero the device reverts back to normal * filtering operation. A negative @inc value is used to drop the counter * when releasing a resource needing all multicasts. + * Return 0 if successful or a negative errno code on error. */ -void dev_set_allmulti(struct net_device *dev, int inc) +int dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; ASSERT_RTNL(); dev->flags |= IFF_ALLMULTI; - if ((dev->allmulti += inc) == 0) - dev->flags &= ~IFF_ALLMULTI; + dev->allmulti += inc; + if (dev->allmulti == 0) { + /* + * Avoid overflow. + * If inc causes overflow, untouch allmulti and return error. + */ + if (inc < 0) + dev->flags &= ~IFF_ALLMULTI; + else { + dev->allmulti -= inc; + printk(KERN_WARNING "%s: allmulti touches roof, " + "set allmulti failed, allmulti feature of " + "device might be broken.\n", dev->name); + return -EOVERFLOW; + } + } if (dev->flags ^ old_flags) { if (dev->change_rx_flags) dev->change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); } + return 0; } /* @@ -2851,9 +3050,9 @@ void __dev_set_rx_mode(struct net_device *dev) void dev_set_rx_mode(struct net_device *dev) { - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); } int __dev_addr_delete(struct dev_addr_list **list, int *count, @@ -2931,11 +3130,11 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_delete); @@ -2943,7 +3142,7 @@ EXPORT_SYMBOL(dev_unicast_delete); /** * dev_unicast_add - add a secondary unicast address * @dev: device - * @addr: address to delete + * @addr: address to add * @alen: length of @addr * * Add a secondary unicast address to the device or increase @@ -2957,11 +3156,11 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_add); @@ -3028,12 +3227,12 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) { int err = 0; - netif_tx_lock_bh(to); + netif_addr_lock_bh(to); err = __dev_addr_sync(&to->uc_list, &to->uc_count, &from->uc_list, &from->uc_count); if (!err) __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); + netif_addr_unlock_bh(to); return err; } EXPORT_SYMBOL(dev_unicast_sync); @@ -3049,15 +3248,15 @@ EXPORT_SYMBOL(dev_unicast_sync); */ void dev_unicast_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(from); - netif_tx_lock_bh(to); + netif_addr_lock_bh(from); + netif_addr_lock(to); __dev_addr_unsync(&to->uc_list, &to->uc_count, &from->uc_list, &from->uc_count); __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); - netif_tx_unlock_bh(from); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_unicast_unsync); @@ -3077,7 +3276,7 @@ static void __dev_addr_discard(struct dev_addr_list **list) static void dev_addr_discard(struct net_device *dev) { - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); __dev_addr_discard(&dev->uc_list); dev->uc_count = 0; @@ -3085,7 +3284,7 @@ static void dev_addr_discard(struct net_device *dev) __dev_addr_discard(&dev->mc_list); dev->mc_count = 0; - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); } unsigned dev_get_flags(const struct net_device *dev) @@ -3133,7 +3332,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST) + if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST) dev->change_rx_flags(dev, IFF_MULTICAST); dev_set_rx_mode(dev); @@ -3648,7 +3847,7 @@ static void rollback_registered(struct net_device *dev) dev->uninit(dev); /* Notifier chain MUST detach us from master device. */ - BUG_TRAP(!dev->master); + WARN_ON(dev->master); /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); @@ -3658,6 +3857,21 @@ static void rollback_registered(struct net_device *dev) dev_put(dev); } +static void __netdev_init_queue_locks_one(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + spin_lock_init(&dev_queue->_xmit_lock); + netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); + dev_queue->xmit_lock_owner = -1; +} + +static void netdev_init_queue_locks(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); + __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); +} + /** * register_netdevice - register a network device * @dev: device to register @@ -3692,11 +3906,9 @@ int register_netdevice(struct net_device *dev) BUG_ON(!dev_net(dev)); net = dev_net(dev); - spin_lock_init(&dev->queue_lock); - spin_lock_init(&dev->_xmit_lock); - netdev_set_lockdep_class(&dev->_xmit_lock, dev->type); - dev->xmit_lock_owner = -1; - spin_lock_init(&dev->ingress_lock); + spin_lock_init(&dev->addr_list_lock); + netdev_set_addr_lockdep_class(dev); + netdev_init_queue_locks(dev); dev->iflink = -1; @@ -3957,9 +4169,9 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(atomic_read(&dev->refcnt)); - BUG_TRAP(!dev->ip_ptr); - BUG_TRAP(!dev->ip6_ptr); - BUG_TRAP(!dev->dn_ptr); + WARN_ON(dev->ip_ptr); + WARN_ON(dev->ip6_ptr); + WARN_ON(dev->dn_ptr); if (dev->destructor) dev->destructor(dev); @@ -3977,6 +4189,19 @@ static struct net_device_stats *internal_stats(struct net_device *dev) return &dev->stats; } +static void netdev_init_one_queue(struct net_device *dev, + struct netdev_queue *queue, + void *_unused) +{ + queue->dev = dev; +} + +static void netdev_init_queues(struct net_device *dev) +{ + netdev_init_one_queue(dev, &dev->rx_queue, NULL); + netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); +} + /** * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -3991,14 +4216,14 @@ static struct net_device_stats *internal_stats(struct net_device *dev) struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, void (*setup)(struct net_device *), unsigned int queue_count) { - void *p; + struct netdev_queue *tx; struct net_device *dev; - int alloc_size; + size_t alloc_size; + void *p; BUG_ON(strlen(name) >= sizeof(dev->name)); - alloc_size = sizeof(struct net_device) + - sizeof(struct net_device_subqueue) * (queue_count - 1); + alloc_size = sizeof(struct net_device); if (sizeof_priv) { /* ensure 32-byte alignment of private area */ alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; @@ -4013,22 +4238,33 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, return NULL; } + tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL); + if (!tx) { + printk(KERN_ERR "alloc_netdev: Unable to allocate " + "tx qdiscs.\n"); + kfree(p); + return NULL; + } + dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; dev_net_set(dev, &init_net); + dev->_tx = tx; + dev->num_tx_queues = queue_count; + dev->real_num_tx_queues = queue_count; + if (sizeof_priv) { dev->priv = ((char *)dev + - ((sizeof(struct net_device) + - (sizeof(struct net_device_subqueue) * - (queue_count - 1)) + NETDEV_ALIGN_CONST) + ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST)); } - dev->egress_subqueue_count = queue_count; dev->gso_max_size = GSO_MAX_SIZE; + netdev_init_queues(dev); + dev->get_stats = internal_stats; netpoll_netdev_init(dev); setup(dev); @@ -4049,6 +4285,8 @@ void free_netdev(struct net_device *dev) { release_net(dev_net(dev)); + kfree(dev->_tx); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -4230,7 +4468,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, void *ocpu) { struct sk_buff **list_skb; - struct net_device **list_net; + struct Qdisc **list_net; struct sk_buff *skb; unsigned int cpu, oldcpu = (unsigned long)ocpu; struct softnet_data *sd, *oldsd; @@ -4292,7 +4530,7 @@ static void net_dma_rebalance(struct net_dma *net_dma) i = 0; cpu = first_cpu(cpu_online_map); - for_each_cpu_mask(chan_idx, net_dma->channel_mask) { + for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) { chan = net_dma->channels[chan_idx]; n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) @@ -4459,6 +4697,26 @@ err_name: return -ENOMEM; } +char *netdev_drivername(struct net_device *dev, char *buffer, int len) +{ + struct device_driver *driver; + struct device *parent; + + if (len <= 0 || !buffer) + return buffer; + buffer[0] = 0; + + parent = dev->dev.parent; + + if (!parent) + return buffer; + + driver = parent->driver; + if (driver && driver->name) + strlcpy(buffer, driver->name, len); + return buffer; +} + static void __net_exit netdev_exit(struct net *net) { kfree(net->dev_name_head); @@ -4480,17 +4738,19 @@ static void __net_exit default_device_exit(struct net *net) rtnl_lock(); for_each_netdev_safe(net, dev, next) { int err; + char fb_name[IFNAMSIZ]; /* Ignore unmoveable devices (i.e. loopback) */ if (dev->features & NETIF_F_NETNS_LOCAL) continue; /* Push remaing network devices to init_net */ - err = dev_change_net_namespace(dev, &init_net, "dev%d"); + snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); + err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { - printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n", + printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n", __func__, dev->name, err); - unregister_netdevice(dev); + BUG(); } } rtnl_unlock(); @@ -4553,8 +4813,8 @@ static int __init net_dev_init(void) dev_boot_phase = 0; - open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); - open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); + open_softirq(NET_TX_SOFTIRQ, net_tx_action); + open_softirq(NET_RX_SOFTIRQ, net_rx_action); hotcpu_notifier(dev_cpu_callback, 0); dst_init(); diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index f8a3455f4493..5402b3b38e0d 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -72,7 +72,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) { int err; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) { @@ -83,7 +83,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) __dev_set_rx_mode(dev); } - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } @@ -95,11 +95,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) { int err; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } @@ -119,12 +119,12 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) { int err = 0; - netif_tx_lock_bh(to); + netif_addr_lock_bh(to); err = __dev_addr_sync(&to->mc_list, &to->mc_count, &from->mc_list, &from->mc_count); if (!err) __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); + netif_addr_unlock_bh(to); return err; } @@ -143,15 +143,15 @@ EXPORT_SYMBOL(dev_mc_sync); */ void dev_mc_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(from); - netif_tx_lock_bh(to); + netif_addr_lock_bh(from); + netif_addr_lock(to); __dev_addr_unsync(&to->mc_list, &to->mc_count, &from->mc_list, &from->mc_count); __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); - netif_tx_unlock_bh(from); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_mc_unsync); @@ -164,7 +164,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) return 0; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); for (m = dev->mc_list; m; m = m->next) { int i; @@ -176,7 +176,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) seq_putc(seq, '\n'); } - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return 0; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0133b5ebd545..14ada537f895 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -209,6 +209,36 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) return 0; } +static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_rxnfc cmd; + + if (!dev->ethtool_ops->set_rxhash) + return -EOPNOTSUPP; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + + return dev->ethtool_ops->set_rxhash(dev, &cmd); +} + +static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_rxnfc info; + + if (!dev->ethtool_ops->get_rxhash) + return -EOPNOTSUPP; + + if (copy_from_user(&info, useraddr, sizeof(info))) + return -EFAULT; + + dev->ethtool_ops->get_rxhash(dev, &info); + + if (copy_to_user(useraddr, &info, sizeof(info))) + return -EFAULT; + return 0; +} + static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) { struct ethtool_regs regs; @@ -826,6 +856,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GGSO: case ETHTOOL_GFLAGS: case ETHTOOL_GPFLAGS: + case ETHTOOL_GRXFH: break; default: if (!capable(CAP_NET_ADMIN)) @@ -977,6 +1008,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_set_value(dev, useraddr, dev->ethtool_ops->set_priv_flags); break; + case ETHTOOL_GRXFH: + rc = ethtool_get_rxhash(dev, useraddr); + break; + case ETHTOOL_SRXFH: + rc = ethtool_set_rxhash(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e3e9ab0f74e3..79de3b14a8d1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -69,7 +69,7 @@ static void rules_ops_put(struct fib_rules_ops *ops) static void flush_route_cache(struct fib_rules_ops *ops) { if (ops->flush_cache) - ops->flush_cache(); + ops->flush_cache(ops); } int fib_rules_register(struct fib_rules_ops *ops) @@ -226,7 +226,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } @@ -365,7 +365,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } diff --git a/net/core/filter.c b/net/core/filter.c index 4f8369729a4e..df3744355839 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -68,7 +68,6 @@ static inline void *load_pointer(struct sk_buff *skb, int k, * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff * @skb: buffer to filter - * @needlock: set to 1 if the sock is not locked by caller. * * Run the filter code and then cut skb->data to correct size returned by * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller diff --git a/net/core/flow.c b/net/core/flow.c index 19991175fdeb..5cf81052d044 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -298,7 +298,7 @@ void flow_cache_flush(void) init_completion(&info.completion); local_bh_disable(); - smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0); + smp_call_function(flow_cache_flush_per_cpu, &info, 0); flow_cache_flush_tasklet((unsigned long)&info); local_bh_enable(); diff --git a/net/core/iovec.c b/net/core/iovec.c index 755c37fdaee7..4c9c0121c9da 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -36,7 +36,7 @@ * in any case. */ -int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode) +int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) { int size, err, ct; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index a5e372b9ec4d..bf8f7af699d7 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -77,10 +77,10 @@ static void rfc2863_policy(struct net_device *dev) } -static int linkwatch_urgent_event(struct net_device *dev) +static bool linkwatch_urgent_event(struct net_device *dev) { return netif_running(dev) && netif_carrier_ok(dev) && - dev->qdisc != dev->qdisc_sleeping; + qdisc_tx_changing(dev); } @@ -180,10 +180,9 @@ static void __linkwatch_run_queue(int urgent_only) rfc2863_policy(dev); if (dev->flags & IFF_UP) { - if (netif_carrier_ok(dev)) { - WARN_ON(dev->qdisc_sleeping == &noop_qdisc); + if (netif_carrier_ok(dev)) dev_activate(dev); - } else + else dev_deactivate(dev); netdev_state_change(dev); @@ -214,7 +213,7 @@ static void linkwatch_event(struct work_struct *dummy) void linkwatch_fire_event(struct net_device *dev) { - int urgent = linkwatch_urgent_event(dev); + bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { dev_hold(dev); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5d9d7130bd6e..f62c8af85d38 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -930,6 +930,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) buff = neigh->arp_queue.next; __skb_unlink(buff, &neigh->arp_queue); kfree_skb(buff); + NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } __skb_queue_tail(&neigh->arp_queue, skb); } @@ -1714,7 +1715,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) return nla_nest_end(skb, nest); nla_put_failure: - return nla_nest_cancel(skb, nest); + nla_nest_cancel(skb, nest); + return -EMSGSIZE; } static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, @@ -2057,9 +2059,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, goto nla_put_failure; } - ci.ndm_used = now - neigh->used; - ci.ndm_confirmed = now - neigh->confirmed; - ci.ndm_updated = now - neigh->updated; + ci.ndm_used = jiffies_to_clock_t(now - neigh->used); + ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); + ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; read_unlock_bh(&neigh->lock); @@ -2461,12 +2463,12 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n"); + seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n"); return 0; } seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " - "%08lx %08lx %08lx %08lx\n", + "%08lx %08lx %08lx %08lx %08lx\n", atomic_read(&tbl->entries), st->allocs, @@ -2482,7 +2484,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) st->rcv_probes_ucast, st->periodic_gc_runs, - st->forced_gc_runs + st->forced_gc_runs, + st->unres_discards ); return 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 90e2177af081..c1f4e0d428c0 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -242,11 +242,11 @@ static ssize_t netstat_show(const struct device *d, offset % sizeof(unsigned long) != 0); read_lock(&dev_base_lock); - if (dev_isalive(dev) && dev->get_stats && - (stats = (*dev->get_stats)(dev))) + if (dev_isalive(dev)) { + stats = dev->get_stats(dev); ret = sprintf(buf, fmt_ulong, *(unsigned long *)(((u8 *) stats) + offset)); - + } read_unlock(&dev_base_lock); return ret; } @@ -318,7 +318,7 @@ static struct attribute_group netstat_group = { .attrs = netstat_attrs, }; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT_SYSFS /* helper function that does all the locking etc for wireless stats */ static ssize_t wireless_show(struct device *d, char *buf, ssize_t (*format)(const struct iw_statistics *, @@ -457,10 +457,9 @@ int netdev_register_kobject(struct net_device *net) strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); #ifdef CONFIG_SYSFS - if (net->get_stats) - *groups++ = &netstat_group; + *groups++ = &netstat_group; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT_SYSFS if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) *groups++ = &wireless_group; #endif @@ -469,6 +468,19 @@ int netdev_register_kobject(struct net_device *net) return device_add(dev); } +int netdev_class_create_file(struct class_attribute *class_attr) +{ + return class_create_file(&net_class, class_attr); +} + +void netdev_class_remove_file(struct class_attribute *class_attr) +{ + class_remove_file(&net_class, class_attr); +} + +EXPORT_SYMBOL(netdev_class_create_file); +EXPORT_SYMBOL(netdev_class_remove_file); + void netdev_initialize_kobject(struct net_device *net) { struct device *device = &(net->dev); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 72b4c184dd84..7c52fe277b62 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -140,6 +140,9 @@ static void cleanup_net(struct work_struct *work) struct pernet_operations *ops; struct net *net; + /* Be very certain incoming network packets will not find us */ + rcu_barrier(); + net = container_of(work, struct net, work); mutex_lock(&net_mutex); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b04d643fc3c7..c12720895ecf 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -58,25 +58,27 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; + struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { __kfree_skb(skb); continue; } + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + local_irq_save(flags); - netif_tx_lock(dev); - if ((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb)) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + __netif_tx_lock(txq, smp_processor_id()); + if (netif_tx_queue_stopped(txq) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); - netif_tx_unlock(dev); + __netif_tx_unlock(txq); local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } - netif_tx_unlock(dev); + __netif_tx_unlock(txq); local_irq_restore(flags); } } @@ -278,17 +280,19 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { + struct netdev_queue *txq; unsigned long flags; + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + local_irq_save(flags); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { - if (netif_tx_trylock(dev)) { - if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb)) + if (__netif_tx_trylock(txq)) { + if (!netif_tx_queue_stopped(txq)) status = dev->hard_start_xmit(skb, dev); - netif_tx_unlock(dev); + __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) break; @@ -419,7 +423,7 @@ static void arp_reply(struct sk_buff *skb) return; size = arp_hdr_len(skb->dev); - send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev), + send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), LL_RESERVED_SPACE(np->dev)); if (!send_skb) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8dca21110493..c7d484f7e1c4 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -390,6 +390,7 @@ struct pktgen_thread { int cpu; wait_queue_head_t queue; + struct completion start_done; }; #define REMOVE 1 @@ -1874,7 +1875,7 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, @@ -2122,6 +2123,24 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) } } #endif +static void set_cur_queue_map(struct pktgen_dev *pkt_dev) +{ + if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { + __u16 t; + if (pkt_dev->flags & F_QUEUE_MAP_RND) { + t = random32() % + (pkt_dev->queue_map_max - + pkt_dev->queue_map_min + 1) + + pkt_dev->queue_map_min; + } else { + t = pkt_dev->cur_queue_map + 1; + if (t > pkt_dev->queue_map_max) + t = pkt_dev->queue_map_min; + } + pkt_dev->cur_queue_map = t; + } +} + /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst */ @@ -2324,19 +2343,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_pkt_size = t; } - if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { - __u16 t; - if (pkt_dev->flags & F_QUEUE_MAP_RND) { - t = random32() % - (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1) - + pkt_dev->queue_map_min; - } else { - t = pkt_dev->cur_queue_map + 1; - if (t > pkt_dev->queue_map_max) - t = pkt_dev->queue_map_min; - } - pkt_dev->cur_queue_map = t; - } + set_cur_queue_map(pkt_dev); pkt_dev->flows[flow].count++; } @@ -2457,7 +2464,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ - + u16 queue_map; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2468,6 +2475,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Update any of the values, used when we're incrementing various * fields. */ + queue_map = pkt_dev->cur_queue_map; mod_cur_headers(pkt_dev); datalen = (odev->hard_header_len + 16) & ~0xf; @@ -2506,7 +2514,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); - skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); + skb_set_queue_mapping(skb, queue_map); iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -2796,6 +2804,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ + u16 queue_map; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2806,6 +2815,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, /* Update any of the values, used when we're incrementing various * fields. */ + queue_map = pkt_dev->cur_queue_map; mod_cur_headers(pkt_dev); skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + @@ -2843,7 +2853,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); - skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); + skb_set_queue_mapping(skb, queue_map); iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -3262,7 +3272,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = NULL; + struct netdev_queue *txq; __u64 idle_start = 0; + u16 queue_map; int ret; odev = pkt_dev->odev; @@ -3284,9 +3296,15 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - if ((netif_queue_stopped(odev) || - (pkt_dev->skb && - netif_subqueue_stopped(odev, pkt_dev->skb))) || + if (!pkt_dev->skb) { + set_cur_queue_map(pkt_dev); + queue_map = pkt_dev->cur_queue_map; + } else { + queue_map = skb_get_queue_mapping(pkt_dev->skb); + } + + txq = netdev_get_tx_queue(odev, queue_map); + if (netif_tx_queue_stopped(txq) || need_resched()) { idle_start = getCurUs(); @@ -3302,8 +3320,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_queue_stopped(odev) || - netif_subqueue_stopped(odev, pkt_dev->skb)) { + if (netif_tx_queue_stopped(txq)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3330,9 +3347,12 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - netif_tx_lock_bh(odev); - if (!netif_queue_stopped(odev) && - !netif_subqueue_stopped(odev, pkt_dev->skb)) { + /* fill_packet() might have changed the queue */ + queue_map = skb_get_queue_mapping(pkt_dev->skb); + txq = netdev_get_tx_queue(odev, queue_map); + + __netif_tx_lock_bh(txq); + if (!netif_tx_queue_stopped(txq)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: @@ -3376,7 +3396,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->next_tx_ns = 0; } - netif_tx_unlock_bh(odev); + __netif_tx_unlock_bh(txq); /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { @@ -3414,6 +3434,7 @@ static int pktgen_thread_worker(void *arg) BUG_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); + complete(&t->start_done); pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); @@ -3615,6 +3636,7 @@ static int __init pktgen_create_thread(int cpu) INIT_LIST_HEAD(&t->if_list); list_add_tail(&t->th_list, &pktgen_threads); + init_completion(&t->start_done); p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); if (IS_ERR(p)) { @@ -3639,6 +3661,7 @@ static int __init pktgen_create_thread(int cpu) } wake_up_process(p); + wait_for_completion(&t->start_done); return 0; } diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 2d3035d3abd7..7552495aff7a 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -123,7 +123,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) } } - BUG_TRAP(lopt->qlen == 0); + WARN_ON(lopt->qlen != 0); if (lopt_size > PAGE_SIZE) vfree(lopt); else diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cf857c4dc7b1..71edb8b36341 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -498,7 +498,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) return nla_nest_end(skb, mx); nla_put_failure: - return nla_nest_cancel(skb, mx); + nla_nest_cancel(skb, mx); + return -EMSGSIZE; } int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, @@ -604,8 +605,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags) { + struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; + struct net_device_stats *stats; + struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) @@ -632,8 +636,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->master) NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); - if (dev->qdisc_sleeping) - NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id); + txq = netdev_get_tx_queue(dev, 0); + if (txq->qdisc_sleeping) + NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); if (1) { struct rtnl_link_ifmap map = { @@ -652,19 +657,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); } - if (dev->get_stats) { - struct net_device_stats *stats = dev->get_stats(dev); - if (stats) { - struct nlattr *attr; - - attr = nla_reserve(skb, IFLA_STATS, - sizeof(struct rtnl_link_stats)); - if (attr == NULL) - goto nla_put_failure; + attr = nla_reserve(skb, IFLA_STATS, + sizeof(struct rtnl_link_stats)); + if (attr == NULL) + goto nla_put_failure; - copy_rtnl_link_stats(nla_data(attr), stats); - } - } + stats = dev->get_stats(dev); + copy_rtnl_link_stats(nla_data(attr), stats); if (dev->rtnl_link_ops) { if (rtnl_link_fill(skb, dev) < 0) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4fe605fa6f8a..4e0c92274189 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4,8 +4,6 @@ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * - * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ - * * Fixes: * Alan Cox : Fixed the worst of the load * balancer bugs. @@ -200,7 +198,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, goto nodata; /* - * See comment in sk_buff definition, just before the 'tail' member + * Only clear those fields we need to clear, not those that we will + * actually initialise below. Hence, don't put any more fields after + * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = size + sizeof(struct sk_buff); @@ -459,6 +459,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->tc_verd = old->tc_verd; #endif #endif + new->vlan_tci = old->vlan_tci; + skb_copy_secmark(new, old); } @@ -1198,7 +1200,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -1227,7 +1229,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -1280,107 +1282,83 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, return 0; } -/* - * Map linear and fragment data from the skb to spd. Returns number of - * pages mapped. - */ -static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *total_len, - struct splice_pipe_desc *spd) +static inline void __segment_seek(struct page **page, unsigned int *poff, + unsigned int *plen, unsigned int off) { - unsigned int nr_pages = spd->nr_pages; - unsigned int poff, plen, len, toff, tlen; - int headlen, seg; + *poff += off; + *page += *poff / PAGE_SIZE; + *poff = *poff % PAGE_SIZE; + *plen -= off; +} - toff = *offset; - tlen = *total_len; - if (!tlen) - goto err; +static inline int __splice_segment(struct page *page, unsigned int poff, + unsigned int plen, unsigned int *off, + unsigned int *len, struct sk_buff *skb, + struct splice_pipe_desc *spd) +{ + if (!*len) + return 1; - /* - * if the offset is greater than the linear part, go directly to - * the fragments. - */ - headlen = skb_headlen(skb); - if (toff >= headlen) { - toff -= headlen; - goto map_frag; + /* skip this segment if already processed */ + if (*off >= plen) { + *off -= plen; + return 0; } - /* - * first map the linear region into the pages/partial map, skipping - * any potential initial offset. - */ - len = 0; - while (len < headlen) { - void *p = skb->data + len; - - poff = (unsigned long) p & (PAGE_SIZE - 1); - plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff); - len += plen; - - if (toff) { - if (plen <= toff) { - toff -= plen; - continue; - } - plen -= toff; - poff += toff; - toff = 0; - } + /* ignore any bits we already processed */ + if (*off) { + __segment_seek(&page, &poff, &plen, *off); + *off = 0; + } - plen = min(plen, tlen); - if (!plen) - break; + do { + unsigned int flen = min(*len, plen); - /* - * just jump directly to update and return, no point - * in going over fragments when the output is full. - */ - if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb)) - goto done; + /* the linear region may spread across several pages */ + flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - tlen -= plen; - } + if (spd_fill_page(spd, page, flen, poff, skb)) + return 1; + + __segment_seek(&page, &poff, &plen, flen); + *len -= flen; + + } while (*len && plen); + + return 0; +} + +/* + * Map linear and fragment data from the skb to spd. It reports failure if the + * pipe is full or if we already spliced the requested length. + */ +static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, + unsigned int *len, + struct splice_pipe_desc *spd) +{ + int seg; + + /* + * map the linear part + */ + if (__splice_segment(virt_to_page(skb->data), + (unsigned long) skb->data & (PAGE_SIZE - 1), + skb_headlen(skb), + offset, len, skb, spd)) + return 1; /* * then map the fragments */ -map_frag: for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; - plen = f->size; - poff = f->page_offset; - - if (toff) { - if (plen <= toff) { - toff -= plen; - continue; - } - plen -= toff; - poff += toff; - toff = 0; - } - - plen = min(plen, tlen); - if (!plen) - break; - - if (spd_fill_page(spd, f->page, plen, poff, skb)) - break; - - tlen -= plen; + if (__splice_segment(f->page, f->page_offset, f->size, + offset, len, skb, spd)) + return 1; } -done: - if (spd->nr_pages - nr_pages) { - *offset = 0; - *total_len = tlen; - return 0; - } -err: - return 1; + return 0; } /* @@ -1443,6 +1421,7 @@ done: if (spd.nr_pages) { int ret; + struct sock *sk = __skb->sk; /* * Drop the socket lock, otherwise we have reverse @@ -1453,9 +1432,9 @@ done: * we call into ->sendpage() with the i_mutex lock held * and networking will grab the socket lock. */ - release_sock(__skb->sk); + release_sock(sk); ret = splice_to_pipe(pipe, &spd); - lock_sock(__skb->sk); + lock_sock(sk); return ret; } @@ -1496,7 +1475,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + frag->size; if ((copy = end - offset) > 0) { @@ -1524,7 +1503,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -1573,7 +1552,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -1602,7 +1581,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -1650,7 +1629,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -1683,7 +1662,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, __wsum csum2; int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -2278,6 +2257,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) skb_copy_queue_mapping(nskb, skb); nskb->priority = skb->priority; nskb->protocol = skb->protocol; + nskb->vlan_tci = skb->vlan_tci; nskb->dst = dst_clone(skb->dst); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); nskb->pkt_type = skb->pkt_type; @@ -2393,7 +2373,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -2417,7 +2397,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -2582,6 +2562,13 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) return true; } +void __skb_warn_lro_forwarding(const struct sk_buff *skb) +{ + if (net_ratelimit()) + pr_warning("%s: received packets cannot be forwarded" + " while LRO is enabled\n", skb->dev->name); +} + EXPORT_SYMBOL(___pskb_trim); EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(kfree_skb); @@ -2615,6 +2602,7 @@ EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); EXPORT_SYMBOL(skb_append_datato_frags); +EXPORT_SYMBOL(__skb_warn_lro_forwarding); EXPORT_SYMBOL_GPL(skb_to_sgvec); EXPORT_SYMBOL_GPL(skb_cow_data); diff --git a/net/core/sock.c b/net/core/sock.c index fa76f04fa9c6..91f8bbc93526 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -7,8 +7,6 @@ * handler for protocols to use and generic option handler. * * - * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> @@ -182,7 +180,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , - "clock-27" , "clock-28" , "clock-29" , + "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_MAX" }; @@ -270,7 +268,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) int err = 0; int skb_len; - /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces + /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces number of warnings when compiling with -W --ANK */ if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= @@ -1068,7 +1066,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) * to be taken into account in all callers. -acme */ sk_refcnt_debug_inc(newsk); - newsk->sk_socket = NULL; + sk_set_socket(newsk, NULL); newsk->sk_sleep = NULL; if (newsk->sk_prot->sockets_allocated) @@ -1444,7 +1442,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) /* Under pressure. */ if (allocated > prot->sysctl_mem[1]) if (prot->enter_memory_pressure) - prot->enter_memory_pressure(); + prot->enter_memory_pressure(sk); /* Over hard limit. */ if (allocated > prot->sysctl_mem[2]) @@ -1704,7 +1702,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; sk->sk_state = TCP_CLOSE; - sk->sk_socket = sock; + sk_set_socket(sk, sock); sock_set_flag(sk, SOCK_ZAPPED); diff --git a/net/core/stream.c b/net/core/stream.c index 4a0ad152c9c4..a6b3437ff082 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -192,13 +192,13 @@ void sk_stream_kill_queues(struct sock *sk) __skb_queue_purge(&sk->sk_error_queue); /* Next, the write queue. */ - BUG_TRAP(skb_queue_empty(&sk->sk_write_queue)); + WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. */ sk_mem_reclaim(sk); - BUG_TRAP(!sk->sk_wmem_queued); - BUG_TRAP(!sk->sk_forward_alloc); + WARN_ON(sk->sk_wmem_queued); + WARN_ON(sk->sk_forward_alloc); /* It is _impossible_ for the backlog to contain anything * when we get here. All user references to this socket diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5fc801057244..f686467ff12b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -67,7 +67,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_MSG_COST, .procname = "message_cost", - .data = &net_msg_cost, + .data = &net_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -76,7 +76,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_MSG_BURST, .procname = "message_burst", - .data = &net_msg_burst, + .data = &net_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -125,14 +125,6 @@ static struct ctl_table net_core_table[] = { #endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ { - .ctl_name = NET_CORE_SOMAXCONN, - .procname = "somaxconn", - .data = &init_net.core.sysctl_somaxconn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_CORE_BUDGET, .procname = "netdev_budget", .data = &netdev_budget, @@ -151,6 +143,18 @@ static struct ctl_table net_core_table[] = { { .ctl_name = 0 } }; +static struct ctl_table netns_core_table[] = { + { + .ctl_name = NET_CORE_SOMAXCONN, + .procname = "somaxconn", + .data = &init_net.core.sysctl_somaxconn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { .ctl_name = 0 } +}; + static __net_initdata struct ctl_path net_core_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "core", .ctl_name = NET_CORE, }, @@ -159,23 +163,17 @@ static __net_initdata struct ctl_path net_core_path[] = { static __net_init int sysctl_core_net_init(struct net *net) { - struct ctl_table *tbl, *tmp; + struct ctl_table *tbl; net->core.sysctl_somaxconn = SOMAXCONN; - tbl = net_core_table; + tbl = netns_core_table; if (net != &init_net) { - tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL); + tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); if (tbl == NULL) goto err_dup; - for (tmp = tbl; tmp->procname; tmp++) { - if (tmp->data >= (void *)&init_net && - tmp->data < (void *)(&init_net + 1)) - tmp->data += (char *)net - (char *)&init_net; - else - tmp->mode &= ~0222; - } + tbl[0].data = &net->core.sysctl_somaxconn; } net->core.sysctl_hdr = register_net_sysctl_table(net, @@ -186,7 +184,7 @@ static __net_init int sysctl_core_net_init(struct net *net) return 0; err_reg: - if (tbl != net_core_table) + if (tbl != netns_core_table) kfree(tbl); err_dup: return -ENOMEM; @@ -198,7 +196,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net) tbl = net->core.sysctl_hdr->ctl_table_arg; unregister_net_sysctl_table(net->core.sysctl_hdr); - BUG_ON(tbl == net_core_table); + BUG_ON(tbl == netns_core_table); kfree(tbl); } @@ -209,6 +207,7 @@ static __net_initdata struct pernet_operations sysctl_core_ops = { static __init int sysctl_core_init(void) { + register_net_sysctl_rotable(net_core_path, net_core_table); return register_pernet_subsys(&sysctl_core_ops); } diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 0ad1cd57bc39..164b090d5ac3 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -27,13 +27,13 @@ #include <linux/dmaengine.h> #include <linux/socket.h> -#include <linux/rtnetlink.h> /* for BUG_TRAP */ #include <net/tcp.h> #include <net/netdma.h> #define NET_DMA_DEFAULT_COPYBREAK 4096 int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK; +EXPORT_SYMBOL(sysctl_tcp_dma_copybreak); /** * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec. @@ -71,11 +71,11 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; copy = end - offset; - if ((copy = end - offset) > 0) { + if (copy > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = frag->page; @@ -100,7 +100,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; copy = end - offset; diff --git a/net/core/utils.c b/net/core/utils.c index 8031eb59054e..72e0ebe964a0 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -31,17 +31,16 @@ #include <asm/system.h> #include <asm/uaccess.h> -int net_msg_cost __read_mostly = 5*HZ; -int net_msg_burst __read_mostly = 10; int net_msg_warn __read_mostly = 1; EXPORT_SYMBOL(net_msg_warn); +DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); /* * All net warning printk()s should be guarded by this function. */ int net_ratelimit(void) { - return __printk_ratelimit(net_msg_cost, net_msg_burst); + return __ratelimit(&net_ratelimit_state); } EXPORT_SYMBOL(net_ratelimit); diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 6de4bd195d28..1e8be246ad15 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -290,12 +290,12 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, while (1) { const u8 len = dccp_ackvec_len(av, index); - const u8 state = dccp_ackvec_state(av, index); + const u8 av_state = dccp_ackvec_state(av, index); /* * valid packets not yet in av_buf have a reserved * entry, with a len equal to 0. */ - if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && + if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED && len == 0 && delta == 0) { /* Found our reserved seat! */ dccp_pr_debug("Found %llu reserved seat!\n", @@ -325,31 +325,6 @@ out_duplicate: return -EILSEQ; } -#ifdef CONFIG_IP_DCCP_DEBUG -void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) -{ - dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long)ackno); - - while (len--) { - const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; - const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; - - dccp_pr_debug_cat("%d,%d|", state, rl); - ++vector; - } - - dccp_pr_debug_cat("\n"); -} - -void dccp_ackvec_print(const struct dccp_ackvec *av) -{ - dccp_ackvector_print(av->av_buf_ackno, - av->av_buf + av->av_buf_head, - av->av_vec_len); -} -#endif - static void dccp_ackvec_throw_record(struct dccp_ackvec *av, struct dccp_ackvec_record *avr) { diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index cd61dea2eea1..f6756e0c9e69 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -159,8 +159,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) } else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld) - (s64)hctx->ccid3hctx_rtt >= 0) { - hctx->ccid3hctx_x = - max(min(2 * hctx->ccid3hctx_x, min_rate), + hctx->ccid3hctx_x = min(2 * hctx->ccid3hctx_x, min_rate); + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, scaled_div(((__u64)hctx->ccid3hctx_s) << 6, hctx->ccid3hctx_rtt)); hctx->ccid3hctx_t_ld = now; @@ -193,22 +193,17 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) /* * Update Window Counter using the algorithm from [RFC 4342, 8.1]. - * The algorithm is not applicable if RTT < 4 microseconds. + * As elsewhere, RTT > 0 is assumed by using dccp_sample_rtt(). */ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, ktime_t now) { - u32 quarter_rtts; - - if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */ - return; - - quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count); - quarter_rtts /= hctx->ccid3hctx_rtt / 4; + u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count), + quarter_rtts = (4 * delta) / hctx->ccid3hctx_rtt; if (quarter_rtts > 0) { hctx->ccid3hctx_t_last_win_count = now; - hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5); + hctx->ccid3hctx_last_win_count += min(quarter_rtts, 5U); hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ } } @@ -334,8 +329,14 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) hctx->ccid3hctx_x = rfc3390_initial_rate(sk); hctx->ccid3hctx_t_ld = now; } else { - /* Sender does not have RTT sample: X_pps = 1 pkt/sec */ - hctx->ccid3hctx_x = hctx->ccid3hctx_s; + /* + * Sender does not have RTT sample: + * - set fallback RTT (RFC 4340, 3.4) since a RTT value + * is needed in several parts (e.g. window counter); + * - set sending rate X_pps = 1pps as per RFC 3448, 4.2. + */ + hctx->ccid3hctx_rtt = DCCP_FALLBACK_RTT; + hctx->ccid3hctx_x = hctx->ccid3hctx_s; hctx->ccid3hctx_x <<= 6; } ccid3_update_send_interval(hctx); @@ -793,7 +794,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE; - const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; + const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; const bool is_data_packet = dccp_data_packet(skb); if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) { @@ -824,18 +825,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* - * Handle pending losses and otherwise check for new loss + * Perform loss detection and handle pending losses */ - if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist) && - tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_li_hist, - skb, ndp, ccid3_first_li, sk) ) { + if (tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist, + skb, ndp, ccid3_first_li, sk)) { do_feedback = CCID3_FBACK_PARAM_CHANGE; goto done_receiving; } - if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp)) - goto update_records; + if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist)) + return; /* done receiving */ /* * Handle data packets: RTT sampling and monitoring p diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 849e181e698f..bcd6ac415bb9 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -90,14 +90,14 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) { struct tfrc_loss_interval *cur = tfrc_lh_peek(lh); u32 old_i_mean = lh->i_mean; - s64 length; + s64 len; if (cur == NULL) /* not initialised */ return 0; - length = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq); + len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1; - if (length - cur->li_length <= 0) /* duplicate or reordered */ + if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */ return 0; if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4) @@ -114,7 +114,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */ return 0; - cur->li_length = length; + cur->li_length = len; tfrc_lh_calc_i_mean(lh); return (lh->i_mean < old_i_mean); @@ -159,7 +159,7 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, else { cur->li_length = dccp_delta_seqno(cur->li_seqno, new->li_seqno); new->li_length = dccp_delta_seqno(new->li_seqno, - tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno); + tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno) + 1; if (lh->counter > (2*LIH_SIZE)) lh->counter -= LIH_SIZE; diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 20af1a693427..6cc108afdc3b 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -153,7 +153,7 @@ void tfrc_rx_packet_history_exit(void) static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry, const struct sk_buff *skb, - const u32 ndp) + const u64 ndp) { const struct dccp_hdr *dh = dccp_hdr(skb); @@ -166,7 +166,7 @@ static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry, void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, - const u32 ndp) + const u64 ndp) { struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h); @@ -206,31 +206,39 @@ static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) * * In the descriptions, `Si' refers to the sequence number of entry number i, * whose NDP count is `Ni' (lower case is used for variables). - * Note: All __after_loss functions expect that a test against duplicates has - * been performed already: the seqno of the skb must not be less than the - * seqno of loss_prev; and it must not equal that of any valid hist_entry. + * Note: All __xxx_loss functions expect that a test against duplicates has been + * performed already: the seqno of the skb must not be less than the seqno + * of loss_prev; and it must not equal that of any valid history entry. */ +static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1) +{ + u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, + s1 = DCCP_SKB_CB(skb)->dccpd_seq; + + if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */ + h->loss_count = 1; + tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1); + } +} + static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2) { u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, s2 = DCCP_SKB_CB(skb)->dccpd_seq; - int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp, - d12 = dccp_delta_seqno(s1, s2), d2; - if (d12 > 0) { /* S1 < S2 */ + if (likely(dccp_delta_seqno(s1, s2) > 0)) { /* S1 < S2 */ h->loss_count = 2; tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2); return; } /* S0 < S2 < S1 */ - d2 = dccp_delta_seqno(s0, s2); - if (d2 == 1 || n2 >= d2) { /* S2 is direct successor of S0 */ - int d21 = -d12; + if (dccp_loss_free(s0, s2, n2)) { + u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp; - if (d21 == 1 || n1 >= d21) { + if (dccp_loss_free(s2, s1, n1)) { /* hole is filled: S0, S2, and S1 are consecutive */ h->loss_count = 0; h->loss_start = tfrc_rx_hist_index(h, 1); @@ -238,9 +246,9 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2 /* gap between S2 and S1: just update loss_prev */ tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2); - } else { /* hole between S0 and S2 */ + } else { /* gap between S0 and S2 */ /* - * Reorder history to insert S2 between S0 and s1 + * Reorder history to insert S2 between S0 and S1 */ tfrc_rx_hist_swap(h, 0, 3); h->loss_start = tfrc_rx_hist_index(h, 3); @@ -256,22 +264,18 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno, s3 = DCCP_SKB_CB(skb)->dccpd_seq; - int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp, - d23 = dccp_delta_seqno(s2, s3), d13, d3, d31; - if (d23 > 0) { /* S2 < S3 */ + if (likely(dccp_delta_seqno(s2, s3) > 0)) { /* S2 < S3 */ h->loss_count = 3; tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3); return 1; } /* S3 < S2 */ - d13 = dccp_delta_seqno(s1, s3); - if (d13 > 0) { + if (dccp_delta_seqno(s1, s3) > 0) { /* S1 < S3 < S2 */ /* - * The sequence number order is S1, S3, S2 - * Reorder history to insert entry between S1 and S2 + * Reorder history to insert S3 between S1 and S2 */ tfrc_rx_hist_swap(h, 2, 3); tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3); @@ -280,17 +284,15 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) } /* S0 < S3 < S1 */ - d31 = -d13; - d3 = dccp_delta_seqno(s0, s3); - if (d3 == 1 || n3 >= d3) { /* S3 is a successor of S0 */ + if (dccp_loss_free(s0, s3, n3)) { + u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp; - if (d31 == 1 || n1 >= d31) { + if (dccp_loss_free(s3, s1, n1)) { /* hole between S0 and S1 filled by S3 */ - int d2 = dccp_delta_seqno(s1, s2), - n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp; + u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp; - if (d2 == 1 || n2 >= d2) { + if (dccp_loss_free(s1, s2, n2)) { /* entire hole filled by S0, S3, S1, S2 */ h->loss_start = tfrc_rx_hist_index(h, 2); h->loss_count = 0; @@ -307,8 +309,8 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) } /* - * The remaining case: S3 is not a successor of S0. - * Sequence order is S0, S3, S1, S2; reorder to insert between S0 and S1 + * The remaining case: S0 < S3 < S1 < S2; gap between S0 and S3 + * Reorder history to insert S3 between S0 and S1. */ tfrc_rx_hist_swap(h, 0, 3); h->loss_start = tfrc_rx_hist_index(h, 3); @@ -318,33 +320,25 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) return 1; } -/* return the signed modulo-2^48 sequence number distance from entry e1 to e2 */ -static s64 tfrc_rx_hist_delta_seqno(struct tfrc_rx_hist *h, u8 e1, u8 e2) -{ - DCCP_BUG_ON(e1 > h->loss_count || e2 > h->loss_count); - - return dccp_delta_seqno(tfrc_rx_hist_entry(h, e1)->tfrchrx_seqno, - tfrc_rx_hist_entry(h, e2)->tfrchrx_seqno); -} - /* recycle RX history records to continue loss detection if necessary */ static void __three_after_loss(struct tfrc_rx_hist *h) { /* - * The distance between S0 and S1 is always greater than 1 and the NDP - * count of S1 is smaller than this distance. Otherwise there would - * have been no loss. Hence it is only necessary to see whether there - * are further missing data packets between S1/S2 and S2/S3. + * At this stage we know already that there is a gap between S0 and S1 + * (since S0 was the highest sequence number received before detecting + * the loss). To recycle the loss record, it is thus only necessary to + * check for other possible gaps between S1/S2 and between S2/S3. */ - int d2 = tfrc_rx_hist_delta_seqno(h, 1, 2), - d3 = tfrc_rx_hist_delta_seqno(h, 2, 3), - n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp, + u64 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, + s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno, + s3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_seqno; + u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp, n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp; - if (d2 == 1 || n2 >= d2) { /* S2 is successor to S1 */ + if (dccp_loss_free(s1, s2, n2)) { - if (d3 == 1 || n3 >= d3) { - /* S3 is successor of S2: entire hole is filled */ + if (dccp_loss_free(s2, s3, n3)) { + /* no gap between S2 and S3: entire hole is filled */ h->loss_start = tfrc_rx_hist_index(h, 3); h->loss_count = 0; } else { @@ -353,7 +347,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h) h->loss_count = 1; } - } else { /* gap between S1 and S2 */ + } else { /* gap between S1 and S2 */ h->loss_start = tfrc_rx_hist_index(h, 1); h->loss_count = 2; } @@ -370,15 +364,20 @@ static void __three_after_loss(struct tfrc_rx_hist *h) * Chooses action according to pending loss, updates LI database when a new * loss was detected, and does required post-processing. Returns 1 when caller * should send feedback, 0 otherwise. + * Since it also takes care of reordering during loss detection and updates the + * records accordingly, the caller should not perform any more RX history + * operations when loss_count is greater than 0 after calling this function. */ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, - struct sk_buff *skb, u32 ndp, + struct sk_buff *skb, const u64 ndp, u32 (*calc_first_li)(struct sock *), struct sock *sk) { int is_new_loss = 0; - if (h->loss_count == 1) { + if (h->loss_count == 0) { + __do_track_loss(h, skb, ndp); + } else if (h->loss_count == 1) { __one_after_loss(h, skb, ndp); } else if (h->loss_count != 2) { DCCP_BUG("invalid loss_count %d", h->loss_count); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index c7eeda49cb20..461cc91cce88 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -64,7 +64,7 @@ struct tfrc_rx_hist_entry { u64 tfrchrx_seqno:48, tfrchrx_ccval:4, tfrchrx_type:4; - u32 tfrchrx_ndp; /* In fact it is from 8 to 24 bits */ + u64 tfrchrx_ndp:48; ktime_t tfrchrx_tstamp; }; @@ -118,41 +118,21 @@ static inline struct tfrc_rx_hist_entry * return h->ring[h->loss_start]; } -/* initialise loss detection and disable RTT sampling */ -static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h) -{ - h->loss_count = 1; -} - /* indicate whether previously a packet was detected missing */ -static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) -{ - return h->loss_count; -} - -/* any data packets missing between last reception and skb ? */ -static inline int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h, - const struct sk_buff *skb, - u32 ndp) +static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) { - int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno, - DCCP_SKB_CB(skb)->dccpd_seq); - - if (delta > 1 && ndp < delta) - tfrc_rx_hist_loss_indicated(h); - - return tfrc_rx_hist_loss_pending(h); + return h->loss_count > 0; } extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, - const struct sk_buff *skb, const u32 ndp); + const struct sk_buff *skb, const u64 ndp); extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); struct tfrc_loss_hist; extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, - struct sk_buff *skb, u32 ndp, + struct sk_buff *skb, const u64 ndp, u32 (*first_li)(struct sock *sk), struct sock *sk); extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index d1dfbb8de64c..97ecec0a8e76 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -14,14 +14,6 @@ module_param(tfrc_debug, bool, 0444); MODULE_PARM_DESC(tfrc_debug, "Enable debug messages"); #endif -extern int tfrc_tx_packet_history_init(void); -extern void tfrc_tx_packet_history_exit(void); -extern int tfrc_rx_packet_history_init(void); -extern void tfrc_rx_packet_history_exit(void); - -extern int tfrc_li_init(void); -extern void tfrc_li_exit(void); - static int __init tfrc_module_init(void) { int rc = tfrc_li_init(); diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 1fb1187bbf1c..ed9857527acf 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -15,7 +15,7 @@ * (at your option) any later version. */ #include <linux/types.h> -#include <asm/div64.h> +#include <linux/math64.h> #include "../../dccp.h" /* internal includes that this module exports: */ #include "loss_interval.h" @@ -29,21 +29,19 @@ extern int tfrc_debug; #endif /* integer-arithmetic divisions of type (a * 1000000)/b */ -static inline u64 scaled_div(u64 a, u32 b) +static inline u64 scaled_div(u64 a, u64 b) { BUG_ON(b==0); - a *= 1000000; - do_div(a, b); - return a; + return div64_u64(a * 1000000, b); } -static inline u32 scaled_div32(u64 a, u32 b) +static inline u32 scaled_div32(u64 a, u64 b) { u64 result = scaled_div(a, b); if (result > UINT_MAX) { - DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U", - (unsigned long long)a, b); + DCCP_CRIT("Overflow: %llu/%llu > UINT_MAX", + (unsigned long long)a, (unsigned long long)b); return UINT_MAX; } return result; @@ -58,7 +56,14 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval; } -extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); -extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); +extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); +extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); +extern int tfrc_tx_packet_history_init(void); +extern void tfrc_tx_packet_history_exit(void); +extern int tfrc_rx_packet_history_init(void); +extern void tfrc_rx_packet_history_exit(void); + +extern int tfrc_li_init(void); +extern void tfrc_li_exit(void); #endif /* _TFRC_H_ */ diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index e4e64b76c10c..2f20a29cffe4 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -661,7 +661,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) EXPORT_SYMBOL_GPL(tfrc_calc_x); -/* +/** * tfrc_calc_x_reverse_lookup - try to find p given f(p) * * @fvalue: function value to match, scaled by 1000000 @@ -676,11 +676,11 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) /* Error cases. */ if (fvalue < tfrc_calc_x_lookup[0][1]) { - DCCP_WARN("fvalue %d smaller than resolution\n", fvalue); - return tfrc_calc_x_lookup[0][1]; + DCCP_WARN("fvalue %u smaller than resolution\n", fvalue); + return TFRC_SMALLEST_P; } if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) { - DCCP_WARN("fvalue %d exceeds bounds!\n", fvalue); + DCCP_WARN("fvalue %u exceeds bounds!\n", fvalue); return 1000000; } diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index f44d492d3b74..1c2e3ec2eb57 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -153,6 +153,21 @@ static inline u64 max48(const u64 seq1, const u64 seq2) return after48(seq1, seq2) ? seq1 : seq2; } +/** + * dccp_loss_free - Evaluates condition for data loss from RFC 4340, 7.7.1 + * @s1: start sequence number + * @s2: end sequence number + * @ndp: NDP count on packet with sequence number @s2 + * Returns true if the sequence range s1...s2 has no data loss. + */ +static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp) +{ + s64 delta = dccp_delta_seqno(s1, s2); + + WARN_ON(delta < 0); + return (u64)delta <= ndp + 1; +} + enum { DCCP_MIB_NUM = 0, DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ @@ -211,7 +226,7 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb) extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); -extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); +extern int dccp_retransmit_skb(struct sock *sk); extern void dccp_send_ack(struct sock *sk); extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk); @@ -262,7 +277,7 @@ extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned len); extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); -extern int dccp_destroy_sock(struct sock *sk); +extern void dccp_destroy_sock(struct sock *sk); extern void dccp_close(struct sock *sk, long timeout); extern struct sk_buff *dccp_make_response(struct sock *sk, diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 4a4f6ce4498d..933a0ecf8d46 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -32,7 +32,7 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, if (len > 3) { DCCP_WARN("invalid length %d\n", len); - return 1; + return -EINVAL; } /* XXX add further sanity checks */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 08392ed86c25..df2f110df94a 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -413,7 +413,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, /* Stop the REQUEST timer */ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); - BUG_TRAP(sk->sk_send_head != NULL); + WARN_ON(sk->sk_send_head == NULL); __kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b348dd70c685..882c5c4de69e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -196,8 +196,8 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + - (iph->ihl << 2)); + const u8 offset = iph->ihl << 2; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; @@ -205,17 +205,19 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) struct sock *sk; __u64 seq; int err; + struct net *net = dev_net(skb->dev); - if (skb->len < (iph->ihl << 2) + 8) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + if (skb->len < offset + sizeof(*dh) || + skb->len < offset + __dccp_basic_hdr_len(dh)) { + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } - sk = inet_lookup(dev_net(skb->dev), &dccp_hashinfo, + sk = inet_lookup(net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, iph->saddr, dh->dccph_sport, inet_iif(skb)); if (sk == NULL) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } @@ -229,7 +231,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; @@ -237,8 +239,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && - !between48(seq, dp->dccps_swl, dp->dccps_swh)) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + !between48(seq, dp->dccps_awl, dp->dccps_awh)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -282,10 +284,10 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * ICMPs are not backlogged, hence we cannot get an established * socket here. */ - BUG_TRAP(!req->sk); + WARN_ON(req->sk); if (seq != dccp_rsk(req)->dreq_iss) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } /* @@ -408,9 +410,9 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; exit_overflow: - NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); exit: - NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); dst_release(dst); return NULL; } @@ -464,7 +466,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, security_skb_classify_flow(skb, &fl); if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } @@ -589,7 +591,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&dccp_request_sock_ops); + req = inet_reqsk_alloc(&dccp_request_sock_ops); if (req == NULL) goto drop; @@ -605,7 +607,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; - ireq->opt = NULL; /* * Step 3: Process LISTEN state @@ -739,8 +740,8 @@ int dccp_invalid_packet(struct sk_buff *skb) * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return */ - if (dh->dccph_type >= DCCP_PKT_DATA && - dh->dccph_type <= DCCP_PKT_DATAACK && dh->dccph_x == 0) { + if ((dh->dccph_type < DCCP_PKT_DATA || + dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0) { DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n", dccp_packet_name(dh->dccph_type)); return 1; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9b1129bb7ece..5e1ee0da2c40 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -89,12 +89,20 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; __u64 seq; + struct net *net = dev_net(skb->dev); - sk = inet6_lookup(dev_net(skb->dev), &dccp_hashinfo, + if (skb->len < offset + sizeof(*dh) || + skb->len < offset + __dccp_basic_hdr_len(dh)) { + ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + return; + } + + sk = inet6_lookup(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); @@ -110,11 +118,19 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, bh_lock_sock(sk); if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; + dp = dccp_sk(sk); + seq = dccp_hdr_seq(dh); + if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && + !between48(seq, dp->dccps_awl, dp->dccps_awh)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + goto out; + } + np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { @@ -167,7 +183,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_err_convert(type, code, &err); - seq = dccp_hdr_seq(dh); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; @@ -185,10 +200,10 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, * ICMPs are not backlogged, hence we cannot get an established * socket here. */ - BUG_TRAP(req->sk == NULL); + WARN_ON(req->sk != NULL); if (seq != dccp_rsk(req)->dreq_iss) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -421,7 +436,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq6 = inet6_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); - ireq6->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || @@ -630,9 +644,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, return newsk; out_overflow: - NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out: - NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); if (opt != NULL && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); @@ -1092,10 +1106,10 @@ static int dccp_v6_init_sock(struct sock *sk) return err; } -static int dccp_v6_destroy_sock(struct sock *sk) +static void dccp_v6_destroy_sock(struct sock *sk) { dccp_destroy_sock(sk); - return inet6_destroy_sock(sk); + inet6_destroy_sock(sk); } static struct timewait_sock_ops dccp6_timewait_sock_ops = { diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 33ad48321b08..66dca5bba858 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -165,12 +165,12 @@ out_free: /* See dccp_v4_conn_request */ newdmsk->dccpms_sequence_window = req->rcv_wnd; - newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr; - dccp_update_gsr(newsk, dreq->dreq_isr); - - newdp->dccps_iss = dreq->dreq_iss; + newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; dccp_update_gss(newsk, dreq->dreq_iss); + newdp->dccps_isr = dreq->dreq_isr; + dccp_update_gsr(newsk, dreq->dreq_isr); + /* * SWL and AWL are initially adjusted so that they are not less than * the initial Sequence Numbers received and sent, respectively: diff --git a/net/dccp/options.c b/net/dccp/options.c index d2a84a2fecee..dc7c158a2f4b 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -107,9 +107,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, * * CCID-specific options are ignored during connection setup, as * negotiation may still be in progress (see RFC 4340, 10.3). + * The same applies to Ack Vectors, as these depend on the CCID. * */ - if (dreq != NULL && opt >= 128) + if (dreq != NULL && (opt >= 128 || + opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) goto ignore_option; switch (opt) { @@ -122,12 +124,12 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, mandatory = 1; break; case DCCPO_NDP_COUNT: - if (len > 3) + if (len > 6) goto out_invalid_option; opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); - dccp_pr_debug("%s rx opt: NDP count=%d\n", dccp_role(sk), - opt_recv->dccpor_ndp); + dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk), + (unsigned long long)opt_recv->dccpor_ndp); break; case DCCPO_CHANGE_L: /* fall through */ @@ -305,9 +307,11 @@ static void dccp_encode_value_var(const u32 value, unsigned char *to, *to++ = (value & 0xFF); } -static inline int dccp_ndp_len(const int ndp) +static inline u8 dccp_ndp_len(const u64 ndp) { - return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 2 : 3; + if (likely(ndp <= 0xFF)) + return 1; + return likely(ndp <= USHORT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); } int dccp_insert_option(struct sock *sk, struct sk_buff *skb, @@ -334,7 +338,7 @@ EXPORT_SYMBOL_GPL(dccp_insert_option); static int dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - int ndp = dp->dccps_ndp_count; + u64 ndp = dp->dccps_ndp_count; if (dccp_non_data_packet(skb)) ++dp->dccps_ndp_count; diff --git a/net/dccp/output.c b/net/dccp/output.c index 1f8a9b64c083..d06945c7d3df 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -53,8 +53,11 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) dccp_packet_hdr_len(dcb->dccpd_type); int err, set_ack = 1; u64 ackno = dp->dccps_gsr; - - dccp_inc_seqno(&dp->dccps_gss); + /* + * Increment GSS here already in case the option code needs it. + * Update GSS for real only if option processing below succeeds. + */ + dcb->dccpd_seq = ADD48(dp->dccps_gss, 1); switch (dcb->dccpd_type) { case DCCP_PKT_DATA: @@ -66,6 +69,9 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_REQUEST: set_ack = 0; + /* Use ISS on the first (non-retransmitted) Request. */ + if (icsk->icsk_retransmits == 0) + dcb->dccpd_seq = dp->dccps_iss; /* fall through */ case DCCP_PKT_SYNC: @@ -84,8 +90,6 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) break; } - dcb->dccpd_seq = dp->dccps_gss; - if (dccp_insert_options(sk, skb)) { kfree_skb(skb); return -EPROTO; @@ -103,7 +107,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) /* XXX For now we're using only 48 bits sequence numbers */ dh->dccph_x = 1; - dp->dccps_awh = dp->dccps_gss; + dccp_update_gss(sk, dcb->dccpd_seq); dccp_hdr_set_seq(dh, dp->dccps_gss); if (set_ack) dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); @@ -112,6 +116,11 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = dp->dccps_service; + /* + * Limit Ack window to ISS <= P.ackno <= GSS, so that + * only Responses to Requests we sent are considered. + */ + dp->dccps_awl = dp->dccps_iss; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = @@ -284,14 +293,26 @@ void dccp_write_xmit(struct sock *sk, int block) } } -int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +/** + * dccp_retransmit_skb - Retransmit Request, Close, or CloseReq packets + * There are only four retransmittable packet types in DCCP: + * - Request in client-REQUEST state (sec. 8.1.1), + * - CloseReq in server-CLOSEREQ state (sec. 8.3), + * - Close in node-CLOSING state (sec. 8.3), + * - Acks in client-PARTOPEN state (sec. 8.1.5, handled by dccp_delack_timer()). + * This function expects sk->sk_send_head to contain the original skb. + */ +int dccp_retransmit_skb(struct sock *sk) { + WARN_ON(sk->sk_send_head == NULL); + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0) return -EHOSTUNREACH; /* Routing failure or similar. */ - return dccp_transmit_skb(sk, (skb_cloned(skb) ? - pskb_copy(skb, GFP_ATOMIC): - skb_clone(skb, GFP_ATOMIC))); + /* this count is used to distinguish original and retransmitted skb */ + inet_csk(sk)->icsk_retransmits++; + + return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC)); } struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, @@ -437,19 +458,7 @@ static inline void dccp_connect_init(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - /* - * SWL and AWL are initially adjusted so that they are not less than - * the initial Sequence Numbers received and sent, respectively: - * SWL := max(GSR + 1 - floor(W/4), ISR), - * AWL := max(GSS - W' + 1, ISS). - * These adjustments MUST be applied only at the beginning of the - * connection. - */ - dccp_update_gss(sk, dp->dccps_iss); - dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); - - /* S.GAR - greatest valid acknowledgement number received on a non-Sync; - * initialized to S.ISS (sec. 8.5) */ + /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ dp->dccps_gar = dp->dccps_iss; icsk->icsk_retransmits = 0; @@ -508,6 +517,7 @@ void dccp_send_ack(struct sock *sk) EXPORT_SYMBOL_GPL(dccp_send_ack); +#if 0 /* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */ void dccp_send_delayed_ack(struct sock *sk) { @@ -538,6 +548,7 @@ void dccp_send_delayed_ack(struct sock *sk) icsk->icsk_ack.timeout = timeout; sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } +#endif void dccp_send_sync(struct sock *sk, const u64 ackno, const enum dccp_pkt_type pkt_type) diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 0bcdc9250279..81368a7f5379 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -42,7 +42,7 @@ static int bufsize = 64 * 1024; static const char procname[] = "dccpprobe"; -struct { +static struct { struct kfifo *fifo; spinlock_t lock; wait_queue_head_t wait; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 9dfe2470962c..b622d9744856 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -237,7 +237,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) EXPORT_SYMBOL_GPL(dccp_init_sock); -int dccp_destroy_sock(struct sock *sk) +void dccp_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_minisock *dmsk = dccp_msk(sk); @@ -268,8 +268,6 @@ int dccp_destroy_sock(struct sock *sk) /* clean up feature negotiation state */ dccp_feat_clean(dmsk); - - return 0; } EXPORT_SYMBOL_GPL(dccp_destroy_sock); @@ -329,7 +327,7 @@ int dccp_disconnect(struct sock *sk, int flags) inet_csk_delack_init(sk); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || icsk->icsk_bind_hash); + WARN_ON(inet->num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; @@ -983,7 +981,7 @@ adjudge_to_death: */ local_bh_disable(); bh_lock_sock(sk); - BUG_TRAP(!sock_owned_by_user(sk)); + WARN_ON(sock_owned_by_user(sk)); /* Have we already been destroyed by a softirq or backlog? */ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 8703a792b560..54b3c7e9e016 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -99,21 +99,11 @@ static void dccp_retransmit_timer(struct sock *sk) } /* - * sk->sk_send_head has to have one skb with - * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP - * packet types. The only packets eligible for retransmission are: - * -- Requests in client-REQUEST state (sec. 8.1.1) - * -- Acks in client-PARTOPEN state (sec. 8.1.5) - * -- CloseReq in server-CLOSEREQ state (sec. 8.3) - * -- Close in node-CLOSING state (sec. 8.3) */ - BUG_TRAP(sk->sk_send_head != NULL); - - /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was * sent, no need to retransmit, this sock is dead. */ if (dccp_write_timeout(sk)) - goto out; + return; /* * We want to know the number of packets retransmitted, not the @@ -122,30 +112,28 @@ static void dccp_retransmit_timer(struct sock *sk) if (icsk->icsk_retransmits == 0) DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); - if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) { + if (dccp_retransmit_skb(sk) != 0) { /* * Retransmission failed because of local congestion, * do not backoff. */ - if (icsk->icsk_retransmits == 0) + if (--icsk->icsk_retransmits == 0) icsk->icsk_retransmits = 1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), DCCP_RTO_MAX); - goto out; + return; } backoff: icsk->icsk_backoff++; - icsk->icsk_retransmits++; icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, DCCP_RTO_MAX); if (icsk->icsk_retransmits > sysctl_dccp_retries1) __sk_dst_reset(sk); -out:; } static void dccp_write_timer(unsigned long data) @@ -224,7 +212,7 @@ static void dccp_delack_timer(unsigned long data) if (sock_owned_by_user(sk)) { /* Try again later. */ icsk->icsk_ack.blocked = 1; - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); goto out; @@ -254,7 +242,7 @@ static void dccp_delack_timer(unsigned long data) icsk->icsk_ack.ato = TCP_ATO_MIN; } dccp_send_ack(sk); - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); } out: bh_unlock_sock(sk); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index fc2efe899e91..3c23ab33dbc0 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -451,7 +451,7 @@ static void dn_destruct(struct sock *sk) static int dn_memory_pressure; -static void dn_enter_memory_pressure(void) +static void dn_enter_memory_pressure(struct sock *sk) { if (!dn_memory_pressure) { dn_memory_pressure = 1; @@ -1719,6 +1719,8 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, * See if there is data ready to read, sleep if there isn't */ for(;;) { + DEFINE_WAIT(wait); + if (sk->sk_err) goto out; @@ -1748,14 +1750,11 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } - set_bit(SOCK_ASYNC_WAITDATA, &sock->flags); - SOCK_SLEEP_PRE(sk) - - if (!dn_data_ready(sk, queue, flags, target)) - schedule(); - - SOCK_SLEEP_POST(sk) - clear_bit(SOCK_ASYNC_WAITDATA, &sock->flags); + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + finish_wait(sk->sk_sleep, &wait); } for(skb = queue->next; skb != (struct sk_buff *)queue; skb = nskb) { @@ -2002,18 +2001,19 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, * size. */ if (dn_queue_too_long(scp, queue, flags)) { + DEFINE_WAIT(wait); + if (flags & MSG_DONTWAIT) { err = -EWOULDBLOCK; goto out; } - SOCK_SLEEP_PRE(sk) - - if (dn_queue_too_long(scp, queue, flags)) - schedule(); - - SOCK_SLEEP_POST(sk) - + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_wait_event(sk, &timeo, + !dn_queue_too_long(scp, queue, flags)); + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + finish_wait(sk->sk_sleep, &wait); continue; } @@ -2089,7 +2089,7 @@ static int dn_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; switch(event) { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2f665a516476..821bd1cdec04 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -235,14 +235,14 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) else min_mtu -= 21; - if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= min_mtu) { + if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) { if (!(dst_metric_locked(dst, RTAX_MTU))) { dst->metrics[RTAX_MTU-1] = mtu; dst_set_expires(dst, dn_rt_mtu_expires); } if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; - if (dst->metrics[RTAX_ADVMSS-1] > mss) + if (dst_metric(dst, RTAX_ADVMSS) > mss) dst->metrics[RTAX_ADVMSS-1] = mss; } } @@ -580,7 +580,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; unsigned char padlen = 0; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto dump_it; if (dn == NULL) @@ -805,12 +805,12 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) rt->u.dst.neighbour = n; } - if (rt->u.dst.metrics[RTAX_MTU-1] == 0 || - rt->u.dst.metrics[RTAX_MTU-1] > rt->u.dst.dev->mtu) + if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 || + dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu) rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst)); - if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0 || - rt->u.dst.metrics[RTAX_ADVMSS-1] > mss) + if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 || + dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss) rt->u.dst.metrics[RTAX_ADVMSS-1] = mss; return 0; } diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 5b7539b7fe0c..14fbca55e908 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -229,7 +229,7 @@ static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops) return 0; } -static void dn_fib_rule_flush_cache(void) +static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) { dn_rt_cache_flush(-1); } diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 68d154480043..8789d2bb1b06 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -340,7 +340,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, dev_hold(dev); - skb = sock_alloc_send_skb(sk, len+LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev), msg->msg_flags & MSG_DONTWAIT, &err); if (skb==NULL) goto out_unlock; @@ -573,9 +573,7 @@ static int econet_release(struct socket *sock) sk->sk_state_change(sk); /* It is useless. Just for sanity. */ - sock->sk = NULL; - sk->sk_socket = NULL; - sock_set_flag(sk, SOCK_DEAD); + sock_orphan(sk); /* Purge queues */ @@ -1064,7 +1062,7 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet struct sock *sk; struct ec_device *edev = dev->ec_ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; if (skb->pkt_type == PACKET_OTHERHOST) @@ -1121,7 +1119,7 @@ static int econet_notifier(struct notifier_block *this, unsigned long msg, void struct net_device *dev = (struct net_device *)data; struct ec_device *edev; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; switch (msg) { diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 200ee1e63728..69dbc342a464 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -391,7 +391,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, wstats.updated = 0; if (rx_stats->mask & IEEE80211_STATMASK_RSSI) { - wstats.level = rx_stats->rssi; + wstats.level = rx_stats->signal; wstats.updated |= IW_QUAL_LEVEL_UPDATED; } else wstats.updated |= IW_QUAL_LEVEL_INVALID; diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index d8b02603cbe5..d996547f7a62 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -542,90 +542,4 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) return 1; } -/* Incoming 802.11 strucure is converted to a TXB - * a block of 802.11 fragment packets (stored as skbs) */ -int ieee80211_tx_frame(struct ieee80211_device *ieee, - struct ieee80211_hdr *frame, int hdr_len, int total_len, - int encrypt_mpdu) -{ - struct ieee80211_txb *txb = NULL; - unsigned long flags; - struct net_device_stats *stats = &ieee->stats; - struct sk_buff *skb_frag; - int priority = -1; - int fraglen = total_len; - int headroom = ieee->tx_headroom; - struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx]; - - spin_lock_irqsave(&ieee->lock, flags); - - if (encrypt_mpdu && (!ieee->sec.encrypt || !crypt)) - encrypt_mpdu = 0; - - /* If there is no driver handler to take the TXB, dont' bother - * creating it... */ - if (!ieee->hard_start_xmit) { - printk(KERN_WARNING "%s: No xmit handler.\n", ieee->dev->name); - goto success; - } - - if (unlikely(total_len < 24)) { - printk(KERN_WARNING "%s: skb too small (%d).\n", - ieee->dev->name, total_len); - goto success; - } - - if (encrypt_mpdu) { - frame->frame_ctl |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - fraglen += crypt->ops->extra_mpdu_prefix_len + - crypt->ops->extra_mpdu_postfix_len; - headroom += crypt->ops->extra_mpdu_prefix_len; - } - - /* When we allocate the TXB we allocate enough space for the reserve - * and full fragment bytes (bytes_per_frag doesn't include prefix, - * postfix, header, FCS, etc.) */ - txb = ieee80211_alloc_txb(1, fraglen, headroom, GFP_ATOMIC); - if (unlikely(!txb)) { - printk(KERN_WARNING "%s: Could not allocate TXB\n", - ieee->dev->name); - goto failed; - } - txb->encrypted = 0; - txb->payload_size = fraglen; - - skb_frag = txb->fragments[0]; - - memcpy(skb_put(skb_frag, total_len), frame, total_len); - - if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) - skb_put(skb_frag, 4); - - /* To avoid overcomplicating things, we do the corner-case frame - * encryption in software. The only real situation where encryption is - * needed here is during software-based shared key authentication. */ - if (encrypt_mpdu) - ieee80211_encrypt_fragment(ieee, skb_frag, hdr_len); - - success: - spin_unlock_irqrestore(&ieee->lock, flags); - - if (txb) { - if ((*ieee->hard_start_xmit) (txb, ieee->dev, priority) == 0) { - stats->tx_packets++; - stats->tx_bytes += txb->payload_size; - return 0; - } - ieee80211_txb_free(txb); - } - return 0; - - failed: - spin_unlock_irqrestore(&ieee->lock, flags); - stats->tx_errors++; - return 1; -} - -EXPORT_SYMBOL(ieee80211_tx_frame); EXPORT_SYMBOL(ieee80211_txb_free); diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 623489afa62c..973832dd7faf 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -43,8 +43,9 @@ static const char *ieee80211_modes[] = { #define MAX_CUSTOM_LEN 64 static char *ieee80211_translate_scan(struct ieee80211_device *ieee, - char *start, char *stop, - struct ieee80211_network *network) + char *start, char *stop, + struct ieee80211_network *network, + struct iw_request_info *info) { char custom[MAX_CUSTOM_LEN]; char *p; @@ -57,7 +58,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, iwe.cmd = SIOCGIWAP; iwe.u.ap_addr.sa_family = ARPHRD_ETHER; memcpy(iwe.u.ap_addr.sa_data, network->bssid, ETH_ALEN); - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_ADDR_LEN); + start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_ADDR_LEN); /* Remaining entries will be displayed in the order we provide them */ @@ -66,17 +67,19 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, iwe.u.data.flags = 1; if (network->flags & NETWORK_EMPTY_ESSID) { iwe.u.data.length = sizeof("<hidden>"); - start = iwe_stream_add_point(start, stop, &iwe, "<hidden>"); + start = iwe_stream_add_point(info, start, stop, + &iwe, "<hidden>"); } else { iwe.u.data.length = min(network->ssid_len, (u8) 32); - start = iwe_stream_add_point(start, stop, &iwe, network->ssid); + start = iwe_stream_add_point(info, start, stop, + &iwe, network->ssid); } /* Add the protocol name */ iwe.cmd = SIOCGIWNAME; snprintf(iwe.u.name, IFNAMSIZ, "IEEE 802.11%s", ieee80211_modes[network->mode]); - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_CHAR_LEN); + start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_CHAR_LEN); /* Add mode */ iwe.cmd = SIOCGIWMODE; @@ -86,7 +89,8 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, else iwe.u.mode = IW_MODE_ADHOC; - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_UINT_LEN); + start = iwe_stream_add_event(info, start, stop, + &iwe, IW_EV_UINT_LEN); } /* Add channel and frequency */ @@ -95,7 +99,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, iwe.u.freq.m = ieee80211_channel_to_freq(ieee, network->channel); iwe.u.freq.e = 6; iwe.u.freq.i = 0; - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN); + start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_FREQ_LEN); /* Add encryption capability */ iwe.cmd = SIOCGIWENCODE; @@ -104,12 +108,13 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, else iwe.u.data.flags = IW_ENCODE_DISABLED; iwe.u.data.length = 0; - start = iwe_stream_add_point(start, stop, &iwe, network->ssid); + start = iwe_stream_add_point(info, start, stop, + &iwe, network->ssid); /* Add basic and extended rates */ /* Rate : stuffing multiple values in a single event require a bit * more of magic - Jean II */ - current_val = start + IW_EV_LCP_LEN; + current_val = start + iwe_stream_lcp_len(info); iwe.cmd = SIOCGIWRATE; /* Those two flags are ignored... */ iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; @@ -124,17 +129,19 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, /* Bit rate given in 500 kb/s units (+ 0x80) */ iwe.u.bitrate.value = ((rate & 0x7f) * 500000); /* Add new value to event */ - current_val = iwe_stream_add_value(start, current_val, stop, &iwe, IW_EV_PARAM_LEN); + current_val = iwe_stream_add_value(info, start, current_val, + stop, &iwe, IW_EV_PARAM_LEN); } for (; j < network->rates_ex_len; j++) { rate = network->rates_ex[j] & 0x7F; /* Bit rate given in 500 kb/s units (+ 0x80) */ iwe.u.bitrate.value = ((rate & 0x7f) * 500000); /* Add new value to event */ - current_val = iwe_stream_add_value(start, current_val, stop, &iwe, IW_EV_PARAM_LEN); + current_val = iwe_stream_add_value(info, start, current_val, + stop, &iwe, IW_EV_PARAM_LEN); } /* Check if we added any rate */ - if((current_val - start) > IW_EV_LCP_LEN) + if ((current_val - start) > iwe_stream_lcp_len(info)) start = current_val; /* Add quality statistics */ @@ -181,14 +188,14 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, iwe.u.qual.level = network->stats.signal; } - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_QUAL_LEN); + start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_QUAL_LEN); iwe.cmd = IWEVCUSTOM; p = custom; iwe.u.data.length = p - custom; if (iwe.u.data.length) - start = iwe_stream_add_point(start, stop, &iwe, custom); + start = iwe_stream_add_point(info, start, stop, &iwe, custom); memset(&iwe, 0, sizeof(iwe)); if (network->wpa_ie_len) { @@ -196,7 +203,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, memcpy(buf, network->wpa_ie, network->wpa_ie_len); iwe.cmd = IWEVGENIE; iwe.u.data.length = network->wpa_ie_len; - start = iwe_stream_add_point(start, stop, &iwe, buf); + start = iwe_stream_add_point(info, start, stop, &iwe, buf); } memset(&iwe, 0, sizeof(iwe)); @@ -205,7 +212,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, memcpy(buf, network->rsn_ie, network->rsn_ie_len); iwe.cmd = IWEVGENIE; iwe.u.data.length = network->rsn_ie_len; - start = iwe_stream_add_point(start, stop, &iwe, buf); + start = iwe_stream_add_point(info, start, stop, &iwe, buf); } /* Add EXTRA: Age to display seconds since last beacon/probe response @@ -217,7 +224,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, jiffies_to_msecs(jiffies - network->last_scanned)); iwe.u.data.length = p - custom; if (iwe.u.data.length) - start = iwe_stream_add_point(start, stop, &iwe, custom); + start = iwe_stream_add_point(info, start, stop, &iwe, custom); /* Add spectrum management information */ iwe.cmd = -1; @@ -238,7 +245,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, if (iwe.cmd == IWEVCUSTOM) { iwe.u.data.length = p - custom; - start = iwe_stream_add_point(start, stop, &iwe, custom); + start = iwe_stream_add_point(info, start, stop, &iwe, custom); } return start; @@ -272,7 +279,8 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, if (ieee->scan_age == 0 || time_after(network->last_scanned + ieee->scan_age, jiffies)) - ev = ieee80211_translate_scan(ieee, ev, stop, network); + ev = ieee80211_translate_scan(ieee, ev, stop, network, + info); else IEEE80211_DEBUG_SCAN("Not showing network '%s (" "%s)' due to age (%dms).\n", @@ -744,98 +752,9 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, return 0; } -int ieee80211_wx_set_auth(struct net_device *dev, - struct iw_request_info *info, - union iwreq_data *wrqu, - char *extra) -{ - struct ieee80211_device *ieee = netdev_priv(dev); - unsigned long flags; - int err = 0; - - spin_lock_irqsave(&ieee->lock, flags); - - switch (wrqu->param.flags & IW_AUTH_INDEX) { - case IW_AUTH_WPA_VERSION: - case IW_AUTH_CIPHER_PAIRWISE: - case IW_AUTH_CIPHER_GROUP: - case IW_AUTH_KEY_MGMT: - /* - * Host AP driver does not use these parameters and allows - * wpa_supplicant to control them internally. - */ - break; - case IW_AUTH_TKIP_COUNTERMEASURES: - break; /* FIXME */ - case IW_AUTH_DROP_UNENCRYPTED: - ieee->drop_unencrypted = !!wrqu->param.value; - break; - case IW_AUTH_80211_AUTH_ALG: - break; /* FIXME */ - case IW_AUTH_WPA_ENABLED: - ieee->privacy_invoked = ieee->wpa_enabled = !!wrqu->param.value; - break; - case IW_AUTH_RX_UNENCRYPTED_EAPOL: - ieee->ieee802_1x = !!wrqu->param.value; - break; - case IW_AUTH_PRIVACY_INVOKED: - ieee->privacy_invoked = !!wrqu->param.value; - break; - default: - err = -EOPNOTSUPP; - break; - } - spin_unlock_irqrestore(&ieee->lock, flags); - return err; -} - -int ieee80211_wx_get_auth(struct net_device *dev, - struct iw_request_info *info, - union iwreq_data *wrqu, - char *extra) -{ - struct ieee80211_device *ieee = netdev_priv(dev); - unsigned long flags; - int err = 0; - - spin_lock_irqsave(&ieee->lock, flags); - - switch (wrqu->param.flags & IW_AUTH_INDEX) { - case IW_AUTH_WPA_VERSION: - case IW_AUTH_CIPHER_PAIRWISE: - case IW_AUTH_CIPHER_GROUP: - case IW_AUTH_KEY_MGMT: - case IW_AUTH_TKIP_COUNTERMEASURES: /* FIXME */ - case IW_AUTH_80211_AUTH_ALG: /* FIXME */ - /* - * Host AP driver does not use these parameters and allows - * wpa_supplicant to control them internally. - */ - err = -EOPNOTSUPP; - break; - case IW_AUTH_DROP_UNENCRYPTED: - wrqu->param.value = ieee->drop_unencrypted; - break; - case IW_AUTH_WPA_ENABLED: - wrqu->param.value = ieee->wpa_enabled; - break; - case IW_AUTH_RX_UNENCRYPTED_EAPOL: - wrqu->param.value = ieee->ieee802_1x; - break; - default: - err = -EOPNOTSUPP; - break; - } - spin_unlock_irqrestore(&ieee->lock, flags); - return err; -} - EXPORT_SYMBOL(ieee80211_wx_set_encodeext); EXPORT_SYMBOL(ieee80211_wx_get_encodeext); EXPORT_SYMBOL(ieee80211_wx_get_scan); EXPORT_SYMBOL(ieee80211_wx_set_encode); EXPORT_SYMBOL(ieee80211_wx_get_encode); - -EXPORT_SYMBOL_GPL(ieee80211_wx_set_auth); -EXPORT_SYMBOL_GPL(ieee80211_wx_get_auth); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 4670683b4688..591ea23639ca 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -356,10 +356,8 @@ config INET_ESP config INET_IPCOMP tristate "IP: IPComp transformation" - select XFRM select INET_XFRM_TUNNEL - select CRYPTO - select CRYPTO_DEFLATE + select XFRM_IPCOMP ---help--- Support for IP Payload Compression Protocol (IPComp) (RFC3173), typically needed for IPsec. diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 24eca23c2db3..8a3ac1fa71a9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -5,8 +5,6 @@ * * PF_INET protocol family socket handler. * - * Version: $Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> @@ -112,12 +110,11 @@ #include <net/ipip.h> #include <net/inet_common.h> #include <net/xfrm.h> +#include <net/net_namespace.h> #ifdef CONFIG_IP_MROUTE #include <linux/mroute.h> #endif -DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly; - extern void ip_mc_drop_socket(struct sock *sk); /* The inetsw table contains everything that inet_create needs to @@ -151,10 +148,10 @@ void inet_sock_destruct(struct sock *sk) return; } - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(!sk->sk_wmem_queued); - BUG_TRAP(!sk->sk_forward_alloc); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(sk->sk_wmem_queued); + WARN_ON(sk->sk_forward_alloc); kfree(inet->opt); dst_release(sk->sk_dst_cache); @@ -267,7 +264,6 @@ static inline int inet_netns_ok(struct net *net, int protocol) static int inet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; - struct list_head *p; struct inet_protosw *answer; struct inet_sock *inet; struct proto *answer_prot; @@ -284,13 +280,12 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ - answer = NULL; lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); - list_for_each_rcu(p, &inetsw[sock->type]) { - answer = list_entry(p, struct inet_protosw, list); + list_for_each_entry_rcu(answer, &inetsw[sock->type], list) { + err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) @@ -305,10 +300,9 @@ lookup_protocol: break; } err = -EPROTONOSUPPORT; - answer = NULL; } - if (unlikely(answer == NULL)) { + if (unlikely(err)) { if (try_loading_module < 2) { rcu_read_unlock(); /* @@ -344,7 +338,7 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - BUG_TRAP(answer_prot->slab != NULL); + WARN_ON(answer_prot->slab == NULL); err = -ENOBUFS; sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); @@ -664,8 +658,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk2); - BUG_TRAP((1 << sk2->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)); + WARN_ON(!((1 << sk2->sk_state) & + (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); sock_graft(sk2, newsock); @@ -1341,50 +1335,70 @@ static struct net_protocol icmp_protocol = { .netns_ok = 1, }; -static int __init init_ipv4_mibs(void) +static __net_init int ipv4_mib_init_net(struct net *net) { - if (snmp_mib_init((void **)net_statistics, - sizeof(struct linux_mib)) < 0) - goto err_net_mib; - if (snmp_mib_init((void **)ip_statistics, - sizeof(struct ipstats_mib)) < 0) - goto err_ip_mib; - if (snmp_mib_init((void **)icmp_statistics, - sizeof(struct icmp_mib)) < 0) - goto err_icmp_mib; - if (snmp_mib_init((void **)icmpmsg_statistics, - sizeof(struct icmpmsg_mib)) < 0) - goto err_icmpmsg_mib; - if (snmp_mib_init((void **)tcp_statistics, + if (snmp_mib_init((void **)net->mib.tcp_statistics, sizeof(struct tcp_mib)) < 0) goto err_tcp_mib; - if (snmp_mib_init((void **)udp_statistics, + if (snmp_mib_init((void **)net->mib.ip_statistics, + sizeof(struct ipstats_mib)) < 0) + goto err_ip_mib; + if (snmp_mib_init((void **)net->mib.net_statistics, + sizeof(struct linux_mib)) < 0) + goto err_net_mib; + if (snmp_mib_init((void **)net->mib.udp_statistics, sizeof(struct udp_mib)) < 0) goto err_udp_mib; - if (snmp_mib_init((void **)udplite_statistics, + if (snmp_mib_init((void **)net->mib.udplite_statistics, sizeof(struct udp_mib)) < 0) goto err_udplite_mib; + if (snmp_mib_init((void **)net->mib.icmp_statistics, + sizeof(struct icmp_mib)) < 0) + goto err_icmp_mib; + if (snmp_mib_init((void **)net->mib.icmpmsg_statistics, + sizeof(struct icmpmsg_mib)) < 0) + goto err_icmpmsg_mib; - tcp_mib_init(); - + tcp_mib_init(net); return 0; -err_udplite_mib: - snmp_mib_free((void **)udp_statistics); -err_udp_mib: - snmp_mib_free((void **)tcp_statistics); -err_tcp_mib: - snmp_mib_free((void **)icmpmsg_statistics); err_icmpmsg_mib: - snmp_mib_free((void **)icmp_statistics); + snmp_mib_free((void **)net->mib.icmp_statistics); err_icmp_mib: - snmp_mib_free((void **)ip_statistics); -err_ip_mib: - snmp_mib_free((void **)net_statistics); + snmp_mib_free((void **)net->mib.udplite_statistics); +err_udplite_mib: + snmp_mib_free((void **)net->mib.udp_statistics); +err_udp_mib: + snmp_mib_free((void **)net->mib.net_statistics); err_net_mib: + snmp_mib_free((void **)net->mib.ip_statistics); +err_ip_mib: + snmp_mib_free((void **)net->mib.tcp_statistics); +err_tcp_mib: return -ENOMEM; } +static __net_exit void ipv4_mib_exit_net(struct net *net) +{ + snmp_mib_free((void **)net->mib.icmpmsg_statistics); + snmp_mib_free((void **)net->mib.icmp_statistics); + snmp_mib_free((void **)net->mib.udplite_statistics); + snmp_mib_free((void **)net->mib.udp_statistics); + snmp_mib_free((void **)net->mib.net_statistics); + snmp_mib_free((void **)net->mib.ip_statistics); + snmp_mib_free((void **)net->mib.tcp_statistics); +} + +static __net_initdata struct pernet_operations ipv4_mib_ops = { + .init = ipv4_mib_init_net, + .exit = ipv4_mib_exit_net, +}; + +static int __init init_ipv4_mibs(void) +{ + return register_pernet_subsys(&ipv4_mib_ops); +} + static int ipv4_proc_init(void); /* @@ -1425,6 +1439,10 @@ static int __init inet_init(void) (void)sock_register(&inet_family_ops); +#ifdef CONFIG_SYSCTL + ip_static_sysctl_init(); +#endif + /* * Add all the base protocols. */ @@ -1481,14 +1499,15 @@ static int __init inet_init(void) * Initialise the multicast router */ #if defined(CONFIG_IP_MROUTE) - ip_mr_init(); + if (ip_mr_init()) + printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n"); #endif /* * Initialise per-cpu ipv4 mibs */ if (init_ipv4_mibs()) - printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ; + printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ipv4_proc_init(); @@ -1560,5 +1579,4 @@ EXPORT_SYMBOL(inet_sock_destruct); EXPORT_SYMBOL(inet_stream_connect); EXPORT_SYMBOL(inet_stream_ops); EXPORT_SYMBOL(inet_unregister_protosw); -EXPORT_SYMBOL(net_statistics); EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 68b72a7a1806..b043eda60b04 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1,7 +1,5 @@ /* linux/net/ipv4/arp.c * - * Version: $Id: arp.c,v 1.99 2001/08/30 22:55:42 davem Exp $ - * * Copyright (C) 1994 by Florian La Roche * * This module implements the Address Resolution Protocol ARP (RFC 826), @@ -423,11 +421,12 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) struct rtable *rt; int flag = 0; /*unsigned long now; */ + struct net *net = dev_net(dev); - if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0) + if (ip_route_output_key(net, &rt, &fl) < 0) return 1; if (rt->u.dst.dev != dev) { - NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); + NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); flag = 1; } ip_rt_put(rt); @@ -570,7 +569,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, * Allocate a buffer */ - skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) return NULL; @@ -1199,7 +1198,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); break; default: break; @@ -1288,7 +1287,6 @@ static void arp_format_neigh_entry(struct seq_file *seq, struct neighbour *n) { char hbuffer[HBUFFERLEN]; - const char hexbuf[] = "0123456789ABCDEF"; int k, j; char tbuf[16]; struct net_device *dev = n->dev; @@ -1302,8 +1300,8 @@ static void arp_format_neigh_entry(struct seq_file *seq, else { #endif for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) { - hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15]; - hbuffer[k++] = hexbuf[n->ha[j] & 15]; + hbuffer[k++] = hex_asc_hi(n->ha[j]); + hbuffer[k++] = hex_asc_lo(n->ha[j]); hbuffer[k++] = ':'; } hbuffer[--k] = 0; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 05afb576d935..2c0e4572cc90 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -338,7 +338,7 @@ static int cipso_v4_cache_check(const unsigned char *key, return -ENOENT; hash = cipso_v4_map_cache_hash(key, key_len); - bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + bkt = hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { if (entry->hash == hash && @@ -417,7 +417,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb, atomic_inc(&secattr->cache->refcount); entry->lsm_data = secattr->cache; - bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { list_add(&entry->list, &cipso_v4_cache[bkt].list); diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 0c0c73f368ce..5e6c5a0f3fde 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -52,7 +52,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->sport, usin->sin_port, sk, 1); if (err) { if (err == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6848e4760f34..91d3d96805d0 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1,8 +1,6 @@ /* * NET3 IP device support routines. * - * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -90,7 +88,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U32 }, [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, - [IFA_ANYCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; @@ -141,8 +138,8 @@ void in_dev_finish_destroy(struct in_device *idev) { struct net_device *dev = idev->dev; - BUG_TRAP(!idev->ifa_list); - BUG_TRAP(!idev->mc_list); + WARN_ON(idev->ifa_list); + WARN_ON(idev->mc_list); #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", idev, dev ? dev->name : "NIL"); @@ -171,6 +168,8 @@ static struct in_device *inetdev_init(struct net_device *dev) in_dev->dev = dev; if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) goto out_kfree; + if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) + dev_disable_lro(dev); /* Reference in_dev->dev */ dev_hold(dev); /* Account for reference dev->ip_ptr (below) */ @@ -400,7 +399,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) } ipv4_devconf_setall(in_dev); if (ifa->ifa_dev != in_dev) { - BUG_TRAP(!ifa->ifa_dev); + WARN_ON(ifa->ifa_dev); in_dev_hold(in_dev); ifa->ifa_dev = in_dev; } @@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) if (tb[IFA_BROADCAST]) ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); - if (tb[IFA_ANYCAST]) - ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]); - if (tb[IFA_LABEL]) nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else @@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_broadcast = 0; - ifa->ifa_anycast = 0; ifa->ifa_scope = 0; } @@ -1018,7 +1013,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) memcpy(old, ifa->ifa_label, IFNAMSIZ); memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); if (named++ == 0) - continue; + goto skip; dot = strchr(old, ':'); if (dot == NULL) { sprintf(old, ":%d", named); @@ -1029,6 +1024,8 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) } else { strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); } +skip: + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); } } @@ -1113,7 +1110,6 @@ static inline size_t inet_nlmsg_size(void) + nla_total_size(4) /* IFA_ADDRESS */ + nla_total_size(4) /* IFA_LOCAL */ + nla_total_size(4) /* IFA_BROADCAST */ - + nla_total_size(4) /* IFA_ANYCAST */ + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ } @@ -1143,9 +1139,6 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, if (ifa->ifa_broadcast) NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); - if (ifa->ifa_anycast) - NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast); - if (ifa->ifa_label[0]) NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); @@ -1250,6 +1243,8 @@ static void inet_forward_change(struct net *net) read_lock(&dev_base_lock); for_each_netdev(net, dev) { struct in_device *in_dev; + if (on) + dev_disable_lro(dev); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) @@ -1257,8 +1252,6 @@ static void inet_forward_change(struct net *net) rcu_read_unlock(); } read_unlock(&dev_base_lock); - - rt_cache_flush(0); } static int devinet_conf_proc(ctl_table *ctl, int write, @@ -1344,10 +1337,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, if (write && *valp != val) { struct net *net = ctl->extra2; - if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) - inet_forward_change(net); - else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) - rt_cache_flush(0); + if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { + rtnl_lock(); + if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { + inet_forward_change(net); + } else if (*valp) { + struct ipv4_devconf *cnf = ctl->extra1; + struct in_device *idev = + container_of(cnf, struct in_device, cnf); + dev_disable_lro(idev->dev); + } + rtnl_unlock(); + rt_cache_flush(net, 0); + } } return ret; @@ -1360,9 +1362,10 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, int *valp = ctl->data; int val = *valp; int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + struct net *net = ctl->extra2; if (write && *valp != val) - rt_cache_flush(0); + rt_cache_flush(net, 0); return ret; } @@ -1373,9 +1376,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, { int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp, newval, newlen); + struct net *net = table->extra2; if (ret == 1) - rt_cache_flush(0); + rt_cache_flush(net, 0); return ret; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 0f1557a4ac7a..65c1503f8cc8 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -5,8 +5,6 @@ * * IPv4 Forwarding Information Base: FIB frontend. * - * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or @@ -146,7 +144,7 @@ static void fib_flush(struct net *net) } if (flushed) - rt_cache_flush(-1); + rt_cache_flush(net, -1); } /* @@ -506,7 +504,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { [RTA_PREFSRC] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, - [RTA_PROTOINFO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, }; @@ -900,21 +897,22 @@ static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); arp_ifdown(dev); } static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; + struct net_device *dev = ifa->ifa_dev->dev; switch (event) { case NETDEV_UP: fib_add_ifaddr(ifa); #ifdef CONFIG_IP_ROUTE_MULTIPATH - fib_sync_up(ifa->ifa_dev->dev); + fib_sync_up(dev); #endif - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_del_ifaddr(ifa); @@ -922,9 +920,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. Disable IP. */ - fib_disable_ip(ifa->ifa_dev->dev, 1); + fib_disable_ip(dev, 1); } else { - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); } break; } @@ -952,14 +950,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_disable_ip(dev, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); break; } return NOTIFY_DONE; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 2e2fc3376ac9..c8cac6c7f881 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -5,8 +5,6 @@ * * IPv4 FIB: lookup engine and maintenance routines. * - * Version: $Id: fib_hash.c,v 1.13 2001/10/31 21:55:54 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or @@ -474,7 +472,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); return 0; @@ -534,7 +532,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) if (new_f) fz->fz_nent++; - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -616,7 +614,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) write_unlock_bh(&fib_hash_lock); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); fn_free_alias(fa, f); if (kill_fn) { fn_free_node(f); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 1fb56876be54..6080d7120821 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -258,9 +258,9 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(4); /* flow */ } -static void fib4_rule_flush_cache(void) +static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { - rt_cache_flush(-1); + rt_cache_flush(ops->fro_net, -1); } static struct fib_rules_ops fib4_rules_ops_template = { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3b83c34019fc..ded2ae34eab1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -5,8 +5,6 @@ * * IPv4 Forwarding Information Base: semantics. * - * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or @@ -960,7 +958,10 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_dst_len = dst_len; rtm->rtm_src_len = 0; rtm->rtm_tos = tos; - rtm->rtm_table = tb_id; + if (tb_id < 256) + rtm->rtm_table = tb_id; + else + rtm->rtm_table = RT_TABLE_COMPAT; NLA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4b02d14e7ab9..5cb72786a8af 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -22,8 +22,6 @@ * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 * - * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $ - * * * Code from fib_hash has been reused which includes the following header: * @@ -1273,7 +1271,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); @@ -1318,7 +1316,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); succeeded: @@ -1359,17 +1357,17 @@ static int check_leaf(struct trie *t, struct leaf *l, t->stats.semantic_match_miss++; #endif if (err <= 0) - return plen; + return err; } - return -1; + return 1; } static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) { struct trie *t = (struct trie *) tb->tb_data; - int plen, ret = 0; + int ret; struct node *n; struct tnode *pn; int pos, bits; @@ -1393,10 +1391,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, /* Just a leaf? */ if (IS_LEAF(n)) { - plen = check_leaf(t, (struct leaf *)n, key, flp, res); - if (plen < 0) - goto failed; - ret = 0; + ret = check_leaf(t, (struct leaf *)n, key, flp, res); goto found; } @@ -1421,11 +1416,9 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, } if (IS_LEAF(n)) { - plen = check_leaf(t, (struct leaf *)n, key, flp, res); - if (plen < 0) + ret = check_leaf(t, (struct leaf *)n, key, flp, res); + if (ret > 0) goto backtrace; - - ret = 0; goto found; } @@ -1666,7 +1659,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) trie_leaf_remove(t, l); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); @@ -2258,25 +2251,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) static int fib_triestat_seq_open(struct inode *inode, struct file *file) { - int err; - struct net *net; - - net = get_proc_net(inode); - if (net == NULL) - return -ENXIO; - err = single_open(file, fib_triestat_seq_show, net); - if (err < 0) { - put_net(net); - return err; - } - return 0; -} - -static int fib_triestat_seq_release(struct inode *ino, struct file *f) -{ - struct seq_file *seq = f->private_data; - put_net(seq->private); - return single_release(ino, f); + return single_open_net(inode, file, fib_triestat_seq_show); } static const struct file_operations fib_triestat_fops = { @@ -2284,7 +2259,7 @@ static const struct file_operations fib_triestat_fops = { .open = fib_triestat_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = fib_triestat_seq_release, + .release = single_release_net, }; static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 87397351ddac..860558633b2c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -3,8 +3,6 @@ * * Alan Cox, <alan@redhat.com> * - * Version: $Id: icmp.c,v 1.85 2002/02/01 22:01:03 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -113,12 +111,6 @@ struct icmp_bxm { unsigned char optbuf[40]; }; -/* - * Statistics - */ -DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly; -DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics) __read_mostly; - /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ @@ -298,10 +290,10 @@ out: /* * Maintain the counters used in the SNMP statistics for outgoing ICMP */ -void icmp_out_count(unsigned char type) +void icmp_out_count(struct net *net, unsigned char type) { - ICMPMSGOUT_INC_STATS(type); - ICMP_INC_STATS(ICMP_MIB_OUTMSGS); + ICMPMSGOUT_INC_STATS(net, type); + ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS); } /* @@ -765,7 +757,7 @@ static void icmp_unreach(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); goto out; } @@ -805,7 +797,7 @@ static void icmp_redirect(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); goto out; } @@ -876,7 +868,7 @@ static void icmp_timestamp(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(dev_net(skb->dst->dev), ICMP_MIB_INERRORS); goto out; } @@ -975,6 +967,7 @@ int icmp_rcv(struct sk_buff *skb) { struct icmphdr *icmph; struct rtable *rt = skb->rtable; + struct net *net = dev_net(rt->u.dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { int nh; @@ -995,7 +988,7 @@ int icmp_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -1013,7 +1006,7 @@ int icmp_rcv(struct sk_buff *skb) icmph = icmp_hdr(skb); - ICMPMSGIN_INC_STATS_BH(icmph->type); + ICMPMSGIN_INC_STATS_BH(net, icmph->type); /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * @@ -1029,9 +1022,6 @@ int icmp_rcv(struct sk_buff *skb) */ if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { - struct net *net; - - net = dev_net(rt->u.dst.dev); /* * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be * silently ignored (we let user decide with a sysctl). @@ -1057,7 +1047,7 @@ drop: kfree_skb(skb); return 0; error: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); goto drop; } @@ -1217,5 +1207,4 @@ int __init icmp_init(void) EXPORT_SYMBOL(icmp_err_convert); EXPORT_SYMBOL(icmp_send); -EXPORT_SYMBOL(icmp_statistics); EXPORT_SYMBOL(xrlim_allow); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6250f4239b61..6203ece53606 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -8,8 +8,6 @@ * the older version didn't come out right using gcc 2.5.8, the newer one * seems to fall out with gcc 2.6.2. * - * Version: $Id: igmp.c,v 1.47 2002/02/01 22:01:03 davem Exp $ - * * Authors: * Alan Cox <Alan.Cox@linux.org> * @@ -292,7 +290,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct iphdr *pip; struct igmpv3_report *pig; - skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) return NULL; @@ -653,7 +651,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, return -1; } - skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC); + skb=alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); return -1; @@ -1198,7 +1196,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) + if (!net_eq(dev_net(in_dev->dev), &init_net)) return; for (im=in_dev->mc_list; im; im=im->next) { @@ -1280,7 +1278,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) + if (!net_eq(dev_net(in_dev->dev), &init_net)) return; for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { @@ -1310,7 +1308,7 @@ void ip_mc_down(struct in_device *in_dev) ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) + if (!net_eq(dev_net(in_dev->dev), &init_net)) return; for (i=in_dev->mc_list; i; i=i->next) @@ -1333,7 +1331,7 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) + if (!net_eq(dev_net(in_dev->dev), &init_net)) return; in_dev->mc_tomb = NULL; @@ -1359,7 +1357,7 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) + if (!net_eq(dev_net(in_dev->dev), &init_net)) return; ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); @@ -1378,7 +1376,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev) ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) + if (!net_eq(dev_net(in_dev->dev), &init_net)) return; /* Deactivate timers */ @@ -1762,7 +1760,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sock_net(sk) != &init_net) + if (!net_eq(sock_net(sk), &init_net)) return -EPROTONOSUPPORT; rtnl_lock(); @@ -1833,7 +1831,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) u32 ifindex; int ret = -EADDRNOTAVAIL; - if (sock_net(sk) != &init_net) + if (!net_eq(sock_net(sk), &init_net)) return -EPROTONOSUPPORT; rtnl_lock(); @@ -1881,7 +1879,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sock_net(sk) != &init_net) + if (!net_eq(sock_net(sk), &init_net)) return -EPROTONOSUPPORT; rtnl_lock(); @@ -2017,7 +2015,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; - if (sock_net(sk) != &init_net) + if (!net_eq(sock_net(sk), &init_net)) return -EPROTONOSUPPORT; rtnl_lock(); @@ -2100,7 +2098,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sock_net(sk) != &init_net) + if (!net_eq(sock_net(sk), &init_net)) return -EPROTONOSUPPORT; rtnl_lock(); @@ -2165,7 +2163,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sock_net(sk) != &init_net) + if (!net_eq(sock_net(sk), &init_net)) return -EPROTONOSUPPORT; rtnl_lock(); @@ -2252,7 +2250,7 @@ void ip_mc_drop_socket(struct sock *sk) if (inet->mc_list == NULL) return; - if (sock_net(sk) != &init_net) + if (!net_eq(sock_net(sk), &init_net)) return; rtnl_lock(); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 828ea211ff21..0c1ae68ee84b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -103,7 +103,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) rover = net_random() % remaining + low; do { - head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(net, rover, + hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->ib_net == net && tb->port == rover) @@ -130,7 +131,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) */ snum = rover; } else { - head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(net, snum, + hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->ib_net == net && tb->port == snum) @@ -165,7 +167,7 @@ tb_not_found: success: if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); + WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); ret = 0; fail_unlock: @@ -258,7 +260,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) } newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); - BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); + WARN_ON(newsk->sk_state == TCP_SYN_RECV); out: release_sock(sk); return newsk; @@ -336,15 +338,16 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, .uli_u = { .ports = { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; + struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) { - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { ip_rt_put(rt); - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } return &rt->u.dst; @@ -383,7 +386,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, ireq->rmt_addr == raddr && ireq->loc_addr == laddr && AF_INET_FAMILY(req->rsk_ops->family)) { - BUG_TRAP(!req->sk); + WARN_ON(req->sk); *prevp = prev; break; } @@ -419,7 +422,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, struct inet_connection_sock *icsk = inet_csk(parent); struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct listen_sock *lopt = queue->listen_opt; - int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + int thresh = max_retries; unsigned long now = jiffies; struct request_sock **reqp, *req; int i, budget; @@ -455,6 +459,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, } } + if (queue->rskq_defer_accept) + max_retries = queue->rskq_defer_accept; + budget = 2 * (lopt->nr_table_entries / (timeout / interval)); i = lopt->clock_hand; @@ -462,8 +469,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, reqp=&lopt->syn_table[i]; while ((req = *reqp) != NULL) { if (time_after_eq(now, req->expires)) { - if (req->retrans < thresh && - !req->rsk_ops->rtx_syn_ack(parent, req)) { + if ((req->retrans < thresh || + (inet_rsk(req)->acked && req->retrans < max_retries)) + && !req->rsk_ops->rtx_syn_ack(parent, req)) { unsigned long timeo; if (req->retrans++ == 0) @@ -531,14 +539,14 @@ EXPORT_SYMBOL_GPL(inet_csk_clone); */ void inet_csk_destroy_sock(struct sock *sk) { - BUG_TRAP(sk->sk_state == TCP_CLOSE); - BUG_TRAP(sock_flag(sk, SOCK_DEAD)); + WARN_ON(sk->sk_state != TCP_CLOSE); + WARN_ON(!sock_flag(sk, SOCK_DEAD)); /* It cannot be in hash table! */ - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ - BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash); + WARN_ON(inet_sk(sk)->num && !inet_csk(sk)->icsk_bind_hash); sk->sk_prot->destroy(sk); @@ -621,7 +629,7 @@ void inet_csk_listen_stop(struct sock *sk) local_bh_disable(); bh_lock_sock(child); - BUG_TRAP(!sock_owned_by_user(child)); + WARN_ON(sock_owned_by_user(child)); sock_hold(child); sk->sk_prot->disconnect(child, O_NONBLOCK); @@ -639,7 +647,7 @@ void inet_csk_listen_stop(struct sock *sk) sk_acceptq_removed(sk); __reqsk_free(req); } - BUG_TRAP(!sk->sk_ack_backlog); + WARN_ON(sk->sk_ack_backlog); } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index da97695e7096..c10036e7a463 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1,8 +1,6 @@ /* * inet_diag.c Module for monitoring INET transport protocols sockets. * - * Version: $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 4ed429bd5951..6c52e08f786e 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -134,8 +134,8 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, struct sk_buff *fp; struct netns_frags *nf; - BUG_TRAP(q->last_in & INET_FRAG_COMPLETE); - BUG_TRAP(del_timer(&q->timer) == 0); + WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); + WARN_ON(del_timer(&q->timer) != 0); /* Release all fragment data. */ fp = q->fragments; @@ -192,14 +192,21 @@ EXPORT_SYMBOL(inet_frag_evictor); static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, - unsigned int hash, void *arg) + void *arg) { struct inet_frag_queue *qp; #ifdef CONFIG_SMP struct hlist_node *n; #endif + unsigned int hash; write_lock(&f->lock); + /* + * While we stayed w/o the lock other CPU could update + * the rnd seed, so we need to re-calculate the hash + * chain. Fortunatelly the qp_in can be used to get one. + */ + hash = f->hashfn(qp_in); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we @@ -247,7 +254,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - struct inet_frags *f, void *arg, unsigned int hash) + struct inet_frags *f, void *arg) { struct inet_frag_queue *q; @@ -255,7 +262,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, if (q == NULL) return NULL; - return inet_frag_intern(nf, q, f, hash, arg); + return inet_frag_intern(nf, q, f, arg); } struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, @@ -264,7 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; struct hlist_node *n; - read_lock(&f->lock); hlist_for_each_entry(q, n, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); @@ -274,6 +280,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, } read_unlock(&f->lock); - return inet_frag_create(nf, f, key, hash); + return inet_frag_create(nf, f, key); } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 2023d37b2708..44981906fb91 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -70,7 +70,8 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, static void __inet_put_port(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); + const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num, + hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; @@ -95,7 +96,8 @@ EXPORT_SYMBOL(inet_put_port); void __inet_inherit_port(struct sock *sk, struct sock *child) { struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); + const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num, + table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; struct inet_bind_bucket *tb; @@ -192,7 +194,7 @@ struct sock *__inet_lookup_listener(struct net *net, const struct hlist_head *head; read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; if (!hlist_empty(head)) { const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); @@ -225,7 +227,7 @@ struct sock * __inet_lookup_established(struct net *net, /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); + unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); @@ -265,13 +267,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct net *net = sock_net(sk); + unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; - struct net *net = sock_net(sk); prefetch(head->chain.first); write_lock(lock); @@ -303,18 +305,18 @@ unique: inet->num = lport; inet->sport = htons(lport); sk->sk_hash = hash; - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); if (twp) { *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); } else if (tw) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule(tw, death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); } @@ -340,7 +342,7 @@ void __inet_hash_nolisten(struct sock *sk) rwlock_t *lock; struct inet_ehash_bucket *head; - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); sk->sk_hash = inet_sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); @@ -365,7 +367,7 @@ static void __inet_hash(struct sock *sk) return; } - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; lock = &hashinfo->lhash_lock; @@ -438,7 +440,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, local_bh_disable(); for (i = 1; i <= remaining; i++) { port = low + (i + offset) % remaining; - head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -447,7 +450,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, */ inet_bind_bucket_for_each(tb, node, &head->chain) { if (tb->ib_net == net && tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); + WARN_ON(hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; if (!check_established(death_row, sk, @@ -493,7 +496,7 @@ ok: goto out; } - head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 4a4d49fca1f2..cfd034a2b96e 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -383,8 +383,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, out2: /* send aggregated SKBs to stack */ lro_flush(lro_mgr, lro_desc); -out: /* Original SKB has to be posted to stack */ - skb->ip_summed = lro_mgr->ip_summed; +out: return 1; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index ce16e9ac24c1..d985bd613d25 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -32,7 +32,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, write_unlock(lock); /* Disassociate with bind bucket. */ - bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; __hlist_del(&tw->tw_bind_node); @@ -81,10 +82,11 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; - BUG_TRAP(icsk->icsk_bind_hash); + WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); @@ -158,6 +160,9 @@ rescan: __inet_twsk_del_dead_node(tw); spin_unlock(&twdr->death_lock); __inet_twsk_kill(tw, twdr->hashinfo); +#ifdef CONFIG_NET_NS + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); +#endif inet_twsk_put(tw); killed++; spin_lock(&twdr->death_lock); @@ -176,8 +181,9 @@ rescan: } twdr->tw_count -= killed; - NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); - +#ifndef CONFIG_NET_NS + NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed); +#endif return ret; } @@ -370,6 +376,9 @@ void inet_twdr_twcal_tick(unsigned long data) &twdr->twcal_row[slot]) { __inet_twsk_del_dead_node(tw); __inet_twsk_kill(tw, twdr->hashinfo); +#ifdef CONFIG_NET_NS + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); +#endif inet_twsk_put(tw); killed++; } @@ -393,7 +402,9 @@ void inet_twdr_twcal_tick(unsigned long data) out: if ((twdr->tw_count -= killed) == 0) del_timer(&twdr->tw_timer); - NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); +#ifndef CONFIG_NET_NS + NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed); +#endif spin_unlock(&twdr->death_lock); } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index af995198f643..a456ceeac3f2 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -3,8 +3,6 @@ * * This source is covered by the GNU GPL, the same as all kernel sources. * - * Version: $Id: inetpeer.c,v 1.7 2001/09/20 21:22:50 davem Exp $ - * * Authors: Andrey V. Savochkin <saw@msu.ru> */ diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 4813c39b438b..450016b89a18 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -5,8 +5,6 @@ * * The IP forwarding functionality. * - * Version: $Id: ip_forward.c,v 1.48 2000/12/13 18:31:48 davem Exp $ - * * Authors: see ip.c * * Fixes: @@ -44,7 +42,7 @@ static int ip_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -58,6 +56,9 @@ int ip_forward(struct sk_buff *skb) struct rtable *rt; /* Route we use */ struct ip_options * opt = &(IPCB(skb)->opt); + if (skb_warn_if_lro(skb)) + goto drop; + if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) goto drop; @@ -87,7 +88,7 @@ int ip_forward(struct sk_buff *skb) if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { - IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(dst_mtu(&rt->u.dst))); goto drop; @@ -122,7 +123,7 @@ sr_failed: too_many_hops: /* Tell the sender its packet died... */ - IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cd6ce6ac6358..2152d222b954 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -5,8 +5,6 @@ * * The IP fragmentation functionality. * - * Version: $Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $ - * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox <Alan.Cox@linux.org> * @@ -180,7 +178,7 @@ static void ip_evictor(struct net *net) evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags); if (evicted) - IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted); + IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); } /* @@ -189,8 +187,10 @@ static void ip_evictor(struct net *net) static void ip_expire(unsigned long arg) { struct ipq *qp; + struct net *net; qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); + net = container_of(qp->q.net, struct net, ipv4.frags); spin_lock(&qp->q.lock); @@ -199,14 +199,12 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); - IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; - struct net *net; - net = container_of(qp->q.net, struct net, ipv4.frags); /* Send an ICMP "Fragment Reassembly Timeout" message. */ if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) { icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); @@ -229,6 +227,8 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) arg.iph = iph; arg.user = user; + + read_lock(&ip4_frags.lock); hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); @@ -261,7 +261,10 @@ static inline int ip_frag_too_far(struct ipq *qp) rc = qp->q.fragments && (end - start) > max; if (rc) { - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + struct net *net; + + net = container_of(qp->q.net, struct net, ipv4.frags); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); } return rc; @@ -485,8 +488,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, qp->q.fragments = head; } - BUG_TRAP(head != NULL); - BUG_TRAP(FRAG_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(FRAG_CB(head)->offset != 0); /* Allocate a new buffer for the datagram. */ ihlen = ip_hdrlen(head); @@ -545,7 +548,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph = ip_hdr(head); iph->frag_off = 0; iph->tot_len = htons(len); - IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; return 0; @@ -560,7 +563,7 @@ out_oversize: "Oversized IP packet from " NIPQUAD_FMT ".\n", NIPQUAD(qp->saddr)); out_fail: - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS); return err; } @@ -570,9 +573,9 @@ int ip_defrag(struct sk_buff *skb, u32 user) struct ipq *qp; struct net *net; - IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); - net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); + /* Start by cleaning up the memory. */ if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) ip_evictor(net); @@ -590,7 +593,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) return ret; } - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -ENOMEM; } @@ -598,7 +601,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) #ifdef CONFIG_SYSCTL static int zero; -static struct ctl_table ip4_frags_ctl_table[] = { +static struct ctl_table ip4_frags_ns_ctl_table[] = { { .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, .procname = "ipfrag_high_thresh", @@ -624,6 +627,10 @@ static struct ctl_table ip4_frags_ctl_table[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies }, + { } +}; + +static struct ctl_table ip4_frags_ctl_table[] = { { .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, .procname = "ipfrag_secret_interval", @@ -644,22 +651,20 @@ static struct ctl_table ip4_frags_ctl_table[] = { { } }; -static int ip4_frags_ctl_register(struct net *net) +static int ip4_frags_ns_ctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; - table = ip4_frags_ctl_table; + table = ip4_frags_ns_ctl_table; if (net != &init_net) { - table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL); + table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; table[0].data = &net->ipv4.frags.high_thresh; table[1].data = &net->ipv4.frags.low_thresh; table[2].data = &net->ipv4.frags.timeout; - table[3].mode &= ~0222; - table[4].mode &= ~0222; } hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); @@ -676,7 +681,7 @@ err_alloc: return -ENOMEM; } -static void ip4_frags_ctl_unregister(struct net *net) +static void ip4_frags_ns_ctl_unregister(struct net *net) { struct ctl_table *table; @@ -684,13 +689,22 @@ static void ip4_frags_ctl_unregister(struct net *net) unregister_net_sysctl_table(net->ipv4.frags_hdr); kfree(table); } + +static void ip4_frags_ctl_register(void) +{ + register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table); +} #else -static inline int ip4_frags_ctl_register(struct net *net) +static inline int ip4_frags_ns_ctl_register(struct net *net) { return 0; } -static inline void ip4_frags_ctl_unregister(struct net *net) +static inline void ip4_frags_ns_ctl_unregister(struct net *net) +{ +} + +static inline void ip4_frags_ctl_register(void) { } #endif @@ -714,12 +728,12 @@ static int ipv4_frags_init_net(struct net *net) inet_frags_init_net(&net->ipv4.frags); - return ip4_frags_ctl_register(net); + return ip4_frags_ns_ctl_register(net); } static void ipv4_frags_exit_net(struct net *net) { - ip4_frags_ctl_unregister(net); + ip4_frags_ns_ctl_unregister(net); inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); } @@ -730,6 +744,7 @@ static struct pernet_operations ip4_frags_ops = { void __init ipfrag_init(void) { + ip4_frags_ctl_register(); register_pernet_subsys(&ip4_frags_ops); ip4_frags.hashfn = ip4_hashfn; ip4_frags.constructor = ip4_frag_init; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2ada033406de..2a61158ea722 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -313,9 +313,8 @@ static void ipgre_tunnel_uninit(struct net_device *dev) static void ipgre_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. @@ -398,149 +397,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipgre_lock); return; -#else - struct iphdr *iph = (struct iphdr*)dp; - struct iphdr *eiph; - __be16 *p = (__be16*)(dp+(iph->ihl<<2)); - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - __be32 rel_info = 0; - __u32 n = 0; - __be16 flags; - int grehlen = (iph->ihl<<2) + 4; - struct sk_buff *skb2; - struct flowi fl; - struct rtable *rt; - - if (p[1] != htons(ETH_P_IP)) - return; - - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) - return; - if (flags&GRE_CSUM) - grehlen += 4; - if (flags&GRE_KEY) - grehlen += 4; - if (flags&GRE_SEQ) - grehlen += 4; - } - if (len < grehlen + sizeof(struct iphdr)) - return; - eiph = (struct iphdr*)(dp + grehlen); - - switch (type) { - default: - return; - case ICMP_PARAMETERPROB: - n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; - if (n < (iph->ihl<<2)) - return; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMP_PARAMETERPROB; - n -= grehlen; - rel_info = htonl(n << 24); - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return; - case ICMP_FRAG_NEEDED: - /* And it is the only really necessary thing :-) */ - n = ntohs(icmp_hdr(skb)->un.frag.mtu); - if (n < grehlen+68) - return; - n -= grehlen; - /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (n > ntohs(eiph->tot_len)) - return; - rel_info = htonl(n); - break; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. --ANK - */ - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return; - break; - } - - /* Prepare fake skb to feed it to icmp_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)eiph); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_dst = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_GRE; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) { - kfree_skb(skb2); - return; - } - skb2->dev = rt->u.dst.dev; - - /* route "incoming" packet */ - if (rt->rt_flags&RTCF_LOCAL) { - ip_rt_put(rt); - rt = NULL; - fl.fl4_dst = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_IPGRE) { - ip_rt_put(rt); - kfree_skb(skb2); - return; - } - } else { - ip_rt_put(rt); - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb2->dst->dev->type != ARPHRD_IPGRE) { - kfree_skb(skb2); - return; - } - } - - /* change mtu on this route */ - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (n > dst_mtu(skb2->dst)) { - kfree_skb(skb2); - return; - } - skb2->dst->ops->update_pmtu(skb2->dst, n); - } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = netdev_priv(skb2->dev); - if (t->parms.iph.ttl) { - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - } - } - - icmp_send(skb2, rel_type, rel_code, rel_info); - kfree_skb(skb2); -#endif } static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) @@ -617,6 +473,8 @@ static int ipgre_rcv(struct sk_buff *skb) read_lock(&ipgre_lock); if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr, key)) != NULL) { + struct net_device_stats *stats = &tunnel->dev->stats; + secpath_reset(skb); skb->protocol = *(__be16*)(h + 2); @@ -641,28 +499,28 @@ static int ipgre_rcv(struct sk_buff *skb) /* Looped back packet, drop it! */ if (skb->rtable->fl.iif == 0) goto drop; - tunnel->stat.multicast++; + stats->multicast++; skb->pkt_type = PACKET_BROADCAST; } #endif if (((flags&GRE_CSUM) && csum) || (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { - tunnel->stat.rx_crc_errors++; - tunnel->stat.rx_errors++; + stats->rx_crc_errors++; + stats->rx_errors++; goto drop; } if (tunnel->parms.i_flags&GRE_SEQ) { if (!(flags&GRE_SEQ) || (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { - tunnel->stat.rx_fifo_errors++; - tunnel->stat.rx_errors++; + stats->rx_fifo_errors++; + stats->rx_errors++; goto drop; } tunnel->i_seqno = seqno + 1; } - tunnel->stat.rx_packets++; - tunnel->stat.rx_bytes += skb->len; + stats->rx_packets++; + stats->rx_bytes += skb->len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; @@ -684,7 +542,7 @@ drop_nolock: static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->stat; + struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *old_iph = ip_hdr(skb); struct iphdr *tiph; u8 tos; @@ -698,7 +556,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int mtu; if (tunnel->recursion++) { - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -714,7 +572,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) /* NBMA tunnel */ if (skb->dst == NULL) { - tunnel->stat.tx_fifo_errors++; + stats->tx_fifo_errors++; goto tx_error; } @@ -765,7 +623,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .tos = RT_TOS(tos) } }, .proto = IPPROTO_GRE }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error; } } @@ -773,7 +631,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (tdev == dev) { ip_rt_put(rt); - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -1098,11 +956,6 @@ done: return err; } -static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) -{ - return &(((struct ip_tunnel*)netdev_priv(dev))->stat); -} - static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { struct ip_tunnel *tunnel = netdev_priv(dev); @@ -1228,7 +1081,6 @@ static void ipgre_tunnel_setup(struct net_device *dev) dev->uninit = ipgre_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipgre_tunnel_xmit; - dev->get_stats = ipgre_tunnel_get_stats; dev->do_ioctl = ipgre_tunnel_ioctl; dev->change_mtu = ipgre_tunnel_change_mtu; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 7b4bad6d572f..e0bed56c51f1 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -5,8 +5,6 @@ * * The Internet Protocol (IP) module. * - * Version: $Id: ip_input.c,v 1.55 2002/01/12 07:39:45 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> @@ -147,12 +145,6 @@ #include <linux/netlink.h> /* - * SNMP management statistics - */ - -DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly; - -/* * Process Router Attention IP option */ int ip_call_ra_chain(struct sk_buff *skb) @@ -232,16 +224,16 @@ static int ip_local_deliver_finish(struct sk_buff *skb) protocol = -ret; goto resubmit; } - IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } } else - IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); kfree_skb(skb); } } @@ -283,7 +275,7 @@ static inline int ip_rcv_options(struct sk_buff *skb) --ANK (980813) */ if (skb_cow(skb, skb_headroom(skb))) { - IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto drop; } @@ -292,7 +284,7 @@ static inline int ip_rcv_options(struct sk_buff *skb) opt->optlen = iph->ihl*4 - sizeof(struct iphdr); if (ip_options_compile(dev_net(dev), opt, skb)) { - IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); goto drop; } @@ -336,9 +328,11 @@ static int ip_rcv_finish(struct sk_buff *skb) skb->dev); if (unlikely(err)) { if (err == -EHOSTUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + IP_INC_STATS_BH(dev_net(skb->dev), + IPSTATS_MIB_INADDRERRORS); else if (err == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES); + IP_INC_STATS_BH(dev_net(skb->dev), + IPSTATS_MIB_INNOROUTES); goto drop; } } @@ -359,9 +353,9 @@ static int ip_rcv_finish(struct sk_buff *skb) rt = skb->rtable; if (rt->rt_type == RTN_MULTICAST) - IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); + IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCASTPKTS); else if (rt->rt_type == RTN_BROADCAST) - IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS); + IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCASTPKTS); return dst_input(skb); @@ -384,10 +378,10 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, if (skb->pkt_type == PACKET_OTHERHOST) goto drop; - IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { - IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto out; } @@ -397,7 +391,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, iph = ip_hdr(skb); /* - * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. + * RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum. * * Is the datagram acceptable? * @@ -420,7 +414,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; @@ -430,7 +424,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, * Note this now means skb->len holds ntohs(iph->tot_len). */ if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto drop; } @@ -441,11 +435,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, ip_rcv_finish); inhdr_error: - IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); drop: kfree_skb(skb); out: return NET_RX_DROP; } - -EXPORT_SYMBOL(ip_statistics); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 33126ad2cfdc..be3f18a7a40e 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -5,8 +5,6 @@ * * The options processing module for ip.c * - * Version: $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $ - * * Authors: A.N.Kuznetsov * */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e527628f56cf..d533a89e08de 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -5,8 +5,6 @@ * * The Internet Protocol (IP) output module. * - * Version: $Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> @@ -120,7 +118,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) __skb_pull(newskb, skb_network_offset(newskb)); newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; - BUG_TRAP(newskb->dst); + WARN_ON(!newskb->dst); netif_rx(newskb); return 0; } @@ -184,9 +182,9 @@ static inline int ip_finish_output2(struct sk_buff *skb) unsigned int hh_len = LL_RESERVED_SPACE(dev); if (rt->rt_type == RTN_MULTICAST) - IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS); else if (rt->rt_type == RTN_BROADCAST) - IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { @@ -246,7 +244,7 @@ int ip_mc_output(struct sk_buff *skb) /* * If the indicated interface is up and running, send the packet. */ - IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS); skb->dev = dev; skb->protocol = htons(ETH_P_IP); @@ -300,7 +298,7 @@ int ip_output(struct sk_buff *skb) { struct net_device *dev = skb->dst->dev; - IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS); skb->dev = dev; skb->protocol = htons(ETH_P_IP); @@ -391,7 +389,7 @@ packet_routed: return ip_local_out(skb); no_route: - IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EHOSTUNREACH; } @@ -453,7 +451,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) iph = ip_hdr(skb); if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { - IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(ip_skb_dst_mtu(skb))); kfree_skb(skb); @@ -544,7 +542,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) err = output(skb); if (!err) - IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -554,7 +552,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) } if (err == 0) { - IP_INC_STATS(IPSTATS_MIB_FRAGOKS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); return 0; } @@ -563,7 +561,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) kfree_skb(frag); frag = skb; } - IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; } @@ -675,15 +673,15 @@ slow_path: if (err) goto fail; - IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); - IP_INC_STATS(IPSTATS_MIB_FRAGOKS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); return err; fail: kfree_skb(skb); - IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; } @@ -1049,7 +1047,7 @@ alloc_new_skb: error: inet->cork.length -= length; - IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); return err; } @@ -1191,7 +1189,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, error: inet->cork.length -= size; - IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); return err; } @@ -1213,6 +1211,7 @@ int ip_push_pending_frames(struct sock *sk) struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); struct ip_options *opt = NULL; struct rtable *rt = (struct rtable *)inet->cork.dst; struct iphdr *iph; @@ -1282,7 +1281,7 @@ int ip_push_pending_frames(struct sock *sk) skb->dst = dst_clone(&rt->u.dst); if (iph->protocol == IPPROTO_ICMP) - icmp_out_count(((struct icmphdr *) + icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); /* Netfilter gets whole the not fragmented skb. */ @@ -1299,7 +1298,7 @@ out: return err; error: - IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); goto out; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index e0514e82308e..105d92a039b9 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -5,8 +5,6 @@ * * The IP to API glue. * - * Version: $Id: ip_sockglue.c,v 1.62 2002/02/01 22:01:04 davem Exp $ - * * Authors: see ip.c * * Fixes: diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index a75807b971b3..38ccb6dfb02e 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -14,153 +14,14 @@ * - Adaptive compression. */ #include <linux/module.h> -#include <linux/crypto.h> #include <linux/err.h> -#include <linux/pfkeyv2.h> -#include <linux/percpu.h> -#include <linux/smp.h> -#include <linux/list.h> -#include <linux/vmalloc.h> #include <linux/rtnetlink.h> -#include <linux/mutex.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/icmp.h> #include <net/ipcomp.h> #include <net/protocol.h> - -struct ipcomp_tfms { - struct list_head list; - struct crypto_comp **tfms; - int users; -}; - -static DEFINE_MUTEX(ipcomp_resource_mutex); -static void **ipcomp_scratches; -static int ipcomp_scratch_users; -static LIST_HEAD(ipcomp_tfms_list); - -static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipcomp_data *ipcd = x->data; - const int plen = skb->len; - int dlen = IPCOMP_SCRATCH_SIZE; - const u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); - int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); - - if (err) - goto out; - - if (dlen < (plen + sizeof(struct ip_comp_hdr))) { - err = -EINVAL; - goto out; - } - - err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC); - if (err) - goto out; - - skb->truesize += dlen - plen; - __skb_put(skb, dlen - plen); - skb_copy_to_linear_data(skb, scratch, dlen); -out: - put_cpu(); - return err; -} - -static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int nexthdr; - int err = -ENOMEM; - struct ip_comp_hdr *ipch; - - if (skb_linearize_cow(skb)) - goto out; - - skb->ip_summed = CHECKSUM_NONE; - - /* Remove ipcomp header and decompress original payload */ - ipch = (void *)skb->data; - nexthdr = ipch->nexthdr; - - skb->transport_header = skb->network_header + sizeof(*ipch); - __skb_pull(skb, sizeof(*ipch)); - err = ipcomp_decompress(x, skb); - if (err) - goto out; - - err = nexthdr; - -out: - return err; -} - -static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipcomp_data *ipcd = x->data; - const int plen = skb->len; - int dlen = IPCOMP_SCRATCH_SIZE; - u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); - int err; - - local_bh_disable(); - err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - local_bh_enable(); - if (err) - goto out; - - if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) { - err = -EMSGSIZE; - goto out; - } - - memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - put_cpu(); - - pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); - return 0; - -out: - put_cpu(); - return err; -} - -static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - struct ip_comp_hdr *ipch; - struct ipcomp_data *ipcd = x->data; - - if (skb->len < ipcd->threshold) { - /* Don't bother compressing */ - goto out_ok; - } - - if (skb_linearize_cow(skb)) - goto out_ok; - - err = ipcomp_compress(x, skb); - - if (err) { - goto out_ok; - } - - /* Install ipcomp header, convert into ipcomp datagram. */ - ipch = ip_comp_hdr(skb); - ipch->nexthdr = *skb_mac_header(skb); - ipch->flags = 0; - ipch->cpi = htons((u16 )ntohl(x->id.spi)); - *skb_mac_header(skb) = IPPROTO_COMP; -out_ok: - skb_push(skb, -skb_network_offset(skb)); - return 0; -} +#include <net/sock.h> static void ipcomp4_err(struct sk_buff *skb, u32 info) { @@ -241,155 +102,9 @@ out: return err; } -static void ipcomp_free_scratches(void) -{ - int i; - void **scratches; - - if (--ipcomp_scratch_users) - return; - - scratches = ipcomp_scratches; - if (!scratches) - return; - - for_each_possible_cpu(i) - vfree(*per_cpu_ptr(scratches, i)); - - free_percpu(scratches); -} - -static void **ipcomp_alloc_scratches(void) -{ - int i; - void **scratches; - - if (ipcomp_scratch_users++) - return ipcomp_scratches; - - scratches = alloc_percpu(void *); - if (!scratches) - return NULL; - - ipcomp_scratches = scratches; - - for_each_possible_cpu(i) { - void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); - if (!scratch) - return NULL; - *per_cpu_ptr(scratches, i) = scratch; - } - - return scratches; -} - -static void ipcomp_free_tfms(struct crypto_comp **tfms) -{ - struct ipcomp_tfms *pos; - int cpu; - - list_for_each_entry(pos, &ipcomp_tfms_list, list) { - if (pos->tfms == tfms) - break; - } - - BUG_TRAP(pos); - - if (--pos->users) - return; - - list_del(&pos->list); - kfree(pos); - - if (!tfms) - return; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_comp(tfm); - } - free_percpu(tfms); -} - -static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) -{ - struct ipcomp_tfms *pos; - struct crypto_comp **tfms; - int cpu; - - /* This can be any valid CPU ID so we don't need locking. */ - cpu = raw_smp_processor_id(); - - list_for_each_entry(pos, &ipcomp_tfms_list, list) { - struct crypto_comp *tfm; - - tfms = pos->tfms; - tfm = *per_cpu_ptr(tfms, cpu); - - if (!strcmp(crypto_comp_name(tfm), alg_name)) { - pos->users++; - return tfms; - } - } - - pos = kmalloc(sizeof(*pos), GFP_KERNEL); - if (!pos) - return NULL; - - pos->users = 1; - INIT_LIST_HEAD(&pos->list); - list_add(&pos->list, &ipcomp_tfms_list); - - pos->tfms = tfms = alloc_percpu(struct crypto_comp *); - if (!tfms) - goto error; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto error; - *per_cpu_ptr(tfms, cpu) = tfm; - } - - return tfms; - -error: - ipcomp_free_tfms(tfms); - return NULL; -} - -static void ipcomp_free_data(struct ipcomp_data *ipcd) +static int ipcomp4_init_state(struct xfrm_state *x) { - if (ipcd->tfms) - ipcomp_free_tfms(ipcd->tfms); - ipcomp_free_scratches(); -} - -static void ipcomp_destroy(struct xfrm_state *x) -{ - struct ipcomp_data *ipcd = x->data; - if (!ipcd) - return; - xfrm_state_delete_tunnel(x); - mutex_lock(&ipcomp_resource_mutex); - ipcomp_free_data(ipcd); - mutex_unlock(&ipcomp_resource_mutex); - kfree(ipcd); -} - -static int ipcomp_init_state(struct xfrm_state *x) -{ - int err; - struct ipcomp_data *ipcd; - struct xfrm_algo_desc *calg_desc; - - err = -EINVAL; - if (!x->calg) - goto out; - - if (x->encap) - goto out; + int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { @@ -402,40 +117,22 @@ static int ipcomp_init_state(struct xfrm_state *x) goto out; } - err = -ENOMEM; - ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); - if (!ipcd) + err = ipcomp_init_state(x); + if (err) goto out; - mutex_lock(&ipcomp_resource_mutex); - if (!ipcomp_alloc_scratches()) - goto error; - - ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name); - if (!ipcd->tfms) - goto error; - mutex_unlock(&ipcomp_resource_mutex); - if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) goto error_tunnel; } - calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); - BUG_ON(!calg_desc); - ipcd->threshold = calg_desc->uinfo.comp.threshold; - x->data = ipcd; err = 0; out: return err; error_tunnel: - mutex_lock(&ipcomp_resource_mutex); -error: - ipcomp_free_data(ipcd); - mutex_unlock(&ipcomp_resource_mutex); - kfree(ipcd); + ipcomp_destroy(x); goto out; } @@ -443,7 +140,7 @@ static const struct xfrm_type ipcomp_type = { .description = "IPCOMP4", .owner = THIS_MODULE, .proto = IPPROTO_COMP, - .init_state = ipcomp_init_state, + .init_state = ipcomp4_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output @@ -481,7 +178,7 @@ module_init(ipcomp4_init); module_exit(ipcomp4_fini); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); +MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp/IPv4) - RFC3173"); MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 89dee4346f60..42065fff46c4 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1,6 +1,4 @@ /* - * $Id: ipconfig.c,v 1.46 2002/02/01 22:01:04 davem Exp $ - * * Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or * user-supplied information to configure own IP address and routes. * @@ -434,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt unsigned char *sha, *tha; /* s for "source", t for "target" */ struct ic_device *d; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) @@ -710,14 +708,14 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d struct net_device *dev = d->dev; struct sk_buff *skb; struct bootp_pkt *b; - int hh_len = LL_RESERVED_SPACE(dev); struct iphdr *h; /* Allocate packet */ - skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL); + skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15, + GFP_KERNEL); if (!skb) return; - skb_reserve(skb, hh_len); + skb_reserve(skb, LL_RESERVED_SPACE(dev)); b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); memset(b, 0, sizeof(struct bootp_pkt)); @@ -854,7 +852,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str struct ic_device *d; int len, ext_len; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; /* Perform verifications before taking the lock. */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 149111f08e8d..4c6d2caf9203 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -1,8 +1,6 @@ /* * Linux NET3: IP/IP protocol decoder. * - * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $ - * * Authors: * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 * @@ -278,9 +276,8 @@ static void ipip_tunnel_uninit(struct net_device *dev) static int ipip_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ @@ -337,133 +334,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipip_lock); return err; -#else - struct iphdr *iph = (struct iphdr*)dp; - int hlen = iph->ihl<<2; - struct iphdr *eiph; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - __be32 rel_info = 0; - __u32 n = 0; - struct sk_buff *skb2; - struct flowi fl; - struct rtable *rt; - - if (len < hlen + sizeof(struct iphdr)) - return 0; - eiph = (struct iphdr*)(dp + hlen); - - switch (type) { - default: - return 0; - case ICMP_PARAMETERPROB: - n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; - if (n < hlen) - return 0; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMP_PARAMETERPROB; - rel_info = htonl((n - hlen) << 24); - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return 0; - case ICMP_FRAG_NEEDED: - /* And it is the only really necessary thing :-) */ - n = ntohs(icmp_hdr(skb)->un.frag.mtu); - if (n < hlen+68) - return 0; - n -= hlen; - /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (n > ntohs(eiph->tot_len)) - return 0; - rel_info = htonl(n); - break; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. --ANK - */ - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return 0; - break; - } - - /* Prepare fake skb to feed it to icmp_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return 0; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)eiph); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_daddr = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_IPIP; - if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) { - kfree_skb(skb2); - return 0; - } - skb2->dev = rt->u.dst.dev; - - /* route "incoming" packet */ - if (rt->rt_flags&RTCF_LOCAL) { - ip_rt_put(rt); - rt = NULL; - fl.fl4_daddr = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_TUNNEL) { - ip_rt_put(rt); - kfree_skb(skb2); - return 0; - } - } else { - ip_rt_put(rt); - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb2->dst->dev->type != ARPHRD_TUNNEL) { - kfree_skb(skb2); - return 0; - } - } - - /* change mtu on this route */ - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (n > dst_mtu(skb2->dst)) { - kfree_skb(skb2); - return 0; - } - skb2->dst->ops->update_pmtu(skb2->dst, n); - } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = netdev_priv(skb2->dev); - if (t->parms.iph.ttl) { - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - } - } - - icmp_send(skb2, rel_type, rel_code, rel_info); - kfree_skb(skb2); - return 0; -#endif } static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, @@ -496,8 +366,8 @@ static int ipip_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); skb->pkt_type = PACKET_HOST; - tunnel->stat.rx_packets++; - tunnel->stat.rx_bytes += skb->len; + tunnel->dev->stats.rx_packets++; + tunnel->dev->stats.rx_bytes += skb->len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; @@ -520,7 +390,7 @@ static int ipip_rcv(struct sk_buff *skb) static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->stat; + struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; @@ -533,7 +403,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int mtu; if (tunnel->recursion++) { - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -546,7 +416,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (!dst) { /* NBMA tunnel */ if ((rt = skb->rtable) == NULL) { - tunnel->stat.tx_fifo_errors++; + stats->tx_fifo_errors++; goto tx_error; } if ((dst = rt->rt_gateway) == 0) @@ -561,7 +431,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .tos = RT_TOS(tos) } }, .proto = IPPROTO_IPIP }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error_icmp; } } @@ -569,7 +439,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (tdev == dev) { ip_rt_put(rt); - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -579,7 +449,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; if (mtu < 68) { - tunnel->stat.collisions++; + stats->collisions++; ip_rt_put(rt); goto tx_error; } @@ -813,11 +683,6 @@ done: return err; } -static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev) -{ - return &(((struct ip_tunnel*)netdev_priv(dev))->stat); -} - static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) @@ -830,7 +695,6 @@ static void ipip_tunnel_setup(struct net_device *dev) { dev->uninit = ipip_tunnel_uninit; dev->hard_start_xmit = ipip_tunnel_xmit; - dev->get_stats = ipip_tunnel_get_stats; dev->do_ioctl = ipip_tunnel_ioctl; dev->change_mtu = ipip_tunnel_change_mtu; dev->destructor = free_netdev; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 11700a4dcd95..c519b8d30eee 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -9,8 +9,6 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $ - * * Fixes: * Michael Chastain : Incorrect size of copying. * Alan Cox : Added the cache manager code @@ -120,6 +118,31 @@ static struct timer_list ipmr_expire_timer; /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ +static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) +{ + dev_close(dev); + + dev = __dev_get_by_name(&init_net, "tunl0"); + if (dev) { + struct ifreq ifr; + mm_segment_t oldfs; + struct ip_tunnel_parm p; + + memset(&p, 0, sizeof(p)); + p.iph.daddr = v->vifc_rmt_addr.s_addr; + p.iph.saddr = v->vifc_lcl_addr.s_addr; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPIP; + sprintf(p.name, "dvmrp%d", v->vifc_vifi); + ifr.ifr_ifru.ifru_data = (__force void __user *)&p; + + oldfs = get_fs(); set_fs(KERNEL_DS); + dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL); + set_fs(oldfs); + } +} + static struct net_device *ipmr_new_tunnel(struct vifctl *v) { @@ -161,6 +184,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) if (dev_open(dev)) goto failure; + dev_hold(dev); } } return dev; @@ -181,26 +205,20 @@ static int reg_vif_num = -1; static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { read_lock(&mrt_lock); - ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(dev))->tx_packets++; + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; } -static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) -{ - return (struct net_device_stats*)netdev_priv(dev); -} - static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; dev->hard_start_xmit = reg_vif_xmit; - dev->get_stats = reg_vif_get_stats; dev->destructor = free_netdev; } @@ -209,8 +227,7 @@ static struct net_device *ipmr_reg_vif(void) struct net_device *dev; struct in_device *in_dev; - dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg", - reg_vif_setup); + dev = alloc_netdev(0, "pimreg", reg_vif_setup); if (dev == NULL) return NULL; @@ -234,6 +251,8 @@ static struct net_device *ipmr_reg_vif(void) if (dev_open(dev)) goto failure; + dev_hold(dev); + return dev; failure: @@ -248,9 +267,10 @@ failure: /* * Delete a VIF entry + * @notify: Set to 1, if the caller is a notifier_call */ -static int vif_delete(int vifi) +static int vif_delete(int vifi, int notify) { struct vif_device *v; struct net_device *dev; @@ -293,7 +313,7 @@ static int vif_delete(int vifi) ip_rt_multicast_event(in_dev); } - if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) + if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) unregister_netdevice(dev); dev_put(dev); @@ -398,6 +418,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) struct vif_device *v = &vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; + int err; /* Is vif busy ? */ if (VIF_EXISTS(vifi)) @@ -415,18 +436,34 @@ static int vif_add(struct vifctl *vifc, int mrtsock) dev = ipmr_reg_vif(); if (!dev) return -ENOBUFS; + err = dev_set_allmulti(dev, 1); + if (err) { + unregister_netdevice(dev); + dev_put(dev); + return err; + } break; #endif case VIFF_TUNNEL: dev = ipmr_new_tunnel(vifc); if (!dev) return -ENOBUFS; + err = dev_set_allmulti(dev, 1); + if (err) { + ipmr_del_tunnel(dev, vifc); + dev_put(dev); + return err; + } break; case 0: dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); if (!dev) return -EADDRNOTAVAIL; - dev_put(dev); + err = dev_set_allmulti(dev, 1); + if (err) { + dev_put(dev); + return err; + } break; default: return -EINVAL; @@ -435,7 +472,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock) if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) return -EADDRNOTAVAIL; IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; - dev_set_allmulti(dev, +1); ip_rt_multicast_event(in_dev); /* @@ -458,7 +494,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock) /* And finish update writing critical data */ write_lock_bh(&mrt_lock); - dev_hold(dev); v->dev=dev; #ifdef CONFIG_IP_PIMSM if (v->flags&VIFF_REGISTER) @@ -805,7 +840,7 @@ static void mroute_clean_tables(struct sock *sk) */ for (i=0; i<maxvif; i++) { if (!(vif_table[i].flags&VIFF_STATIC)) - vif_delete(i); + vif_delete(i, 0); } /* @@ -918,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt if (optname==MRT_ADD_VIF) { ret = vif_add(&vif, sk==mroute_socket); } else { - ret = vif_delete(vif.vifc_vifi); + ret = vif_delete(vif.vifc_vifi, 0); } rtnl_unlock(); return ret; @@ -1089,7 +1124,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v struct vif_device *v; int ct; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event != NETDEV_UNREGISTER) @@ -1097,7 +1132,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v v=&vif_table[0]; for (ct=0;ct<maxvif;ct++,v++) { if (v->dev==dev) - vif_delete(ct); + vif_delete(ct, 1); } return NOTIFY_DONE; } @@ -1143,7 +1178,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -1170,8 +1205,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out+=skb->len; - ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++; + vif->dev->stats.tx_bytes += skb->len; + vif->dev->stats.tx_packets++; ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); kfree_skb(skb); return; @@ -1206,7 +1241,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) to blackhole. */ - IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; } @@ -1230,8 +1265,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_TUNNEL) { ip_encap(skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ - ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++; - ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len; + vif->dev->stats.tx_packets++; + vif->dev->stats.tx_bytes += skb->len; } IPCB(skb)->flags |= IPSKB_FORWARDED; @@ -1487,8 +1522,8 @@ int pim_rcv_v1(struct sk_buff * skb) skb->pkt_type = PACKET_HOST; dst_release(skb->dst); skb->dst = NULL; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; + reg_dev->stats.rx_bytes += skb->len; + reg_dev->stats.rx_packets++; nf_reset(skb); netif_rx(skb); dev_put(reg_dev); @@ -1542,8 +1577,8 @@ static int pim_rcv(struct sk_buff * skb) skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; + reg_dev->stats.rx_bytes += skb->len; + reg_dev->stats.rx_packets++; skb->dst = NULL; nf_reset(skb); netif_rx(skb); @@ -1887,16 +1922,36 @@ static struct net_protocol pim_protocol = { * Setup for IP multicast routing */ -void __init ip_mr_init(void) +int __init ip_mr_init(void) { + int err; + mrt_cachep = kmem_cache_create("ip_mrt_cache", sizeof(struct mfc_cache), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + if (!mrt_cachep) + return -ENOMEM; + setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); - register_netdevice_notifier(&ip_mr_notifier); + err = register_netdevice_notifier(&ip_mr_notifier); + if (err) + goto reg_notif_fail; #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops); - proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops); + err = -ENOMEM; + if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops)) + goto proc_vif_fail; + if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops)) + goto proc_cache_fail; #endif + return 0; +reg_notif_fail: + kmem_cache_destroy(mrt_cachep); +#ifdef CONFIG_PROC_FS +proc_vif_fail: + unregister_netdevice_notifier(&ip_mr_notifier); +proc_cache_fail: + proc_net_remove(&init_net, "ip_mr_vif"); +#endif + return err; } diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 535abe0c45e7..1f1897a1a702 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -1,8 +1,6 @@ /* * ip_vs_app.c: Application module support for IPVS * - * Version: $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 65f1ba112752..f8bdae47a77f 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 963981a9d501..a7879eafc3b5 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> @@ -993,7 +991,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, == sysctl_ip_vs_sync_threshold[0])) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || - (cp->state == IP_VS_TCP_S_CLOSE))))) + (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || + (cp->state == IP_VS_TCP_S_TIME_WAIT))))) ip_vs_sync_conn(cp); cp->old_state = cp->state; diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 94c5767c8e01..9a5ace0b4dd6 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index dcf5d46aaa5e..8afc1503ed20 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -1,8 +1,6 @@ /* * IPVS: Destination Hashing scheduling module * - * Version: $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * Inspired by the consistent hashing scheduler patch from diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index dfa0d713c801..bc04eedd6dbb 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -1,8 +1,6 @@ /* * ip_vs_est.c: simple rate estimator for IPVS * - * Version: $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index 59aa166b7678..c1c758e4f733 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -1,8 +1,6 @@ /* * ip_vs_ftp.c: IPVS ftp application module * - * Version: $Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * Changes: diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 3888642706ad..0efa3db4b180 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -1,8 +1,6 @@ /* * IPVS: Locality-Based Least-Connection scheduling module * - * Version: $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index daa260eb21cf..8e3bbeb45138 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -1,8 +1,6 @@ /* * IPVS: Locality-Based Least-Connection with Replication scheduler * - * Version: $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c index d88fef90a641..ac9f08e065d5 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/ipv4/ipvs/ip_vs_lc.c @@ -1,8 +1,6 @@ /* * IPVS: Least-Connection Scheduling module * - * Version: $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c index bc2a9e5f2a7b..a46bf258d420 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/ipv4/ipvs/ip_vs_nq.c @@ -1,8 +1,6 @@ /* * IPVS: Never Queue scheduling module * - * Version: $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index 4b1c16cbb16b..876714f23d65 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -1,8 +1,6 @@ /* * ip_vs_proto.c: transport protocol load balancing support for IPVS * - * Version: $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c index 4bf835e1d86d..73e0ea87c1f5 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah.c @@ -1,8 +1,6 @@ /* * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS * - * Version: $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $ - * * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 * Wensong Zhang <wensong@linuxvirtualserver.org> * diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c index db6a6b7b1a0b..21d70c8ffa54 100644 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ b/net/ipv4/ipvs/ip_vs_proto_esp.c @@ -1,8 +1,6 @@ /* * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS * - * Version: $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $ - * * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 * Wensong Zhang <wensong@linuxvirtualserver.org> * diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index b83dc14b0a4d..d0ea467986a0 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -1,8 +1,6 @@ /* * ip_vs_proto_tcp.c: TCP load balancing support for IPVS * - * Version: $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 75771cb3cd6f..c6be5d56823f 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -1,8 +1,6 @@ /* * ip_vs_proto_udp.c: UDP load balancing support for IPVS * - * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c index 433f8a947924..c8db12d39e61 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/ipv4/ipvs/ip_vs_rr.c @@ -1,8 +1,6 @@ /* * IPVS: Round-Robin Scheduling module * - * Version: $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c index 121a32b1b756..b64767309855 100644 --- a/net/ipv4/ipvs/ip_vs_sched.c +++ b/net/ipv4/ipvs/ip_vs_sched.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c index dd7c128f9db3..2a7d31358181 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/ipv4/ipvs/ip_vs_sed.c @@ -1,8 +1,6 @@ /* * IPVS: Shortest Expected Delay scheduling module * - * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 1b25b00ef1e1..b8fdfac65001 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -1,8 +1,6 @@ /* * IPVS: Source Hashing scheduling module * - * Version: $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index eff54efe0351..45e9bd96c286 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * ip_vs_sync: sync connection info from master load balancer to backups @@ -29,10 +27,12 @@ #include <linux/in.h> #include <linux/igmp.h> /* for ip_mc_join_group */ #include <linux/udp.h> +#include <linux/err.h> +#include <linux/kthread.h> +#include <linux/wait.h> #include <net/ip.h> #include <net/sock.h> -#include <asm/uaccess.h> /* for get_fs and set_fs */ #include <net/ip_vs.h> @@ -68,8 +68,8 @@ struct ip_vs_sync_conn_options { }; struct ip_vs_sync_thread_data { - struct completion *startup; - int state; + struct socket *sock; + char *buf; }; #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) @@ -140,18 +140,19 @@ volatile int ip_vs_backup_syncid = 0; char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; +/* sync daemon tasks */ +static struct task_struct *sync_master_thread; +static struct task_struct *sync_backup_thread; + /* multicast addr */ -static struct sockaddr_in mcast_addr; +static struct sockaddr_in mcast_addr = { + .sin_family = AF_INET, + .sin_port = __constant_htons(IP_VS_SYNC_PORT), + .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP), +}; -static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) -{ - spin_lock(&ip_vs_sync_lock); - list_add_tail(&sb->list, &ip_vs_sync_queue); - spin_unlock(&ip_vs_sync_lock); -} - -static inline struct ip_vs_sync_buff * sb_dequeue(void) +static inline struct ip_vs_sync_buff *sb_dequeue(void) { struct ip_vs_sync_buff *sb; @@ -195,6 +196,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) kfree(sb); } +static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) +{ + spin_lock(&ip_vs_sync_lock); + if (ip_vs_sync_state & IP_VS_STATE_MASTER) + list_add_tail(&sb->list, &ip_vs_sync_queue); + else + ip_vs_sync_buff_release(sb); + spin_unlock(&ip_vs_sync_lock); +} + /* * Get the current sync buffer if it has been created for more * than the specified time or the specified time is zero. @@ -574,14 +585,17 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) static struct socket * make_send_sock(void) { struct socket *sock; + int result; /* First create a socket */ - if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (result < 0) { IP_VS_ERR("Error during creation of socket; terminating\n"); - return NULL; + return ERR_PTR(result); } - if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) { + result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); + if (result < 0) { IP_VS_ERR("Error setting outbound mcast interface\n"); goto error; } @@ -589,14 +603,15 @@ static struct socket * make_send_sock(void) set_mcast_loop(sock->sk, 0); set_mcast_ttl(sock->sk, 1); - if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) { + result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); + if (result < 0) { IP_VS_ERR("Error binding address of the mcast interface\n"); goto error; } - if (sock->ops->connect(sock, - (struct sockaddr*)&mcast_addr, - sizeof(struct sockaddr), 0) < 0) { + result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, + sizeof(struct sockaddr), 0); + if (result < 0) { IP_VS_ERR("Error connecting to the multicast addr\n"); goto error; } @@ -605,7 +620,7 @@ static struct socket * make_send_sock(void) error: sock_release(sock); - return NULL; + return ERR_PTR(result); } @@ -615,27 +630,30 @@ static struct socket * make_send_sock(void) static struct socket * make_receive_sock(void) { struct socket *sock; + int result; /* First create a socket */ - if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (result < 0) { IP_VS_ERR("Error during creation of socket; terminating\n"); - return NULL; + return ERR_PTR(result); } /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = 1; - if (sock->ops->bind(sock, - (struct sockaddr*)&mcast_addr, - sizeof(struct sockaddr)) < 0) { + result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, + sizeof(struct sockaddr)); + if (result < 0) { IP_VS_ERR("Error binding to the multicast addr\n"); goto error; } /* join the multicast group */ - if (join_mcast_group(sock->sk, - (struct in_addr*)&mcast_addr.sin_addr, - ip_vs_backup_mcast_ifn) < 0) { + result = join_mcast_group(sock->sk, + (struct in_addr *) &mcast_addr.sin_addr, + ip_vs_backup_mcast_ifn); + if (result < 0) { IP_VS_ERR("Error joining to the multicast group\n"); goto error; } @@ -644,7 +662,7 @@ static struct socket * make_receive_sock(void) error: sock_release(sock); - return NULL; + return ERR_PTR(result); } @@ -702,44 +720,29 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) } -static DECLARE_WAIT_QUEUE_HEAD(sync_wait); -static pid_t sync_master_pid = 0; -static pid_t sync_backup_pid = 0; - -static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait); -static int stop_master_sync = 0; -static int stop_backup_sync = 0; - -static void sync_master_loop(void) +static int sync_thread_master(void *data) { - struct socket *sock; + struct ip_vs_sync_thread_data *tinfo = data; struct ip_vs_sync_buff *sb; - /* create the sending multicast socket */ - sock = make_send_sock(); - if (!sock) - return; - IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, " "syncid = %d\n", ip_vs_master_mcast_ifn, ip_vs_master_syncid); - for (;;) { - while ((sb=sb_dequeue())) { - ip_vs_send_sync_msg(sock, sb->mesg); + while (!kthread_should_stop()) { + while ((sb = sb_dequeue())) { + ip_vs_send_sync_msg(tinfo->sock, sb->mesg); ip_vs_sync_buff_release(sb); } /* check if entries stay in curr_sb for 2 seconds */ - if ((sb = get_curr_sync_buff(2*HZ))) { - ip_vs_send_sync_msg(sock, sb->mesg); + sb = get_curr_sync_buff(2 * HZ); + if (sb) { + ip_vs_send_sync_msg(tinfo->sock, sb->mesg); ip_vs_sync_buff_release(sb); } - if (stop_master_sync) - break; - - msleep_interruptible(1000); + schedule_timeout_interruptible(HZ); } /* clean up the sync_buff queue */ @@ -753,267 +756,175 @@ static void sync_master_loop(void) } /* release the sending multicast socket */ - sock_release(sock); + sock_release(tinfo->sock); + kfree(tinfo); + + return 0; } -static void sync_backup_loop(void) +static int sync_thread_backup(void *data) { - struct socket *sock; - char *buf; + struct ip_vs_sync_thread_data *tinfo = data; int len; - if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) { - IP_VS_ERR("sync_backup_loop: kmalloc error\n"); - return; - } - - /* create the receiving multicast socket */ - sock = make_receive_sock(); - if (!sock) - goto out; - IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, " "syncid = %d\n", ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); - for (;;) { - /* do you have data now? */ - while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) { - if ((len = - ip_vs_receive(sock, buf, - sync_recv_mesg_maxlen)) <= 0) { + while (!kthread_should_stop()) { + wait_event_interruptible(*tinfo->sock->sk->sk_sleep, + !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) + || kthread_should_stop()); + + /* do we have data now? */ + while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { + len = ip_vs_receive(tinfo->sock, tinfo->buf, + sync_recv_mesg_maxlen); + if (len <= 0) { IP_VS_ERR("receiving message error\n"); break; } - /* disable bottom half, because it accessed the data + + /* disable bottom half, because it accesses the data shared by softirq while getting/creating conns */ local_bh_disable(); - ip_vs_process_message(buf, len); + ip_vs_process_message(tinfo->buf, len); local_bh_enable(); } - - if (stop_backup_sync) - break; - - msleep_interruptible(1000); } /* release the sending multicast socket */ - sock_release(sock); + sock_release(tinfo->sock); + kfree(tinfo->buf); + kfree(tinfo); - out: - kfree(buf); + return 0; } -static void set_sync_pid(int sync_state, pid_t sync_pid) -{ - if (sync_state == IP_VS_STATE_MASTER) - sync_master_pid = sync_pid; - else if (sync_state == IP_VS_STATE_BACKUP) - sync_backup_pid = sync_pid; -} - -static void set_stop_sync(int sync_state, int set) +int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) { - if (sync_state == IP_VS_STATE_MASTER) - stop_master_sync = set; - else if (sync_state == IP_VS_STATE_BACKUP) - stop_backup_sync = set; - else { - stop_master_sync = set; - stop_backup_sync = set; - } -} + struct ip_vs_sync_thread_data *tinfo; + struct task_struct **realtask, *task; + struct socket *sock; + char *name, *buf = NULL; + int (*threadfn)(void *data); + int result = -ENOMEM; -static int sync_thread(void *startup) -{ - DECLARE_WAITQUEUE(wait, current); - mm_segment_t oldmm; - int state; - const char *name; - struct ip_vs_sync_thread_data *tinfo = startup; + IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); + IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", + sizeof(struct ip_vs_sync_conn)); - /* increase the module use count */ - ip_vs_use_count_inc(); + if (state == IP_VS_STATE_MASTER) { + if (sync_master_thread) + return -EEXIST; - if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) { - state = IP_VS_STATE_MASTER; + strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, + sizeof(ip_vs_master_mcast_ifn)); + ip_vs_master_syncid = syncid; + realtask = &sync_master_thread; name = "ipvs_syncmaster"; - } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) { - state = IP_VS_STATE_BACKUP; + threadfn = sync_thread_master; + sock = make_send_sock(); + } else if (state == IP_VS_STATE_BACKUP) { + if (sync_backup_thread) + return -EEXIST; + + strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, + sizeof(ip_vs_backup_mcast_ifn)); + ip_vs_backup_syncid = syncid; + realtask = &sync_backup_thread; name = "ipvs_syncbackup"; + threadfn = sync_thread_backup; + sock = make_receive_sock(); } else { - IP_VS_BUG(); - ip_vs_use_count_dec(); return -EINVAL; } - daemonize(name); - - oldmm = get_fs(); - set_fs(KERNEL_DS); - - /* Block all signals */ - spin_lock_irq(¤t->sighand->siglock); - siginitsetinv(¤t->blocked, 0); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + if (IS_ERR(sock)) { + result = PTR_ERR(sock); + goto out; + } - /* set the maximum length of sync message */ set_sync_mesg_maxlen(state); + if (state == IP_VS_STATE_BACKUP) { + buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); + if (!buf) + goto outsocket; + } - /* set up multicast address */ - mcast_addr.sin_family = AF_INET; - mcast_addr.sin_port = htons(IP_VS_SYNC_PORT); - mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP); - - add_wait_queue(&sync_wait, &wait); - - set_sync_pid(state, task_pid_nr(current)); - complete(tinfo->startup); - - /* - * once we call the completion queue above, we should - * null out that reference, since its allocated on the - * stack of the creating kernel thread - */ - tinfo->startup = NULL; - - /* processing master/backup loop here */ - if (state == IP_VS_STATE_MASTER) - sync_master_loop(); - else if (state == IP_VS_STATE_BACKUP) - sync_backup_loop(); - else IP_VS_BUG(); - - remove_wait_queue(&sync_wait, &wait); - - /* thread exits */ - - /* - * If we weren't explicitly stopped, then we - * exited in error, and should undo our state - */ - if ((!stop_master_sync) && (!stop_backup_sync)) - ip_vs_sync_state -= tinfo->state; + tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); + if (!tinfo) + goto outbuf; - set_sync_pid(state, 0); - IP_VS_INFO("sync thread stopped!\n"); + tinfo->sock = sock; + tinfo->buf = buf; - set_fs(oldmm); + task = kthread_run(threadfn, tinfo, name); + if (IS_ERR(task)) { + result = PTR_ERR(task); + goto outtinfo; + } - /* decrease the module use count */ - ip_vs_use_count_dec(); + /* mark as active */ + *realtask = task; + ip_vs_sync_state |= state; - set_stop_sync(state, 0); - wake_up(&stop_sync_wait); + /* increase the module use count */ + ip_vs_use_count_inc(); - /* - * we need to free the structure that was allocated - * for us in start_sync_thread - */ - kfree(tinfo); return 0; -} - - -static int fork_sync_thread(void *startup) -{ - pid_t pid; - - /* fork the sync thread here, then the parent process of the - sync thread is the init process after this thread exits. */ - repeat: - if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) { - IP_VS_ERR("could not create sync_thread due to %d... " - "retrying.\n", pid); - msleep_interruptible(1000); - goto repeat; - } - return 0; +outtinfo: + kfree(tinfo); +outbuf: + kfree(buf); +outsocket: + sock_release(sock); +out: + return result; } -int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) +int stop_sync_thread(int state) { - DECLARE_COMPLETION_ONSTACK(startup); - pid_t pid; - struct ip_vs_sync_thread_data *tinfo; - - if ((state == IP_VS_STATE_MASTER && sync_master_pid) || - (state == IP_VS_STATE_BACKUP && sync_backup_pid)) - return -EEXIST; - - /* - * Note that tinfo will be freed in sync_thread on exit - */ - tinfo = kmalloc(sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL); - if (!tinfo) - return -ENOMEM; - IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); - IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", - sizeof(struct ip_vs_sync_conn)); - ip_vs_sync_state |= state; if (state == IP_VS_STATE_MASTER) { - strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, - sizeof(ip_vs_master_mcast_ifn)); - ip_vs_master_syncid = syncid; - } else { - strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, - sizeof(ip_vs_backup_mcast_ifn)); - ip_vs_backup_syncid = syncid; - } - - tinfo->state = state; - tinfo->startup = &startup; - - repeat: - if ((pid = kernel_thread(fork_sync_thread, tinfo, 0)) < 0) { - IP_VS_ERR("could not create fork_sync_thread due to %d... " - "retrying.\n", pid); - msleep_interruptible(1000); - goto repeat; - } - - wait_for_completion(&startup); - - return 0; -} + if (!sync_master_thread) + return -ESRCH; + IP_VS_INFO("stopping master sync thread %d ...\n", + task_pid_nr(sync_master_thread)); -int stop_sync_thread(int state) -{ - DECLARE_WAITQUEUE(wait, current); + /* + * The lock synchronizes with sb_queue_tail(), so that we don't + * add sync buffers to the queue, when we are already in + * progress of stopping the master sync daemon. + */ - if ((state == IP_VS_STATE_MASTER && !sync_master_pid) || - (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) - return -ESRCH; + spin_lock(&ip_vs_sync_lock); + ip_vs_sync_state &= ~IP_VS_STATE_MASTER; + spin_unlock(&ip_vs_sync_lock); + kthread_stop(sync_master_thread); + sync_master_thread = NULL; + } else if (state == IP_VS_STATE_BACKUP) { + if (!sync_backup_thread) + return -ESRCH; + + IP_VS_INFO("stopping backup sync thread %d ...\n", + task_pid_nr(sync_backup_thread)); + + ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; + kthread_stop(sync_backup_thread); + sync_backup_thread = NULL; + } else { + return -EINVAL; + } - IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); - IP_VS_INFO("stopping sync thread %d ...\n", - (state == IP_VS_STATE_MASTER) ? - sync_master_pid : sync_backup_pid); - - __set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&stop_sync_wait, &wait); - set_stop_sync(state, 1); - ip_vs_sync_state -= state; - wake_up(&sync_wait); - schedule(); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&stop_sync_wait, &wait); - - /* Note: no need to reap the sync thread, because its parent - process is the init process */ - - if ((state == IP_VS_STATE_MASTER && stop_master_sync) || - (state == IP_VS_STATE_BACKUP && stop_backup_sync)) - IP_VS_BUG(); + /* decrease the module use count */ + ip_vs_use_count_dec(); return 0; } diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c index 8a9d913261d8..772c3cb4eca1 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/ipv4/ipvs/ip_vs_wlc.c @@ -1,8 +1,6 @@ /* * IPVS: Weighted Least-Connection Scheduling module * - * Version: $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c index 85c680add6df..1d6932d7dc97 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/ipv4/ipvs/ip_vs_wrr.c @@ -1,8 +1,6 @@ /* * IPVS: Weighted Round-Robin Scheduling module * - * Version: $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index f63006caea03..9892d4aca42e 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -1,8 +1,6 @@ /* * ip_vs_xmit.c: various packet transmitters for IPVS * - * Version: $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2767841a8cef..90eb7cb47e77 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -213,8 +213,7 @@ config IP_NF_TARGET_NETMAP help NETMAP is an implementation of static 1:1 NAT mapping of network addresses. It maps the network address part, while keeping the host - address part intact. It is similar to Fast NAT, except that - Netfilter's connection tracking doesn't work well with Fast NAT. + address part intact. To compile it as a module, choose M here. If unsure, say N. @@ -365,6 +364,18 @@ config IP_NF_RAW If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +# security table for MAC policy +config IP_NF_SECURITY + tristate "Security table" + depends on IP_NF_IPTABLES + depends on SECURITY + depends on NETFILTER_ADVANCED + help + This option adds a `security' table to iptables, for use + with Mandatory Access Control (MAC) policy. + + If unsure, say N. + # ARP tables config IP_NF_ARPTABLES tristate "ARP tables support" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index d9b92fbf5579..3f31291f37ce 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o obj-$(CONFIG_NF_NAT) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o +obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o # matches obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 3be4d07e7ed9..082f5dd3156c 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -55,32 +55,53 @@ static struct xt_table packet_filter = { }; /* The work comes in here from netfilter.c */ -static unsigned int arpt_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int arpt_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter); + return arpt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.arptable_filter); +} + +static unsigned int arpt_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return arpt_do_table(skb, hook, in, out, + dev_net(out)->ipv4.arptable_filter); +} + +static unsigned int arpt_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return arpt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.arptable_filter); } static struct nf_hook_ops arpt_ops[] __read_mostly = { { - .hook = arpt_hook, + .hook = arpt_in_hook, .owner = THIS_MODULE, .pf = NF_ARP, .hooknum = NF_ARP_IN, .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_hook, + .hook = arpt_out_hook, .owner = THIS_MODULE, .pf = NF_ARP, .hooknum = NF_ARP_OUT, .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_hook, + .hook = arpt_forward_hook, .owner = THIS_MODULE, .pf = NF_ARP, .hooknum = NF_ARP_FORWARD, diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 26a37cedcf2e..432ce9d1c11c 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -156,7 +156,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); - data_len = 0; break; case IPQ_COPY_PACKET: @@ -224,8 +223,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: - if (skb) - kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip_queue: error creating packet message\n"); return NULL; @@ -480,7 +477,7 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* Drop any packets associated with the downed device */ diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 84c26dd27d81..0841aefaa503 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -120,7 +120,7 @@ static int masq_device_event(struct notifier_block *this, { const struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event == NETDEV_DOWN) { diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c new file mode 100644 index 000000000000..db6d312128e1 --- /dev/null +++ b/net/ipv4/netfilter/iptable_security.c @@ -0,0 +1,180 @@ +/* + * "security" table + * + * This is for use by Mandatory Access Control (MAC) security models, + * which need to be able to manage security policy in separate context + * to DAC. + * + * Based on iptable_mangle.c + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> + * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <net/ip.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>"); +MODULE_DESCRIPTION("iptables security table, for MAC rules"); + +#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +} initial_table __net_initdata = { + .repl = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .num_entries = 4, + .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + .hook_entry = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ipt_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, + }, + .underflow = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ipt_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, + }, + }, + .entries = { + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ + IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ + }, + .term = IPT_ERROR_INIT, /* ERROR */ +}; + +static struct xt_table security_table = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .lock = __RW_LOCK_UNLOCKED(security_table.lock), + .me = THIS_MODULE, + .af = AF_INET, +}; + +static unsigned int +ipt_local_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + nf_local_in_net(in, out)->ipv4.iptable_security); +} + +static unsigned int +ipt_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + nf_forward_net(in, out)->ipv4.iptable_security); +} + +static unsigned int +ipt_local_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* Somebody is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { + if (net_ratelimit()) + printk(KERN_INFO "iptable_security: ignoring short " + "SOCK_RAW packet.\n"); + return NF_ACCEPT; + } + return ipt_do_table(skb, hook, in, out, + nf_local_out_net(in, out)->ipv4.iptable_security); +} + +static struct nf_hook_ops ipt_ops[] __read_mostly = { + { + .hook = ipt_local_in_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_SECURITY, + }, + { + .hook = ipt_forward_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_FORWARD, + .priority = NF_IP_PRI_SECURITY, + }, + { + .hook = ipt_local_out_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_SECURITY, + }, +}; + +static int __net_init iptable_security_net_init(struct net *net) +{ + net->ipv4.iptable_security = + ipt_register_table(net, &security_table, &initial_table.repl); + + if (IS_ERR(net->ipv4.iptable_security)) + return PTR_ERR(net->ipv4.iptable_security); + + return 0; +} + +static void __net_exit iptable_security_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.iptable_security); +} + +static struct pernet_operations iptable_security_net_ops = { + .init = iptable_security_net_init, + .exit = iptable_security_net_exit, +}; + +static int __init iptable_security_init(void) +{ + int ret; + + ret = register_pernet_subsys(&iptable_security_net_ops); + if (ret < 0) + return ret; + + ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + if (ret < 0) + goto cleanup_table; + + return ret; + +cleanup_table: + unregister_pernet_subsys(&iptable_security_net_ops); + return ret; +} + +static void __exit iptable_security_fini(void) +{ + nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + unregister_pernet_subsys(&iptable_security_net_ops); +} + +module_init(iptable_security_init); +module_exit(iptable_security_fini); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 40a46d482490..3a020720e40b 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -18,19 +18,7 @@ #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> - -#ifdef CONFIG_NF_CT_ACCT -static unsigned int -seq_print_counters(struct seq_file *s, - const struct ip_conntrack_counter *counter) -{ - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)counter->packets, - (unsigned long long)counter->bytes); -} -#else -#define seq_print_counters(x, y) 0 -#endif +#include <net/netfilter/nf_conntrack_acct.h> struct ct_iter_state { unsigned int bucket; @@ -127,7 +115,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL])) + if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) return -ENOSPC; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) @@ -138,7 +126,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY])) + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) return -ENOSPC; if (test_bit(IPS_ASSURED_BIT, &ct->status)) diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 78ab19accace..97791048fa9b 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -87,9 +87,8 @@ static int icmp_packet(struct nf_conn *ct, means this will only run once even if count hits zero twice (theoretically possible with SMP) */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count) - && del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + if (atomic_dec_and_test(&ct->proto.icmp.count)) + nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 04578593e100..6c6a3cba8d50 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -240,12 +240,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, This is only required for source (ie. NAT/masq) mappings. So far, we don't do local source mappings, so multiple manips not an issue. */ - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == IP_NAT_MANIP_SRC && + !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { if (find_appropriate_src(orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); - if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) - if (!nf_nat_used_tuple(tuple, ct)) - return; + if (!nf_nat_used_tuple(tuple, ct)) + return; } } @@ -556,7 +556,6 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) spin_lock_bh(&nf_nat_lock); hlist_del_rcu(&nat->bysource); - nat->ct = NULL; spin_unlock_bh(&nf_nat_lock); } @@ -570,8 +569,8 @@ static void nf_nat_move_storage(void *new, void *old) return; spin_lock_bh(&nf_nat_lock); - hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); new_nat->ct = ct; + hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); spin_unlock_bh(&nf_nat_lock); } diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c index 82e4c0e286b8..65e470bc6123 100644 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c @@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb, sctp_sctphdr_t *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; - u32 crc32; + __be32 crc32; if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return false; @@ -61,7 +61,7 @@ sctp_manip_pkt(struct sk_buff *skb, crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb), crc32); crc32 = sctp_end_cksum(crc32); - hdr->checksum = htonl(crc32); + hdr->checksum = crc32; return true; } diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 4334d5cabc5b..14544320c545 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -318,11 +318,11 @@ static int mangle_content_len(struct sk_buff *skb, buffer, buflen); } -static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, unsigned int *datalen, - enum sdp_header_types type, - enum sdp_header_types term, - char *buffer, int buflen) +static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, + unsigned int dataoff, unsigned int *datalen, + enum sdp_header_types type, + enum sdp_header_types term, + char *buffer, int buflen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -330,9 +330,9 @@ static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, &matchoff, &matchlen) <= 0) - return 0; + return -ENOENT; return mangle_packet(skb, dptr, datalen, matchoff, matchlen, - buffer, buflen); + buffer, buflen) ? 0 : -EINVAL; } static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, @@ -346,8 +346,8 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, unsigned int buflen; buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip)); - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, - buffer, buflen)) + if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, + buffer, buflen)) return 0; return mangle_content_len(skb, dptr, datalen); @@ -381,15 +381,27 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, /* Mangle session description owner and contact addresses */ buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip)); - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, + if (mangle_sdp_packet(skb, dptr, dataoff, datalen, SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, buffer, buflen)) return 0; - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, - SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, - buffer, buflen)) + switch (mangle_sdp_packet(skb, dptr, dataoff, datalen, + SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, + buffer, buflen)) { + case 0: + /* + * RFC 2327: + * + * Session description + * + * c=* (connection information - not required if included in all media) + */ + case -ENOENT: + break; + default: return 0; + } return mangle_content_len(skb, dptr, datalen); } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5daefad3d193..ffeaffc3fffe 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -232,6 +232,11 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx, } } } + + /* don't trust len bigger than ctx buffer */ + if (*len > ctx->end - ctx->pointer) + return 0; + return 1; } @@ -250,6 +255,10 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx, if (!asn1_length_decode(ctx, &def, &len)) return 0; + /* primitive shall be definite, indefinite shall be constructed */ + if (*con == ASN1_PRI && !def) + return 0; + if (def) *eoc = ctx->pointer + len; else @@ -430,10 +439,15 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, unsigned int *len) { unsigned long subid; - unsigned int size; unsigned long *optr; + size_t size; size = eoc - ctx->pointer + 1; + + /* first subid actually encodes first two subids */ + if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) + return 0; + *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); if (*oid == NULL) { if (net_ratelimit()) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 552169b41b16..834356ea99df 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -7,8 +7,6 @@ * PROC file system. It is mainly used for debugging and * statistics. * - * Version: $Id: proc.c,v 1.45 2001/05/16 16:45:35 davem Exp $ - * * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de> * Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de> @@ -73,32 +71,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) static int sockstat_seq_open(struct inode *inode, struct file *file) { - int err; - struct net *net; - - err = -ENXIO; - net = get_proc_net(inode); - if (net == NULL) - goto err_net; - - err = single_open(file, sockstat_seq_show, net); - if (err < 0) - goto err_open; - - return 0; - -err_open: - put_net(net); -err_net: - return err; -} - -static int sockstat_seq_release(struct inode *inode, struct file *file) -{ - struct net *net = ((struct seq_file *)file->private_data)->private; - - put_net(net); - return single_release(inode, file); + return single_open_net(inode, file, sockstat_seq_show); } static const struct file_operations sockstat_seq_fops = { @@ -106,7 +79,7 @@ static const struct file_operations sockstat_seq_fops = { .open = sockstat_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = sockstat_seq_release, + .release = single_release_net, }; /* snmp items */ @@ -268,11 +241,12 @@ static void icmpmsg_put(struct seq_file *seq) int j, i, count; static int out[PERLINE]; + struct net *net = seq->private; count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - if (snmp_fold_field((void **) icmpmsg_statistics, i)) + if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i)) out[count++] = i; if (count < PERLINE) continue; @@ -284,7 +258,7 @@ static void icmpmsg_put(struct seq_file *seq) seq_printf(seq, "\nIcmpMsg: "); for (j = 0; j < PERLINE; ++j) seq_printf(seq, " %lu", - snmp_fold_field((void **) icmpmsg_statistics, + snmp_fold_field((void **) net->mib.icmpmsg_statistics, out[j])); seq_putc(seq, '\n'); } @@ -296,7 +270,7 @@ static void icmpmsg_put(struct seq_file *seq) seq_printf(seq, "\nIcmpMsg:"); for (j = 0; j < count; ++j) seq_printf(seq, " %lu", snmp_fold_field((void **) - icmpmsg_statistics, out[j])); + net->mib.icmpmsg_statistics, out[j])); } #undef PERLINE @@ -305,6 +279,7 @@ static void icmpmsg_put(struct seq_file *seq) static void icmp_put(struct seq_file *seq) { int i; + struct net *net = seq->private; seq_puts(seq, "\nIcmp: InMsgs InErrors"); for (i=0; icmpmibmap[i].name != NULL; i++) @@ -313,18 +288,18 @@ static void icmp_put(struct seq_file *seq) for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu", - snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INMSGS), - snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INERRORS)); + snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), + snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **) icmpmsg_statistics, + snmp_fold_field((void **) net->mib.icmpmsg_statistics, icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", - snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTMSGS), - snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTERRORS)); + snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), + snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **) icmpmsg_statistics, + snmp_fold_field((void **) net->mib.icmpmsg_statistics, icmpmibmap[i].index | 0x100)); } @@ -334,6 +309,7 @@ static void icmp_put(struct seq_file *seq) static int snmp_seq_show(struct seq_file *seq, void *v) { int i; + struct net *net = seq->private; seq_puts(seq, "Ip: Forwarding DefaultTTL"); @@ -341,12 +317,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", - IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2, + IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, sysctl_ip_default_ttl); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)ip_statistics, + snmp_fold_field((void **)net->mib.ip_statistics, snmp4_ipstats_list[i].entry)); icmp_put(seq); /* RFC 2011 compatibility */ @@ -361,11 +337,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v) /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", - snmp_fold_field((void **)tcp_statistics, + snmp_fold_field((void **)net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); else seq_printf(seq, " %lu", - snmp_fold_field((void **)tcp_statistics, + snmp_fold_field((void **)net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); } @@ -376,7 +352,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)udp_statistics, + snmp_fold_field((void **)net->mib.udp_statistics, snmp4_udp_list[i].entry)); /* the UDP and UDP-Lite MIBs are the same */ @@ -387,7 +363,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)udplite_statistics, + snmp_fold_field((void **)net->mib.udplite_statistics, snmp4_udp_list[i].entry)); seq_putc(seq, '\n'); @@ -396,7 +372,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) static int snmp_seq_open(struct inode *inode, struct file *file) { - return single_open(file, snmp_seq_show, NULL); + return single_open_net(inode, file, snmp_seq_show); } static const struct file_operations snmp_seq_fops = { @@ -404,7 +380,7 @@ static const struct file_operations snmp_seq_fops = { .open = snmp_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; @@ -415,6 +391,7 @@ static const struct file_operations snmp_seq_fops = { static int netstat_seq_show(struct seq_file *seq, void *v) { int i; + struct net *net = seq->private; seq_puts(seq, "TcpExt:"); for (i = 0; snmp4_net_list[i].name != NULL; i++) @@ -423,7 +400,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nTcpExt:"); for (i = 0; snmp4_net_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net_statistics, + snmp_fold_field((void **)net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); @@ -433,7 +410,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nIpExt:"); for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)ip_statistics, + snmp_fold_field((void **)net->mib.ip_statistics, snmp4_ipextstats_list[i].entry)); seq_putc(seq, '\n'); @@ -442,7 +419,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) static int netstat_seq_open(struct inode *inode, struct file *file) { - return single_open(file, netstat_seq_show, NULL); + return single_open_net(inode, file, netstat_seq_show); } static const struct file_operations netstat_seq_fops = { @@ -450,18 +427,32 @@ static const struct file_operations netstat_seq_fops = { .open = netstat_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; static __net_init int ip_proc_init_net(struct net *net) { if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops)) - return -ENOMEM; + goto out_sockstat; + if (!proc_net_fops_create(net, "netstat", S_IRUGO, &netstat_seq_fops)) + goto out_netstat; + if (!proc_net_fops_create(net, "snmp", S_IRUGO, &snmp_seq_fops)) + goto out_snmp; + return 0; + +out_snmp: + proc_net_remove(net, "netstat"); +out_netstat: + proc_net_remove(net, "sockstat"); +out_sockstat: + return -ENOMEM; } static __net_exit void ip_proc_exit_net(struct net *net) { + proc_net_remove(net, "snmp"); + proc_net_remove(net, "netstat"); proc_net_remove(net, "sockstat"); } @@ -472,24 +463,6 @@ static __net_initdata struct pernet_operations ip_proc_ops = { int __init ip_misc_proc_init(void) { - int rc = 0; - - if (register_pernet_subsys(&ip_proc_ops)) - goto out_pernet; - - if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops)) - goto out_netstat; - - if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops)) - goto out_snmp; -out: - return rc; -out_snmp: - proc_net_remove(&init_net, "netstat"); -out_netstat: - unregister_pernet_subsys(&ip_proc_ops); -out_pernet: - rc = -ENOMEM; - goto out; + return register_pernet_subsys(&ip_proc_ops); } diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 971ab9356e51..ea50da0649fd 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -5,8 +5,6 @@ * * INET protocol dispatch tables. * - * Version: $Id: protocol.c,v 1.14 2001/05/18 02:25:49 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 11d7f753a820..cd975743bcd2 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -5,8 +5,6 @@ * * RAW - implementation of IP "raw" sockets. * - * Version: $Id: raw.c,v 1.64 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * @@ -322,7 +320,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); - int hh_len; + struct net *net = sock_net(sk); struct iphdr *iph; struct sk_buff *skb; unsigned int iphlen; @@ -336,13 +334,12 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, if (flags&MSG_PROBE) goto out; - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - - skb = sock_alloc_send_skb(sk, length+hh_len+15, - flags&MSG_DONTWAIT, &err); + skb = sock_alloc_send_skb(sk, + length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, hh_len); + skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; @@ -372,7 +369,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } if (iph->protocol == IPPROTO_ICMP) - icmp_out_count(((struct icmphdr *) + icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, @@ -388,7 +385,7 @@ error_fault: err = -EFAULT; kfree_skb(skb); error: - IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); return err; } @@ -610,6 +607,13 @@ static void raw_close(struct sock *sk, long timeout) sk_common_release(sk); } +static void raw_destroy(struct sock *sk) +{ + lock_sock(sk); + ip_flush_pending_frames(sk); + release_sock(sk); +} + /* This gets rid of all the nasties in af_inet. -DaveM */ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -822,6 +826,7 @@ struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, .close = raw_close, + .destroy = raw_destroy, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = raw_ioctl, @@ -927,7 +932,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) srcp = inet->num; seq_printf(seq, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d", + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", i, src, srcp, dest, destp, sp->sk_state, atomic_read(&sp->sk_wmem_alloc), atomic_read(&sp->sk_rmem_alloc), @@ -940,7 +945,7 @@ static int raw_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_printf(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " - "inode drops\n"); + "inode ref pointer drops\n"); else raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5e3685c5c407..380d6474cf66 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -5,8 +5,6 @@ * * ROUTE - implementation of the IP router. * - * Version: $Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> @@ -134,7 +132,6 @@ static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; static void rt_worker_func(struct work_struct *work); static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); -static struct timer_list rt_secret_timer; /* * Interface to generic destination cache. @@ -160,7 +157,7 @@ static struct dst_ops ipv4_dst_ops = { .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, - .local_out = ip_local_out, + .local_out = __ip_local_out, .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; @@ -253,20 +250,25 @@ static inline void rt_hash_lock_init(void) static struct rt_hash_bucket *rt_hash_table __read_mostly; static unsigned rt_hash_mask __read_mostly; static unsigned int rt_hash_log __read_mostly; -static atomic_t rt_genid __read_mostly; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) \ (__raw_get_cpu_var(rt_cache_stat).field++) -static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx) +static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, + int genid) { return jhash_3words((__force u32)(__be32)(daddr), (__force u32)(__be32)(saddr), - idx, atomic_read(&rt_genid)) + idx, genid) & rt_hash_mask; } +static inline int rt_genid(struct net *net) +{ + return atomic_read(&net->ipv4.rt_genid); +} + #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { struct seq_net_private p; @@ -336,7 +338,7 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) struct rt_cache_iter_state *st = seq->private; if (*pos) return rt_cache_get_idx(seq, *pos - 1); - st->genid = atomic_read(&rt_genid); + st->genid = rt_genid(seq_file_net(seq)); return SEQ_START_TOKEN; } @@ -683,6 +685,11 @@ static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev); } +static inline int rt_is_expired(struct rtable *rth) +{ + return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); +} + /* * Perform a full scan of hash table and free all entries. * Can be called by a softirq or a process. @@ -692,6 +699,7 @@ static void rt_do_flush(int process_context) { unsigned int i; struct rtable *rth, *next; + struct rtable * tail; for (i = 0; i <= rt_hash_mask; i++) { if (process_context && need_resched()) @@ -701,11 +709,39 @@ static void rt_do_flush(int process_context) continue; spin_lock_bh(rt_hash_lock_addr(i)); +#ifdef CONFIG_NET_NS + { + struct rtable ** prev, * p; + + rth = rt_hash_table[i].chain; + + /* defer releasing the head of the list after spin_unlock */ + for (tail = rth; tail; tail = tail->u.dst.rt_next) + if (!rt_is_expired(tail)) + break; + if (rth != tail) + rt_hash_table[i].chain = tail; + + /* call rt_free on entries after the tail requiring flush */ + prev = &rt_hash_table[i].chain; + for (p = *prev; p; p = next) { + next = p->u.dst.rt_next; + if (!rt_is_expired(p)) { + prev = &p->u.dst.rt_next; + } else { + *prev = next; + rt_free(p); + } + } + } +#else rth = rt_hash_table[i].chain; rt_hash_table[i].chain = NULL; + tail = NULL; +#endif spin_unlock_bh(rt_hash_lock_addr(i)); - for (; rth; rth = next) { + for (; rth != tail; rth = next) { next = rth->u.dst.rt_next; rt_free(rth); } @@ -738,7 +774,7 @@ static void rt_check_expire(void) continue; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid != atomic_read(&rt_genid)) { + if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); continue; @@ -781,21 +817,21 @@ static void rt_worker_func(struct work_struct *work) * many times (2^24) without giving recent rt_genid. * Jenkins hash is strong enough that litle changes of rt_genid are OK. */ -static void rt_cache_invalidate(void) +static void rt_cache_invalidate(struct net *net) { unsigned char shuffle; get_random_bytes(&shuffle, sizeof(shuffle)); - atomic_add(shuffle + 1U, &rt_genid); + atomic_add(shuffle + 1U, &net->ipv4.rt_genid); } /* * delay < 0 : invalidate cache (fast : entries will be deleted later) * delay >= 0 : invalidate & flush cache (can be long) */ -void rt_cache_flush(int delay) +void rt_cache_flush(struct net *net, int delay) { - rt_cache_invalidate(); + rt_cache_invalidate(net); if (delay >= 0) rt_do_flush(!in_softirq()); } @@ -803,10 +839,11 @@ void rt_cache_flush(int delay) /* * We change rt_genid and let gc do the cleanup */ -static void rt_secret_rebuild(unsigned long dummy) +static void rt_secret_rebuild(unsigned long __net) { - rt_cache_invalidate(); - mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval); + struct net *net = (struct net *)__net; + rt_cache_invalidate(net); + mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); } /* @@ -882,7 +919,7 @@ static int rt_garbage_collect(struct dst_ops *ops) rthp = &rt_hash_table[k].chain; spin_lock_bh(rt_hash_lock_addr(k)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid == atomic_read(&rt_genid) && + if (!rt_is_expired(rth) && !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; @@ -964,7 +1001,7 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid != atomic_read(&rt_genid)) { + if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); continue; @@ -1140,7 +1177,7 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_lock_bh(rt_hash_lock_addr(hash)); ip_rt_put(rt); while ((aux = *rthp) != NULL) { - if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) { + if (aux == rt || rt_is_expired(aux)) { *rthp = aux->u.dst.rt_next; rt_free(aux); continue; @@ -1182,7 +1219,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, for (i = 0; i < 2; i++) { for (k = 0; k < 2; k++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); + unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], + rt_genid(net)); rthp=&rt_hash_table[hash].chain; @@ -1194,7 +1232,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rth->fl.fl4_src != skeys[i] || rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || - rth->rt_genid != atomic_read(&rt_genid) || + rt_is_expired(rth) || !net_eq(dev_net(rth->u.dst.dev), net)) { rthp = &rth->u.dst.rt_next; continue; @@ -1233,7 +1271,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->u.dst.neighbour = NULL; rt->u.dst.hh = NULL; rt->u.dst.xfrm = NULL; - rt->rt_genid = atomic_read(&rt_genid); + rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; /* Gateway is different ... */ @@ -1297,7 +1335,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) } else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->u.dst.expires) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, - rt->fl.oif); + rt->fl.oif, + rt_genid(dev_net(dst->dev))); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ipv4_negative_advice: redirect to " NIPQUAD_FMT "/%02x dropped\n", @@ -1390,7 +1429,8 @@ static int ip_error(struct sk_buff *skb) break; case ENETUNREACH: code = ICMP_NET_UNREACH; - IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES); + IP_INC_STATS_BH(dev_net(rt->u.dst.dev), + IPSTATS_MIB_INNOROUTES); break; case EACCES: code = ICMP_PKT_FILTERED; @@ -1446,7 +1486,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, for (k = 0; k < 2; k++) { for (i = 0; i < 2; i++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); + unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], + rt_genid(net)); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -1461,21 +1502,21 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->fl.iif != 0 || dst_metric_locked(&rth->u.dst, RTAX_MTU) || !net_eq(dev_net(rth->u.dst.dev), net) || - rth->rt_genid != atomic_read(&rt_genid)) + rt_is_expired(rth)) continue; if (new_mtu < 68 || new_mtu >= old_mtu) { /* BSD 4.2 compatibility hack :-( */ if (mtu == 0 && - old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] && + old_mtu >= dst_metric(&rth->u.dst, RTAX_MTU) && old_mtu >= 68 + (iph->ihl << 2)) old_mtu -= iph->ihl << 2; mtu = guess_mtu(old_mtu); } - if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) { - if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) { + if (mtu <= dst_metric(&rth->u.dst, RTAX_MTU)) { + if (mtu < dst_metric(&rth->u.dst, RTAX_MTU)) { dst_confirm(&rth->u.dst); if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1497,7 +1538,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { - if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 && + if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= 68 && !(dst_metric_locked(dst, RTAX_MTU))) { if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1613,7 +1654,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) sizeof(rt->u.dst.metrics)); if (fi->fib_mtu == 0) { rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; - if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) && + if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && rt->rt_gateway != rt->rt_dst && rt->u.dst.dev->mtu > 576) rt->u.dst.metrics[RTAX_MTU-1] = 576; @@ -1624,14 +1665,14 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) } else rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; - if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) + if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU) + if (dst_metric(&rt->u.dst, RTAX_MTU) > IP_MAX_MTU) rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; - if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0) + if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, ip_rt_min_advmss); - if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40) + if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; #ifdef CONFIG_NET_CLS_ROUTE @@ -1696,7 +1737,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(dev)); rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; if (our) { @@ -1711,7 +1752,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, RT_CACHE_STAT_INC(in_slow_mc); in_dev_put(in_dev); - hash = rt_hash(daddr, saddr, dev->ifindex); + hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); return rt_intern_hash(hash, rth, &skb->rtable); e_nobufs: @@ -1792,7 +1833,7 @@ static int __mkroute_input(struct sk_buff *skb, if (err) flags |= RTCF_DIRECTSRC; - if (out_dev == in_dev && err && !(flags & RTCF_MASQ) && + if (out_dev == in_dev && err && (IN_DEV_SHARED_MEDIA(out_dev) || inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) flags |= RTCF_DOREDIRECT; @@ -1837,7 +1878,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->u.dst.input = ip_forward; rth->u.dst.output = ip_output; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); rt_set_nexthop(rth, res, itag); @@ -1872,7 +1913,8 @@ static int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif); + hash = rt_hash(daddr, saddr, fl->iif, + rt_genid(dev_net(rth->u.dst.dev))); return rt_intern_hash(hash, rth, &skb->rtable); } @@ -1998,7 +2040,7 @@ local_input: goto e_nobufs; rth->u.dst.output= ip_rt_bug; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(net); atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; @@ -2028,7 +2070,7 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash(daddr, saddr, fl.iif); + hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); err = rt_intern_hash(hash, rth, &skb->rtable); goto done; @@ -2079,7 +2121,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, net = dev_net(dev); tos &= IPTOS_RT_MASK; - hash = rt_hash(daddr, saddr, iif); + hash = rt_hash(daddr, saddr, iif, rt_genid(net)); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2091,7 +2133,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, (rth->fl.fl4_tos ^ tos)) == 0 && rth->fl.mark == skb->mark && net_eq(dev_net(rth->u.dst.dev), net) && - rth->rt_genid == atomic_read(&rt_genid)) { + !rt_is_expired(rth)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); @@ -2219,7 +2261,7 @@ static int __mkroute_output(struct rtable **result, rth->rt_spec_dst= fl->fl4_src; rth->u.dst.output=ip_output; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); @@ -2268,7 +2310,8 @@ static int ip_mkroute_output(struct rtable **rp, int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); unsigned hash; if (err == 0) { - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif); + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, + rt_genid(dev_net(dev_out))); err = rt_intern_hash(hash, rth, rp); } @@ -2480,7 +2523,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, unsigned hash; struct rtable *rth; - hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif); + hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); rcu_read_lock_bh(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2493,7 +2536,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->u.dst.dev), net) && - rth->rt_genid == atomic_read(&rt_genid)) { + !rt_is_expired(rth)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); @@ -2524,7 +2567,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { }; -static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp) +static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp) { struct rtable *ort = *rp; struct rtable *rt = (struct rtable *) @@ -2548,7 +2591,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp) rt->idev = ort->idev; if (rt->idev) in_dev_hold(rt->idev); - rt->rt_genid = atomic_read(&rt_genid); + rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; @@ -2584,7 +2627,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags ? XFRM_LOOKUP_WAIT : 0); if (err == -EREMOTE) - err = ipv4_dst_blackhole(rp, flp); + err = ipv4_dst_blackhole(net, rp, flp); return err; } @@ -2803,7 +2846,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) rt = rcu_dereference(rt->u.dst.rt_next), idx++) { if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) continue; - if (rt->rt_genid != atomic_read(&rt_genid)) + if (rt_is_expired(rt)) continue; skb->dst = dst_clone(&rt->u.dst); if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, @@ -2827,19 +2870,25 @@ done: void ip_rt_multicast_event(struct in_device *in_dev) { - rt_cache_flush(0); + rt_cache_flush(dev_net(in_dev->dev), 0); } #ifdef CONFIG_SYSCTL -static int flush_delay; - -static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, +static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { if (write) { - proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - rt_cache_flush(flush_delay); + int flush_delay; + ctl_table ctl; + struct net *net; + + memcpy(&ctl, __ctl, sizeof(ctl)); + ctl.data = &flush_delay; + proc_dointvec(&ctl, write, filp, buffer, lenp, ppos); + + net = (struct net *)__ctl->extra1; + rt_cache_flush(net, flush_delay); return 0; } @@ -2855,24 +2904,17 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, size_t newlen) { int delay; + struct net *net; if (newlen != sizeof(int)) return -EINVAL; if (get_user(delay, (int __user *)newval)) return -EFAULT; - rt_cache_flush(delay); + net = (struct net *)table->extra1; + rt_cache_flush(net, delay); return 0; } -ctl_table ipv4_route_table[] = { - { - .ctl_name = NET_IPV4_ROUTE_FLUSH, - .procname = "flush", - .data = &flush_delay, - .maxlen = sizeof(int), - .mode = 0200, - .proc_handler = &ipv4_sysctl_rtcache_flush, - .strategy = &ipv4_sysctl_rtcache_flush_strategy, - }, +static ctl_table ipv4_route_table[] = { { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", @@ -3011,8 +3053,97 @@ ctl_table ipv4_route_table[] = { }, { .ctl_name = 0 } }; + +static __net_initdata struct ctl_path ipv4_route_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, + { }, +}; + + +static struct ctl_table ipv4_route_flush_table[] = { + { + .ctl_name = NET_IPV4_ROUTE_FLUSH, + .procname = "flush", + .maxlen = sizeof(int), + .mode = 0200, + .proc_handler = &ipv4_sysctl_rtcache_flush, + .strategy = &ipv4_sysctl_rtcache_flush_strategy, + }, + { .ctl_name = 0 }, +}; + +static __net_init int sysctl_route_net_init(struct net *net) +{ + struct ctl_table *tbl; + + tbl = ipv4_route_flush_table; + if (net != &init_net) { + tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); + if (tbl == NULL) + goto err_dup; + } + tbl[0].extra1 = net; + + net->ipv4.route_hdr = + register_net_sysctl_table(net, ipv4_route_path, tbl); + if (net->ipv4.route_hdr == NULL) + goto err_reg; + return 0; + +err_reg: + if (tbl != ipv4_route_flush_table) + kfree(tbl); +err_dup: + return -ENOMEM; +} + +static __net_exit void sysctl_route_net_exit(struct net *net) +{ + struct ctl_table *tbl; + + tbl = net->ipv4.route_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv4.route_hdr); + BUG_ON(tbl == ipv4_route_flush_table); + kfree(tbl); +} + +static __net_initdata struct pernet_operations sysctl_route_ops = { + .init = sysctl_route_net_init, + .exit = sysctl_route_net_exit, +}; #endif + +static __net_init int rt_secret_timer_init(struct net *net) +{ + atomic_set(&net->ipv4.rt_genid, + (int) ((num_physpages ^ (num_physpages>>8)) ^ + (jiffies ^ (jiffies >> 7)))); + + net->ipv4.rt_secret_timer.function = rt_secret_rebuild; + net->ipv4.rt_secret_timer.data = (unsigned long)net; + init_timer_deferrable(&net->ipv4.rt_secret_timer); + + net->ipv4.rt_secret_timer.expires = + jiffies + net_random() % ip_rt_secret_interval + + ip_rt_secret_interval; + add_timer(&net->ipv4.rt_secret_timer); + return 0; +} + +static __net_exit void rt_secret_timer_exit(struct net *net) +{ + del_timer_sync(&net->ipv4.rt_secret_timer); +} + +static __net_initdata struct pernet_operations rt_secret_timer_ops = { + .init = rt_secret_timer_init, + .exit = rt_secret_timer_exit, +}; + + #ifdef CONFIG_NET_CLS_ROUTE struct ip_rt_acct *ip_rt_acct __read_mostly; #endif /* CONFIG_NET_CLS_ROUTE */ @@ -3031,9 +3162,6 @@ int __init ip_rt_init(void) { int rc = 0; - atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^ - (jiffies ^ (jiffies >> 7)))); - #ifdef CONFIG_NET_CLS_ROUTE ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); if (!ip_rt_acct) @@ -3065,19 +3193,14 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); - rt_secret_timer.function = rt_secret_rebuild; - rt_secret_timer.data = 0; - init_timer_deferrable(&rt_secret_timer); - /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ schedule_delayed_work(&expires_work, net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + - ip_rt_secret_interval; - add_timer(&rt_secret_timer); + if (register_pernet_subsys(&rt_secret_timer_ops)) + printk(KERN_ERR "Unable to setup rt_secret_timer\n"); if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); @@ -3087,9 +3210,21 @@ int __init ip_rt_init(void) #endif rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); +#ifdef CONFIG_SYSCTL + register_pernet_subsys(&sysctl_route_ops); +#endif return rc; } +/* + * We really need to sanitize the damn ipv4 init order, then all + * this nonsense will go away. + */ +void __init ip_static_sysctl_init(void) +{ + register_sysctl_paths(ipv4_route_path, ipv4_route_table); +} + EXPORT_SYMBOL(__ip_select_ident); EXPORT_SYMBOL(ip_route_input); EXPORT_SYMBOL(ip_route_output_key); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 73ba98921d64..9d38005abbac 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -8,8 +8,6 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. - * - * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $ */ #include <linux/tcp.h> @@ -175,7 +173,7 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) ; *mssp = msstab[mssind] + 1; - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); return secure_tcp_syn_cookie(iph->saddr, iph->daddr, th->source, th->dest, ntohl(th->seq), @@ -271,11 +269,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || (mss = cookie_check(skb, cookie)) == 0) { - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); @@ -285,7 +283,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, cookie_check_timestamp(&tcp_opt); ret = NULL; - req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ + req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ if (!req) goto out; @@ -301,7 +299,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; - ireq->opt = NULL; + ireq->ecn_ok = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c437f804ee38..770d827f5ab8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1,8 +1,6 @@ /* * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem. * - * $Id: sysctl_net_ipv4.c,v 1.50 2001/10/20 00:00:11 davem Exp $ - * * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] */ @@ -403,13 +401,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = &ipv4_local_port_range, .strategy = &ipv4_sysctl_local_port_range, }, - { - .ctl_name = NET_IPV4_ROUTE, - .procname = "route", - .maxlen = 0, - .mode = 0555, - .child = ipv4_route_table - }, #ifdef CONFIG_IP_MULTICAST { .ctl_name = NET_IPV4_IGMP_MAX_MEMBERSHIPS, @@ -795,7 +786,8 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ratelimit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies }, { .ctl_name = NET_IPV4_ICMP_RATEMASK, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f88653138621..1ab341e5d3e0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -255,11 +253,14 @@ #include <linux/init.h> #include <linux/fs.h> #include <linux/skbuff.h> +#include <linux/scatterlist.h> #include <linux/splice.h> #include <linux/net.h> #include <linux/socket.h> #include <linux/random.h> #include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/swap.h> #include <linux/cache.h> #include <linux/err.h> #include <linux/crypto.h> @@ -276,8 +277,6 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; -DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; - atomic_t tcp_orphan_count = ATOMIC_INIT(0); EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -315,10 +314,10 @@ int tcp_memory_pressure __read_mostly; EXPORT_SYMBOL(tcp_memory_pressure); -void tcp_enter_memory_pressure(void) +void tcp_enter_memory_pressure(struct sock *sk) { if (!tcp_memory_pressure) { - NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES); tcp_memory_pressure = 1; } } @@ -343,8 +342,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) return inet_csk_listen_poll(sk); /* Socket is not locked. We are protected from async events - by poll logic and correct handling of state changes - made by another threads is impossible in any case. + * by poll logic and correct handling of state changes + * made by other threads is impossible in any case. */ mask = 0; @@ -370,10 +369,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP * if and only if shutdown has been made in both directions. * Actually, it is interesting to look how Solaris and DUX - * solve this dilemma. I would prefer, if PULLHUP were maskable, + * solve this dilemma. I would prefer, if POLLHUP were maskable, * then we could set it on SND_SHUTDOWN. BTW examples given * in Stevens' books assume exactly this behaviour, it explains - * why PULLHUP is incompatible with POLLOUT. --ANK + * why POLLHUP is incompatible with POLLOUT. --ANK * * NOTE. Check for TCP_CLOSE is added. The goal is to prevent * blocking on fresh not-connected or disconnected socket. --ANK @@ -648,7 +647,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) } __kfree_skb(skb); } else { - sk->sk_prot->enter_memory_pressure(); + sk->sk_prot->enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); } return NULL; @@ -1097,7 +1096,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) #if TCP_DEBUG struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); - BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); + WARN_ON(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); #endif if (inet_csk_ack_scheduled(sk)) { @@ -1152,7 +1151,7 @@ static void tcp_prequeue_process(struct sock *sk) struct sk_buff *skb; struct tcp_sock *tp = tcp_sk(sk); - NET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED); + NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED); /* RX process wants to run with disabled BHs, though it is not * necessary */ @@ -1206,7 +1205,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, return -ENOTCONN; while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { if (offset < skb->len) { - size_t used, len; + int used; + size_t len; len = skb->len - offset; /* Stop reading if we hit a patch of urgent data */ @@ -1227,7 +1227,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, copied += used; offset += used; } - if (offset != skb->len) + /* + * If recv_actor drops the lock (e.g. TCP splice + * receive) the skb pointer might be invalid when + * getting here: tcp_collapse might have deleted it + * while aggregating skbs from the socket queue. + */ + skb = tcp_recv_skb(sk, seq-1, &offset); + if (!skb || (offset+1 != skb->len)) break; } if (tcp_hdr(skb)->fin) { @@ -1351,7 +1358,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; - BUG_TRAP(flags & MSG_PEEK); + WARN_ON(!(flags & MSG_PEEK)); skb = skb->next; } while (skb != (struct sk_buff *)&sk->sk_receive_queue); @@ -1414,8 +1421,8 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tp->ucopy.len = len; - BUG_TRAP(tp->copied_seq == tp->rcv_nxt || - (flags & (MSG_PEEK | MSG_TRUNC))); + WARN_ON(tp->copied_seq != tp->rcv_nxt && + !(flags & (MSG_PEEK | MSG_TRUNC))); /* Ugly... If prequeue is not empty, we have to * process it before releasing socket, otherwise @@ -1466,7 +1473,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* __ Restore normal policy in scheduler __ */ if ((chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); + NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); len -= chunk; copied += chunk; } @@ -1477,7 +1484,7 @@ do_prequeue: tcp_prequeue_process(sk); if ((chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } @@ -1592,7 +1599,7 @@ skip_copy: tcp_prequeue_process(sk); if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } @@ -1659,12 +1666,12 @@ void tcp_set_state(struct sock *sk, int state) switch (state) { case TCP_ESTABLISHED: if (oldstate != TCP_ESTABLISHED) - TCP_INC_STATS(TCP_MIB_CURRESTAB); + TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); break; case TCP_CLOSE: if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED) - TCP_INC_STATS(TCP_MIB_ESTABRESETS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); if (inet_csk(sk)->icsk_bind_hash && @@ -1673,7 +1680,7 @@ void tcp_set_state(struct sock *sk, int state) /* fall through */ default: if (oldstate==TCP_ESTABLISHED) - TCP_DEC_STATS(TCP_MIB_CURRESTAB); + TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); } /* Change state AFTER socket is unhashed to avoid closed @@ -1784,13 +1791,13 @@ void tcp_close(struct sock *sk, long timeout) */ if (data_was_unread) { /* Unread data was tossed, zap the connection. */ - NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE); + NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_KERNEL); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); - NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA); } else if (tcp_close_state(sk)) { /* We FIN if the application ate all the data before * zapping the connection. @@ -1837,7 +1844,7 @@ adjudge_to_death: */ local_bh_disable(); bh_lock_sock(sk); - BUG_TRAP(!sock_owned_by_user(sk)); + WARN_ON(sock_owned_by_user(sk)); /* Have we already been destroyed by a softirq or backlog? */ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) @@ -1862,7 +1869,8 @@ adjudge_to_death: if (tp->linger2 < 0) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPABORTONLINGER); } else { const int tmo = tcp_fin_time(sk); @@ -1884,7 +1892,8 @@ adjudge_to_death: "sockets\n"); tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPABORTONMEMORY); } } @@ -1964,7 +1973,7 @@ int tcp_disconnect(struct sock *sk, int flags) memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || icsk->icsk_bind_hash); + WARN_ON(inet->num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; @@ -2105,12 +2114,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_DEFER_ACCEPT: - if (val < 0) { - err = -EINVAL; - } else { - if (val > MAX_TCP_ACCEPT_DEFERRED) - val = MAX_TCP_ACCEPT_DEFERRED; - icsk->icsk_accept_queue.rskq_defer_accept = val; + icsk->icsk_accept_queue.rskq_defer_accept = 0; + if (val > 0) { + /* Translate value in seconds to number of + * retransmits */ + while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && + val > ((TCP_TIMEOUT_INIT / HZ) << + icsk->icsk_accept_queue.rskq_defer_accept)) + icsk->icsk_accept_queue.rskq_defer_accept++; + icsk->icsk_accept_queue.rskq_defer_accept++; } break; @@ -2292,7 +2304,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = (val ? : sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: - val = icsk->icsk_accept_queue.rskq_defer_accept; + val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : + ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; @@ -2575,12 +2588,69 @@ void __tcp_put_md5sig_pool(void) } EXPORT_SYMBOL(__tcp_put_md5sig_pool); + +int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, + struct tcphdr *th) +{ + struct scatterlist sg; + int err; + + __sum16 old_checksum = th->check; + th->check = 0; + /* options aren't included in the hash */ + sg_init_one(&sg, th, sizeof(struct tcphdr)); + err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr)); + th->check = old_checksum; + return err; +} + +EXPORT_SYMBOL(tcp_md5_hash_header); + +int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, + struct sk_buff *skb, unsigned header_len) +{ + struct scatterlist sg; + const struct tcphdr *tp = tcp_hdr(skb); + struct hash_desc *desc = &hp->md5_desc; + unsigned i; + const unsigned head_data_len = skb_headlen(skb) > header_len ? + skb_headlen(skb) - header_len : 0; + const struct skb_shared_info *shi = skb_shinfo(skb); + + sg_init_table(&sg, 1); + + sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len); + if (crypto_hash_update(desc, &sg, head_data_len)) + return 1; + + for (i = 0; i < shi->nr_frags; ++i) { + const struct skb_frag_struct *f = &shi->frags[i]; + sg_set_page(&sg, f->page, f->size, f->page_offset); + if (crypto_hash_update(desc, &sg, f->size)) + return 1; + } + + return 0; +} + +EXPORT_SYMBOL(tcp_md5_hash_skb_data); + +int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) +{ + struct scatterlist sg; + + sg_init_one(&sg, key->key, key->keylen); + return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); +} + +EXPORT_SYMBOL(tcp_md5_hash_key); + #endif void tcp_done(struct sock *sk) { if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); @@ -2609,7 +2679,7 @@ __setup("thash_entries=", set_thash_entries); void __init tcp_init(void) { struct sk_buff *skb = NULL; - unsigned long limit; + unsigned long nr_pages, limit; int order, i, max_share; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -2678,8 +2748,9 @@ void __init tcp_init(void) * is up to 1/2 at 256 MB, decreasing toward zero with the amount of * memory, with a floor of 128 pages. */ - limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + nr_pages = totalram_pages - totalhigh_pages; + limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); limit = max(limit, 128UL); sysctl_tcp_mem[0] = limit / 4 * 3; sysctl_tcp_mem[1] = limit; @@ -2716,4 +2787,3 @@ EXPORT_SYMBOL(tcp_splice_read); EXPORT_SYMBOL(tcp_sendpage); EXPORT_SYMBOL(tcp_setsockopt); EXPORT_SYMBOL(tcp_shutdown); -EXPORT_SYMBOL(tcp_statistics); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 2fbcc7d1b1a0..838d491dfda7 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -1,8 +1,6 @@ /* * tcp_diag.c Module for monitoring TCP transport protocols sockets. * - * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eda4f4a233f3..67ccce2a96bd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.243 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -66,6 +64,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/sysctl.h> +#include <net/dst.h> #include <net/tcp.h> #include <net/inet_common.h> #include <linux/ipsec.h> @@ -113,8 +112,6 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) #define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) -#define IsSackFrto() (sysctl_tcp_frto == 0x2) - #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -605,7 +602,7 @@ static u32 tcp_rto_min(struct sock *sk) u32 rto_min = TCP_RTO_MIN; if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) - rto_min = dst->metrics[RTAX_RTO_MIN - 1]; + rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); return rto_min; } @@ -732,6 +729,7 @@ void tcp_update_metrics(struct sock *sk) if (dst && (dst->flags & DST_HOST)) { const struct inet_connection_sock *icsk = inet_csk(sk); int m; + unsigned long rtt; if (icsk->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? @@ -743,7 +741,8 @@ void tcp_update_metrics(struct sock *sk) return; } - m = dst_metric(dst, RTAX_RTT) - tp->srtt; + rtt = dst_metric_rtt(dst, RTAX_RTT); + m = rtt - tp->srtt; /* If newly calculated rtt larger than stored one, * store new one. Otherwise, use EWMA. Remember, @@ -751,12 +750,13 @@ void tcp_update_metrics(struct sock *sk) */ if (!(dst_metric_locked(dst, RTAX_RTT))) { if (m <= 0) - dst->metrics[RTAX_RTT - 1] = tp->srtt; + set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt); else - dst->metrics[RTAX_RTT - 1] -= (m >> 3); + set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3)); } if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { + unsigned long var; if (m < 0) m = -m; @@ -765,11 +765,13 @@ void tcp_update_metrics(struct sock *sk) if (m < tp->mdev) m = tp->mdev; - if (m >= dst_metric(dst, RTAX_RTTVAR)) - dst->metrics[RTAX_RTTVAR - 1] = m; + var = dst_metric_rtt(dst, RTAX_RTTVAR); + if (m >= var) + var = m; else - dst->metrics[RTAX_RTTVAR-1] -= - (dst->metrics[RTAX_RTTVAR-1] - m)>>2; + var -= (var - m) >> 2; + + set_dst_metric_rtt(dst, RTAX_RTTVAR, var); } if (tp->snd_ssthresh >= 0xFFFF) { @@ -788,21 +790,21 @@ void tcp_update_metrics(struct sock *sk) dst->metrics[RTAX_SSTHRESH-1] = max(tp->snd_cwnd >> 1, tp->snd_ssthresh); if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1; + dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1; } else { /* Else slow start did not finish, cwnd is non-sense, ssthresh may be also invalid. */ if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1; - if (dst->metrics[RTAX_SSTHRESH-1] && + dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1; + if (dst_metric(dst, RTAX_SSTHRESH) && !dst_metric_locked(dst, RTAX_SSTHRESH) && - tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1]) + tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh; } if (!dst_metric_locked(dst, RTAX_REORDERING)) { - if (dst->metrics[RTAX_REORDERING-1] < tp->reordering && + if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && tp->reordering != sysctl_tcp_reordering) dst->metrics[RTAX_REORDERING-1] = tp->reordering; } @@ -900,7 +902,7 @@ static void tcp_init_metrics(struct sock *sk) if (dst_metric(dst, RTAX_RTT) == 0) goto reset; - if (!tp->srtt && dst_metric(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) + if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) goto reset; /* Initial rtt is determined from SYN,SYN-ACK. @@ -917,12 +919,12 @@ static void tcp_init_metrics(struct sock *sk) * to low value, and then abruptly stops to do it and starts to delay * ACKs, wait for troubles. */ - if (dst_metric(dst, RTAX_RTT) > tp->srtt) { - tp->srtt = dst_metric(dst, RTAX_RTT); + if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) { + tp->srtt = dst_metric_rtt(dst, RTAX_RTT); tp->rtt_seq = tp->snd_nxt; } - if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) { - tp->mdev = dst_metric(dst, RTAX_RTTVAR); + if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) { + tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR); tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); } tcp_set_rto(sk); @@ -950,17 +952,21 @@ static void tcp_update_reordering(struct sock *sk, const int metric, { struct tcp_sock *tp = tcp_sk(sk); if (metric > tp->reordering) { + int mib_idx; + tp->reordering = min(TCP_MAX_REORDERING, metric); /* This exciting event is worth to be remembered. 8) */ if (ts) - NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER); + mib_idx = LINUX_MIB_TCPTSREORDER; else if (tcp_is_reno(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER); + mib_idx = LINUX_MIB_TCPRENOREORDER; else if (tcp_is_fack(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER); + mib_idx = LINUX_MIB_TCPFACKREORDER; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); + mib_idx = LINUX_MIB_TCPSACKREORDER; + + NET_INC_STATS_BH(sock_net(sk), mib_idx); #if FASTRETRANS_DEBUG > 1 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -1156,7 +1162,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) tp->lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; } - NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); } else { if (before(ack_seq, new_low_seq)) new_low_seq = ack_seq; @@ -1168,10 +1174,11 @@ static void tcp_mark_lost_retrans(struct sock *sk) tp->lost_retrans_low = new_low_seq; } -static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, +static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp, int num_sacks, u32 prior_snd_una) { + struct tcp_sock *tp = tcp_sk(sk); u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); int dup_sack = 0; @@ -1179,7 +1186,7 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { dup_sack = 1; tcp_dsack_seen(tp); - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); } else if (num_sacks > 1) { u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq); u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq); @@ -1188,7 +1195,8 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, !before(start_seq_0, start_seq_1)) { dup_sack = 1; tcp_dsack_seen(tp); - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPDSACKOFORECV); } } @@ -1393,9 +1401,9 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, if (before(next_dup->start_seq, skip_to_seq)) { skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count); - tcp_sacktag_walk(skb, sk, NULL, - next_dup->start_seq, next_dup->end_seq, - 1, fack_count, reord, flag); + skb = tcp_sacktag_walk(skb, sk, NULL, + next_dup->start_seq, next_dup->end_seq, + 1, fack_count, reord, flag); } return skb; @@ -1415,10 +1423,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, unsigned char *ptr = (skb_transport_header(ack_skb) + TCP_SKB_CB(ack_skb)->sacked); struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); - struct tcp_sack_block sp[4]; + struct tcp_sack_block sp[TCP_NUM_SACKS]; struct tcp_sack_block *cache; struct sk_buff *skb; - int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3; + int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); int used_sacks; int reord = tp->packets_out; int flag = 0; @@ -1433,7 +1441,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, tcp_highest_sack_reset(sk); } - found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire, + found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); if (found_dup_sack) flag |= FLAG_DSACKING_ACK; @@ -1459,18 +1467,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, if (!tcp_is_sackblock_valid(tp, dup_sack, sp[used_sacks].start_seq, sp[used_sacks].end_seq)) { + int mib_idx; + if (dup_sack) { if (!tp->undo_marker) - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); + mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO; else - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD); + mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD; } else { /* Don't count olds caused by ACK reordering */ if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && !after(sp[used_sacks].end_seq, tp->snd_una)) continue; - NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); + mib_idx = LINUX_MIB_TCPSACKDISCARD; } + + NET_INC_STATS_BH(sock_net(sk), mib_idx); if (i == 0) first_sack_index = -1; continue; @@ -1617,10 +1629,10 @@ advance_sp: out: #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); - BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0); + WARN_ON((int)tp->sacked_out < 0); + WARN_ON((int)tp->lost_out < 0); + WARN_ON((int)tp->retrans_out < 0); + WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif return flag; } @@ -1685,6 +1697,11 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } +static int tcp_is_sackfrto(const struct tcp_sock *tp) +{ + return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); +} + /* F-RTO can only be used if TCP has never retransmitted anything other than * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) */ @@ -1701,7 +1718,7 @@ int tcp_use_frto(struct sock *sk) if (icsk->icsk_mtup.probe_size) return 0; - if (IsSackFrto()) + if (tcp_is_sackfrto(tp)) return 1; /* Avoid expensive walking of rexmit queue if possible */ @@ -1791,7 +1808,7 @@ void tcp_enter_frto(struct sock *sk) /* Earlier loss recovery underway (see RFC4138; Appendix B). * The last condition is necessary at least in tp->frto_counter case. */ - if (IsSackFrto() && (tp->frto_counter || + if (tcp_is_sackfrto(tp) && (tp->frto_counter || ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && after(tp->high_seq, tp->snd_una)) { tp->frto_highmark = tp->high_seq; @@ -1838,9 +1855,16 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; } - /* Don't lost mark skbs that were fwd transmitted after RTO */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) && - !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { + /* Marking forward transmissions that were made after RTO lost + * can cause unnecessary retransmissions in some scenarios, + * SACK blocks will mitigate that in some but not in all cases. + * We used to not mark them but it was causing break-ups with + * receivers that do only in-order receival. + * + * TODO: we could detect presence of such receiver and select + * different behavior per flow. + */ + if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); } @@ -1856,7 +1880,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); tcp_set_ca_state(sk, TCP_CA_Loss); - tp->high_seq = tp->frto_highmark; + tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); tcp_clear_retrans_hints_partial(tp); @@ -1951,7 +1975,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag) { if (flag & FLAG_SACK_RENEGING) { struct inet_connection_sock *icsk = inet_csk(sk); - NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tcp_enter_loss(sk, 1); icsk->icsk_retransmits++; @@ -2157,7 +2181,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) int err; unsigned int mss; - BUG_TRAP(packets <= tp->packets_out); + WARN_ON(packets > tp->packets_out); if (tp->lost_skb_hint) { skb = tp->lost_skb_hint; cnt = tp->lost_cnt_hint; @@ -2371,15 +2395,19 @@ static int tcp_try_undo_recovery(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tcp_may_undo(tp)) { + int mib_idx; + /* Happy end! We did not retransmit anything * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); tcp_undo_cwr(sk, 1); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) - NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); + mib_idx = LINUX_MIB_TCPLOSSUNDO; else - NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); + mib_idx = LINUX_MIB_TCPFULLUNDO; + + NET_INC_STATS_BH(sock_net(sk), mib_idx); tp->undo_marker = 0; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { @@ -2402,7 +2430,7 @@ static void tcp_try_undo_dsack(struct sock *sk) DBGUNDO(sk, "D-SACK"); tcp_undo_cwr(sk, 1); tp->undo_marker = 0; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); } } @@ -2425,7 +2453,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) DBGUNDO(sk, "Hoe"); tcp_undo_cwr(sk, 0); - NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); /* So... Do not make Hoe's retransmit yet. * If the first packet was delayed, the rest @@ -2454,7 +2482,7 @@ static int tcp_try_undo_loss(struct sock *sk) DBGUNDO(sk, "partial loss"); tp->lost_out = 0; tcp_undo_cwr(sk, 1); - NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; if (tcp_is_sack(tp)) @@ -2472,28 +2500,34 @@ static inline void tcp_complete_cwr(struct sock *sk) tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } +static void tcp_try_keep_open(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + int state = TCP_CA_Open; + + if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) + state = TCP_CA_Disorder; + + if (inet_csk(sk)->icsk_ca_state != state) { + tcp_set_ca_state(sk, state); + tp->high_seq = tp->snd_nxt; + } +} + static void tcp_try_to_open(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); tcp_verify_left_out(tp); - if (tp->retrans_out == 0) + if (!tp->frto_counter && tp->retrans_out == 0) tp->retrans_stamp = 0; if (flag & FLAG_ECE) tcp_enter_cwr(sk, 1); if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { - int state = TCP_CA_Open; - - if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) - state = TCP_CA_Disorder; - - if (inet_csk(sk)->icsk_ca_state != state) { - tcp_set_ca_state(sk, state); - tp->high_seq = tp->snd_nxt; - } + tcp_try_keep_open(sk); tcp_moderate_cwnd(tp); } else { tcp_cwnd_down(sk, flag); @@ -2545,7 +2579,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int fast_rexmit = 0; + int fast_rexmit = 0, mib_idx; if (WARN_ON(!tp->packets_out && tp->sacked_out)) tp->sacked_out = 0; @@ -2567,7 +2601,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering); - NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS); } /* D. Check consistency of the current state. */ @@ -2576,7 +2610,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) /* E. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ if (icsk->icsk_ca_state == TCP_CA_Open) { - BUG_TRAP(tp->retrans_out == 0); + WARN_ON(tp->retrans_out != 0); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (icsk->icsk_ca_state) { @@ -2668,9 +2702,11 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) /* Otherwise enter Recovery state */ if (tcp_is_reno(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY); + mib_idx = LINUX_MIB_TCPRENORECOVERY; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY); + mib_idx = LINUX_MIB_TCPSACKRECOVERY; + + NET_INC_STATS_BH(sock_net(sk), mib_idx); tp->high_seq = tp->snd_nxt; tp->prior_ssthresh = 0; @@ -2936,9 +2972,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) } #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); + WARN_ON((int)tp->sacked_out < 0); + WARN_ON((int)tp->lost_out < 0); + WARN_ON((int)tp->retrans_out < 0); if (!tp->packets_out && tcp_is_sack(tp)) { icsk = inet_csk(sk); if (tp->lost_out) { @@ -3123,7 +3159,7 @@ static int tcp_process_frto(struct sock *sk, int flag) return 1; } - if (!IsSackFrto() || tcp_is_reno(tp)) { + if (!tcp_is_sackfrto(tp)) { /* RFC4138 shortcoming in step 2; should also have case c): * ACK isn't duplicate nor advances window, e.g., opposite dir * data, winupdate @@ -3181,7 +3217,7 @@ static int tcp_process_frto(struct sock *sk, int flag) } tp->frto_counter = 0; tp->undo_marker = 0; - NET_INC_STATS_BH(LINUX_MIB_TCPSPURIOUSRTOS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); } return 0; } @@ -3234,12 +3270,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tcp_ca_event(sk, CA_EVENT_FAST_ACK); - NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS); } else { if (ack_seq != TCP_SKB_CB(skb)->end_seq) flag |= FLAG_DATA; else - NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS); flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); @@ -3256,6 +3292,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) * log. Something worked... */ sk->sk_err_soft = 0; + icsk->icsk_probes_out = 0; tp->rcv_tstamp = tcp_time_stamp; prior_packets = tp->packets_out; if (!prior_packets) @@ -3288,8 +3325,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) return 1; no_queue: - icsk->icsk_probes_out = 0; - /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3299,8 +3334,11 @@ no_queue: return 1; old_ack: - if (TCP_SKB_CB(skb)->sacked) + if (TCP_SKB_CB(skb)->sacked) { tcp_sacktag_write_queue(sk, skb, prior_snd_una); + if (icsk->icsk_ca_state == TCP_CA_Open) + tcp_try_keep_open(sk); + } uninteresting_ack: SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); @@ -3430,6 +3468,43 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, return 1; } +#ifdef CONFIG_TCP_MD5SIG +/* + * Parse MD5 Signature option + */ +u8 *tcp_parse_md5sig_option(struct tcphdr *th) +{ + int length = (th->doff << 2) - sizeof (*th); + u8 *ptr = (u8*)(th + 1); + + /* If the TCP option is too short, we can short cut */ + if (length < TCPOLEN_MD5SIG) + return NULL; + + while (length > 0) { + int opcode = *ptr++; + int opsize; + + switch(opcode) { + case TCPOPT_EOL: + return NULL; + case TCPOPT_NOP: + length--; + continue; + default: + opsize = *ptr++; + if (opsize < 2 || opsize > length) + return NULL; + if (opcode == TCPOPT_MD5SIG) + return ptr; + } + ptr += opsize - 2; + length -= opsize; + } + return NULL; +} +#endif + static inline void tcp_store_ts_recent(struct tcp_sock *tp) { tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; @@ -3642,26 +3717,33 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, return 0; } -static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) +static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { + struct tcp_sock *tp = tcp_sk(sk); + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + int mib_idx; + if (before(seq, tp->rcv_nxt)) - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT); + mib_idx = LINUX_MIB_TCPDSACKOLDSENT; else - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT); + mib_idx = LINUX_MIB_TCPDSACKOFOSENT; + + NET_INC_STATS_BH(sock_net(sk), mib_idx); tp->rx_opt.dsack = 1; tp->duplicate_sack[0].start_seq = seq; tp->duplicate_sack[0].end_seq = end_seq; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, - 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1; } } -static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) +static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) { + struct tcp_sock *tp = tcp_sk(sk); + if (!tp->rx_opt.dsack) - tcp_dsack_set(tp, seq, end_seq); + tcp_dsack_set(sk, seq, end_seq); else tcp_sack_extend(tp->duplicate_sack, seq, end_seq); } @@ -3672,7 +3754,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); if (tcp_is_sack(tp) && sysctl_tcp_dsack) { @@ -3680,7 +3762,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) end_seq = tp->rcv_nxt; - tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq); + tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq); } } @@ -3707,9 +3789,8 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) * Decrease num_sacks. */ tp->rx_opt.num_sacks--; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + - tp->rx_opt.dsack, - 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + + tp->rx_opt.dsack; for (i = this_sack; i < tp->rx_opt.num_sacks; i++) sp[i] = sp[i + 1]; continue; @@ -3759,7 +3840,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) * * If the sack array is full, forget about the last one. */ - if (this_sack >= 4) { + if (this_sack >= TCP_NUM_SACKS) { this_sack--; tp->rx_opt.num_sacks--; sp--; @@ -3772,8 +3853,7 @@ new_sack: sp->start_seq = seq; sp->end_seq = end_seq; tp->rx_opt.num_sacks++; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, - 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; } /* RCV.NXT advances, some SACKs should be eaten. */ @@ -3797,7 +3877,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) int i; /* RCV.NXT must cover all the block! */ - BUG_TRAP(!before(tp->rcv_nxt, sp->end_seq)); + WARN_ON(before(tp->rcv_nxt, sp->end_seq)); /* Zap this SACK, by moving forward any other SACKS. */ for (i=this_sack+1; i < num_sacks; i++) @@ -3810,9 +3890,8 @@ static void tcp_sack_remove(struct tcp_sock *tp) } if (num_sacks != tp->rx_opt.num_sacks) { tp->rx_opt.num_sacks = num_sacks; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + - tp->rx_opt.dsack, - 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + + tp->rx_opt.dsack; } } @@ -3833,7 +3912,7 @@ static void tcp_ofo_queue(struct sock *sk) __u32 dsack = dsack_high; if (before(TCP_SKB_CB(skb)->end_seq, dsack_high)) dsack_high = TCP_SKB_CB(skb)->end_seq; - tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack); + tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); } if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { @@ -3891,8 +3970,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (tp->rx_opt.dsack) { tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks, - 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; } /* Queue data for delivery to the user. @@ -3961,8 +4039,8 @@ queue_and_out: if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { /* A retransmit, 2nd most common case. Force an immediate ack. */ - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); - tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); + tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); out_of_window: tcp_enter_quickack_mode(sk); @@ -3984,7 +4062,7 @@ drop: tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt); + tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt); /* If window is closed, drop tail of packet. But after * remembering D-SACK for its head made in previous line. @@ -4049,12 +4127,12 @@ drop: if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Drop. */ __kfree_skb(skb); - tcp_dsack_set(tp, seq, end_seq); + tcp_dsack_set(sk, seq, end_seq); goto add_sack; } if (after(seq, TCP_SKB_CB(skb1)->seq)) { /* Partial overlap. */ - tcp_dsack_set(tp, seq, + tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq); } else { skb1 = skb1->prev; @@ -4067,12 +4145,12 @@ drop: (struct sk_buff *)&tp->out_of_order_queue && after(end_seq, TCP_SKB_CB(skb1)->seq)) { if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, + tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, end_seq); break; } __skb_unlink(skb1, &tp->out_of_order_queue); - tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, + tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); __kfree_skb(skb1); } @@ -4103,7 +4181,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct sk_buff *next = skb->next; __skb_unlink(skb, list); __kfree_skb(skb); - NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); skb = next; continue; } @@ -4171,7 +4249,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct sk_buff *next = skb->next; __skb_unlink(skb, list); __kfree_skb(skb); - NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); skb = next; if (skb == tail || tcp_hdr(skb)->syn || @@ -4234,7 +4312,7 @@ static int tcp_prune_ofo_queue(struct sock *sk) int res = 0; if (!skb_queue_empty(&tp->out_of_order_queue)) { - NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); __skb_queue_purge(&tp->out_of_order_queue); /* Reset SACK state. A conforming SACK implementation will @@ -4263,7 +4341,7 @@ static int tcp_prune_queue(struct sock *sk) SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); - NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED); if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); @@ -4292,7 +4370,7 @@ static int tcp_prune_queue(struct sock *sk) * drop receive data on the floor. It will get retransmitted * and hopefully then we'll have sufficient space. */ - NET_INC_STATS_BH(LINUX_MIB_RCVPRUNED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED); /* Massive buffer overcommit. */ tp->pred_flags = 0; @@ -4521,49 +4599,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) } } -static int tcp_defer_accept_check(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (tp->defer_tcp_accept.request) { - int queued_data = tp->rcv_nxt - tp->copied_seq; - int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ? - tcp_hdr((struct sk_buff *) - sk->sk_receive_queue.prev)->fin : 0; - - if (queued_data && hasfin) - queued_data--; - - if (queued_data && - tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) { - if (sock_flag(sk, SOCK_KEEPOPEN)) { - inet_csk_reset_keepalive_timer(sk, - keepalive_time_when(tp)); - } else { - inet_csk_delete_keepalive_timer(sk); - } - - inet_csk_reqsk_queue_add( - tp->defer_tcp_accept.listen_sk, - tp->defer_tcp_accept.request, - sk); - - tp->defer_tcp_accept.listen_sk->sk_data_ready( - tp->defer_tcp_accept.listen_sk, 0); - - sock_put(tp->defer_tcp_accept.listen_sk); - sock_put(sk); - tp->defer_tcp_accept.listen_sk = NULL; - tp->defer_tcp_accept.request = NULL; - } else if (hasfin || - tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) { - tcp_reset(sk); - return -1; - } - } - return 0; -} - static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) { struct tcp_sock *tp = tcp_sk(sk); @@ -4765,7 +4800,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_data_snd_check(sk); return 0; } else { /* Header too small */ - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; } } else { @@ -4802,7 +4837,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, __skb_pull(skb, tcp_header_len); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); } if (copied_early) tcp_cleanup_rbuf(sk, skb->len); @@ -4825,7 +4860,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if ((int)skb->truesize > sk->sk_forward_alloc) goto step5; - NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ __skb_pull(skb, tcp_header_len); @@ -4869,7 +4904,7 @@ slow_path: if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { - NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); tcp_send_dupack(sk, skb); goto discard; } @@ -4904,8 +4939,8 @@ slow_path: tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - TCP_INC_STATS_BH(TCP_MIB_INERRS); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); tcp_reset(sk); return 1; } @@ -4924,12 +4959,10 @@ step5: tcp_data_snd_check(sk); tcp_ack_snd_check(sk); - - tcp_defer_accept_check(sk); return 0; csum_error: - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); discard: __kfree_skb(skb); @@ -4963,7 +4996,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) { - NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); goto reset_and_undo; } @@ -5247,7 +5280,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { - NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); tcp_send_dupack(sk, skb); goto discard; } @@ -5276,7 +5309,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * Check for a SYN in window. */ if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); tcp_reset(sk); return 1; } @@ -5358,7 +5391,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { tcp_done(sk); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); return 1; } @@ -5418,7 +5451,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (sk->sk_shutdown & RCV_SHUTDOWN) { if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); tcp_reset(sk); return 1; } @@ -5447,6 +5480,9 @@ EXPORT_SYMBOL(sysctl_tcp_ecn); EXPORT_SYMBOL(sysctl_tcp_reordering); EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); EXPORT_SYMBOL(tcp_parse_options); +#ifdef CONFIG_TCP_MD5SIG +EXPORT_SYMBOL(tcp_parse_md5sig_option); +#endif EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_state_process); EXPORT_SYMBOL(tcp_initialize_rcv_mss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cd601a866c2f..a2b06d0cc26b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $ - * * IPv4 specific functions * * @@ -85,18 +83,18 @@ int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; -/* Check TCP sequence numbers in ICMP packets. */ -#define ICMP_MIN_LENGTH 8 - -void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); #ifdef CONFIG_TCP_MD5SIG static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr); -static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - __be32 saddr, __be32 daddr, - struct tcphdr *th, int protocol, - unsigned int tcplen); +static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, + __be32 daddr, __be32 saddr, struct tcphdr *th); +#else +static inline +struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) +{ + return NULL; +} #endif struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { @@ -176,7 +174,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->sport, usin->sin_port, sk, 1); if (tmp < 0) { if (tmp == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return tmp; } @@ -344,16 +342,17 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) struct sock *sk; __u32 seq; int err; + struct net *net = dev_net(skb->dev); if (skb->len < (iph->ihl << 2) + 8) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } - sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest, + sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, iph->saddr, th->source, inet_iif(skb)); if (!sk) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } if (sk->sk_state == TCP_TIME_WAIT) { @@ -366,7 +365,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; @@ -375,7 +374,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -419,10 +418,10 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) /* ICMPs are not backlogged, hence we cannot get an established socket here. */ - BUG_TRAP(!req->sk); + WARN_ON(req->sk); if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -544,6 +543,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; #endif + struct net *net; /* Never send a reset in response to a reset. */ if (th->rst) @@ -582,12 +582,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; rep.th.doff = arg.iov[0].iov_len / 4; - tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], - key, - ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - &rep.th, IPPROTO_TCP, - arg.iov[0].iov_len); + tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], + key, ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, &rep.th); } #endif arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, @@ -595,20 +592,21 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) sizeof(struct tcphdr), IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; - ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb, + net = dev_net(skb->dst->dev); + ip_send_reply(net->ipv4.tcp_sock, skb, &arg, arg.iov[0].iov_len); - TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); - TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); + TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); } /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, - struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 ts) +static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 ts, int oif, + struct tcp_md5sig_key *key) { struct tcphdr *th = tcp_hdr(skb); struct { @@ -620,10 +618,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, ]; } rep; struct ip_reply_arg arg; -#ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; - struct tcp_md5sig_key tw_key; -#endif + struct net *net = dev_net(skb->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -649,23 +644,6 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, rep.th.window = htons(win); #ifdef CONFIG_TCP_MD5SIG - /* - * The SKB holds an imcoming packet, but may not have a valid ->sk - * pointer. This is especially the case when we're dealing with a - * TIME_WAIT ack, because the sk structure is long gone, and only - * the tcp_timewait_sock remains. So the md5 key is stashed in that - * structure, and we use it in preference. I believe that (twsk || - * skb->sk) holds true, but we program defensively. - */ - if (!twsk && skb->sk) { - key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr); - } else if (twsk && twsk->tw_md5_keylen) { - tw_key.key = twsk->tw_md5_key; - tw_key.keylen = twsk->tw_md5_keylen; - key = &tw_key; - } else - key = NULL; - if (key) { int offset = (ts) ? 3 : 0; @@ -676,25 +654,22 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; rep.th.doff = arg.iov[0].iov_len/4; - tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], - key, - ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - &rep.th, IPPROTO_TCP, - arg.iov[0].iov_len); + tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], + key, ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, &rep.th); } #endif arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; - if (twsk) - arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; + if (oif) + arg.bound_dev_if = oif; - ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb, + ip_send_reply(net->ipv4.tcp_sock, skb, &arg, arg.iov[0].iov_len); - TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); + TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); } static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -702,9 +677,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcptw->tw_ts_recent); + tcptw->tw_ts_recent, + tw->tw_bound_dev_if, + tcp_twsk_md5_key(tcptw) + ); inet_twsk_put(tw); } @@ -712,9 +690,11 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) { - tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, + tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, - req->ts_recent); + req->ts_recent, + 0, + tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr)); } /* @@ -1004,32 +984,13 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, newkey, cmd.tcpm_keylen); } -static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - __be32 saddr, __be32 daddr, - struct tcphdr *th, int protocol, - unsigned int tcplen) +static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, + __be32 daddr, __be32 saddr, int nbytes) { - struct scatterlist sg[4]; - __u16 data_len; - int block = 0; - __sum16 old_checksum; - struct tcp_md5sig_pool *hp; struct tcp4_pseudohdr *bp; - struct hash_desc *desc; - int err; - unsigned int nbytes = 0; - - /* - * Okay, so RFC2385 is turned on for this connection, - * so we need to generate the MD5 hash for the packet now. - */ - - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; + struct scatterlist sg; bp = &hp->md5_blk.ip4; - desc = &hp->md5_desc; /* * 1. the TCP pseudo-header (in the order: source IP address, @@ -1039,86 +1000,96 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, bp->saddr = saddr; bp->daddr = daddr; bp->pad = 0; - bp->protocol = protocol; - bp->len = htons(tcplen); - - sg_init_table(sg, 4); + bp->protocol = IPPROTO_TCP; + bp->len = cpu_to_be16(nbytes); - sg_set_buf(&sg[block++], bp, sizeof(*bp)); - nbytes += sizeof(*bp); - - /* 2. the TCP header, excluding options, and assuming a - * checksum of zero/ - */ - old_checksum = th->check; - th->check = 0; - sg_set_buf(&sg[block++], th, sizeof(struct tcphdr)); - nbytes += sizeof(struct tcphdr); - - /* 3. the TCP segment data (if any) */ - data_len = tcplen - (th->doff << 2); - if (data_len > 0) { - unsigned char *data = (unsigned char *)th + (th->doff << 2); - sg_set_buf(&sg[block++], data, data_len); - nbytes += data_len; - } + sg_init_one(&sg, bp, sizeof(*bp)); + return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); +} - /* 4. an independently-specified key or password, known to both - * TCPs and presumably connection-specific - */ - sg_set_buf(&sg[block++], key->key, key->keylen); - nbytes += key->keylen; +static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, + __be32 daddr, __be32 saddr, struct tcphdr *th) +{ + struct tcp_md5sig_pool *hp; + struct hash_desc *desc; - sg_mark_end(&sg[block - 1]); + hp = tcp_get_md5sig_pool(); + if (!hp) + goto clear_hash_noput; + desc = &hp->md5_desc; - /* Now store the Hash into the packet */ - err = crypto_hash_init(desc); - if (err) + if (crypto_hash_init(desc)) goto clear_hash; - err = crypto_hash_update(desc, sg, nbytes); - if (err) + if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) goto clear_hash; - err = crypto_hash_final(desc, md5_hash); - if (err) + if (tcp_md5_hash_header(hp, th)) + goto clear_hash; + if (tcp_md5_hash_key(hp, key)) + goto clear_hash; + if (crypto_hash_final(desc, md5_hash)) goto clear_hash; - /* Reset header, and free up the crypto */ tcp_put_md5sig_pool(); - th->check = old_checksum; - -out: return 0; + clear_hash: tcp_put_md5sig_pool(); clear_hash_noput: memset(md5_hash, 0, 16); - goto out; + return 1; } -int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - struct sock *sk, - struct dst_entry *dst, - struct request_sock *req, - struct tcphdr *th, int protocol, - unsigned int tcplen) +int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, + struct sock *sk, struct request_sock *req, + struct sk_buff *skb) { + struct tcp_md5sig_pool *hp; + struct hash_desc *desc; + struct tcphdr *th = tcp_hdr(skb); __be32 saddr, daddr; if (sk) { saddr = inet_sk(sk)->saddr; daddr = inet_sk(sk)->daddr; + } else if (req) { + saddr = inet_rsk(req)->loc_addr; + daddr = inet_rsk(req)->rmt_addr; } else { - struct rtable *rt = (struct rtable *)dst; - BUG_ON(!rt); - saddr = rt->rt_src; - daddr = rt->rt_dst; + const struct iphdr *iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; } - return tcp_v4_do_calc_md5_hash(md5_hash, key, - saddr, daddr, - th, protocol, tcplen); + + hp = tcp_get_md5sig_pool(); + if (!hp) + goto clear_hash_noput; + desc = &hp->md5_desc; + + if (crypto_hash_init(desc)) + goto clear_hash; + + if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) + goto clear_hash; + if (tcp_md5_hash_header(hp, th)) + goto clear_hash; + if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) + goto clear_hash; + if (tcp_md5_hash_key(hp, key)) + goto clear_hash; + if (crypto_hash_final(desc, md5_hash)) + goto clear_hash; + + tcp_put_md5sig_pool(); + return 0; + +clear_hash: + tcp_put_md5sig_pool(); +clear_hash_noput: + memset(md5_hash, 0, 16); + return 1; } -EXPORT_SYMBOL(tcp_v4_calc_md5_hash); +EXPORT_SYMBOL(tcp_v4_md5_hash_skb); static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) { @@ -1134,52 +1105,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) struct tcp_md5sig_key *hash_expected; const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); - int length = (th->doff << 2) - sizeof(struct tcphdr); int genhash; - unsigned char *ptr; unsigned char newhash[16]; hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); + hash_location = tcp_parse_md5sig_option(th); - /* - * If the TCP option length is less than the TCP_MD5SIG - * option length, then we can shortcut - */ - if (length < TCPOLEN_MD5SIG) { - if (hash_expected) - return 1; - else - return 0; - } - - /* Okay, we can't shortcut - we have to grub through the options */ - ptr = (unsigned char *)(th + 1); - while (length > 0) { - int opcode = *ptr++; - int opsize; - - switch (opcode) { - case TCPOPT_EOL: - goto done_opts; - case TCPOPT_NOP: - length--; - continue; - default: - opsize = *ptr++; - if (opsize < 2) - goto done_opts; - if (opsize > length) - goto done_opts; - - if (opcode == TCPOPT_MD5SIG) { - hash_location = ptr; - goto done_opts; - } - } - ptr += opsize-2; - length -= opsize; - } -done_opts: /* We've parsed the options - do we have a hash? */ if (!hash_expected && !hash_location) return 0; @@ -1203,11 +1134,9 @@ done_opts: /* Okay, so this is hash_expected and hash_location - * so we need to calculate the checksum. */ - genhash = tcp_v4_do_calc_md5_hash(newhash, - hash_expected, - iph->saddr, iph->daddr, - th, sk->sk_protocol, - skb->len); + genhash = tcp_v4_md5_hash_skb(newhash, + hash_expected, + NULL, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { @@ -1285,7 +1214,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&tcp_request_sock_ops); + req = inet_reqsk_alloc(&tcp_request_sock_ops); if (!req) goto drop; @@ -1351,7 +1280,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) { - NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } } @@ -1456,6 +1385,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (newkey != NULL) tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr, newkey, key->keylen); + newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; } #endif @@ -1465,9 +1395,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; exit_overflow: - NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); exit: - NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); dst_release(dst); return NULL; } @@ -1594,7 +1524,7 @@ discard: return 0; csum_err: - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; } @@ -1608,12 +1538,13 @@ int tcp_v4_rcv(struct sk_buff *skb) struct tcphdr *th; struct sock *sk; int ret; + struct net *net = dev_net(skb->dev); if (skb->pkt_type != PACKET_HOST) goto discard_it; /* Count it even if it's bad */ - TCP_INC_STATS_BH(TCP_MIB_INSEGS); + TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; @@ -1642,7 +1573,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = iph->tos; TCP_SKB_CB(skb)->sacked = 0; - sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, + sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1689,7 +1620,7 @@ no_tcp_socket: if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { bad_packet: - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(net, TCP_MIB_INERRS); } else { tcp_v4_send_reset(NULL, skb); } @@ -1710,7 +1641,7 @@ do_time_wait: } if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(net, TCP_MIB_INERRS); inet_twsk_put(inet_twsk(sk)); goto discard_it; } @@ -1818,7 +1749,7 @@ struct inet_connection_sock_af_ops ipv4_specific = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { .md5_lookup = tcp_v4_md5_lookup, - .calc_md5_hash = tcp_v4_calc_md5_hash, + .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_add = tcp_v4_md5_add_func, .md5_parse = tcp_v4_parse_md5_keys, }; @@ -1875,7 +1806,7 @@ static int tcp_v4_init_sock(struct sock *sk) return 0; } -int tcp_v4_destroy_sock(struct sock *sk) +void tcp_v4_destroy_sock(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -1918,17 +1849,7 @@ int tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } - if (tp->defer_tcp_accept.request) { - reqsk_free(tp->defer_tcp_accept.request); - sock_put(tp->defer_tcp_accept.listen_sk); - sock_put(sk); - tp->defer_tcp_accept.listen_sk = NULL; - tp->defer_tcp_accept.request = NULL; - } - atomic_dec(&tcp_sockets_allocated); - - return 0; } EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -1971,8 +1892,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) req = req->dl_next; while (1) { while (req) { - if (req->rsk_ops->family == st->family && - net_eq(sock_net(req->sk), net)) { + if (req->rsk_ops->family == st->family) { cur = req; goto out; } @@ -2303,7 +2223,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) } seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " - "%08X %5d %8d %lu %d %p %u %u %u %u %d%n", + "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", i, src, srcp, dest, destp, sk->sk_state, tp->write_seq - tp->snd_una, sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : @@ -2315,8 +2235,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) icsk->icsk_probes_out, sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 019c8c16e5cc..204c42162660 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_minisocks.c,v 1.15 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -246,7 +244,7 @@ kill: } if (paws_reject) - NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); if (!th->rst) { /* In this case we must reset the TIMEWAIT timer. @@ -482,7 +480,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); - TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; } @@ -571,8 +569,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, does sequence test, SYN is truncated, and thus we consider it a bare ACK. - Both ends (listening sockets) accept the new incoming - connection and try to talk to each other. 8-) + If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this + bare ACK. Otherwise, we create an established connection. Both + ends (listening sockets) accept the new incoming connection and try + to talk to each other. 8-) Note: This case is both harmless, and rare. Possibility is about the same as us discovering intelligent life on another plant tomorrow. @@ -611,7 +611,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, if (!(flg & TCP_FLAG_RST)) req->rsk_ops->send_ack(skb, req); if (paws_reject) - NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); return NULL; } @@ -630,7 +630,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, * "fourth, check the SYN bit" */ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; } @@ -640,6 +640,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, if (!(flg & TCP_FLAG_ACK)) return NULL; + /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ + if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { + inet_rsk(req)->acked = 1; + return NULL; + } + /* OK, ACK is valid, create big socket and * feed this segment to it. It will repeat all * the tests. THIS SEGMENT MUST MOVE SOCKET TO @@ -678,24 +685,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, inet_csk_reqsk_queue_unlink(sk, req, prev); inet_csk_reqsk_queue_removed(sk, req); - if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && - TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { - - /* the accept queue handling is done is est recv slow - * path so lets make sure to start there - */ - tcp_sk(child)->pred_flags = 0; - sock_hold(sk); - sock_hold(child); - tcp_sk(child)->defer_tcp_accept.listen_sk = sk; - tcp_sk(child)->defer_tcp_accept.request = req; - - inet_csk_reset_keepalive_timer(child, - inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ); - } else { - inet_csk_reqsk_queue_add(sk, req, child); - } - + inet_csk_reqsk_queue_add(sk, req, child); return child; listen_overflow: @@ -705,7 +695,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, } embryonic_reset: - NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); if (!(flg & TCP_FLAG_RST)) req->rsk_ops->send_reset(sk, skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index debf23581606..a00532de2a8c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_output.c,v 1.146 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -347,28 +345,82 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) TCP_SKB_CB(skb)->end_seq = seq; } -static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, - __u32 tstamp, __u8 **md5_hash) -{ - if (tp->rx_opt.tstamp_ok) { +#define OPTION_SACK_ADVERTISE (1 << 0) +#define OPTION_TS (1 << 1) +#define OPTION_MD5 (1 << 2) + +struct tcp_out_options { + u8 options; /* bit field of OPTION_* */ + u8 ws; /* window scale, 0 to disable */ + u8 num_sack_blocks; /* number of SACK blocks to include */ + u16 mss; /* 0 to disable */ + __u32 tsval, tsecr; /* need to include OPTION_TS */ +}; + +static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, + const struct tcp_out_options *opts, + __u8 **md5_hash) { + if (unlikely(OPTION_MD5 & opts->options)) { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); + *md5_hash = (__u8 *)ptr; + ptr += 4; + } else { + *md5_hash = NULL; + } + + if (likely(OPTION_TS & opts->options)) { + if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) { + *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | + (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + } else { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + } + *ptr++ = htonl(opts->tsval); + *ptr++ = htonl(opts->tsecr); + } + + if (unlikely(opts->mss)) { + *ptr++ = htonl((TCPOPT_MSS << 24) | + (TCPOLEN_MSS << 16) | + opts->mss); + } + + if (unlikely(OPTION_SACK_ADVERTISE & opts->options && + !(OPTION_TS & opts->options))) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - *ptr++ = htonl(tstamp); - *ptr++ = htonl(tp->rx_opt.ts_recent); + (TCPOPT_SACK_PERM << 8) | + TCPOLEN_SACK_PERM); } - if (tp->rx_opt.eff_sacks) { - struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; + + if (unlikely(opts->ws)) { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_WINDOW << 16) | + (TCPOLEN_WINDOW << 8) | + opts->ws); + } + + if (unlikely(opts->num_sack_blocks)) { + struct tcp_sack_block *sp = tp->rx_opt.dsack ? + tp->duplicate_sack : tp->selective_acks; int this_sack; *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_SACK << 8) | - (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * + (TCPOLEN_SACK_BASE + (opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK))); - for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { + for (this_sack = 0; this_sack < opts->num_sack_blocks; + ++this_sack) { *ptr++ = htonl(sp[this_sack].start_seq); *ptr++ = htonl(sp[this_sack].end_seq); } @@ -378,81 +430,137 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, tp->rx_opt.eff_sacks--; } } +} + +static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, + struct tcp_out_options *opts, + struct tcp_md5sig_key **md5) { + struct tcp_sock *tp = tcp_sk(sk); + unsigned size = 0; + #ifdef CONFIG_TCP_MD5SIG - if (md5_hash) { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - *md5_hash = (__u8 *)ptr; + *md5 = tp->af_specific->md5_lookup(sk, sk); + if (*md5) { + opts->options |= OPTION_MD5; + size += TCPOLEN_MD5SIG_ALIGNED; } +#else + *md5 = NULL; #endif + + /* We always get an MSS option. The option bytes which will be seen in + * normal data packets should timestamps be used, must be in the MSS + * advertised. But we subtract them from tp->mss_cache so that + * calculations in tcp_sendmsg are simpler etc. So account for this + * fact here if necessary. If we don't do this correctly, as a + * receiver we won't recognize data packets as being full sized when we + * should, and thus we won't abide by the delayed ACK rules correctly. + * SACKs don't matter, we never delay an ACK when we have any of those + * going out. */ + opts->mss = tcp_advertise_mss(sk); + size += TCPOLEN_MSS_ALIGNED; + + if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { + opts->options |= OPTION_TS; + opts->tsval = TCP_SKB_CB(skb)->when; + opts->tsecr = tp->rx_opt.ts_recent; + size += TCPOLEN_TSTAMP_ALIGNED; + } + if (likely(sysctl_tcp_window_scaling)) { + opts->ws = tp->rx_opt.rcv_wscale; + size += TCPOLEN_WSCALE_ALIGNED; + } + if (likely(sysctl_tcp_sack)) { + opts->options |= OPTION_SACK_ADVERTISE; + if (unlikely(!(OPTION_TS & opts->options))) + size += TCPOLEN_SACKPERM_ALIGNED; + } + + return size; } -/* Construct a tcp options header for a SYN or SYN_ACK packet. - * If this is every changed make sure to change the definition of - * MAX_SYN_SIZE to match the new maximum number of options that you - * can generate. - * - * Note - that with the RFC2385 TCP option, we make room for the - * 16 byte MD5 hash. This will be filled in later, so the pointer for the - * location to be filled is passed back up. - */ -static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, - int offer_wscale, int wscale, __u32 tstamp, - __u32 ts_recent, __u8 **md5_hash) -{ - /* We always get an MSS option. - * The option bytes which will be seen in normal data - * packets should timestamps be used, must be in the MSS - * advertised. But we subtract them from tp->mss_cache so - * that calculations in tcp_sendmsg are simpler etc. - * So account for this fact here if necessary. If we - * don't do this correctly, as a receiver we won't - * recognize data packets as being full sized when we - * should, and thus we won't abide by the delayed ACK - * rules correctly. - * SACKs don't matter, we never delay an ACK when we - * have any of those going out. - */ - *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); - if (ts) { - if (sack) - *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | - (TCPOLEN_SACK_PERM << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - else - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - *ptr++ = htonl(tstamp); /* TSVAL */ - *ptr++ = htonl(ts_recent); /* TSECR */ - } else if (sack) - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_SACK_PERM << 8) | - TCPOLEN_SACK_PERM); - if (offer_wscale) - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_WINDOW << 16) | - (TCPOLEN_WINDOW << 8) | - (wscale)); +static unsigned tcp_synack_options(struct sock *sk, + struct request_sock *req, + unsigned mss, struct sk_buff *skb, + struct tcp_out_options *opts, + struct tcp_md5sig_key **md5) { + unsigned size = 0; + struct inet_request_sock *ireq = inet_rsk(req); + char doing_ts; + #ifdef CONFIG_TCP_MD5SIG - /* - * If MD5 is enabled, then we set the option, and include the size - * (always 18). The actual MD5 hash is added just before the - * packet is sent. - */ - if (md5_hash) { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - *md5_hash = (__u8 *)ptr; + *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); + if (*md5) { + opts->options |= OPTION_MD5; + size += TCPOLEN_MD5SIG_ALIGNED; } +#else + *md5 = NULL; #endif + + /* we can't fit any SACK blocks in a packet with MD5 + TS + options. There was discussion about disabling SACK rather than TS in + order to fit in better with old, buggy kernels, but that was deemed + to be unnecessary. */ + doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok); + + opts->mss = mss; + size += TCPOLEN_MSS_ALIGNED; + + if (likely(ireq->wscale_ok)) { + opts->ws = ireq->rcv_wscale; + size += TCPOLEN_WSCALE_ALIGNED; + } + if (likely(doing_ts)) { + opts->options |= OPTION_TS; + opts->tsval = TCP_SKB_CB(skb)->when; + opts->tsecr = req->ts_recent; + size += TCPOLEN_TSTAMP_ALIGNED; + } + if (likely(ireq->sack_ok)) { + opts->options |= OPTION_SACK_ADVERTISE; + if (unlikely(!doing_ts)) + size += TCPOLEN_SACKPERM_ALIGNED; + } + + return size; +} + +static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, + struct tcp_out_options *opts, + struct tcp_md5sig_key **md5) { + struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; + struct tcp_sock *tp = tcp_sk(sk); + unsigned size = 0; + +#ifdef CONFIG_TCP_MD5SIG + *md5 = tp->af_specific->md5_lookup(sk, sk); + if (unlikely(*md5)) { + opts->options |= OPTION_MD5; + size += TCPOLEN_MD5SIG_ALIGNED; + } +#else + *md5 = NULL; +#endif + + if (likely(tp->rx_opt.tstamp_ok)) { + opts->options |= OPTION_TS; + opts->tsval = tcb ? tcb->when : 0; + opts->tsecr = tp->rx_opt.ts_recent; + size += TCPOLEN_TSTAMP_ALIGNED; + } + + if (unlikely(tp->rx_opt.eff_sacks)) { + const unsigned remaining = MAX_TCP_OPTION_SPACE - size; + opts->num_sack_blocks = + min_t(unsigned, tp->rx_opt.eff_sacks, + (remaining - TCPOLEN_SACK_BASE_ALIGNED) / + TCPOLEN_SACK_PERBLOCK); + size += TCPOLEN_SACK_BASE_ALIGNED + + opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; + } + + return size; } /* This routine actually transmits TCP packets queued in by @@ -473,13 +581,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, struct inet_sock *inet; struct tcp_sock *tp; struct tcp_skb_cb *tcb; - int tcp_header_size; -#ifdef CONFIG_TCP_MD5SIG + struct tcp_out_options opts; + unsigned tcp_options_size, tcp_header_size; struct tcp_md5sig_key *md5; __u8 *md5_hash_location; -#endif struct tcphdr *th; - int sysctl_flags; int err; BUG_ON(!skb || !tcp_skb_pcount(skb)); @@ -502,50 +608,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, inet = inet_sk(sk); tp = tcp_sk(sk); tcb = TCP_SKB_CB(skb); - tcp_header_size = tp->tcp_header_len; - -#define SYSCTL_FLAG_TSTAMPS 0x1 -#define SYSCTL_FLAG_WSCALE 0x2 -#define SYSCTL_FLAG_SACK 0x4 + memset(&opts, 0, sizeof(opts)); - sysctl_flags = 0; - if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { - tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; - if (sysctl_tcp_timestamps) { - tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; - sysctl_flags |= SYSCTL_FLAG_TSTAMPS; - } - if (sysctl_tcp_window_scaling) { - tcp_header_size += TCPOLEN_WSCALE_ALIGNED; - sysctl_flags |= SYSCTL_FLAG_WSCALE; - } - if (sysctl_tcp_sack) { - sysctl_flags |= SYSCTL_FLAG_SACK; - if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) - tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; - } - } else if (unlikely(tp->rx_opt.eff_sacks)) { - /* A SACK is 2 pad bytes, a 2 byte header, plus - * 2 32-bit sequence numbers for each SACK block. - */ - tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + - (tp->rx_opt.eff_sacks * - TCPOLEN_SACK_PERBLOCK)); - } + if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) + tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); + else + tcp_options_size = tcp_established_options(sk, skb, &opts, + &md5); + tcp_header_size = tcp_options_size + sizeof(struct tcphdr); if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); -#ifdef CONFIG_TCP_MD5SIG - /* - * Are we doing MD5 on this segment? If so - make - * room for it. - */ - md5 = tp->af_specific->md5_lookup(sk, sk); - if (md5) - tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; -#endif - skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); skb_set_owner_w(skb, sk); @@ -576,39 +650,16 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->urg = 1; } - if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { - tcp_syn_build_options((__be32 *)(th + 1), - tcp_advertise_mss(sk), - (sysctl_flags & SYSCTL_FLAG_TSTAMPS), - (sysctl_flags & SYSCTL_FLAG_SACK), - (sysctl_flags & SYSCTL_FLAG_WSCALE), - tp->rx_opt.rcv_wscale, - tcb->when, - tp->rx_opt.ts_recent, - -#ifdef CONFIG_TCP_MD5SIG - md5 ? &md5_hash_location : -#endif - NULL); - } else { - tcp_build_and_update_options((__be32 *)(th + 1), - tp, tcb->when, -#ifdef CONFIG_TCP_MD5SIG - md5 ? &md5_hash_location : -#endif - NULL); + tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); + if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) TCP_ECN_send(sk, skb, tcp_header_size); - } #ifdef CONFIG_TCP_MD5SIG /* Calculate the MD5 hash, as we have all we need now */ if (md5) { + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; tp->af_specific->calc_md5_hash(md5_hash_location, - md5, - sk, NULL, NULL, - tcp_hdr(skb), - sk->sk_protocol, - skb->len); + md5, sk, NULL, skb); } #endif @@ -621,7 +672,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_event_data_sent(tp, skb, sk); if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) - TCP_INC_STATS(TCP_MIB_OUTSEGS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (likely(err <= 0)) @@ -630,10 +681,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_enter_cwr(sk, 1); return net_xmit_eval(err); - -#undef SYSCTL_FLAG_TSTAMPS -#undef SYSCTL_FLAG_WSCALE -#undef SYSCTL_FLAG_SACK } /* This routine just queue's the buffer @@ -974,6 +1021,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) u32 mss_now; u16 xmit_size_goal; int doing_tso = 0; + unsigned header_len; + struct tcp_out_options opts; + struct tcp_md5sig_key *md5; mss_now = tp->mss_cache; @@ -986,14 +1036,16 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tcp_sync_mss(sk, mtu); } - if (tp->rx_opt.eff_sacks) - mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + - (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); - -#ifdef CONFIG_TCP_MD5SIG - if (tp->af_specific->md5_lookup(sk, sk)) - mss_now -= TCPOLEN_MD5SIG_ALIGNED; -#endif + header_len = tcp_established_options(sk, NULL, &opts, &md5) + + sizeof(struct tcphdr); + /* The mss_cache is sized based on tp->tcp_header_len, which assumes + * some common options. If this is an odd packet (because we have SACK + * blocks etc) then our calculated header_len will be different, and + * we have to adjust mss_now correspondingly */ + if (header_len != tp->tcp_header_len) { + int delta = (int) header_len - tp->tcp_header_len; + mss_now -= delta; + } xmit_size_goal = mss_now; @@ -1836,7 +1888,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - unsigned int cur_mss = tcp_current_mss(sk, 0); + unsigned int cur_mss; int err; /* Inconslusive MTU probe */ @@ -1858,6 +1910,11 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return -ENOMEM; } + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) + return -EHOSTUNREACH; /* Routing failure or similar. */ + + cur_mss = tcp_current_mss(sk, 0); + /* If receiver has shrunk his window, and skb is out of * new window, do not retransmit it. The exception is the * case, when window is shrunk to zero. In this case @@ -1884,9 +1941,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) (sysctl_tcp_retrans_collapse != 0)) tcp_retrans_try_collapse(sk, skb, cur_mss); - if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) - return -EHOSTUNREACH; /* Routing failure or similar. */ - /* Some Solaris stacks overoptimize and ignore the FIN on a * retransmit when old data is attached. So strip it off * since it is cheap to do so and saves bytes on the network. @@ -1911,7 +1965,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (err == 0) { /* Update global TCP statistics. */ - TCP_INC_STATS(TCP_MIB_RETRANSSEGS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); tp->total_retrans++; @@ -1986,14 +2040,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (sacked & TCPCB_LOST) { if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { + int mib_idx; + if (tcp_retransmit_skb(sk, skb)) { tp->retransmit_skb_hint = NULL; return; } if (icsk->icsk_ca_state != TCP_CA_Loss) - NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); + mib_idx = LINUX_MIB_TCPFASTRETRANS; else - NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); + mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; + NET_INC_STATS_BH(sock_net(sk), mib_idx); if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, @@ -2063,7 +2120,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) inet_csk(sk)->icsk_rto, TCP_RTO_MAX); - NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS); } } @@ -2117,7 +2174,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) /* NOTE: No TCP options attached and we never retransmit this. */ skb = alloc_skb(MAX_TCP_HEADER, priority); if (!skb) { - NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); return; } @@ -2128,7 +2185,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) /* Send it off. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb, 0, priority)) - NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); + + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); } /* WARNING: This routine must only be called when we have already sent @@ -2176,11 +2235,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th; int tcp_header_size; + struct tcp_out_options opts; struct sk_buff *skb; -#ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *md5; __u8 *md5_hash_location; -#endif skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); if (skb == NULL) @@ -2191,18 +2249,27 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb->dst = dst_clone(dst); - tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + - (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + - /* SACK_PERM is in the place of NOP NOP of TS */ - ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); + if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ + __u8 rcv_wscale; + /* Set this up on the first call only */ + req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); + /* tcp_full_space because it is guaranteed to be the first packet */ + tcp_select_initial_window(tcp_full_space(sk), + dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), + &req->rcv_wnd, + &req->window_clamp, + ireq->wscale_ok, + &rcv_wscale); + ireq->rcv_wscale = rcv_wscale; + } + + memset(&opts, 0, sizeof(opts)); + TCP_SKB_CB(skb)->when = tcp_time_stamp; + tcp_header_size = tcp_synack_options(sk, req, + dst_metric(dst, RTAX_ADVMSS), + skb, &opts, &md5) + + sizeof(struct tcphdr); -#ifdef CONFIG_TCP_MD5SIG - /* Are we doing MD5 on this segment? If so - make room for it */ - md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); - if (md5) - tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; -#endif skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2220,19 +2287,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); th->seq = htonl(TCP_SKB_CB(skb)->seq); th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); - if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ - __u8 rcv_wscale; - /* Set this up on the first call only */ - req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); - /* tcp_full_space because it is guaranteed to be the first packet */ - tcp_select_initial_window(tcp_full_space(sk), - dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), - &req->rcv_wnd, - &req->window_clamp, - ireq->wscale_ok, - &rcv_wscale); - ireq->rcv_wscale = rcv_wscale; - } /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); @@ -2241,29 +2295,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); else #endif - TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, - ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, - TCP_SKB_CB(skb)->when, - req->ts_recent, - ( -#ifdef CONFIG_TCP_MD5SIG - md5 ? &md5_hash_location : -#endif - NULL) - ); - + tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); th->doff = (tcp_header_size >> 2); - TCP_INC_STATS(TCP_MIB_OUTSEGS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ if (md5) { tp->af_specific->calc_md5_hash(md5_hash_location, - md5, - NULL, dst, req, - tcp_hdr(skb), sk->sk_protocol, - skb->len); + md5, NULL, req, skb); } #endif @@ -2363,7 +2403,7 @@ int tcp_connect(struct sock *sk) */ tp->snd_nxt = tp->write_seq; tp->pushed_seq = tp->write_seq; - TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 5ff0ce6e9d39..7ddc30f0744f 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -224,7 +224,7 @@ static __init int tcpprobe_init(void) if (bufsize < 0) return -EINVAL; - tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL); + tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); if (!tcp_probe.log) goto err0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4de68cf5f2aa..5ab6ba19c3ce 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -50,7 +48,7 @@ static void tcp_write_err(struct sock *sk) sk->sk_error_report(sk); tcp_done(sk); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); } /* Do not allow orphaned sockets to eat all our resources. @@ -91,7 +89,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC); tcp_done(sk); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; } return 0; @@ -181,7 +179,7 @@ static void tcp_delack_timer(unsigned long data) if (sock_owned_by_user(sk)) { /* Try again later. */ icsk->icsk_ack.blocked = 1; - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); goto out_unlock; } @@ -200,7 +198,7 @@ static void tcp_delack_timer(unsigned long data) if (!skb_queue_empty(&tp->ucopy.prequeue)) { struct sk_buff *skb; - NET_INC_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) sk->sk_backlog_rcv(sk, skb); @@ -220,7 +218,7 @@ static void tcp_delack_timer(unsigned long data) icsk->icsk_ack.ato = TCP_ATO_MIN; } tcp_send_ack(sk); - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); } TCP_CHECK_TIMER(sk); @@ -289,7 +287,7 @@ static void tcp_retransmit_timer(struct sock *sk) if (!tp->packets_out) goto out; - BUG_TRAP(!tcp_write_queue_empty(sk)); + WARN_ON(tcp_write_queue_empty(sk)); if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { @@ -328,24 +326,27 @@ static void tcp_retransmit_timer(struct sock *sk) goto out; if (icsk->icsk_retransmits == 0) { + int mib_idx; + if (icsk->icsk_ca_state == TCP_CA_Disorder || icsk->icsk_ca_state == TCP_CA_Recovery) { if (tcp_is_sack(tp)) { if (icsk->icsk_ca_state == TCP_CA_Recovery) - NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); + mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES); + mib_idx = LINUX_MIB_TCPSACKFAILURES; } else { if (icsk->icsk_ca_state == TCP_CA_Recovery) - NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL); + mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; else - NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES); + mib_idx = LINUX_MIB_TCPRENOFAILURES; } } else if (icsk->icsk_ca_state == TCP_CA_Loss) { - NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES); + mib_idx = LINUX_MIB_TCPLOSSFAILURES; } else { - NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS); + mib_idx = LINUX_MIB_TCPTIMEOUTS; } + NET_INC_STATS_BH(sock_net(sk), mib_idx); } if (tcp_use_frto(sk)) { @@ -489,11 +490,6 @@ static void tcp_keepalive_timer (unsigned long data) goto death; } - if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) { - tcp_send_active_reset(sk, GFP_ATOMIC); - goto death; - } - if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) goto out; diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index d3b709a6f264..cb1f0e83830b 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -97,7 +97,7 @@ static int tunnel64_rcv(struct sk_buff *skb) { struct xfrm_tunnel *handler; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; for (handler = tunnel64_handlers; handler; handler = handler->next) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db1cb7c96d63..383d17359d01 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -5,8 +5,6 @@ * * The User Datagram Protocol (UDP). * - * Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> @@ -110,9 +108,6 @@ * Snmp MIB for the UDP layer */ -DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; -EXPORT_SYMBOL(udp_statistics); - DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; EXPORT_SYMBOL(udp_stats_in6); @@ -136,7 +131,7 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, struct sock *sk; struct hlist_node *node; - sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) + sk_for_each(sk, node, &udptable[udp_hashfn(net, num)]) if (net_eq(sock_net(sk), net) && sk->sk_hash == num) return 1; return 0; @@ -176,7 +171,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, for (i = 0; i < UDP_HTABLE_SIZE; i++) { int size = 0; - head = &udptable[rover & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[udp_hashfn(net, rover)]; if (hlist_empty(head)) goto gotit; @@ -213,7 +208,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, gotit: snum = rover; } else { - head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[udp_hashfn(net, snum)]; sk_for_each(sk2, node, head) if (sk2->sk_hash == snum && @@ -229,7 +224,7 @@ gotit: inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[udp_hashfn(net, snum)]; sk_add_node(sk, head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } @@ -266,7 +261,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, int badness = -1; read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && @@ -356,11 +351,12 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) struct sock *sk; int harderr; int err; + struct net *net = dev_net(skb->dev); - sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest, + sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, udptable); if (sk == NULL) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; /* No socket for error */ } @@ -420,7 +416,7 @@ void udp_err(struct sk_buff *skb, u32 info) /* * Throw away all pending data and cancel the corking. Socket is locked. */ -static void udp_flush_pending_frames(struct sock *sk) +void udp_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); @@ -430,6 +426,7 @@ static void udp_flush_pending_frames(struct sock *sk) ip_flush_pending_frames(sk); } } +EXPORT_SYMBOL(udp_flush_pending_frames); /** * udp4_hwcsum_outgoing - handle outgoing HW checksumming @@ -527,7 +524,8 @@ out: up->len = 0; up->pending = 0; if (!err) - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -655,11 +653,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + struct net *net = sock_net(sk); + security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); + err = ip_route_output_flow(net, &rt, &fl, sk, 1); if (err) { if (err == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); goto out; } @@ -726,7 +726,8 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -889,7 +890,8 @@ try_again: goto out_free; if (!peeked) - UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); sock_recv_timestamp(msg, sk, skb); @@ -918,7 +920,7 @@ out: csum_copy_err: lock_sock(sk); if (!skb_kill_datagram(sk, skb, flags)) - UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); release_sock(sk); if (noblock) @@ -989,7 +991,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) ret = (*up->encap_rcv)(sk, skb); if (ret <= 0) { - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); return -ret; } @@ -1041,15 +1044,18 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); + if (rc == -ENOMEM) { + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); + atomic_inc(&sk->sk_drops); + } goto drop; } return 0; drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -1060,7 +1066,7 @@ drop: * Note: called only from the BH handler context, * so we don't need to lock the hashes. */ -static int __udp4_lib_mcast_deliver(struct sk_buff *skb, +static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, __be32 saddr, __be32 daddr, struct hlist_head udptable[]) @@ -1069,7 +1075,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, int dif; read_lock(&udp_hash_lock); - sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { @@ -1157,6 +1163,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], struct rtable *rt = (struct rtable*)skb->dst; __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; + struct net *net = dev_net(skb->dev); /* * Validate the packet. @@ -1179,9 +1186,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], goto csum_error; if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); + return __udp4_lib_mcast_deliver(net, skb, uh, + saddr, daddr, udptable); - sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr, + sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, uh->dest, inet_iif(skb), udptable); if (sk != NULL) { @@ -1210,7 +1218,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto csum_error; - UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -1244,7 +1252,7 @@ csum_error: ntohs(uh->dest), ulen); drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -1254,12 +1262,11 @@ int udp_rcv(struct sk_buff *skb) return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); } -int udp_destroy_sock(struct sock *sk) +void udp_destroy_sock(struct sock *sk) { lock_sock(sk); udp_flush_pending_frames(sk); release_sock(sk); - return 0; } /* @@ -1318,6 +1325,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ val = 8; + else if (val > USHORT_MAX) + val = USHORT_MAX; up->pcslen = val; up->pcflag |= UDPLITE_SEND_CC; break; @@ -1330,6 +1339,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Avoid silly minimal values. */ val = 8; + else if (val > USHORT_MAX) + val = USHORT_MAX; up->pcrlen = val; up->pcflag |= UDPLITE_RECV_CC; break; @@ -1452,7 +1463,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_INERRORS, is_lite); __skb_unlink(skb, rcvq); kfree_skb(skb); } @@ -1628,12 +1640,13 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, __u16 srcp = ntohs(inet->sport); seq_printf(f, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p%n", + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", bucket, src, srcp, dest, destp, sp->sk_state, atomic_read(&sp->sk_wmem_alloc), atomic_read(&sp->sk_rmem_alloc), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, len); + atomic_read(&sp->sk_refcnt), sp, + atomic_read(&sp->sk_drops), len); } int udp4_seq_show(struct seq_file *seq, void *v) @@ -1642,7 +1655,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-127s\n", " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " - "inode"); + "inode ref pointer drops"); else { struct udp_iter_state *state = seq->private; int len; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 7288bf7977fb..2e9bad2fa1bc 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -26,7 +26,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, extern int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); -extern int udp_destroy_sock(struct sock *sk); +extern void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS extern int udp4_seq_show(struct seq_file *seq, void *v); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 72ce26b6c4d3..3c807964da96 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -1,8 +1,6 @@ /* * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). * - * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $ - * * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> * * Changes: @@ -13,7 +11,6 @@ * 2 of the License, or (at your option) any later version. */ #include "udp_impl.h" -DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly; struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 584e6d74e3a9..7135279f3f84 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -52,7 +52,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) IP_ECN_clear(top_iph); top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? - 0 : XFRM_MODE_SKB_CB(skb)->frag_off; + 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); ip_select_ident(top_iph, dst->child, NULL); top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 42814a2ec9d7..ec992159b5f8 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -96,10 +96,8 @@ config INET6_ESP config INET6_IPCOMP tristate "IPv6: IPComp transformation" - select XFRM select INET6_XFRM_TUNNEL - select CRYPTO - select CRYPTO_DEFLATE + select XFRM_IPCOMP ---help--- Support for IP Payload Compression Protocol (IPComp) (RFC3173), typically needed for IPsec. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e591e09e5e4e..a7842c54f58a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6,8 +6,6 @@ * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -121,6 +119,7 @@ static void ipv6_regen_rndid(unsigned long data); static int desync_factor = MAX_DESYNC_FACTOR * HZ; #endif +static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); /* @@ -154,7 +153,7 @@ static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); -struct ipv6_devconf ipv6_devconf __read_mostly = { +static struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, .mtu6 = IPV6_MIN_MTU, @@ -185,6 +184,8 @@ struct ipv6_devconf ipv6_devconf __read_mostly = { #endif .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ + .disable_ipv6 = 0, + .accept_dad = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -217,6 +218,8 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { #endif .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ + .disable_ipv6 = 0, + .accept_dad = 1, }; /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ @@ -226,9 +229,15 @@ const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_IN const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; /* Check if a valid qdisc is available */ -static inline int addrconf_qdisc_ok(struct net_device *dev) +static inline bool addrconf_qdisc_ok(const struct net_device *dev) { - return (dev->qdisc != &noop_qdisc); + return !qdisc_tx_is_noop(dev); +} + +/* Check if a route is valid prefix route */ +static inline int addrconf_is_prefix_route(const struct rt6_info *rt) +{ + return ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0); } static void addrconf_del_timer(struct inet6_ifaddr *ifp) @@ -304,8 +313,10 @@ static void in6_dev_finish_destroy_rcu(struct rcu_head *head) void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; - BUG_TRAP(idev->addr_list==NULL); - BUG_TRAP(idev->mc_list==NULL); + + WARN_ON(idev->addr_list != NULL); + WARN_ON(idev->mc_list != NULL); + #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL"); #endif @@ -344,6 +355,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) kfree(ndev); return NULL; } + if (ndev->cnf.forwarding) + dev_disable_lro(dev); /* We refer to the device */ dev_hold(dev); @@ -372,6 +385,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) */ in6_dev_hold(ndev); + if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) + ndev->cnf.accept_dad = -1; + #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { printk(KERN_INFO @@ -438,6 +454,8 @@ static void dev_forward_change(struct inet6_dev *idev) if (!idev) return; dev = idev->dev; + if (idev->cnf.forwarding) + dev_disable_lro(dev); if (dev && (dev->flags & IFF_MULTICAST)) { if (idev->cnf.forwarding) ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); @@ -483,12 +501,14 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) if (p == &net->ipv6.devconf_dflt->forwarding) return; + rtnl_lock(); if (p == &net->ipv6.devconf_all->forwarding) { __s32 newf = net->ipv6.devconf_all->forwarding; net->ipv6.devconf_dflt->forwarding = newf; addrconf_forward_change(net, newf); } else if ((!*p) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); + rtnl_unlock(); if (*p) rt6_purge_dflt_routers(net); @@ -499,8 +519,9 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) { - BUG_TRAP(ifp->if_next==NULL); - BUG_TRAP(ifp->lst_next==NULL); + WARN_ON(ifp->if_next != NULL); + WARN_ON(ifp->lst_next != NULL); + #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); #endif @@ -568,6 +589,13 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, struct rt6_info *rt; int hash; int err = 0; + int addr_type = ipv6_addr_type(addr); + + if (addr_type == IPV6_ADDR_ANY || + addr_type & IPV6_ADDR_MULTICAST || + (!(idev->dev->flags & IFF_LOOPBACK) && + addr_type & IPV6_ADDR_LOOPBACK)) + return ERR_PTR(-EADDRNOTAVAIL); rcu_read_lock_bh(); if (idev->dead) { @@ -731,8 +759,13 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) onlink = -1; spin_lock(&ifa->lock); - lifetime = min_t(unsigned long, - ifa->valid_lft, 0x7fffffffUL/HZ); + + lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ); + /* + * Note: Because this address is + * not permanent, lifetime < + * LONG_MAX / HZ here. + */ if (time_before(expires, ifa->tstamp + lifetime * HZ)) expires = ifa->tstamp + lifetime * HZ; @@ -744,12 +777,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } write_unlock_bh(&idev->lock); + addrconf_del_timer(ifp); + ipv6_ifa_notify(RTM_DELADDR, ifp); atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); - addrconf_del_timer(ifp); - /* * Purge or update corresponding prefix * @@ -772,7 +805,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1); - if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { + if (rt && addrconf_is_prefix_route(rt)) { if (onlink == 0) { ip6_del_rt(rt); rt = NULL; @@ -953,7 +986,8 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score, +static int ipv6_get_saddr_eval(struct net *net, + struct ipv6_saddr_score *score, struct ipv6_saddr_dst *dst, int i) { @@ -1032,7 +1066,8 @@ static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score, break; case IPV6_SADDR_RULE_LABEL: /* Rule 6: Prefer matching label */ - ret = ipv6_addr_label(&score->ifa->addr, score->addr_type, + ret = ipv6_addr_label(net, + &score->ifa->addr, score->addr_type, score->ifa->idev->dev->ifindex) == dst->label; break; #ifdef CONFIG_IPV6_PRIVACY @@ -1086,7 +1121,7 @@ int ipv6_dev_get_saddr(struct net_device *dst_dev, dst.addr = daddr; dst.ifindex = dst_dev ? dst_dev->ifindex : 0; dst.scope = __ipv6_addr_src_scope(dst_type); - dst.label = ipv6_addr_label(daddr, dst_type, dst.ifindex); + dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex); dst.prefs = prefs; hiscore->rule = -1; @@ -1154,8 +1189,8 @@ int ipv6_dev_get_saddr(struct net_device *dst_dev, for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) { int minihiscore, miniscore; - minihiscore = ipv6_get_saddr_eval(hiscore, &dst, i); - miniscore = ipv6_get_saddr_eval(score, &dst, i); + minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i); + miniscore = ipv6_get_saddr_eval(net, score, &dst, i); if (minihiscore > miniscore) { if (i == IPV6_SADDR_RULE_SCOPE && @@ -1395,6 +1430,20 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp) void addrconf_dad_failure(struct inet6_ifaddr *ifp) { + struct inet6_dev *idev = ifp->idev; + if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { + struct in6_addr addr; + + addr.s6_addr32[0] = htonl(0xfe800000); + addr.s6_addr32[1] = 0; + + if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && + ipv6_addr_equal(&ifp->addr, &addr)) { + /* DAD failed for link-local based on MAC address */ + idev->cnf.disable_ipv6 = 1; + } + } + if (net_ratelimit()) printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); addrconf_dad_stop(ifp); @@ -1722,7 +1771,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) __u32 valid_lft; __u32 prefered_lft; int addr_type; - unsigned long rt_expires; struct inet6_dev *in6_dev; pinfo = (struct prefix_info *) opt; @@ -1764,41 +1812,49 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) * 2) Configure prefixes with the auto flag set */ - /* Avoid arithmetic overflow. Really, we could - save rt_expires in seconds, likely valid_lft, - but it would require division in fib gc, that it - not good. - */ - if (valid_lft >= 0x7FFFFFFF/HZ) - rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ); - else - rt_expires = valid_lft * HZ; - - /* - * We convert this (in jiffies) to clock_t later. - * Avoid arithmetic overflow there as well. - * Overflow can happen only if HZ < USER_HZ. - */ - if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ) - rt_expires = 0x7FFFFFFF / USER_HZ; - if (pinfo->onlink) { struct rt6_info *rt; + unsigned long rt_expires; + + /* Avoid arithmetic overflow. Really, we could + * save rt_expires in seconds, likely valid_lft, + * but it would require division in fib gc, that it + * not good. + */ + if (HZ > USER_HZ) + rt_expires = addrconf_timeout_fixup(valid_lft, HZ); + else + rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ); + + if (addrconf_finite_timeout(rt_expires)) + rt_expires *= HZ; + rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL, dev->ifindex, 1); - if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { - if (rt->rt6i_flags&RTF_EXPIRES) { - if (valid_lft == 0) { - ip6_del_rt(rt); - rt = NULL; - } else { - rt->rt6i_expires = jiffies + rt_expires; - } + if (rt && addrconf_is_prefix_route(rt)) { + /* Autoconf prefix route */ + if (valid_lft == 0) { + ip6_del_rt(rt); + rt = NULL; + } else if (addrconf_finite_timeout(rt_expires)) { + /* not infinity */ + rt->rt6i_expires = jiffies + rt_expires; + rt->rt6i_flags |= RTF_EXPIRES; + } else { + rt->rt6i_flags &= ~RTF_EXPIRES; + rt->rt6i_expires = 0; } } else if (valid_lft) { + clock_t expires = 0; + int flags = RTF_ADDRCONF | RTF_PREFIX_RT; + if (addrconf_finite_timeout(rt_expires)) { + /* not infinity */ + flags |= RTF_EXPIRES; + expires = jiffies_to_clock_t(rt_expires); + } addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, - dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT); + dev, expires, flags); } if (rt) dst_release(&rt->u.dst); @@ -1810,6 +1866,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) struct inet6_ifaddr * ifp; struct in6_addr addr; int create = 0, update_lft = 0; + struct net *net = dev_net(dev); if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); @@ -1828,7 +1885,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) ok: - ifp = ipv6_get_ifaddr(dev_net(dev), &addr, dev, 1); + ifp = ipv6_get_ifaddr(net, &addr, dev, 1); if (ifp == NULL && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -1836,7 +1893,7 @@ ok: #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (in6_dev->cnf.optimistic_dad && - !ipv6_devconf.forwarding) + !net->ipv6.devconf_all->forwarding) addr_flags = IFA_F_OPTIMISTIC; #endif @@ -2014,17 +2071,22 @@ err_exit: * Manual configuration of address on an interface */ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, - int plen, __u8 ifa_flags, __u32 prefered_lft, + unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; int scope; - u32 flags = RTF_EXPIRES; + u32 flags; + clock_t expires; + unsigned long timeout; ASSERT_RTNL(); + if (plen > 128) + return -EINVAL; + /* check the lifetime */ if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; @@ -2038,17 +2100,23 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, scope = ipv6_addr_scope(pfx); - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + expires = jiffies_to_clock_t(timeout * HZ); + valid_lft = timeout; + flags = RTF_EXPIRES; + } else { + expires = 0; flags = 0; - } else if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + ifa_flags |= IFA_F_PERMANENT; + } - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) && - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa_flags |= IFA_F_DEPRECATED; + prefered_lft = timeout; + } ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); @@ -2060,7 +2128,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, spin_unlock_bh(&ifp->lock); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, - jiffies_to_clock_t(valid_lft * HZ), flags); + expires, flags); /* * Note that section 3.1 of RFC 4429 indicates * that the Optimistic flag should not be set for @@ -2076,12 +2144,15 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, } static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, - int plen) + unsigned int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + if (plen > 128) + return -EINVAL; + dev = __dev_get_by_index(net, ifindex); if (!dev) return -ENODEV; @@ -2251,7 +2322,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (idev->cnf.optimistic_dad && - !ipv6_devconf.forwarding) + !dev_net(idev->dev)->ipv6.devconf_all->forwarding) addr_flags |= IFA_F_OPTIMISTIC; #endif @@ -2706,6 +2777,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) spin_lock_bh(&ifp->lock); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || + idev->cnf.accept_dad < 1 || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC); @@ -2753,6 +2825,11 @@ static void addrconf_dad_timer(unsigned long data) read_unlock_bh(&idev->lock); goto out; } + if (idev->cnf.accept_dad > 1 && idev->cnf.disable_ipv6) { + read_unlock_bh(&idev->lock); + addrconf_dad_failure(ifp); + return; + } spin_lock_bh(&ifp->lock); if (ifp->probes == 0) { /* @@ -3148,22 +3225,30 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, u32 prefered_lft, u32 valid_lft) { - u32 flags = RTF_EXPIRES; + u32 flags; + clock_t expires; + unsigned long timeout; if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + expires = jiffies_to_clock_t(timeout * HZ); + valid_lft = timeout; + flags = RTF_EXPIRES; + } else { + expires = 0; flags = 0; - } else if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + ifa_flags |= IFA_F_PERMANENT; + } - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) && - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa_flags |= IFA_F_DEPRECATED; + prefered_lft = timeout; + } spin_lock_bh(&ifp->lock); ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; @@ -3176,7 +3261,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, ipv6_ifa_notify(0, ifp); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, - jiffies_to_clock_t(valid_lft * HZ), flags); + expires, flags); addrconf_verify(0); return 0; @@ -3604,6 +3689,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; #endif + array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; + array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; } static inline size_t inet6_if_nlmsg_size(void) @@ -4163,6 +4250,22 @@ static struct addrconf_sysctl_table }, #endif { + .ctl_name = CTL_UNNUMBERED, + .procname = "disable_ipv6", + .data = &ipv6_devconf.disable_ipv6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "accept_dad", + .data = &ipv6_devconf.accept_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0, /* sentinel */ } }, @@ -4242,7 +4345,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev) neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change, - NULL); + ndisc_ifinfo_sysctl_strategy); __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, idev->dev->ifindex, idev, &idev->cnf); } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 9bfa8846f262..08909039d87b 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -29,6 +29,9 @@ */ struct ip6addrlbl_entry { +#ifdef CONFIG_NET_NS + struct net *lbl_net; +#endif struct in6_addr prefix; int prefixlen; int ifindex; @@ -46,6 +49,16 @@ static struct ip6addrlbl_table u32 seq; } ip6addrlbl_table; +static inline +struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) +{ +#ifdef CONFIG_NET_NS + return lbl->lbl_net; +#else + return &init_net; +#endif +} + /* * Default policy table (RFC3484 + extensions) * @@ -65,7 +78,7 @@ static struct ip6addrlbl_table #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL -static const __initdata struct ip6addrlbl_init_table +static const __net_initdata struct ip6addrlbl_init_table { const struct in6_addr *prefix; int prefixlen; @@ -108,6 +121,9 @@ static const __initdata struct ip6addrlbl_init_table /* Object management */ static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) { +#ifdef CONFIG_NET_NS + release_net(p->lbl_net); +#endif kfree(p); } @@ -128,10 +144,13 @@ static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) } /* Find label */ -static int __ip6addrlbl_match(struct ip6addrlbl_entry *p, +static int __ip6addrlbl_match(struct net *net, + struct ip6addrlbl_entry *p, const struct in6_addr *addr, int addrtype, int ifindex) { + if (!net_eq(ip6addrlbl_net(p), net)) + return 0; if (p->ifindex && p->ifindex != ifindex) return 0; if (p->addrtype && p->addrtype != addrtype) @@ -141,19 +160,21 @@ static int __ip6addrlbl_match(struct ip6addrlbl_entry *p, return 1; } -static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr, +static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, + const struct in6_addr *addr, int type, int ifindex) { struct hlist_node *pos; struct ip6addrlbl_entry *p; hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { - if (__ip6addrlbl_match(p, addr, type, ifindex)) + if (__ip6addrlbl_match(net, p, addr, type, ifindex)) return p; } return NULL; } -u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) +u32 ipv6_addr_label(struct net *net, + const struct in6_addr *addr, int type, int ifindex) { u32 label; struct ip6addrlbl_entry *p; @@ -161,7 +182,7 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; rcu_read_lock(); - p = __ipv6_addr_label(addr, type, ifindex); + p = __ipv6_addr_label(net, addr, type, ifindex); label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); @@ -174,7 +195,8 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) } /* allocate one entry */ -static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, +static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, + const struct in6_addr *prefix, int prefixlen, int ifindex, u32 label) { @@ -216,6 +238,9 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, newp->addrtype = addrtype; newp->label = label; INIT_HLIST_NODE(&newp->list); +#ifdef CONFIG_NET_NS + newp->lbl_net = hold_net(net); +#endif atomic_set(&newp->refcnt, 1); return newp; } @@ -237,6 +262,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && + net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && p->ifindex == newp->ifindex && ipv6_addr_equal(&p->prefix, &newp->prefix)) { if (!replace) { @@ -261,7 +287,8 @@ out: } /* add a label */ -static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, +static int ip6addrlbl_add(struct net *net, + const struct in6_addr *prefix, int prefixlen, int ifindex, u32 label, int replace) { struct ip6addrlbl_entry *newp; @@ -274,7 +301,7 @@ static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, (unsigned int)label, replace); - newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); + newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) return PTR_ERR(newp); spin_lock(&ip6addrlbl_table.lock); @@ -286,7 +313,8 @@ static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, } /* remove a label */ -static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, +static int __ip6addrlbl_del(struct net *net, + const struct in6_addr *prefix, int prefixlen, int ifindex) { struct ip6addrlbl_entry *p = NULL; @@ -300,6 +328,7 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && + net_eq(ip6addrlbl_net(p), net) && p->ifindex == ifindex && ipv6_addr_equal(&p->prefix, prefix)) { hlist_del_rcu(&p->list); @@ -311,7 +340,8 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, return ret; } -static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, +static int ip6addrlbl_del(struct net *net, + const struct in6_addr *prefix, int prefixlen, int ifindex) { struct in6_addr prefix_buf; @@ -324,13 +354,13 @@ static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); spin_lock(&ip6addrlbl_table.lock); - ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex); + ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); spin_unlock(&ip6addrlbl_table.lock); return ret; } /* add default label */ -static __init int ip6addrlbl_init(void) +static int __net_init ip6addrlbl_net_init(struct net *net) { int err = 0; int i; @@ -338,7 +368,8 @@ static __init int ip6addrlbl_init(void) ADDRLABEL(KERN_DEBUG "%s()\n", __func__); for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { - int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix, + int ret = ip6addrlbl_add(net, + ip6addrlbl_init_table[i].prefix, ip6addrlbl_init_table[i].prefixlen, 0, ip6addrlbl_init_table[i].label, 0); @@ -349,11 +380,32 @@ static __init int ip6addrlbl_init(void) return err; } +static void __net_exit ip6addrlbl_net_exit(struct net *net) +{ + struct ip6addrlbl_entry *p = NULL; + struct hlist_node *pos, *n; + + /* Remove all labels belonging to the exiting net */ + spin_lock(&ip6addrlbl_table.lock); + hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + if (net_eq(ip6addrlbl_net(p), net)) { + hlist_del_rcu(&p->list); + ip6addrlbl_put(p); + } + } + spin_unlock(&ip6addrlbl_table.lock); +} + +static struct pernet_operations ipv6_addr_label_ops = { + .init = ip6addrlbl_net_init, + .exit = ip6addrlbl_net_exit, +}; + int __init ipv6_addr_label_init(void) { spin_lock_init(&ip6addrlbl_table.lock); - return ip6addrlbl_init(); + return register_pernet_subsys(&ipv6_addr_label_ops); } static const struct nla_policy ifal_policy[IFAL_MAX+1] = { @@ -371,9 +423,6 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, u32 label; int err = 0; - if (net != &init_net) - return 0; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); if (err < 0) return err; @@ -385,7 +434,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; if (ifal->ifal_index && - !__dev_get_by_index(&init_net, ifal->ifal_index)) + !__dev_get_by_index(net, ifal->ifal_index)) return -EINVAL; if (!tb[IFAL_ADDRESS]) @@ -403,12 +452,12 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, switch(nlh->nlmsg_type) { case RTM_NEWADDRLABEL: - err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen, + err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, ifal->ifal_index, label, nlh->nlmsg_flags & NLM_F_REPLACE); break; case RTM_DELADDRLABEL: - err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen, + err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, ifal->ifal_index); break; default: @@ -458,12 +507,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0, s_idx = cb->args[0]; int err; - if (net != &init_net) - return 0; - rcu_read_lock(); hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { - if (idx >= s_idx) { + if (idx >= s_idx && + net_eq(ip6addrlbl_net(p), net)) { if ((err = ip6addrlbl_fill(skb, p, ip6addrlbl_table.seq, NETLINK_CB(cb->skb).pid, @@ -499,9 +546,6 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, struct ip6addrlbl_entry *p; struct sk_buff *skb; - if (net != &init_net) - return 0; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); if (err < 0) return err; @@ -513,7 +557,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, return -EINVAL; if (ifal->ifal_index && - !__dev_get_by_index(&init_net, ifal->ifal_index)) + !__dev_get_by_index(net, ifal->ifal_index)) return -EINVAL; if (!tb[IFAL_ADDRESS]) @@ -524,7 +568,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, return -EINVAL; rcu_read_lock(); - p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index); + p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); if (p && ip6addrlbl_hold(p)) p = NULL; lseq = ip6addrlbl_table.seq; @@ -552,7 +596,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, goto out; } - err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); out: return err; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3c6aafb02183..95055f8c3f35 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -7,8 +7,6 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * $Id: af_inet6.c,v 1.66 2002/02/01 22:01:04 davem Exp $ - * * Fixes: * piggy, Karl Knutson : Socket protocol table * Hideaki YOSHIFUJI : sin6_scope_id support @@ -61,9 +59,7 @@ #include <asm/uaccess.h> #include <asm/system.h> -#ifdef CONFIG_IPV6_MROUTE #include <linux/mroute6.h> -#endif MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); @@ -87,7 +83,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) struct inet_sock *inet; struct ipv6_pinfo *np; struct sock *sk; - struct list_head *p; struct inet_protosw *answer; struct proto *answer_prot; unsigned char answer_flags; @@ -101,13 +96,12 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) build_ehash_secret(); /* Look for the requested type/protocol pair. */ - answer = NULL; lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); - list_for_each_rcu(p, &inetsw6[sock->type]) { - answer = list_entry(p, struct inet_protosw, list); + list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) { + err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) @@ -122,10 +116,9 @@ lookup_protocol: break; } err = -EPROTONOSUPPORT; - answer = NULL; } - if (!answer) { + if (err) { if (try_loading_module < 2) { rcu_read_unlock(); /* @@ -157,7 +150,7 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - BUG_TRAP(answer_prot->slab != NULL); + WARN_ON(answer_prot->slab == NULL); err = -ENOBUFS; sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); @@ -191,7 +184,7 @@ lookup_protocol: np->mcast_hops = -1; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->ipv6only = init_net.ipv6.sysctl.bindv6only; + np->ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. @@ -373,7 +366,7 @@ int inet6_release(struct socket *sock) EXPORT_SYMBOL(inet6_release); -int inet6_destroy_sock(struct sock *sk) +void inet6_destroy_sock(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -391,8 +384,6 @@ int inet6_destroy_sock(struct sock *sk) if ((opt = xchg(&np->opt, NULL)) != NULL) sock_kfree_s(sk, opt, opt->tot_len); - - return 0; } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -943,6 +934,11 @@ static int __init inet6_init(void) if (err) goto out_unregister_sock; +#ifdef CONFIG_SYSCTL + err = ipv6_static_sysctl_register(); + if (err) + goto static_sysctl_fail; +#endif /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance @@ -956,9 +952,9 @@ static int __init inet6_init(void) err = icmpv6_init(); if (err) goto icmp_fail; -#ifdef CONFIG_IPV6_MROUTE - ip6_mr_init(); -#endif + err = ip6_mr_init(); + if (err) + goto ipmr_fail; err = ndisc_init(); if (err) goto ndisc_fail; @@ -1061,10 +1057,16 @@ netfilter_fail: igmp_fail: ndisc_cleanup(); ndisc_fail: + ip6_mr_cleanup(); +ipmr_fail: icmpv6_cleanup(); icmp_fail: unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +static_sysctl_fail: +#endif cleanup_ipv6_mibs(); out_unregister_sock: sock_unregister(PF_INET6); @@ -1115,10 +1117,14 @@ static void __exit inet6_exit(void) ipv6_netfilter_fini(); igmp6_cleanup(); ndisc_cleanup(); + ip6_mr_cleanup(); icmpv6_cleanup(); rawv6_exit(); unregister_pernet_subsys(&inet6_net_ops); +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +#endif cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); proto_unregister(&udplitev6_prot); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e1b29fabdf0..8336cd81cb4f 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -60,7 +60,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) struct inet6_dev *idev; struct ipv6_ac_socklist *pac; struct net *net = sock_net(sk); - int ishost = !ipv6_devconf.forwarding; + int ishost = !net->ipv6.devconf_all->forwarding; int err = 0; if (!capable(CAP_NET_ADMIN)) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 94fa6ae77cfe..f7b535dec860 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: datagram.c,v 1.24 2002/02/01 22:01:04 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -496,7 +494,8 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) return 0; } -int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, +int datagram_send_ctl(struct net *net, + struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, int *hlimit, int *tclass) { @@ -509,7 +508,6 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { int addr_type; - struct net_device *dev = NULL; if (!CMSG_OK(msg, cmsg)) { err = -EINVAL; @@ -522,6 +520,9 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, switch (cmsg->cmsg_type) { case IPV6_PKTINFO: case IPV6_2292PKTINFO: + { + struct net_device *dev = NULL; + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; goto exit_f; @@ -535,32 +536,32 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, fl->oif = src_info->ipi6_ifindex; } - addr_type = ipv6_addr_type(&src_info->ipi6_addr); + addr_type = __ipv6_addr_type(&src_info->ipi6_addr); - if (addr_type == IPV6_ADDR_ANY) - break; + if (fl->oif) { + dev = dev_get_by_index(net, fl->oif); + if (!dev) + return -ENODEV; + } else if (addr_type & IPV6_ADDR_LINKLOCAL) + return -EINVAL; - if (addr_type & IPV6_ADDR_LINKLOCAL) { - if (!src_info->ipi6_ifindex) - return -EINVAL; - else { - dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex); - if (!dev) - return -ENODEV; - } - } - if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, - dev, 0)) { - if (dev) - dev_put(dev); - err = -EINVAL; - goto exit_f; + if (addr_type != IPV6_ADDR_ANY) { + int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; + if (!ipv6_chk_addr(net, &src_info->ipi6_addr, + strict ? dev : NULL, 0)) + err = -EINVAL; + else + ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); } + if (dev) dev_put(dev); - ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); + if (err) + goto exit_f; + break; + } case IPV6_FLOWINFO: if (cmsg->cmsg_len < CMSG_LEN(4)) { @@ -702,6 +703,11 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, } *hlimit = *(int *)CMSG_DATA(cmsg); + if (*hlimit < -1 || *hlimit > 0xff) { + err = -EINVAL; + goto exit_f; + } + break; case IPV6_TCLASS: diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3cd1c993d52b..837c830d6d8e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -7,8 +7,6 @@ * Andi Kleen <ak@muc.de> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: exthdrs.c,v 1.13 2001/06/19 15:58:56 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -321,7 +319,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) int n, i; struct ipv6_rt_hdr *hdr; struct rt0_hdr *rthdr; - int accept_source_route = ipv6_devconf.accept_source_route; + int accept_source_route = dev_net(skb->dev)->ipv6.devconf_all->accept_source_route; idev = in6_dev_get(skb->dev); if (idev) { @@ -445,7 +443,7 @@ looped_back: kfree_skb(skb); return -1; } - if (!ipv6_chk_home_addr(&init_net, addr)) { + if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index d42dd16d3487..abedf95fdf2d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $ - * * Based on net/ipv4/icmp.c * * RFC 1885 @@ -956,7 +954,8 @@ ctl_table ipv6_icmp_table_template[] = { .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies }, { .ctl_name = 0 }, }; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 87801cc1b2f8..16d43f20b32f 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -98,7 +98,7 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, ipv6_addr_equal(&treq->rmt_addr, raddr) && ipv6_addr_equal(&treq->loc_addr, laddr) && (!treq->iif || treq->iif == iif)) { - BUG_TRAP(req->sk == NULL); + WARN_ON(req->sk != NULL); *prevp = prev; return req; } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 580014aea4d6..1646a5658255 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -28,7 +28,7 @@ void __inet6_hash(struct sock *sk) struct hlist_head *list; rwlock_t *lock; - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; @@ -68,7 +68,7 @@ struct sock *__inet6_lookup_established(struct net *net, /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); + unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); @@ -104,7 +104,8 @@ struct sock *inet6_lookup_listener(struct net *net, int score, hiscore = 0; read_lock(&hashinfo->lhash_lock); - sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { + sk_for_each(sk, node, + &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) { if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -165,14 +166,14 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const struct in6_addr *saddr = &np->daddr; const int dif = sk->sk_bound_dev_if; const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); - const unsigned int hash = inet6_ehashfn(daddr, lport, saddr, + struct net *net = sock_net(sk); + const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; - struct net *net = sock_net(sk); prefetch(head->chain.first); write_lock(lock); @@ -201,7 +202,7 @@ unique: * in hash table socket with a funny identity. */ inet->num = lport; inet->sport = htons(lport); - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sk->sk_hash = hash; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -209,11 +210,11 @@ unique: if (twp != NULL) { *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED); } else if (tw != NULL) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule(tw, death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1ee4fa17c129..52dddc25d3e6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -289,7 +287,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) w->leaf = rt; return 1; } - BUG_TRAP(res!=0); + WARN_ON(res == 0); } w->leaf = NULL; return 0; @@ -663,17 +661,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) { - if (net->ipv6.ip6_fib_timer->expires == 0 && + if (!timer_pending(&net->ipv6.ip6_fib_timer) && (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) - mod_timer(net->ipv6.ip6_fib_timer, jiffies + - net->ipv6.sysctl.ip6_rt_gc_interval); + mod_timer(&net->ipv6.ip6_fib_timer, + jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } void fib6_force_start_gc(struct net *net) { - if (net->ipv6.ip6_fib_timer->expires == 0) - mod_timer(net->ipv6.ip6_fib_timer, jiffies + - net->ipv6.sysctl.ip6_rt_gc_interval); + if (!timer_pending(&net->ipv6.ip6_fib_timer)) + mod_timer(&net->ipv6.ip6_fib_timer, + jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } /* @@ -780,7 +778,7 @@ out: pn->leaf = fib6_find_prefix(info->nl_net, pn); #if RT6_DEBUG >= 2 if (!pn->leaf) { - BUG_TRAP(pn->leaf != NULL); + WARN_ON(pn->leaf == NULL); pn->leaf = info->nl_net->ipv6.ip6_null_entry; } #endif @@ -944,7 +942,7 @@ struct fib6_node * fib6_locate(struct fib6_node *root, #ifdef CONFIG_IPV6_SUBTREES if (src_len) { - BUG_TRAP(saddr!=NULL); + WARN_ON(saddr == NULL); if (fn && fn->subtree) fn = fib6_locate_1(fn->subtree, saddr, src_len, offsetof(struct rt6_info, rt6i_src)); @@ -998,9 +996,9 @@ static struct fib6_node *fib6_repair_tree(struct net *net, RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); iter++; - BUG_TRAP(!(fn->fn_flags&RTN_RTINFO)); - BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT)); - BUG_TRAP(fn->leaf==NULL); + WARN_ON(fn->fn_flags & RTN_RTINFO); + WARN_ON(fn->fn_flags & RTN_TL_ROOT); + WARN_ON(fn->leaf != NULL); children = 0; child = NULL; @@ -1016,7 +1014,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, fn->leaf = fib6_find_prefix(net, fn); #if RT6_DEBUG >= 2 if (fn->leaf==NULL) { - BUG_TRAP(fn->leaf); + WARN_ON(!fn->leaf); fn->leaf = net->ipv6.ip6_null_entry; } #endif @@ -1027,16 +1025,17 @@ static struct fib6_node *fib6_repair_tree(struct net *net, pn = fn->parent; #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { - BUG_TRAP(fn->fn_flags&RTN_ROOT); + WARN_ON(!(fn->fn_flags & RTN_ROOT)); FIB6_SUBTREE(pn) = NULL; nstate = FWS_L; } else { - BUG_TRAP(!(fn->fn_flags&RTN_ROOT)); + WARN_ON(fn->fn_flags & RTN_ROOT); #endif if (pn->right == fn) pn->right = child; else if (pn->left == fn) pn->left = child; #if RT6_DEBUG >= 2 - else BUG_TRAP(0); + else + WARN_ON(1); #endif if (child) child->parent = pn; @@ -1156,14 +1155,14 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) #if RT6_DEBUG >= 2 if (rt->u.dst.obsolete>0) { - BUG_TRAP(fn==NULL); + WARN_ON(fn != NULL); return -ENOENT; } #endif if (fn == NULL || rt == net->ipv6.ip6_null_entry) return -ENOENT; - BUG_TRAP(fn->fn_flags&RTN_RTINFO); + WARN_ON(!(fn->fn_flags & RTN_RTINFO)); if (!(rt->rt6i_flags&RTF_CACHE)) { struct fib6_node *pn = fn; @@ -1268,7 +1267,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->node = pn; #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { - BUG_TRAP(fn->fn_flags&RTN_ROOT); + WARN_ON(!(fn->fn_flags & RTN_ROOT)); w->state = FWS_L; continue; } @@ -1283,7 +1282,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) continue; } #if RT6_DEBUG >= 2 - BUG_TRAP(0); + WARN_ON(1); #endif } } @@ -1325,7 +1324,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) } return 0; } - BUG_TRAP(res==0); + WARN_ON(res != 0); } w->leaf = rt; return 0; @@ -1449,27 +1448,23 @@ void fib6_run_gc(unsigned long expires, struct net *net) gc_args.timeout = expires ? (int)expires : net->ipv6.sysctl.ip6_rt_gc_interval; } else { - local_bh_disable(); - if (!spin_trylock(&fib6_gc_lock)) { - mod_timer(net->ipv6.ip6_fib_timer, jiffies + HZ); - local_bh_enable(); + if (!spin_trylock_bh(&fib6_gc_lock)) { + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); return; } gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; } - gc_args.more = 0; - icmp6_dst_gc(&gc_args.more); + gc_args.more = icmp6_dst_gc(); fib6_clean_all(net, fib6_age, 0, NULL); if (gc_args.more) - mod_timer(net->ipv6.ip6_fib_timer, jiffies + - net->ipv6.sysctl.ip6_rt_gc_interval); - else { - del_timer(net->ipv6.ip6_fib_timer); - net->ipv6.ip6_fib_timer->expires = 0; - } + mod_timer(&net->ipv6.ip6_fib_timer, + round_jiffies(jiffies + + net->ipv6.sysctl.ip6_rt_gc_interval)); + else + del_timer(&net->ipv6.ip6_fib_timer); spin_unlock_bh(&fib6_gc_lock); } @@ -1480,24 +1475,15 @@ static void fib6_gc_timer_cb(unsigned long arg) static int fib6_net_init(struct net *net) { - int ret; - struct timer_list *timer; - - ret = -ENOMEM; - timer = kzalloc(sizeof(*timer), GFP_KERNEL); - if (!timer) - goto out; - - setup_timer(timer, fib6_gc_timer_cb, (unsigned long)net); - net->ipv6.ip6_fib_timer = timer; + setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); if (!net->ipv6.rt6_stats) goto out_timer; - net->ipv6.fib_table_hash = - kzalloc(sizeof(*net->ipv6.fib_table_hash)*FIB_TABLE_HASHSZ, - GFP_KERNEL); + net->ipv6.fib_table_hash = kcalloc(FIB_TABLE_HASHSZ, + sizeof(*net->ipv6.fib_table_hash), + GFP_KERNEL); if (!net->ipv6.fib_table_hash) goto out_rt6_stats; @@ -1523,9 +1509,7 @@ static int fib6_net_init(struct net *net) #endif fib6_tables_init(net); - ret = 0; -out: - return ret; + return 0; #ifdef CONFIG_IPV6_MULTIPLE_TABLES out_fib6_main_tbl: @@ -1536,15 +1520,14 @@ out_fib_table_hash: out_rt6_stats: kfree(net->ipv6.rt6_stats); out_timer: - kfree(timer); - goto out; + return -ENOMEM; } static void fib6_net_exit(struct net *net) { rt6_ifdown(net, NULL); - del_timer_sync(net->ipv6.ip6_fib_timer); - kfree(net->ipv6.ip6_fib_timer); + del_timer_sync(&net->ipv6.ip6_fib_timer); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES kfree(net->ipv6.fib6_local_tbl); #endif diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index eb7a940310f4..37a4e777e347 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -354,7 +354,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); + err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 4e5c8615832c..7e14cccd0561 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -6,8 +6,6 @@ * Pedro Roque <roque@di.fc.ul.pt> * Ian P. Morris <I.P.Morris@soton.ac.uk> * - * $Id: ip6_input.c,v 1.19 2000/12/13 18:31:50 davem Exp $ - * * Based in linux/net/ipv4/ip_input.c * * This program is free software; you can redistribute it and/or @@ -73,7 +71,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES); - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || + !idev || unlikely(idev->cnf.disable_ipv6)) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); goto out; @@ -102,6 +101,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; + /* + * RFC4291 2.5.3 + * A packet received on an interface with a destination address + * of loopback must be dropped. + */ + if (!(dev->flags & IFF_LOOPBACK) && + ipv6_addr_loopback(&hdr->daddr)) + goto err; + skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); @@ -241,7 +249,7 @@ int ip6_mc_input(struct sk_buff *skb) /* * IPv6 multicast router mode is now supported ;) */ - if (ipv6_devconf.mc_forwarding && + if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { /* * Okay, we try to forward - split and duplicate diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0af2e055f883..6811901e6b1e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $ - * * Based on linux/net/ipv4/ip_output.c * * This program is free software; you can redistribute it and/or @@ -118,7 +116,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) __skb_pull(newskb, skb_network_offset(newskb)); newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; - BUG_TRAP(newskb->dst); + WARN_ON(!newskb->dst); netif_rx(newskb); return 0; @@ -175,6 +173,13 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) int ip6_output(struct sk_buff *skb) { + struct inet6_dev *idev = ip6_dst_idev(skb->dst); + if (unlikely(idev->cnf.disable_ipv6)) { + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return 0; + } + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2); @@ -406,9 +411,12 @@ int ip6_forward(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); - if (ipv6_devconf.forwarding == 0) + if (net->ipv6.devconf_all->forwarding == 0) goto error; + if (skb_warn_if_lro(skb)) + goto drop; + if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; @@ -450,7 +458,7 @@ int ip6_forward(struct sk_buff *skb) } /* XXX: idev->cnf.proxy_ndp? */ - if (ipv6_devconf.proxy_ndp && + if (net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) @@ -497,7 +505,8 @@ int ip6_forward(struct sk_buff *skb) int addrtype = ipv6_addr_type(&hdr->saddr); /* This check is security critical. */ - if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK)) + if (addrtype == IPV6_ADDR_ANY || + addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) goto error; if (addrtype & IPV6_ADDR_LINKLOCAL) { icmpv6_send(skb, ICMPV6_DEST_UNREACH, @@ -780,7 +789,7 @@ slow_path: * Allocate buffer. */ - if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { + if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2bda3ba100b1..17c7b098cdb0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -6,8 +6,6 @@ * Ville Nuorvala <vnuorval@tcs.hut.fi> * Yasuyuki Kozakai <kozakai@linux-ipv6.org> * - * $Id$ - * * Based on: * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c * @@ -711,7 +709,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, } if (!ip6_tnl_rcv_ctl(t)) { - t->stat.rx_dropped++; + t->dev->stats.rx_dropped++; read_unlock(&ip6_tnl_lock); goto discard; } @@ -728,8 +726,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, dscp_ecn_decapsulate(t, ipv6h, skb); - t->stat.rx_packets++; - t->stat.rx_bytes += skb->len; + t->dev->stats.rx_packets++; + t->dev->stats.rx_bytes += skb->len; netif_rx(skb); read_unlock(&ip6_tnl_lock); return 0; @@ -849,7 +847,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, __u32 *pmtu) { struct ip6_tnl *t = netdev_priv(dev); - struct net_device_stats *stats = &t->stat; + struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct ipv6_tel_txoption opt; struct dst_entry *dst; @@ -1043,11 +1041,11 @@ static int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net_device_stats *stats = &t->stat; + struct net_device_stats *stats = &t->dev->stats; int ret; if (t->recursion++) { - t->stat.collisions++; + stats->collisions++; goto tx_err; } @@ -1289,19 +1287,6 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } /** - * ip6_tnl_get_stats - return the stats for tunnel device - * @dev: virtual device associated with tunnel - * - * Return: stats for device - **/ - -static struct net_device_stats * -ip6_tnl_get_stats(struct net_device *dev) -{ - return &(((struct ip6_tnl *)netdev_priv(dev))->stat); -} - -/** * ip6_tnl_change_mtu - change mtu manually for tunnel device * @dev: virtual device associated with tunnel * @new_mtu: the new mtu @@ -1334,7 +1319,6 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->uninit = ip6_tnl_dev_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ip6_tnl_xmit; - dev->get_stats = ip6_tnl_get_stats; dev->do_ioctl = ip6_tnl_ioctl; dev->change_mtu = ip6_tnl_change_mtu; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2de3c464fe75..095bc453ff4c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -197,7 +197,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) const char *name = vif->dev ? vif->dev->name : "none"; seq_printf(seq, - "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n", + "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", vif - vif6_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, @@ -388,8 +388,8 @@ static int pim6_rcv(struct sk_buff *skb) skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); - ((struct net_device_stats *)netdev_priv(reg_dev))->rx_bytes += skb->len; - ((struct net_device_stats *)netdev_priv(reg_dev))->rx_packets++; + reg_dev->stats.rx_bytes += skb->len; + reg_dev->stats.rx_packets++; skb->dst = NULL; nf_reset(skb); netif_rx(skb); @@ -409,26 +409,20 @@ static struct inet6_protocol pim6_protocol = { static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { read_lock(&mrt_lock); - ((struct net_device_stats *)netdev_priv(dev))->tx_bytes += skb->len; - ((struct net_device_stats *)netdev_priv(dev))->tx_packets++; + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; } -static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) -{ - return (struct net_device_stats *)netdev_priv(dev); -} - static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; dev->flags = IFF_NOARP; dev->hard_start_xmit = reg_vif_xmit; - dev->get_stats = reg_vif_get_stats; dev->destructor = free_netdev; } @@ -436,9 +430,7 @@ static struct net_device *ip6mr_reg_vif(void) { struct net_device *dev; - dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg", - reg_vif_setup); - + dev = alloc_netdev(0, "pim6reg", reg_vif_setup); if (dev == NULL) return NULL; @@ -451,6 +443,7 @@ static struct net_device *ip6mr_reg_vif(void) if (dev_open(dev)) goto failure; + dev_hold(dev); return dev; failure: @@ -603,6 +596,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) int vifi = vifc->mif6c_mifi; struct mif_device *v = &vif6_table[vifi]; struct net_device *dev; + int err; /* Is vif busy ? */ if (MIF_EXISTS(vifi)) @@ -620,20 +614,28 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) dev = ip6mr_reg_vif(); if (!dev) return -ENOBUFS; + err = dev_set_allmulti(dev, 1); + if (err) { + unregister_netdevice(dev); + dev_put(dev); + return err; + } break; #endif case 0: dev = dev_get_by_index(&init_net, vifc->mif6c_pifi); if (!dev) return -EADDRNOTAVAIL; - dev_put(dev); + err = dev_set_allmulti(dev, 1); + if (err) { + dev_put(dev); + return err; + } break; default: return -EINVAL; } - dev_set_allmulti(dev, 1); - /* * Fill in the VIF structures */ @@ -652,7 +654,6 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) /* And finish update writing critical data */ write_lock_bh(&mrt_lock); - dev_hold(dev); v->dev = dev; #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) @@ -934,7 +935,7 @@ static int ip6mr_device_event(struct notifier_block *this, struct mif_device *v; int ct; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event != NETDEV_UNREGISTER) @@ -956,23 +957,51 @@ static struct notifier_block ip6_mr_notifier = { * Setup for IP multicast routing */ -void __init ip6_mr_init(void) +int __init ip6_mr_init(void) { + int err; + mrt_cachep = kmem_cache_create("ip6_mrt_cache", sizeof(struct mfc6_cache), 0, SLAB_HWCACHE_ALIGN, NULL); if (!mrt_cachep) - panic("cannot allocate ip6_mrt_cache"); + return -ENOMEM; setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); - register_netdevice_notifier(&ip6_mr_notifier); + err = register_netdevice_notifier(&ip6_mr_notifier); + if (err) + goto reg_notif_fail; +#ifdef CONFIG_PROC_FS + err = -ENOMEM; + if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) + goto proc_vif_fail; + if (!proc_net_fops_create(&init_net, "ip6_mr_cache", + 0, &ip6mr_mfc_fops)) + goto proc_cache_fail; +#endif + return 0; +reg_notif_fail: + kmem_cache_destroy(mrt_cachep); #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops); - proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops); +proc_vif_fail: + unregister_netdevice_notifier(&ip6_mr_notifier); +proc_cache_fail: + proc_net_remove(&init_net, "ip6_mr_vif"); #endif + return err; } +void ip6_mr_cleanup(void) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(&init_net, "ip6_mr_cache"); + proc_net_remove(&init_net, "ip6_mr_vif"); +#endif + unregister_netdevice_notifier(&ip6_mr_notifier); + del_timer(&ipmr_expire_timer); + kmem_cache_destroy(mrt_cachep); +} static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) { @@ -1248,7 +1277,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int #endif /* - * Spurious command, or MRT_VERSION which you cannot + * Spurious command, or MRT6_VERSION which you cannot * set. */ default: @@ -1377,8 +1406,8 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) if (vif->flags & MIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; - ((struct net_device_stats *)netdev_priv(vif->dev))->tx_bytes += skb->len; - ((struct net_device_stats *)netdev_priv(vif->dev))->tx_packets++; + vif->dev->stats.tx_bytes += skb->len; + vif->dev->stats.tx_packets++; ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT); kfree_skb(skb); return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index ee6de425ce6b..4545e4306862 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -50,125 +50,6 @@ #include <linux/icmpv6.h> #include <linux/mutex.h> -struct ipcomp6_tfms { - struct list_head list; - struct crypto_comp **tfms; - int users; -}; - -static DEFINE_MUTEX(ipcomp6_resource_mutex); -static void **ipcomp6_scratches; -static int ipcomp6_scratch_users; -static LIST_HEAD(ipcomp6_tfms_list); - -static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int nexthdr; - int err = -ENOMEM; - struct ip_comp_hdr *ipch; - int plen, dlen; - struct ipcomp_data *ipcd = x->data; - u8 *start, *scratch; - struct crypto_comp *tfm; - int cpu; - - if (skb_linearize_cow(skb)) - goto out; - - skb->ip_summed = CHECKSUM_NONE; - - /* Remove ipcomp header and decompress original payload */ - ipch = (void *)skb->data; - nexthdr = ipch->nexthdr; - - skb->transport_header = skb->network_header + sizeof(*ipch); - __skb_pull(skb, sizeof(*ipch)); - - /* decompression */ - plen = skb->len; - dlen = IPCOMP_SCRATCH_SIZE; - start = skb->data; - - cpu = get_cpu(); - scratch = *per_cpu_ptr(ipcomp6_scratches, cpu); - tfm = *per_cpu_ptr(ipcd->tfms, cpu); - - err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); - if (err) - goto out_put_cpu; - - if (dlen < (plen + sizeof(*ipch))) { - err = -EINVAL; - goto out_put_cpu; - } - - err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC); - if (err) { - goto out_put_cpu; - } - - skb->truesize += dlen - plen; - __skb_put(skb, dlen - plen); - skb_copy_to_linear_data(skb, scratch, dlen); - err = nexthdr; - -out_put_cpu: - put_cpu(); -out: - return err; -} - -static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - struct ip_comp_hdr *ipch; - struct ipcomp_data *ipcd = x->data; - int plen, dlen; - u8 *start, *scratch; - struct crypto_comp *tfm; - int cpu; - - /* check whether datagram len is larger than threshold */ - if (skb->len < ipcd->threshold) { - goto out_ok; - } - - if (skb_linearize_cow(skb)) - goto out_ok; - - /* compression */ - plen = skb->len; - dlen = IPCOMP_SCRATCH_SIZE; - start = skb->data; - - cpu = get_cpu(); - scratch = *per_cpu_ptr(ipcomp6_scratches, cpu); - tfm = *per_cpu_ptr(ipcd->tfms, cpu); - - local_bh_disable(); - err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - local_bh_enable(); - if (err || (dlen + sizeof(*ipch)) >= plen) { - put_cpu(); - goto out_ok; - } - memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - put_cpu(); - pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); - - /* insert ipcomp header and replace datagram */ - ipch = ip_comp_hdr(skb); - ipch->nexthdr = *skb_mac_header(skb); - ipch->flags = 0; - ipch->cpi = htons((u16 )ntohl(x->id.spi)); - *skb_mac_header(skb) = IPPROTO_COMP; - -out_ok: - skb_push(skb, -skb_network_offset(skb)); - - return 0; -} - static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { @@ -251,160 +132,9 @@ out: return err; } -static void ipcomp6_free_scratches(void) -{ - int i; - void **scratches; - - if (--ipcomp6_scratch_users) - return; - - scratches = ipcomp6_scratches; - if (!scratches) - return; - - for_each_possible_cpu(i) { - void *scratch = *per_cpu_ptr(scratches, i); - - vfree(scratch); - } - - free_percpu(scratches); -} - -static void **ipcomp6_alloc_scratches(void) -{ - int i; - void **scratches; - - if (ipcomp6_scratch_users++) - return ipcomp6_scratches; - - scratches = alloc_percpu(void *); - if (!scratches) - return NULL; - - ipcomp6_scratches = scratches; - - for_each_possible_cpu(i) { - void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); - if (!scratch) - return NULL; - *per_cpu_ptr(scratches, i) = scratch; - } - - return scratches; -} - -static void ipcomp6_free_tfms(struct crypto_comp **tfms) -{ - struct ipcomp6_tfms *pos; - int cpu; - - list_for_each_entry(pos, &ipcomp6_tfms_list, list) { - if (pos->tfms == tfms) - break; - } - - BUG_TRAP(pos); - - if (--pos->users) - return; - - list_del(&pos->list); - kfree(pos); - - if (!tfms) - return; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_comp(tfm); - } - free_percpu(tfms); -} - -static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name) -{ - struct ipcomp6_tfms *pos; - struct crypto_comp **tfms; - int cpu; - - /* This can be any valid CPU ID so we don't need locking. */ - cpu = raw_smp_processor_id(); - - list_for_each_entry(pos, &ipcomp6_tfms_list, list) { - struct crypto_comp *tfm; - - tfms = pos->tfms; - tfm = *per_cpu_ptr(tfms, cpu); - - if (!strcmp(crypto_comp_name(tfm), alg_name)) { - pos->users++; - return tfms; - } - } - - pos = kmalloc(sizeof(*pos), GFP_KERNEL); - if (!pos) - return NULL; - - pos->users = 1; - INIT_LIST_HEAD(&pos->list); - list_add(&pos->list, &ipcomp6_tfms_list); - - pos->tfms = tfms = alloc_percpu(struct crypto_comp *); - if (!tfms) - goto error; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto error; - *per_cpu_ptr(tfms, cpu) = tfm; - } - - return tfms; - -error: - ipcomp6_free_tfms(tfms); - return NULL; -} - -static void ipcomp6_free_data(struct ipcomp_data *ipcd) -{ - if (ipcd->tfms) - ipcomp6_free_tfms(ipcd->tfms); - ipcomp6_free_scratches(); -} - -static void ipcomp6_destroy(struct xfrm_state *x) -{ - struct ipcomp_data *ipcd = x->data; - if (!ipcd) - return; - xfrm_state_delete_tunnel(x); - mutex_lock(&ipcomp6_resource_mutex); - ipcomp6_free_data(ipcd); - mutex_unlock(&ipcomp6_resource_mutex); - kfree(ipcd); - - xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr); -} - static int ipcomp6_init_state(struct xfrm_state *x) { - int err; - struct ipcomp_data *ipcd; - struct xfrm_algo_desc *calg_desc; - - err = -EINVAL; - if (!x->calg) - goto out; - - if (x->encap) - goto out; + int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { @@ -417,39 +147,21 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; } - err = -ENOMEM; - ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); - if (!ipcd) + err = ipcomp_init_state(x); + if (err) goto out; - mutex_lock(&ipcomp6_resource_mutex); - if (!ipcomp6_alloc_scratches()) - goto error; - - ipcd->tfms = ipcomp6_alloc_tfms(x->calg->alg_name); - if (!ipcd->tfms) - goto error; - mutex_unlock(&ipcomp6_resource_mutex); - if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) goto error_tunnel; } - calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); - BUG_ON(!calg_desc); - ipcd->threshold = calg_desc->uinfo.comp.threshold; - x->data = ipcd; err = 0; out: return err; error_tunnel: - mutex_lock(&ipcomp6_resource_mutex); -error: - ipcomp6_free_data(ipcd); - mutex_unlock(&ipcomp6_resource_mutex); - kfree(ipcd); + ipcomp_destroy(x); goto out; } @@ -460,9 +172,9 @@ static const struct xfrm_type ipcomp6_type = .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp6_init_state, - .destructor = ipcomp6_destroy, - .input = ipcomp6_input, - .output = ipcomp6_output, + .destructor = ipcomp_destroy, + .input = ipcomp_input, + .output = ipcomp_output, .hdr_offset = xfrm6_find_1stfragopt, }; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 56d55fecf8ec..ea33b26512c2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -7,8 +7,6 @@ * * Based on linux/net/ipv4/ip_sockglue.c * - * $Id: ipv6_sockglue.c,v 1.41 2002/02/01 22:01:04 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -61,13 +59,13 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; struct ip6_ra_chain *ip6_ra_chain; DEFINE_RWLOCK(ip6_ra_lock); -int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) +int ip6_ra_control(struct sock *sk, int sel) { struct ip6_ra_chain *ra, *new_ra, **rap; /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW) - return -EINVAL; + return -ENOPROTOOPT; new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; @@ -83,8 +81,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) *rap = ra->next; write_unlock_bh(&ip6_ra_lock); - if (ra->destructor) - ra->destructor(sk); sock_put(sk); kfree(ra); return 0; @@ -96,7 +92,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) } new_ra->sk = sk; new_ra->sel = sel; - new_ra->destructor = destructor; new_ra->next = ra; *rap = new_ra; sock_hold(sk); @@ -161,9 +156,17 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct ipv6_txoptions *opt; struct sk_buff *pktopt; - if (sk->sk_protocol != IPPROTO_UDP && - sk->sk_protocol != IPPROTO_UDPLITE && - sk->sk_protocol != IPPROTO_TCP) + if (sk->sk_type == SOCK_RAW) + break; + + if (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_UDPLITE) { + struct udp_sock *up = udp_sk(sk); + if (up->pending == AF_INET6) { + retv = -EBUSY; + break; + } + } else if (sk->sk_protocol != IPPROTO_TCP) break; if (sk->sk_state != TCP_ESTABLISHED) { @@ -337,18 +340,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_DSTOPTS: { struct ipv6_txoptions *opt; + + /* remove any sticky options header with a zero option + * length, per RFC3542. + */ if (optlen == 0) optval = NULL; + else if (optlen < sizeof(struct ipv6_opt_hdr) || + optlen & 0x7 || optlen > 8 * 255) + goto e_inval; /* hop-by-hop / destination options are privileged option */ retv = -EPERM; if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) break; - if (optlen < sizeof(struct ipv6_opt_hdr) || - optlen & 0x7 || optlen > 8 * 255) - goto e_inval; - opt = ipv6_renew_options(sk, np->opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); @@ -416,7 +422,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk); + retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk); if (retv) goto done; update: @@ -438,7 +444,7 @@ done: case IPV6_MULTICAST_HOPS: if (sk->sk_type == SOCK_STREAM) - goto e_inval; + break; if (optlen < sizeof(int)) goto e_inval; if (val > 255 || val < -1) @@ -450,13 +456,15 @@ done: case IPV6_MULTICAST_LOOP: if (optlen < sizeof(int)) goto e_inval; + if (val != valbool) + goto e_inval; np->mc_loop = valbool; retv = 0; break; case IPV6_MULTICAST_IF: if (sk->sk_type == SOCK_STREAM) - goto e_inval; + break; if (optlen < sizeof(int)) goto e_inval; @@ -621,7 +629,7 @@ done: case IPV6_ROUTER_ALERT: if (optlen < sizeof(int)) goto e_inval; - retv = ip6_ra_control(sk, val, NULL); + retv = ip6_ra_control(sk, val); break; case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) @@ -832,7 +840,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, len = min_t(unsigned int, len, ipv6_optlen(hdr)); if (copy_to_user(optval, hdr, len)) return -EFAULT; - return ipv6_optlen(hdr); + return len; } static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, @@ -852,7 +860,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) - return -EINVAL; + return -ENOPROTOOPT; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; val = sk->sk_family; @@ -866,6 +874,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, return -EINVAL; if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) return -EFAULT; + if (gsf.gf_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; lock_sock(sk); err = ip6_mc_msfget(sk, &gsf, (struct group_filter __user *)optval, optlen); @@ -975,6 +985,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, len = ipv6_getsockopt_sticky(sk, np->opt, optname, optval, len); release_sock(sk); + /* check if ipv6_getsockopt_sticky() returns err code */ + if (len < 0) + return len; return put_user(len, optlen); } @@ -1025,7 +1038,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, dst_release(dst); } if (val < 0) - val = ipv6_devconf.hop_limit; + val = sock_net(sk)->ipv6.devconf_all->hop_limit; break; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 54f91efdae58..e7c03bcc2788 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: mcast.c,v 1.40 2002/02/08 03:57:19 davem Exp $ - * * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c * * This program is free software; you can redistribute it and/or @@ -153,7 +151,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, #define IGMP6_UNSOLICITED_IVAL (10*HZ) #define MLD_QRV_DEFAULT 2 -#define MLD_V1_SEEN(idev) (ipv6_devconf.force_mld_version == 1 || \ +#define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \ (idev)->cnf.force_mld_version == 1 || \ ((idev)->mc_v1_seen && \ time_before(jiffies, (idev)->mc_v1_seen))) @@ -164,7 +162,6 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, ((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \ (MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp)))) -#define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value) #define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value) #define IPV6_MLD_MAX_MSF 64 @@ -370,10 +367,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, int pmclocked = 0; int err; - if (pgsr->gsr_group.ss_family != AF_INET6 || - pgsr->gsr_source.ss_family != AF_INET6) - return -EINVAL; - source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr; group = &((struct sockaddr_in6 *)&pgsr->gsr_group)->sin6_addr; @@ -1411,7 +1404,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err); + skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err); if (!skb) return NULL; @@ -1790,7 +1783,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) payload_len = len + sizeof(ra); full_len = sizeof(struct ipv6hdr) + payload_len; - skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err); + skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err); if (skb == NULL) { rcu_read_lock(); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index ad1cc5bbf977..31295c8f6196 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -164,8 +164,8 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) calc_padlen(sizeof(*dstopt), 6)); hao->type = IPV6_TLV_HAO; + BUILD_BUG_ON(sizeof(*hao) != 18); hao->length = sizeof(*hao) - 2; - BUG_TRAP(hao->length == 16); len = ((char *)hao - (char *)dstopt) + sizeof(*hao); @@ -174,7 +174,7 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr)); spin_unlock_bh(&x->lock); - BUG_TRAP(len == x->props.header_len); + WARN_ON(len != x->props.header_len); dstopt->hdrlen = (x->props.header_len >> 3) - 1; return 0; @@ -317,7 +317,7 @@ static int mip6_destopt_init_state(struct xfrm_state *x) x->props.header_len = sizeof(struct ipv6_destopt_hdr) + calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) + sizeof(struct ipv6_destopt_hao); - BUG_TRAP(x->props.header_len == 24); + WARN_ON(x->props.header_len != 24); return 0; } @@ -380,7 +380,7 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) rt2->rt_hdr.segments_left = 1; memset(&rt2->reserved, 0, sizeof(rt2->reserved)); - BUG_TRAP(rt2->rt_hdr.hdrlen == 2); + WARN_ON(rt2->rt_hdr.hdrlen != 2); memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr)); spin_lock_bh(&x->lock); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2c74885f8355..beb48e3f038a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -479,7 +479,7 @@ static void __ndisc_send(struct net_device *dev, skb = sock_alloc_send_skb(sk, (MAX_HEADER + sizeof(struct ipv6hdr) + - len + LL_RESERVED_SPACE(dev)), + len + LL_ALLOCATED_SPACE(dev)), 1, &err); if (!skb) { ND_PRINTK0(KERN_ERR @@ -784,15 +784,17 @@ static void ndisc_recv_ns(struct sk_buff *skb) idev = ifp->idev; } else { + struct net *net = dev_net(dev); + idev = in6_dev_get(dev); if (!idev) { /* XXX: count this drop? */ return; } - if (ipv6_chk_acast_addr(dev_net(dev), dev, &msg->target) || + if (ipv6_chk_acast_addr(net, dev, &msg->target) || (idev->cnf.forwarding && - (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && + (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -921,6 +923,7 @@ static void ndisc_recv_na(struct sk_buff *skb) if (neigh) { u8 old_flags = neigh->flags; + struct net *net = dev_net(dev); if (neigh->nud_state & NUD_FAILED) goto out; @@ -931,8 +934,8 @@ static void ndisc_recv_na(struct sk_buff *skb) * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && - ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, dev_net(dev), &msg->target, dev, 0)) { + net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp && + pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.prixy_ndp */ goto out; } @@ -1521,7 +1524,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, buff = sock_alloc_send_skb(sk, (MAX_HEADER + sizeof(struct ipv6hdr) + - len + LL_RESERVED_SPACE(dev)), + len + LL_ALLOCATED_SPACE(dev)), 1, &err); if (buff == NULL) { ND_PRINTK0(KERN_ERR @@ -1727,10 +1730,10 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f return ret; } -static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, - int nlen, void __user *oldval, - size_t __user *oldlenp, - void __user *newval, size_t newlen) +int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6cae5475737e..0cfcce7b18d8 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -208,5 +208,17 @@ config IP6_NF_RAW If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +# security table for MAC policy +config IP6_NF_SECURITY + tristate "Security table" + depends on IP6_NF_IPTABLES + depends on SECURITY + depends on NETFILTER_ADVANCED + help + This option adds a `security' table to iptables, for use + with Mandatory Access Control (MAC) policy. + + If unsure, say N. + endmenu diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index fbf2c14ed887..3f17c948eefb 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o +obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o # objects for l3 independent conntrack nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 2eff3ae8977d..5859c046cbc4 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -159,7 +159,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); - data_len = 0; break; case IPQ_COPY_PACKET: @@ -226,8 +225,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: - if (skb) - kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip6_queue: error creating packet message\n"); return NULL; @@ -483,7 +480,7 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* Drop any packets associated with the downed device */ diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index f979e48b469b..55a2c290bad4 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -61,13 +61,25 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6t_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +ip6t_local_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + nf_local_in_net(in, out)->ipv6.ip6table_filter); +} + +static unsigned int +ip6t_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter); + return ip6t_do_table(skb, hook, in, out, + nf_forward_net(in, out)->ipv6.ip6table_filter); } static unsigned int @@ -87,19 +99,20 @@ ip6t_local_out_hook(unsigned int hook, } #endif - return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter); + return ip6t_do_table(skb, hook, in, out, + nf_local_out_net(in, out)->ipv6.ip6table_filter); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_hook, + .hook = ip6t_local_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_FILTER, }, { - .hook = ip6t_hook, + .hook = ip6t_forward_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_FORWARD, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 27a5e8b48d93..f405cea21a8b 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -129,7 +129,7 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_local_hook, + .hook = ip6t_route_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c new file mode 100644 index 000000000000..6e7131036bc6 --- /dev/null +++ b/net/ipv6/netfilter/ip6table_security.c @@ -0,0 +1,172 @@ +/* + * "security" table for IPv6 + * + * This is for use by Mandatory Access Control (MAC) security models, + * which need to be able to manage security policy in separate context + * to DAC. + * + * Based on iptable_mangle.c + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> + * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/netfilter_ipv6/ip6_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>"); +MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); + +#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) + +static struct +{ + struct ip6t_replace repl; + struct ip6t_standard entries[3]; + struct ip6t_error term; +} initial_table __net_initdata = { + .repl = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .num_entries = 4, + .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), + .hook_entry = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ip6t_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2, + }, + .underflow = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ip6t_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2, + }, + }, + .entries = { + IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ + }, + .term = IP6T_ERROR_INIT, /* ERROR */ +}; + +static struct xt_table security_table = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .lock = __RW_LOCK_UNLOCKED(security_table.lock), + .me = THIS_MODULE, + .af = AF_INET6, +}; + +static unsigned int +ip6t_local_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + nf_local_in_net(in, out)->ipv6.ip6table_security); +} + +static unsigned int +ip6t_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + nf_forward_net(in, out)->ipv6.ip6table_security); +} + +static unsigned int +ip6t_local_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* TBD: handle short packets via raw socket */ + return ip6t_do_table(skb, hook, in, out, + nf_local_out_net(in, out)->ipv6.ip6table_security); +} + +static struct nf_hook_ops ip6t_ops[] __read_mostly = { + { + .hook = ip6t_local_in_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_SECURITY, + }, + { + .hook = ip6t_forward_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_FORWARD, + .priority = NF_IP6_PRI_SECURITY, + }, + { + .hook = ip6t_local_out_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_SECURITY, + }, +}; + +static int __net_init ip6table_security_net_init(struct net *net) +{ + net->ipv6.ip6table_security = + ip6t_register_table(net, &security_table, &initial_table.repl); + + if (IS_ERR(net->ipv6.ip6table_security)) + return PTR_ERR(net->ipv6.ip6table_security); + + return 0; +} + +static void __net_exit ip6table_security_net_exit(struct net *net) +{ + ip6t_unregister_table(net->ipv6.ip6table_security); +} + +static struct pernet_operations ip6table_security_net_ops = { + .init = ip6table_security_net_init, + .exit = ip6table_security_net_exit, +}; + +static int __init ip6table_security_init(void) +{ + int ret; + + ret = register_pernet_subsys(&ip6table_security_net_ops); + if (ret < 0) + return ret; + + ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + if (ret < 0) + goto cleanup_table; + + return ret; + +cleanup_table: + unregister_pernet_subsys(&ip6table_security_net_ops); + return ret; +} + +static void __exit ip6table_security_fini(void) +{ + nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + unregister_pernet_subsys(&ip6table_security_net_ops); +} + +module_init(ip6table_security_init); +module_exit(ip6table_security_fini); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index ee713b03e9ec..14d47d833545 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -89,9 +89,8 @@ static int icmpv6_packet(struct nf_conn *ct, means this will only run once even if count hits zero twice (theoretically possible with SMP) */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count) - && del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + if (atomic_dec_and_test(&ct->proto.icmp.count)) + nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 2dccad48058c..52d06dd4b817 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -207,9 +207,12 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.id = id; arg.src = src; arg.dst = dst; + + read_lock_bh(&nf_frags.lock); hash = ip6qhashfn(id, src, dst); q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); + local_bh_enable(); if (q == NULL) goto oom; @@ -413,8 +416,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) fq_kill(fq); - BUG_TRAP(head != NULL); - BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - @@ -638,10 +641,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - spin_lock(&fq->q.lock); + spin_lock_bh(&fq->q.lock); if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { - spin_unlock(&fq->q.lock); + spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); fq_put(fq); goto ret_orig; @@ -653,7 +656,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } - spin_unlock(&fq->q.lock); + spin_unlock_bh(&fq->q.lock); fq_put(fq); return ret_skb; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index df0736a4cafa..f82f6074cf85 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -7,8 +7,6 @@ * PROC file system. This is very similar to the IPv4 version, * except it reports the sockets in the INET6 address family. * - * Version: $Id: proc.c,v 1.17 2002/02/01 22:01:04 davem Exp $ - * * Authors: David S. Miller (davem@caip.rutgers.edu) * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> * @@ -185,32 +183,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) static int sockstat6_seq_open(struct inode *inode, struct file *file) { - int err; - struct net *net; - - err = -ENXIO; - net = get_proc_net(inode); - if (net == NULL) - goto err_net; - - err = single_open(file, sockstat6_seq_show, net); - if (err < 0) - goto err_open; - - return 0; - -err_open: - put_net(net); -err_net: - return err; -} - -static int sockstat6_seq_release(struct inode *inode, struct file *file) -{ - struct net *net = ((struct seq_file *)file->private_data)->private; - - put_net(net); - return single_release(inode, file); + return single_open_net(inode, file, sockstat6_seq_show); } static const struct file_operations sockstat6_seq_fops = { @@ -218,7 +191,7 @@ static const struct file_operations sockstat6_seq_fops = { .open = sockstat6_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = sockstat6_seq_release, + .release = single_release_net, }; static int snmp6_seq_open(struct inode *inode, struct file *file) @@ -241,7 +214,7 @@ int snmp6_register_dev(struct inet6_dev *idev) if (!idev || !idev->dev) return -EINVAL; - if (dev_net(idev->dev) != &init_net) + if (!net_eq(dev_net(idev->dev), &init_net)) return 0; if (!proc_net_devsnmp6) diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index f929f47b925e..9ab789159913 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -5,8 +5,6 @@ * * PF_INET6 protocol dispatch tables. * - * Version: $Id: protocol.c,v 1.10 2001/05/18 02:25:49 davem Exp $ - * * Authors: Pedro Roque <roque@di.fc.ul.pt> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 396f0ea11090..01d47674f7e5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -7,8 +7,6 @@ * * Adapted from linux/net/ipv4/raw.c * - * $Id: raw.c,v 1.51 2002/02/01 22:01:04 davem Exp $ - * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance) @@ -609,7 +607,6 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *iph; struct sk_buff *skb; - unsigned int hh_len; int err; if (length > rt->u.dst.dev->mtu) { @@ -619,13 +616,12 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, if (flags&MSG_PROBE) goto out; - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - - skb = sock_alloc_send_skb(sk, length+hh_len+15, - flags&MSG_DONTWAIT, &err); + skb = sock_alloc_send_skb(sk, + length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, hh_len); + skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; @@ -815,7 +811,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1161,11 +1157,20 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) static void rawv6_close(struct sock *sk, long timeout) { if (inet_sk(sk)->num == IPPROTO_RAW) - ip6_ra_control(sk, -1, NULL); + ip6_ra_control(sk, -1); ip6mr_sk_done(sk); sk_common_release(sk); } +static void raw6_destroy(struct sock *sk) +{ + lock_sock(sk); + ip6_flush_pending_frames(sk); + release_sock(sk); + + inet6_destroy_sock(sk); +} + static int rawv6_init_sk(struct sock *sk) { struct raw6_sock *rp = raw6_sk(sk); @@ -1189,11 +1194,11 @@ struct proto rawv6_prot = { .name = "RAWv6", .owner = THIS_MODULE, .close = rawv6_close, + .destroy = raw6_destroy, .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, - .destroy = inet6_destroy_sock, .setsockopt = rawv6_setsockopt, .getsockopt = rawv6_getsockopt, .sendmsg = rawv6_sendmsg, @@ -1246,7 +1251,7 @@ static int raw6_seq_show(struct seq_file *seq, void *v) "local_address " "remote_address " "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode drops\n"); + " uid timeout inode ref pointer drops\n"); else raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 798cabc7535b..89184b576e23 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: reassembly.c,v 1.26 2001/03/07 22:00:57 davem Exp $ - * * Based on: net/ipv4/ip_fragment.c * * This program is free software; you can redistribute it and/or @@ -247,6 +245,8 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, arg.id = id; arg.src = src; arg.dst = dst; + + read_lock(&ip6_frags.lock); hash = ip6qhashfn(id, src, dst); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); @@ -473,8 +473,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, fq->q.fragments = head; } - BUG_TRAP(head != NULL); - BUG_TRAP(FRAG6_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(FRAG6_CB(head)->offset != 0); /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - @@ -632,7 +632,7 @@ static struct inet6_protocol frag_protocol = }; #ifdef CONFIG_SYSCTL -static struct ctl_table ip6_frags_ctl_table[] = { +static struct ctl_table ip6_frags_ns_ctl_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, .procname = "ip6frag_high_thresh", @@ -658,6 +658,10 @@ static struct ctl_table ip6_frags_ctl_table[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, }, + { } +}; + +static struct ctl_table ip6_frags_ctl_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, .procname = "ip6frag_secret_interval", @@ -670,21 +674,20 @@ static struct ctl_table ip6_frags_ctl_table[] = { { } }; -static int ip6_frags_sysctl_register(struct net *net) +static int ip6_frags_ns_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; - table = ip6_frags_ctl_table; + table = ip6_frags_ns_ctl_table; if (net != &init_net) { - table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL); + table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; table[0].data = &net->ipv6.frags.high_thresh; table[1].data = &net->ipv6.frags.low_thresh; table[2].data = &net->ipv6.frags.timeout; - table[3].mode &= ~0222; } hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table); @@ -701,7 +704,7 @@ err_alloc: return -ENOMEM; } -static void ip6_frags_sysctl_unregister(struct net *net) +static void ip6_frags_ns_sysctl_unregister(struct net *net) { struct ctl_table *table; @@ -709,13 +712,36 @@ static void ip6_frags_sysctl_unregister(struct net *net) unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr); kfree(table); } + +static struct ctl_table_header *ip6_ctl_header; + +static int ip6_frags_sysctl_register(void) +{ + ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path, + ip6_frags_ctl_table); + return ip6_ctl_header == NULL ? -ENOMEM : 0; +} + +static void ip6_frags_sysctl_unregister(void) +{ + unregister_net_sysctl_table(ip6_ctl_header); +} #else -static inline int ip6_frags_sysctl_register(struct net *net) +static inline int ip6_frags_ns_sysctl_register(struct net *net) { return 0; } -static inline void ip6_frags_sysctl_unregister(struct net *net) +static inline void ip6_frags_ns_sysctl_unregister(struct net *net) +{ +} + +static inline int ip6_frags_sysctl_register(void) +{ + return 0; +} + +static inline void ip6_frags_sysctl_unregister(void) { } #endif @@ -728,12 +754,12 @@ static int ipv6_frags_init_net(struct net *net) inet_frags_init_net(&net->ipv6.frags); - return ip6_frags_sysctl_register(net); + return ip6_frags_ns_sysctl_register(net); } static void ipv6_frags_exit_net(struct net *net) { - ip6_frags_sysctl_unregister(net); + ip6_frags_ns_sysctl_unregister(net); inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); } @@ -750,7 +776,13 @@ int __init ipv6_frag_init(void) if (ret) goto out; - register_pernet_subsys(&ip6_frags_ops); + ret = ip6_frags_sysctl_register(); + if (ret) + goto err_sysctl; + + ret = register_pernet_subsys(&ip6_frags_ops); + if (ret) + goto err_pernet; ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; @@ -763,11 +795,18 @@ int __init ipv6_frag_init(void) inet_frags_init(&ip6_frags); out: return ret; + +err_pernet: + ip6_frags_sysctl_unregister(); +err_sysctl: + inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); + goto out; } void ipv6_frag_exit(void) { inet_frags_fini(&ip6_frags); + ip6_frags_sysctl_unregister(); unregister_pernet_subsys(&ip6_frags_ops); inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a493ad9b8914..86540b24b27c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -109,7 +107,7 @@ static struct dst_ops ip6_dst_ops_template = { .negative_advice = ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, - .local_out = ip6_local_out, + .local_out = __ip6_local_out, .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; @@ -230,7 +228,7 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt) static inline int rt6_need_strict(struct in6_addr *daddr) { return (ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); } /* @@ -239,21 +237,26 @@ static inline int rt6_need_strict(struct in6_addr *daddr) static inline struct rt6_info *rt6_device_match(struct net *net, struct rt6_info *rt, + struct in6_addr *saddr, int oif, - int strict) + int flags) { struct rt6_info *local = NULL; struct rt6_info *sprt; - if (oif) { - for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { - struct net_device *dev = sprt->rt6i_dev; + if (!oif && ipv6_addr_any(saddr)) + goto out; + + for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { + struct net_device *dev = sprt->rt6i_dev; + + if (oif) { if (dev->ifindex == oif) return sprt; if (dev->flags & IFF_LOOPBACK) { if (sprt->rt6i_idev == NULL || sprt->rt6i_idev->dev->ifindex != oif) { - if (strict && oif) + if (flags & RT6_LOOKUP_F_IFACE && oif) continue; if (local && (!oif || local->rt6i_idev->dev->ifindex == oif)) @@ -261,14 +264,21 @@ static inline struct rt6_info *rt6_device_match(struct net *net, } local = sprt; } + } else { + if (ipv6_chk_addr(net, saddr, dev, + flags & RT6_LOOKUP_F_IFACE)) + return sprt; } + } + if (oif) { if (local) return local; - if (strict) + if (flags & RT6_LOOKUP_F_IFACE) return net->ipv6.ip6_null_entry; } +out: return rt; } @@ -446,7 +456,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; - u32 lifetime; + unsigned long lifetime; struct rt6_info *rt; if (len < sizeof(struct route_info)) { @@ -472,13 +482,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (pref == ICMPV6_ROUTER_PREF_INVALID) pref = ICMPV6_ROUTER_PREF_MEDIUM; - lifetime = ntohl(rinfo->lifetime); - if (lifetime == 0xffffffff) { - /* infinity */ - } else if (lifetime > 0x7fffffff/HZ) { - /* Avoid arithmetic overflow */ - lifetime = 0x7fffffff/HZ - 1; - } + lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); if (rinfo->length == 3) prefix = (struct in6_addr *)rinfo->prefix; @@ -506,7 +510,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (lifetime == 0xffffffff) { + if (!addrconf_finite_timeout(lifetime)) { rt->rt6i_flags &= ~RTF_EXPIRES; } else { rt->rt6i_expires = jiffies + HZ * lifetime; @@ -547,7 +551,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(net, rt, fl->oif, flags); + rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); BACKTRACK(net, &fl->fl6_src); out: dst_use(&rt->u.dst, jiffies); @@ -672,7 +676,7 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int strict = 0; int attempts = 3; int err; - int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; + int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -974,13 +978,12 @@ out: return &rt->u.dst; } -int icmp6_dst_gc(int *more) +int icmp6_dst_gc(void) { struct dst_entry *dst, *next, **pprev; - int freed; + int more = 0; next = NULL; - freed = 0; spin_lock_bh(&icmp6_dst_lock); pprev = &icmp6_dst_gc_list; @@ -989,16 +992,15 @@ int icmp6_dst_gc(int *more) if (!atomic_read(&dst->__refcnt)) { *pprev = dst->next; dst_free(dst); - freed++; } else { pprev = &dst->next; - (*more)++; + ++more; } } spin_unlock_bh(&icmp6_dst_lock); - return freed; + return more; } static int ip6_dst_gc(struct dst_ops *ops) @@ -1054,7 +1056,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst) hoplimit = idev->cnf.hop_limit; in6_dev_put(idev); } else - hoplimit = ipv6_devconf.hop_limit; + hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; } return hoplimit; } @@ -1106,7 +1108,9 @@ int ip6_route_add(struct fib6_config *cfg) } rt->u.dst.obsolete = -1; - rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); + rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? + jiffies + clock_t_to_jiffies(cfg->fc_expires) : + 0; if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; @@ -1243,11 +1247,11 @@ install_route: } } - if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) + if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - if (!rt->u.dst.metrics[RTAX_MTU-1]) + if (!dst_metric(&rt->u.dst, RTAX_MTU)) rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); - if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) + if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; @@ -2200,7 +2204,13 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); - expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0; + if (!(rt->rt6i_flags & RTF_EXPIRES)) + expires = 0; + else if (rt->rt6i_expires - jiffies < INT_MAX) + expires = rt->rt6i_expires - jiffies; + else + expires = INT_MAX; + if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, rt->u.dst.error) < 0) goto nla_put_failure; @@ -2404,26 +2414,7 @@ static int ipv6_route_show(struct seq_file *m, void *v) static int ipv6_route_open(struct inode *inode, struct file *file) { - int err; - struct net *net = get_proc_net(inode); - if (!net) - return -ENXIO; - - err = single_open(file, ipv6_route_show, net); - if (err < 0) { - put_net(net); - return err; - } - - return 0; -} - -static int ipv6_route_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct net *net = seq->private; - put_net(net); - return single_release(inode, file); + return single_open_net(inode, file, ipv6_route_show); } static const struct file_operations ipv6_route_proc_fops = { @@ -2431,7 +2422,7 @@ static const struct file_operations ipv6_route_proc_fops = { .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, - .release = ipv6_route_release, + .release = single_release_net, }; static int rt6_stats_seq_show(struct seq_file *seq, void *v) @@ -2451,26 +2442,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) static int rt6_stats_seq_open(struct inode *inode, struct file *file) { - int err; - struct net *net = get_proc_net(inode); - if (!net) - return -ENXIO; - - err = single_open(file, rt6_stats_seq_show, net); - if (err < 0) { - put_net(net); - return err; - } - - return 0; -} - -static int rt6_stats_seq_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct net *net = (struct net *)seq->private; - put_net(net); - return single_release(inode, file); + return single_open_net(inode, file, rt6_stats_seq_show); } static const struct file_operations rt6_stats_seq_fops = { @@ -2478,7 +2450,7 @@ static const struct file_operations rt6_stats_seq_fops = { .open = rt6_stats_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = rt6_stats_seq_release, + .release = single_release_net, }; #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 4b2f1033994e..b7a50e968506 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -6,8 +6,6 @@ * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: sit.c,v 1.53 2001/09/25 05:09:53 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -222,15 +220,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) } -static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) +static int ipip6_tunnel_get_prl(struct ip_tunnel *t, + struct ip_tunnel_prl __user *a) { - struct ip_tunnel_prl *kp; + struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; int ret = 0; - cmax = a->datalen / sizeof(*a); - if (cmax > 1 && a->addr != htonl(INADDR_ANY)) + if (copy_from_user(&kprl, a, sizeof(kprl))) + return -EFAULT; + cmax = kprl.datalen / sizeof(kprl); + if (cmax > 1 && kprl.addr != htonl(INADDR_ANY)) cmax = 1; /* For simple GET or for root users, @@ -261,26 +262,25 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) for (prl = t->prl; prl; prl = prl->next) { if (c > cmax) break; - if (a->addr != htonl(INADDR_ANY) && prl->addr != a->addr) + if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) continue; kp[c].addr = prl->addr; kp[c].flags = prl->flags; c++; - if (a->addr != htonl(INADDR_ANY)) + if (kprl.addr != htonl(INADDR_ANY)) break; } out: read_unlock(&ipip6_lock); len = sizeof(*kp) * c; - ret = len ? copy_to_user(a->data, kp, len) : 0; + ret = 0; + if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen)) + ret = -EFAULT; kfree(kp); - if (ret) - return -EFAULT; - a->datalen = len; - return 0; + return ret; } static int @@ -403,9 +403,8 @@ static void ipip6_tunnel_uninit(struct net_device *dev) static int ipip6_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ @@ -462,92 +461,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipip6_lock); return err; -#else - struct iphdr *iph = (struct iphdr*)dp; - int hlen = iph->ihl<<2; - struct ipv6hdr *iph6; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - int rel_info = 0; - struct sk_buff *skb2; - struct rt6_info *rt6i; - - if (len < hlen + sizeof(struct ipv6hdr)) - return; - iph6 = (struct ipv6hdr*)(dp + hlen); - - switch (type) { - default: - return; - case ICMP_PARAMETERPROB: - if (icmp_hdr(skb)->un.gateway < hlen) - return; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMPV6_PARAMPROB; - rel_info = icmp_hdr(skb)->un.gateway - hlen; - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return; - case ICMP_FRAG_NEEDED: - /* Too complicated case ... */ - return; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. --ANK - */ - rel_type = ICMPV6_DEST_UNREACH; - rel_code = ICMPV6_ADDR_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return; - rel_type = ICMPV6_TIME_EXCEED; - rel_code = ICMPV6_EXC_HOPLIMIT; - break; - } - - /* Prepare fake skb to feed it to icmpv6_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return 0; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)iph6); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - rt6i = rt6_lookup(dev_net(skb->dev), &iph6->saddr, NULL, NULL, 0); - if (rt6i && rt6i->rt6i_dev) { - skb2->dev = rt6i->rt6i_dev; - - rt6i = rt6_lookup(dev_net(skb->dev), - &iph6->daddr, &iph6->saddr, NULL, 0); - - if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) { - struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev); - if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) { - rel_type = ICMPV6_DEST_UNREACH; - rel_code = ICMPV6_ADDR_UNREACH; - } - icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev); - } - } - kfree_skb(skb2); - return 0; -#endif } static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) @@ -578,13 +491,13 @@ static int ipip6_rcv(struct sk_buff *skb) if ((tunnel->dev->priv_flags & IFF_ISATAP) && !isatap_chksrc(skb, iph, tunnel)) { - tunnel->stat.rx_errors++; + tunnel->dev->stats.rx_errors++; read_unlock(&ipip6_lock); kfree_skb(skb); return 0; } - tunnel->stat.rx_packets++; - tunnel->stat.rx_bytes += skb->len; + tunnel->dev->stats.rx_packets++; + tunnel->dev->stats.rx_bytes += skb->len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; @@ -596,9 +509,9 @@ static int ipip6_rcv(struct sk_buff *skb) } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - kfree_skb(skb); read_unlock(&ipip6_lock); out: + kfree_skb(skb); return 0; } @@ -624,7 +537,7 @@ static inline __be32 try_6to4(struct in6_addr *v6dst) static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->stat; + struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; @@ -638,7 +551,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int addr_type; if (tunnel->recursion++) { - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -705,20 +618,20 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error_icmp; } } if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error_icmp; } tdev = rt->u.dst.dev; if (tdev == dev) { ip_rt_put(rt); - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -728,7 +641,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; if (mtu < 68) { - tunnel->stat.collisions++; + stats->collisions++; ip_rt_put(rt); goto tx_error; } @@ -960,11 +873,20 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) break; case SIOCGETPRL: + err = -EINVAL; + if (dev == sitn->fb_tunnel_dev) + goto done; + err = -ENOENT; + if (!(t = netdev_priv(dev))) + goto done; + err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); + break; + case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: err = -EPERM; - if (cmd != SIOCGETPRL && !capable(CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) goto done; err = -EINVAL; if (dev == sitn->fb_tunnel_dev) @@ -977,12 +899,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; switch (cmd) { - case SIOCGETPRL: - err = ipip6_tunnel_get_prl(t, &prl); - if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, - &prl, sizeof(prl))) - err = -EFAULT; - break; case SIOCDELPRL: err = ipip6_tunnel_del_prl(t, &prl); break; @@ -991,8 +907,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } - if (cmd != SIOCGETPRL) - netdev_state_change(dev); + netdev_state_change(dev); break; default: @@ -1003,11 +918,6 @@ done: return err; } -static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev) -{ - return &(((struct ip_tunnel*)netdev_priv(dev))->stat); -} - static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr)) @@ -1021,7 +931,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->uninit = ipip6_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipip6_tunnel_xmit; - dev->get_stats = ipip6_tunnel_get_stats; dev->do_ioctl = ipip6_tunnel_ioctl; dev->change_mtu = ipip6_tunnel_change_mtu; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 938ce4ecde55..a46badd1082d 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -137,7 +137,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) ; *mssp = msstab[mssind] + 1; - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, th->dest, ntohl(th->seq), @@ -177,11 +177,11 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || (mss = cookie_check(skb, cookie)) == 0) { - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); @@ -198,7 +198,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq6 = inet6_rsk(req); treq = tcp_rsk(req); - ireq6->pktopts = NULL; if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); @@ -224,6 +223,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->retrans = 0; + ireq->ecn_ok = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 3804dcbbfab0..e6dfaeac6be3 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -37,6 +37,10 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = &proc_dointvec }, + { .ctl_name = 0 } +}; + +static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_MLD_MAX_MSF, .procname = "mld_max_msf", @@ -80,12 +84,6 @@ static int ipv6_sysctl_net_init(struct net *net) ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; - /* We don't want this value to be per namespace, it should be global - to all namespaces, so make it read-only when we are not in the - init network namespace */ - if (net != &init_net) - ipv6_table[3].mode = 0444; - net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path, ipv6_table); if (!net->ipv6.sysctl.table) @@ -126,12 +124,45 @@ static struct pernet_operations ipv6_sysctl_net_ops = { .exit = ipv6_sysctl_net_exit, }; +static struct ctl_table_header *ip6_header; + int ipv6_sysctl_register(void) { - return register_pernet_subsys(&ipv6_sysctl_net_ops); + int err = -ENOMEM;; + + ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_table); + if (ip6_header == NULL) + goto out; + + err = register_pernet_subsys(&ipv6_sysctl_net_ops); + if (err) + goto err_pernet; +out: + return err; + +err_pernet: + unregister_net_sysctl_table(ip6_header); + goto out; } void ipv6_sysctl_unregister(void) { + unregister_net_sysctl_table(ip6_header); unregister_pernet_subsys(&ipv6_sysctl_net_ops); } + +static struct ctl_table_header *ip6_base; + +int ipv6_static_sysctl_register(void) +{ + static struct ctl_table empty[1]; + ip6_base = register_net_sysctl_rotable(net_ipv6_ctl_path, empty); + if (ip6_base == NULL) + return -ENOMEM; + return 0; +} + +void ipv6_static_sysctl_unregister(void) +{ + unregister_net_sysctl_table(ip6_base); +} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 715965f0fac0..cff778b23a7f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $ - * * Based on: * linux/net/ipv4/tcp.c * linux/net/ipv4/tcp_input.c @@ -72,8 +70,6 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); -static void tcp_v6_send_check(struct sock *sk, int len, - struct sk_buff *skb); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); @@ -82,6 +78,12 @@ static struct inet_connection_sock_af_ops ipv6_specific; #ifdef CONFIG_TCP_MD5SIG static struct tcp_sock_af_ops tcp_sock_ipv6_specific; static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; +#else +static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, + struct in6_addr *addr) +{ + return NULL; +} #endif static void tcp_v6_hash(struct sock *sk) @@ -321,8 +323,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int err; struct tcp_sock *tp; __u32 seq; + struct net *net = dev_net(skb->dev); - sk = inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, &hdr->daddr, + sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { @@ -337,7 +340,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, bh_lock_sock(sk); if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; @@ -346,7 +349,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -418,10 +421,10 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* ICMPs are not backlogged, hence we cannot get * an established socket here. */ - BUG_TRAP(req->sk == NULL); + WARN_ON(req->sk != NULL); if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -733,109 +736,105 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen); } -static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - struct in6_addr *saddr, - struct in6_addr *daddr, - struct tcphdr *th, int protocol, - unsigned int tcplen) +static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, + struct in6_addr *daddr, + struct in6_addr *saddr, int nbytes) { - struct scatterlist sg[4]; - __u16 data_len; - int block = 0; - __sum16 cksum; - struct tcp_md5sig_pool *hp; struct tcp6_pseudohdr *bp; - struct hash_desc *desc; - int err; - unsigned int nbytes = 0; + struct scatterlist sg; - hp = tcp_get_md5sig_pool(); - if (!hp) { - printk(KERN_WARNING "%s(): hash pool not found...\n", __func__); - goto clear_hash_noput; - } bp = &hp->md5_blk.ip6; - desc = &hp->md5_desc; - /* 1. TCP pseudo-header (RFC2460) */ ipv6_addr_copy(&bp->saddr, saddr); ipv6_addr_copy(&bp->daddr, daddr); - bp->len = htonl(tcplen); - bp->protocol = htonl(protocol); - - sg_init_table(sg, 4); - - sg_set_buf(&sg[block++], bp, sizeof(*bp)); - nbytes += sizeof(*bp); + bp->protocol = cpu_to_be32(IPPROTO_TCP); + bp->len = cpu_to_be16(nbytes); - /* 2. TCP header, excluding options */ - cksum = th->check; - th->check = 0; - sg_set_buf(&sg[block++], th, sizeof(*th)); - nbytes += sizeof(*th); - - /* 3. TCP segment data (if any) */ - data_len = tcplen - (th->doff << 2); - if (data_len > 0) { - u8 *data = (u8 *)th + (th->doff << 2); - sg_set_buf(&sg[block++], data, data_len); - nbytes += data_len; - } + sg_init_one(&sg, bp, sizeof(*bp)); + return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); +} - /* 4. shared key */ - sg_set_buf(&sg[block++], key->key, key->keylen); - nbytes += key->keylen; +static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, + struct in6_addr *daddr, struct in6_addr *saddr, + struct tcphdr *th) +{ + struct tcp_md5sig_pool *hp; + struct hash_desc *desc; - sg_mark_end(&sg[block - 1]); + hp = tcp_get_md5sig_pool(); + if (!hp) + goto clear_hash_noput; + desc = &hp->md5_desc; - /* Now store the hash into the packet */ - err = crypto_hash_init(desc); - if (err) { - printk(KERN_WARNING "%s(): hash_init failed\n", __func__); + if (crypto_hash_init(desc)) goto clear_hash; - } - err = crypto_hash_update(desc, sg, nbytes); - if (err) { - printk(KERN_WARNING "%s(): hash_update failed\n", __func__); + if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) goto clear_hash; - } - err = crypto_hash_final(desc, md5_hash); - if (err) { - printk(KERN_WARNING "%s(): hash_final failed\n", __func__); + if (tcp_md5_hash_header(hp, th)) + goto clear_hash; + if (tcp_md5_hash_key(hp, key)) + goto clear_hash; + if (crypto_hash_final(desc, md5_hash)) goto clear_hash; - } - /* Reset header, and free up the crypto */ tcp_put_md5sig_pool(); - th->check = cksum; -out: return 0; + clear_hash: tcp_put_md5sig_pool(); clear_hash_noput: memset(md5_hash, 0, 16); - goto out; + return 1; } -static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - struct sock *sk, - struct dst_entry *dst, - struct request_sock *req, - struct tcphdr *th, int protocol, - unsigned int tcplen) +static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, + struct sock *sk, struct request_sock *req, + struct sk_buff *skb) { struct in6_addr *saddr, *daddr; + struct tcp_md5sig_pool *hp; + struct hash_desc *desc; + struct tcphdr *th = tcp_hdr(skb); if (sk) { saddr = &inet6_sk(sk)->saddr; daddr = &inet6_sk(sk)->daddr; - } else { + } else if (req) { saddr = &inet6_rsk(req)->loc_addr; daddr = &inet6_rsk(req)->rmt_addr; + } else { + struct ipv6hdr *ip6h = ipv6_hdr(skb); + saddr = &ip6h->saddr; + daddr = &ip6h->daddr; } - return tcp_v6_do_calc_md5_hash(md5_hash, key, - saddr, daddr, - th, protocol, tcplen); + + hp = tcp_get_md5sig_pool(); + if (!hp) + goto clear_hash_noput; + desc = &hp->md5_desc; + + if (crypto_hash_init(desc)) + goto clear_hash; + + if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) + goto clear_hash; + if (tcp_md5_hash_header(hp, th)) + goto clear_hash; + if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) + goto clear_hash; + if (tcp_md5_hash_key(hp, key)) + goto clear_hash; + if (crypto_hash_final(desc, md5_hash)) + goto clear_hash; + + tcp_put_md5sig_pool(); + return 0; + +clear_hash: + tcp_put_md5sig_pool(); +clear_hash_noput: + memset(md5_hash, 0, 16); + return 1; } static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) @@ -844,43 +843,12 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) struct tcp_md5sig_key *hash_expected; struct ipv6hdr *ip6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); - int length = (th->doff << 2) - sizeof (*th); int genhash; - u8 *ptr; u8 newhash[16]; hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr); + hash_location = tcp_parse_md5sig_option(th); - /* If the TCP option is too short, we can short cut */ - if (length < TCPOLEN_MD5SIG) - return hash_expected ? 1 : 0; - - /* parse options */ - ptr = (u8*)(th + 1); - while (length > 0) { - int opcode = *ptr++; - int opsize; - - switch(opcode) { - case TCPOPT_EOL: - goto done_opts; - case TCPOPT_NOP: - length--; - continue; - default: - opsize = *ptr++; - if (opsize < 2 || opsize > length) - goto done_opts; - if (opcode == TCPOPT_MD5SIG) { - hash_location = ptr; - goto done_opts; - } - } - ptr += opsize - 2; - length -= opsize; - } - -done_opts: /* do we have a hash as expected? */ if (!hash_expected) { if (!hash_location) @@ -907,11 +875,10 @@ done_opts: } /* check the signature */ - genhash = tcp_v6_do_calc_md5_hash(newhash, - hash_expected, - &ip6h->saddr, &ip6h->daddr, - th, sk->sk_protocol, - skb->len); + genhash = tcp_v6_md5_hash_skb(newhash, + hash_expected, + NULL, NULL, skb); + if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { printk(KERN_INFO "MD5 Hash %s for " @@ -1048,10 +1015,9 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); - tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key, - &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr, - t1, IPPROTO_TCP, tot_len); + tcp_v6_md5_hash_hdr((__u8 *)&opt[1], key, + &ipv6_hdr(skb)->daddr, + &ipv6_hdr(skb)->saddr, t1); } #endif @@ -1079,8 +1045,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, buff, &fl, NULL, 0); - TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); - TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); + TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); return; } } @@ -1088,8 +1054,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) kfree_skb(buff); } -static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, - struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, + struct tcp_md5sig_key *key) { struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; @@ -1098,22 +1064,6 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); __be32 *topt; -#ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; - struct tcp_md5sig_key tw_key; -#endif - -#ifdef CONFIG_TCP_MD5SIG - if (!tw && skb->sk) { - key = tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr); - } else if (tw && tw->tw_md5_keylen) { - tw_key.key = tw->tw_md5_key; - tw_key.keylen = tw->tw_md5_keylen; - key = &tw_key; - } else { - key = NULL; - } -#endif if (ts) tot_len += TCPOLEN_TSTAMP_ALIGNED; @@ -1154,10 +1104,9 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, if (key) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); - tcp_v6_do_calc_md5_hash((__u8 *)topt, key, - &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr, - t1, IPPROTO_TCP, tot_len); + tcp_v6_md5_hash_hdr((__u8 *)topt, key, + &ipv6_hdr(skb)->daddr, + &ipv6_hdr(skb)->saddr, t1); } #endif @@ -1180,7 +1129,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, buff, &fl, NULL, 0); - TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); + TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); return; } } @@ -1193,16 +1142,17 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v6_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcptw->tw_ts_recent); + tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw)); inet_twsk_put(tw); } static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent); + tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, + tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr)); } @@ -1299,7 +1249,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); - treq->pktopts = NULL; if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1539,9 +1488,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; out_overflow: - NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out: - NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); if (opt && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); @@ -1670,7 +1619,7 @@ discard: kfree_skb(skb); return 0; csum_err: - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; @@ -1708,6 +1657,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) struct tcphdr *th; struct sock *sk; int ret; + struct net *net = dev_net(skb->dev); if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -1715,7 +1665,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) /* * Count it even if it's bad. */ - TCP_INC_STATS_BH(TCP_MIB_INSEGS); + TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; @@ -1739,7 +1689,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); TCP_SKB_CB(skb)->sacked = 0; - sk = __inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, + sk = __inet6_lookup(net, &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); @@ -1787,7 +1737,7 @@ no_tcp_socket: if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { bad_packet: - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(net, TCP_MIB_INERRS); } else { tcp_v6_send_reset(NULL, skb); } @@ -1812,7 +1762,7 @@ do_time_wait: } if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(net, TCP_MIB_INERRS); inet_twsk_put(inet_twsk(sk)); goto discard_it; } @@ -1872,7 +1822,7 @@ static struct inet_connection_sock_af_ops ipv6_specific = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_sock_af_ops tcp_sock_ipv6_specific = { .md5_lookup = tcp_v6_md5_lookup, - .calc_md5_hash = tcp_v6_calc_md5_hash, + .calc_md5_hash = tcp_v6_md5_hash_skb, .md5_add = tcp_v6_md5_add_func, .md5_parse = tcp_v6_parse_md5_keys, }; @@ -1904,7 +1854,7 @@ static struct inet_connection_sock_af_ops ipv6_mapped = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { .md5_lookup = tcp_v4_md5_lookup, - .calc_md5_hash = tcp_v4_calc_md5_hash, + .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_add = tcp_v6_md5_add_func, .md5_parse = tcp_v6_parse_md5_keys, }; @@ -1961,7 +1911,7 @@ static int tcp_v6_init_sock(struct sock *sk) return 0; } -static int tcp_v6_destroy_sock(struct sock *sk) +static void tcp_v6_destroy_sock(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list */ @@ -1969,7 +1919,7 @@ static int tcp_v6_destroy_sock(struct sock *sk) tcp_v6_clear_md5_list(sk); #endif tcp_v4_destroy_sock(sk); - return inet6_destroy_sock(sk); + inet6_destroy_sock(sk); } #ifdef CONFIG_PROC_FS @@ -2037,7 +1987,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -2053,8 +2003,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh ); diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 6323921b40be..669f280989c3 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -109,7 +109,7 @@ static int tunnel46_rcv(struct sk_buff *skb) { struct xfrm6_tunnel *handler; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; for (handler = tunnel46_handlers; handler; handler = handler->next) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1fd784f3e2ec..d1477b350f76 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -7,8 +7,6 @@ * * Based on linux/ipv4/udp.c * - * $Id: udp.c,v 1.65 2002/02/01 22:01:04 davem Exp $ - * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which @@ -67,7 +65,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, int badness = -1; read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && @@ -168,7 +166,8 @@ try_again: goto out_free; if (!peeked) - UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); sock_recv_timestamp(msg, sk, skb); @@ -215,7 +214,7 @@ out: csum_copy_err: lock_sock(sk); if (!skb_kill_datagram(sk, skb, flags)) - UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); release_sock(sk); if (flags & MSG_DONTWAIT) @@ -299,14 +298,17 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); + if (rc == -ENOMEM) { + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); + atomic_inc(&sk->sk_drops); + } goto drop; } return 0; drop: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -355,15 +357,16 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, * Note: called only from the BH handler context, * so we don't need to lock the hashes. */ -static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, - struct in6_addr *daddr, struct hlist_head udptable[]) +static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, + struct in6_addr *saddr, struct in6_addr *daddr, + struct hlist_head udptable[]) { struct sock *sk, *sk2; const struct udphdr *uh = udp_hdr(skb); int dif; read_lock(&udp_hash_lock); - sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); dif = inet6_iif(skb); sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { @@ -437,6 +440,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], struct net_device *dev = skb->dev; struct in6_addr *saddr, *daddr; u32 ulen = 0; + struct net *net = dev_net(skb->dev); if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto short_packet; @@ -475,7 +479,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], * Multicast receive code */ if (ipv6_addr_is_multicast(daddr)) - return __udp6_lib_mcast_deliver(skb, saddr, daddr, udptable); + return __udp6_lib_mcast_deliver(net, skb, + saddr, daddr, udptable); /* Unicast */ @@ -483,7 +488,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], * check socket cache ... must talk to Alan about his plans * for sock caches... i'll skip this for now. */ - sk = __udp6_lib_lookup(dev_net(skb->dev), saddr, uh->source, + sk = __udp6_lib_lookup(net, saddr, uh->source, daddr, uh->dest, inet6_iif(skb), udptable); if (sk == NULL) { @@ -492,7 +497,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto discard; - UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, + proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); @@ -517,7 +523,7 @@ short_packet: ulen, skb->len); discard: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -534,7 +540,9 @@ static void udp_v6_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); - if (up->pending) { + if (up->pending == AF_INET) + udp_flush_pending_frames(sk); + else if (up->pending) { up->len = 0; up->pending = 0; ip6_flush_pending_frames(sk); @@ -585,7 +593,8 @@ out: up->len = 0; up->pending = 0; if (!err) - UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -731,7 +740,7 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -848,12 +857,14 @@ do_append_data: } else { dst_release(dst); } + dst = NULL; } if (err > 0) err = np->recverr ? net_xmit_errno(err) : 0; release_sock(sk); out: + dst_release(dst); fl6_sock_release(flowlabel); if (!err) return len; @@ -865,7 +876,8 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -877,15 +889,13 @@ do_confirm: goto out; } -int udpv6_destroy_sock(struct sock *sk) +void udpv6_destroy_sock(struct sock *sk) { lock_sock(sk); udp_v6_flush_pending_frames(sk); release_sock(sk); inet6_destroy_sock(sk); - - return 0; } /* @@ -951,7 +961,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket srcp = ntohs(inet->sport); seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", bucket, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -963,7 +973,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp); + atomic_read(&sp->sk_refcnt), sp, + atomic_read(&sp->sk_drops)); } int udp6_seq_show(struct seq_file *seq, void *v) @@ -974,7 +985,7 @@ int udp6_seq_show(struct seq_file *seq, void *v) "local_address " "remote_address " "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode\n"); + " uid timeout inode ref pointer drops\n"); else udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); return 0; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 321b81a4d418..92dd7da766d8 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -29,7 +29,7 @@ extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); -extern int udpv6_destroy_sock(struct sock *sk); +extern void udpv6_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS extern int udp6_seq_show(struct seq_file *seq, void *v); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 491efd00a866..f6cdcb348e05 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -2,8 +2,6 @@ * UDPLITEv6 An implementation of the UDP-Lite protocol over IPv6. * See also net/ipv4/udplite.c * - * Version: $Id: udplite.c,v 1.9 2006/10/19 08:28:10 gerrit Exp $ - * * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> * * Changes: diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 81ae8735f5e3..b6e70f92e7fb 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -335,7 +335,7 @@ static int ipxitf_device_event(struct notifier_block *notifier, struct net_device *dev = ptr; struct ipx_interface *i, *tmp; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event != NETDEV_DOWN && event != NETDEV_UP) @@ -1636,7 +1636,7 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty u16 ipx_pktsize; int rc = 0; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; /* Not ours */ diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ae54b20d0470..3eb5bcc75f99 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1093,11 +1093,6 @@ static int irda_create(struct net *net, struct socket *sock, int protocol) init_waitqueue_head(&self->query_wait); - /* Initialise networking socket struct */ - sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */ - sk->sk_family = PF_IRDA; - sk->sk_protocol = protocol; - switch (sock->type) { case SOCK_STREAM: sock->ops = &irda_stream_ops; @@ -1124,13 +1119,20 @@ static int irda_create(struct net *net, struct socket *sock, int protocol) self->max_sdu_size_rx = TTP_SAR_UNBOUND; break; default: + sk_free(sk); return -ESOCKTNOSUPPORT; } break; default: + sk_free(sk); return -ESOCKTNOSUPPORT; } + /* Initialise networking socket struct */ + sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */ + sk->sk_family = PF_IRDA; + sk->sk_protocol = protocol; + /* Register as a client with IrLMP */ self->ckey = irlmp_register_client(0, NULL, NULL, NULL); self->mask.word = 0xffff; diff --git a/net/irda/discovery.c b/net/irda/discovery.c index bfacef8b76f4..a6f99b5a1499 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -40,6 +40,8 @@ #include <net/irda/discovery.h> +#include <asm/unaligned.h> + /* * Function irlmp_add_discovery (cachelog, discovery) * @@ -87,7 +89,7 @@ void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *new) */ hashbin_remove_this(cachelog, (irda_queue_t *) node); /* Check if hints bits are unchanged */ - if(u16ho(node->data.hints) == u16ho(new->data.hints)) + if (get_unaligned((__u16 *)node->data.hints) == get_unaligned((__u16 *)new->data.hints)) /* Set time of first discovery for this node */ new->firststamp = node->firststamp; kfree(node); @@ -281,9 +283,9 @@ struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn, /* Mask out the ones we don't want : * We want to match the discovery mask, and to get only * the most recent one (unless we want old ones) */ - if ((u16ho(discovery->data.hints) & mask) && + if ((get_unaligned((__u16 *)discovery->data.hints) & mask) && ((old_entries) || - ((jiffies - discovery->firststamp) < j_timeout)) ) { + ((jiffies - discovery->firststamp) < j_timeout))) { /* Create buffer as needed. * As this function get called a lot and most time * we don't have anything to put in the log (we are diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 76c3057d0179..e4e2caeb9d82 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -650,12 +650,7 @@ static void ircomm_tty_do_softint(struct work_struct *work) } /* Check if user (still) wants to be waken up */ - if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && - tty->ldisc.write_wakeup) - { - (tty->ldisc.write_wakeup)(tty); - } - wake_up_interruptible(&tty->write_wait); + tty_wakeup(tty); } /* @@ -1141,6 +1136,7 @@ static int ircomm_tty_data_indication(void *instance, void *sap, struct sk_buff *skb) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; + struct tty_ldisc *ld; IRDA_DEBUG(2, "%s()\n", __func__ ); @@ -1173,7 +1169,11 @@ static int ircomm_tty_data_indication(void *instance, void *sap, * involve the flip buffers, since we are not running in an interrupt * handler */ - self->tty->ldisc.receive_buf(self->tty, skb->data, NULL, skb->len); + + ld = tty_ldisc_ref(self->tty); + if (ld) + ld->ops->receive_buf(self->tty, skb->data, NULL, skb->len); + tty_ldisc_deref(ld); /* No need to kfree_skb - see ircomm_ttp_data_indication() */ diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 90894534f3cc..f17b65af9c9b 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -1326,7 +1326,7 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, int command; __u8 control; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto out; /* FIXME: should we get our own field? */ diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 1f81f8e7c61d..7bf5b913828b 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -1062,7 +1062,8 @@ void irlmp_discovery_expiry(discinfo_t *expiries, int number) for(i = 0; i < number; i++) { /* Check if we should notify client */ if ((client->expir_callback) && - (client->hint_mask.word & u16ho(expiries[i].hints) + (client->hint_mask.word & + get_unaligned((__u16 *)expiries[i].hints) & 0x7f7f) ) client->expir_callback(&(expiries[i]), EXPIRY_TIMEOUT, @@ -1086,7 +1087,7 @@ discovery_t *irlmp_get_discovery_response(void) IRDA_ASSERT(irlmp != NULL, return NULL;); - u16ho(irlmp->discovery_rsp.data.hints) = irlmp->hints.word; + put_unaligned(irlmp->hints.word, (__u16 *)irlmp->discovery_rsp.data.hints); /* * Set character set for device name (we use ASCII), and diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index b001c361ad30..bccf4d0059f0 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -241,6 +241,7 @@ #include <linux/module.h> #include <linux/kernel.h> +#include <linux/smp_lock.h> #include <linux/skbuff.h> #include <linux/tty.h> #include <linux/proc_fs.h> diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index 75497e55927d..cf9a4b531a98 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -10,6 +10,7 @@ #include "irnet_irda.h" /* Private header */ #include <linux/seq_file.h> +#include <asm/unaligned.h> /* * PPP disconnect work: we need to make sure we're in @@ -1673,7 +1674,7 @@ irnet_discovery_indication(discinfo_t * discovery, /* Notify the control channel */ irnet_post_event(NULL, IRNET_DISCOVER, discovery->saddr, discovery->daddr, discovery->info, - u16ho(discovery->hints)); + get_unaligned((__u16 *)discovery->hints)); DEXIT(IRDA_OCB_TRACE, "\n"); } @@ -1704,7 +1705,7 @@ irnet_expiry_indication(discinfo_t * expiry, /* Notify the control channel */ irnet_post_event(NULL, IRNET_EXPIRE, expiry->saddr, expiry->daddr, expiry->info, - u16ho(expiry->hints)); + get_unaligned((__u16 *)expiry->hints)); DEXIT(IRDA_OCB_TRACE, "\n"); } diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index e0eab5927c4f..6d8ae03c14f5 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -479,6 +479,7 @@ dev_irnet_open(struct inode * inode, ap = kzalloc(sizeof(*ap), GFP_KERNEL); DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n"); + lock_kernel(); /* initialize the irnet structure */ ap->file = file; @@ -500,6 +501,7 @@ dev_irnet_open(struct inode * inode, { DERROR(FS_ERROR, "Can't setup IrDA link...\n"); kfree(ap); + unlock_kernel(); return err; } @@ -510,6 +512,7 @@ dev_irnet_open(struct inode * inode, file->private_data = ap; DEXIT(FS_TRACE, " - ap=0x%p\n", ap); + unlock_kernel(); return 0; } @@ -628,8 +631,8 @@ dev_irnet_poll(struct file * file, * This is the way pppd configure us and control us while the PPP * instance is active. */ -static int -dev_irnet_ioctl(struct inode * inode, +static long +dev_irnet_ioctl( struct file * file, unsigned int cmd, unsigned long arg) @@ -660,6 +663,7 @@ dev_irnet_ioctl(struct inode * inode, { DEBUG(FS_INFO, "Entering PPP discipline.\n"); /* PPP channel setup (ap->chan in configued in dev_irnet_open())*/ + lock_kernel(); err = ppp_register_channel(&ap->chan); if(err == 0) { @@ -672,12 +676,14 @@ dev_irnet_ioctl(struct inode * inode, } else DERROR(FS_ERROR, "Can't setup PPP channel...\n"); + unlock_kernel(); } else { /* In theory, should be N_TTY */ DEBUG(FS_INFO, "Exiting PPP discipline.\n"); /* Disconnect from the generic PPP layer */ + lock_kernel(); if(ap->ppp_open) { ap->ppp_open = 0; @@ -686,24 +692,20 @@ dev_irnet_ioctl(struct inode * inode, else DERROR(FS_ERROR, "Channel not registered !\n"); err = 0; + unlock_kernel(); } break; /* Query PPP channel and unit number */ case PPPIOCGCHAN: - if(!ap->ppp_open) - break; - if(put_user(ppp_channel_index(&ap->chan), (int __user *)argp)) - break; - DEBUG(FS_INFO, "Query channel.\n"); - err = 0; + if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan), + (int __user *)argp)) + err = 0; break; case PPPIOCGUNIT: - if(!ap->ppp_open) - break; - if(put_user(ppp_unit_number(&ap->chan), (int __user *)argp)) - break; - DEBUG(FS_INFO, "Query unit number.\n"); + lock_kernel(); + if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan), + (int __user *)argp)) err = 0; break; @@ -723,34 +725,39 @@ dev_irnet_ioctl(struct inode * inode, DEBUG(FS_INFO, "Standard PPP ioctl.\n"); if(!capable(CAP_NET_ADMIN)) err = -EPERM; - else + else { + lock_kernel(); err = ppp_irnet_ioctl(&ap->chan, cmd, arg); + unlock_kernel(); + } break; /* TTY IOCTLs : Pretend that we are a tty, to keep pppd happy */ /* Get termios */ case TCGETS: DEBUG(FS_INFO, "Get termios.\n"); + lock_kernel(); #ifndef TCGETS2 - if(kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios)) - break; + if(!kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios)) + err = 0; #else if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios)) - break; + err = 0; #endif - err = 0; + unlock_kernel(); break; /* Set termios */ case TCSETSF: DEBUG(FS_INFO, "Set termios.\n"); + lock_kernel(); #ifndef TCGETS2 - if(user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp)) - break; + if(!user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp)) + err = 0; #else - if(user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp)) - break; + if(!user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp)) + err = 0; #endif - err = 0; + unlock_kernel(); break; /* Set DTR/RTS */ @@ -773,7 +780,9 @@ dev_irnet_ioctl(struct inode * inode, * We should also worry that we don't accept junk here and that * we get rid of our own buffers */ #ifdef FLUSH_TO_PPP + lock_kernel(); ppp_output_wakeup(&ap->chan); + unlock_kernel(); #endif /* FLUSH_TO_PPP */ err = 0; break; @@ -788,7 +797,7 @@ dev_irnet_ioctl(struct inode * inode, default: DERROR(FS_ERROR, "Unsupported ioctl (0x%X)\n", cmd); - err = -ENOIOCTLCMD; + err = -ENOTTY; } DEXIT(FS_TRACE, " - err = 0x%X\n", err); diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h index d2beb7df8f7f..d9f8bd4ebd05 100644 --- a/net/irda/irnet/irnet_ppp.h +++ b/net/irda/irnet/irnet_ppp.h @@ -76,9 +76,8 @@ static ssize_t static unsigned int dev_irnet_poll(struct file *, poll_table *); -static int - dev_irnet_ioctl(struct inode *, - struct file *, +static long + dev_irnet_ioctl(struct file *, unsigned int, unsigned long); /* ------------------------ PPP INTERFACE ------------------------ */ @@ -102,7 +101,7 @@ static struct file_operations irnet_device_fops = .read = dev_irnet_read, .write = dev_irnet_write, .poll = dev_irnet_poll, - .ioctl = dev_irnet_ioctl, + .unlocked_ioctl = dev_irnet_ioctl, .open = dev_irnet_open, .release = dev_irnet_close /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */ diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 9e1fb82e3220..2f05ec1037ab 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -101,8 +101,8 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); - if (IS_ERR(hdr)) { - ret = PTR_ERR(hdr); + if (hdr == NULL) { + ret = -EMSGSIZE; goto err_out; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 7b0038f45b16..29f7baa25110 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -644,6 +644,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, } txmsg.class = 0; + memcpy(&txmsg.class, skb->data, skb->len >= 4 ? 4 : skb->len); txmsg.tag = iucv->send_tag++; memcpy(skb->cb, &txmsg.tag, 4); skb_queue_tail(&iucv->send_skb_q, skb); @@ -1135,8 +1136,7 @@ static void iucv_callback_txdone(struct iucv_path *path, if (this) kfree_skb(this); } - if (!this) - printk(KERN_ERR "AF_IUCV msg tag %u not found\n", msg->tag); + BUG_ON(!this); if (sk->sk_state == IUCV_CLOSING) { if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) { @@ -1196,7 +1196,7 @@ static int __init afiucv_init(void) } cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); if (unlikely(err)) { - printk(KERN_ERR "AF_IUCV needs the VM userid\n"); + WARN_ON(err); err = -EPROTONOSUPPORT; goto out; } @@ -1210,7 +1210,6 @@ static int __init afiucv_init(void) err = sock_register(&iucv_sock_family_ops); if (err) goto out_proto; - printk(KERN_INFO "AF_IUCV lowlevel driver initialized\n"); return 0; out_proto: @@ -1226,8 +1225,6 @@ static void __exit afiucv_exit(void) sock_unregister(PF_IUCV); proto_unregister(&iucv_proto); iucv_unregister(&af_iucv_handler, 0); - - printk(KERN_INFO "AF_IUCV lowlevel driver unloaded\n"); } module_init(afiucv_init); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 918970762131..705959b31e24 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -474,14 +474,14 @@ static void iucv_setmask_mp(void) { int cpu; - preempt_disable(); + get_online_cpus(); for_each_online_cpu(cpu) /* Enable all cpus with a declared buffer. */ if (cpu_isset(cpu, iucv_buffer_cpumask) && !cpu_isset(cpu, iucv_irq_cpumask)) smp_call_function_single(cpu, iucv_allow_cpu, - NULL, 0, 1); - preempt_enable(); + NULL, 1); + put_online_cpus(); } /** @@ -497,8 +497,8 @@ static void iucv_setmask_up(void) /* Disable all cpu but the first in cpu_irq_cpumask. */ cpumask = iucv_irq_cpumask; cpu_clear(first_cpu(iucv_irq_cpumask), cpumask); - for_each_cpu_mask(cpu, cpumask) - smp_call_function_single(cpu, iucv_block_cpu, NULL, 0, 1); + for_each_cpu_mask_nr(cpu, cpumask) + smp_call_function_single(cpu, iucv_block_cpu, NULL, 1); } /** @@ -521,16 +521,18 @@ static int iucv_enable(void) goto out; /* Declare per cpu buffers. */ rc = -EIO; - preempt_disable(); + get_online_cpus(); for_each_online_cpu(cpu) - smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1); + smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); preempt_enable(); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ goto out_path; + put_online_cpus(); return 0; out_path: + put_online_cpus(); kfree(iucv_path_table); out: return rc; @@ -545,7 +547,7 @@ out: */ static void iucv_disable(void) { - on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1); + on_each_cpu(iucv_retrieve_cpu, NULL, 1); kfree(iucv_path_table); } @@ -564,8 +566,11 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, return NOTIFY_BAD; iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); - if (!iucv_param[cpu]) + if (!iucv_param[cpu]) { + kfree(iucv_irq_data[cpu]); + iucv_irq_data[cpu] = NULL; return NOTIFY_BAD; + } break; case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: @@ -580,7 +585,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1); + smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: @@ -589,16 +594,16 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, if (cpus_empty(cpumask)) /* Can't offline last IUCV enabled cpu. */ return NOTIFY_BAD; - smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 0, 1); + smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1); if (cpus_empty(iucv_irq_cpumask)) smp_call_function_single(first_cpu(iucv_buffer_cpumask), - iucv_allow_cpu, NULL, 0, 1); + iucv_allow_cpu, NULL, 1); break; } return NOTIFY_OK; } -static struct notifier_block __cpuinitdata iucv_cpu_notifier = { +static struct notifier_block __refdata iucv_cpu_notifier = { .notifier_call = iucv_cpu_notify, }; @@ -652,7 +657,7 @@ static void iucv_cleanup_queue(void) * pending interrupts force them to the work queue by calling * an empty function on all cpus. */ - smp_call_function(__iucv_cleanup_queue, NULL, 0, 1); + smp_call_function(__iucv_cleanup_queue, NULL, 1); spin_lock_irq(&iucv_queue_lock); list_for_each_entry_safe(p, n, &iucv_task_queue, list) { /* Remove stale work items from the task queue. */ @@ -1559,16 +1564,11 @@ static void iucv_external_interrupt(u16 code) p = iucv_irq_data[smp_processor_id()]; if (p->ippathid >= iucv_max_pathid) { - printk(KERN_WARNING "iucv_do_int: Got interrupt with " - "pathid %d > max_connections (%ld)\n", - p->ippathid, iucv_max_pathid - 1); + WARN_ON(p->ippathid >= iucv_max_pathid); iucv_sever_pathid(p->ippathid, iucv_error_no_listener); return; } - if (p->iptype < 0x01 || p->iptype > 0x09) { - printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n"); - return; - } + BUG_ON(p->iptype < 0x01 || p->iptype > 0x09); work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC); if (!work) { printk(KERN_WARNING "iucv_external_interrupt: out of memory\n"); diff --git a/net/key/af_key.c b/net/key/af_key.c index 9e7236ff6bcc..d628df97e02e 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -96,8 +96,8 @@ static void pfkey_sock_destruct(struct sock *sk) return; } - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); atomic_dec(&pfkey_socks_nr); } @@ -579,25 +579,43 @@ static uint8_t pfkey_proto_from_xfrm(uint8_t proto) return (proto ? proto : IPSEC_PROTO_ANY); } -static int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, - xfrm_address_t *xaddr) +static inline int pfkey_sockaddr_len(sa_family_t family) { - switch (((struct sockaddr*)(addr + 1))->sa_family) { + switch (family) { + case AF_INET: + return sizeof(struct sockaddr_in); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + return sizeof(struct sockaddr_in6); +#endif + } + return 0; +} + +static +int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr) +{ + switch (sa->sa_family) { case AF_INET: xaddr->a4 = - ((struct sockaddr_in *)(addr + 1))->sin_addr.s_addr; + ((struct sockaddr_in *)sa)->sin_addr.s_addr; return AF_INET; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: memcpy(xaddr->a6, - &((struct sockaddr_in6 *)(addr + 1))->sin6_addr, + &((struct sockaddr_in6 *)sa)->sin6_addr, sizeof(struct in6_addr)); return AF_INET6; #endif - default: - return 0; } - /* NOTREACHED */ + return 0; +} + +static +int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, xfrm_address_t *xaddr) +{ + return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1), + xaddr); } static struct xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void **ext_hdrs) @@ -642,20 +660,11 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void ** } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) + static int pfkey_sockaddr_size(sa_family_t family) { - switch (family) { - case AF_INET: - return PFKEY_ALIGN8(sizeof(struct sockaddr_in)); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - return PFKEY_ALIGN8(sizeof(struct sockaddr_in6)); -#endif - default: - return 0; - } - /* NOTREACHED */ + return PFKEY_ALIGN8(pfkey_sockaddr_len(family)); } static inline int pfkey_mode_from_xfrm(int mode) @@ -687,6 +696,36 @@ static inline int pfkey_mode_to_xfrm(int mode) } } +static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port, + struct sockaddr *sa, + unsigned short family) +{ + switch (family) { + case AF_INET: + { + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + sin->sin_family = AF_INET; + sin->sin_port = port; + sin->sin_addr.s_addr = xaddr->a4; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + return 32; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = port; + sin6->sin6_flowinfo = 0; + ipv6_addr_copy(&sin6->sin6_addr, (struct in6_addr *)xaddr->a6); + sin6->sin6_scope_id = 0; + return 128; + } +#endif + } + return 0; +} + static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, int hsc) { @@ -697,13 +736,9 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, struct sadb_address *addr; struct sadb_key *key; struct sadb_x_sa2 *sa2; - struct sockaddr_in *sin; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif int size; int auth_key_size = 0; int encrypt_key_size = 0; @@ -732,14 +767,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, } /* identity & sensitivity */ - - if ((x->props.family == AF_INET && - x->sel.saddr.a4 != x->props.saddr.a4) -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - || (x->props.family == AF_INET6 && - memcmp (x->sel.saddr.a6, x->props.saddr.a6, sizeof (struct in6_addr))) -#endif - ) + if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, x->props.family)) size += sizeof(struct sadb_address) + sockaddr_size; if (add_keys) { @@ -861,29 +889,12 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, protocol's number." - RFC2367 */ addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - addr->sadb_address_prefixlen = 32; - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->props.saddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; - - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, x->props.saddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(&x->props.saddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) BUG(); /* dst address */ @@ -894,70 +905,32 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; - addr->sadb_address_prefixlen = 32; /* XXX */ addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->id.daddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - if (x->sel.saddr.a4 != x->props.saddr.a4) { - addr = (struct sadb_address*) skb_put(skb, - sizeof(struct sadb_address)+sockaddr_size); - addr->sadb_address_len = - (sizeof(struct sadb_address)+sockaddr_size)/ - sizeof(uint64_t); - addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY; - addr->sadb_address_proto = - pfkey_proto_from_xfrm(x->sel.proto); - addr->sadb_address_prefixlen = x->sel.prefixlen_s; - addr->sadb_address_reserved = 0; - - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->sel.saddr.a4; - sin->sin_port = x->sel.sport; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(&x->id.daddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) + BUG(); - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, x->id.daddr.a6, sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; + if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, + x->props.family)) { + addr = (struct sadb_address*) skb_put(skb, + sizeof(struct sadb_address)+sockaddr_size); + addr->sadb_address_len = + (sizeof(struct sadb_address)+sockaddr_size)/ + sizeof(uint64_t); + addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY; + addr->sadb_address_proto = + pfkey_proto_from_xfrm(x->sel.proto); + addr->sadb_address_prefixlen = x->sel.prefixlen_s; + addr->sadb_address_reserved = 0; - if (memcmp (x->sel.saddr.a6, x->props.saddr.a6, - sizeof(struct in6_addr))) { - addr = (struct sadb_address *) skb_put(skb, - sizeof(struct sadb_address)+sockaddr_size); - addr->sadb_address_len = - (sizeof(struct sadb_address)+sockaddr_size)/ - sizeof(uint64_t); - addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY; - addr->sadb_address_proto = - pfkey_proto_from_xfrm(x->sel.proto); - addr->sadb_address_prefixlen = x->sel.prefixlen_s; - addr->sadb_address_reserved = 0; - - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = x->sel.sport; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, x->sel.saddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } + pfkey_sockaddr_fill(&x->sel.saddr, x->sel.sport, + (struct sockaddr *) (addr + 1), + x->props.family); } -#endif - else - BUG(); /* auth key */ if (add_keys && auth_key_size) { @@ -1251,7 +1224,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, x->sel.prefixlen_s = addr->sadb_address_prefixlen; } - if (x->props.mode == XFRM_MODE_TRANSPORT) + if (!x->sel.family) x->sel.family = x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { @@ -1853,10 +1826,6 @@ static int parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) { struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr; - struct sockaddr_in *sin; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif int mode; if (xp->xfrm_nr >= XFRM_MAX_DEPTH) @@ -1881,31 +1850,19 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) /* addresses present only in tunnel mode */ if (t->mode == XFRM_MODE_TUNNEL) { - struct sockaddr *sa; - sa = (struct sockaddr *)(rq+1); - switch(sa->sa_family) { - case AF_INET: - sin = (struct sockaddr_in*)sa; - t->saddr.a4 = sin->sin_addr.s_addr; - sin++; - if (sin->sin_family != AF_INET) - return -EINVAL; - t->id.daddr.a4 = sin->sin_addr.s_addr; - break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - sin6 = (struct sockaddr_in6*)sa; - memcpy(t->saddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr)); - sin6++; - if (sin6->sin6_family != AF_INET6) - return -EINVAL; - memcpy(t->id.daddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr)); - break; -#endif - default: + u8 *sa = (u8 *) (rq + 1); + int family, socklen; + + family = pfkey_sockaddr_extract((struct sockaddr *)sa, + &t->saddr); + if (!family) return -EINVAL; - } - t->encap_family = sa->sa_family; + + socklen = pfkey_sockaddr_len(family); + if (pfkey_sockaddr_extract((struct sockaddr *)(sa + socklen), + &t->id.daddr) != family) + return -EINVAL; + t->encap_family = family; } else t->encap_family = xp->family; @@ -1952,9 +1909,7 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) for (i=0; i<xp->xfrm_nr; i++) { t = xp->xfrm_vec + i; - socklen += (t->encap_family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)); + socklen += pfkey_sockaddr_len(t->encap_family); } return sizeof(struct sadb_msg) + @@ -1987,18 +1942,12 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in struct sadb_address *addr; struct sadb_lifetime *lifetime; struct sadb_x_policy *pol; - struct sockaddr_in *sin; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif int i; int size; int sockaddr_size = pfkey_sockaddr_size(xp->family); - int socklen = (xp->family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)); + int socklen = pfkey_sockaddr_len(xp->family); size = pfkey_xfrm_policy2msg_size(xp); @@ -2016,26 +1965,10 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto); addr->sadb_address_prefixlen = xp->selector.prefixlen_s; addr->sadb_address_reserved = 0; - /* src address */ - if (xp->family == AF_INET) { - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = xp->selector.saddr.a4; - sin->sin_port = xp->selector.sport; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (xp->family == AF_INET6) { - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = xp->selector.sport; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, xp->selector.saddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + if (!pfkey_sockaddr_fill(&xp->selector.saddr, + xp->selector.sport, + (struct sockaddr *) (addr + 1), + xp->family)) BUG(); /* dst address */ @@ -2048,26 +1981,10 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto); addr->sadb_address_prefixlen = xp->selector.prefixlen_d; addr->sadb_address_reserved = 0; - if (xp->family == AF_INET) { - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = xp->selector.daddr.a4; - sin->sin_port = xp->selector.dport; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (xp->family == AF_INET6) { - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = xp->selector.dport; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, xp->selector.daddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else - BUG(); + + pfkey_sockaddr_fill(&xp->selector.daddr, xp->selector.dport, + (struct sockaddr *) (addr + 1), + xp->family); /* hard time */ lifetime = (struct sadb_lifetime *) skb_put(skb, @@ -2121,12 +2038,13 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in int mode; req_size = sizeof(struct sadb_x_ipsecrequest); - if (t->mode == XFRM_MODE_TUNNEL) - req_size += ((t->encap_family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)) * 2); - else + if (t->mode == XFRM_MODE_TUNNEL) { + socklen = pfkey_sockaddr_len(t->encap_family); + req_size += socklen * 2; + } else { size -= 2*socklen; + socklen = 0; + } rq = (void*)skb_put(skb, req_size); pol->sadb_x_policy_len += req_size/8; memset(rq, 0, sizeof(*rq)); @@ -2141,42 +2059,15 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in if (t->optional) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; + if (t->mode == XFRM_MODE_TUNNEL) { - switch (t->encap_family) { - case AF_INET: - sin = (void*)(rq+1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = t->saddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - sin++; - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = t->id.daddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - sin6 = (void*)(rq+1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, t->saddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - - sin6++; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, t->id.daddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - break; -#endif - default: - break; - } + u8 *sa = (void *)(rq + 1); + pfkey_sockaddr_fill(&t->saddr, 0, + (struct sockaddr *)sa, + t->encap_family); + pfkey_sockaddr_fill(&t->id.daddr, 0, + (struct sockaddr *) (sa + socklen), + t->encap_family); } } @@ -2459,61 +2350,31 @@ out: #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_sockaddr_pair_size(sa_family_t family) { - switch (family) { - case AF_INET: - return PFKEY_ALIGN8(sizeof(struct sockaddr_in) * 2); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - return PFKEY_ALIGN8(sizeof(struct sockaddr_in6) * 2); -#endif - default: - return 0; - } - /* NOTREACHED */ + return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); } static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family) { - struct sockaddr *sa = (struct sockaddr *)(rq + 1); + u8 *sa = (u8 *) (rq + 1); + int af, socklen; + if (rq->sadb_x_ipsecrequest_len < - pfkey_sockaddr_pair_size(sa->sa_family)) + pfkey_sockaddr_pair_size(((struct sockaddr *)sa)->sa_family)) return -EINVAL; - switch (sa->sa_family) { - case AF_INET: - { - struct sockaddr_in *sin; - sin = (struct sockaddr_in *)sa; - if ((sin+1)->sin_family != AF_INET) - return -EINVAL; - memcpy(&saddr->a4, &sin->sin_addr, sizeof(saddr->a4)); - sin++; - memcpy(&daddr->a4, &sin->sin_addr, sizeof(daddr->a4)); - *family = AF_INET; - break; - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - { - struct sockaddr_in6 *sin6; - sin6 = (struct sockaddr_in6 *)sa; - if ((sin6+1)->sin6_family != AF_INET6) - return -EINVAL; - memcpy(&saddr->a6, &sin6->sin6_addr, - sizeof(saddr->a6)); - sin6++; - memcpy(&daddr->a6, &sin6->sin6_addr, - sizeof(daddr->a6)); - *family = AF_INET6; - break; - } -#endif - default: + af = pfkey_sockaddr_extract((struct sockaddr *) sa, + saddr); + if (!af) + return -EINVAL; + + socklen = pfkey_sockaddr_len(af); + if (pfkey_sockaddr_extract((struct sockaddr *) (sa + socklen), + daddr) != af) return -EINVAL; - } + *family = af; return 0; } @@ -3030,6 +2891,9 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) { + if (atomic_read(&pfkey_socks_nr) == 0) + return 0; + switch (c->event) { case XFRM_MSG_EXPIRE: return key_notify_sa_expire(x, c); @@ -3091,10 +2955,6 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct struct sadb_msg *hdr; struct sadb_address *addr; struct sadb_x_policy *pol; - struct sockaddr_in *sin; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif int sockaddr_size; int size; struct sadb_x_sec_ctx *sec_ctx; @@ -3143,29 +3003,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - addr->sadb_address_prefixlen = 32; - - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->props.saddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; - - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, - x->props.saddr.a6, sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(&x->props.saddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) BUG(); /* dst address */ @@ -3177,29 +3019,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - addr->sadb_address_prefixlen = 32; - - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->id.daddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; - - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, - x->id.daddr.a6, sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(&x->id.daddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) BUG(); pol = (struct sadb_x_policy *) skb_put(skb, sizeof(struct sadb_x_policy)); @@ -3325,10 +3149,6 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, struct sadb_sa *sa; struct sadb_address *addr; struct sadb_x_nat_t_port *n_port; - struct sockaddr_in *sin; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif int sockaddr_size; int size; __u8 satype = (x->id.proto == IPPROTO_ESP ? SADB_SATYPE_ESP : 0); @@ -3392,29 +3212,11 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - addr->sadb_address_prefixlen = 32; - - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->props.saddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; - - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, - x->props.saddr.a6, sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(&x->props.saddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_SPORT (old port) */ @@ -3433,28 +3235,11 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - addr->sadb_address_prefixlen = 32; - - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = ipaddr->a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; - - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, &ipaddr->a6, sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(ipaddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_DPORT (new port) */ @@ -3472,10 +3257,6 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, struct xfrm_selector *sel) { struct sadb_address *addr; - struct sockaddr_in *sin; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize); addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8; addr->sadb_address_exttype = type; @@ -3484,50 +3265,16 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, switch (type) { case SADB_EXT_ADDRESS_SRC: - if (sel->family == AF_INET) { - addr->sadb_address_prefixlen = sel->prefixlen_s; - sin = (struct sockaddr_in *)(addr + 1); - sin->sin_family = AF_INET; - memcpy(&sin->sin_addr.s_addr, &sel->saddr, - sizeof(sin->sin_addr.s_addr)); - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (sel->family == AF_INET6) { - addr->sadb_address_prefixlen = sel->prefixlen_s; - sin6 = (struct sockaddr_in6 *)(addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - memcpy(&sin6->sin6_addr.s6_addr, &sel->saddr, - sizeof(sin6->sin6_addr.s6_addr)); - } -#endif + addr->sadb_address_prefixlen = sel->prefixlen_s; + pfkey_sockaddr_fill(&sel->saddr, 0, + (struct sockaddr *)(addr + 1), + sel->family); break; case SADB_EXT_ADDRESS_DST: - if (sel->family == AF_INET) { - addr->sadb_address_prefixlen = sel->prefixlen_d; - sin = (struct sockaddr_in *)(addr + 1); - sin->sin_family = AF_INET; - memcpy(&sin->sin_addr.s_addr, &sel->daddr, - sizeof(sin->sin_addr.s_addr)); - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (sel->family == AF_INET6) { - addr->sadb_address_prefixlen = sel->prefixlen_d; - sin6 = (struct sockaddr_in6 *)(addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - memcpy(&sin6->sin6_addr.s6_addr, &sel->daddr, - sizeof(sin6->sin6_addr.s6_addr)); - } -#endif + addr->sadb_address_prefixlen = sel->prefixlen_d; + pfkey_sockaddr_fill(&sel->daddr, 0, + (struct sockaddr *)(addr + 1), + sel->family); break; default: return -EINVAL; @@ -3542,10 +3289,8 @@ static int set_ipsecrequest(struct sk_buff *skb, xfrm_address_t *src, xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; - struct sockaddr_in *sin; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif + u8 *sa; + int socklen = pfkey_sockaddr_len(family); int size_req; size_req = sizeof(struct sadb_x_ipsecrequest) + @@ -3559,38 +3304,10 @@ static int set_ipsecrequest(struct sk_buff *skb, rq->sadb_x_ipsecrequest_level = level; rq->sadb_x_ipsecrequest_reqid = reqid; - switch (family) { - case AF_INET: - sin = (struct sockaddr_in *)(rq + 1); - sin->sin_family = AF_INET; - memcpy(&sin->sin_addr.s_addr, src, - sizeof(sin->sin_addr.s_addr)); - sin++; - sin->sin_family = AF_INET; - memcpy(&sin->sin_addr.s_addr, dst, - sizeof(sin->sin_addr.s_addr)); - break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - sin6 = (struct sockaddr_in6 *)(rq + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - memcpy(&sin6->sin6_addr.s6_addr, src, - sizeof(sin6->sin6_addr.s6_addr)); - sin6++; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - memcpy(&sin6->sin6_addr.s6_addr, dst, - sizeof(sin6->sin6_addr.s6_addr)); - break; -#endif - default: + sa = (u8 *) (rq + 1); + if (!pfkey_sockaddr_fill(src, 0, (struct sockaddr *)sa, family) || + !pfkey_sockaddr_fill(dst, 0, (struct sockaddr *)(sa + socklen), family)) return -EINVAL; - } return 0; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 97101dcde4c0..5bcc452a247f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -128,10 +128,8 @@ static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock) static void llc_ui_sk_init(struct socket *sock, struct sock *sk) { + sock_graft(sk, sock); sk->sk_type = sock->type; - sk->sk_sleep = &sock->wait; - sk->sk_socket = sock; - sock->sk = sk; sock->ops = &llc_ui_ops; } diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 1c45f172991e..57ad974e4d94 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -150,7 +150,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, int (*rcv)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; /* diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index e2ddde755019..008de1fc42ca 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -286,12 +286,14 @@ void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb, * * Sends received pdus to the sap state machine. */ -static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb) +static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, + struct sock *sk) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; + skb->sk = sk; llc_sap_state_process(sap, skb); } @@ -360,8 +362,7 @@ static void llc_sap_mcast(struct llc_sap *sap, break; sock_hold(sk); - skb_set_owner_r(skb1, sk); - llc_sap_rcv(sap, skb1); + llc_sap_rcv(sap, skb1, sk); sock_put(sk); } read_unlock_bh(&sap->sk_list.lock); @@ -381,8 +382,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb) } else { struct sock *sk = llc_lookup_dgram(sap, &laddr); if (sk) { - skb_set_owner_r(skb, sk); - llc_sap_rcv(sap, skb); + llc_sap_rcv(sap, skb, sk); sock_put(sk); } else kfree_skb(skb); diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index a24b459dd45a..80d693392b0f 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -7,7 +7,6 @@ config MAC80211 select CRC32 select WIRELESS_EXT select CFG80211 - select NET_SCH_FIFO ---help--- This option enables the hardware independent IEEE 802.11 networking stack. @@ -15,6 +14,14 @@ config MAC80211 menu "Rate control algorithm selection" depends on MAC80211 != n +config MAC80211_RC_PID + bool "PID controller based rate control algorithm" if EMBEDDED + default y + ---help--- + This option enables a TX rate control algorithm for + mac80211 that uses a PID controller to select the TX + rate. + choice prompt "Default rate control algorithm" default MAC80211_RC_DEFAULT_PID @@ -26,40 +33,19 @@ choice config MAC80211_RC_DEFAULT_PID bool "PID controller based rate control algorithm" - select MAC80211_RC_PID + depends on MAC80211_RC_PID ---help--- Select the PID controller based rate control as the default rate control algorithm. You should choose this unless you know what you are doing. -config MAC80211_RC_DEFAULT_NONE - bool "No default algorithm" - depends on EMBEDDED - help - Selecting this option will select no default algorithm - and allow you to not build any. Do not choose this - option unless you know your driver comes with another - suitable algorithm. endchoice -comment "Selecting 'y' for an algorithm will" -comment "build the algorithm into mac80211." - config MAC80211_RC_DEFAULT string default "pid" if MAC80211_RC_DEFAULT_PID default "" -config MAC80211_RC_PID - tristate "PID controller based rate control algorithm" - ---help--- - This option enables a TX rate control algorithm for - mac80211 that uses a PID controller to select the TX - rate. - - Say Y or M unless you're sure you want to use a - different rate control algorithm. - endmenu config MAC80211_MESH @@ -89,10 +75,16 @@ config MAC80211_DEBUGFS Say N unless you know you need this. +menuconfig MAC80211_DEBUG_MENU + bool "Select mac80211 debugging features" + depends on MAC80211 + ---help--- + This option collects various mac80211 debug settings. + config MAC80211_DEBUG_PACKET_ALIGNMENT bool "Enable packet alignment debugging" - depends on MAC80211 - help + depends on MAC80211_DEBUG_MENU + ---help--- This option is recommended for driver authors and strongly discouraged for everybody else, it will trigger a warning when a driver hands mac80211 a buffer that is aligned in @@ -101,33 +93,95 @@ config MAC80211_DEBUG_PACKET_ALIGNMENT Say N unless you're writing a mac80211 based driver. -config MAC80211_DEBUG - bool "Enable debugging output" - depends on MAC80211 +config MAC80211_NOINLINE + bool "Do not inline TX/RX handlers" + depends on MAC80211_DEBUG_MENU ---help--- - This option will enable debug tracing output for the - ieee80211 network stack. + This option affects code generation in mac80211, when + selected some functions are marked "noinline" to allow + easier debugging of problems in the transmit and receive + paths. + + This option increases code size a bit and inserts a lot + of function calls in the code, but is otherwise safe to + enable. - If you are not trying to debug or develop the ieee80211 - subsystem, you most likely want to say N here. + If unsure, say N unless you expect to be finding problems + in mac80211. + +config MAC80211_VERBOSE_DEBUG + bool "Verbose debugging output" + depends on MAC80211_DEBUG_MENU + ---help--- + Selecting this option causes mac80211 to print out + many debugging messages. It should not be selected + on production systems as some of the messages are + remotely triggerable. + + Do not select this option. config MAC80211_HT_DEBUG - bool "Enable HT debugging output" - depends on MAC80211_DEBUG + bool "Verbose HT debugging" + depends on MAC80211_DEBUG_MENU ---help--- This option enables 802.11n High Throughput features debug tracing output. - If you are not trying to debug of develop the ieee80211 - subsystem, you most likely want to say N here. + It should not be selected on production systems as some + of the messages are remotely triggerable. -config MAC80211_VERBOSE_DEBUG - bool "Verbose debugging output" - depends on MAC80211_DEBUG + Do not select this option. + +config MAC80211_TKIP_DEBUG + bool "Verbose TKIP debugging" + depends on MAC80211_DEBUG_MENU + ---help--- + Selecting this option causes mac80211 to print out + very verbose TKIP debugging messages. It should not + be selected on production systems as those messages + are remotely triggerable. + + Do not select this option. + +config MAC80211_IBSS_DEBUG + bool "Verbose IBSS debugging" + depends on MAC80211_DEBUG_MENU + ---help--- + Selecting this option causes mac80211 to print out + very verbose IBSS debugging messages. It should not + be selected on production systems as those messages + are remotely triggerable. + + Do not select this option. + +config MAC80211_VERBOSE_PS_DEBUG + bool "Verbose powersave mode debugging" + depends on MAC80211_DEBUG_MENU + ---help--- + Selecting this option causes mac80211 to print out very + verbose power save mode debugging messages (when mac80211 + is an AP and has power saving stations.) + It should not be selected on production systems as those + messages are remotely triggerable. + + Do not select this option. + +config MAC80211_VERBOSE_MPL_DEBUG + bool "Verbose mesh peer link debugging" + depends on MAC80211_DEBUG_MENU + depends on MAC80211_MESH + ---help--- + Selecting this option causes mac80211 to print out very + verbose mesh peer link debugging messages (when mac80211 + is taking part in a mesh network). + It should not be selected on production systems as those + messages are remotely triggerable. + + Do not select this option. config MAC80211_LOWTX_FRAME_DUMP bool "Debug frame dumping" - depends on MAC80211_DEBUG + depends on MAC80211_DEBUG_MENU ---help--- Selecting this option will cause the stack to print a message for each frame that is handed @@ -138,30 +192,20 @@ config MAC80211_LOWTX_FRAME_DUMP If unsure, say N and insert the debugging code you require into the driver you are debugging. -config TKIP_DEBUG - bool "TKIP debugging" - depends on MAC80211_DEBUG - config MAC80211_DEBUG_COUNTERS bool "Extra statistics for TX/RX debugging" - depends on MAC80211_DEBUG - -config MAC80211_IBSS_DEBUG - bool "Support for IBSS testing" - depends on MAC80211_DEBUG + depends on MAC80211_DEBUG_MENU + depends on MAC80211_DEBUGFS ---help--- - Say Y here if you intend to debug the IBSS code. + Selecting this option causes mac80211 to keep additional + and very verbose statistics about TX and RX handler use + and show them in debugfs. -config MAC80211_VERBOSE_PS_DEBUG - bool "Verbose powersave mode debugging" - depends on MAC80211_DEBUG - ---help--- - Say Y here to print out verbose powersave - mode debug messages. + If unsure, say N. -config MAC80211_VERBOSE_MPL_DEBUG - bool "Verbose mesh peer link debugging" - depends on MAC80211_DEBUG && MAC80211_MESH +config MAC80211_VERBOSE_SPECT_MGMT_DEBUG + bool "Verbose Spectrum Management (IEEE 802.11h)debugging" + depends on MAC80211_DEBUG_MENU ---help--- - Say Y here to print out verbose mesh peer link + Say Y here to print out verbose Spectrum Management (IEEE 802.11h) debug messages. diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 4e5847fd316c..a169b0201d61 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -1,13 +1,5 @@ obj-$(CONFIG_MAC80211) += mac80211.o -# objects for PID algorithm -rc80211_pid-y := rc80211_pid_algo.o -rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o - -# build helper for PID algorithm -rc-pid-y := $(rc80211_pid-y) -rc-pid-m := rc80211_pid.o - # mac80211 objects mac80211-y := \ main.o \ @@ -26,10 +18,10 @@ mac80211-y := \ tx.o \ key.o \ util.o \ + wme.o \ event.o mac80211-$(CONFIG_MAC80211_LEDS) += led.o -mac80211-$(CONFIG_NET_SCHED) += wme.o mac80211-$(CONFIG_MAC80211_DEBUGFS) += \ debugfs.o \ debugfs_sta.o \ @@ -42,10 +34,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \ mesh_plink.o \ mesh_hwmp.o +# objects for PID algorithm +rc80211_pid-y := rc80211_pid_algo.o +rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o -# Build rate control algorithm(s) -CFLAGS_rc80211_pid_algo.o += -DRC80211_PID_COMPILE -mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc-pid-$(CONFIG_MAC80211_RC_PID)) - -# Modular rate algorithms are assigned to mac80211-m - make separate modules -obj-m += $(mac80211-m) +mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 59f1691f62c8..a87cb3ba2df6 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -16,31 +16,28 @@ #include "key.h" #include "aes_ccm.h" - -static void ieee80211_aes_encrypt(struct crypto_cipher *tfm, - const u8 pt[16], u8 ct[16]) -{ - crypto_cipher_encrypt_one(tfm, ct, pt); -} - - -static inline void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *b_0, u8 *aad, - u8 *b, u8 *s_0, u8 *a) +static void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *scratch, u8 *a) { int i; + u8 *b_0, *aad, *b, *s_0; - ieee80211_aes_encrypt(tfm, b_0, b); + b_0 = scratch + 3 * AES_BLOCK_LEN; + aad = scratch + 4 * AES_BLOCK_LEN; + b = scratch; + s_0 = scratch + AES_BLOCK_LEN; + + crypto_cipher_encrypt_one(tfm, b, b_0); /* Extra Authenticate-only data (always two AES blocks) */ for (i = 0; i < AES_BLOCK_LEN; i++) aad[i] ^= b[i]; - ieee80211_aes_encrypt(tfm, aad, b); + crypto_cipher_encrypt_one(tfm, b, aad); aad += AES_BLOCK_LEN; for (i = 0; i < AES_BLOCK_LEN; i++) aad[i] ^= b[i]; - ieee80211_aes_encrypt(tfm, aad, a); + crypto_cipher_encrypt_one(tfm, a, aad); /* Mask out bits from auth-only-b_0 */ b_0[0] &= 0x07; @@ -48,24 +45,26 @@ static inline void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *b_0, u8 *aad, /* S_0 is used to encrypt T (= MIC) */ b_0[14] = 0; b_0[15] = 0; - ieee80211_aes_encrypt(tfm, b_0, s_0); + crypto_cipher_encrypt_one(tfm, s_0, b_0); } void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, - u8 *b_0, u8 *aad, u8 *data, size_t data_len, + u8 *data, size_t data_len, u8 *cdata, u8 *mic) { int i, j, last_len, num_blocks; - u8 *pos, *cpos, *b, *s_0, *e; + u8 *pos, *cpos, *b, *s_0, *e, *b_0, *aad; b = scratch; s_0 = scratch + AES_BLOCK_LEN; e = scratch + 2 * AES_BLOCK_LEN; + b_0 = scratch + 3 * AES_BLOCK_LEN; + aad = scratch + 4 * AES_BLOCK_LEN; num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last_len = data_len % AES_BLOCK_LEN; - aes_ccm_prepare(tfm, b_0, aad, b, s_0, b); + aes_ccm_prepare(tfm, scratch, b); /* Process payload blocks */ pos = data; @@ -77,11 +76,11 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, /* Authentication followed by encryption */ for (i = 0; i < blen; i++) b[i] ^= pos[i]; - ieee80211_aes_encrypt(tfm, b, b); + crypto_cipher_encrypt_one(tfm, b, b); b_0[14] = (j >> 8) & 0xff; b_0[15] = j & 0xff; - ieee80211_aes_encrypt(tfm, b_0, e); + crypto_cipher_encrypt_one(tfm, e, b_0); for (i = 0; i < blen; i++) *cpos++ = *pos++ ^ e[i]; } @@ -92,19 +91,20 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, - u8 *b_0, u8 *aad, u8 *cdata, size_t data_len, - u8 *mic, u8 *data) + u8 *cdata, size_t data_len, u8 *mic, u8 *data) { int i, j, last_len, num_blocks; - u8 *pos, *cpos, *b, *s_0, *a; + u8 *pos, *cpos, *b, *s_0, *a, *b_0, *aad; b = scratch; s_0 = scratch + AES_BLOCK_LEN; a = scratch + 2 * AES_BLOCK_LEN; + b_0 = scratch + 3 * AES_BLOCK_LEN; + aad = scratch + 4 * AES_BLOCK_LEN; num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last_len = data_len % AES_BLOCK_LEN; - aes_ccm_prepare(tfm, b_0, aad, b, s_0, a); + aes_ccm_prepare(tfm, scratch, a); /* Process payload blocks */ cpos = cdata; @@ -116,13 +116,12 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, /* Decryption followed by authentication */ b_0[14] = (j >> 8) & 0xff; b_0[15] = j & 0xff; - ieee80211_aes_encrypt(tfm, b_0, b); + crypto_cipher_encrypt_one(tfm, b, b_0); for (i = 0; i < blen; i++) { *pos = *cpos++ ^ b[i]; a[i] ^= *pos++; } - - ieee80211_aes_encrypt(tfm, a, a); + crypto_cipher_encrypt_one(tfm, a, a); } for (i = 0; i < CCMP_MIC_LEN; i++) { @@ -134,7 +133,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, } -struct crypto_cipher * ieee80211_aes_key_setup_encrypt(const u8 key[]) +struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]) { struct crypto_cipher *tfm; diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h index 885f19030b29..6e7820ef3448 100644 --- a/net/mac80211/aes_ccm.h +++ b/net/mac80211/aes_ccm.h @@ -14,12 +14,12 @@ #define AES_BLOCK_LEN 16 -struct crypto_cipher * ieee80211_aes_key_setup_encrypt(const u8 key[]); +struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]); void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, - u8 *b_0, u8 *aad, u8 *data, size_t data_len, + u8 *data, size_t data_len, u8 *cdata, u8 *mic); int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, - u8 *b_0, u8 *aad, u8 *cdata, size_t data_len, + u8 *cdata, size_t data_len, u8 *mic, u8 *data); void ieee80211_aes_key_free(struct crypto_cipher *tfm); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 699d97b8de5e..8e7ba0e62cf5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -50,14 +50,11 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, struct ieee80211_sub_if_data *sdata; int err; - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) - return -ENODEV; - itype = nl80211_type_to_mac80211_type(type); if (itype == IEEE80211_IF_TYPE_INVALID) return -EINVAL; - err = ieee80211_if_add(local->mdev, name, &dev, itype, params); + err = ieee80211_if_add(local, name, &dev, itype, params); if (err || itype != IEEE80211_IF_TYPE_MNTR || !flags) return err; @@ -68,54 +65,41 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct net_device *dev; - char *name; - - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) - return -ENODEV; /* we're under RTNL */ dev = __dev_get_by_index(&init_net, ifindex); if (!dev) - return 0; + return -ENODEV; - name = dev->name; + ieee80211_if_remove(dev); - return ieee80211_if_remove(local->mdev, name, -1); + return 0; } static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, enum nl80211_iftype type, u32 *flags, struct vif_params *params) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct net_device *dev; enum ieee80211_if_types itype; struct ieee80211_sub_if_data *sdata; - - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) - return -ENODEV; + int ret; /* we're under RTNL */ dev = __dev_get_by_index(&init_net, ifindex); if (!dev) return -ENODEV; - if (netif_running(dev)) - return -EBUSY; - itype = nl80211_type_to_mac80211_type(type); if (itype == IEEE80211_IF_TYPE_INVALID) return -EINVAL; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) - return -EOPNOTSUPP; - - ieee80211_if_reinit(dev); - ieee80211_if_set_type(dev, itype); + ret = ieee80211_if_change_type(sdata, itype); + if (ret) + return ret; if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len) ieee80211_if_sta_set_mesh_id(&sdata->u.sta, @@ -256,8 +240,8 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, case ALG_TKIP: params.cipher = WLAN_CIPHER_SUITE_TKIP; - iv32 = key->u.tkip.iv32; - iv16 = key->u.tkip.iv16; + iv32 = key->u.tkip.tx.iv32; + iv16 = key->u.tkip.tx.iv16; if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && sdata->local->ops->get_tkip_seq) @@ -485,7 +469,7 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, kfree(old); - return ieee80211_if_config_beacon(sdata->dev); + return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); } static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, @@ -539,7 +523,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) synchronize_rcu(); kfree(old); - return ieee80211_if_config_beacon(dev); + return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); } /* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */ @@ -602,6 +586,7 @@ static void sta_apply_parameters(struct ieee80211_local *local, */ if (params->station_flags & STATION_FLAG_CHANGED) { + spin_lock_bh(&sta->lock); sta->flags &= ~WLAN_STA_AUTHORIZED; if (params->station_flags & STATION_FLAG_AUTHORIZED) sta->flags |= WLAN_STA_AUTHORIZED; @@ -613,6 +598,7 @@ static void sta_apply_parameters(struct ieee80211_local *local, sta->flags &= ~WLAN_STA_WME; if (params->station_flags & STATION_FLAG_WME) sta->flags |= WLAN_STA_WME; + spin_unlock_bh(&sta->lock); } /* @@ -672,7 +658,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN || + if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN && sdata->vif.type != IEEE80211_IF_TYPE_AP) return -EINVAL; } else @@ -760,7 +746,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN || + if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN && vlansdata->vif.type != IEEE80211_IF_TYPE_AP) { rcu_read_unlock(); return -EINVAL; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 1cccbfd781f6..ee509f1109e2 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -70,16 +70,6 @@ DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", /* statistics stuff */ -static inline int rtnl_lock_local(struct ieee80211_local *local) -{ - rtnl_lock(); - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) { - rtnl_unlock(); - return -ENODEV; - } - return 0; -} - #define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \ DEBUGFS_READONLY_FILE(stats_ ##name, buflen, fmt, ##value) @@ -96,10 +86,7 @@ static ssize_t format_devstat_counter(struct ieee80211_local *local, if (!local->ops->get_stats) return -EOPNOTSUPP; - res = rtnl_lock_local(local); - if (res) - return res; - + rtnl_lock(); res = local->ops->get_stats(local_to_hw(local), &stats); rtnl_unlock(); if (!res) @@ -197,45 +184,6 @@ DEBUGFS_STATS_FILE(rx_handlers_fragments, 20, "%u", DEBUGFS_STATS_FILE(tx_status_drop, 20, "%u", local->tx_status_drop); -static ssize_t stats_wme_rx_queue_read(struct file *file, - char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - char buf[NUM_RX_DATA_QUEUES*15], *p = buf; - int i; - - for (i = 0; i < NUM_RX_DATA_QUEUES; i++) - p += scnprintf(p, sizeof(buf)+buf-p, - "%u\n", local->wme_rx_queue[i]); - - return simple_read_from_buffer(userbuf, count, ppos, buf, p-buf); -} - -static const struct file_operations stats_wme_rx_queue_ops = { - .read = stats_wme_rx_queue_read, - .open = mac80211_open_file_generic, -}; - -static ssize_t stats_wme_tx_queue_read(struct file *file, - char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - char buf[NUM_TX_DATA_QUEUES*15], *p = buf; - int i; - - for (i = 0; i < NUM_TX_DATA_QUEUES; i++) - p += scnprintf(p, sizeof(buf)+buf-p, - "%u\n", local->wme_tx_queue[i]); - - return simple_read_from_buffer(userbuf, count, ppos, buf, p-buf); -} - -static const struct file_operations stats_wme_tx_queue_ops = { - .read = stats_wme_tx_queue_read, - .open = mac80211_open_file_generic, -}; #endif DEBUGFS_DEVSTATS_FILE(dot11ACKFailureCount); @@ -303,8 +251,6 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_STATS_ADD(rx_expand_skb_head2); DEBUGFS_STATS_ADD(rx_handlers_fragments); DEBUGFS_STATS_ADD(tx_status_drop); - DEBUGFS_STATS_ADD(wme_tx_queue); - DEBUGFS_STATS_ADD(wme_rx_queue); #endif DEBUGFS_STATS_ADD(dot11ACKFailureCount); DEBUGFS_STATS_ADD(dot11RTSFailureCount); @@ -356,8 +302,6 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_STATS_DEL(rx_expand_skb_head2); DEBUGFS_STATS_DEL(rx_handlers_fragments); DEBUGFS_STATS_DEL(tx_status_drop); - DEBUGFS_STATS_DEL(wme_tx_queue); - DEBUGFS_STATS_DEL(wme_rx_queue); #endif DEBUGFS_STATS_DEL(dot11ACKFailureCount); DEBUGFS_STATS_DEL(dot11RTSFailureCount); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 879e7210458a..7439b63df5d0 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -97,8 +97,8 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, break; case ALG_TKIP: len = scnprintf(buf, sizeof(buf), "%08x %04x\n", - key->u.tkip.iv32, - key->u.tkip.iv16); + key->u.tkip.tx.iv32, + key->u.tkip.tx.iv16); break; case ALG_CCMP: tpn = key->u.ccmp.tx_pn; @@ -128,8 +128,8 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, for (i = 0; i < NUM_RX_DATA_QUEUES; i++) p += scnprintf(p, sizeof(buf)+buf-p, "%08x %04x\n", - key->u.tkip.iv32_rx[i], - key->u.tkip.iv16_rx[i]); + key->u.tkip.rx[i].iv32, + key->u.tkip.rx[i].iv16); len = p - buf; break; case ALG_CCMP: @@ -255,14 +255,23 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key) void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata) { char buf[50]; + struct ieee80211_key *key; if (!sdata->debugfsdir) return; - sprintf(buf, "../keys/%d", sdata->default_key->debugfs.cnt); - sdata->debugfs.default_key = - debugfs_create_symlink("default_key", sdata->debugfsdir, buf); + /* this is running under the key lock */ + + key = sdata->default_key; + if (key) { + sprintf(buf, "../keys/%d", key->debugfs.cnt); + sdata->debugfs.default_key = + debugfs_create_symlink("default_key", + sdata->debugfsdir, buf); + } else + ieee80211_debugfs_key_remove_default(sdata); } + void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata) { if (!sdata) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index e3326d046944..475f89a8aee1 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -155,8 +155,9 @@ static const struct file_operations name##_ops = { \ __IEEE80211_IF_WFILE(name) /* common attributes */ -IEEE80211_IF_FILE(channel_use, channel_use, DEC); IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); +IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC); +IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC); /* STA/IBSS attributes */ IEEE80211_IF_FILE(state, u.sta.state, DEC); @@ -192,8 +193,6 @@ __IEEE80211_IF_FILE(flags); IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); IEEE80211_IF_FILE(num_beacons, u.ap.num_beacons, DEC); -IEEE80211_IF_FILE(force_unicast_rateidx, u.ap.force_unicast_rateidx, DEC); -IEEE80211_IF_FILE(max_ratectrl_rateidx, u.ap.max_ratectrl_rateidx, DEC); static ssize_t ieee80211_if_fmt_num_buffered_multicast( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -248,8 +247,10 @@ IEEE80211_IF_WFILE(min_discovery_timeout, static void add_sta_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(channel_use, sta); DEBUGFS_ADD(drop_unencrypted, sta); + DEBUGFS_ADD(force_unicast_rateidx, ap); + DEBUGFS_ADD(max_ratectrl_rateidx, ap); + DEBUGFS_ADD(state, sta); DEBUGFS_ADD(bssid, sta); DEBUGFS_ADD(prev_bssid, sta); @@ -269,27 +270,30 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) static void add_ap_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(channel_use, ap); DEBUGFS_ADD(drop_unencrypted, ap); + DEBUGFS_ADD(force_unicast_rateidx, ap); + DEBUGFS_ADD(max_ratectrl_rateidx, ap); + DEBUGFS_ADD(num_sta_ps, ap); DEBUGFS_ADD(dtim_count, ap); DEBUGFS_ADD(num_beacons, ap); - DEBUGFS_ADD(force_unicast_rateidx, ap); - DEBUGFS_ADD(max_ratectrl_rateidx, ap); DEBUGFS_ADD(num_buffered_multicast, ap); } static void add_wds_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(channel_use, wds); DEBUGFS_ADD(drop_unencrypted, wds); + DEBUGFS_ADD(force_unicast_rateidx, ap); + DEBUGFS_ADD(max_ratectrl_rateidx, ap); + DEBUGFS_ADD(peer, wds); } static void add_vlan_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(channel_use, vlan); DEBUGFS_ADD(drop_unencrypted, vlan); + DEBUGFS_ADD(force_unicast_rateidx, ap); + DEBUGFS_ADD(max_ratectrl_rateidx, ap); } static void add_monitor_files(struct ieee80211_sub_if_data *sdata) @@ -376,8 +380,10 @@ static void add_files(struct ieee80211_sub_if_data *sdata) static void del_sta_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_DEL(channel_use, sta); DEBUGFS_DEL(drop_unencrypted, sta); + DEBUGFS_DEL(force_unicast_rateidx, ap); + DEBUGFS_DEL(max_ratectrl_rateidx, ap); + DEBUGFS_DEL(state, sta); DEBUGFS_DEL(bssid, sta); DEBUGFS_DEL(prev_bssid, sta); @@ -397,27 +403,30 @@ static void del_sta_files(struct ieee80211_sub_if_data *sdata) static void del_ap_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_DEL(channel_use, ap); DEBUGFS_DEL(drop_unencrypted, ap); + DEBUGFS_DEL(force_unicast_rateidx, ap); + DEBUGFS_DEL(max_ratectrl_rateidx, ap); + DEBUGFS_DEL(num_sta_ps, ap); DEBUGFS_DEL(dtim_count, ap); DEBUGFS_DEL(num_beacons, ap); - DEBUGFS_DEL(force_unicast_rateidx, ap); - DEBUGFS_DEL(max_ratectrl_rateidx, ap); DEBUGFS_DEL(num_buffered_multicast, ap); } static void del_wds_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_DEL(channel_use, wds); DEBUGFS_DEL(drop_unencrypted, wds); + DEBUGFS_DEL(force_unicast_rateidx, ap); + DEBUGFS_DEL(max_ratectrl_rateidx, ap); + DEBUGFS_DEL(peer, wds); } static void del_vlan_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_DEL(channel_use, vlan); DEBUGFS_DEL(drop_unencrypted, vlan); + DEBUGFS_DEL(force_unicast_rateidx, ap); + DEBUGFS_DEL(max_ratectrl_rateidx, ap); } static void del_monitor_files(struct ieee80211_sub_if_data *sdata) @@ -467,12 +476,12 @@ static void del_mesh_config(struct ieee80211_sub_if_data *sdata) } #endif -static void del_files(struct ieee80211_sub_if_data *sdata, int type) +static void del_files(struct ieee80211_sub_if_data *sdata) { if (!sdata->debugfsdir) return; - switch (type) { + switch (sdata->vif.type) { case IEEE80211_IF_TYPE_MESH_POINT: #ifdef CONFIG_MAC80211_MESH del_mesh_stats(sdata); @@ -512,29 +521,23 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) sprintf(buf, "netdev:%s", sdata->dev->name); sdata->debugfsdir = debugfs_create_dir(buf, sdata->local->hw.wiphy->debugfsdir); + add_files(sdata); } void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) { - del_files(sdata, sdata->vif.type); + del_files(sdata); debugfs_remove(sdata->debugfsdir); sdata->debugfsdir = NULL; } -void ieee80211_debugfs_change_if_type(struct ieee80211_sub_if_data *sdata, - int oldtype) -{ - del_files(sdata, oldtype); - add_files(sdata); -} - -static int netdev_notify(struct notifier_block * nb, +static int netdev_notify(struct notifier_block *nb, unsigned long state, void *ndev) { struct net_device *dev = ndev; struct dentry *dir; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata; char buf[10+IFNAMSIZ]; if (state != NETDEV_CHANGENAME) @@ -546,6 +549,8 @@ static int netdev_notify(struct notifier_block * nb, if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid) return 0; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sprintf(buf, "netdev:%s", dev->name); dir = sdata->debugfsdir; if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf)) diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index a690071fde8a..7af731f0b731 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -6,8 +6,6 @@ #ifdef CONFIG_MAC80211_DEBUGFS void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata); -void ieee80211_debugfs_change_if_type(struct ieee80211_sub_if_data *sdata, - int oldtype); void ieee80211_debugfs_netdev_init(void); void ieee80211_debugfs_netdev_exit(void); #else @@ -17,9 +15,6 @@ static inline void ieee80211_debugfs_add_netdev( static inline void ieee80211_debugfs_remove_netdev( struct ieee80211_sub_if_data *sdata) {} -static inline void ieee80211_debugfs_change_if_type( - struct ieee80211_sub_if_data *sdata, int oldtype) -{} static inline void ieee80211_debugfs_netdev_init(void) {} diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 6d47a1d31b37..79a062782d52 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -63,10 +63,9 @@ STA_FILE(tx_fragments, tx_fragments, LU); STA_FILE(tx_filtered, tx_filtered_count, LU); STA_FILE(tx_retry_failed, tx_retry_failed, LU); STA_FILE(tx_retry_count, tx_retry_count, LU); -STA_FILE(last_rssi, last_rssi, D); STA_FILE(last_signal, last_signal, D); +STA_FILE(last_qual, last_qual, D); STA_FILE(last_noise, last_noise, D); -STA_FILE(channel_use, channel_use, D); STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, @@ -74,14 +73,15 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, { char buf[100]; struct sta_info *sta = file->private_data; + u32 staflags = get_sta_flags(sta); int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s", - sta->flags & WLAN_STA_AUTH ? "AUTH\n" : "", - sta->flags & WLAN_STA_ASSOC ? "ASSOC\n" : "", - sta->flags & WLAN_STA_PS ? "PS\n" : "", - sta->flags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "", - sta->flags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "", - sta->flags & WLAN_STA_WME ? "WME\n" : "", - sta->flags & WLAN_STA_WDS ? "WDS\n" : ""); + staflags & WLAN_STA_AUTH ? "AUTH\n" : "", + staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "", + staflags & WLAN_STA_PS ? "PS\n" : "", + staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "", + staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "", + staflags & WLAN_STA_WME ? "WME\n" : "", + staflags & WLAN_STA_WDS ? "WDS\n" : ""); return simple_read_from_buffer(userbuf, count, ppos, buf, res); } STA_OPS(flags); @@ -123,36 +123,6 @@ static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf, } STA_OPS(last_seq_ctrl); -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS -static ssize_t sta_wme_rx_queue_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - char buf[15*NUM_RX_DATA_QUEUES], *p = buf; - int i; - struct sta_info *sta = file->private_data; - for (i = 0; i < NUM_RX_DATA_QUEUES; i++) - p += scnprintf(p, sizeof(buf)+buf-p, "%u ", - sta->wme_rx_queue[i]); - p += scnprintf(p, sizeof(buf)+buf-p, "\n"); - return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); -} -STA_OPS(wme_rx_queue); - -static ssize_t sta_wme_tx_queue_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - char buf[15*NUM_TX_DATA_QUEUES], *p = buf; - int i; - struct sta_info *sta = file->private_data; - for (i = 0; i < NUM_TX_DATA_QUEUES; i++) - p += scnprintf(p, sizeof(buf)+buf-p, "%u ", - sta->wme_tx_queue[i]); - p += scnprintf(p, sizeof(buf)+buf-p, "\n"); - return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); -} -STA_OPS(wme_tx_queue); -#endif - static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -293,10 +263,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(num_ps_buf_frames); DEBUGFS_ADD(inactive_ms); DEBUGFS_ADD(last_seq_ctrl); -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - DEBUGFS_ADD(wme_rx_queue); - DEBUGFS_ADD(wme_tx_queue); -#endif DEBUGFS_ADD(agg_status); } @@ -306,10 +272,6 @@ void ieee80211_sta_debugfs_remove(struct sta_info *sta) DEBUGFS_DEL(num_ps_buf_frames); DEBUGFS_DEL(inactive_ms); DEBUGFS_DEL(last_seq_ctrl); -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - DEBUGFS_DEL(wme_rx_queue); - DEBUGFS_DEL(wme_tx_queue); -#endif DEBUGFS_DEL(agg_status); debugfs_remove(sta->debugfs.dir); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c7314bf4bec2..a4f9a832722a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2,6 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -23,6 +24,8 @@ #include <linux/spinlock.h> #include <linux/etherdevice.h> #include <net/wireless.h> +#include <net/iw_handler.h> +#include <net/mac80211.h> #include "key.h" #include "sta_info.h" @@ -82,7 +85,7 @@ struct ieee80211_sta_bss { u16 capability; /* host byte order */ enum ieee80211_band band; int freq; - int rssi, signal, noise; + int signal, noise, qual; u8 *wpa_ie; size_t wpa_ie_len; u8 *rsn_ie; @@ -91,6 +94,8 @@ struct ieee80211_sta_bss { size_t wmm_ie_len; u8 *ht_ie; size_t ht_ie_len; + u8 *ht_add_ie; + size_t ht_add_ie_len; #ifdef CONFIG_MAC80211_MESH u8 *mesh_id; size_t mesh_id_len; @@ -147,7 +152,6 @@ typedef unsigned __bitwise__ ieee80211_tx_result; #define IEEE80211_TX_UNICAST BIT(1) #define IEEE80211_TX_PS_BUFFERED BIT(2) #define IEEE80211_TX_PROBE_LAST_FRAG BIT(3) -#define IEEE80211_TX_INJECTED BIT(4) struct ieee80211_tx_data { struct sk_buff *skb; @@ -157,13 +161,12 @@ struct ieee80211_tx_data { struct sta_info *sta; struct ieee80211_key *key; - struct ieee80211_tx_control *control; struct ieee80211_channel *channel; - struct ieee80211_rate *rate; + s8 rate_idx; /* use this rate (if set) for last fragment; rate can * be set to lower rate for the first fragments, e.g., * when using CTS protection with IEEE 802.11g. */ - struct ieee80211_rate *last_frag_rate; + s8 last_frag_rate_idx; /* Extra fragments (in addition to the first fragment * in skb) */ @@ -202,32 +205,16 @@ struct ieee80211_rx_data { unsigned int flags; int sent_ps_buffered; int queue; - int load; u32 tkip_iv32; u16 tkip_iv16; }; -/* flags used in struct ieee80211_tx_packet_data.flags */ -#define IEEE80211_TXPD_REQ_TX_STATUS BIT(0) -#define IEEE80211_TXPD_DO_NOT_ENCRYPT BIT(1) -#define IEEE80211_TXPD_REQUEUE BIT(2) -#define IEEE80211_TXPD_EAPOL_FRAME BIT(3) -#define IEEE80211_TXPD_AMPDU BIT(4) -/* Stored in sk_buff->cb */ -struct ieee80211_tx_packet_data { - int ifindex; - unsigned long jiffies; - unsigned int flags; - u8 queue; -}; - struct ieee80211_tx_stored_packet { - struct ieee80211_tx_control control; struct sk_buff *skb; struct sk_buff **extra_frag; - struct ieee80211_rate *last_frag_rate; + s8 last_frag_rate_idx; int num_extra_frag; - unsigned int last_frag_rate_ctrl_probe; + bool last_frag_rate_ctrl_probe; }; struct beacon_data { @@ -251,8 +238,6 @@ struct ieee80211_if_ap { struct sk_buff_head ps_bc_buf; atomic_t num_sta_ps; /* number of stations in PS mode */ int dtim_count; - int force_unicast_rateidx; /* forced TX rateidx for unicast frames */ - int max_ratectrl_rateidx; /* max TX rateidx for rate control */ int num_beacons; /* number of TXed beacon frames for this BSS */ }; @@ -262,7 +247,6 @@ struct ieee80211_if_wds { }; struct ieee80211_if_vlan { - struct ieee80211_sub_if_data *ap; struct list_head list; }; @@ -436,8 +420,6 @@ struct ieee80211_sub_if_data { */ u64 basic_rates; - u16 sequence; - /* Fragment table for host-based reassembly */ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; @@ -446,16 +428,18 @@ struct ieee80211_sub_if_data { struct ieee80211_key *keys[NUM_DEFAULT_KEYS]; struct ieee80211_key *default_key; + /* BSS configuration for this interface. */ + struct ieee80211_bss_conf bss_conf; + /* - * BSS configuration for this interface. - * - * FIXME: I feel bad putting this here when we already have a - * bss pointer, but the bss pointer is just wrong when - * you have multiple virtual STA mode interfaces... - * This needs to be fixed. + * AP this belongs to: self in AP mode and + * corresponding AP in VLAN mode, NULL for + * all others (might be needed later in IBSS) */ - struct ieee80211_bss_conf bss_conf; - struct ieee80211_if_ap *bss; /* BSS that this device belongs to */ + struct ieee80211_if_ap *bss; + + int force_unicast_rateidx; /* forced TX rateidx for unicast frames */ + int max_ratectrl_rateidx; /* max TX rateidx for rate control */ union { struct ieee80211_if_ap ap; @@ -464,14 +448,11 @@ struct ieee80211_sub_if_data { struct ieee80211_if_sta sta; u32 mntr_flags; } u; - int channel_use; - int channel_use_raw; #ifdef CONFIG_MAC80211_DEBUGFS struct dentry *debugfsdir; union { struct { - struct dentry *channel_use; struct dentry *drop_unencrypted; struct dentry *state; struct dentry *bssid; @@ -490,7 +471,6 @@ struct ieee80211_sub_if_data { struct dentry *num_beacons_sta; } sta; struct { - struct dentry *channel_use; struct dentry *drop_unencrypted; struct dentry *num_sta_ps; struct dentry *dtim_count; @@ -500,12 +480,10 @@ struct ieee80211_sub_if_data { struct dentry *num_buffered_multicast; } ap; struct { - struct dentry *channel_use; struct dentry *drop_unencrypted; struct dentry *peer; } wds; struct { - struct dentry *channel_use; struct dentry *drop_unencrypted; } vlan; struct { @@ -553,8 +531,6 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) return container_of(p, struct ieee80211_sub_if_data, vif); } -#define IEEE80211_DEV_TO_SUB_IF(dev) netdev_priv(dev) - enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, @@ -562,6 +538,9 @@ enum { IEEE80211_ADDBA_MSG = 4, }; +/* maximum number of hardware queues we support. */ +#define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES) + struct ieee80211_local { /* embed the driver visible part. * don't cast (use the static inlines below), but we keep @@ -570,6 +549,8 @@ struct ieee80211_local { const struct ieee80211_ops *ops; + unsigned long queue_pool[BITS_TO_LONGS(QD_MAX_QUEUES)]; + struct net_device *mdev; /* wmaster# - "master" 802.11 device */ int open_count; int monitors, cooked_mntrs; @@ -581,12 +562,6 @@ struct ieee80211_local { bool tim_in_locked_section; /* see ieee80211_beacon_get() */ int tx_headroom; /* required headroom for hardware/radiotap */ - enum { - IEEE80211_DEV_UNINITIALIZED = 0, - IEEE80211_DEV_REGISTERED, - IEEE80211_DEV_UNREGISTERED, - } reg_state; - /* Tasklet and skb queue to process calls from IRQ mode. All frames * added to skb_queue will be processed, but frames in * skb_queue_unreliable may be dropped if the total length of these @@ -610,8 +585,8 @@ struct ieee80211_local { struct sta_info *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; - unsigned long state[NUM_TX_DATA_QUEUES_AMPDU]; - struct ieee80211_tx_stored_packet pending_packet[NUM_TX_DATA_QUEUES_AMPDU]; + unsigned long queues_pending[BITS_TO_LONGS(IEEE80211_MAX_QUEUES)]; + struct ieee80211_tx_stored_packet pending_packet[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; /* number of interfaces with corresponding IFF_ flags */ @@ -677,9 +652,6 @@ struct ieee80211_local { assoc_led_name[32], radio_led_name[32]; #endif - u32 channel_use; - u32 channel_use_raw; - #ifdef CONFIG_MAC80211_DEBUGFS struct work_struct sta_debugfs_add; #endif @@ -705,8 +677,6 @@ struct ieee80211_local { unsigned int rx_expand_skb_head2; unsigned int rx_handlers_fragments; unsigned int tx_status_drop; - unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES]; - unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES]; #define I802_DEBUG_INC(c) (c)++ #else /* CONFIG_MAC80211_DEBUG_COUNTERS */ #define I802_DEBUG_INC(c) do { } while (0) @@ -764,8 +734,6 @@ struct ieee80211_local { struct dentry *rx_expand_skb_head2; struct dentry *rx_handlers_fragments; struct dentry *tx_status_drop; - struct dentry *wme_tx_queue; - struct dentry *wme_rx_queue; #endif struct dentry *dot11ACKFailureCount; struct dentry *dot11RTSFailureCount; @@ -778,6 +746,16 @@ struct ieee80211_local { #endif }; +static inline struct ieee80211_sub_if_data * +IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + + BUG_ON(!local || local->mdev == dev); + + return netdev_priv(dev); +} + /* this struct represents 802.11n's RA/TID combination */ struct ieee80211_ra_tid { u8 ra[ETH_ALEN]; @@ -809,6 +787,10 @@ struct ieee802_11_elems { u8 *preq; u8 *prep; u8 *perr; + u8 *ch_switch_elem; + u8 *country_elem; + u8 *pwr_constr_elem; + u8 *quiet_elem; /* first quite element */ /* length of them, respectively */ u8 ssid_len; @@ -833,6 +815,11 @@ struct ieee802_11_elems { u8 preq_len; u8 prep_len; u8 perr_len; + u8 ch_switch_elem_len; + u8 country_elem_len; + u8 pwr_constr_elem_len; + u8 quiet_elem_len; + u8 num_of_quiet_elem; /* can be more the one */ }; static inline struct ieee80211_local *hw_to_local( @@ -847,11 +834,6 @@ static inline struct ieee80211_hw *local_to_hw( return &local->hw; } -enum ieee80211_link_state_t { - IEEE80211_LINK_STATE_XOFF = 0, - IEEE80211_LINK_STATE_PENDING, -}; - struct sta_attribute { struct attribute attr; ssize_t (*show)(const struct sta_info *, char *buf); @@ -867,39 +849,16 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) /* ieee80211.c */ int ieee80211_hw_config(struct ieee80211_local *local); -int ieee80211_if_config(struct net_device *dev); -int ieee80211_if_config_beacon(struct net_device *dev); +int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed); void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); -void ieee80211_if_setup(struct net_device *dev); u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht, struct ieee80211_ht_info *req_ht_cap, struct ieee80211_ht_bss_info *req_bss_cap); /* ieee80211_ioctl.c */ extern const struct iw_handler_def ieee80211_iw_handler_def; +int ieee80211_set_freq(struct net_device *dev, int freq); - -/* Least common multiple of the used rates (in 100 kbps). This is used to - * calculate rate_inv values for each rate so that only integers are needed. */ -#define CHAN_UTIL_RATE_LCM 95040 -/* 1 usec is 1/8 * (95040/10) = 1188 */ -#define CHAN_UTIL_PER_USEC 1188 -/* Amount of bits to shift the result right to scale the total utilization - * to values that will not wrap around 32-bit integers. */ -#define CHAN_UTIL_SHIFT 9 -/* Theoretical maximum of channel utilization counter in 10 ms (stat_time=1): - * (CHAN_UTIL_PER_USEC * 10000) >> CHAN_UTIL_SHIFT = 23203. So dividing the - * raw value with about 23 should give utilization in 10th of a percentage - * (1/1000). However, utilization is only estimated and not all intervals - * between frames etc. are calculated. 18 seems to give numbers that are closer - * to the real maximum. */ -#define CHAN_UTIL_PER_10MS 18 -#define CHAN_UTIL_HDR_LONG (202 * CHAN_UTIL_PER_USEC) -#define CHAN_UTIL_HDR_SHORT (40 * CHAN_UTIL_PER_USEC) - - -/* ieee80211_ioctl.c */ -int ieee80211_set_freq(struct ieee80211_local *local, int freq); /* ieee80211_sta.c */ void ieee80211_sta_timer(unsigned long data); void ieee80211_sta_work(struct work_struct *work); @@ -912,21 +871,23 @@ int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid); int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len); void ieee80211_sta_req_auth(struct net_device *dev, struct ieee80211_if_sta *ifsta); -int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len); +int ieee80211_sta_scan_results(struct net_device *dev, + struct iw_request_info *info, + char *buf, size_t len); ieee80211_rx_result ieee80211_sta_rx_scan( struct net_device *dev, struct sk_buff *skb, struct ieee80211_rx_status *rx_status); -void ieee80211_rx_bss_list_init(struct net_device *dev); -void ieee80211_rx_bss_list_deinit(struct net_device *dev); +void ieee80211_rx_bss_list_init(struct ieee80211_local *local); +void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local); int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len); -struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev, - struct sk_buff *skb, u8 *bssid, - u8 *addr); +struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev, + struct sk_buff *skb, u8 *bssid, + u8 *addr, u64 supp_rates); int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason); int ieee80211_sta_disassociate(struct net_device *dev, u16 reason); void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed); -void ieee80211_reset_erp_info(struct net_device *dev); +u32 ieee80211_reset_erp_info(struct net_device *dev); int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, struct ieee80211_ht_info *ht_info); int ieee80211_ht_addt_info_ie_to_ht_bss_info( @@ -937,10 +898,10 @@ void ieee80211_send_addba_request(struct net_device *dev, const u8 *da, u16 agg_size, u16 timeout); void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid, u16 initiator, u16 reason_code); +void ieee80211_send_bar(struct net_device *dev, u8 *ra, u16 tid, u16 ssn); void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *da, u16 tid, u16 initiator, u16 reason); -void sta_rx_agg_session_timer_expired(unsigned long data); void sta_addba_resp_timer_expired(unsigned long data); void ieee80211_sta_tear_down_BA_sessions(struct net_device *dev, u8 *addr); u64 ieee80211_sta_get_rates(struct ieee80211_local *local, @@ -958,17 +919,15 @@ static inline void ieee80211_start_mesh(struct net_device *dev) {} #endif -/* ieee80211_iface.c */ -int ieee80211_if_add(struct net_device *dev, const char *name, - struct net_device **new_dev, int type, +/* interface handling */ +void ieee80211_if_setup(struct net_device *dev); +int ieee80211_if_add(struct ieee80211_local *local, const char *name, + struct net_device **new_dev, enum ieee80211_if_types type, struct vif_params *params); -void ieee80211_if_set_type(struct net_device *dev, int type); -void ieee80211_if_reinit(struct net_device *dev); -void __ieee80211_if_del(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata); -int ieee80211_if_remove(struct net_device *dev, const char *name, int id); -void ieee80211_if_free(struct net_device *dev); -void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata); +int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, + enum ieee80211_if_types type); +void ieee80211_if_remove(struct net_device *dev); +void ieee80211_remove_interfaces(struct ieee80211_local *local); /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); @@ -988,4 +947,10 @@ int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, struct ieee80211_hdr *hdr); +#ifdef CONFIG_MAC80211_NOINLINE +#define debug_noinline noinline +#else +#define debug_noinline +#endif + #endif /* IEEE80211_I_H */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 80954a512185..610ed1d9893a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -2,6 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> + * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,137 +18,91 @@ #include "debugfs_netdev.h" #include "mesh.h" -void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata) +/* + * Called when the netdev is removed or, by the code below, before + * the interface type changes. + */ +static void ieee80211_teardown_sdata(struct net_device *dev) { + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct beacon_data *beacon; + struct sk_buff *skb; + int flushed; int i; - /* Default values for sub-interface parameters */ - sdata->drop_unencrypted = 0; - for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) - skb_queue_head_init(&sdata->fragments[i].skb_list); - - INIT_LIST_HEAD(&sdata->key_list); -} + ieee80211_debugfs_remove_netdev(sdata); -static void ieee80211_if_sdata_deinit(struct ieee80211_sub_if_data *sdata) -{ - int i; + /* free extra data */ + ieee80211_free_keys(sdata); - for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { + for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) __skb_queue_purge(&sdata->fragments[i].skb_list); - } -} - -/* Must be called with rtnl lock held. */ -int ieee80211_if_add(struct net_device *dev, const char *name, - struct net_device **new_dev, int type, - struct vif_params *params) -{ - struct net_device *ndev; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = NULL; - int ret; - - ASSERT_RTNL(); - ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, - name, ieee80211_if_setup); - if (!ndev) - return -ENOMEM; - - ret = dev_alloc_name(ndev, ndev->name); - if (ret < 0) - goto fail; - - memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); - ndev->base_addr = dev->base_addr; - ndev->irq = dev->irq; - ndev->mem_start = dev->mem_start; - ndev->mem_end = dev->mem_end; - SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); - - sdata = IEEE80211_DEV_TO_SUB_IF(ndev); - ndev->ieee80211_ptr = &sdata->wdev; - sdata->wdev.wiphy = local->hw.wiphy; - sdata->vif.type = IEEE80211_IF_TYPE_AP; - sdata->dev = ndev; - sdata->local = local; - ieee80211_if_sdata_init(sdata); + sdata->fragment_next = 0; - ret = register_netdevice(ndev); - if (ret) - goto fail; - - ieee80211_debugfs_add_netdev(sdata); - ieee80211_if_set_type(ndev, type); + switch (sdata->vif.type) { + case IEEE80211_IF_TYPE_AP: + beacon = sdata->u.ap.beacon; + rcu_assign_pointer(sdata->u.ap.beacon, NULL); + synchronize_rcu(); + kfree(beacon); - if (ieee80211_vif_is_mesh(&sdata->vif) && - params && params->mesh_id_len) - ieee80211_if_sta_set_mesh_id(&sdata->u.sta, - params->mesh_id_len, - params->mesh_id); + while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) { + local->total_ps_buffered--; + dev_kfree_skb(skb); + } - /* we're under RTNL so all this is fine */ - if (unlikely(local->reg_state == IEEE80211_DEV_UNREGISTERED)) { - __ieee80211_if_del(local, sdata); - return -ENODEV; + break; + case IEEE80211_IF_TYPE_MESH_POINT: + /* Allow compiler to elide mesh_rmc_free call. */ + if (ieee80211_vif_is_mesh(&sdata->vif)) + mesh_rmc_free(dev); + /* fall through */ + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_IBSS: + kfree(sdata->u.sta.extra_ie); + kfree(sdata->u.sta.assocreq_ies); + kfree(sdata->u.sta.assocresp_ies); + kfree_skb(sdata->u.sta.probe_resp); + break; + case IEEE80211_IF_TYPE_WDS: + case IEEE80211_IF_TYPE_VLAN: + case IEEE80211_IF_TYPE_MNTR: + break; + case IEEE80211_IF_TYPE_INVALID: + BUG(); + break; } - list_add_tail_rcu(&sdata->list, &local->interfaces); - - if (new_dev) - *new_dev = ndev; - return 0; - -fail: - free_netdev(ndev); - return ret; + flushed = sta_info_flush(local, sdata); + WARN_ON(flushed); } -void ieee80211_if_set_type(struct net_device *dev, int type) +/* + * Helper function to initialise an interface to a specific type. + */ +static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, + enum ieee80211_if_types type) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - int oldtype = sdata->vif.type; - - /* - * We need to call this function on the master interface - * which already has a hard_start_xmit routine assigned - * which must not be changed. - */ - if (dev != sdata->local->mdev) - dev->hard_start_xmit = ieee80211_subif_start_xmit; + struct ieee80211_if_sta *ifsta; - /* - * Called even when register_netdevice fails, it would - * oops if assigned before initialising the rest. - */ - dev->uninit = ieee80211_if_reinit; + /* clear type-dependent union */ + memset(&sdata->u, 0, sizeof(sdata->u)); - /* most have no BSS pointer */ - sdata->bss = NULL; + /* and set some type-dependent values */ sdata->vif.type = type; - sdata->basic_rates = 0; + /* only monitor differs */ + sdata->dev->type = ARPHRD_ETHER; switch (type) { - case IEEE80211_IF_TYPE_WDS: - /* nothing special */ - break; - case IEEE80211_IF_TYPE_VLAN: - sdata->u.vlan.ap = NULL; - break; case IEEE80211_IF_TYPE_AP: - sdata->u.ap.force_unicast_rateidx = -1; - sdata->u.ap.max_ratectrl_rateidx = -1; skb_queue_head_init(&sdata->u.ap.ps_bc_buf); - sdata->bss = &sdata->u.ap; INIT_LIST_HEAD(&sdata->u.ap.vlans); break; case IEEE80211_IF_TYPE_MESH_POINT: case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: { - struct ieee80211_sub_if_data *msdata; - struct ieee80211_if_sta *ifsta; - + case IEEE80211_IF_TYPE_IBSS: ifsta = &sdata->u.sta; INIT_WORK(&ifsta->work, ieee80211_sta_work); setup_timer(&ifsta->timer, ieee80211_sta_timer, @@ -158,157 +113,157 @@ void ieee80211_if_set_type(struct net_device *dev, int type) ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | IEEE80211_AUTH_ALG_SHARED_KEY; ifsta->flags |= IEEE80211_STA_CREATE_IBSS | - IEEE80211_STA_WMM_ENABLED | IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL; - - msdata = IEEE80211_DEV_TO_SUB_IF(sdata->local->mdev); - sdata->bss = &msdata->u.ap; + if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4) + ifsta->flags |= IEEE80211_STA_WMM_ENABLED; if (ieee80211_vif_is_mesh(&sdata->vif)) ieee80211_mesh_init_sdata(sdata); break; - } case IEEE80211_IF_TYPE_MNTR: - dev->type = ARPHRD_IEEE80211_RADIOTAP; - dev->hard_start_xmit = ieee80211_monitor_start_xmit; + sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP; + sdata->dev->hard_start_xmit = ieee80211_monitor_start_xmit; sdata->u.mntr_flags = MONITOR_FLAG_CONTROL | MONITOR_FLAG_OTHER_BSS; break; - default: - printk(KERN_WARNING "%s: %s: Unknown interface type 0x%x", - dev->name, __func__, type); + case IEEE80211_IF_TYPE_WDS: + case IEEE80211_IF_TYPE_VLAN: + break; + case IEEE80211_IF_TYPE_INVALID: + BUG(); + break; } - ieee80211_debugfs_change_if_type(sdata, oldtype); + + ieee80211_debugfs_add_netdev(sdata); } -/* Must be called with rtnl lock held. */ -void ieee80211_if_reinit(struct net_device *dev) +int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, + enum ieee80211_if_types type) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct sk_buff *skb; - int flushed; + ASSERT_RTNL(); + + if (type == sdata->vif.type) + return 0; + + /* + * We could, here, on changes between IBSS/STA/MESH modes, + * invoke an MLME function instead that disassociates etc. + * and goes into the requested mode. + */ + + if (netif_running(sdata->dev)) + return -EBUSY; + + /* Purge and reset type-dependent state. */ + ieee80211_teardown_sdata(sdata->dev); + ieee80211_setup_sdata(sdata, type); + + /* reset some values that shouldn't be kept across type changes */ + sdata->basic_rates = 0; + sdata->drop_unencrypted = 0; + + return 0; +} + +int ieee80211_if_add(struct ieee80211_local *local, const char *name, + struct net_device **new_dev, enum ieee80211_if_types type, + struct vif_params *params) +{ + struct net_device *ndev; + struct ieee80211_sub_if_data *sdata = NULL; + int ret, i; ASSERT_RTNL(); - ieee80211_free_keys(sdata); + ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, + name, ieee80211_if_setup); + if (!ndev) + return -ENOMEM; - ieee80211_if_sdata_deinit(sdata); + ndev->needed_headroom = local->tx_headroom + + 4*6 /* four MAC addresses */ + + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */ + + 6 /* mesh */ + + 8 /* rfc1042/bridge tunnel */ + - ETH_HLEN /* ethernet hard_header_len */ + + IEEE80211_ENCRYPT_HEADROOM; + ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM; - /* Need to handle mesh specially to allow eliding the function call */ - if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_rmc_free(dev); + ret = dev_alloc_name(ndev, ndev->name); + if (ret < 0) + goto fail; - switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_INVALID: - /* cannot happen */ - WARN_ON(1); - break; - case IEEE80211_IF_TYPE_AP: { - /* Remove all virtual interfaces that use this BSS - * as their sdata->bss */ - struct ieee80211_sub_if_data *tsdata, *n; - struct beacon_data *beacon; - - list_for_each_entry_safe(tsdata, n, &local->interfaces, list) { - if (tsdata != sdata && tsdata->bss == &sdata->u.ap) { - printk(KERN_DEBUG "%s: removing virtual " - "interface %s because its BSS interface" - " is being removed\n", - sdata->dev->name, tsdata->dev->name); - list_del_rcu(&tsdata->list); - /* - * We have lots of time and can afford - * to sync for each interface - */ - synchronize_rcu(); - __ieee80211_if_del(local, tsdata); - } - } + memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); + SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); - beacon = sdata->u.ap.beacon; - rcu_assign_pointer(sdata->u.ap.beacon, NULL); - synchronize_rcu(); - kfree(beacon); + /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ + sdata = netdev_priv(ndev); + ndev->ieee80211_ptr = &sdata->wdev; - while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) { - local->total_ps_buffered--; - dev_kfree_skb(skb); - } + /* initialise type-independent data */ + sdata->wdev.wiphy = local->hw.wiphy; + sdata->local = local; + sdata->dev = ndev; - break; - } - case IEEE80211_IF_TYPE_WDS: - /* nothing to do */ - break; - case IEEE80211_IF_TYPE_MESH_POINT: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - kfree(sdata->u.sta.extra_ie); - sdata->u.sta.extra_ie = NULL; - kfree(sdata->u.sta.assocreq_ies); - sdata->u.sta.assocreq_ies = NULL; - kfree(sdata->u.sta.assocresp_ies); - sdata->u.sta.assocresp_ies = NULL; - if (sdata->u.sta.probe_resp) { - dev_kfree_skb(sdata->u.sta.probe_resp); - sdata->u.sta.probe_resp = NULL; - } + for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) + skb_queue_head_init(&sdata->fragments[i].skb_list); - break; - case IEEE80211_IF_TYPE_MNTR: - dev->type = ARPHRD_ETHER; - break; - case IEEE80211_IF_TYPE_VLAN: - sdata->u.vlan.ap = NULL; - break; - } + INIT_LIST_HEAD(&sdata->key_list); - flushed = sta_info_flush(local, sdata); - WARN_ON(flushed); + sdata->force_unicast_rateidx = -1; + sdata->max_ratectrl_rateidx = -1; - memset(&sdata->u, 0, sizeof(sdata->u)); - ieee80211_if_sdata_init(sdata); -} + /* setup type-dependent data */ + ieee80211_setup_sdata(sdata, type); -/* Must be called with rtnl lock held. */ -void __ieee80211_if_del(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) -{ - struct net_device *dev = sdata->dev; + ret = register_netdevice(ndev); + if (ret) + goto fail; - ieee80211_debugfs_remove_netdev(sdata); - unregister_netdevice(dev); - /* Except master interface, the net_device will be freed by - * net_device->destructor (i. e. ieee80211_if_free). */ + ndev->uninit = ieee80211_teardown_sdata; + + if (ieee80211_vif_is_mesh(&sdata->vif) && + params && params->mesh_id_len) + ieee80211_if_sta_set_mesh_id(&sdata->u.sta, + params->mesh_id_len, + params->mesh_id); + + list_add_tail_rcu(&sdata->list, &local->interfaces); + + if (new_dev) + *new_dev = ndev; + + return 0; + + fail: + free_netdev(ndev); + return ret; } -/* Must be called with rtnl lock held. */ -int ieee80211_if_remove(struct net_device *dev, const char *name, int id) +void ieee80211_if_remove(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata, *n; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); ASSERT_RTNL(); - list_for_each_entry_safe(sdata, n, &local->interfaces, list) { - if ((sdata->vif.type == id || id == -1) && - strcmp(name, sdata->dev->name) == 0 && - sdata->dev != local->mdev) { - list_del_rcu(&sdata->list); - synchronize_rcu(); - __ieee80211_if_del(local, sdata); - return 0; - } - } - return -ENODEV; + list_del_rcu(&sdata->list); + synchronize_rcu(); + unregister_netdevice(dev); } -void ieee80211_if_free(struct net_device *dev) +/* + * Remove all interfaces, may only be called at hardware unregistration + * time because it doesn't do RCU-safe list removals. + */ +void ieee80211_remove_interfaces(struct ieee80211_local *local) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata, *tmp; - ieee80211_if_sdata_deinit(sdata); - free_netdev(dev); + ASSERT_RTNL(); + + list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { + list_del(&sdata->list); + unregister_netdevice(sdata->dev); + } } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 150d66dbda9d..6597c779e35a 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -321,8 +321,15 @@ void ieee80211_key_link(struct ieee80211_key *key, * some hardware cannot handle TKIP with QoS, so * we indicate whether QoS could be in use. */ - if (sta->flags & WLAN_STA_WME) + if (test_sta_flags(sta, WLAN_STA_WME)) key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; + + /* + * This key is for a specific sta interface, + * inform the driver that it should try to store + * this key as pairwise key. + */ + key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; } else { if (sdata->vif.type == IEEE80211_IF_TYPE_STA) { struct sta_info *ap; @@ -335,7 +342,7 @@ void ieee80211_key_link(struct ieee80211_key *key, /* same here, the AP could be using QoS */ ap = sta_info_get(key->local, key->sdata->u.sta.bssid); if (ap) { - if (ap->flags & WLAN_STA_WME) + if (test_sta_flags(ap, WLAN_STA_WME)) key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; } @@ -380,6 +387,15 @@ void ieee80211_key_free(struct ieee80211_key *key) if (!key) return; + if (!key->sdata) { + /* The key has not been linked yet, simply free it + * and don't Oops */ + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + kfree(key); + return; + } + spin_lock_irqsave(&key->sdata->local->key_lock, flags); __ieee80211_key_free(key); spin_unlock_irqrestore(&key->sdata->local->key_lock, flags); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index f52c3df1fe9a..425816e0996c 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -16,31 +16,18 @@ #include <linux/rcupdate.h> #include <net/mac80211.h> -/* ALG_TKIP - * struct ieee80211_key::key is encoded as a 256-bit (32 byte) data block: - * Temporal Encryption Key (128 bits) - * Temporal Authenticator Tx MIC Key (64 bits) - * Temporal Authenticator Rx MIC Key (64 bits) - */ - -#define WEP_IV_LEN 4 -#define WEP_ICV_LEN 4 - -#define ALG_TKIP_KEY_LEN 32 -/* Starting offsets for each key */ -#define ALG_TKIP_TEMP_ENCR_KEY 0 -#define ALG_TKIP_TEMP_AUTH_TX_MIC_KEY 16 -#define ALG_TKIP_TEMP_AUTH_RX_MIC_KEY 24 -#define TKIP_IV_LEN 8 -#define TKIP_ICV_LEN 4 - -#define ALG_CCMP_KEY_LEN 16 -#define CCMP_HDR_LEN 8 -#define CCMP_MIC_LEN 8 -#define CCMP_TK_LEN 16 -#define CCMP_PN_LEN 6 - -#define NUM_RX_DATA_QUEUES 17 +#define WEP_IV_LEN 4 +#define WEP_ICV_LEN 4 +#define ALG_TKIP_KEY_LEN 32 +#define ALG_CCMP_KEY_LEN 16 +#define CCMP_HDR_LEN 8 +#define CCMP_MIC_LEN 8 +#define CCMP_TK_LEN 16 +#define CCMP_PN_LEN 6 +#define TKIP_IV_LEN 8 +#define TKIP_ICV_LEN 4 + +#define NUM_RX_DATA_QUEUES 17 struct ieee80211_local; struct ieee80211_sub_if_data; @@ -69,6 +56,13 @@ enum ieee80211_internal_key_flags { KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5), }; +struct tkip_ctx { + u32 iv32; + u16 iv16; + u16 p1k[5]; + int initialized; +}; + struct ieee80211_key { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -85,16 +79,10 @@ struct ieee80211_key { union { struct { /* last used TSC */ - u32 iv32; - u16 iv16; - u16 p1k[5]; - int tx_initialized; + struct tkip_ctx tx; /* last received RSC */ - u32 iv32_rx[NUM_RX_DATA_QUEUES]; - u16 iv16_rx[NUM_RX_DATA_QUEUES]; - u16 p1k_rx[NUM_RX_DATA_QUEUES][5]; - int rx_initialized[NUM_RX_DATA_QUEUES]; + struct tkip_ctx rx[NUM_RX_DATA_QUEUES]; } tkip; struct { u8 tx_pn[6]; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 9ad4e3631b6b..f1a83d450ea0 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -35,8 +35,6 @@ #include "debugfs.h" #include "debugfs_netdev.h" -#define SUPP_MCS_SET_LEN 16 - /* * For seeing transmitted packets on monitor interfaces * we have a radiotap header too. @@ -107,12 +105,18 @@ static int ieee80211_master_open(struct net_device *dev) /* we hold the RTNL here so can safely walk the list */ list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->dev != dev && netif_running(sdata->dev)) { + if (netif_running(sdata->dev)) { res = 0; break; } } - return res; + + if (res) + return res; + + netif_tx_start_all_queues(local->mdev); + + return 0; } static int ieee80211_master_stop(struct net_device *dev) @@ -122,7 +126,7 @@ static int ieee80211_master_stop(struct net_device *dev) /* we hold the RTNL here so can safely walk the list */ list_for_each_entry(sdata, &local->interfaces, list) - if (sdata->dev != dev && netif_running(sdata->dev)) + if (netif_running(sdata->dev)) dev_close(sdata->dev); return 0; @@ -147,9 +151,7 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) /* FIX: what would be proper limits for MTU? * This interface uses 802.3 frames. */ if (new_mtu < 256 || - new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) { - printk(KERN_WARNING "%s: invalid MTU %d\n", - dev->name, new_mtu); + new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) { return -EINVAL; } @@ -180,10 +182,11 @@ static int ieee80211_open(struct net_device *dev) { struct ieee80211_sub_if_data *sdata, *nsdata; struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sta_info *sta; struct ieee80211_if_init_conf conf; + u32 changed = 0; int res; bool need_hw_reconfig = 0; - struct sta_info *sta; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -191,7 +194,7 @@ static int ieee80211_open(struct net_device *dev) list_for_each_entry(nsdata, &local->interfaces, list) { struct net_device *ndev = nsdata->dev; - if (ndev != dev && ndev != local->mdev && netif_running(ndev)) { + if (ndev != dev && netif_running(ndev)) { /* * Allow only a single IBSS interface to be up at any * time. This is restricted because beacon distribution @@ -207,30 +210,6 @@ static int ieee80211_open(struct net_device *dev) return -EBUSY; /* - * Disallow multiple IBSS/STA mode interfaces. - * - * This is a technical restriction, it is possible although - * most likely not IEEE 802.11 compliant to have multiple - * STAs with just a single hardware (the TSF timer will not - * be adjusted properly.) - * - * However, because mac80211 uses the master device's BSS - * information for each STA/IBSS interface, doing this will - * currently corrupt that BSS information completely, unless, - * a not very useful case, both STAs are associated to the - * same BSS. - * - * To remove this restriction, the BSS information needs to - * be embedded in the STA/IBSS mode sdata instead of using - * the master device's BSS structure. - */ - if ((sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) && - (nsdata->vif.type == IEEE80211_IF_TYPE_STA || - nsdata->vif.type == IEEE80211_IF_TYPE_IBSS)) - return -EBUSY; - - /* * The remaining checks are only performed for interfaces * with the same MAC address. */ @@ -249,7 +228,7 @@ static int ieee80211_open(struct net_device *dev) */ if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN && nsdata->vif.type == IEEE80211_IF_TYPE_AP) - sdata->u.vlan.ap = nsdata; + sdata->bss = &nsdata->u.ap; } } @@ -259,10 +238,13 @@ static int ieee80211_open(struct net_device *dev) return -ENOLINK; break; case IEEE80211_IF_TYPE_VLAN: - if (!sdata->u.vlan.ap) + if (!sdata->bss) return -ENOLINK; + list_add(&sdata->u.vlan.list, &sdata->bss->vlans); break; case IEEE80211_IF_TYPE_AP: + sdata->bss = &sdata->u.ap; + break; case IEEE80211_IF_TYPE_STA: case IEEE80211_IF_TYPE_MNTR: case IEEE80211_IF_TYPE_IBSS: @@ -280,14 +262,13 @@ static int ieee80211_open(struct net_device *dev) if (local->ops->start) res = local->ops->start(local_to_hw(local)); if (res) - return res; + goto err_del_bss; need_hw_reconfig = 1; ieee80211_led_radio(local, local->hw.conf.radio_enabled); } switch (sdata->vif.type) { case IEEE80211_IF_TYPE_VLAN: - list_add(&sdata->u.vlan.list, &sdata->u.vlan.ap->u.ap.vlans); /* no need to tell driver */ break; case IEEE80211_IF_TYPE_MNTR: @@ -310,9 +291,9 @@ static int ieee80211_open(struct net_device *dev) if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) local->fif_other_bss++; - netif_tx_lock_bh(local->mdev); + netif_addr_lock_bh(local->mdev); ieee80211_configure_filter(local); - netif_tx_unlock_bh(local->mdev); + netif_addr_unlock_bh(local->mdev); break; case IEEE80211_IF_TYPE_STA: case IEEE80211_IF_TYPE_IBSS: @@ -326,8 +307,10 @@ static int ieee80211_open(struct net_device *dev) if (res) goto err_stop; - ieee80211_if_config(dev); - ieee80211_reset_erp_info(dev); + if (ieee80211_vif_is_mesh(&sdata->vif)) + ieee80211_start_mesh(sdata->dev); + changed |= ieee80211_reset_erp_info(dev); + ieee80211_bss_info_change_notify(sdata, changed); ieee80211_enable_keys(sdata); if (sdata->vif.type == IEEE80211_IF_TYPE_STA && @@ -346,6 +329,7 @@ static int ieee80211_open(struct net_device *dev) goto err_del_interface; } + /* no locking required since STA is not live yet */ sta->flags |= WLAN_STA_AUTHORIZED; res = sta_info_insert(sta); @@ -385,13 +369,13 @@ static int ieee80211_open(struct net_device *dev) * yet be effective. Trigger execution of ieee80211_sta_work * to fix this. */ - if(sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == IEEE80211_IF_TYPE_STA || + sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { struct ieee80211_if_sta *ifsta = &sdata->u.sta; queue_work(local->hw.workqueue, &ifsta->work); } - netif_start_queue(dev); + netif_tx_start_all_queues(dev); return 0; err_del_interface: @@ -399,6 +383,10 @@ static int ieee80211_open(struct net_device *dev) err_stop: if (!local->open_count && local->ops->stop) local->ops->stop(local_to_hw(local)); + err_del_bss: + sdata->bss = NULL; + if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) + list_del(&sdata->u.vlan.list); return res; } @@ -412,7 +400,7 @@ static int ieee80211_stop(struct net_device *dev) /* * Stop TX on this interface first. */ - netif_stop_queue(dev); + netif_tx_stop_all_queues(dev); /* * Now delete all active aggregation sessions. @@ -481,7 +469,6 @@ static int ieee80211_stop(struct net_device *dev) switch (sdata->vif.type) { case IEEE80211_IF_TYPE_VLAN: list_del(&sdata->u.vlan.list); - sdata->u.vlan.ap = NULL; /* no need to tell driver */ break; case IEEE80211_IF_TYPE_MNTR: @@ -503,14 +490,15 @@ static int ieee80211_stop(struct net_device *dev) if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) local->fif_other_bss--; - netif_tx_lock_bh(local->mdev); + netif_addr_lock_bh(local->mdev); ieee80211_configure_filter(local); - netif_tx_unlock_bh(local->mdev); + netif_addr_unlock_bh(local->mdev); break; case IEEE80211_IF_TYPE_MESH_POINT: case IEEE80211_IF_TYPE_STA: case IEEE80211_IF_TYPE_IBSS: sdata->u.sta.state = IEEE80211_DISABLED; + memset(sdata->u.sta.bssid, 0, ETH_ALEN); del_timer_sync(&sdata->u.sta.timer); /* * When we get here, the interface is marked down. @@ -529,8 +517,6 @@ static int ieee80211_stop(struct net_device *dev) local->sta_hw_scanning = 0; } - flush_workqueue(local->hw.workqueue); - sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; kfree(sdata->u.sta.extra_ie); sdata->u.sta.extra_ie = NULL; @@ -545,6 +531,8 @@ static int ieee80211_stop(struct net_device *dev) local->ops->remove_interface(local_to_hw(local), &conf); } + sdata->bss = NULL; + if (local->open_count == 0) { if (netif_running(local->mdev)) dev_close(local->mdev); @@ -554,6 +542,8 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_led_radio(local, 0); + flush_workqueue(local->hw.workqueue); + tasklet_disable(&local->tx_pending_tasklet); tasklet_disable(&local->tasklet); } @@ -583,17 +573,19 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) sta = sta_info_get(local, ra); if (!sta) { +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Could not find the station\n"); - rcu_read_unlock(); - return -ENOENT; +#endif + ret = -ENOENT; + goto exit; } - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_lock_bh(&sta->lock); /* we have tried too many times, receiver does not want A-MPDU */ if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { ret = -EBUSY; - goto start_ba_exit; + goto err_unlock_sta; } state = &sta->ampdu_mlme.tid_state_tx[tid]; @@ -604,18 +596,20 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) "idle on tid %u\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ ret = -EAGAIN; - goto start_ba_exit; + goto err_unlock_sta; } /* prepare A-MPDU MLME for Tx aggregation */ sta->ampdu_mlme.tid_tx[tid] = kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); if (!sta->ampdu_mlme.tid_tx[tid]) { +#ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_ERR "allocate tx mlme to tid %d failed\n", tid); +#endif ret = -ENOMEM; - goto start_ba_exit; + goto err_unlock_sta; } /* Tx timer */ sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = @@ -624,10 +618,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) (unsigned long)&sta->timer_to_tid[tid]; init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - /* ensure that TX flow won't interrupt us - * until the end of the call to requeue function */ - spin_lock_bh(&local->mdev->queue_lock); - /* create a new queue for this aggregation */ ret = ieee80211_ht_agg_queue_add(local, sta, tid); @@ -638,7 +628,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) printk(KERN_DEBUG "BA request denied - queue unavailable for" " tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - goto start_ba_err; + goto err_unlock_queue; } sdata = sta->sdata; @@ -654,18 +644,18 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) /* No need to requeue the packets in the agg queue, since we * held the tx lock: no packet could be enqueued to the newly * allocated queue */ - ieee80211_ht_agg_queue_remove(local, sta, tid, 0); + ieee80211_ht_agg_queue_remove(local, sta, tid, 0); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - HW unavailable for" " tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ *state = HT_AGG_STATE_IDLE; - goto start_ba_err; + goto err_unlock_queue; } /* Will put all the packets in the new SW queue */ ieee80211_requeue(local, ieee802_1d_to_ac[tid]); - spin_unlock_bh(&local->mdev->queue_lock); + spin_unlock_bh(&sta->lock); /* send an addBA request */ sta->ampdu_mlme.dialog_token_allocator++; @@ -673,25 +663,27 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) sta->ampdu_mlme.dialog_token_allocator; sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + ieee80211_send_addba_request(sta->sdata->dev, ra, tid, sta->ampdu_mlme.tid_tx[tid]->dialog_token, sta->ampdu_mlme.tid_tx[tid]->ssn, 0x40, 5000); - /* activate the timer for the recipient's addBA response */ sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires = jiffies + ADDBA_RESP_INTERVAL; add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); - goto start_ba_exit; +#endif + goto exit; -start_ba_err: +err_unlock_queue: kfree(sta->ampdu_mlme.tid_tx[tid]); sta->ampdu_mlme.tid_tx[tid] = NULL; - spin_unlock_bh(&local->mdev->queue_lock); ret = -EBUSY; -start_ba_exit: - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); +err_unlock_sta: + spin_unlock_bh(&sta->lock); +exit: rcu_read_unlock(); return ret; } @@ -719,7 +711,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, /* check if the TID is in aggregation */ state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_lock_bh(&sta->lock); if (*state != HT_AGG_STATE_OPERATIONAL) { ret = -ENOENT; @@ -749,7 +741,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, } stop_BA_exit: - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_unlock_bh(&sta->lock); rcu_read_unlock(); return ret; } @@ -763,8 +755,10 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) DECLARE_MAC_BUF(mac); if (tid >= STA_TID_NUM) { +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); +#endif return; } @@ -772,18 +766,22 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) sta = sta_info_get(local, ra); if (!sta) { rcu_read_unlock(); +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Could not find station: %s\n", print_mac(mac, ra)); +#endif return; } state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_lock_bh(&sta->lock); if (!(*state & HT_ADDBA_REQUESTED_MSK)) { +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", *state); - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); +#endif + spin_unlock_bh(&sta->lock); rcu_read_unlock(); return; } @@ -793,10 +791,12 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) *state |= HT_ADDBA_DRV_READY_MSK; if (*state == HT_AGG_STATE_OPERATIONAL) { +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); +#endif ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); } - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_unlock_bh(&sta->lock); rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); @@ -810,8 +810,10 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) DECLARE_MAC_BUF(mac); if (tid >= STA_TID_NUM) { +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); +#endif return; } @@ -823,17 +825,23 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) rcu_read_lock(); sta = sta_info_get(local, ra); if (!sta) { +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Could not find station: %s\n", print_mac(mac, ra)); +#endif rcu_read_unlock(); return; } state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); + /* NOTE: no need to use sta->lock in this state check, as + * ieee80211_stop_tx_ba_session will let only one stop call to + * pass through per sta/tid + */ if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) { +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); +#endif rcu_read_unlock(); return; } @@ -844,23 +852,20 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) agg_queue = sta->tid_to_tx_q[tid]; - /* avoid ordering issues: we are the only one that can modify - * the content of the qdiscs */ - spin_lock_bh(&local->mdev->queue_lock); - /* remove the queue for this aggregation */ ieee80211_ht_agg_queue_remove(local, sta, tid, 1); - spin_unlock_bh(&local->mdev->queue_lock); - /* we just requeued the all the frames that were in the removed - * queue, and since we might miss a softirq we do netif_schedule. - * ieee80211_wake_queue is not used here as this queue is not - * necessarily stopped */ - netif_schedule(local->mdev); + /* We just requeued the all the frames that were in the + * removed queue, and since we might miss a softirq we do + * netif_schedule_queue. ieee80211_wake_queue is not used + * here as this queue is not necessarily stopped + */ + netif_schedule_queue(netdev_get_tx_queue(local->mdev, agg_queue)); + spin_lock_bh(&sta->lock); *state = HT_AGG_STATE_IDLE; sta->ampdu_mlme.addba_req_num[tid] = 0; kfree(sta->ampdu_mlme.tid_tx[tid]); sta->ampdu_mlme.tid_tx[tid] = NULL; - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_unlock_bh(&sta->lock); rcu_read_unlock(); } @@ -874,9 +879,11 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb = dev_alloc_skb(0); if (unlikely(!skb)) { +#ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_WARNING "%s: Not enough memory, " "dropping start BA session", skb->dev->name); +#endif return; } ra_tid = (struct ieee80211_ra_tid *) &skb->cb; @@ -897,9 +904,11 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb = dev_alloc_skb(0); if (unlikely(!skb)) { +#ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_WARNING "%s: Not enough memory, " "dropping stop BA session", skb->dev->name); +#endif return; } ra_tid = (struct ieee80211_ra_tid *) &skb->cb; @@ -950,7 +959,6 @@ static const struct header_ops ieee80211_header_ops = { .cache_update = eth_header_cache_update, }; -/* Must not be called for mdev */ void ieee80211_if_setup(struct net_device *dev) { ether_setup(dev); @@ -960,67 +968,52 @@ void ieee80211_if_setup(struct net_device *dev) dev->change_mtu = ieee80211_change_mtu; dev->open = ieee80211_open; dev->stop = ieee80211_stop; - dev->destructor = ieee80211_if_free; + dev->destructor = free_netdev; } /* everything else */ -static int __ieee80211_if_config(struct net_device *dev, - struct sk_buff *beacon, - struct ieee80211_tx_control *control) +int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct ieee80211_if_conf conf; - if (!local->ops->config_interface || !netif_running(dev)) + if (WARN_ON(!netif_running(sdata->dev))) + return 0; + + if (!local->ops->config_interface) return 0; memset(&conf, 0, sizeof(conf)); - conf.type = sdata->vif.type; + conf.changed = changed; + if (sdata->vif.type == IEEE80211_IF_TYPE_STA || sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { conf.bssid = sdata->u.sta.bssid; conf.ssid = sdata->u.sta.ssid; conf.ssid_len = sdata->u.sta.ssid_len; - } else if (ieee80211_vif_is_mesh(&sdata->vif)) { - conf.beacon = beacon; - conf.beacon_control = control; - ieee80211_start_mesh(dev); } else if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + conf.bssid = sdata->dev->dev_addr; conf.ssid = sdata->u.ap.ssid; conf.ssid_len = sdata->u.ap.ssid_len; - conf.beacon = beacon; - conf.beacon_control = control; + } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + u8 zero[ETH_ALEN] = { 0 }; + conf.bssid = zero; + conf.ssid = zero; + conf.ssid_len = 0; + } else { + WARN_ON(1); + return -EINVAL; } - return local->ops->config_interface(local_to_hw(local), - &sdata->vif, &conf); -} -int ieee80211_if_config(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT && - (local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE)) - return ieee80211_if_config_beacon(dev); - return __ieee80211_if_config(dev, NULL, NULL); -} + if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID))) + return -EINVAL; -int ieee80211_if_config_beacon(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_control control; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct sk_buff *skb; + if (WARN_ON(!conf.ssid && (changed & IEEE80211_IFCC_SSID))) + return -EINVAL; - if (!(local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE)) - return 0; - skb = ieee80211_beacon_get(local_to_hw(local), &sdata->vif, - &control); - if (!skb) - return -ENOMEM; - return __ieee80211_if_config(dev, skb, &control); + return local->ops->config_interface(local_to_hw(local), + &sdata->vif, &conf); } int ieee80211_hw_config(struct ieee80211_local *local) @@ -1067,56 +1060,84 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht, struct ieee80211_supported_band *sband; struct ieee80211_ht_info ht_conf; struct ieee80211_ht_bss_info ht_bss_conf; - int i; u32 changed = 0; + int i; + u8 max_tx_streams = IEEE80211_HT_CAP_MAX_STREAMS; + u8 tx_mcs_set_cap; sband = local->hw.wiphy->bands[conf->channel->band]; + memset(&ht_conf, 0, sizeof(struct ieee80211_ht_info)); + memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info)); + /* HT is not supported */ if (!sband->ht_info.ht_supported) { conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; - return 0; + goto out; } - memset(&ht_conf, 0, sizeof(struct ieee80211_ht_info)); - memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info)); - - if (enable_ht) { - if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)) + /* disable HT */ + if (!enable_ht) { + if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) changed |= BSS_CHANGED_HT; + conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; + conf->ht_conf.ht_supported = 0; + goto out; + } - conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE; - ht_conf.ht_supported = 1; - ht_conf.cap = req_ht_cap->cap & sband->ht_info.cap; - ht_conf.cap &= ~(IEEE80211_HT_CAP_MIMO_PS); - ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_MIMO_PS; + if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)) + changed |= BSS_CHANGED_HT; - for (i = 0; i < SUPP_MCS_SET_LEN; i++) - ht_conf.supp_mcs_set[i] = - sband->ht_info.supp_mcs_set[i] & - req_ht_cap->supp_mcs_set[i]; + conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE; + ht_conf.ht_supported = 1; - ht_bss_conf.primary_channel = req_bss_cap->primary_channel; - ht_bss_conf.bss_cap = req_bss_cap->bss_cap; - ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode; + ht_conf.cap = req_ht_cap->cap & sband->ht_info.cap; + ht_conf.cap &= ~(IEEE80211_HT_CAP_MIMO_PS); + ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_MIMO_PS; + ht_bss_conf.primary_channel = req_bss_cap->primary_channel; + ht_bss_conf.bss_cap = req_bss_cap->bss_cap; + ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode; - ht_conf.ampdu_factor = req_ht_cap->ampdu_factor; - ht_conf.ampdu_density = req_ht_cap->ampdu_density; + ht_conf.ampdu_factor = req_ht_cap->ampdu_factor; + ht_conf.ampdu_density = req_ht_cap->ampdu_density; - /* if bss configuration changed store the new one */ - if (memcmp(&conf->ht_conf, &ht_conf, sizeof(ht_conf)) || - memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) { - changed |= BSS_CHANGED_HT; - memcpy(&conf->ht_conf, &ht_conf, sizeof(ht_conf)); - memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf)); - } - } else { - if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) - changed |= BSS_CHANGED_HT; - conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; - } + /* Bits 96-100 */ + tx_mcs_set_cap = sband->ht_info.supp_mcs_set[12]; + + /* configure suppoerted Tx MCS according to requested MCS + * (based in most cases on Rx capabilities of peer) and self + * Tx MCS capabilities (as defined by low level driver HW + * Tx capabilities) */ + if (!(tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_DEFINED)) + goto check_changed; + + /* Counting from 0 therfore + 1 */ + if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_RX_DIFF) + max_tx_streams = ((tx_mcs_set_cap & + IEEE80211_HT_CAP_MCS_TX_STREAMS) >> 2) + 1; + for (i = 0; i < max_tx_streams; i++) + ht_conf.supp_mcs_set[i] = + sband->ht_info.supp_mcs_set[i] & + req_ht_cap->supp_mcs_set[i]; + + if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_UEQM) + for (i = IEEE80211_SUPP_MCS_SET_UEQM; + i < IEEE80211_SUPP_MCS_SET_LEN; i++) + ht_conf.supp_mcs_set[i] = + sband->ht_info.supp_mcs_set[i] & + req_ht_cap->supp_mcs_set[i]; + +check_changed: + /* if bss configuration changed store the new one */ + if (memcmp(&conf->ht_conf, &ht_conf, sizeof(ht_conf)) || + memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) { + changed |= BSS_CHANGED_HT; + memcpy(&conf->ht_conf, &ht_conf, sizeof(ht_conf)); + memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf)); + } +out: return changed; } @@ -1135,50 +1156,30 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, changed); } -void ieee80211_reset_erp_info(struct net_device *dev) +u32 ieee80211_reset_erp_info(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata->bss_conf.use_cts_prot = 0; sdata->bss_conf.use_short_preamble = 0; - ieee80211_bss_info_change_notify(sdata, - BSS_CHANGED_ERP_CTS_PROT | - BSS_CHANGED_ERP_PREAMBLE); + return BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE; } void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, - struct sk_buff *skb, - struct ieee80211_tx_status *status) + struct sk_buff *skb) { struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_tx_status *saved; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int tmp; skb->dev = local->mdev; - saved = kmalloc(sizeof(struct ieee80211_tx_status), GFP_ATOMIC); - if (unlikely(!saved)) { - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping tx status", skb->dev->name); - /* should be dev_kfree_skb_irq, but due to this function being - * named _irqsafe instead of just _irq we can't be sure that - * people won't call it from non-irq contexts */ - dev_kfree_skb_any(skb); - return; - } - memcpy(saved, status, sizeof(struct ieee80211_tx_status)); - /* copy pointer to saved status into skb->cb for use by tasklet */ - memcpy(skb->cb, &saved, sizeof(saved)); - skb->pkt_type = IEEE80211_TX_STATUS_MSG; - skb_queue_tail(status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS ? + skb_queue_tail(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS ? &local->skb_queue : &local->skb_queue_unreliable, skb); tmp = skb_queue_len(&local->skb_queue) + skb_queue_len(&local->skb_queue_unreliable); while (tmp > IEEE80211_IRQSAFE_QUEUE_LIMIT && (skb = skb_dequeue(&local->skb_queue_unreliable))) { - memcpy(&saved, skb->cb, sizeof(saved)); - kfree(saved); dev_kfree_skb_irq(skb); tmp--; I802_DEBUG_INC(local->tx_status_drop); @@ -1192,7 +1193,6 @@ static void ieee80211_tasklet_handler(unsigned long data) struct ieee80211_local *local = (struct ieee80211_local *) data; struct sk_buff *skb; struct ieee80211_rx_status rx_status; - struct ieee80211_tx_status *tx_status; struct ieee80211_ra_tid *ra_tid; while ((skb = skb_dequeue(&local->skb_queue)) || @@ -1207,12 +1207,8 @@ static void ieee80211_tasklet_handler(unsigned long data) __ieee80211_rx(local_to_hw(local), skb, &rx_status); break; case IEEE80211_TX_STATUS_MSG: - /* get pointer to saved status out of skb->cb */ - memcpy(&tx_status, skb->cb, sizeof(tx_status)); skb->pkt_type = 0; - ieee80211_tx_status(local_to_hw(local), - skb, tx_status); - kfree(tx_status); + ieee80211_tx_status(local_to_hw(local), skb); break; case IEEE80211_DELBA_MSG: ra_tid = (struct ieee80211_ra_tid *) &skb->cb; @@ -1226,9 +1222,8 @@ static void ieee80211_tasklet_handler(unsigned long data) ra_tid->ra, ra_tid->tid); dev_kfree_skb(skb); break ; - default: /* should never get here! */ - printk(KERN_ERR "%s: Unknown message type (%d)\n", - wiphy_name(local->hw.wiphy), skb->pkt_type); + default: + WARN_ON(1); dev_kfree_skb(skb); break; } @@ -1241,24 +1236,15 @@ static void ieee80211_tasklet_handler(unsigned long data) * Also, tx_packet_data in cb is restored from tx_control. */ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, struct ieee80211_key *key, - struct sk_buff *skb, - struct ieee80211_tx_control *control) + struct sk_buff *skb) { int hdrlen, iv_len, mic_len; - struct ieee80211_tx_packet_data *pkt_data; - - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - pkt_data->ifindex = vif_to_sdata(control->vif)->dev->ifindex; - pkt_data->flags = 0; - if (control->flags & IEEE80211_TXCTL_REQ_TX_STATUS) - pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS; - if (control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT) - pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; - if (control->flags & IEEE80211_TXCTL_REQUEUE) - pkt_data->flags |= IEEE80211_TXPD_REQUEUE; - if (control->flags & IEEE80211_TXCTL_EAPOL_FRAME) - pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME; - pkt_data->queue = control->queue; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + info->flags &= IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_CTL_DO_NOT_ENCRYPT | + IEEE80211_TX_CTL_REQUEUE | + IEEE80211_TX_CTL_EAPOL_FRAME; hdrlen = ieee80211_get_hdrlen_from_skb(skb); @@ -1305,17 +1291,18 @@ no_key: static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct sta_info *sta, - struct sk_buff *skb, - struct ieee80211_tx_status *status) + struct sk_buff *skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + sta->tx_filtered_count++; /* * Clear the TX filter mask for this STA when sending the next * packet. If the STA went to power save mode, this will happen - * happen when it wakes up for the next time. + * when it wakes up for the next time. */ - sta->flags |= WLAN_STA_CLEAR_PS_FILT; + set_sta_flags(sta, WLAN_STA_CLEAR_PS_FILT); /* * This code races in the following way: @@ -1347,84 +1334,89 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * can be unknown, for example with different interrupt status * bits. */ - if (sta->flags & WLAN_STA_PS && + if (test_sta_flags(sta, WLAN_STA_PS) && skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { - ieee80211_remove_tx_extra(local, sta->key, skb, - &status->control); + ieee80211_remove_tx_extra(local, sta->key, skb); skb_queue_tail(&sta->tx_filtered, skb); return; } - if (!(sta->flags & WLAN_STA_PS) && - !(status->control.flags & IEEE80211_TXCTL_REQUEUE)) { + if (!test_sta_flags(sta, WLAN_STA_PS) && + !(info->flags & IEEE80211_TX_CTL_REQUEUE)) { /* Software retry the packet once */ - status->control.flags |= IEEE80211_TXCTL_REQUEUE; - ieee80211_remove_tx_extra(local, sta->key, skb, - &status->control); + info->flags |= IEEE80211_TX_CTL_REQUEUE; + ieee80211_remove_tx_extra(local, sta->key, skb); dev_queue_xmit(skb); return; } +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "%s: dropped TX filtered frame, " "queue_len=%d PS=%d @%lu\n", wiphy_name(local->hw.wiphy), skb_queue_len(&sta->tx_filtered), - !!(sta->flags & WLAN_STA_PS), jiffies); + !!test_sta_flags(sta, WLAN_STA_PS), jiffies); +#endif dev_kfree_skb(skb); } -void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_tx_status *status) +void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) { struct sk_buff *skb2; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); u16 frag, type; + __le16 fc; struct ieee80211_tx_status_rtap_hdr *rthdr; struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; - - if (!status) { - printk(KERN_ERR - "%s: ieee80211_tx_status called with NULL status\n", - wiphy_name(local->hw.wiphy)); - dev_kfree_skb(skb); - return; - } + struct sta_info *sta; rcu_read_lock(); - if (status->excessive_retries) { - struct sta_info *sta; + if (info->status.excessive_retries) { sta = sta_info_get(local, hdr->addr1); if (sta) { - if (sta->flags & WLAN_STA_PS) { + if (test_sta_flags(sta, WLAN_STA_PS)) { /* * The STA is in power save mode, so assume * that this TX packet failed because of that. */ - status->excessive_retries = 0; - status->flags |= IEEE80211_TX_STATUS_TX_FILTERED; - ieee80211_handle_filtered_frame(local, sta, - skb, status); + ieee80211_handle_filtered_frame(local, sta, skb); rcu_read_unlock(); return; } } } - if (status->flags & IEEE80211_TX_STATUS_TX_FILTERED) { - struct sta_info *sta; + fc = hdr->frame_control; + + if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && + (ieee80211_is_data_qos(fc))) { + u16 tid, ssn; + u8 *qc; + sta = sta_info_get(local, hdr->addr1); + if (sta) { + qc = ieee80211_get_qos_ctl(hdr); + tid = qc[0] & 0xf; + ssn = ((le16_to_cpu(hdr->seq_ctrl) + 0x10) + & IEEE80211_SCTL_SEQ); + ieee80211_send_bar(sta->sdata->dev, hdr->addr1, + tid, ssn); + } + } + + if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) { sta = sta_info_get(local, hdr->addr1); if (sta) { - ieee80211_handle_filtered_frame(local, sta, skb, - status); + ieee80211_handle_filtered_frame(local, sta, skb); rcu_read_unlock(); return; } } else - rate_control_tx_status(local->mdev, skb, status); + rate_control_tx_status(local->mdev, skb); rcu_read_unlock(); @@ -1438,14 +1430,14 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, frag = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG; type = le16_to_cpu(hdr->frame_control) & IEEE80211_FCTL_FTYPE; - if (status->flags & IEEE80211_TX_STATUS_ACK) { + if (info->flags & IEEE80211_TX_STAT_ACK) { if (frag == 0) { local->dot11TransmittedFrameCount++; if (is_multicast_ether_addr(hdr->addr1)) local->dot11MulticastTransmittedFrameCount++; - if (status->retry_count > 0) + if (info->status.retry_count > 0) local->dot11RetryCount++; - if (status->retry_count > 1) + if (info->status.retry_count > 1) local->dot11MultipleRetryCount++; } @@ -1482,7 +1474,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, return; } - rthdr = (struct ieee80211_tx_status_rtap_hdr*) + rthdr = (struct ieee80211_tx_status_rtap_hdr *) skb_push(skb, sizeof(*rthdr)); memset(rthdr, 0, sizeof(*rthdr)); @@ -1491,17 +1483,17 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | (1 << IEEE80211_RADIOTAP_DATA_RETRIES)); - if (!(status->flags & IEEE80211_TX_STATUS_ACK) && + if (!(info->flags & IEEE80211_TX_STAT_ACK) && !is_multicast_ether_addr(hdr->addr1)) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); - if ((status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS) && - (status->control.flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) + if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) && + (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); - else if (status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS) + else if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); - rthdr->data_retries = status->retry_count; + rthdr->data_retries = info->status.retry_count; /* XXX: is this sufficient for BPF? */ skb_set_mac_header(skb, 0); @@ -1627,7 +1619,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) int result; enum ieee80211_band band; struct net_device *mdev; - struct ieee80211_sub_if_data *sdata; + struct wireless_dev *mwdev; /* * generic code guarantees at least one band, @@ -1651,19 +1643,30 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (result < 0) return result; - /* for now, mdev needs sub_if_data :/ */ - mdev = alloc_netdev(sizeof(struct ieee80211_sub_if_data), - "wmaster%d", ether_setup); + /* + * We use the number of queues for feature tests (QoS, HT) internally + * so restrict them appropriately. + */ + if (hw->queues > IEEE80211_MAX_QUEUES) + hw->queues = IEEE80211_MAX_QUEUES; + if (hw->ampdu_queues > IEEE80211_MAX_AMPDU_QUEUES) + hw->ampdu_queues = IEEE80211_MAX_AMPDU_QUEUES; + if (hw->queues < 4) + hw->ampdu_queues = 0; + + mdev = alloc_netdev_mq(sizeof(struct wireless_dev), + "wmaster%d", ether_setup, + ieee80211_num_queues(hw)); if (!mdev) goto fail_mdev_alloc; - sdata = IEEE80211_DEV_TO_SUB_IF(mdev); - mdev->ieee80211_ptr = &sdata->wdev; - sdata->wdev.wiphy = local->hw.wiphy; + mwdev = netdev_priv(mdev); + mdev->ieee80211_ptr = mwdev; + mwdev->wiphy = local->hw.wiphy; local->mdev = mdev; - ieee80211_rx_bss_list_init(mdev); + ieee80211_rx_bss_list_init(local); mdev->hard_start_xmit = ieee80211_master_start_xmit; mdev->open = ieee80211_master_open; @@ -1672,18 +1675,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) mdev->header_ops = &ieee80211_header_ops; mdev->set_multicast_list = ieee80211_master_set_multicast_list; - sdata->vif.type = IEEE80211_IF_TYPE_AP; - sdata->dev = mdev; - sdata->local = local; - sdata->u.ap.force_unicast_rateidx = -1; - sdata->u.ap.max_ratectrl_rateidx = -1; - ieee80211_if_sdata_init(sdata); - - /* no RCU needed since we're still during init phase */ - list_add_tail(&sdata->list, &local->interfaces); - name = wiphy_dev(local->hw.wiphy)->driver->name; - local->hw.workqueue = create_singlethread_workqueue(name); + local->hw.workqueue = create_freezeable_workqueue(name); if (!local->hw.workqueue) { result = -ENOMEM; goto fail_workqueue; @@ -1699,15 +1692,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) debugfs_hw_add(local); - local->hw.conf.beacon_int = 1000; + if (local->hw.conf.beacon_int < 10) + local->hw.conf.beacon_int = 100; - local->wstats_flags |= local->hw.max_rssi ? - IW_QUAL_LEVEL_UPDATED : IW_QUAL_LEVEL_INVALID; - local->wstats_flags |= local->hw.max_signal ? + local->wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | + IEEE80211_HW_SIGNAL_DB | + IEEE80211_HW_SIGNAL_DBM) ? IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID; - local->wstats_flags |= local->hw.max_noise ? + local->wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ? IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID; - if (local->hw.max_rssi < 0 || local->hw.max_noise < 0) + if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) local->wstats_flags |= IW_QUAL_DBM; result = sta_info_start(local); @@ -1726,9 +1720,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (result < 0) goto fail_dev; - ieee80211_debugfs_add_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev)); - ieee80211_if_set_type(local->mdev, IEEE80211_IF_TYPE_AP); - result = ieee80211_init_rate_ctrl_alg(local, hw->rate_control_algorithm); if (result < 0) { @@ -1745,16 +1736,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_wep; } - ieee80211_install_qdisc(local->mdev); + local->mdev->select_queue = ieee80211_select_queue; /* add one default STA interface */ - result = ieee80211_if_add(local->mdev, "wlan%d", NULL, + result = ieee80211_if_add(local, "wlan%d", NULL, IEEE80211_IF_TYPE_STA, NULL); if (result) printk(KERN_WARNING "%s: Failed to add default virtual iface\n", wiphy_name(local->hw.wiphy)); - local->reg_state = IEEE80211_DEV_REGISTERED; rtnl_unlock(); ieee80211_led_init(local); @@ -1764,8 +1754,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) fail_wep: rate_control_deinitialize(local); fail_rate: - ieee80211_debugfs_remove_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev)); unregister_netdevice(local->mdev); + local->mdev = NULL; fail_dev: rtnl_unlock(); sta_info_stop(local); @@ -1773,8 +1763,8 @@ fail_sta_info: debugfs_hw_del(local); destroy_workqueue(local->hw.workqueue); fail_workqueue: - ieee80211_if_free(local->mdev); - local->mdev = NULL; + if (local->mdev) + free_netdev(local->mdev); fail_mdev_alloc: wiphy_unregister(local->hw.wiphy); return result; @@ -1784,42 +1774,27 @@ EXPORT_SYMBOL(ieee80211_register_hw); void ieee80211_unregister_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata, *tmp; tasklet_kill(&local->tx_pending_tasklet); tasklet_kill(&local->tasklet); rtnl_lock(); - BUG_ON(local->reg_state != IEEE80211_DEV_REGISTERED); - - local->reg_state = IEEE80211_DEV_UNREGISTERED; - /* * At this point, interface list manipulations are fine * because the driver cannot be handing us frames any * more and the tasklet is killed. */ - /* - * First, we remove all non-master interfaces. Do this because they - * may have bss pointer dependency on the master, and when we free - * the master these would be freed as well, breaking our list - * iteration completely. - */ - list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { - if (sdata->dev == local->mdev) - continue; - list_del(&sdata->list); - __ieee80211_if_del(local, sdata); - } + /* First, we remove all virtual interfaces. */ + ieee80211_remove_interfaces(local); /* then, finally, remove the master interface */ - __ieee80211_if_del(local, IEEE80211_DEV_TO_SUB_IF(local->mdev)); + unregister_netdevice(local->mdev); rtnl_unlock(); - ieee80211_rx_bss_list_deinit(local->mdev); + ieee80211_rx_bss_list_deinit(local); ieee80211_clear_tx_pending(local); sta_info_stop(local); rate_control_deinitialize(local); @@ -1836,8 +1811,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) wiphy_unregister(local->hw.wiphy); ieee80211_wep_free(local); ieee80211_led_exit(local); - ieee80211_if_free(local->mdev); - local->mdev = NULL; + free_netdev(local->mdev); } EXPORT_SYMBOL(ieee80211_unregister_hw); @@ -1854,27 +1828,17 @@ static int __init ieee80211_init(void) struct sk_buff *skb; int ret; - BUILD_BUG_ON(sizeof(struct ieee80211_tx_packet_data) > sizeof(skb->cb)); + BUILD_BUG_ON(sizeof(struct ieee80211_tx_info) > sizeof(skb->cb)); + BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, driver_data) + + IEEE80211_TX_INFO_DRIVER_DATA_SIZE > sizeof(skb->cb)); ret = rc80211_pid_init(); if (ret) - goto out; - - ret = ieee80211_wme_register(); - if (ret) { - printk(KERN_DEBUG "ieee80211_init: failed to " - "initialize WME (err=%d)\n", ret); - goto out_cleanup_pid; - } + return ret; ieee80211_debugfs_netdev_init(); return 0; - - out_cleanup_pid: - rc80211_pid_exit(); - out: - return ret; } static void __exit ieee80211_exit(void) @@ -1890,7 +1854,6 @@ static void __exit ieee80211_exit(void) if (mesh_allocated) ieee80211s_stop(); - ieee80211_wme_unregister(); ieee80211_debugfs_netdev_exit(); } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index f76bc26ae4d2..b5933b271491 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -315,6 +315,13 @@ struct mesh_table *mesh_table_alloc(int size_order) return newtbl; } +static void __mesh_table_free(struct mesh_table *tbl) +{ + kfree(tbl->hash_buckets); + kfree(tbl->hashwlock); + kfree(tbl); +} + void mesh_table_free(struct mesh_table *tbl, bool free_leafs) { struct hlist_head *mesh_hash; @@ -330,9 +337,7 @@ void mesh_table_free(struct mesh_table *tbl, bool free_leafs) } spin_unlock(&tbl->hashwlock[i]); } - kfree(tbl->hash_buckets); - kfree(tbl->hashwlock); - kfree(tbl); + __mesh_table_free(tbl); } static void ieee80211_mesh_path_timer(unsigned long data) @@ -349,21 +354,16 @@ struct mesh_table *mesh_table_grow(struct mesh_table *tbl) { struct mesh_table *newtbl; struct hlist_head *oldhash; - struct hlist_node *p; - int err = 0; + struct hlist_node *p, *q; int i; if (atomic_read(&tbl->entries) - < tbl->mean_chain_len * (tbl->hash_mask + 1)) { - err = -EPERM; + < tbl->mean_chain_len * (tbl->hash_mask + 1)) goto endgrow; - } newtbl = mesh_table_alloc(tbl->size_order + 1); - if (!newtbl) { - err = -ENOMEM; + if (!newtbl) goto endgrow; - } newtbl->free_node = tbl->free_node; newtbl->mean_chain_len = tbl->mean_chain_len; @@ -373,13 +373,19 @@ struct mesh_table *mesh_table_grow(struct mesh_table *tbl) oldhash = tbl->hash_buckets; for (i = 0; i <= tbl->hash_mask; i++) hlist_for_each(p, &oldhash[i]) - tbl->copy_node(p, newtbl); + if (tbl->copy_node(p, newtbl) < 0) + goto errcopy; + return newtbl; + +errcopy: + for (i = 0; i <= newtbl->hash_mask; i++) { + hlist_for_each_safe(p, q, &newtbl->hash_buckets[i]) + tbl->free_node(p, 0); + } + __mesh_table_free(tbl); endgrow: - if (err) - return NULL; - else - return newtbl; + return NULL; } /** @@ -397,7 +403,7 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, put_unaligned(cpu_to_le32(sdata->u.sta.mesh_seqnum), &meshhdr->seqnum); sdata->u.sta.mesh_seqnum++; - return 5; + return 6; } void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 2e161f6d8288..669eafafe497 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -109,7 +109,7 @@ struct mesh_table { __u32 hash_rnd; /* Used for hash generation */ atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ void (*free_node) (struct hlist_node *p, bool free_leafs); - void (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl); + int (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl); int size_order; int mean_chain_len; }; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 3df809222d1c..7fa149e230e6 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -26,7 +26,7 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; - return le32_to_cpu(get_unaligned((__le32 *) (preq_elem + offset))); + return get_unaligned_le32(preq_elem + offset); } /* HWMP IE processing macros */ @@ -120,7 +120,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, *pos++ = WLAN_EID_PREP; break; default: - kfree(skb); + kfree_skb(skb); return -ENOTSUPP; break; } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 5845dc21ce85..5f88a2e6ee50 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -158,19 +158,20 @@ int mesh_path_add(u8 *dst, struct net_device *dev) if (atomic_add_unless(&sdata->u.sta.mpaths, 1, MESH_MAX_MPATHS) == 0) return -ENOSPC; - read_lock(&pathtbl_resize_lock); - + err = -ENOMEM; new_mpath = kzalloc(sizeof(struct mesh_path), GFP_KERNEL); - if (!new_mpath) { - atomic_dec(&sdata->u.sta.mpaths); - err = -ENOMEM; - goto endadd2; - } + if (!new_mpath) + goto err_path_alloc; + + new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL); + if (!new_node) + goto err_node_alloc; + + read_lock(&pathtbl_resize_lock); memcpy(new_mpath->dst, dst, ETH_ALEN); new_mpath->dev = dev; new_mpath->flags = 0; skb_queue_head_init(&new_mpath->frame_queue); - new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL); new_node->mpath = new_mpath; new_mpath->timer.data = (unsigned long) new_mpath; new_mpath->timer.function = mesh_path_timer; @@ -183,16 +184,11 @@ int mesh_path_add(u8 *dst, struct net_device *dev) spin_lock(&mesh_paths->hashwlock[hash_idx]); + err = -EEXIST; hlist_for_each_entry(node, n, bucket, list) { mpath = node->mpath; - if (mpath->dev == dev && memcmp(dst, mpath->dst, ETH_ALEN) - == 0) { - err = -EEXIST; - atomic_dec(&sdata->u.sta.mpaths); - kfree(new_node); - kfree(new_mpath); - goto endadd; - } + if (mpath->dev == dev && memcmp(dst, mpath->dst, ETH_ALEN) == 0) + goto err_exists; } hlist_add_head_rcu(&new_node->list, bucket); @@ -200,11 +196,9 @@ int mesh_path_add(u8 *dst, struct net_device *dev) mesh_paths->mean_chain_len * (mesh_paths->hash_mask + 1)) grow = 1; -endadd: spin_unlock(&mesh_paths->hashwlock[hash_idx]); -endadd2: read_unlock(&pathtbl_resize_lock); - if (!err && grow) { + if (grow) { struct mesh_table *oldtbl, *newtbl; write_lock(&pathtbl_resize_lock); @@ -212,13 +206,24 @@ endadd2: newtbl = mesh_table_grow(mesh_paths); if (!newtbl) { write_unlock(&pathtbl_resize_lock); - return -ENOMEM; + return 0; } rcu_assign_pointer(mesh_paths, newtbl); + write_unlock(&pathtbl_resize_lock); + synchronize_rcu(); mesh_table_free(oldtbl, false); - write_unlock(&pathtbl_resize_lock); } + return 0; + +err_exists: + spin_unlock(&mesh_paths->hashwlock[hash_idx]); + read_unlock(&pathtbl_resize_lock); + kfree(new_node); +err_node_alloc: + kfree(new_mpath); +err_path_alloc: + atomic_dec(&sdata->u.sta.mpaths); return err; } @@ -257,7 +262,6 @@ void mesh_plink_broken(struct sta_info *sta) } rcu_read_unlock(); } -EXPORT_SYMBOL(mesh_plink_broken); /** * mesh_path_flush_by_nexthop - Deletes mesh paths if their next hop matches @@ -453,25 +457,28 @@ static void mesh_path_node_free(struct hlist_node *p, bool free_leafs) struct mpath_node *node = hlist_entry(p, struct mpath_node, list); mpath = node->mpath; hlist_del_rcu(p); - synchronize_rcu(); if (free_leafs) kfree(mpath); kfree(node); } -static void mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl) +static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl) { struct mesh_path *mpath; struct mpath_node *node, *new_node; u32 hash_idx; + new_node = kmalloc(sizeof(struct mpath_node), GFP_ATOMIC); + if (new_node == NULL) + return -ENOMEM; + node = hlist_entry(p, struct mpath_node, list); mpath = node->mpath; - new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL); new_node->mpath = mpath; hash_idx = mesh_table_hash(mpath->dst, mpath->dev, newtbl); hlist_add_head(&new_node->list, &newtbl->hash_buckets[hash_idx]); + return 0; } int mesh_pathtbl_init(void) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 37f0c2b94ae7..9efeb1f07025 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -79,7 +79,7 @@ void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) * * @sta: mes peer link to restart * - * Locking: this function must be called holding sta->plink_lock + * Locking: this function must be called holding sta->lock */ static inline void mesh_plink_fsm_restart(struct sta_info *sta) { @@ -105,7 +105,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, if (!sta) return NULL; - sta->flags |= WLAN_STA_AUTHORIZED; + sta->flags = WLAN_STA_AUTHORIZED; sta->supp_rates[local->hw.conf.channel->band] = rates; return sta; @@ -118,7 +118,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, * * All mesh paths with this peer as next hop will be flushed * - * Locking: the caller must hold sta->plink_lock + * Locking: the caller must hold sta->lock */ static void __mesh_plink_deactivate(struct sta_info *sta) { @@ -139,9 +139,9 @@ static void __mesh_plink_deactivate(struct sta_info *sta) */ void mesh_plink_deactivate(struct sta_info *sta) { - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); __mesh_plink_deactivate(sta); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); } static int mesh_plink_frame_tx(struct net_device *dev, @@ -270,10 +270,10 @@ static void mesh_plink_timer(unsigned long data) */ sta = (struct sta_info *) data; - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); if (sta->ignore_plink_timer) { sta->ignore_plink_timer = false; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); return; } mpl_dbg("Mesh plink timer for %s fired on state %d\n", @@ -298,7 +298,7 @@ static void mesh_plink_timer(unsigned long data) rand % sta->plink_timeout; ++sta->plink_retries; mod_plink_timer(sta, sta->plink_timeout); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(dev, PLINK_OPEN, sta->addr, llid, 0, 0); break; @@ -311,7 +311,7 @@ static void mesh_plink_timer(unsigned long data) reason = cpu_to_le16(MESH_CONFIRM_TIMEOUT); sta->plink_state = PLINK_HOLDING; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, plid, reason); break; @@ -319,10 +319,10 @@ static void mesh_plink_timer(unsigned long data) /* holding timer */ del_timer(&sta->plink_timer); mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } } @@ -344,16 +344,16 @@ int mesh_plink_open(struct sta_info *sta) DECLARE_MAC_BUF(mac); #endif - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); get_random_bytes(&llid, 2); sta->llid = llid; if (sta->plink_state != PLINK_LISTEN) { - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); return -EBUSY; } sta->plink_state = PLINK_OPN_SNT; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink: starting establishment with %s\n", print_mac(mac, sta->addr)); @@ -367,10 +367,10 @@ void mesh_plink_block(struct sta_info *sta) DECLARE_MAC_BUF(mac); #endif - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); __mesh_plink_deactivate(sta); sta->plink_state = PLINK_BLOCKED; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); } int mesh_plink_close(struct sta_info *sta) @@ -383,14 +383,14 @@ int mesh_plink_close(struct sta_info *sta) mpl_dbg("Mesh plink: closing link with %s\n", print_mac(mac, sta->addr)); - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); sta->reason = cpu_to_le16(MESH_LINK_CANCELLED); reason = sta->reason; if (sta->plink_state == PLINK_LISTEN || sta->plink_state == PLINK_BLOCKED) { mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); return 0; } else if (sta->plink_state == PLINK_ESTAB) { __mesh_plink_deactivate(sta); @@ -402,7 +402,7 @@ int mesh_plink_close(struct sta_info *sta) sta->plink_state = PLINK_HOLDING; llid = sta->llid; plid = sta->plid; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(sta->sdata->dev, PLINK_CLOSE, sta->addr, llid, plid, reason); return 0; @@ -490,7 +490,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, /* avoid warning */ break; } - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); } else if (!sta) { /* ftype == PLINK_OPEN */ u64 rates; @@ -512,9 +512,9 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, return; } event = OPN_ACPT; - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); } else { - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); switch (ftype) { case PLINK_OPEN: if (!mesh_plink_free_count(sdata) || @@ -551,7 +551,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, break; default: mpl_dbg("Mesh plink: unknown frame subtype\n"); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); rcu_read_unlock(); return; } @@ -568,7 +568,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, switch (event) { case CLS_ACPT: mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; case OPN_ACPT: sta->plink_state = PLINK_OPN_RCVD; @@ -576,14 +576,14 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, get_random_bytes(&llid, 2); sta->llid = llid; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(dev, PLINK_OPEN, sta->addr, llid, 0, 0); mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, plid, 0); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } break; @@ -603,7 +603,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->ignore_plink_timer = true; llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, plid, reason); break; @@ -612,7 +612,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->plink_state = PLINK_OPN_RCVD; sta->plid = plid; llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, plid, 0); break; @@ -622,10 +622,10 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, dot11MeshConfirmTimeout(sdata))) sta->ignore_plink_timer = true; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } break; @@ -645,13 +645,13 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->ignore_plink_timer = true; llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, plid, 0); break; @@ -659,12 +659,12 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, del_timer(&sta->plink_timer); sta->plink_state = PLINK_ESTAB; mesh_plink_inc_estab_count(sdata); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink with %s ESTABLISHED\n", print_mac(mac, sta->addr)); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } break; @@ -684,7 +684,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->ignore_plink_timer = true; llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, plid, reason); break; @@ -692,14 +692,14 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, del_timer(&sta->plink_timer); sta->plink_state = PLINK_ESTAB; mesh_plink_inc_estab_count(sdata); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink with %s ESTABLISHED\n", print_mac(mac, sta->addr)); mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, plid, 0); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } break; @@ -713,18 +713,18 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->plink_state = PLINK_HOLDING; llid = sta->llid; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, plid, 0); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } break; @@ -734,7 +734,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, if (del_timer(&sta->plink_timer)) sta->ignore_plink_timer = 1; mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; case OPN_ACPT: case CNF_ACPT: @@ -742,19 +742,19 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, case CNF_RJCT: llid = sta->llid; reason = sta->reason; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, plid, reason); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); } break; default: /* should not get here, PLINK_BLOCKED is dealt with at the * beggining of the function */ - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c index 0f844f7895f1..408649bd4702 100644 --- a/net/mac80211/michael.c +++ b/net/mac80211/michael.c @@ -6,85 +6,68 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - #include <linux/types.h> +#include <linux/bitops.h> +#include <linux/ieee80211.h> +#include <asm/unaligned.h> #include "michael.h" -static inline u32 rotr(u32 val, int bits) -{ - return (val >> bits) | (val << (32 - bits)); -} - - -static inline u32 rotl(u32 val, int bits) -{ - return (val << bits) | (val >> (32 - bits)); -} - - -static inline u32 xswap(u32 val) -{ - return ((val & 0xff00ff00) >> 8) | ((val & 0x00ff00ff) << 8); -} - - -#define michael_block(l, r) \ -do { \ - r ^= rotl(l, 17); \ - l += r; \ - r ^= xswap(l); \ - l += r; \ - r ^= rotl(l, 3); \ - l += r; \ - r ^= rotr(l, 2); \ - l += r; \ -} while (0) - - -static inline u32 michael_get32(u8 *data) +static void michael_block(struct michael_mic_ctx *mctx, u32 val) { - return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); + mctx->l ^= val; + mctx->r ^= rol32(mctx->l, 17); + mctx->l += mctx->r; + mctx->r ^= ((mctx->l & 0xff00ff00) >> 8) | + ((mctx->l & 0x00ff00ff) << 8); + mctx->l += mctx->r; + mctx->r ^= rol32(mctx->l, 3); + mctx->l += mctx->r; + mctx->r ^= ror32(mctx->l, 2); + mctx->l += mctx->r; } - -static inline void michael_put32(u32 val, u8 *data) +static void michael_mic_hdr(struct michael_mic_ctx *mctx, const u8 *key, + struct ieee80211_hdr *hdr) { - data[0] = val & 0xff; - data[1] = (val >> 8) & 0xff; - data[2] = (val >> 16) & 0xff; - data[3] = (val >> 24) & 0xff; + u8 *da, *sa, tid; + + da = ieee80211_get_DA(hdr); + sa = ieee80211_get_SA(hdr); + if (ieee80211_is_data_qos(hdr->frame_control)) + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + else + tid = 0; + + mctx->l = get_unaligned_le32(key); + mctx->r = get_unaligned_le32(key + 4); + + /* + * A pseudo header (DA, SA, Priority, 0, 0, 0) is used in Michael MIC + * calculation, but it is _not_ transmitted + */ + michael_block(mctx, get_unaligned_le32(da)); + michael_block(mctx, get_unaligned_le16(&da[4]) | + (get_unaligned_le16(sa) << 16)); + michael_block(mctx, get_unaligned_le32(&sa[2])); + michael_block(mctx, tid); } - -void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority, - u8 *data, size_t data_len, u8 *mic) +void michael_mic(const u8 *key, struct ieee80211_hdr *hdr, + const u8 *data, size_t data_len, u8 *mic) { - u32 l, r, val; + u32 val; size_t block, blocks, left; + struct michael_mic_ctx mctx; - l = michael_get32(key); - r = michael_get32(key + 4); - - /* A pseudo header (DA, SA, Priority, 0, 0, 0) is used in Michael MIC - * calculation, but it is _not_ transmitted */ - l ^= michael_get32(da); - michael_block(l, r); - l ^= da[4] | (da[5] << 8) | (sa[0] << 16) | (sa[1] << 24); - michael_block(l, r); - l ^= michael_get32(&sa[2]); - michael_block(l, r); - l ^= priority; - michael_block(l, r); + michael_mic_hdr(&mctx, key, hdr); /* Real data */ blocks = data_len / 4; left = data_len % 4; - for (block = 0; block < blocks; block++) { - l ^= michael_get32(&data[block * 4]); - michael_block(l, r); - } + for (block = 0; block < blocks; block++) + michael_block(&mctx, get_unaligned_le32(&data[block * 4])); /* Partial block of 0..3 bytes and padding: 0x5a + 4..7 zeros to make * total length a multiple of 4. */ @@ -94,11 +77,10 @@ void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority, left--; val |= data[blocks * 4 + left]; } - l ^= val; - michael_block(l, r); - /* last block is zero, so l ^ 0 = l */ - michael_block(l, r); - michael_put32(l, mic); - michael_put32(r, mic + 4); + michael_block(&mctx, val); + michael_block(&mctx, 0); + + put_unaligned_le32(mctx.l, mic); + put_unaligned_le32(mctx.r, mic + 4); } diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h index 2e6aebabeea1..3b848dad9587 100644 --- a/net/mac80211/michael.h +++ b/net/mac80211/michael.h @@ -14,7 +14,11 @@ #define MICHAEL_MIC_LEN 8 -void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority, - u8 *data, size_t data_len, u8 *mic); +struct michael_mic_ctx { + u32 l, r; +}; + +void michael_mic(const u8 *key, struct ieee80211_hdr *hdr, + const u8 *data, size_t data_len, u8 *mic); #endif /* MICHAEL_H */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a5e5c31c23ab..d7c371e36bf0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -44,7 +44,7 @@ #define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) #define IEEE80211_SCAN_INTERVAL (2 * HZ) #define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ) -#define IEEE80211_IBSS_JOIN_TIMEOUT (20 * HZ) +#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) #define IEEE80211_PROBE_DELAY (HZ / 33) #define IEEE80211_CHANNEL_TIME (HZ / 33) @@ -78,7 +78,7 @@ static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst, static struct ieee80211_sta_bss * ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int freq, u8 *ssid, u8 ssid_len); -static void ieee80211_rx_bss_put(struct net_device *dev, +static void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_sta_bss *bss); static int ieee80211_sta_find_ibss(struct net_device *dev, struct ieee80211_if_sta *ifsta); @@ -87,6 +87,7 @@ static int ieee80211_sta_start_scan(struct net_device *dev, u8 *ssid, size_t ssid_len); static int ieee80211_sta_config_auth(struct net_device *dev, struct ieee80211_if_sta *ifsta); +static void sta_rx_agg_session_timer_expired(unsigned long data); void ieee802_11_parse_elems(u8 *start, size_t len, @@ -203,6 +204,25 @@ void ieee802_11_parse_elems(u8 *start, size_t len, elems->perr = pos; elems->perr_len = elen; break; + case WLAN_EID_CHANNEL_SWITCH: + elems->ch_switch_elem = pos; + elems->ch_switch_elem_len = elen; + break; + case WLAN_EID_QUIET: + if (!elems->quiet_elem) { + elems->quiet_elem = pos; + elems->quiet_elem_len = elen; + } + elems->num_of_quiet_elem++; + break; + case WLAN_EID_COUNTRY: + elems->country_elem = pos; + elems->country_elem_len = elen; + break; + case WLAN_EID_PWR_CONSTRAINT: + elems->pwr_constr_elem = pos; + elems->pwr_constr_elem_len = elen; + break; default: break; } @@ -256,19 +276,8 @@ static void ieee80211_sta_def_wmm_params(struct net_device *dev, qparam.cw_max = 1023; qparam.txop = 0; - for (i = IEEE80211_TX_QUEUE_DATA0; i < NUM_TX_DATA_QUEUES; i++) - local->ops->conf_tx(local_to_hw(local), - i + IEEE80211_TX_QUEUE_DATA0, - &qparam); - - if (ibss) { - /* IBSS uses different parameters for Beacon sending */ - qparam.cw_min++; - qparam.cw_min *= 2; - qparam.cw_min--; - local->ops->conf_tx(local_to_hw(local), - IEEE80211_TX_QUEUE_BEACON, &qparam); - } + for (i = 0; i < local_to_hw(local)->queues; i++) + local->ops->conf_tx(local_to_hw(local), i, &qparam); } } @@ -282,6 +291,12 @@ static void ieee80211_sta_wmm_params(struct net_device *dev, int count; u8 *pos; + if (!(ifsta->flags & IEEE80211_STA_WMM_ENABLED)) + return; + + if (!wmm_param) + return; + if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) return; count = wmm_param[6] & 0x0f; @@ -305,37 +320,33 @@ static void ieee80211_sta_wmm_params(struct net_device *dev, switch (aci) { case 1: - queue = IEEE80211_TX_QUEUE_DATA3; - if (acm) { + queue = 3; + if (acm) local->wmm_acm |= BIT(0) | BIT(3); - } break; case 2: - queue = IEEE80211_TX_QUEUE_DATA1; - if (acm) { + queue = 1; + if (acm) local->wmm_acm |= BIT(4) | BIT(5); - } break; case 3: - queue = IEEE80211_TX_QUEUE_DATA0; - if (acm) { + queue = 0; + if (acm) local->wmm_acm |= BIT(6) | BIT(7); - } break; case 0: default: - queue = IEEE80211_TX_QUEUE_DATA2; - if (acm) { + queue = 2; + if (acm) local->wmm_acm |= BIT(1) | BIT(2); - } break; } params.aifs = pos[0] & 0x0f; params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); params.cw_min = ecw2cw(pos[1] & 0x0f); - params.txop = pos[2] | (pos[3] << 8); -#ifdef CONFIG_MAC80211_DEBUG + params.txop = get_unaligned_le16(pos + 2); +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " "cWmin=%d cWmax=%d txop=%d\n", dev->name, queue, aci, acm, params.aifs, params.cw_min, @@ -355,11 +366,14 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, bool use_short_preamble) { struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf; +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG struct ieee80211_if_sta *ifsta = &sdata->u.sta; DECLARE_MAC_BUF(mac); +#endif u32 changed = 0; if (use_protection != bss_conf->use_cts_prot) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" "%s)\n", @@ -367,11 +381,13 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, use_protection ? "enabled" : "disabled", print_mac(mac, ifsta->bssid)); } +#endif bss_conf->use_cts_prot = use_protection; changed |= BSS_CHANGED_ERP_CTS_PROT; } if (use_short_preamble != bss_conf->use_short_preamble) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: switched to %s barker preamble" " (BSSID=%s)\n", @@ -379,6 +395,7 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, use_short_preamble ? "short" : "long", print_mac(mac, ifsta->bssid)); } +#endif bss_conf->use_short_preamble = use_short_preamble; changed |= BSS_CHANGED_ERP_PREAMBLE; } @@ -537,7 +554,7 @@ static void ieee80211_set_associated(struct net_device *dev, changed |= ieee80211_handle_bss_capability(sdata, bss); - ieee80211_rx_bss_put(dev, bss); + ieee80211_rx_bss_put(local, bss); } if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { @@ -547,16 +564,15 @@ static void ieee80211_set_associated(struct net_device *dev, sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; } - netif_carrier_on(dev); ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); ieee80211_sta_send_associnfo(dev, ifsta); } else { + netif_carrier_off(dev); ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid); ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; - netif_carrier_off(dev); - ieee80211_reset_erp_info(dev); + changed |= ieee80211_reset_erp_info(dev); sdata->bss_conf.assoc_ht = 0; sdata->bss_conf.ht_conf = NULL; @@ -569,6 +585,10 @@ static void ieee80211_set_associated(struct net_device *dev, sdata->bss_conf.assoc = assoc; ieee80211_bss_info_change_notify(sdata, changed); + + if (assoc) + netif_carrier_on(dev); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); } @@ -586,7 +606,7 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, int encrypt) { struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_tx_info *info; sdata = IEEE80211_DEV_TO_SUB_IF(dev); skb->dev = sdata->local->mdev; @@ -594,11 +614,11 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, skb_set_network_header(skb, 0); skb_set_transport_header(skb, 0); - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; - memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = sdata->dev->ifindex; + info = IEEE80211_SKB_CB(skb); + memset(info, 0, sizeof(struct ieee80211_tx_info)); + info->control.ifindex = sdata->dev->ifindex; if (!encrypt) - pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; dev_queue_xmit(skb); } @@ -665,6 +685,26 @@ static void ieee80211_authenticate(struct net_device *dev, mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); } +static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss, + struct ieee80211_supported_band *sband, + u64 *rates) +{ + int i, j, count; + *rates = 0; + count = 0; + for (i = 0; i < bss->supp_rates_len; i++) { + int rate = (bss->supp_rates[i] & 0x7F) * 5; + + for (j = 0; j < sband->n_bitrates; j++) + if (sband->bitrates[j].bitrate == rate) { + *rates |= BIT(j); + count++; + break; + } + } + + return count; +} static void ieee80211_send_assoc(struct net_device *dev, struct ieee80211_if_sta *ifsta) @@ -673,11 +713,12 @@ static void ieee80211_send_assoc(struct net_device *dev, struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *pos, *ies; - int i, len; + int i, len, count, rates_len, supp_rates_len; u16 capab; struct ieee80211_sta_bss *bss; int wmm = 0; struct ieee80211_supported_band *sband; + u64 rates = 0; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + ifsta->extra_ie_len + @@ -706,10 +747,23 @@ static void ieee80211_send_assoc(struct net_device *dev, if (bss) { if (bss->capability & WLAN_CAPABILITY_PRIVACY) capab |= WLAN_CAPABILITY_PRIVACY; - if (bss->wmm_ie) { + if (bss->wmm_ie) wmm = 1; - } - ieee80211_rx_bss_put(dev, bss); + + /* get all rates supported by the device and the AP as + * some APs don't like getting a superset of their rates + * in the association request (e.g. D-Link DAP 1353 in + * b-only mode) */ + rates_len = ieee80211_compatible_rates(bss, sband, &rates); + + if ((bss->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && + (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)) + capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; + + ieee80211_rx_bss_put(local, bss); + } else { + rates = ~0; + rates_len = sband->n_bitrates; } mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); @@ -740,24 +794,56 @@ static void ieee80211_send_assoc(struct net_device *dev, *pos++ = ifsta->ssid_len; memcpy(pos, ifsta->ssid, ifsta->ssid_len); + /* add all rates which were marked to be used above */ + supp_rates_len = rates_len; + if (supp_rates_len > 8) + supp_rates_len = 8; + len = sband->n_bitrates; - if (len > 8) - len = 8; - pos = skb_put(skb, len + 2); + pos = skb_put(skb, supp_rates_len + 2); *pos++ = WLAN_EID_SUPP_RATES; - *pos++ = len; - for (i = 0; i < len; i++) { - int rate = sband->bitrates[i].bitrate; - *pos++ = (u8) (rate / 5); - } + *pos++ = supp_rates_len; - if (sband->n_bitrates > len) { - pos = skb_put(skb, sband->n_bitrates - len + 2); - *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = sband->n_bitrates - len; - for (i = len; i < sband->n_bitrates; i++) { + count = 0; + for (i = 0; i < sband->n_bitrates; i++) { + if (BIT(i) & rates) { int rate = sband->bitrates[i].bitrate; *pos++ = (u8) (rate / 5); + if (++count == 8) + break; + } + } + + if (count == 8) { + pos = skb_put(skb, rates_len - count + 2); + *pos++ = WLAN_EID_EXT_SUPP_RATES; + *pos++ = rates_len - count; + + for (i++; i < sband->n_bitrates; i++) { + if (BIT(i) & rates) { + int rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + } + } + + if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) { + /* 1. power capabilities */ + pos = skb_put(skb, 4); + *pos++ = WLAN_EID_PWR_CAPABILITY; + *pos++ = 2; + *pos++ = 0; /* min tx power */ + *pos++ = local->hw.conf.channel->max_power; /* max tx power */ + + /* 2. supported channels */ + /* TODO: get this in reg domain format */ + pos = skb_put(skb, 2 * sband->n_channels + 2); + *pos++ = WLAN_EID_SUPPORTED_CHANNELS; + *pos++ = 2 * sband->n_channels; + for (i = 0; i < sband->n_channels; i++) { + *pos++ = ieee80211_frequency_to_channel( + sband->channels[i].center_freq); + *pos++ = 1; /* one channel in the subband*/ } } @@ -778,9 +864,32 @@ static void ieee80211_send_assoc(struct net_device *dev, *pos++ = 1; /* WME ver */ *pos++ = 0; } + /* wmm support is a must to HT */ - if (wmm && sband->ht_info.ht_supported) { - __le16 tmp = cpu_to_le16(sband->ht_info.cap); + if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && + sband->ht_info.ht_supported && bss->ht_add_ie) { + struct ieee80211_ht_addt_info *ht_add_info = + (struct ieee80211_ht_addt_info *)bss->ht_add_ie; + u16 cap = sband->ht_info.cap; + __le16 tmp; + u32 flags = local->hw.conf.channel->flags; + + switch (ht_add_info->ht_param & IEEE80211_HT_IE_CHA_SEC_OFFSET) { + case IEEE80211_HT_IE_CHA_SEC_ABOVE: + if (flags & IEEE80211_CHAN_NO_FAT_ABOVE) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + break; + case IEEE80211_HT_IE_CHA_SEC_BELOW: + if (flags & IEEE80211_CHAN_NO_FAT_BELOW) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + break; + } + + tmp = cpu_to_le16(cap); pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2); *pos++ = WLAN_EID_HT_CAPABILITY; *pos++ = sizeof(struct ieee80211_ht_cap); @@ -883,7 +992,7 @@ static int ieee80211_privacy_mismatch(struct net_device *dev, wep_privacy = !!ieee80211_sta_wep_configured(dev); privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED); - ieee80211_rx_bss_put(dev, bss); + ieee80211_rx_bss_put(local, bss); if ((bss_privacy == wep_privacy) || (bss_privacy == privacy_invoked)) return 0; @@ -1075,14 +1184,10 @@ static void ieee80211_auth_challenge(struct net_device *dev, u8 *pos; struct ieee802_11_elems elems; - printk(KERN_DEBUG "%s: replying to auth challenge\n", dev->name); pos = mgmt->u.auth.variable; ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); - if (!elems.challenge) { - printk(KERN_DEBUG "%s: no challenge IE in shared key auth " - "frame\n", dev->name); + if (!elems.challenge) return; - } ieee80211_send_auth(dev, ifsta, 3, elems.challenge - 2, elems.challenge_len + 2, 1); } @@ -1098,8 +1203,8 @@ static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid, struct ieee80211_mgmt *mgmt; u16 capab; - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 + - sizeof(mgmt->u.action.u.addba_resp)); + skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer " "for addba resp frame\n", dev->name); @@ -1147,9 +1252,7 @@ void ieee80211_send_addba_request(struct net_device *dev, const u8 *da, struct ieee80211_mgmt *mgmt; u16 capab; - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 + - sizeof(mgmt->u.action.u.addba_req)); - + skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); if (!skb) { printk(KERN_ERR "%s: failed to allocate buffer " @@ -1250,7 +1353,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev, /* examine state machine */ - spin_lock_bh(&sta->ampdu_mlme.ampdu_rx); + spin_lock_bh(&sta->lock); if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) { #ifdef CONFIG_MAC80211_HT_DEBUG @@ -1266,9 +1369,11 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev, sta->ampdu_mlme.tid_rx[tid] = kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); if (!sta->ampdu_mlme.tid_rx[tid]) { +#ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_ERR "allocate rx mlme to tid %d failed\n", tid); +#endif goto end; } /* rx timer */ @@ -1282,16 +1387,18 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev, /* prepare reordering buffer */ tid_agg_rx->reorder_buf = - kmalloc(buf_size * sizeof(struct sk_buf *), GFP_ATOMIC); + kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC); if (!tid_agg_rx->reorder_buf) { +#ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_ERR "can not allocate reordering buffer " "to tid %d\n", tid); +#endif kfree(sta->ampdu_mlme.tid_rx[tid]); goto end; } memset(tid_agg_rx->reorder_buf, 0, - buf_size * sizeof(struct sk_buf *)); + buf_size * sizeof(struct sk_buff *)); if (local->ops->ampdu_action) ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, @@ -1317,7 +1424,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev, tid_agg_rx->stored_mpdu_num = 0; status = WLAN_STATUS_SUCCESS; end: - spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx); + spin_unlock_bh(&sta->lock); end_no_lock: ieee80211_send_addba_resp(sta->sdata->dev, sta->addr, tid, @@ -1349,18 +1456,16 @@ static void ieee80211_sta_process_addba_resp(struct net_device *dev, state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_lock_bh(&sta->lock); if (!(*state & HT_ADDBA_REQUESTED_MSK)) { - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - printk(KERN_DEBUG "state not HT_ADDBA_REQUESTED_MSK:" - "%d\n", *state); + spin_unlock_bh(&sta->lock); goto addba_resp_exit; } if (mgmt->u.action.u.addba_resp.dialog_token != sta->ampdu_mlme.tid_tx[tid]->dialog_token) { - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_unlock_bh(&sta->lock); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ @@ -1373,26 +1478,18 @@ static void ieee80211_sta_process_addba_resp(struct net_device *dev, #endif /* CONFIG_MAC80211_HT_DEBUG */ if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) == WLAN_STATUS_SUCCESS) { - if (*state & HT_ADDBA_RECEIVED_MSK) - printk(KERN_DEBUG "double addBA response\n"); - *state |= HT_ADDBA_RECEIVED_MSK; sta->ampdu_mlme.addba_req_num[tid] = 0; - if (*state == HT_AGG_STATE_OPERATIONAL) { - printk(KERN_DEBUG "Aggregation on for tid %d \n", tid); + if (*state == HT_AGG_STATE_OPERATIONAL) ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); - } - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - printk(KERN_DEBUG "recipient accepted agg: tid %d \n", tid); + spin_unlock_bh(&sta->lock); } else { - printk(KERN_DEBUG "recipient rejected agg: tid %d \n", tid); - sta->ampdu_mlme.addba_req_num[tid]++; /* this will allow the state check in stop_BA_session */ *state = HT_AGG_STATE_OPERATIONAL; - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_unlock_bh(&sta->lock); ieee80211_stop_tx_ba_session(hw, sta->addr, tid, WLAN_BACK_INITIATOR); } @@ -1411,8 +1508,7 @@ void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid, struct ieee80211_mgmt *mgmt; u16 params; - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 + - sizeof(mgmt->u.action.u.delba)); + skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); if (!skb) { printk(KERN_ERR "%s: failed to allocate buffer " @@ -1445,6 +1541,35 @@ void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid, ieee80211_sta_tx(dev, skb, 0); } +void ieee80211_send_bar(struct net_device *dev, u8 *ra, u16 tid, u16 ssn) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sk_buff *skb; + struct ieee80211_bar *bar; + u16 bar_control = 0; + + skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom); + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer for " + "bar frame\n", dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar)); + memset(bar, 0, sizeof(*bar)); + bar->frame_control = IEEE80211_FC(IEEE80211_FTYPE_CTL, + IEEE80211_STYPE_BACK_REQ); + memcpy(bar->ra, ra, ETH_ALEN); + memcpy(bar->ta, dev->dev_addr, ETH_ALEN); + bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL; + bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA; + bar_control |= (u16)(tid << 12); + bar->control = cpu_to_le16(bar_control); + bar->start_seq_num = cpu_to_le16(ssn); + + ieee80211_sta_tx(dev, skb, 0); +} + void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid, u16 initiator, u16 reason) { @@ -1463,17 +1588,17 @@ void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid, } /* check if TID is in operational state */ - spin_lock_bh(&sta->ampdu_mlme.ampdu_rx); + spin_lock_bh(&sta->lock); if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) { - spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx); + spin_unlock_bh(&sta->lock); rcu_read_unlock(); return; } sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_REQ_STOP_BA_MSK | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); - spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx); + spin_unlock_bh(&sta->lock); /* stop HW Rx aggregation. ampdu_action existence * already verified in session init so we add the BUG_ON */ @@ -1488,7 +1613,7 @@ void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid, ra, tid, NULL); if (ret) printk(KERN_DEBUG "HW problem - can not stop rx " - "aggergation for tid %d\n", tid); + "aggregation for tid %d\n", tid); /* shutdown timer has not expired */ if (initiator != WLAN_BACK_TIMER) @@ -1550,10 +1675,10 @@ static void ieee80211_sta_process_delba(struct net_device *dev, ieee80211_sta_stop_rx_ba_session(dev, sta->addr, tid, WLAN_BACK_INITIATOR, 0); else { /* WLAN_BACK_RECIPIENT */ - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_lock_bh(&sta->lock); sta->ampdu_mlme.tid_state_tx[tid] = HT_AGG_STATE_OPERATIONAL; - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_unlock_bh(&sta->lock); ieee80211_stop_tx_ba_session(&local->hw, sta->addr, tid, WLAN_BACK_RECIPIENT); } @@ -1571,7 +1696,7 @@ void sta_addba_resp_timer_expired(unsigned long data) * only one argument, and both sta_info and TID are needed, so init * flow in sta_info_create gives the TID as data, while the timer_to_id * array gives the sta through container_of */ - u16 tid = *(int *)data; + u16 tid = *(u8 *)data; struct sta_info *temp_sta = container_of((void *)data, struct sta_info, timer_to_tid[tid]); @@ -1590,20 +1715,24 @@ void sta_addba_resp_timer_expired(unsigned long data) state = &sta->ampdu_mlme.tid_state_tx[tid]; /* check if the TID waits for addBA response */ - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_lock_bh(&sta->lock); if (!(*state & HT_ADDBA_REQUESTED_MSK)) { - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_unlock_bh(&sta->lock); *state = HT_AGG_STATE_IDLE; +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "timer expired on tid %d but we are not " "expecting addBA response there", tid); +#endif goto timer_expired_exit; } +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); +#endif /* go through the state check in stop_BA_session */ *state = HT_AGG_STATE_OPERATIONAL; - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_unlock_bh(&sta->lock); ieee80211_stop_tx_ba_session(hw, temp_sta->addr, tid, WLAN_BACK_INITIATOR); @@ -1616,10 +1745,10 @@ timer_expired_exit: * resetting it after each frame that arrives from the originator. * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed. */ -void sta_rx_agg_session_timer_expired(unsigned long data) +static void sta_rx_agg_session_timer_expired(unsigned long data) { /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and verious sta_info are needed here, so init + * only one argument, and various sta_info are needed here, so init * flow in sta_info_create gives the TID as data, while the timer_to_id * array gives the sta through container_of */ u8 *ptid = (u8 *)data; @@ -1627,7 +1756,9 @@ void sta_rx_agg_session_timer_expired(unsigned long data) struct sta_info *sta = container_of(timer_to_id, struct sta_info, timer_to_tid[0]); +#ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); +#endif ieee80211_sta_stop_rx_ba_session(sta->sdata->dev, sta->addr, (u16)*ptid, WLAN_BACK_TIMER, WLAN_REASON_QSTA_TIMEOUT); @@ -1647,6 +1778,71 @@ void ieee80211_sta_tear_down_BA_sessions(struct net_device *dev, u8 *addr) } } +static void ieee80211_send_refuse_measurement_request(struct net_device *dev, + struct ieee80211_msrment_ie *request_ie, + const u8 *da, const u8 *bssid, + u8 dialog_token) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sk_buff *skb; + struct ieee80211_mgmt *msr_report; + + skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom + + sizeof(struct ieee80211_msrment_ie)); + + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer for " + "measurement report frame\n", dev->name); + return; + } + + skb_reserve(skb, local->hw.extra_tx_headroom); + msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24); + memset(msr_report, 0, 24); + memcpy(msr_report->da, da, ETH_ALEN); + memcpy(msr_report->sa, dev->dev_addr, ETH_ALEN); + memcpy(msr_report->bssid, bssid, ETH_ALEN); + msr_report->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, + IEEE80211_STYPE_ACTION); + + skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement)); + msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT; + msr_report->u.action.u.measurement.action_code = + WLAN_ACTION_SPCT_MSR_RPRT; + msr_report->u.action.u.measurement.dialog_token = dialog_token; + + msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT; + msr_report->u.action.u.measurement.length = + sizeof(struct ieee80211_msrment_ie); + + memset(&msr_report->u.action.u.measurement.msr_elem, 0, + sizeof(struct ieee80211_msrment_ie)); + msr_report->u.action.u.measurement.msr_elem.token = request_ie->token; + msr_report->u.action.u.measurement.msr_elem.mode |= + IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; + msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; + + ieee80211_sta_tx(dev, skb, 0); +} + +static void ieee80211_sta_process_measurement_req(struct net_device *dev, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + /* + * Ignoring measurement request is spec violation. + * Mandatory measurements must be reported optional + * measurements might be refused or reported incapable + * For now just refuse + * TODO: Answer basic measurement as unmeasured + */ + ieee80211_send_refuse_measurement_request(dev, + &mgmt->u.action.u.measurement.msr_elem, + mgmt->sa, mgmt->bssid, + mgmt->u.action.u.measurement.dialog_token); +} + + static void ieee80211_rx_mgmt_auth(struct net_device *dev, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, @@ -1657,73 +1853,41 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, DECLARE_MAC_BUF(mac); if (ifsta->state != IEEE80211_AUTHENTICATE && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) { - printk(KERN_DEBUG "%s: authentication frame received from " - "%s, but not in authenticate state - ignored\n", - dev->name, print_mac(mac, mgmt->sa)); + sdata->vif.type != IEEE80211_IF_TYPE_IBSS) return; - } - if (len < 24 + 6) { - printk(KERN_DEBUG "%s: too short (%zd) authentication frame " - "received from %s - ignored\n", - dev->name, len, print_mac(mac, mgmt->sa)); + if (len < 24 + 6) return; - } if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { - printk(KERN_DEBUG "%s: authentication frame received from " - "unknown AP (SA=%s BSSID=%s) - " - "ignored\n", dev->name, print_mac(mac, mgmt->sa), - print_mac(mac, mgmt->bssid)); + memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) return; - } if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) { - printk(KERN_DEBUG "%s: authentication frame received from " - "unknown BSSID (SA=%s BSSID=%s) - " - "ignored\n", dev->name, print_mac(mac, mgmt->sa), - print_mac(mac, mgmt->bssid)); + memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; - } auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); - printk(KERN_DEBUG "%s: RX authentication from %s (alg=%d " - "transaction=%d status=%d)\n", - dev->name, print_mac(mac, mgmt->sa), auth_alg, - auth_transaction, status_code); - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - /* IEEE 802.11 standard does not require authentication in IBSS + /* + * IEEE 802.11 standard does not require authentication in IBSS * networks and most implementations do not seem to use it. * However, try to reply to authentication attempts if someone * has actually implemented this. - * TODO: Could implement shared key authentication. */ - if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) { - printk(KERN_DEBUG "%s: unexpected IBSS authentication " - "frame (alg=%d transaction=%d)\n", - dev->name, auth_alg, auth_transaction); + */ + if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; - } ieee80211_send_auth(dev, ifsta, 2, NULL, 0, 0); } if (auth_alg != ifsta->auth_alg || - auth_transaction != ifsta->auth_transaction) { - printk(KERN_DEBUG "%s: unexpected authentication frame " - "(alg=%d transaction=%d)\n", - dev->name, auth_alg, auth_transaction); + auth_transaction != ifsta->auth_transaction) return; - } if (status_code != WLAN_STATUS_SUCCESS) { - printk(KERN_DEBUG "%s: AP denied authentication (auth_alg=%d " - "code=%d)\n", dev->name, ifsta->auth_alg, status_code); if (status_code == WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG) { u8 algs[3]; const int num_algs = ARRAY_SIZE(algs); @@ -1752,9 +1916,6 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, !ieee80211_sta_wep_configured(dev)) continue; ifsta->auth_alg = algs[pos]; - printk(KERN_DEBUG "%s: set auth_alg=%d for " - "next try\n", - dev->name, ifsta->auth_alg); break; } } @@ -1784,30 +1945,16 @@ static void ieee80211_rx_mgmt_deauth(struct net_device *dev, u16 reason_code; DECLARE_MAC_BUF(mac); - if (len < 24 + 2) { - printk(KERN_DEBUG "%s: too short (%zd) deauthentication frame " - "received from %s - ignored\n", - dev->name, len, print_mac(mac, mgmt->sa)); + if (len < 24 + 2) return; - } - if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { - printk(KERN_DEBUG "%s: deauthentication frame received from " - "unknown AP (SA=%s BSSID=%s) - " - "ignored\n", dev->name, print_mac(mac, mgmt->sa), - print_mac(mac, mgmt->bssid)); + if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) return; - } reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); - printk(KERN_DEBUG "%s: RX deauthentication from %s" - " (reason=%d)\n", - dev->name, print_mac(mac, mgmt->sa), reason_code); - - if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) { + if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) printk(KERN_DEBUG "%s: deauthenticated\n", dev->name); - } if (ifsta->state == IEEE80211_AUTHENTICATE || ifsta->state == IEEE80211_ASSOCIATE || @@ -1830,27 +1977,14 @@ static void ieee80211_rx_mgmt_disassoc(struct net_device *dev, u16 reason_code; DECLARE_MAC_BUF(mac); - if (len < 24 + 2) { - printk(KERN_DEBUG "%s: too short (%zd) disassociation frame " - "received from %s - ignored\n", - dev->name, len, print_mac(mac, mgmt->sa)); + if (len < 24 + 2) return; - } - if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { - printk(KERN_DEBUG "%s: disassociation frame received from " - "unknown AP (SA=%s BSSID=%s) - " - "ignored\n", dev->name, print_mac(mac, mgmt->sa), - print_mac(mac, mgmt->bssid)); + if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) return; - } reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); - printk(KERN_DEBUG "%s: RX disassociation from %s" - " (reason=%d)\n", - dev->name, print_mac(mac, mgmt->sa), reason_code); - if (ifsta->flags & IEEE80211_STA_ASSOCIATED) printk(KERN_DEBUG "%s: disassociated\n", dev->name); @@ -1886,27 +2020,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, /* AssocResp and ReassocResp have identical structure, so process both * of them in this function. */ - if (ifsta->state != IEEE80211_ASSOCIATE) { - printk(KERN_DEBUG "%s: association frame received from " - "%s, but not in associate state - ignored\n", - dev->name, print_mac(mac, mgmt->sa)); + if (ifsta->state != IEEE80211_ASSOCIATE) return; - } - if (len < 24 + 6) { - printk(KERN_DEBUG "%s: too short (%zd) association frame " - "received from %s - ignored\n", - dev->name, len, print_mac(mac, mgmt->sa)); + if (len < 24 + 6) return; - } - if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { - printk(KERN_DEBUG "%s: association frame received from " - "unknown AP (SA=%s BSSID=%s) - " - "ignored\n", dev->name, print_mac(mac, mgmt->sa), - print_mac(mac, mgmt->bssid)); + if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) return; - } capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); @@ -1970,10 +2091,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, local->hw.conf.channel->center_freq, ifsta->ssid, ifsta->ssid_len); if (bss) { - sta->last_rssi = bss->rssi; sta->last_signal = bss->signal; + sta->last_qual = bss->qual; sta->last_noise = bss->noise; - ieee80211_rx_bss_put(dev, bss); + ieee80211_rx_bss_put(local, bss); } err = sta_info_insert(sta); @@ -1995,8 +2116,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * to between the sta_info_alloc() and sta_info_insert() above. */ - sta->flags |= WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP | - WLAN_STA_AUTHORIZED; + set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP | + WLAN_STA_AUTHORIZED); rates = 0; basic_rates = 0; @@ -2040,7 +2161,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, else sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; - if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param) { + if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param && + (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { struct ieee80211_ht_bss_info bss_info; ieee80211_ht_cap_ie_to_ht_info( (struct ieee80211_ht_cap *) @@ -2053,8 +2175,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, rate_control_rate_init(sta, local); - if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { - sta->flags |= WLAN_STA_WME; + if (elems.wmm_param) { + set_sta_flags(sta, WLAN_STA_WME); rcu_read_unlock(); ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, elems.wmm_param_len); @@ -2090,10 +2212,9 @@ static void __ieee80211_rx_bss_hash_add(struct net_device *dev, /* Caller must hold local->sta_bss_lock */ -static void __ieee80211_rx_bss_hash_del(struct net_device *dev, +static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local, struct ieee80211_sta_bss *bss) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *b, *prev = NULL; b = local->sta_bss_hash[STA_HASH(bss->bssid)]; while (b) { @@ -2238,45 +2359,42 @@ static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss) kfree(bss->rsn_ie); kfree(bss->wmm_ie); kfree(bss->ht_ie); + kfree(bss->ht_add_ie); kfree(bss_mesh_id(bss)); kfree(bss_mesh_cfg(bss)); kfree(bss); } -static void ieee80211_rx_bss_put(struct net_device *dev, +static void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_sta_bss *bss) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - local_bh_disable(); if (!atomic_dec_and_lock(&bss->users, &local->sta_bss_lock)) { local_bh_enable(); return; } - __ieee80211_rx_bss_hash_del(dev, bss); + __ieee80211_rx_bss_hash_del(local, bss); list_del(&bss->list); spin_unlock_bh(&local->sta_bss_lock); ieee80211_rx_bss_free(bss); } -void ieee80211_rx_bss_list_init(struct net_device *dev) +void ieee80211_rx_bss_list_init(struct ieee80211_local *local) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); spin_lock_init(&local->sta_bss_lock); INIT_LIST_HEAD(&local->sta_bss_list); } -void ieee80211_rx_bss_list_deinit(struct net_device *dev) +void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss, *tmp; list_for_each_entry_safe(bss, tmp, &local->sta_bss_list, list) - ieee80211_rx_bss_put(dev, bss); + ieee80211_rx_bss_put(local, bss); } @@ -2288,11 +2406,10 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, int res, rates, i, j; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - struct ieee80211_tx_control control; - struct rate_selection ratesel; u8 *pos; struct ieee80211_sub_if_data *sdata; struct ieee80211_supported_band *sband; + union iwreq_data wrqu; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; @@ -2306,7 +2423,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, local->ops->reset_tsf(local_to_hw(local)); } memcpy(ifsta->bssid, bss->bssid, ETH_ALEN); - res = ieee80211_if_config(dev); + res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); if (res) return res; @@ -2315,32 +2432,27 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, sdata->drop_unencrypted = bss->capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - res = ieee80211_set_freq(local, bss->freq); + res = ieee80211_set_freq(dev, bss->freq); - if (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS) { - printk(KERN_DEBUG "%s: IBSS not allowed on frequency " - "%d MHz\n", dev->name, local->oper_channel->center_freq); - return -1; - } + if (res) + return res; - /* Set beacon template */ + /* Build IBSS probe response */ skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); - do { - if (!skb) - break; - + if (skb) { skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + sizeof(mgmt->u.beacon)); memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_BEACON); + IEEE80211_STYPE_PROBE_RESP); memset(mgmt->da, 0xff, ETH_ALEN); memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(local->hw.conf.beacon_int); + mgmt->u.beacon.timestamp = cpu_to_le64(bss->timestamp); mgmt->u.beacon.capab_info = cpu_to_le16(bss->capability); pos = skb_put(skb, 2 + ifsta->ssid_len); @@ -2378,65 +2490,29 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, memcpy(pos, &bss->supp_rates[8], rates); } - memset(&control, 0, sizeof(control)); - rate_control_get_rate(dev, sband, skb, &ratesel); - if (!ratesel.rate) { - printk(KERN_DEBUG "%s: Failed to determine TX rate " - "for IBSS beacon\n", dev->name); - break; - } - control.vif = &sdata->vif; - control.tx_rate = ratesel.rate; - if (sdata->bss_conf.use_short_preamble && - ratesel.rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) - control.flags |= IEEE80211_TXCTL_SHORT_PREAMBLE; - control.antenna_sel_tx = local->hw.conf.antenna_sel_tx; - control.flags |= IEEE80211_TXCTL_NO_ACK; - control.retry_limit = 1; - - ifsta->probe_resp = skb_copy(skb, GFP_ATOMIC); - if (ifsta->probe_resp) { - mgmt = (struct ieee80211_mgmt *) - ifsta->probe_resp->data; - mgmt->frame_control = - IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_PROBE_RESP); - } else { - printk(KERN_DEBUG "%s: Could not allocate ProbeResp " - "template for IBSS\n", dev->name); - } - - if (local->ops->beacon_update && - local->ops->beacon_update(local_to_hw(local), - skb, &control) == 0) { - printk(KERN_DEBUG "%s: Configured IBSS beacon " - "template\n", dev->name); - skb = NULL; - } - - rates = 0; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - for (i = 0; i < bss->supp_rates_len; i++) { - int bitrate = (bss->supp_rates[i] & 0x7f) * 5; - for (j = 0; j < sband->n_bitrates; j++) - if (sband->bitrates[j].bitrate == bitrate) - rates |= BIT(j); - } - ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates; + ifsta->probe_resp = skb; - ieee80211_sta_def_wmm_params(dev, bss, 1); - } while (0); + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); + } - if (skb) { - printk(KERN_DEBUG "%s: Failed to configure IBSS beacon " - "template\n", dev->name); - dev_kfree_skb(skb); + rates = 0; + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + for (i = 0; i < bss->supp_rates_len; i++) { + int bitrate = (bss->supp_rates[i] & 0x7f) * 5; + for (j = 0; j < sband->n_bitrates; j++) + if (sband->bitrates[j].bitrate == bitrate) + rates |= BIT(j); } + ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates; + + ieee80211_sta_def_wmm_params(dev, bss, 1); ifsta->state = IEEE80211_IBSS_JOINED; mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); - ieee80211_rx_bss_put(dev, bss); + memset(&wrqu, 0, sizeof(wrqu)); + memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN); + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); return res; } @@ -2482,11 +2558,10 @@ static void ieee80211_rx_bss_info(struct net_device *dev, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, + struct ieee802_11_elems *elems, int beacon) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee802_11_elems elems; - size_t baselen; int freq, clen; struct ieee80211_sta_bss *bss; struct sta_info *sta; @@ -2499,35 +2574,24 @@ static void ieee80211_rx_bss_info(struct net_device *dev, if (!beacon && memcmp(mgmt->da, dev->dev_addr, ETH_ALEN)) return; /* ignore ProbeResp to foreign address */ -#if 0 - printk(KERN_DEBUG "%s: RX %s from %s to %s\n", - dev->name, beacon ? "Beacon" : "Probe Response", - print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da)); -#endif - - baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; - if (baselen > len) - return; - beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); - ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - if (ieee80211_vif_is_mesh(&sdata->vif) && elems.mesh_id && - elems.mesh_config && mesh_matches_local(&elems, dev)) { - u64 rates = ieee80211_sta_get_rates(local, &elems, + if (ieee80211_vif_is_mesh(&sdata->vif) && elems->mesh_id && + elems->mesh_config && mesh_matches_local(elems, dev)) { + u64 rates = ieee80211_sta_get_rates(local, elems, rx_status->band); mesh_neighbour_update(mgmt->sa, rates, dev, - mesh_peer_accepts_plinks(&elems, dev)); + mesh_peer_accepts_plinks(elems, dev)); } rcu_read_lock(); - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates && + if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems->supp_rates && memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 && (sta = sta_info_get(local, mgmt->sa))) { u64 prev_rates; - u64 supp_rates = ieee80211_sta_get_rates(local, &elems, + u64 supp_rates = ieee80211_sta_get_rates(local, elems, rx_status->band); prev_rates = sta->supp_rates[rx_status->band]; @@ -2539,21 +2603,12 @@ static void ieee80211_rx_bss_info(struct net_device *dev, sta->supp_rates[rx_status->band] = sdata->u.sta.supp_rates_bits[rx_status->band]; } - if (sta->supp_rates[rx_status->band] != prev_rates) { - printk(KERN_DEBUG "%s: updated supp_rates set for " - "%s based on beacon info (0x%llx & 0x%llx -> " - "0x%llx)\n", - dev->name, print_mac(mac, sta->addr), - (unsigned long long) prev_rates, - (unsigned long long) supp_rates, - (unsigned long long) sta->supp_rates[rx_status->band]); - } } rcu_read_unlock(); - if (elems.ds_params && elems.ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + if (elems->ds_params && elems->ds_params_len == 1) + freq = ieee80211_channel_to_frequency(elems->ds_params[0]); else freq = rx_status->freq; @@ -2563,23 +2618,23 @@ static void ieee80211_rx_bss_info(struct net_device *dev, return; #ifdef CONFIG_MAC80211_MESH - if (elems.mesh_config) - bss = ieee80211_rx_mesh_bss_get(dev, elems.mesh_id, - elems.mesh_id_len, elems.mesh_config, freq); + if (elems->mesh_config) + bss = ieee80211_rx_mesh_bss_get(dev, elems->mesh_id, + elems->mesh_id_len, elems->mesh_config, freq); else #endif bss = ieee80211_rx_bss_get(dev, mgmt->bssid, freq, - elems.ssid, elems.ssid_len); + elems->ssid, elems->ssid_len); if (!bss) { #ifdef CONFIG_MAC80211_MESH - if (elems.mesh_config) - bss = ieee80211_rx_mesh_bss_add(dev, elems.mesh_id, - elems.mesh_id_len, elems.mesh_config, - elems.mesh_config_len, freq); + if (elems->mesh_config) + bss = ieee80211_rx_mesh_bss_add(dev, elems->mesh_id, + elems->mesh_id_len, elems->mesh_config, + elems->mesh_config_len, freq); else #endif bss = ieee80211_rx_bss_add(dev, mgmt->bssid, freq, - elems.ssid, elems.ssid_len); + elems->ssid, elems->ssid_len); if (!bss) return; } else { @@ -2592,46 +2647,66 @@ static void ieee80211_rx_bss_info(struct net_device *dev, } /* save the ERP value so that it is available at association time */ - if (elems.erp_info && elems.erp_info_len >= 1) { - bss->erp_value = elems.erp_info[0]; + if (elems->erp_info && elems->erp_info_len >= 1) { + bss->erp_value = elems->erp_info[0]; bss->has_erp_value = 1; } - if (elems.ht_cap_elem && - (!bss->ht_ie || bss->ht_ie_len != elems.ht_cap_elem_len || - memcmp(bss->ht_ie, elems.ht_cap_elem, elems.ht_cap_elem_len))) { + if (elems->ht_cap_elem && + (!bss->ht_ie || bss->ht_ie_len != elems->ht_cap_elem_len || + memcmp(bss->ht_ie, elems->ht_cap_elem, elems->ht_cap_elem_len))) { kfree(bss->ht_ie); - bss->ht_ie = kmalloc(elems.ht_cap_elem_len + 2, GFP_ATOMIC); + bss->ht_ie = kmalloc(elems->ht_cap_elem_len + 2, GFP_ATOMIC); if (bss->ht_ie) { - memcpy(bss->ht_ie, elems.ht_cap_elem - 2, - elems.ht_cap_elem_len + 2); - bss->ht_ie_len = elems.ht_cap_elem_len + 2; + memcpy(bss->ht_ie, elems->ht_cap_elem - 2, + elems->ht_cap_elem_len + 2); + bss->ht_ie_len = elems->ht_cap_elem_len + 2; } else bss->ht_ie_len = 0; - } else if (!elems.ht_cap_elem && bss->ht_ie) { + } else if (!elems->ht_cap_elem && bss->ht_ie) { kfree(bss->ht_ie); bss->ht_ie = NULL; bss->ht_ie_len = 0; } + if (elems->ht_info_elem && + (!bss->ht_add_ie || + bss->ht_add_ie_len != elems->ht_info_elem_len || + memcmp(bss->ht_add_ie, elems->ht_info_elem, + elems->ht_info_elem_len))) { + kfree(bss->ht_add_ie); + bss->ht_add_ie = + kmalloc(elems->ht_info_elem_len + 2, GFP_ATOMIC); + if (bss->ht_add_ie) { + memcpy(bss->ht_add_ie, elems->ht_info_elem - 2, + elems->ht_info_elem_len + 2); + bss->ht_add_ie_len = elems->ht_info_elem_len + 2; + } else + bss->ht_add_ie_len = 0; + } else if (!elems->ht_info_elem && bss->ht_add_ie) { + kfree(bss->ht_add_ie); + bss->ht_add_ie = NULL; + bss->ht_add_ie_len = 0; + } + bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); bss->supp_rates_len = 0; - if (elems.supp_rates) { + if (elems->supp_rates) { clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; - if (clen > elems.supp_rates_len) - clen = elems.supp_rates_len; - memcpy(&bss->supp_rates[bss->supp_rates_len], elems.supp_rates, + if (clen > elems->supp_rates_len) + clen = elems->supp_rates_len; + memcpy(&bss->supp_rates[bss->supp_rates_len], elems->supp_rates, clen); bss->supp_rates_len += clen; } - if (elems.ext_supp_rates) { + if (elems->ext_supp_rates) { clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; - if (clen > elems.ext_supp_rates_len) - clen = elems.ext_supp_rates_len; + if (clen > elems->ext_supp_rates_len) + clen = elems->ext_supp_rates_len; memcpy(&bss->supp_rates[bss->supp_rates_len], - elems.ext_supp_rates, clen); + elems->ext_supp_rates, clen); bss->supp_rates_len += clen; } @@ -2639,9 +2714,9 @@ static void ieee80211_rx_bss_info(struct net_device *dev, bss->timestamp = beacon_timestamp; bss->last_update = jiffies; - bss->rssi = rx_status->ssi; bss->signal = rx_status->signal; bss->noise = rx_status->noise; + bss->qual = rx_status->qual; if (!beacon && !bss->probe_resp) bss->probe_resp = true; @@ -2651,37 +2726,37 @@ static void ieee80211_rx_bss_info(struct net_device *dev, */ if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && bss->probe_resp && beacon) { - ieee80211_rx_bss_put(dev, bss); + ieee80211_rx_bss_put(local, bss); return; } - if (elems.wpa && - (!bss->wpa_ie || bss->wpa_ie_len != elems.wpa_len || - memcmp(bss->wpa_ie, elems.wpa, elems.wpa_len))) { + if (elems->wpa && + (!bss->wpa_ie || bss->wpa_ie_len != elems->wpa_len || + memcmp(bss->wpa_ie, elems->wpa, elems->wpa_len))) { kfree(bss->wpa_ie); - bss->wpa_ie = kmalloc(elems.wpa_len + 2, GFP_ATOMIC); + bss->wpa_ie = kmalloc(elems->wpa_len + 2, GFP_ATOMIC); if (bss->wpa_ie) { - memcpy(bss->wpa_ie, elems.wpa - 2, elems.wpa_len + 2); - bss->wpa_ie_len = elems.wpa_len + 2; + memcpy(bss->wpa_ie, elems->wpa - 2, elems->wpa_len + 2); + bss->wpa_ie_len = elems->wpa_len + 2; } else bss->wpa_ie_len = 0; - } else if (!elems.wpa && bss->wpa_ie) { + } else if (!elems->wpa && bss->wpa_ie) { kfree(bss->wpa_ie); bss->wpa_ie = NULL; bss->wpa_ie_len = 0; } - if (elems.rsn && - (!bss->rsn_ie || bss->rsn_ie_len != elems.rsn_len || - memcmp(bss->rsn_ie, elems.rsn, elems.rsn_len))) { + if (elems->rsn && + (!bss->rsn_ie || bss->rsn_ie_len != elems->rsn_len || + memcmp(bss->rsn_ie, elems->rsn, elems->rsn_len))) { kfree(bss->rsn_ie); - bss->rsn_ie = kmalloc(elems.rsn_len + 2, GFP_ATOMIC); + bss->rsn_ie = kmalloc(elems->rsn_len + 2, GFP_ATOMIC); if (bss->rsn_ie) { - memcpy(bss->rsn_ie, elems.rsn - 2, elems.rsn_len + 2); - bss->rsn_ie_len = elems.rsn_len + 2; + memcpy(bss->rsn_ie, elems->rsn - 2, elems->rsn_len + 2); + bss->rsn_ie_len = elems->rsn_len + 2; } else bss->rsn_ie_len = 0; - } else if (!elems.rsn && bss->rsn_ie) { + } else if (!elems->rsn && bss->rsn_ie) { kfree(bss->rsn_ie); bss->rsn_ie = NULL; bss->rsn_ie_len = 0; @@ -2701,20 +2776,21 @@ static void ieee80211_rx_bss_info(struct net_device *dev, * inclusion of the WMM Parameters in beacons, however, is optional. */ - if (elems.wmm_param && - (!bss->wmm_ie || bss->wmm_ie_len != elems.wmm_param_len || - memcmp(bss->wmm_ie, elems.wmm_param, elems.wmm_param_len))) { + if (elems->wmm_param && + (!bss->wmm_ie || bss->wmm_ie_len != elems->wmm_param_len || + memcmp(bss->wmm_ie, elems->wmm_param, elems->wmm_param_len))) { kfree(bss->wmm_ie); - bss->wmm_ie = kmalloc(elems.wmm_param_len + 2, GFP_ATOMIC); + bss->wmm_ie = kmalloc(elems->wmm_param_len + 2, GFP_ATOMIC); if (bss->wmm_ie) { - memcpy(bss->wmm_ie, elems.wmm_param - 2, - elems.wmm_param_len + 2); - bss->wmm_ie_len = elems.wmm_param_len + 2; + memcpy(bss->wmm_ie, elems->wmm_param - 2, + elems->wmm_param_len + 2); + bss->wmm_ie_len = elems->wmm_param_len + 2; } else bss->wmm_ie_len = 0; - } else if (elems.wmm_info && - (!bss->wmm_ie || bss->wmm_ie_len != elems.wmm_info_len || - memcmp(bss->wmm_ie, elems.wmm_info, elems.wmm_info_len))) { + } else if (elems->wmm_info && + (!bss->wmm_ie || bss->wmm_ie_len != elems->wmm_info_len || + memcmp(bss->wmm_ie, elems->wmm_info, + elems->wmm_info_len))) { /* As for certain AP's Fifth bit is not set in WMM IE in * beacon frames.So while parsing the beacon frame the * wmm_info structure is used instead of wmm_param. @@ -2724,14 +2800,14 @@ static void ieee80211_rx_bss_info(struct net_device *dev, * n-band association. */ kfree(bss->wmm_ie); - bss->wmm_ie = kmalloc(elems.wmm_info_len + 2, GFP_ATOMIC); + bss->wmm_ie = kmalloc(elems->wmm_info_len + 2, GFP_ATOMIC); if (bss->wmm_ie) { - memcpy(bss->wmm_ie, elems.wmm_info - 2, - elems.wmm_info_len + 2); - bss->wmm_ie_len = elems.wmm_info_len + 2; + memcpy(bss->wmm_ie, elems->wmm_info - 2, + elems->wmm_info_len + 2); + bss->wmm_ie_len = elems->wmm_info_len + 2; } else bss->wmm_ie_len = 0; - } else if (!elems.wmm_param && !elems.wmm_info && bss->wmm_ie) { + } else if (!elems->wmm_param && !elems->wmm_info && bss->wmm_ie) { kfree(bss->wmm_ie); bss->wmm_ie = NULL; bss->wmm_ie_len = 0; @@ -2742,8 +2818,9 @@ static void ieee80211_rx_bss_info(struct net_device *dev, !local->sta_sw_scanning && !local->sta_hw_scanning && bss->capability & WLAN_CAPABILITY_IBSS && bss->freq == local->oper_channel->center_freq && - elems.ssid_len == sdata->u.sta.ssid_len && - memcmp(elems.ssid, sdata->u.sta.ssid, sdata->u.sta.ssid_len) == 0) { + elems->ssid_len == sdata->u.sta.ssid_len && + memcmp(elems->ssid, sdata->u.sta.ssid, + sdata->u.sta.ssid_len) == 0) { if (rx_status->flag & RX_FLAG_TSFT) { /* in order for correct IBSS merging we need mactime * @@ -2781,18 +2858,18 @@ static void ieee80211_rx_bss_info(struct net_device *dev, #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (beacon_timestamp > rx_timestamp) { #ifndef CONFIG_MAC80211_IBSS_DEBUG - if (net_ratelimit()) + printk(KERN_DEBUG "%s: beacon TSF higher than " + "local TSF - IBSS merge with BSSID %s\n", + dev->name, print_mac(mac, mgmt->bssid)); #endif - printk(KERN_DEBUG "%s: beacon TSF higher than " - "local TSF - IBSS merge with BSSID %s\n", - dev->name, print_mac(mac, mgmt->bssid)); ieee80211_sta_join_ibss(dev, &sdata->u.sta, bss); ieee80211_ibss_add_sta(dev, NULL, - mgmt->bssid, mgmt->sa); + mgmt->bssid, mgmt->sa, + BIT(rx_status->rate_idx)); } } - ieee80211_rx_bss_put(dev, bss); + ieee80211_rx_bss_put(local, bss); } @@ -2801,7 +2878,17 @@ static void ieee80211_rx_mgmt_probe_resp(struct net_device *dev, size_t len, struct ieee80211_rx_status *rx_status) { - ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 0); + size_t baselen; + struct ieee802_11_elems elems; + + baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; + if (baselen > len) + return; + + ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, + &elems); + + ieee80211_rx_bss_info(dev, mgmt, len, rx_status, &elems, 0); } @@ -2818,7 +2905,14 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, struct ieee80211_conf *conf = &local->hw.conf; u32 changed = 0; - ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 1); + /* Process beacon from the current BSS */ + baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; + if (baselen > len) + return; + + ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); + + ieee80211_rx_bss_info(dev, mgmt, len, rx_status, &elems, 1); sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != IEEE80211_IF_TYPE_STA) @@ -2829,17 +2923,8 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; - /* Process beacon from the current BSS */ - baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; - if (baselen > len) - return; - - ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - - if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { - ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, - elems.wmm_param_len); - } + ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, + elems.wmm_param_len); /* Do not send changes to driver if we are scanning. This removes * requirement that driver's bss_info_changed function needs to be @@ -2916,11 +3001,11 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, pos = mgmt->u.probe_req.variable; if (pos[0] != WLAN_EID_SSID || pos + 2 + pos[1] > end) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " - "from %s\n", - dev->name, print_mac(mac, mgmt->sa)); - } +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " + "from %s\n", + dev->name, print_mac(mac, mgmt->sa)); +#endif return; } if (pos[1] != 0 && @@ -2951,11 +3036,24 @@ static void ieee80211_rx_mgmt_action(struct net_device *dev, struct ieee80211_rx_status *rx_status) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); if (len < IEEE80211_MIN_ACTION_SIZE) return; switch (mgmt->u.action.category) { + case WLAN_CATEGORY_SPECTRUM_MGMT: + if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) + break; + switch (mgmt->u.action.u.chan_switch.action_code) { + case WLAN_ACTION_SPCT_MSR_REQ: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.measurement))) + break; + ieee80211_sta_process_measurement_req(dev, mgmt, len); + break; + } + break; case WLAN_CATEGORY_BACK: switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: @@ -2976,11 +3074,6 @@ static void ieee80211_rx_mgmt_action(struct net_device *dev, break; ieee80211_sta_process_delba(dev, mgmt, len); break; - default: - if (net_ratelimit()) - printk(KERN_DEBUG "%s: Rx unknown A-MPDU action\n", - dev->name); - break; } break; case PLINK_CATEGORY: @@ -2991,11 +3084,6 @@ static void ieee80211_rx_mgmt_action(struct net_device *dev, if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_rx_path_sel_frame(dev, mgmt, len); break; - default: - if (net_ratelimit()) - printk(KERN_DEBUG "%s: Rx unknown action frame - " - "category=%d\n", dev->name, mgmt->u.action.category); - break; } } @@ -3031,11 +3119,6 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, skb_queue_tail(&ifsta->skb_queue, skb); queue_work(local->hw.workqueue, &ifsta->work); return; - default: - printk(KERN_DEBUG "%s: received unknown management frame - " - "stype=%d\n", dev->name, - (fc & IEEE80211_FCTL_STYPE) >> 4); - break; } fail: @@ -3099,33 +3182,32 @@ ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb, struct ieee80211_rx_status *rx_status) { struct ieee80211_mgmt *mgmt; - u16 fc; + __le16 fc; if (skb->len < 2) return RX_DROP_UNUSABLE; mgmt = (struct ieee80211_mgmt *) skb->data; - fc = le16_to_cpu(mgmt->frame_control); + fc = mgmt->frame_control; - if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) + if (ieee80211_is_ctl(fc)) return RX_CONTINUE; if (skb->len < 24) return RX_DROP_MONITOR; - if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT) { - if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP) { - ieee80211_rx_mgmt_probe_resp(dev, mgmt, - skb->len, rx_status); - dev_kfree_skb(skb); - return RX_QUEUED; - } else if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) { - ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, - rx_status); - dev_kfree_skb(skb); - return RX_QUEUED; - } + if (ieee80211_is_probe_resp(fc)) { + ieee80211_rx_mgmt_probe_resp(dev, mgmt, skb->len, rx_status); + dev_kfree_skb(skb); + return RX_QUEUED; + } + + if (ieee80211_is_beacon(fc)) { + ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, rx_status); + dev_kfree_skb(skb); + return RX_QUEUED; } + return RX_CONTINUE; } @@ -3165,8 +3247,10 @@ static void ieee80211_sta_expire(struct net_device *dev, unsigned long exp_time) spin_lock_irqsave(&local->sta_lock, flags); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) if (time_after(jiffies, sta->last_rx + exp_time)) { +#ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: expiring inactive STA %s\n", dev->name, print_mac(mac, sta->addr)); +#endif __sta_info_unlink(&sta); if (sta) list_add(&sta->list, &tmp_list); @@ -3205,7 +3289,7 @@ static void ieee80211_mesh_housekeeping(struct net_device *dev, free_plinks = mesh_plink_availables(sdata); if (free_plinks != sdata->u.sta.accepting_plinks) - ieee80211_if_config_beacon(dev); + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); mod_timer(&ifsta->timer, jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL); @@ -3249,13 +3333,10 @@ void ieee80211_sta_work(struct work_struct *work) if (local->sta_sw_scanning || local->sta_hw_scanning) return; - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) { - printk(KERN_DEBUG "%s: ieee80211_sta_work: non-STA interface " - "(type=%d)\n", dev->name, sdata->vif.type); + if (WARN_ON(sdata->vif.type != IEEE80211_IF_TYPE_STA && + sdata->vif.type != IEEE80211_IF_TYPE_IBSS && + sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)) return; - } ifsta = &sdata->u.sta; while ((skb = skb_dequeue(&ifsta->skb_queue))) @@ -3309,8 +3390,7 @@ void ieee80211_sta_work(struct work_struct *work) break; #endif default: - printk(KERN_DEBUG "ieee80211_sta_work: Unknown state %d\n", - ifsta->state); + WARN_ON(1); break; } @@ -3345,8 +3425,6 @@ static void ieee80211_sta_reset_auth(struct net_device *dev, ifsta->auth_alg = WLAN_AUTH_LEAP; else ifsta->auth_alg = WLAN_AUTH_OPEN; - printk(KERN_DEBUG "%s: Initial auth_alg=%d\n", dev->name, - ifsta->auth_alg); ifsta->auth_transaction = -1; ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; ifsta->auth_tries = ifsta->assoc_tries = 0; @@ -3410,21 +3488,17 @@ static int ieee80211_sta_config_auth(struct net_device *dev, struct ieee80211_sta_bss *bss, *selected = NULL; int top_rssi = 0, freq; - if (!(ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | - IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL))) { - ifsta->state = IEEE80211_AUTHENTICATE; - ieee80211_sta_reset_auth(dev, ifsta); - return 0; - } - spin_lock_bh(&local->sta_bss_lock); freq = local->oper_channel->center_freq; list_for_each_entry(bss, &local->sta_bss_list, list) { if (!(bss->capability & WLAN_CAPABILITY_ESS)) continue; - if (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ - !!sdata->default_key) + if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | + IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL)) && + (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ + !!sdata->default_key)) continue; if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && @@ -3439,9 +3513,9 @@ static int ieee80211_sta_config_auth(struct net_device *dev, !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len)) continue; - if (!selected || top_rssi < bss->rssi) { + if (!selected || top_rssi < bss->signal) { selected = bss; - top_rssi = bss->rssi; + top_rssi = bss->signal; } } if (selected) @@ -3449,13 +3523,13 @@ static int ieee80211_sta_config_auth(struct net_device *dev, spin_unlock_bh(&local->sta_bss_lock); if (selected) { - ieee80211_set_freq(local, selected->freq); + ieee80211_set_freq(dev, selected->freq); if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) ieee80211_sta_set_ssid(dev, selected->ssid, selected->ssid_len); ieee80211_sta_set_bssid(dev, selected->bssid); ieee80211_sta_def_wmm_params(dev, selected, 0); - ieee80211_rx_bss_put(dev, selected); + ieee80211_rx_bss_put(local, selected); ifsta->state = IEEE80211_AUTHENTICATE; ieee80211_sta_reset_auth(dev, ifsta); return 0; @@ -3484,6 +3558,7 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, struct ieee80211_supported_band *sband; u8 bssid[ETH_ALEN], *pos; int i; + int ret; DECLARE_MAC_BUF(mac); #if 0 @@ -3513,14 +3588,16 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, sband = local->hw.wiphy->bands[bss->band]; if (local->hw.conf.beacon_int == 0) - local->hw.conf.beacon_int = 10000; + local->hw.conf.beacon_int = 100; bss->beacon_int = local->hw.conf.beacon_int; bss->last_update = jiffies; bss->capability = WLAN_CAPABILITY_IBSS; - if (sdata->default_key) { + + if (sdata->default_key) bss->capability |= WLAN_CAPABILITY_PRIVACY; - } else + else sdata->drop_unencrypted = 0; + bss->supp_rates_len = sband->n_bitrates; pos = bss->supp_rates; for (i = 0; i < sband->n_bitrates; i++) { @@ -3528,7 +3605,9 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, *pos++ = (u8) (rate / 5); } - return ieee80211_sta_join_ibss(dev, ifsta, bss); + ret = ieee80211_sta_join_ibss(dev, ifsta, bss); + ieee80211_rx_bss_put(local, bss); + return ret; } @@ -3569,17 +3648,22 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, spin_unlock_bh(&local->sta_bss_lock); #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG " sta_find_ibss: selected %s current " - "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid)); + if (found) + printk(KERN_DEBUG " sta_find_ibss: selected %s current " + "%s\n", print_mac(mac, bssid), + print_mac(mac2, ifsta->bssid)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 && (bss = ieee80211_rx_bss_get(dev, bssid, local->hw.conf.channel->center_freq, ifsta->ssid, ifsta->ssid_len))) { + int ret; printk(KERN_DEBUG "%s: Selected IBSS BSSID %s" " based on configured SSID\n", dev->name, print_mac(mac, bssid)); - return ieee80211_sta_join_ibss(dev, ifsta, bss); + ret = ieee80211_sta_join_ibss(dev, ifsta, bss); + ieee80211_rx_bss_put(local, bss); + return ret; } #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG " did not try to join ibss\n"); @@ -3629,28 +3713,45 @@ int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta; + int res; if (len > IEEE80211_MAX_SSID_LEN) return -EINVAL; ifsta = &sdata->u.sta; - if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) + if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) { + memset(ifsta->ssid, 0, sizeof(ifsta->ssid)); + memcpy(ifsta->ssid, ssid, len); + ifsta->ssid_len = len; ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; - memcpy(ifsta->ssid, ssid, len); - memset(ifsta->ssid + len, 0, IEEE80211_MAX_SSID_LEN - len); - ifsta->ssid_len = len; + + res = 0; + /* + * Hack! MLME code needs to be cleaned up to have different + * entry points for configuration and internal selection change + */ + if (netif_running(sdata->dev)) + res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); + if (res) { + printk(KERN_DEBUG "%s: Failed to config new SSID to " + "the low-level driver\n", dev->name); + return res; + } + } if (len) ifsta->flags |= IEEE80211_STA_SSID_SET; else ifsta->flags &= ~IEEE80211_STA_SSID_SET; + if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { ifsta->ibss_join_req = jiffies; ifsta->state = IEEE80211_IBSS_SEARCH; return ieee80211_sta_find_ibss(dev, ifsta); } + return 0; } @@ -3676,7 +3777,12 @@ int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid) if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { memcpy(ifsta->bssid, bssid, ETH_ALEN); - res = ieee80211_if_config(dev); + res = 0; + /* + * Hack! See also ieee80211_sta_set_ssid. + */ + if (netif_running(sdata->dev)) + res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); if (res) { printk(KERN_DEBUG "%s: Failed to config new BSSID to " "the low-level driver\n", dev->name); @@ -3699,7 +3805,7 @@ static void ieee80211_send_nullfunc(struct ieee80211_local *local, { struct sk_buff *skb; struct ieee80211_hdr *nullfunc; - u16 fc; + __le16 fc; skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24); if (!skb) { @@ -3711,11 +3817,11 @@ static void ieee80211_send_nullfunc(struct ieee80211_local *local, nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24); memset(nullfunc, 0, 24); - fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | - IEEE80211_FCTL_TODS; + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_TODS); if (powersave) - fc |= IEEE80211_FCTL_PM; - nullfunc->frame_control = cpu_to_le16(fc); + fc |= cpu_to_le16(IEEE80211_FCTL_PM); + nullfunc->frame_control = fc; memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN); memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN); @@ -3763,6 +3869,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) netif_tx_lock_bh(local->mdev); + netif_addr_lock(local->mdev); local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC; local->ops->configure_filter(local_to_hw(local), FIF_BCN_PRBRESP_PROMISC, @@ -3770,15 +3877,11 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) local->mdev->mc_count, local->mdev->mc_list); + netif_addr_unlock(local->mdev); netif_tx_unlock_bh(local->mdev); rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { - - /* No need to wake the master device. */ - if (sdata->dev == local->mdev) - continue; - /* Tell AP we're back */ if (sdata->vif.type == IEEE80211_IF_TYPE_STA && sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) @@ -3944,12 +4047,6 @@ static int ieee80211_sta_start_scan(struct net_device *dev, rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { - - /* Don't stop the master interface, otherwise we can't transmit - * probes! */ - if (sdata->dev == local->mdev) - continue; - netif_stop_queue(sdata->dev); if (sdata->vif.type == IEEE80211_IF_TYPE_STA && (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) @@ -3967,14 +4064,14 @@ static int ieee80211_sta_start_scan(struct net_device *dev, local->scan_band = IEEE80211_BAND_2GHZ; local->scan_dev = dev; - netif_tx_lock_bh(local->mdev); + netif_addr_lock_bh(local->mdev); local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; local->ops->configure_filter(local_to_hw(local), FIF_BCN_PRBRESP_PROMISC, &local->filter_flags, local->mdev->mc_count, local->mdev->mc_list); - netif_tx_unlock_bh(local->mdev); + netif_addr_unlock_bh(local->mdev); /* TODO: start scan as soon as all nullfunc frames are ACKed */ queue_delayed_work(local->hw.workqueue, &local->scan_work, @@ -4009,6 +4106,7 @@ int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len) static char * ieee80211_sta_scan_result(struct net_device *dev, + struct iw_request_info *info, struct ieee80211_sta_bss *bss, char *current_ev, char *end_buf) { @@ -4023,7 +4121,7 @@ ieee80211_sta_scan_result(struct net_device *dev, iwe.cmd = SIOCGIWAP; iwe.u.ap_addr.sa_family = ARPHRD_ETHER; memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN); - current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, IW_EV_ADDR_LEN); memset(&iwe, 0, sizeof(iwe)); @@ -4031,13 +4129,13 @@ ieee80211_sta_scan_result(struct net_device *dev, if (bss_mesh_cfg(bss)) { iwe.u.data.length = bss_mesh_id_len(bss); iwe.u.data.flags = 1; - current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, - bss_mesh_id(bss)); + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, bss_mesh_id(bss)); } else { iwe.u.data.length = bss->ssid_len; iwe.u.data.flags = 1; - current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, - bss->ssid); + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, bss->ssid); } if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS) @@ -4050,31 +4148,30 @@ ieee80211_sta_scan_result(struct net_device *dev, iwe.u.mode = IW_MODE_MASTER; else iwe.u.mode = IW_MODE_ADHOC; - current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, - IW_EV_UINT_LEN); + current_ev = iwe_stream_add_event(info, current_ev, end_buf, + &iwe, IW_EV_UINT_LEN); } memset(&iwe, 0, sizeof(iwe)); iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = bss->freq; - iwe.u.freq.e = 6; - current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, + iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); + iwe.u.freq.e = 0; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, IW_EV_FREQ_LEN); memset(&iwe, 0, sizeof(iwe)); iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); - iwe.u.freq.e = 0; - current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, + iwe.u.freq.m = bss->freq; + iwe.u.freq.e = 6; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, IW_EV_FREQ_LEN); - memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVQUAL; - iwe.u.qual.qual = bss->signal; - iwe.u.qual.level = bss->rssi; + iwe.u.qual.qual = bss->qual; + iwe.u.qual.level = bss->signal; iwe.u.qual.noise = bss->noise; iwe.u.qual.updated = local->wstats_flags; - current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, IW_EV_QUAL_LEN); memset(&iwe, 0, sizeof(iwe)); @@ -4084,27 +4181,36 @@ ieee80211_sta_scan_result(struct net_device *dev, else iwe.u.data.flags = IW_ENCODE_DISABLED; iwe.u.data.length = 0; - current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, ""); + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, ""); if (bss && bss->wpa_ie) { memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVGENIE; iwe.u.data.length = bss->wpa_ie_len; - current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, - bss->wpa_ie); + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, bss->wpa_ie); } if (bss && bss->rsn_ie) { memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVGENIE; iwe.u.data.length = bss->rsn_ie_len; - current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, - bss->rsn_ie); + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, bss->rsn_ie); + } + + if (bss && bss->ht_ie) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVGENIE; + iwe.u.data.length = bss->ht_ie_len; + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, bss->ht_ie); } if (bss && bss->supp_rates_len > 0) { /* display all supported rates in readable format */ - char *p = current_ev + IW_EV_LCP_LEN; + char *p = current_ev + iwe_stream_lcp_len(info); int i; memset(&iwe, 0, sizeof(iwe)); @@ -4115,7 +4221,7 @@ ieee80211_sta_scan_result(struct net_device *dev, for (i = 0; i < bss->supp_rates_len; i++) { iwe.u.bitrate.value = ((bss->supp_rates[i] & 0x7f) * 500000); - p = iwe_stream_add_value(current_ev, p, + p = iwe_stream_add_value(info, current_ev, p, end_buf, &iwe, IW_EV_PARAM_LEN); } current_ev = p; @@ -4129,8 +4235,16 @@ ieee80211_sta_scan_result(struct net_device *dev, iwe.cmd = IWEVCUSTOM; sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp)); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, &iwe, buf); + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, " Last beacon: %dms ago", + jiffies_to_msecs(jiffies - bss->last_update)); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, &iwe, buf); kfree(buf); } } @@ -4144,31 +4258,36 @@ ieee80211_sta_scan_result(struct net_device *dev, iwe.cmd = IWEVCUSTOM; sprintf(buf, "Mesh network (version %d)", cfg[0]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, &iwe, buf); sprintf(buf, "Path Selection Protocol ID: " "0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3], cfg[4]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, &iwe, buf); sprintf(buf, "Path Selection Metric ID: " "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7], cfg[8]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, &iwe, buf); sprintf(buf, "Congestion Control Mode ID: " "0x%02X%02X%02X%02X", cfg[9], cfg[10], cfg[11], cfg[12]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, &iwe, buf); sprintf(buf, "Channel Precedence: " "0x%02X%02X%02X%02X", cfg[13], cfg[14], cfg[15], cfg[16]); iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, &iwe, buf); kfree(buf); } @@ -4178,7 +4297,9 @@ ieee80211_sta_scan_result(struct net_device *dev, } -int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len) +int ieee80211_sta_scan_results(struct net_device *dev, + struct iw_request_info *info, + char *buf, size_t len) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); char *current_ev = buf; @@ -4191,8 +4312,8 @@ int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len) spin_unlock_bh(&local->sta_bss_lock); return -E2BIG; } - current_ev = ieee80211_sta_scan_result(dev, bss, current_ev, - end_buf); + current_ev = ieee80211_sta_scan_result(dev, info, bss, + current_ev, end_buf); } spin_unlock_bh(&local->sta_bss_lock); return current_ev - buf; @@ -4203,6 +4324,7 @@ int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; + kfree(ifsta->extra_ie); if (len == 0) { ifsta->extra_ie = NULL; @@ -4220,14 +4342,15 @@ int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len) } -struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev, - struct sk_buff *skb, u8 *bssid, - u8 *addr) +struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev, + struct sk_buff *skb, u8 *bssid, + u8 *addr, u64 supp_rates) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); DECLARE_MAC_BUF(mac); + int band = local->hw.conf.channel->band; /* TODO: Could consider removing the least recently used entry and * allow new one to be added. */ @@ -4239,17 +4362,24 @@ struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev, return NULL; } + if (compare_ether_addr(bssid, sdata->u.sta.bssid)) + return NULL; + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Adding new IBSS station %s (dev=%s)\n", wiphy_name(local->hw.wiphy), print_mac(mac, addr), dev->name); +#endif sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); if (!sta) return NULL; - sta->flags |= WLAN_STA_AUTHORIZED; + set_sta_flags(sta, WLAN_STA_AUTHORIZED); - sta->supp_rates[local->hw.conf.channel->band] = - sdata->u.sta.supp_rates_bits[local->hw.conf.channel->band]; + if (supp_rates) + sta->supp_rates[band] = supp_rates; + else + sta->supp_rates[band] = sdata->u.sta.supp_rates_bits[band]; rate_control_rate_init(sta, local); @@ -4265,7 +4395,7 @@ int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason) struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; - printk(KERN_DEBUG "%s: deauthenticate(reason=%d)\n", + printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n", dev->name, reason); if (sdata->vif.type != IEEE80211_IF_TYPE_STA && @@ -4283,7 +4413,7 @@ int ieee80211_sta_disassociate(struct net_device *dev, u16 reason) struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; - printk(KERN_DEBUG "%s: disassociate(reason=%d)\n", + printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n", dev->name, reason); if (sdata->vif.type != IEEE80211_IF_TYPE_STA) @@ -4307,12 +4437,10 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw, case IEEE80211_NOTIFY_RE_ASSOC: rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + continue; - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) { - ieee80211_sta_req_auth(sdata->dev, - &sdata->u.sta); - } - + ieee80211_sta_req_auth(sdata->dev, &sdata->u.sta); } rcu_read_unlock(); break; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 841df93807fc..0388c090dfe9 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -176,20 +176,24 @@ void rate_control_get_rate(struct net_device *dev, rcu_read_lock(); sta = sta_info_get(local, hdr->addr1); - memset(sel, 0, sizeof(struct rate_selection)); + sel->rate_idx = -1; + sel->nonerp_idx = -1; + sel->probe_idx = -1; ref->ops->get_rate(ref->priv, dev, sband, skb, sel); + BUG_ON(sel->rate_idx < 0); + /* Select a non-ERP backup rate. */ - if (!sel->nonerp) { + if (sel->nonerp_idx < 0) { for (i = 0; i < sband->n_bitrates; i++) { struct ieee80211_rate *rate = &sband->bitrates[i]; - if (sel->rate->bitrate < rate->bitrate) + if (sband->bitrates[sel->rate_idx].bitrate < rate->bitrate) break; if (rate_supported(sta, sband->band, i) && !(rate->flags & IEEE80211_RATE_ERP_G)) - sel->nonerp = rate; + sel->nonerp_idx = i; } } diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 5b45f33cb766..ede7ab56f65b 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -19,22 +19,22 @@ #include "ieee80211_i.h" #include "sta_info.h" -/* TODO: kdoc */ +/** + * struct rate_selection - rate selection for rate control algos + * @rate: selected transmission rate index + * @nonerp: Non-ERP rate to use instead if ERP cannot be used + * @probe: rate for probing (or -1) + * + */ struct rate_selection { - /* Selected transmission rate */ - struct ieee80211_rate *rate; - /* Non-ERP rate to use if mac80211 decides it cannot use an ERP rate */ - struct ieee80211_rate *nonerp; - /* probe with this rate, or NULL for no probing */ - struct ieee80211_rate *probe; + s8 rate_idx, nonerp_idx, probe_idx; }; struct rate_control_ops { struct module *module; const char *name; void (*tx_status)(void *priv, struct net_device *dev, - struct sk_buff *skb, - struct ieee80211_tx_status *status); + struct sk_buff *skb); void (*get_rate)(void *priv, struct net_device *dev, struct ieee80211_supported_band *band, struct sk_buff *skb, @@ -76,13 +76,12 @@ struct rate_control_ref *rate_control_get(struct rate_control_ref *ref); void rate_control_put(struct rate_control_ref *ref); static inline void rate_control_tx_status(struct net_device *dev, - struct sk_buff *skb, - struct ieee80211_tx_status *status) + struct sk_buff *skb) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct rate_control_ref *ref = local->rate_ctrl; - ref->ops->tx_status(ref->priv, dev, skb, status); + ref->ops->tx_status(ref->priv, dev, skb); } @@ -138,7 +137,7 @@ static inline int rate_supported(struct sta_info *sta, return (sta == NULL || sta->supp_rates[band] & BIT(index)); } -static inline int +static inline s8 rate_lowest_index(struct ieee80211_local *local, struct ieee80211_supported_band *sband, struct sta_info *sta) @@ -155,14 +154,6 @@ rate_lowest_index(struct ieee80211_local *local, return 0; } -static inline struct ieee80211_rate * -rate_lowest(struct ieee80211_local *local, - struct ieee80211_supported_band *sband, - struct sta_info *sta) -{ - return &sband->bitrates[rate_lowest_index(local, sband, sta)]; -} - /* functions for rate control related to a device */ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, @@ -171,9 +162,7 @@ void rate_control_deinitialize(struct ieee80211_local *local); /* Rate control algorithms */ -#if defined(RC80211_PID_COMPILE) || \ - (defined(CONFIG_MAC80211_RC_PID) && \ - !defined(CONFIG_MAC80211_RC_PID_MODULE)) +#ifdef CONFIG_MAC80211_RC_PID extern int rc80211_pid_init(void); extern void rc80211_pid_exit(void); #else diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 04afc13ed825..0a9135b974b5 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -61,7 +61,7 @@ enum rc_pid_event_type { union rc_pid_event_data { /* RC_PID_EVENT_TX_STATUS */ struct { - struct ieee80211_tx_status tx_status; + struct ieee80211_tx_info tx_status; }; /* RC_PID_EVENT_TYPE_RATE_CHANGE */ /* RC_PID_EVENT_TYPE_TX_RATE */ @@ -141,7 +141,6 @@ struct rc_pid_events_file_info { * rate behaviour values (lower means we should trust more what we learnt * about behaviour of rates, higher means we should trust more the natural * ordering of rates) - * @fast_start: if Y, push high rates right after initialization */ struct rc_pid_debugfs_entries { struct dentry *dir; @@ -154,11 +153,10 @@ struct rc_pid_debugfs_entries { struct dentry *sharpen_factor; struct dentry *sharpen_duration; struct dentry *norm_offset; - struct dentry *fast_start; }; void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, - struct ieee80211_tx_status *stat); + struct ieee80211_tx_info *stat); void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf, int index, int rate); @@ -267,9 +265,6 @@ struct rc_pid_info { /* Normalization offset. */ unsigned int norm_offset; - /* Fast starst parameter. */ - unsigned int fast_start; - /* Rates information. */ struct rc_pid_rateinfo *rinfo; diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index a849b745bdb5..a914ba73ccf5 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -237,8 +237,7 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, } static void rate_control_pid_tx_status(void *priv, struct net_device *dev, - struct sk_buff *skb, - struct ieee80211_tx_status *status) + struct sk_buff *skb) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -248,6 +247,7 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev, struct rc_pid_sta_info *spinfo; unsigned long period; struct ieee80211_supported_band *sband; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); rcu_read_lock(); @@ -259,35 +259,35 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev, /* Don't update the state if we're not controlling the rate. */ sdata = sta->sdata; - if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) { - sta->txrate_idx = sdata->bss->max_ratectrl_rateidx; + if (sdata->force_unicast_rateidx > -1) { + sta->txrate_idx = sdata->max_ratectrl_rateidx; goto unlock; } /* Ignore all frames that were sent with a different rate than the rate * we currently advise mac80211 to use. */ - if (status->control.tx_rate != &sband->bitrates[sta->txrate_idx]) + if (info->tx_rate_idx != sta->txrate_idx) goto unlock; spinfo = sta->rate_ctrl_priv; spinfo->tx_num_xmit++; #ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_tx_status(&spinfo->events, status); + rate_control_pid_event_tx_status(&spinfo->events, info); #endif /* We count frames that totally failed to be transmitted as two bad * frames, those that made it out but had some retries as one good and * one bad frame. */ - if (status->excessive_retries) { + if (info->status.excessive_retries) { spinfo->tx_num_failed += 2; spinfo->tx_num_xmit++; - } else if (status->retry_count) { + } else if (info->status.retry_count) { spinfo->tx_num_failed++; spinfo->tx_num_xmit++; } - if (status->excessive_retries) { + if (info->status.excessive_retries) { sta->tx_retry_failed++; sta->tx_num_consecutive_failures++; sta->tx_num_mpdu_fail++; @@ -295,8 +295,8 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev, sta->tx_num_consecutive_failures = 0; sta->tx_num_mpdu_ok++; } - sta->tx_retry_count += status->retry_count; - sta->tx_num_mpdu_fail += status->retry_count; + sta->tx_retry_count += info->status.retry_count; + sta->tx_num_mpdu_fail += info->status.retry_count; /* Update PID controller state. */ period = (HZ * pinfo->sampling_period + 500) / 1000; @@ -330,15 +330,15 @@ static void rate_control_pid_get_rate(void *priv, struct net_device *dev, fc = le16_to_cpu(hdr->frame_control); if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || is_multicast_ether_addr(hdr->addr1) || !sta) { - sel->rate = rate_lowest(local, sband, sta); + sel->rate_idx = rate_lowest_index(local, sband, sta); rcu_read_unlock(); return; } /* If a forced rate is in effect, select it. */ sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) - sta->txrate_idx = sdata->bss->force_unicast_rateidx; + if (sdata->force_unicast_rateidx > -1) + sta->txrate_idx = sdata->force_unicast_rateidx; rateidx = sta->txrate_idx; @@ -349,7 +349,7 @@ static void rate_control_pid_get_rate(void *priv, struct net_device *dev, rcu_read_unlock(); - sel->rate = &sband->bitrates[rateidx]; + sel->rate_idx = rateidx; #ifdef CONFIG_MAC80211_DEBUGFS rate_control_pid_event_tx_rate( @@ -398,13 +398,25 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) return NULL; } + pinfo->target = RC_PID_TARGET_PF; + pinfo->sampling_period = RC_PID_INTERVAL; + pinfo->coeff_p = RC_PID_COEFF_P; + pinfo->coeff_i = RC_PID_COEFF_I; + pinfo->coeff_d = RC_PID_COEFF_D; + pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT; + pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR; + pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION; + pinfo->norm_offset = RC_PID_NORM_OFFSET; + pinfo->rinfo = rinfo; + pinfo->oldrate = 0; + /* Sort the rates. This is optimized for the most common case (i.e. * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed * mapping too. */ for (i = 0; i < sband->n_bitrates; i++) { rinfo[i].index = i; rinfo[i].rev_index = i; - if (pinfo->fast_start) + if (RC_PID_FAST_START) rinfo[i].diff = 0; else rinfo[i].diff = i * pinfo->norm_offset; @@ -425,19 +437,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) break; } - pinfo->target = RC_PID_TARGET_PF; - pinfo->sampling_period = RC_PID_INTERVAL; - pinfo->coeff_p = RC_PID_COEFF_P; - pinfo->coeff_i = RC_PID_COEFF_I; - pinfo->coeff_d = RC_PID_COEFF_D; - pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT; - pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR; - pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION; - pinfo->norm_offset = RC_PID_NORM_OFFSET; - pinfo->fast_start = RC_PID_FAST_START; - pinfo->rinfo = rinfo; - pinfo->oldrate = 0; - #ifdef CONFIG_MAC80211_DEBUGFS de = &pinfo->dentries; de->dir = debugfs_create_dir("rc80211_pid", @@ -465,9 +464,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) de->norm_offset = debugfs_create_u32("norm_offset", S_IRUSR | S_IWUSR, de->dir, &pinfo->norm_offset); - de->fast_start = debugfs_create_bool("fast_start", - S_IRUSR | S_IWUSR, de->dir, - &pinfo->fast_start); #endif return pinfo; @@ -479,7 +475,6 @@ static void rate_control_pid_free(void *priv) #ifdef CONFIG_MAC80211_DEBUGFS struct rc_pid_debugfs_entries *de = &pinfo->dentries; - debugfs_remove(de->fast_start); debugfs_remove(de->norm_offset); debugfs_remove(de->sharpen_duration); debugfs_remove(de->sharpen_factor); @@ -540,11 +535,6 @@ static struct rate_control_ops mac80211_rcpid = { #endif }; -MODULE_DESCRIPTION("PID controller based rate control algorithm"); -MODULE_AUTHOR("Stefano Brivio"); -MODULE_AUTHOR("Mattias Nissler"); -MODULE_LICENSE("GPL"); - int __init rc80211_pid_init(void) { return ieee80211_rate_control_register(&mac80211_rcpid); @@ -554,8 +544,3 @@ void rc80211_pid_exit(void) { ieee80211_rate_control_unregister(&mac80211_rcpid); } - -#ifdef CONFIG_MAC80211_RC_PID_MODULE -module_init(rc80211_pid_init); -module_exit(rc80211_pid_exit); -#endif diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c index ae75d4178739..8121d3bc6835 100644 --- a/net/mac80211/rc80211_pid_debugfs.c +++ b/net/mac80211/rc80211_pid_debugfs.c @@ -39,11 +39,11 @@ static void rate_control_pid_event(struct rc_pid_event_buffer *buf, } void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, - struct ieee80211_tx_status *stat) + struct ieee80211_tx_info *stat) { union rc_pid_event_data evd; - memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_status)); + memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_info)); rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_STATUS, &evd); } @@ -85,7 +85,7 @@ static int rate_control_pid_events_open(struct inode *inode, struct file *file) struct rc_pid_sta_info *sinfo = inode->i_private; struct rc_pid_event_buffer *events = &sinfo->events; struct rc_pid_events_file_info *file_info; - unsigned int status; + unsigned long status; /* Allocate a state struct */ file_info = kmalloc(sizeof(*file_info), GFP_KERNEL); @@ -135,7 +135,7 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf, char pb[RC_PID_PRINT_BUF_SIZE]; int ret; int p; - unsigned int status; + unsigned long status; /* Check if there is something to read. */ if (events->next_entry == file_info->next_entry) { @@ -167,8 +167,8 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf, switch (ev->type) { case RC_PID_EVENT_TYPE_TX_STATUS: p += snprintf(pb + p, length - p, "tx_status %u %u", - ev->data.tx_status.excessive_retries, - ev->data.tx_status.retry_count); + ev->data.tx_status.status.excessive_retries, + ev->data.tx_status.status.retry_count); break; case RC_PID_EVENT_TYPE_RATE_CHANGE: p += snprintf(pb + p, length - p, "rate_change %d %d", diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 02f436a86061..6d9ae67c27ca 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -61,22 +61,147 @@ static inline int should_drop_frame(struct ieee80211_rx_status *status, int present_fcs_len, int radiotap_len) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) return 1; if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len)) return 1; - if (((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == - cpu_to_le16(IEEE80211_FTYPE_CTL)) && - ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) != - cpu_to_le16(IEEE80211_STYPE_PSPOLL)) && - ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) != - cpu_to_le16(IEEE80211_STYPE_BACK_REQ))) + if (ieee80211_is_ctl(hdr->frame_control) && + !ieee80211_is_pspoll(hdr->frame_control) && + !ieee80211_is_back_req(hdr->frame_control)) return 1; return 0; } +static int +ieee80211_rx_radiotap_len(struct ieee80211_local *local, + struct ieee80211_rx_status *status) +{ + int len; + + /* always present fields */ + len = sizeof(struct ieee80211_radiotap_header) + 9; + + if (status->flag & RX_FLAG_TSFT) + len += 8; + if (local->hw.flags & IEEE80211_HW_SIGNAL_DB || + local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + len += 1; + if (local->hw.flags & IEEE80211_HW_NOISE_DBM) + len += 1; + + if (len & 1) /* padding for RX_FLAGS if necessary */ + len++; + + /* make sure radiotap starts at a naturally aligned address */ + if (len % 8) + len = roundup(len, 8); + + return len; +} + +/** + * ieee80211_add_rx_radiotap_header - add radiotap header + * + * add a radiotap header containing all the fields which the hardware provided. + */ +static void +ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, + struct sk_buff *skb, + struct ieee80211_rx_status *status, + struct ieee80211_rate *rate, + int rtap_len) +{ + struct ieee80211_radiotap_header *rthdr; + unsigned char *pos; + + rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len); + memset(rthdr, 0, rtap_len); + + /* radiotap header, set always present flags */ + rthdr->it_present = + cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | + (1 << IEEE80211_RADIOTAP_RATE) | + (1 << IEEE80211_RADIOTAP_CHANNEL) | + (1 << IEEE80211_RADIOTAP_ANTENNA) | + (1 << IEEE80211_RADIOTAP_RX_FLAGS)); + rthdr->it_len = cpu_to_le16(rtap_len); + + pos = (unsigned char *)(rthdr+1); + + /* the order of the following fields is important */ + + /* IEEE80211_RADIOTAP_TSFT */ + if (status->flag & RX_FLAG_TSFT) { + *(__le64 *)pos = cpu_to_le64(status->mactime); + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT); + pos += 8; + } + + /* IEEE80211_RADIOTAP_FLAGS */ + if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) + *pos |= IEEE80211_RADIOTAP_F_FCS; + pos++; + + /* IEEE80211_RADIOTAP_RATE */ + *pos = rate->bitrate / 5; + pos++; + + /* IEEE80211_RADIOTAP_CHANNEL */ + *(__le16 *)pos = cpu_to_le16(status->freq); + pos += 2; + if (status->band == IEEE80211_BAND_5GHZ) + *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM | + IEEE80211_CHAN_5GHZ); + else + *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_DYN | + IEEE80211_CHAN_2GHZ); + pos += 2; + + /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */ + if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { + *pos = status->signal; + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL); + pos++; + } + + /* IEEE80211_RADIOTAP_DBM_ANTNOISE */ + if (local->hw.flags & IEEE80211_HW_NOISE_DBM) { + *pos = status->noise; + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTNOISE); + pos++; + } + + /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */ + + /* IEEE80211_RADIOTAP_ANTENNA */ + *pos = status->antenna; + pos++; + + /* IEEE80211_RADIOTAP_DB_ANTSIGNAL */ + if (local->hw.flags & IEEE80211_HW_SIGNAL_DB) { + *pos = status->signal; + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL); + pos++; + } + + /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */ + + /* IEEE80211_RADIOTAP_RX_FLAGS */ + /* ensure 2 byte alignment for the 2 byte field as required */ + if ((pos - (unsigned char *)rthdr) & 1) + pos++; + /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */ + if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) + *(__le16 *)pos |= cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS); + pos += 2; +} + /* * This function copies a received frame to all monitor interfaces and * returns a cleaned-up SKB that no longer includes the FCS nor the @@ -89,17 +214,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, { struct ieee80211_sub_if_data *sdata; int needed_headroom = 0; - struct ieee80211_radiotap_header *rthdr; - __le64 *rttsft = NULL; - struct ieee80211_rtap_fixed_data { - u8 flags; - u8 rate; - __le16 chan_freq; - __le16 chan_flags; - u8 antsignal; - u8 padding_for_rxflags; - __le16 rx_flags; - } __attribute__ ((packed)) *rtfixed; struct sk_buff *skb, *skb2; struct net_device *prev_dev = NULL; int present_fcs_len = 0; @@ -116,8 +230,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (status->flag & RX_FLAG_RADIOTAP) rtap_len = ieee80211_get_radiotap_len(origskb->data); else - /* room for radiotap header, always present fields and TSFT */ - needed_headroom = sizeof(*rthdr) + sizeof(*rtfixed) + 8; + /* room for the radiotap header based on driver features */ + needed_headroom = ieee80211_rx_radiotap_len(local, status); if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) present_fcs_len = FCS_LEN; @@ -163,55 +277,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, } /* if necessary, prepend radiotap information */ - if (!(status->flag & RX_FLAG_RADIOTAP)) { - rtfixed = (void *) skb_push(skb, sizeof(*rtfixed)); - rtap_len = sizeof(*rthdr) + sizeof(*rtfixed); - if (status->flag & RX_FLAG_TSFT) { - rttsft = (void *) skb_push(skb, sizeof(*rttsft)); - rtap_len += 8; - } - rthdr = (void *) skb_push(skb, sizeof(*rthdr)); - memset(rthdr, 0, sizeof(*rthdr)); - memset(rtfixed, 0, sizeof(*rtfixed)); - rthdr->it_present = - cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | - (1 << IEEE80211_RADIOTAP_RATE) | - (1 << IEEE80211_RADIOTAP_CHANNEL) | - (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL) | - (1 << IEEE80211_RADIOTAP_RX_FLAGS)); - rtfixed->flags = 0; - if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) - rtfixed->flags |= IEEE80211_RADIOTAP_F_FCS; - - if (rttsft) { - *rttsft = cpu_to_le64(status->mactime); - rthdr->it_present |= - cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT); - } - - /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */ - rtfixed->rx_flags = 0; - if (status->flag & - (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) - rtfixed->rx_flags |= - cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS); - - rtfixed->rate = rate->bitrate / 5; - - rtfixed->chan_freq = cpu_to_le16(status->freq); - - if (status->band == IEEE80211_BAND_5GHZ) - rtfixed->chan_flags = - cpu_to_le16(IEEE80211_CHAN_OFDM | - IEEE80211_CHAN_5GHZ); - else - rtfixed->chan_flags = - cpu_to_le16(IEEE80211_CHAN_DYN | - IEEE80211_CHAN_2GHZ); - - rtfixed->antsignal = status->ssi; - rthdr->it_len = cpu_to_le16(rtap_len); - } + if (!(status->flag & RX_FLAG_RADIOTAP)) + ieee80211_add_rx_radiotap_header(local, skb, status, rate, + needed_headroom); skb_reset_mac_header(skb); skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -253,33 +321,33 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) { - u8 *data = rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int tid; /* does the frame have a qos control field? */ - if (WLAN_FC_IS_QOS_DATA(rx->fc)) { - u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN; + if (ieee80211_is_data_qos(hdr->frame_control)) { + u8 *qc = ieee80211_get_qos_ctl(hdr); /* frame has qos control */ - tid = qc[0] & QOS_CONTROL_TID_MASK; - if (qc[0] & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT) + tid = *qc & IEEE80211_QOS_CTL_TID_MASK; + if (*qc & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT) rx->flags |= IEEE80211_RX_AMSDU; else rx->flags &= ~IEEE80211_RX_AMSDU; } else { - if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) { - /* Separate TID for management frames */ - tid = NUM_RX_DATA_QUEUES - 1; - } else { - /* no qos control present */ - tid = 0; /* 802.1d - Best Effort */ - } + /* + * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"): + * + * Sequence numbers for management frames, QoS data + * frames with a broadcast/multicast address in the + * Address 1 field, and all non-QoS data frames sent + * by QoS STAs are assigned using an additional single + * modulo-4096 counter, [...] + * + * We also use that counter for non-QoS STAs. + */ + tid = NUM_RX_DATA_QUEUES - 1; } - I802_DEBUG_INC(rx->local->wme_rx_queue[tid]); - /* only a debug counter, sta might not be assigned properly yet */ - if (rx->sta) - I802_DEBUG_INC(rx->sta->wme_rx_queue[tid]); - rx->queue = tid; /* Set skb->priority to 1d tag if highest order bit of TID is not set. * For now, set skb->priority to 0 for other cases. */ @@ -289,9 +357,10 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx) { #ifdef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int hdrlen; - if (!WLAN_FC_DATA_PRESENT(rx->fc)) + if (!ieee80211_is_data_present(hdr->frame_control)) return; /* @@ -313,7 +382,7 @@ static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx) * header and the payload is not supported, the driver is required * to move the 802.11 header further back in that case. */ - hdrlen = ieee80211_get_hdrlen(rx->fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (rx->flags & IEEE80211_RX_AMSDU) hdrlen += ETH_HLEN; WARN_ON_ONCE(((unsigned long)(rx->skb->data + hdrlen)) & 3); @@ -321,51 +390,9 @@ static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx) } -static u32 ieee80211_rx_load_stats(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_rx_status *status, - struct ieee80211_rate *rate) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u32 load = 0, hdrtime; - - /* Estimate total channel use caused by this frame */ - - /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, - * 1 usec = 1/8 * (1080 / 10) = 13.5 */ - - if (status->band == IEEE80211_BAND_5GHZ || - (status->band == IEEE80211_BAND_5GHZ && - rate->flags & IEEE80211_RATE_ERP_G)) - hdrtime = CHAN_UTIL_HDR_SHORT; - else - hdrtime = CHAN_UTIL_HDR_LONG; - - load = hdrtime; - if (!is_multicast_ether_addr(hdr->addr1)) - load += hdrtime; - - /* TODO: optimise again */ - load += skb->len * CHAN_UTIL_RATE_LCM / rate->bitrate; - - /* Divide channel_use by 8 to avoid wrapping around the counter */ - load >>= CHAN_UTIL_SHIFT; - - return load; -} - /* rx handlers */ -static ieee80211_rx_result -ieee80211_rx_h_if_stats(struct ieee80211_rx_data *rx) -{ - if (rx->sta) - rx->sta->channel_use_raw += rx->load; - rx->sdata->channel_use_raw += rx->load; - return RX_CONTINUE; -} - -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; @@ -394,14 +421,11 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) { - int hdrlen = ieee80211_get_hdrlen(rx->fc); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control); -#define msh_h_get(h, l) ((struct ieee80211s_hdr *) ((u8 *)h + l)) - - if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) { - if (!((rx->fc & IEEE80211_FCTL_FROMDS) && - (rx->fc & IEEE80211_FCTL_TODS))) + if (ieee80211_is_data(hdr->frame_control)) { + if (!ieee80211_has_a4(hdr->frame_control)) return RX_DROP_MONITOR; if (memcmp(hdr->addr4, rx->dev->dev_addr, ETH_ALEN) == 0) return RX_DROP_MONITOR; @@ -414,27 +438,30 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) if (!rx->sta || sta_plink_state(rx->sta) != PLINK_ESTAB) { struct ieee80211_mgmt *mgmt; - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT) + if (!ieee80211_is_mgmt(hdr->frame_control)) return RX_DROP_MONITOR; - switch (rx->fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_ACTION: + if (ieee80211_is_action(hdr->frame_control)) { mgmt = (struct ieee80211_mgmt *)hdr; if (mgmt->u.action.category != PLINK_CATEGORY) return RX_DROP_MONITOR; - /* fall through on else */ - case IEEE80211_STYPE_PROBE_REQ: - case IEEE80211_STYPE_PROBE_RESP: - case IEEE80211_STYPE_BEACON: return RX_CONTINUE; - break; - default: - return RX_DROP_MONITOR; } - } else if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - is_multicast_ether_addr(hdr->addr1) && - mesh_rmc_check(hdr->addr4, msh_h_get(hdr, hdrlen), rx->dev)) + if (ieee80211_is_probe_req(hdr->frame_control) || + ieee80211_is_probe_resp(hdr->frame_control) || + ieee80211_is_beacon(hdr->frame_control)) + return RX_CONTINUE; + + return RX_DROP_MONITOR; + + } + +#define msh_h_get(h, l) ((struct ieee80211s_hdr *) ((u8 *)h + l)) + + if (ieee80211_is_data(hdr->frame_control) && + is_multicast_ether_addr(hdr->addr1) && + mesh_rmc_check(hdr->addr4, msh_h_get(hdr, hdrlen), rx->dev)) return RX_DROP_MONITOR; #undef msh_h_get @@ -442,16 +469,14 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_check(struct ieee80211_rx_data *rx) { - struct ieee80211_hdr *hdr; - - hdr = (struct ieee80211_hdr *) rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { - if (unlikely(rx->fc & IEEE80211_FCTL_RETRY && + if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->queue] == hdr->seq_ctrl)) { if (rx->flags & IEEE80211_RX_RA_MATCH) { @@ -480,15 +505,14 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) if (ieee80211_vif_is_mesh(&rx->sdata->vif)) return ieee80211_rx_mesh_check(rx); - if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA || - ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) && + if (unlikely((ieee80211_is_data(hdr->frame_control) || + ieee80211_is_pspoll(hdr->frame_control)) && rx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) { - if ((!(rx->fc & IEEE80211_FCTL_FROMDS) && - !(rx->fc & IEEE80211_FCTL_TODS) && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) - || !(rx->flags & IEEE80211_RX_RA_MATCH)) { + (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) { + if ((!ieee80211_has_fromds(hdr->frame_control) && + !ieee80211_has_tods(hdr->frame_control) && + ieee80211_is_data(hdr->frame_control)) || + !(rx->flags & IEEE80211_RX_RA_MATCH)) { /* Drop IBSS frames and frames for other hosts * silently. */ return RX_DROP_MONITOR; @@ -501,10 +525,10 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int keyidx; int hdrlen; ieee80211_rx_result result = RX_DROP_UNUSABLE; @@ -536,7 +560,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * possible. */ - if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) + if (!ieee80211_has_protected(hdr->frame_control)) return RX_CONTINUE; /* @@ -565,7 +589,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) (rx->status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; - hdrlen = ieee80211_get_hdrlen(rx->fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (rx->skb->len < 8 + hdrlen) return RX_DROP_UNUSABLE; /* TODO: count this? */ @@ -592,17 +616,12 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) rx->key->tx_rx_count++; /* TODO: add threshold stuff again */ } else { -#ifdef CONFIG_MAC80211_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: RX protected frame," - " but have no key\n", rx->dev->name); -#endif /* CONFIG_MAC80211_DEBUG */ return RX_DROP_MONITOR; } /* Check for weak IVs if possible */ if (rx->sta && rx->key->conf.alg == ALG_WEP && - ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) && + ieee80211_is_data(hdr->frame_control) && (!(rx->status->flag & RX_FLAG_IV_STRIPPED) || !(rx->status->flag & RX_FLAG_DECRYPTED)) && ieee80211_wep_is_weak_iv(rx->skb, rx->key)) @@ -633,10 +652,8 @@ static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) sdata = sta->sdata; - if (sdata->bss) - atomic_inc(&sdata->bss->num_sta_ps); - sta->flags |= WLAN_STA_PS; - sta->flags &= ~WLAN_STA_PSPOLL; + atomic_inc(&sdata->bss->num_sta_ps); + set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n", dev->name, print_mac(mac, sta->addr), sta->aid); @@ -649,15 +666,14 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) struct sk_buff *skb; int sent = 0; struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_tx_info *info; DECLARE_MAC_BUF(mac); sdata = sta->sdata; - if (sdata->bss) - atomic_dec(&sdata->bss->num_sta_ps); + atomic_dec(&sdata->bss->num_sta_ps); - sta->flags &= ~(WLAN_STA_PS | WLAN_STA_PSPOLL); + clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL); if (!skb_queue_empty(&sta->ps_tx_buf)) sta_info_clear_tim_bit(sta); @@ -669,13 +685,13 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) /* Send all buffered frames to the station */ while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + info = IEEE80211_SKB_CB(skb); sent++; - pkt_data->flags |= IEEE80211_TXPD_REQUEUE; + info->flags |= IEEE80211_TX_CTL_REQUEUE; dev_queue_xmit(skb); } while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + info = IEEE80211_SKB_CB(skb); local->total_ps_buffered--; sent++; #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG @@ -683,19 +699,19 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) "since STA not sleeping anymore\n", dev->name, print_mac(mac, sta->addr), sta->aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - pkt_data->flags |= IEEE80211_TXPD_REQUEUE; + info->flags |= IEEE80211_TX_CTL_REQUEUE; dev_queue_xmit(skb); } return sent; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { struct sta_info *sta = rx->sta; struct net_device *dev = rx->dev; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; if (!sta) return RX_CONTINUE; @@ -725,24 +741,26 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; - sta->last_rssi = rx->status->ssi; sta->last_signal = rx->status->signal; + sta->last_qual = rx->status->qual; sta->last_noise = rx->status->noise; - if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) { + if (!ieee80211_has_morefrags(hdr->frame_control) && + (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP || + rx->sdata->vif.type == IEEE80211_IF_TYPE_VLAN)) { /* Change STA power saving mode only in the end of a frame * exchange sequence */ - if ((sta->flags & WLAN_STA_PS) && !(rx->fc & IEEE80211_FCTL_PM)) + if (test_sta_flags(sta, WLAN_STA_PS) && + !ieee80211_has_pm(hdr->frame_control)) rx->sent_ps_buffered += ap_sta_ps_end(dev, sta); - else if (!(sta->flags & WLAN_STA_PS) && - (rx->fc & IEEE80211_FCTL_PM)) + else if (!test_sta_flags(sta, WLAN_STA_PS) && + ieee80211_has_pm(hdr->frame_control)) ap_sta_ps_start(dev, sta); } /* Drop data::nullfunc frames silently, since they are used only to * control station power saving mode. */ - if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_NULLFUNC) { + if (ieee80211_is_nullfunc(hdr->frame_control)) { I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); /* Update counter and free packet here to avoid counting this * as a dropped packed. */ @@ -768,7 +786,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, sdata->fragment_next = 0; if (!skb_queue_empty(&entry->skb_list)) { -#ifdef CONFIG_MAC80211_DEBUG +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) entry->skb_list.next->data; DECLARE_MAC_BUF(mac); @@ -780,7 +798,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, jiffies - entry->first_frag_time, entry->seq, entry->last_frag, print_mac(mac, hdr->addr1), print_mac(mac2, hdr->addr2)); -#endif /* CONFIG_MAC80211_DEBUG */ +#endif __skb_queue_purge(&entry->skb_list); } @@ -837,7 +855,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, return NULL; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr; @@ -901,18 +919,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) break; } rpn = rx->key->u.ccmp.rx_pn[rx->queue]; - if (memcmp(pn, rpn, CCMP_PN_LEN) != 0) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: defrag: CCMP PN not " - "sequential A2=%s" - " PN=%02x%02x%02x%02x%02x%02x " - "(expected %02x%02x%02x%02x%02x%02x)\n", - rx->dev->name, print_mac(mac, hdr->addr2), - rpn[0], rpn[1], rpn[2], rpn[3], rpn[4], - rpn[5], pn[0], pn[1], pn[2], pn[3], - pn[4], pn[5]); + if (memcmp(pn, rpn, CCMP_PN_LEN)) return RX_DROP_UNUSABLE; - } memcpy(entry->last_pn, pn, CCMP_PN_LEN); } @@ -953,7 +961,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) return RX_CONTINUE; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); @@ -988,7 +996,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) * Tell TX path to send one frame even though the STA may * still remain is PS mode after this frame exchange. */ - rx->sta->flags |= WLAN_STA_PSPOLL; + set_sta_flags(rx->sta, WLAN_STA_PSPOLL); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %s aid %d: PS Poll (entries after %d)\n", @@ -1016,7 +1024,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) * have nothing buffered for it? */ printk(KERN_DEBUG "%s: STA %s sent PS Poll even " - "though there is no buffered frames for it\n", + "though there are no buffered frames for it\n", rx->dev->name, print_mac(mac, rx->sta->addr)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } @@ -1028,22 +1036,22 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) return RX_QUEUED; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_remove_qos_control(struct ieee80211_rx_data *rx) { - u16 fc = rx->fc; u8 *data = rx->skb->data; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)data; - if (!WLAN_FC_IS_QOS_DATA(fc)) + if (!ieee80211_is_data_qos(hdr->frame_control)) return RX_CONTINUE; /* remove the qos control field, update frame type and meta-data */ - memmove(data + 2, data, ieee80211_get_hdrlen(fc) - 2); - hdr = (struct ieee80211_hdr *) skb_pull(rx->skb, 2); + memmove(data + IEEE80211_QOS_CTL_LEN, data, + ieee80211_hdrlen(hdr->frame_control) - IEEE80211_QOS_CTL_LEN); + hdr = (struct ieee80211_hdr *)skb_pull(rx->skb, IEEE80211_QOS_CTL_LEN); /* change frame type to non QOS */ - rx->fc = fc &= ~IEEE80211_STYPE_QOS_DATA; - hdr->frame_control = cpu_to_le16(fc); + rx->fc &= ~IEEE80211_STYPE_QOS_DATA; + hdr->frame_control &= ~cpu_to_le16(IEEE80211_STYPE_QOS_DATA); return RX_CONTINUE; } @@ -1051,14 +1059,9 @@ ieee80211_rx_h_remove_qos_control(struct ieee80211_rx_data *rx) static int ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx) { - if (unlikely(!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED))) { -#ifdef CONFIG_MAC80211_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: dropped frame " - "(unauthorized port)\n", rx->dev->name); -#endif /* CONFIG_MAC80211_DEBUG */ + if (unlikely(!rx->sta || + !test_sta_flags(rx->sta, WLAN_STA_AUTHORIZED))) return -EACCES; - } return 0; } @@ -1091,7 +1094,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) u16 fc, hdrlen, ethertype; u8 *payload; u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN]; + u8 src[ETH_ALEN] __aligned(2); struct sk_buff *skb = rx->skb; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); DECLARE_MAC_BUF(mac); @@ -1138,16 +1141,8 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) memcpy(src, hdr->addr2, ETH_ALEN); if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_AP && - sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: dropped ToDS frame " - "(BSSID=%s SA=%s DA=%s)\n", - dev->name, - print_mac(mac, hdr->addr1), - print_mac(mac2, hdr->addr2), - print_mac(mac3, hdr->addr3)); + sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) return -1; - } break; case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): /* RA TA DA SA */ @@ -1155,17 +1150,8 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) memcpy(src, hdr->addr4, ETH_ALEN); if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_WDS && - sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: dropped FromDS&ToDS " - "frame (RA=%s TA=%s DA=%s SA=%s)\n", - rx->dev->name, - print_mac(mac, hdr->addr1), - print_mac(mac2, hdr->addr2), - print_mac(mac3, hdr->addr3), - print_mac(mac4, hdr->addr4)); + sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)) return -1; - } break; case IEEE80211_FCTL_FROMDS: /* DA BSSID SA */ @@ -1182,27 +1168,13 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) memcpy(dst, hdr->addr1, ETH_ALEN); memcpy(src, hdr->addr2, ETH_ALEN); - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: dropped IBSS frame " - "(DA=%s SA=%s BSSID=%s)\n", - dev->name, - print_mac(mac, hdr->addr1), - print_mac(mac2, hdr->addr2), - print_mac(mac3, hdr->addr3)); - } + if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS) return -1; - } break; } - if (unlikely(skb->len - hdrlen < 8)) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: RX too short data frame " - "payload\n", dev->name); - } + if (unlikely(skb->len - hdrlen < 8)) return -1; - } payload = skb->data + hdrlen; ethertype = (payload[6] << 8) | payload[7]; @@ -1234,7 +1206,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) */ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx) { - static const u8 pae_group_addr[ETH_ALEN] + static const u8 pae_group_addr[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 }; struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; @@ -1305,11 +1277,11 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(skb->data)) { if (*mesh_ttl > 0) { xmit_skb = skb_copy(skb, GFP_ATOMIC); - if (!xmit_skb && net_ratelimit()) + if (xmit_skb) + xmit_skb->pkt_type = PACKET_OTHERHOST; + else if (net_ratelimit()) printk(KERN_DEBUG "%s: failed to clone " "multicast frame\n", dev->name); - else - xmit_skb->pkt_type = PACKET_OTHERHOST; } else IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.sta, dropped_frames_ttl); @@ -1345,7 +1317,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) } } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) { struct net_device *dev = rx->dev; @@ -1394,10 +1366,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) padding = ((4 - subframe_len) & 0x3); /* the last MSDU has no padding */ - if (subframe_len > remaining) { - printk(KERN_DEBUG "%s: wrong buffer size", dev->name); + if (subframe_len > remaining) return RX_DROP_UNUSABLE; - } skb_pull(skb, sizeof(struct ethhdr)); /* if last subframe reuse skb */ @@ -1418,8 +1388,6 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) eth = (struct ethhdr *) skb_pull(skb, ntohs(len) + padding); if (!eth) { - printk(KERN_DEBUG "%s: wrong buffer size ", - dev->name); dev_kfree_skb(frame); return RX_DROP_UNUSABLE; } @@ -1462,7 +1430,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) return RX_QUEUED; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_data(struct ieee80211_rx_data *rx) { struct net_device *dev = rx->dev; @@ -1493,21 +1461,21 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) return RX_QUEUED; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; struct ieee80211_hw *hw = &local->hw; struct sk_buff *skb = rx->skb; - struct ieee80211_bar *bar = (struct ieee80211_bar *) skb->data; + struct ieee80211_bar *bar = (struct ieee80211_bar *)skb->data; struct tid_ampdu_rx *tid_agg_rx; u16 start_seq_num; u16 tid; - if (likely((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL)) + if (likely(!ieee80211_is_ctl(bar->frame_control))) return RX_CONTINUE; - if ((rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BACK_REQ) { + if (ieee80211_is_back_req(bar->frame_control)) { if (!rx->sta) return RX_CONTINUE; tid = le16_to_cpu(bar->control) >> 12; @@ -1537,7 +1505,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) return RX_CONTINUE; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata; @@ -1561,41 +1529,27 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev, struct ieee80211_hdr *hdr, struct ieee80211_rx_data *rx) { - int keyidx, hdrlen; + int keyidx; + unsigned int hdrlen; DECLARE_MAC_BUF(mac); DECLARE_MAC_BUF(mac2); - hdrlen = ieee80211_get_hdrlen_from_skb(rx->skb); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (rx->skb->len >= hdrlen + 4) keyidx = rx->skb->data[hdrlen + 3] >> 6; else keyidx = -1; - if (net_ratelimit()) - printk(KERN_DEBUG "%s: TKIP hwaccel reported Michael MIC " - "failure from %s to %s keyidx=%d\n", - dev->name, print_mac(mac, hdr->addr2), - print_mac(mac2, hdr->addr1), keyidx); - if (!rx->sta) { /* * Some hardware seem to generate incorrect Michael MIC * reports; ignore them to avoid triggering countermeasures. */ - if (net_ratelimit()) - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for unknown address %s\n", - dev->name, print_mac(mac, hdr->addr2)); goto ignore; } - if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for a frame with no PROTECTED flag (src " - "%s)\n", dev->name, print_mac(mac, hdr->addr2)); + if (!ieee80211_has_protected(hdr->frame_control)) goto ignore; - } if (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP && keyidx) { /* @@ -1604,24 +1558,12 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev, * group keys and only the AP is sending real multicast * frames in the BSS. */ - if (net_ratelimit()) - printk(KERN_DEBUG "%s: ignored Michael MIC error for " - "a frame with non-zero keyidx (%d)" - " (src %s)\n", dev->name, keyidx, - print_mac(mac, hdr->addr2)); goto ignore; } - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for a frame that cannot be encrypted " - "(fc=0x%04x) (src %s)\n", - dev->name, rx->fc, print_mac(mac, hdr->addr2)); + if (!ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_auth(hdr->frame_control)) goto ignore; - } mac80211_ev_michael_mic_failure(rx->dev, keyidx, hdr); ignore: @@ -1710,67 +1652,57 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx) dev_kfree_skb(skb); } -typedef ieee80211_rx_result (*ieee80211_rx_handler)(struct ieee80211_rx_data *); -static ieee80211_rx_handler ieee80211_rx_handlers[] = -{ - ieee80211_rx_h_if_stats, - ieee80211_rx_h_passive_scan, - ieee80211_rx_h_check, - ieee80211_rx_h_decrypt, - ieee80211_rx_h_sta_process, - ieee80211_rx_h_defragment, - ieee80211_rx_h_ps_poll, - ieee80211_rx_h_michael_mic_verify, - /* this must be after decryption - so header is counted in MPDU mic - * must be before pae and data, so QOS_DATA format frames - * are not passed to user space by these functions - */ - ieee80211_rx_h_remove_qos_control, - ieee80211_rx_h_amsdu, - ieee80211_rx_h_data, - ieee80211_rx_h_ctrl, - ieee80211_rx_h_mgmt, - NULL -}; static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_data *rx, struct sk_buff *skb) { - ieee80211_rx_handler *handler; ieee80211_rx_result res = RX_DROP_MONITOR; rx->skb = skb; rx->sdata = sdata; rx->dev = sdata->dev; - for (handler = ieee80211_rx_handlers; *handler != NULL; handler++) { - res = (*handler)(rx); - - switch (res) { - case RX_CONTINUE: - continue; - case RX_DROP_UNUSABLE: - case RX_DROP_MONITOR: - I802_DEBUG_INC(sdata->local->rx_handlers_drop); - if (rx->sta) - rx->sta->rx_dropped++; - break; - case RX_QUEUED: - I802_DEBUG_INC(sdata->local->rx_handlers_queued); - break; - } - break; - } - +#define CALL_RXH(rxh) \ + res = rxh(rx); \ + if (res != RX_CONTINUE) \ + goto rxh_done; + + CALL_RXH(ieee80211_rx_h_passive_scan) + CALL_RXH(ieee80211_rx_h_check) + CALL_RXH(ieee80211_rx_h_decrypt) + CALL_RXH(ieee80211_rx_h_sta_process) + CALL_RXH(ieee80211_rx_h_defragment) + CALL_RXH(ieee80211_rx_h_ps_poll) + CALL_RXH(ieee80211_rx_h_michael_mic_verify) + /* must be after MMIC verify so header is counted in MPDU mic */ + CALL_RXH(ieee80211_rx_h_remove_qos_control) + CALL_RXH(ieee80211_rx_h_amsdu) + CALL_RXH(ieee80211_rx_h_data) + CALL_RXH(ieee80211_rx_h_ctrl) + CALL_RXH(ieee80211_rx_h_mgmt) + +#undef CALL_RXH + + rxh_done: switch (res) { - case RX_CONTINUE: case RX_DROP_MONITOR: + I802_DEBUG_INC(sdata->local->rx_handlers_drop); + if (rx->sta) + rx->sta->rx_dropped++; + /* fall through */ + case RX_CONTINUE: ieee80211_rx_cooked_monitor(rx); break; case RX_DROP_UNUSABLE: + I802_DEBUG_INC(sdata->local->rx_handlers_drop); + if (rx->sta) + rx->sta->rx_dropped++; dev_kfree_skb(rx->skb); break; + case RX_QUEUED: + I802_DEBUG_INC(sdata->local->rx_handlers_queued); + break; } } @@ -1801,9 +1733,13 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, case IEEE80211_IF_TYPE_IBSS: if (!bssid) return 0; - if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) + if (ieee80211_is_beacon(hdr->frame_control)) { + if (!rx->sta) + rx->sta = ieee80211_ibss_add_sta(sdata->dev, + rx->skb, bssid, hdr->addr2, + BIT(rx->status->rate_idx)); return 1; + } else if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { if (!(rx->flags & IEEE80211_RX_IN_SCAN)) return 0; @@ -1816,7 +1752,8 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rx->flags &= ~IEEE80211_RX_RA_MATCH; } else if (!rx->sta) rx->sta = ieee80211_ibss_add_sta(sdata->dev, rx->skb, - bssid, hdr->addr2); + bssid, hdr->addr2, + BIT(rx->status->rate_idx)); break; case IEEE80211_IF_TYPE_MESH_POINT: if (!multicast && @@ -1840,15 +1777,9 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, return 0; rx->flags &= ~IEEE80211_RX_RA_MATCH; } - if (sdata->dev == sdata->local->mdev && - !(rx->flags & IEEE80211_RX_IN_SCAN)) - /* do not receive anything via - * master device when not scanning */ - return 0; break; case IEEE80211_IF_TYPE_WDS: - if (bssid || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + if (bssid || !ieee80211_is_data(hdr->frame_control)) return 0; if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2)) return 0; @@ -1872,7 +1803,6 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_rx_status *status, - u32 load, struct ieee80211_rate *rate) { struct ieee80211_local *local = hw_to_local(hw); @@ -1891,7 +1821,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, rx.local = local; rx.status = status; - rx.load = load; rx.rate = rate; rx.fc = le16_to_cpu(hdr->frame_control); type = rx.fc & IEEE80211_FCTL_FTYPE; @@ -1952,7 +1881,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!skb_new) { if (net_ratelimit()) printk(KERN_DEBUG "%s: failed to copy " - "multicast frame for %s", + "multicast frame for %s\n", wiphy_name(local->hw.wiphy), prev->dev->name); continue; @@ -2000,7 +1929,6 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct ieee80211_rx_status status; u16 head_seq_num, buf_size; int index; - u32 pkt_load; struct ieee80211_supported_band *sband; struct ieee80211_rate *rate; @@ -2035,12 +1963,9 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, sizeof(status)); sband = local->hw.wiphy->bands[status.band]; rate = &sband->bitrates[status.rate_idx]; - pkt_load = ieee80211_rx_load_stats(local, - tid_agg_rx->reorder_buf[index], - &status, rate); __ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index], - &status, pkt_load, rate); + &status, rate); tid_agg_rx->stored_mpdu_num--; tid_agg_rx->reorder_buf[index] = NULL; } @@ -2082,11 +2007,8 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, sizeof(status)); sband = local->hw.wiphy->bands[status.band]; rate = &sband->bitrates[status.rate_idx]; - pkt_load = ieee80211_rx_load_stats(local, - tid_agg_rx->reorder_buf[index], - &status, rate); __ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index], - &status, pkt_load, rate); + &status, rate); tid_agg_rx->stored_mpdu_num--; tid_agg_rx->reorder_buf[index] = NULL; tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); @@ -2103,32 +2025,29 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct sta_info *sta; struct tid_ampdu_rx *tid_agg_rx; - u16 fc, sc; + u16 sc; u16 mpdu_seq_num; - u8 ret = 0, *qc; + u8 ret = 0; int tid; sta = sta_info_get(local, hdr->addr2); if (!sta) return ret; - fc = le16_to_cpu(hdr->frame_control); - /* filter the QoS data rx stream according to * STA/TID and check if this STA/TID is on aggregation */ - if (!WLAN_FC_IS_QOS_DATA(fc)) + if (!ieee80211_is_data_qos(hdr->frame_control)) goto end_reorder; - qc = skb->data + ieee80211_get_hdrlen(fc) - QOS_CONTROL_LEN; - tid = qc[0] & QOS_CONTROL_TID_MASK; + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) goto end_reorder; tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; - /* null data frames are excluded */ - if (unlikely(fc & IEEE80211_STYPE_NULLFUNC)) + /* qos null data frames are excluded */ + if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) goto end_reorder; /* new un-ordered ampdu frame - process it */ @@ -2165,7 +2084,6 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_rx_status *status) { struct ieee80211_local *local = hw_to_local(hw); - u32 pkt_load; struct ieee80211_rate *rate = NULL; struct ieee80211_supported_band *sband; @@ -2205,11 +2123,8 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, return; } - pkt_load = ieee80211_rx_load_stats(local, skb, status, rate); - local->channel_use_raw += pkt_load; - if (!ieee80211_rx_reorder_ampdu(local, skb)) - __ieee80211_rx_handle_packet(hw, skb, status, pkt_load, rate); + __ieee80211_rx_handle_packet(hw, skb, status, rate); rcu_read_unlock(); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7d4fe4a52929..f2ba653b9d69 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -135,6 +135,7 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx, /** * __sta_info_free - internal STA free helper * + * @local: pointer to the global information * @sta: STA info to free * * This function must undo everything done by sta_info_alloc() @@ -202,14 +203,12 @@ void sta_info_destroy(struct sta_info *sta) dev_kfree_skb_any(skb); for (i = 0; i < STA_TID_NUM; i++) { - spin_lock_bh(&sta->ampdu_mlme.ampdu_rx); + spin_lock_bh(&sta->lock); if (sta->ampdu_mlme.tid_rx[i]) del_timer_sync(&sta->ampdu_mlme.tid_rx[i]->session_timer); - spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx); - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); if (sta->ampdu_mlme.tid_tx[i]) del_timer_sync(&sta->ampdu_mlme.tid_tx[i]->addba_resp_timer); - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_unlock_bh(&sta->lock); } __sta_info_free(local, sta); @@ -236,6 +235,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (!sta) return NULL; + spin_lock_init(&sta->lock); + spin_lock_init(&sta->flaglock); + memcpy(sta->addr, addr, ETH_ALEN); sta->local = local; sta->sdata = sdata; @@ -249,15 +251,13 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, return NULL; } - spin_lock_init(&sta->ampdu_mlme.ampdu_rx); - spin_lock_init(&sta->ampdu_mlme.ampdu_tx); for (i = 0; i < STA_TID_NUM; i++) { /* timer_to_tid must be initialized with identity mapping to * enable session_timer's data differentiation. refer to * sta_rx_agg_session_timer_expired for useage */ sta->timer_to_tid[i] = i; /* tid to tx queue: initialize according to HW (0 is valid) */ - sta->tid_to_tx_q[i] = local->hw.queues; + sta->tid_to_tx_q[i] = ieee80211_num_queues(&local->hw); /* rx */ sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE; sta->ampdu_mlme.tid_rx[i] = NULL; @@ -276,7 +276,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_MESH sta->plink_state = PLINK_LISTEN; - spin_lock_init(&sta->plink_lock); init_timer(&sta->plink_timer); #endif @@ -321,7 +320,9 @@ int sta_info_insert(struct sta_info *sta) /* notify driver */ if (local->ops->sta_notify) { if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) - sdata = sdata->u.vlan.ap; + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); local->ops->sta_notify(local_to_hw(local), &sdata->vif, STA_NOTIFY_ADD, sta->addr); @@ -376,8 +377,10 @@ static inline void __bss_tim_clear(struct ieee80211_if_ap *bss, u16 aid) static void __sta_info_set_tim_bit(struct ieee80211_if_ap *bss, struct sta_info *sta) { - if (bss) - __bss_tim_set(bss, sta->aid); + BUG_ON(!bss); + + __bss_tim_set(bss, sta->aid); + if (sta->local->ops->set_tim) { sta->local->tim_in_locked_section = true; sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 1); @@ -389,6 +392,8 @@ void sta_info_set_tim_bit(struct sta_info *sta) { unsigned long flags; + BUG_ON(!sta->sdata->bss); + spin_lock_irqsave(&sta->local->sta_lock, flags); __sta_info_set_tim_bit(sta->sdata->bss, sta); spin_unlock_irqrestore(&sta->local->sta_lock, flags); @@ -397,8 +402,10 @@ void sta_info_set_tim_bit(struct sta_info *sta) static void __sta_info_clear_tim_bit(struct ieee80211_if_ap *bss, struct sta_info *sta) { - if (bss) - __bss_tim_clear(bss, sta->aid); + BUG_ON(!bss); + + __bss_tim_clear(bss, sta->aid); + if (sta->local->ops->set_tim) { sta->local->tim_in_locked_section = true; sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 0); @@ -410,6 +417,8 @@ void sta_info_clear_tim_bit(struct sta_info *sta) { unsigned long flags; + BUG_ON(!sta->sdata->bss); + spin_lock_irqsave(&sta->local->sta_lock, flags); __sta_info_clear_tim_bit(sta->sdata->bss, sta); spin_unlock_irqrestore(&sta->local->sta_lock, flags); @@ -437,10 +446,10 @@ void __sta_info_unlink(struct sta_info **sta) list_del(&(*sta)->list); - if ((*sta)->flags & WLAN_STA_PS) { - (*sta)->flags &= ~WLAN_STA_PS; - if (sdata->bss) - atomic_dec(&sdata->bss->num_sta_ps); + if (test_and_clear_sta_flags(*sta, WLAN_STA_PS)) { + BUG_ON(!sdata->bss); + + atomic_dec(&sdata->bss->num_sta_ps); __sta_info_clear_tim_bit(sdata->bss, *sta); } @@ -448,7 +457,9 @@ void __sta_info_unlink(struct sta_info **sta) if (local->ops->sta_notify) { if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) - sdata = sdata->u.vlan.ap; + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); local->ops->sta_notify(local_to_hw(local), &sdata->vif, STA_NOTIFY_REMOVE, (*sta)->addr); @@ -515,20 +526,20 @@ static inline int sta_info_buffer_expired(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb) { - struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_tx_info *info; int timeout; if (!skb) return 0; - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + info = IEEE80211_SKB_CB(skb); /* Timeout: (2 * listen_interval * beacon_int * 1024 / 1000000) sec */ timeout = (sta->listen_interval * local->hw.conf.beacon_int * 32 / 15625) * HZ; if (timeout < STA_TX_BUFFER_EXPIRE) timeout = STA_TX_BUFFER_EXPIRE; - return time_after(jiffies, pkt_data->jiffies + timeout); + return time_after(jiffies, info->control.jiffies + timeout); } @@ -557,8 +568,10 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, sdata = sta->sdata; local->total_ps_buffered--; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "Buffered frame expired (STA " "%s)\n", print_mac(mac, sta->addr)); +#endif dev_kfree_skb(skb); if (skb_queue_empty(&sta->ps_tx_buf)) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index f8c95bc9659c..109db787ccb7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -32,7 +32,7 @@ * @WLAN_STA_WDS: Station is one of our WDS peers. * @WLAN_STA_PSPOLL: Station has just PS-polled us. * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the - * IEEE80211_TXCTL_CLEAR_PS_FILT control flag) when the next + * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next * frame to this station is transmitted. */ enum ieee80211_sta_info_flags { @@ -129,23 +129,19 @@ enum plink_state { * * @tid_state_rx: TID's state in Rx session state machine. * @tid_rx: aggregation info for Rx per TID - * @ampdu_rx: for locking sections in aggregation Rx flow * @tid_state_tx: TID's state in Tx session state machine. * @tid_tx: aggregation info for Tx per TID * @addba_req_num: number of times addBA request has been sent. - * @ampdu_tx: for locking sectionsi in aggregation Tx flow * @dialog_token_allocator: dialog token enumerator for each new session; */ struct sta_ampdu_mlme { /* rx */ u8 tid_state_rx[STA_TID_NUM]; struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; - spinlock_t ampdu_rx; /* tx */ u8 tid_state_tx[STA_TID_NUM]; struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; u8 addba_req_num[STA_TID_NUM]; - spinlock_t ampdu_tx; u8 dialog_token_allocator; }; @@ -164,9 +160,20 @@ struct sta_ampdu_mlme { * @list: global linked list entry * @hnext: hash table linked list pointer * @local: pointer to the global information + * @sdata: TBD + * @key: TBD + * @rate_ctrl: TBD + * @rate_ctrl_priv: TBD + * @lock: used for locking all fields that require locking, see comments + * in the header file. + * @flaglock: spinlock for flags accesses + * @ht_info: HT capabilities of this STA + * @supp_rates: Bitmap of supported rates (per band) * @addr: MAC address of this STA * @aid: STA's unique AID (1..2007, 0 = not assigned yet), * only used in AP (and IBSS?) mode + * @listen_interval: TBD + * @pin_status: TBD * @flags: STA flags, see &enum ieee80211_sta_info_flags * @ps_tx_buf: buffer of frames to transmit to this station * when it leaves power saving state @@ -175,8 +182,41 @@ struct sta_ampdu_mlme { * power saving state * @rx_packets: Number of MSDUs received from this STA * @rx_bytes: Number of bytes received from this STA - * @supp_rates: Bitmap of supported rates (per band) - * @ht_info: HT capabilities of this STA + * @wep_weak_iv_count: TBD + * @last_rx: TBD + * @num_duplicates: number of duplicate frames received from this STA + * @rx_fragments: number of received MPDUs + * @rx_dropped: number of dropped MPDUs from this STA + * @last_signal: signal of last received frame from this STA + * @last_qual: qual of last received frame from this STA + * @last_noise: noise of last received frame from this STA + * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) + * @wme_rx_queue: TBD + * @tx_filtered_count: TBD + * @tx_retry_failed: TBD + * @tx_retry_count: TBD + * @tx_num_consecutive_failures: TBD + * @tx_num_mpdu_ok: TBD + * @tx_num_mpdu_fail: TBD + * @fail_avg: moving percentage of failed MSDUs + * @tx_packets: number of RX/TX MSDUs + * @tx_bytes: TBD + * @tx_fragments: number of transmitted MPDUs + * @txrate_idx: TBD + * @last_txrate_idx: TBD + * @wme_tx_queue: TBD + * @ampdu_mlme: TBD + * @timer_to_tid: identity mapping to ID timers + * @tid_to_tx_q: map tid to tx queue + * @llid: Local link ID + * @plid: Peer link ID + * @reason: Cancel reason on PLINK_HOLDING state + * @plink_retries: Retries in establishment + * @ignore_plink_timer: TBD + * @plink_state plink_state: TBD + * @plink_timeout: TBD + * @plink_timer: TBD + * @debugfs: debug filesystem info */ struct sta_info { /* General information, mostly static */ @@ -187,6 +227,8 @@ struct sta_info { struct ieee80211_key *key; struct rate_control_ref *rate_ctrl; void *rate_ctrl_priv; + spinlock_t lock; + spinlock_t flaglock; struct ieee80211_ht_info ht_info; u64 supp_rates[IEEE80211_NUM_BANDS]; u8 addr[ETH_ALEN]; @@ -199,7 +241,10 @@ struct sta_info { */ u8 pin_status; - /* frequently updated information, needs locking? */ + /* + * frequently updated, locked with own spinlock (flaglock), + * use the accessors defined below + */ u32 flags; /* @@ -213,14 +258,12 @@ struct sta_info { unsigned long rx_packets, rx_bytes; unsigned long wep_weak_iv_count; unsigned long last_rx; - unsigned long num_duplicates; /* number of duplicate frames received - * from this STA */ - unsigned long rx_fragments; /* number of received MPDUs */ - unsigned long rx_dropped; /* number of dropped MPDUs from this STA */ - int last_rssi; /* RSSI of last received frame from this STA */ - int last_signal; /* signal of last received frame from this STA */ - int last_noise; /* noise of last received frame from this STA */ - /* last received seq/frag number from this STA (per RX queue) */ + unsigned long num_duplicates; + unsigned long rx_fragments; + unsigned long rx_dropped; + int last_signal; + int last_qual; + int last_noise; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; #ifdef CONFIG_MAC80211_DEBUG_COUNTERS unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES]; @@ -237,42 +280,36 @@ struct sta_info { unsigned int fail_avg; /* Updated from TX path only, no locking requirements */ - unsigned long tx_packets; /* number of RX/TX MSDUs */ + unsigned long tx_packets; unsigned long tx_bytes; - unsigned long tx_fragments; /* number of transmitted MPDUs */ + unsigned long tx_fragments; int txrate_idx; int last_txrate_idx; + u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; #ifdef CONFIG_MAC80211_DEBUG_COUNTERS unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES]; #endif - /* Debug counters, no locking doesn't matter */ - int channel_use; - int channel_use_raw; - /* - * Aggregation information, comes with own locking. + * Aggregation information, locked with lock. */ struct sta_ampdu_mlme ampdu_mlme; - u8 timer_to_tid[STA_TID_NUM]; /* identity mapping to ID timers */ - u8 tid_to_tx_q[STA_TID_NUM]; /* map tid to tx queue */ + u8 timer_to_tid[STA_TID_NUM]; + u8 tid_to_tx_q[STA_TID_NUM]; #ifdef CONFIG_MAC80211_MESH /* * Mesh peer link attributes * TODO: move to a sub-structure that is referenced with pointer? */ - __le16 llid; /* Local link ID */ - __le16 plid; /* Peer link ID */ - __le16 reason; /* Cancel reason on PLINK_HOLDING state */ - u8 plink_retries; /* Retries in establishment */ + __le16 llid; + __le16 plid; + __le16 reason; + u8 plink_retries; bool ignore_plink_timer; enum plink_state plink_state; u32 plink_timeout; struct timer_list plink_timer; - spinlock_t plink_lock; /* For peer_state reads / updates and other - updates in the structure. Ensures robust - transitions for the peerlink FSM */ #endif #ifdef CONFIG_MAC80211_DEBUGFS @@ -299,6 +336,73 @@ static inline enum plink_state sta_plink_state(struct sta_info *sta) return PLINK_LISTEN; } +static inline void set_sta_flags(struct sta_info *sta, const u32 flags) +{ + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + sta->flags |= flags; + spin_unlock_irqrestore(&sta->flaglock, irqfl); +} + +static inline void clear_sta_flags(struct sta_info *sta, const u32 flags) +{ + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + sta->flags &= ~flags; + spin_unlock_irqrestore(&sta->flaglock, irqfl); +} + +static inline void set_and_clear_sta_flags(struct sta_info *sta, + const u32 set, const u32 clear) +{ + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + sta->flags |= set; + sta->flags &= ~clear; + spin_unlock_irqrestore(&sta->flaglock, irqfl); +} + +static inline u32 test_sta_flags(struct sta_info *sta, const u32 flags) +{ + u32 ret; + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + ret = sta->flags & flags; + spin_unlock_irqrestore(&sta->flaglock, irqfl); + + return ret; +} + +static inline u32 test_and_clear_sta_flags(struct sta_info *sta, + const u32 flags) +{ + u32 ret; + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + ret = sta->flags & flags; + sta->flags &= ~flags; + spin_unlock_irqrestore(&sta->flaglock, irqfl); + + return ret; +} + +static inline u32 get_sta_flags(struct sta_info *sta) +{ + u32 ret; + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + ret = sta->flags; + spin_unlock_irqrestore(&sta->flaglock, irqfl); + + return ret; +} + /* Maximum number of concurrently registered stations */ #define MAX_STA_COUNT 2007 diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 09093da24af6..995f7af3d25e 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -6,25 +6,23 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - #include <linux/kernel.h> +#include <linux/bitops.h> #include <linux/types.h> #include <linux/netdevice.h> +#include <asm/unaligned.h> #include <net/mac80211.h> #include "key.h" #include "tkip.h" #include "wep.h" - -/* TKIP key mixing functions */ - - #define PHASE1_LOOP_COUNT 8 - -/* 2-byte by 2-byte subset of the full AES S-box table; second part of this - * table is identical to first part but byte-swapped */ +/* + * 2-byte by 2-byte subset of the full AES S-box table; second part of this + * table is identical to first part but byte-swapped + */ static const u16 tkip_sbox[256] = { 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154, @@ -61,84 +59,54 @@ static const u16 tkip_sbox[256] = 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A, }; - -static inline u16 Mk16(u8 x, u8 y) +static u16 tkipS(u16 val) { - return ((u16) x << 8) | (u16) y; + return tkip_sbox[val & 0xff] ^ swab16(tkip_sbox[val >> 8]); } - -static inline u8 Hi8(u16 v) -{ - return v >> 8; -} - - -static inline u8 Lo8(u16 v) -{ - return v & 0xff; -} - - -static inline u16 Hi16(u32 v) -{ - return v >> 16; -} - - -static inline u16 Lo16(u32 v) -{ - return v & 0xffff; -} - - -static inline u16 RotR1(u16 v) -{ - return (v >> 1) | ((v & 0x0001) << 15); -} - - -static inline u16 tkip_S(u16 val) +static u8 *write_tkip_iv(u8 *pos, u16 iv16) { - u16 a = tkip_sbox[Hi8(val)]; - - return tkip_sbox[Lo8(val)] ^ Hi8(a) ^ (Lo8(a) << 8); + *pos++ = iv16 >> 8; + *pos++ = ((iv16 >> 8) | 0x20) & 0x7f; + *pos++ = iv16 & 0xFF; + return pos; } - - -/* P1K := Phase1(TA, TK, TSC) +/* + * P1K := Phase1(TA, TK, TSC) * TA = transmitter address (48 bits) * TK = dot11DefaultKeyValue or dot11KeyMappingValue (128 bits) * TSC = TKIP sequence counter (48 bits, only 32 msb bits used) * P1K: 80 bits */ -static void tkip_mixing_phase1(const u8 *ta, const u8 *tk, u32 tsc_IV32, - u16 *p1k) +static void tkip_mixing_phase1(const u8 *tk, struct tkip_ctx *ctx, + const u8 *ta, u32 tsc_IV32) { int i, j; + u16 *p1k = ctx->p1k; - p1k[0] = Lo16(tsc_IV32); - p1k[1] = Hi16(tsc_IV32); - p1k[2] = Mk16(ta[1], ta[0]); - p1k[3] = Mk16(ta[3], ta[2]); - p1k[4] = Mk16(ta[5], ta[4]); + p1k[0] = tsc_IV32 & 0xFFFF; + p1k[1] = tsc_IV32 >> 16; + p1k[2] = get_unaligned_le16(ta + 0); + p1k[3] = get_unaligned_le16(ta + 2); + p1k[4] = get_unaligned_le16(ta + 4); for (i = 0; i < PHASE1_LOOP_COUNT; i++) { j = 2 * (i & 1); - p1k[0] += tkip_S(p1k[4] ^ Mk16(tk[ 1 + j], tk[ 0 + j])); - p1k[1] += tkip_S(p1k[0] ^ Mk16(tk[ 5 + j], tk[ 4 + j])); - p1k[2] += tkip_S(p1k[1] ^ Mk16(tk[ 9 + j], tk[ 8 + j])); - p1k[3] += tkip_S(p1k[2] ^ Mk16(tk[13 + j], tk[12 + j])); - p1k[4] += tkip_S(p1k[3] ^ Mk16(tk[ 1 + j], tk[ 0 + j])) + i; + p1k[0] += tkipS(p1k[4] ^ get_unaligned_le16(tk + 0 + j)); + p1k[1] += tkipS(p1k[0] ^ get_unaligned_le16(tk + 4 + j)); + p1k[2] += tkipS(p1k[1] ^ get_unaligned_le16(tk + 8 + j)); + p1k[3] += tkipS(p1k[2] ^ get_unaligned_le16(tk + 12 + j)); + p1k[4] += tkipS(p1k[3] ^ get_unaligned_le16(tk + 0 + j)) + i; } + ctx->initialized = 1; } - -static void tkip_mixing_phase2(const u16 *p1k, const u8 *tk, u16 tsc_IV16, - u8 *rc4key) +static void tkip_mixing_phase2(const u8 *tk, struct tkip_ctx *ctx, + u16 tsc_IV16, u8 *rc4key) { u16 ppk[6]; + const u16 *p1k = ctx->p1k; int i; ppk[0] = p1k[0]; @@ -148,70 +116,35 @@ static void tkip_mixing_phase2(const u16 *p1k, const u8 *tk, u16 tsc_IV16, ppk[4] = p1k[4]; ppk[5] = p1k[4] + tsc_IV16; - ppk[0] += tkip_S(ppk[5] ^ Mk16(tk[ 1], tk[ 0])); - ppk[1] += tkip_S(ppk[0] ^ Mk16(tk[ 3], tk[ 2])); - ppk[2] += tkip_S(ppk[1] ^ Mk16(tk[ 5], tk[ 4])); - ppk[3] += tkip_S(ppk[2] ^ Mk16(tk[ 7], tk[ 6])); - ppk[4] += tkip_S(ppk[3] ^ Mk16(tk[ 9], tk[ 8])); - ppk[5] += tkip_S(ppk[4] ^ Mk16(tk[11], tk[10])); - ppk[0] += RotR1(ppk[5] ^ Mk16(tk[13], tk[12])); - ppk[1] += RotR1(ppk[0] ^ Mk16(tk[15], tk[14])); - ppk[2] += RotR1(ppk[1]); - ppk[3] += RotR1(ppk[2]); - ppk[4] += RotR1(ppk[3]); - ppk[5] += RotR1(ppk[4]); - - rc4key[0] = Hi8(tsc_IV16); - rc4key[1] = (Hi8(tsc_IV16) | 0x20) & 0x7f; - rc4key[2] = Lo8(tsc_IV16); - rc4key[3] = Lo8((ppk[5] ^ Mk16(tk[1], tk[0])) >> 1); - - for (i = 0; i < 6; i++) { - rc4key[4 + 2 * i] = Lo8(ppk[i]); - rc4key[5 + 2 * i] = Hi8(ppk[i]); - } + ppk[0] += tkipS(ppk[5] ^ get_unaligned_le16(tk + 0)); + ppk[1] += tkipS(ppk[0] ^ get_unaligned_le16(tk + 2)); + ppk[2] += tkipS(ppk[1] ^ get_unaligned_le16(tk + 4)); + ppk[3] += tkipS(ppk[2] ^ get_unaligned_le16(tk + 6)); + ppk[4] += tkipS(ppk[3] ^ get_unaligned_le16(tk + 8)); + ppk[5] += tkipS(ppk[4] ^ get_unaligned_le16(tk + 10)); + ppk[0] += ror16(ppk[5] ^ get_unaligned_le16(tk + 12), 1); + ppk[1] += ror16(ppk[0] ^ get_unaligned_le16(tk + 14), 1); + ppk[2] += ror16(ppk[1], 1); + ppk[3] += ror16(ppk[2], 1); + ppk[4] += ror16(ppk[3], 1); + ppk[5] += ror16(ppk[4], 1); + + rc4key = write_tkip_iv(rc4key, tsc_IV16); + *rc4key++ = ((ppk[5] ^ get_unaligned_le16(tk)) >> 1) & 0xFF; + + for (i = 0; i < 6; i++) + put_unaligned_le16(ppk[i], rc4key + 2 * i); } - /* Add TKIP IV and Ext. IV at @pos. @iv0, @iv1, and @iv2 are the first octets * of the IV. Returns pointer to the octet following IVs (i.e., beginning of * the packet payload). */ -u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, - u8 iv0, u8 iv1, u8 iv2) +u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16) { - *pos++ = iv0; - *pos++ = iv1; - *pos++ = iv2; + pos = write_tkip_iv(pos, iv16); *pos++ = (key->conf.keyidx << 6) | (1 << 5) /* Ext IV */; - *pos++ = key->u.tkip.iv32 & 0xff; - *pos++ = (key->u.tkip.iv32 >> 8) & 0xff; - *pos++ = (key->u.tkip.iv32 >> 16) & 0xff; - *pos++ = (key->u.tkip.iv32 >> 24) & 0xff; - return pos; -} - - -void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta, - u16 *phase1key) -{ - tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - key->u.tkip.iv32, phase1key); -} - -void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta, - u8 *rc4key) -{ - /* Calculate per-packet key */ - if (key->u.tkip.iv16 == 0 || !key->u.tkip.tx_initialized) { - /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - key->u.tkip.iv32, key->u.tkip.p1k); - key->u.tkip.tx_initialized = 1; - } - - tkip_mixing_phase2(key->u.tkip.p1k, - &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - key->u.tkip.iv16, rc4key); + put_unaligned_le32(key->u.tkip.tx.iv32, pos); + return pos + 4; } void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf, @@ -220,48 +153,44 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf, { struct ieee80211_key *key = (struct ieee80211_key *) container_of(keyconf, struct ieee80211_key, conf); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u8 *data = (u8 *) hdr; - u16 fc = le16_to_cpu(hdr->frame_control); - int hdr_len = ieee80211_get_hdrlen(fc); - u8 *ta = hdr->addr2; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + u8 *data; + const u8 *tk; + struct tkip_ctx *ctx; u16 iv16; u32 iv32; - iv16 = data[hdr_len] << 8; - iv16 += data[hdr_len + 2]; - iv32 = data[hdr_len + 4] | (data[hdr_len + 5] << 8) | - (data[hdr_len + 6] << 16) | (data[hdr_len + 7] << 24); + data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control); + iv16 = data[2] | (data[0] << 8); + iv32 = get_unaligned_le32(&data[4]); + + tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; + ctx = &key->u.tkip.tx; -#ifdef CONFIG_TKIP_DEBUG +#ifdef CONFIG_MAC80211_TKIP_DEBUG printk(KERN_DEBUG "TKIP encrypt: iv16 = 0x%04x, iv32 = 0x%08x\n", iv16, iv32); - if (iv32 != key->u.tkip.iv32) { + if (iv32 != ctx->iv32) { printk(KERN_DEBUG "skb: iv32 = 0x%08x key: iv32 = 0x%08x\n", - iv32, key->u.tkip.iv32); + iv32, ctx->iv32); printk(KERN_DEBUG "Wrap around of iv16 in the middle of a " "fragmented packet\n"); } -#endif /* CONFIG_TKIP_DEBUG */ +#endif /* Update the p1k only when the iv16 in the packet wraps around, this * might occur after the wrap around of iv16 in the key in case of * fragmented packets. */ - if (iv16 == 0 || !key->u.tkip.tx_initialized) { - /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - iv32, key->u.tkip.p1k); - key->u.tkip.tx_initialized = 1; - } + if (iv16 == 0 || !ctx->initialized) + tkip_mixing_phase1(tk, ctx, hdr->addr2, iv32); if (type == IEEE80211_TKIP_P1_KEY) { - memcpy(outkey, key->u.tkip.p1k, sizeof(u16) * 5); + memcpy(outkey, ctx->p1k, sizeof(u16) * 5); return; } - tkip_mixing_phase2(key->u.tkip.p1k, - &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], iv16, outkey); + tkip_mixing_phase2(tk, ctx, iv16, outkey); } EXPORT_SYMBOL(ieee80211_get_tkip_key); @@ -275,13 +204,19 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, u8 *pos, size_t payload_len, u8 *ta) { u8 rc4key[16]; + struct tkip_ctx *ctx = &key->u.tkip.tx; + const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; + + /* Calculate per-packet key */ + if (ctx->iv16 == 0 || !ctx->initialized) + tkip_mixing_phase1(tk, ctx, ta, ctx->iv32); + + tkip_mixing_phase2(tk, ctx, ctx->iv16, rc4key); - ieee80211_tkip_gen_rc4key(key, ta, rc4key); - pos = ieee80211_tkip_add_iv(pos, key, rc4key[0], rc4key[1], rc4key[2]); + pos = ieee80211_tkip_add_iv(pos, key, key->u.tkip.tx.iv16); ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len); } - /* Decrypt packet payload with TKIP using @key. @pos is a pointer to the * beginning of the buffer containing IEEE 802.11 header payload, i.e., * including IV, Ext. IV, real data, Michael MIC, ICV. @payload_len is the @@ -296,15 +231,16 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, u32 iv16; u8 rc4key[16], keyid, *pos = payload; int res; + const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; if (payload_len < 12) return -1; iv16 = (pos[0] << 8) | pos[2]; keyid = pos[3]; - iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24); + iv32 = get_unaligned_le32(pos + 4); pos += 8; -#ifdef CONFIG_TKIP_DEBUG +#ifdef CONFIG_MAC80211_TKIP_DEBUG { int i; printk(KERN_DEBUG "TKIP decrypt: data(len=%zd)", payload_len); @@ -314,7 +250,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, printk(KERN_DEBUG "TKIP decrypt: iv16=%04x iv32=%08x\n", iv16, iv32); } -#endif /* CONFIG_TKIP_DEBUG */ +#endif if (!(keyid & (1 << 5))) return TKIP_DECRYPT_NO_EXT_IV; @@ -322,50 +258,48 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, if ((keyid >> 6) != key->conf.keyidx) return TKIP_DECRYPT_INVALID_KEYIDX; - if (key->u.tkip.rx_initialized[queue] && - (iv32 < key->u.tkip.iv32_rx[queue] || - (iv32 == key->u.tkip.iv32_rx[queue] && - iv16 <= key->u.tkip.iv16_rx[queue]))) { -#ifdef CONFIG_TKIP_DEBUG + if (key->u.tkip.rx[queue].initialized && + (iv32 < key->u.tkip.rx[queue].iv32 || + (iv32 == key->u.tkip.rx[queue].iv32 && + iv16 <= key->u.tkip.rx[queue].iv16))) { +#ifdef CONFIG_MAC80211_TKIP_DEBUG DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "TKIP replay detected for RX frame from " "%s (RX IV (%04x,%02x) <= prev. IV (%04x,%02x)\n", print_mac(mac, ta), - iv32, iv16, key->u.tkip.iv32_rx[queue], - key->u.tkip.iv16_rx[queue]); -#endif /* CONFIG_TKIP_DEBUG */ + iv32, iv16, key->u.tkip.rx[queue].iv32, + key->u.tkip.rx[queue].iv16); +#endif return TKIP_DECRYPT_REPLAY; } if (only_iv) { res = TKIP_DECRYPT_OK; - key->u.tkip.rx_initialized[queue] = 1; + key->u.tkip.rx[queue].initialized = 1; goto done; } - if (!key->u.tkip.rx_initialized[queue] || - key->u.tkip.iv32_rx[queue] != iv32) { - key->u.tkip.rx_initialized[queue] = 1; + if (!key->u.tkip.rx[queue].initialized || + key->u.tkip.rx[queue].iv32 != iv32) { /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - iv32, key->u.tkip.p1k_rx[queue]); -#ifdef CONFIG_TKIP_DEBUG + tkip_mixing_phase1(tk, &key->u.tkip.rx[queue], ta, iv32); +#ifdef CONFIG_MAC80211_TKIP_DEBUG { int i; + u8 key_offset = NL80211_TKIP_DATA_OFFSET_ENCR_KEY; DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "TKIP decrypt: Phase1 TA=%s" " TK=", print_mac(mac, ta)); for (i = 0; i < 16; i++) printk("%02x ", - key->conf.key[ - ALG_TKIP_TEMP_ENCR_KEY + i]); + key->conf.key[key_offset + i]); printk("\n"); printk(KERN_DEBUG "TKIP decrypt: P1K="); for (i = 0; i < 5; i++) - printk("%04x ", key->u.tkip.p1k_rx[queue][i]); + printk("%04x ", key->u.tkip.rx[queue].p1k[i]); printk("\n"); } -#endif /* CONFIG_TKIP_DEBUG */ +#endif if (key->local->ops->update_tkip_key && key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { u8 bcast[ETH_ALEN] = @@ -377,14 +311,12 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, key->local->ops->update_tkip_key( local_to_hw(key->local), &key->conf, - sta_addr, iv32, key->u.tkip.p1k_rx[queue]); + sta_addr, iv32, key->u.tkip.rx[queue].p1k); } } - tkip_mixing_phase2(key->u.tkip.p1k_rx[queue], - &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - iv16, rc4key); -#ifdef CONFIG_TKIP_DEBUG + tkip_mixing_phase2(tk, &key->u.tkip.rx[queue], iv16, rc4key); +#ifdef CONFIG_MAC80211_TKIP_DEBUG { int i; printk(KERN_DEBUG "TKIP decrypt: Phase2 rc4key="); @@ -392,7 +324,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, printk("%02x ", rc4key[i]); printk("\n"); } -#endif /* CONFIG_TKIP_DEBUG */ +#endif res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12); done: @@ -409,5 +341,3 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, return res; } - - diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h index b7c2ee763d9d..d4714383f5fc 100644 --- a/net/mac80211/tkip.h +++ b/net/mac80211/tkip.h @@ -13,12 +13,8 @@ #include <linux/crypto.h> #include "key.h" -u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, - u8 iv0, u8 iv1, u8 iv2); -void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta, - u16 *phase1key); -void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta, - u8 *rc4key); +u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16); + void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, struct ieee80211_key *key, u8 *pos, size_t payload_len, u8 *ta); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f35eaea98e73..0fbadd8b983c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -38,23 +38,12 @@ /* misc utils */ -static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata, - struct ieee80211_hdr *hdr) -{ - /* Set the sequence number for this frame. */ - hdr->seq_ctrl = cpu_to_le16(sdata->sequence); - - /* Increase the sequence number. */ - sdata->sequence = (sdata->sequence + 0x10) & IEEE80211_SCTL_SEQ; -} - #ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP static void ieee80211_dump_frame(const char *ifname, const char *title, const struct sk_buff *skb) { - const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; + const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len); @@ -63,13 +52,12 @@ static void ieee80211_dump_frame(const char *ifname, const char *title, return; } - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (hdrlen > skb->len) hdrlen = skb->len; if (hdrlen >= 4) printk(" FC=0x%04x DUR=0x%04x", - fc, le16_to_cpu(hdr->duration_id)); + le16_to_cpu(hdr->frame_control), le16_to_cpu(hdr->duration_id)); if (hdrlen >= 10) printk(" A1=%s", print_mac(mac, hdr->addr1)); if (hdrlen >= 16) @@ -87,15 +75,16 @@ static inline void ieee80211_dump_frame(const char *ifname, const char *title, } #endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ -static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, - int next_frag_len) +static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, + int next_frag_len) { int rate, mrate, erp, dur, i; - struct ieee80211_rate *txrate = tx->rate; + struct ieee80211_rate *txrate; struct ieee80211_local *local = tx->local; struct ieee80211_supported_band *sband; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[tx->channel->band]; + txrate = &sband->bitrates[tx->rate_idx]; erp = 0; if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) @@ -139,7 +128,7 @@ static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, /* data/mgmt */ if (0 /* FIX: data/mgmt during CFP */) - return 32768; + return cpu_to_le16(32768); if (group_addr) /* Group address as the destination - no ACK */ return 0; @@ -209,19 +198,7 @@ static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, tx->sdata->bss_conf.use_short_preamble); } - return dur; -} - -static inline int __ieee80211_queue_stopped(const struct ieee80211_local *local, - int queue) -{ - return test_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); -} - -static inline int __ieee80211_queue_pending(const struct ieee80211_local *local, - int queue) -{ - return test_bit(IEEE80211_LINK_STATE_PENDING, &local->state[queue]); + return cpu_to_le16(dur); } static int inline is_ieee80211_device(struct net_device *dev, @@ -233,16 +210,16 @@ static int inline is_ieee80211_device(struct net_device *dev, /* tx handlers */ -static ieee80211_tx_result +static ieee80211_tx_result debug_noinline ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - struct sk_buff *skb = tx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); u32 sta_flags; - if (unlikely(tx->flags & IEEE80211_TX_INJECTED)) + if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) return TX_CONTINUE; if (unlikely(tx->local->sta_sw_scanning) && @@ -256,7 +233,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (tx->flags & IEEE80211_TX_PS_BUFFERED) return TX_CONTINUE; - sta_flags = tx->sta ? tx->sta->flags : 0; + sta_flags = tx->sta ? get_sta_flags(tx->sta) : 0; if (likely(tx->flags & IEEE80211_TX_UNICAST)) { if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && @@ -287,17 +264,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) return TX_CONTINUE; } -static ieee80211_tx_result -ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - - if (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)) >= 24) - ieee80211_include_sequence(tx->sdata, hdr); - - return TX_CONTINUE; -} - /* This function is called whenever the AP is about to exceed the maximum limit * of buffered frames for power saving STAs. This situation should not really * happen often during normal operation, so dropping the oldest buffered packet @@ -316,8 +282,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) list_for_each_entry_rcu(sdata, &local->interfaces, list) { struct ieee80211_if_ap *ap; - if (sdata->dev == local->mdev || - sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != IEEE80211_IF_TYPE_AP) continue; ap = &sdata->u.ap; skb = skb_dequeue(&ap->ps_bc_buf); @@ -340,13 +305,17 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) rcu_read_unlock(); local->total_ps_buffered = total; +#ifdef MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", wiphy_name(local->hw.wiphy), purged); +#endif } static ieee80211_tx_result ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + /* * broadcast/multicast frame * @@ -355,8 +324,12 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) * This is done either by the hardware or us. */ - /* not AP/IBSS or ordered frame */ - if (!tx->sdata->bss || (tx->fc & IEEE80211_FCTL_ORDER)) + /* powersaving STAs only in AP/VLAN mode */ + if (!tx->sdata->bss) + return TX_CONTINUE; + + /* no buffering for ordered frames */ + if (tx->fc & IEEE80211_FCTL_ORDER) return TX_CONTINUE; /* no stations in PS mode */ @@ -369,11 +342,13 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) purge_old_ps_buffers(tx->local); if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= AP_MAX_BC_BUFFER) { +#ifdef MAC80211_VERBOSE_PS_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: BC TX buffer full - " "dropping the oldest frame\n", tx->dev->name); } +#endif dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf)); } else tx->local->total_ps_buffered++; @@ -382,7 +357,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) } /* buffered in hardware */ - tx->control->flags |= IEEE80211_TXCTL_SEND_AFTER_DTIM; + info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; return TX_CONTINUE; } @@ -391,6 +366,8 @@ static ieee80211_tx_result ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) { struct sta_info *sta = tx->sta; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + u32 staflags; DECLARE_MAC_BUF(mac); if (unlikely(!sta || @@ -398,9 +375,10 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP))) return TX_CONTINUE; - if (unlikely((sta->flags & WLAN_STA_PS) && - !(sta->flags & WLAN_STA_PSPOLL))) { - struct ieee80211_tx_packet_data *pkt_data; + staflags = get_sta_flags(sta); + + if (unlikely((staflags & WLAN_STA_PS) && + !(staflags & WLAN_STA_PSPOLL))) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %s aid %d: PS buffer (entries " "before %d)\n", @@ -411,11 +389,13 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) purge_old_ps_buffers(tx->local); if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) { struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); +#ifdef MAC80211_VERBOSE_PS_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: STA %s TX " "buffer full - dropping oldest frame\n", tx->dev->name, print_mac(mac, sta->addr)); } +#endif dev_kfree_skb(old); } else tx->local->total_ps_buffered++; @@ -424,24 +404,23 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) if (skb_queue_empty(&sta->ps_tx_buf)) sta_info_set_tim_bit(sta); - pkt_data = (struct ieee80211_tx_packet_data *)tx->skb->cb; - pkt_data->jiffies = jiffies; + info->control.jiffies = jiffies; skb_queue_tail(&sta->ps_tx_buf, tx->skb); return TX_QUEUED; } #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - else if (unlikely(sta->flags & WLAN_STA_PS)) { + else if (unlikely(test_sta_flags(sta, WLAN_STA_PS))) { printk(KERN_DEBUG "%s: STA %s in PS mode, but pspoll " "set -> send frame\n", tx->dev->name, print_mac(mac, sta->addr)); } #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - sta->flags &= ~WLAN_STA_PSPOLL; + clear_sta_flags(sta, WLAN_STA_PSPOLL); return TX_CONTINUE; } -static ieee80211_tx_result +static ieee80211_tx_result debug_noinline ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx) { if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED)) @@ -453,21 +432,22 @@ ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx) return ieee80211_tx_h_multicast_ps_buf(tx); } -static ieee80211_tx_result +static ieee80211_tx_result debug_noinline ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) { struct ieee80211_key *key; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); u16 fc = tx->fc; - if (unlikely(tx->control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) + if (unlikely(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) tx->key = NULL; else if (tx->sta && (key = rcu_dereference(tx->sta->key))) tx->key = key; else if ((key = rcu_dereference(tx->sdata->default_key))) tx->key = key; else if (tx->sdata->drop_unencrypted && - !(tx->control->flags & IEEE80211_TXCTL_EAPOL_FRAME) && - !(tx->flags & IEEE80211_TX_INJECTED)) { + !(info->flags & IEEE80211_TX_CTL_EAPOL_FRAME) && + !(info->flags & IEEE80211_TX_CTL_INJECTED)) { I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); return TX_DROP; } else @@ -496,15 +476,197 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) } if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) - tx->control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; + info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; return TX_CONTINUE; } -static ieee80211_tx_result +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) +{ + struct rate_selection rsel; + struct ieee80211_supported_band *sband; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + + sband = tx->local->hw.wiphy->bands[tx->channel->band]; + + if (likely(tx->rate_idx < 0)) { + rate_control_get_rate(tx->dev, sband, tx->skb, &rsel); + tx->rate_idx = rsel.rate_idx; + if (unlikely(rsel.probe_idx >= 0)) { + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; + info->control.alt_retry_rate_idx = tx->rate_idx; + tx->rate_idx = rsel.probe_idx; + } else + info->control.alt_retry_rate_idx = -1; + + if (unlikely(tx->rate_idx < 0)) + return TX_DROP; + } else + info->control.alt_retry_rate_idx = -1; + + if (tx->sdata->bss_conf.use_cts_prot && + (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) { + tx->last_frag_rate_idx = tx->rate_idx; + if (rsel.probe_idx >= 0) + tx->flags &= ~IEEE80211_TX_PROBE_LAST_FRAG; + else + tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; + tx->rate_idx = rsel.nonerp_idx; + info->tx_rate_idx = rsel.nonerp_idx; + info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; + } else { + tx->last_frag_rate_idx = tx->rate_idx; + info->tx_rate_idx = tx->rate_idx; + } + info->tx_rate_idx = tx->rate_idx; + + return TX_CONTINUE; +} + +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_supported_band *sband; + + sband = tx->local->hw.wiphy->bands[tx->channel->band]; + + if (tx->sta) + info->control.aid = tx->sta->aid; + + if (!info->control.retry_limit) { + if (!is_multicast_ether_addr(hdr->addr1)) { + int len = min_t(int, tx->skb->len + FCS_LEN, + tx->local->fragmentation_threshold); + if (len > tx->local->rts_threshold + && tx->local->rts_threshold < + IEEE80211_MAX_RTS_THRESHOLD) { + info->flags |= IEEE80211_TX_CTL_USE_RTS_CTS; + info->flags |= + IEEE80211_TX_CTL_LONG_RETRY_LIMIT; + info->control.retry_limit = + tx->local->long_retry_limit; + } else { + info->control.retry_limit = + tx->local->short_retry_limit; + } + } else { + info->control.retry_limit = 1; + } + } + + if (tx->flags & IEEE80211_TX_FRAGMENTED) { + /* Do not use multiple retry rates when sending fragmented + * frames. + * TODO: The last fragment could still use multiple retry + * rates. */ + info->control.alt_retry_rate_idx = -1; + } + + /* Use CTS protection for unicast frames sent using extended rates if + * there are associated non-ERP stations and RTS/CTS is not configured + * for the frame. */ + if ((tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) && + (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_ERP_G) && + (tx->flags & IEEE80211_TX_UNICAST) && + tx->sdata->bss_conf.use_cts_prot && + !(info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)) + info->flags |= IEEE80211_TX_CTL_USE_CTS_PROTECT; + + /* Transmit data frames using short preambles if the driver supports + * short preambles at the selected rate and short preambles are + * available on the network at the current point in time. */ + if (ieee80211_is_data(hdr->frame_control) && + (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) && + tx->sdata->bss_conf.use_short_preamble && + (!tx->sta || test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))) { + info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE; + } + + if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) || + (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) { + struct ieee80211_rate *rate; + s8 baserate = -1; + int idx; + + /* Do not use multiple retry rates when using RTS/CTS */ + info->control.alt_retry_rate_idx = -1; + + /* Use min(data rate, max base rate) as CTS/RTS rate */ + rate = &sband->bitrates[tx->rate_idx]; + + for (idx = 0; idx < sband->n_bitrates; idx++) { + if (sband->bitrates[idx].bitrate > rate->bitrate) + continue; + if (tx->sdata->basic_rates & BIT(idx) && + (baserate < 0 || + (sband->bitrates[baserate].bitrate + < sband->bitrates[idx].bitrate))) + baserate = idx; + } + + if (baserate >= 0) + info->control.rts_cts_rate_idx = baserate; + else + info->control.rts_cts_rate_idx = 0; + } + + if (tx->sta) + info->control.aid = tx->sta->aid; + + return TX_CONTINUE; +} + +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + u16 *seq; + u8 *qc; + int tid; + + /* only for injected frames */ + if (unlikely(ieee80211_is_ctl(hdr->frame_control))) + return TX_CONTINUE; + + if (ieee80211_hdrlen(hdr->frame_control) < 24) + return TX_CONTINUE; + + if (!ieee80211_is_data_qos(hdr->frame_control)) { + info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + return TX_CONTINUE; + } + + /* + * This should be true for injected/management frames only, for + * management frames we have set the IEEE80211_TX_CTL_ASSIGN_SEQ + * above since they are not QoS-data frames. + */ + if (!tx->sta) + return TX_CONTINUE; + + /* include per-STA, per-TID sequence counter */ + + qc = ieee80211_get_qos_ctl(hdr); + tid = *qc & IEEE80211_QOS_CTL_TID_MASK; + seq = &tx->sta->tid_seq[tid]; + + hdr->seq_ctrl = cpu_to_le16(*seq); + + /* Increase the sequence number. */ + *seq = (*seq + 0x10) & IEEE80211_SCTL_SEQ; + + return TX_CONTINUE; +} + +static ieee80211_tx_result debug_noinline ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; size_t hdrlen, per_fragm, num_fragm, payload_len, left; struct sk_buff **frags, *first, *frag; int i; @@ -515,9 +677,19 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) if (!(tx->flags & IEEE80211_TX_FRAGMENTED)) return TX_CONTINUE; + /* + * Warn when submitting a fragmented A-MPDU frame and drop it. + * This scenario is handled in __ieee80211_tx_prepare but extra + * caution taken here as fragmented ampdu may cause Tx stop. + */ + if (WARN_ON(tx->flags & IEEE80211_TX_CTL_AMPDU || + skb_get_queue_mapping(tx->skb) >= + ieee80211_num_regular_queues(&tx->local->hw))) + return TX_DROP; + first = tx->skb; - hdrlen = ieee80211_get_hdrlen(tx->fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); payload_len = first->len - hdrlen; per_fragm = frag_threshold - hdrlen - FCS_LEN; num_fragm = DIV_ROUND_UP(payload_len, per_fragm); @@ -558,6 +730,8 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG)); copylen = left > per_fragm ? per_fragm : left; memcpy(skb_put(frag, copylen), pos, copylen); + memcpy(frag->cb, first->cb, sizeof(frag->cb)); + skb_copy_queue_mapping(frag, first); pos += copylen; left -= copylen; @@ -570,7 +744,6 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) return TX_CONTINUE; fail: - printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name); if (frags) { for (i = 0; i < num_fragm - 1; i++) if (frags[i]) @@ -581,7 +754,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) return TX_DROP; } -static ieee80211_tx_result +static ieee80211_tx_result debug_noinline ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) { if (!tx->key) @@ -601,236 +774,57 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) return TX_DROP; } -static ieee80211_tx_result -ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx) { - struct rate_selection rsel; - struct ieee80211_supported_band *sband; - - sband = tx->local->hw.wiphy->bands[tx->local->hw.conf.channel->band]; - - if (likely(!tx->rate)) { - rate_control_get_rate(tx->dev, sband, tx->skb, &rsel); - tx->rate = rsel.rate; - if (unlikely(rsel.probe)) { - tx->control->flags |= - IEEE80211_TXCTL_RATE_CTRL_PROBE; - tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; - tx->control->alt_retry_rate = tx->rate; - tx->rate = rsel.probe; - } else - tx->control->alt_retry_rate = NULL; - - if (!tx->rate) - return TX_DROP; - } else - tx->control->alt_retry_rate = NULL; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + int next_len, i; + int group_addr = is_multicast_ether_addr(hdr->addr1); - if (tx->sdata->bss_conf.use_cts_prot && - (tx->flags & IEEE80211_TX_FRAGMENTED) && rsel.nonerp) { - tx->last_frag_rate = tx->rate; - if (rsel.probe) - tx->flags &= ~IEEE80211_TX_PROBE_LAST_FRAG; - else - tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; - tx->rate = rsel.nonerp; - tx->control->tx_rate = rsel.nonerp; - tx->control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE; - } else { - tx->last_frag_rate = tx->rate; - tx->control->tx_rate = tx->rate; + if (!(tx->flags & IEEE80211_TX_FRAGMENTED)) { + hdr->duration_id = ieee80211_duration(tx, group_addr, 0); + return TX_CONTINUE; } - tx->control->tx_rate = tx->rate; - - return TX_CONTINUE; -} -static ieee80211_tx_result -ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - u16 fc = le16_to_cpu(hdr->frame_control); - u16 dur; - struct ieee80211_tx_control *control = tx->control; + hdr->duration_id = ieee80211_duration(tx, group_addr, + tx->extra_frag[0]->len); - if (!control->retry_limit) { - if (!is_multicast_ether_addr(hdr->addr1)) { - if (tx->skb->len + FCS_LEN > tx->local->rts_threshold - && tx->local->rts_threshold < - IEEE80211_MAX_RTS_THRESHOLD) { - control->flags |= - IEEE80211_TXCTL_USE_RTS_CTS; - control->flags |= - IEEE80211_TXCTL_LONG_RETRY_LIMIT; - control->retry_limit = - tx->local->long_retry_limit; - } else { - control->retry_limit = - tx->local->short_retry_limit; - } + for (i = 0; i < tx->num_extra_frag; i++) { + if (i + 1 < tx->num_extra_frag) { + next_len = tx->extra_frag[i + 1]->len; } else { - control->retry_limit = 1; + next_len = 0; + tx->rate_idx = tx->last_frag_rate_idx; } - } - if (tx->flags & IEEE80211_TX_FRAGMENTED) { - /* Do not use multiple retry rates when sending fragmented - * frames. - * TODO: The last fragment could still use multiple retry - * rates. */ - control->alt_retry_rate = NULL; - } - - /* Use CTS protection for unicast frames sent using extended rates if - * there are associated non-ERP stations and RTS/CTS is not configured - * for the frame. */ - if ((tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) && - (tx->rate->flags & IEEE80211_RATE_ERP_G) && - (tx->flags & IEEE80211_TX_UNICAST) && - tx->sdata->bss_conf.use_cts_prot && - !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) - control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; - - /* Transmit data frames using short preambles if the driver supports - * short preambles at the selected rate and short preambles are - * available on the network at the current point in time. */ - if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) && - (tx->rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && - tx->sdata->bss_conf.use_short_preamble && - (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) { - tx->control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE; - } - - /* Setup duration field for the first fragment of the frame. Duration - * for remaining fragments will be updated when they are being sent - * to low-level driver in ieee80211_tx(). */ - dur = ieee80211_duration(tx, is_multicast_ether_addr(hdr->addr1), - (tx->flags & IEEE80211_TX_FRAGMENTED) ? - tx->extra_frag[0]->len : 0); - hdr->duration_id = cpu_to_le16(dur); - - if ((control->flags & IEEE80211_TXCTL_USE_RTS_CTS) || - (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) { - struct ieee80211_supported_band *sband; - struct ieee80211_rate *rate, *baserate; - int idx; - - sband = tx->local->hw.wiphy->bands[ - tx->local->hw.conf.channel->band]; - - /* Do not use multiple retry rates when using RTS/CTS */ - control->alt_retry_rate = NULL; - - /* Use min(data rate, max base rate) as CTS/RTS rate */ - rate = tx->rate; - baserate = NULL; - - for (idx = 0; idx < sband->n_bitrates; idx++) { - if (sband->bitrates[idx].bitrate > rate->bitrate) - continue; - if (tx->sdata->basic_rates & BIT(idx) && - (!baserate || - (baserate->bitrate < sband->bitrates[idx].bitrate))) - baserate = &sband->bitrates[idx]; - } - - if (baserate) - control->rts_cts_rate = baserate; - else - control->rts_cts_rate = &sband->bitrates[0]; - } - - if (tx->sta) { - control->aid = tx->sta->aid; - tx->sta->tx_packets++; - tx->sta->tx_fragments++; - tx->sta->tx_bytes += tx->skb->len; - if (tx->extra_frag) { - int i; - tx->sta->tx_fragments += tx->num_extra_frag; - for (i = 0; i < tx->num_extra_frag; i++) { - tx->sta->tx_bytes += - tx->extra_frag[i]->len; - } - } + hdr = (struct ieee80211_hdr *)tx->extra_frag[i]->data; + hdr->duration_id = ieee80211_duration(tx, 0, next_len); } return TX_CONTINUE; } -static ieee80211_tx_result -ieee80211_tx_h_load_stats(struct ieee80211_tx_data *tx) +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) { - struct ieee80211_local *local = tx->local; - struct sk_buff *skb = tx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u32 load = 0, hdrtime; - struct ieee80211_rate *rate = tx->rate; - - /* TODO: this could be part of tx_status handling, so that the number - * of retries would be known; TX rate should in that case be stored - * somewhere with the packet */ - - /* Estimate total channel use caused by this frame */ - - /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, - * 1 usec = 1/8 * (1080 / 10) = 13.5 */ - - if (tx->channel->band == IEEE80211_BAND_5GHZ || - (tx->channel->band == IEEE80211_BAND_2GHZ && - rate->flags & IEEE80211_RATE_ERP_G)) - hdrtime = CHAN_UTIL_HDR_SHORT; - else - hdrtime = CHAN_UTIL_HDR_LONG; - - load = hdrtime; - if (!is_multicast_ether_addr(hdr->addr1)) - load += hdrtime; - - if (tx->control->flags & IEEE80211_TXCTL_USE_RTS_CTS) - load += 2 * hdrtime; - else if (tx->control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) - load += hdrtime; + int i; - /* TODO: optimise again */ - load += skb->len * CHAN_UTIL_RATE_LCM / rate->bitrate; + if (!tx->sta) + return TX_CONTINUE; + tx->sta->tx_packets++; + tx->sta->tx_fragments++; + tx->sta->tx_bytes += tx->skb->len; if (tx->extra_frag) { - int i; - for (i = 0; i < tx->num_extra_frag; i++) { - load += 2 * hdrtime; - load += tx->extra_frag[i]->len * - tx->rate->bitrate; - } + tx->sta->tx_fragments += tx->num_extra_frag; + for (i = 0; i < tx->num_extra_frag; i++) + tx->sta->tx_bytes += tx->extra_frag[i]->len; } - /* Divide channel_use by 8 to avoid wrapping around the counter */ - load >>= CHAN_UTIL_SHIFT; - local->channel_use_raw += load; - if (tx->sta) - tx->sta->channel_use_raw += load; - tx->sdata->channel_use_raw += load; - return TX_CONTINUE; } -typedef ieee80211_tx_result (*ieee80211_tx_handler)(struct ieee80211_tx_data *); -static ieee80211_tx_handler ieee80211_tx_handlers[] = -{ - ieee80211_tx_h_check_assoc, - ieee80211_tx_h_sequence, - ieee80211_tx_h_ps_buf, - ieee80211_tx_h_select_key, - ieee80211_tx_h_michael_mic_add, - ieee80211_tx_h_fragment, - ieee80211_tx_h_encrypt, - ieee80211_tx_h_rate_ctrl, - ieee80211_tx_h_misc, - ieee80211_tx_h_load_stats, - NULL -}; - /* actual transmit path */ /* @@ -854,12 +848,12 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_supported_band *sband; int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); - struct ieee80211_tx_control *control = tx->control; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - sband = tx->local->hw.wiphy->bands[tx->local->hw.conf.channel->band]; + sband = tx->local->hw.wiphy->bands[tx->channel->band]; - control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; - tx->flags |= IEEE80211_TX_INJECTED; + info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + info->flags |= IEEE80211_TX_CTL_INJECTED; tx->flags &= ~IEEE80211_TX_FRAGMENTED; /* @@ -896,7 +890,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, r = &sband->bitrates[i]; if (r->bitrate == target_rate) { - tx->rate = r; + tx->rate_idx = i; break; } } @@ -907,7 +901,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, * radiotap uses 0 for 1st ant, mac80211 is 1 for * 1st ant */ - control->antenna_sel_tx = (*iterator.this_arg) + 1; + info->antenna_sel_tx = (*iterator.this_arg) + 1; break; #if 0 @@ -931,8 +925,8 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, skb_trim(skb, skb->len - FCS_LEN); } if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) - control->flags &= - ~IEEE80211_TXCTL_DO_NOT_ENCRYPT; + info->flags &= + ~IEEE80211_TX_CTL_DO_NOT_ENCRYPT; if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) tx->flags |= IEEE80211_TX_FRAGMENTED; break; @@ -967,12 +961,12 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, static ieee80211_tx_result __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, struct sk_buff *skb, - struct net_device *dev, - struct ieee80211_tx_control *control) + struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr; struct ieee80211_sub_if_data *sdata; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int hdrlen; @@ -981,7 +975,9 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, tx->dev = dev; /* use original interface */ tx->local = local; tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); - tx->control = control; + tx->channel = local->hw.conf.channel; + tx->rate_idx = -1; + tx->last_frag_rate_idx = -1; /* * Set this flag (used below to indicate "automatic fragmentation"), * it will be cleared/left by radiotap as desired. @@ -1008,34 +1004,33 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, if (is_multicast_ether_addr(hdr->addr1)) { tx->flags &= ~IEEE80211_TX_UNICAST; - control->flags |= IEEE80211_TXCTL_NO_ACK; + info->flags |= IEEE80211_TX_CTL_NO_ACK; } else { tx->flags |= IEEE80211_TX_UNICAST; - control->flags &= ~IEEE80211_TXCTL_NO_ACK; + info->flags &= ~IEEE80211_TX_CTL_NO_ACK; } if (tx->flags & IEEE80211_TX_FRAGMENTED) { if ((tx->flags & IEEE80211_TX_UNICAST) && skb->len + FCS_LEN > local->fragmentation_threshold && - !local->ops->set_frag_threshold) + !local->ops->set_frag_threshold && + !(info->flags & IEEE80211_TX_CTL_AMPDU)) tx->flags |= IEEE80211_TX_FRAGMENTED; else tx->flags &= ~IEEE80211_TX_FRAGMENTED; } if (!tx->sta) - control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT; - else if (tx->sta->flags & WLAN_STA_CLEAR_PS_FILT) { - control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT; - tx->sta->flags &= ~WLAN_STA_CLEAR_PS_FILT; - } + info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; + else if (test_and_clear_sta_flags(tx->sta, WLAN_STA_CLEAR_PS_FILT)) + info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; hdrlen = ieee80211_get_hdrlen(tx->fc); if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; tx->ethertype = (pos[0] << 8) | pos[1]; } - control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; + info->flags |= IEEE80211_TX_CTL_FIRST_FRAGMENT; return TX_CONTINUE; } @@ -1045,14 +1040,12 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, */ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx, struct sk_buff *skb, - struct net_device *mdev, - struct ieee80211_tx_control *control) + struct net_device *mdev) { - struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct net_device *dev; - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - dev = dev_get_by_index(&init_net, pkt_data->ifindex); + dev = dev_get_by_index(&init_net, info->control.ifindex); if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { dev_put(dev); dev = NULL; @@ -1060,7 +1053,7 @@ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx, if (unlikely(!dev)) return -ENODEV; /* initialises tx with control */ - __ieee80211_tx_prepare(tx, skb, dev, control); + __ieee80211_tx_prepare(tx, skb, dev); dev_put(dev); return 0; } @@ -1068,50 +1061,49 @@ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx, static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_tx_data *tx) { - struct ieee80211_tx_control *control = tx->control; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int ret, i; - if (!ieee80211_qdisc_installed(local->mdev) && - __ieee80211_queue_stopped(local, 0)) { - netif_stop_queue(local->mdev); + if (netif_subqueue_stopped(local->mdev, skb)) return IEEE80211_TX_AGAIN; - } + if (skb) { ieee80211_dump_frame(wiphy_name(local->hw.wiphy), "TX to low-level driver", skb); - ret = local->ops->tx(local_to_hw(local), skb, control); + ret = local->ops->tx(local_to_hw(local), skb); if (ret) return IEEE80211_TX_AGAIN; local->mdev->trans_start = jiffies; ieee80211_led_tx(local, 1); } if (tx->extra_frag) { - control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | - IEEE80211_TXCTL_USE_CTS_PROTECT | - IEEE80211_TXCTL_CLEAR_PS_FILT | - IEEE80211_TXCTL_FIRST_FRAGMENT); for (i = 0; i < tx->num_extra_frag; i++) { if (!tx->extra_frag[i]) continue; - if (__ieee80211_queue_stopped(local, control->queue)) + info = IEEE80211_SKB_CB(tx->extra_frag[i]); + info->flags &= ~(IEEE80211_TX_CTL_USE_RTS_CTS | + IEEE80211_TX_CTL_USE_CTS_PROTECT | + IEEE80211_TX_CTL_CLEAR_PS_FILT | + IEEE80211_TX_CTL_FIRST_FRAGMENT); + if (netif_subqueue_stopped(local->mdev, + tx->extra_frag[i])) return IEEE80211_TX_FRAG_AGAIN; if (i == tx->num_extra_frag) { - control->tx_rate = tx->last_frag_rate; + info->tx_rate_idx = tx->last_frag_rate_idx; if (tx->flags & IEEE80211_TX_PROBE_LAST_FRAG) - control->flags |= - IEEE80211_TXCTL_RATE_CTRL_PROBE; + info->flags |= + IEEE80211_TX_CTL_RATE_CTRL_PROBE; else - control->flags &= - ~IEEE80211_TXCTL_RATE_CTRL_PROBE; + info->flags &= + ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; } ieee80211_dump_frame(wiphy_name(local->hw.wiphy), "TX to low-level driver", tx->extra_frag[i]); ret = local->ops->tx(local_to_hw(local), - tx->extra_frag[i], - control); + tx->extra_frag[i]); if (ret) return IEEE80211_TX_FRAG_AGAIN; local->mdev->trans_start = jiffies; @@ -1124,17 +1116,65 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, return IEEE80211_TX_OK; } -static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, - struct ieee80211_tx_control *control) +/* + * Invoke TX handlers, return 0 on success and non-zero if the + * frame was dropped or queued. + */ +static int invoke_tx_handlers(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb = tx->skb; + ieee80211_tx_result res = TX_DROP; + int i; + +#define CALL_TXH(txh) \ + res = txh(tx); \ + if (res != TX_CONTINUE) \ + goto txh_done; + + CALL_TXH(ieee80211_tx_h_check_assoc) + CALL_TXH(ieee80211_tx_h_ps_buf) + CALL_TXH(ieee80211_tx_h_select_key) + CALL_TXH(ieee80211_tx_h_michael_mic_add) + CALL_TXH(ieee80211_tx_h_rate_ctrl) + CALL_TXH(ieee80211_tx_h_misc) + CALL_TXH(ieee80211_tx_h_sequence) + CALL_TXH(ieee80211_tx_h_fragment) + /* handlers after fragment must be aware of tx info fragmentation! */ + CALL_TXH(ieee80211_tx_h_encrypt) + CALL_TXH(ieee80211_tx_h_calculate_duration) + CALL_TXH(ieee80211_tx_h_stats) +#undef CALL_TXH + + txh_done: + if (unlikely(res == TX_DROP)) { + I802_DEBUG_INC(tx->local->tx_handlers_drop); + dev_kfree_skb(skb); + for (i = 0; i < tx->num_extra_frag; i++) + if (tx->extra_frag[i]) + dev_kfree_skb(tx->extra_frag[i]); + kfree(tx->extra_frag); + return -1; + } else if (unlikely(res == TX_QUEUED)) { + I802_DEBUG_INC(tx->local->tx_handlers_queued); + return -1; + } + + return 0; +} + +static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta; - ieee80211_tx_handler *handler; struct ieee80211_tx_data tx; - ieee80211_tx_result res = TX_DROP, res_prepare; + ieee80211_tx_result res_prepare; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int ret, i; + u16 queue; + + queue = skb_get_queue_mapping(skb); - WARN_ON(__ieee80211_queue_pending(local, control->queue)); + WARN_ON(test_bit(queue, local->queues_pending)); if (unlikely(skb->len < 10)) { dev_kfree_skb(skb); @@ -1144,7 +1184,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, rcu_read_lock(); /* initialises tx */ - res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control); + res_prepare = __ieee80211_tx_prepare(&tx, skb, dev); if (res_prepare == TX_DROP) { dev_kfree_skb(skb); @@ -1154,79 +1194,53 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, sta = tx.sta; tx.channel = local->hw.conf.channel; + info->band = tx.channel->band; - for (handler = ieee80211_tx_handlers; *handler != NULL; - handler++) { - res = (*handler)(&tx); - if (res != TX_CONTINUE) - break; - } - - skb = tx.skb; /* handlers are allowed to change skb */ - - if (unlikely(res == TX_DROP)) { - I802_DEBUG_INC(local->tx_handlers_drop); - goto drop; - } - - if (unlikely(res == TX_QUEUED)) { - I802_DEBUG_INC(local->tx_handlers_queued); - rcu_read_unlock(); - return 0; - } - - if (tx.extra_frag) { - for (i = 0; i < tx.num_extra_frag; i++) { - int next_len, dur; - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) - tx.extra_frag[i]->data; - - if (i + 1 < tx.num_extra_frag) { - next_len = tx.extra_frag[i + 1]->len; - } else { - next_len = 0; - tx.rate = tx.last_frag_rate; - } - dur = ieee80211_duration(&tx, 0, next_len); - hdr->duration_id = cpu_to_le16(dur); - } - } + if (invoke_tx_handlers(&tx)) + goto out; retry: ret = __ieee80211_tx(local, skb, &tx); if (ret) { - struct ieee80211_tx_stored_packet *store = - &local->pending_packet[control->queue]; + struct ieee80211_tx_stored_packet *store; + + /* + * Since there are no fragmented frames on A-MPDU + * queues, there's no reason for a driver to reject + * a frame there, warn and drop it. + */ + if (WARN_ON(queue >= ieee80211_num_regular_queues(&local->hw))) + goto drop; + + store = &local->pending_packet[queue]; if (ret == IEEE80211_TX_FRAG_AGAIN) skb = NULL; - set_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[control->queue]); + set_bit(queue, local->queues_pending); smp_mb(); - /* When the driver gets out of buffers during sending of - * fragments and calls ieee80211_stop_queue, there is - * a small window between IEEE80211_LINK_STATE_XOFF and - * IEEE80211_LINK_STATE_PENDING flags are set. If a buffer + /* + * When the driver gets out of buffers during sending of + * fragments and calls ieee80211_stop_queue, the netif + * subqueue is stopped. There is, however, a small window + * in which the PENDING bit is not yet set. If a buffer * gets available in that window (i.e. driver calls * ieee80211_wake_queue), we would end up with ieee80211_tx - * called with IEEE80211_LINK_STATE_PENDING. Prevent this by + * called with the PENDING bit still set. Prevent this by * continuing transmitting here when that situation is - * possible to have happened. */ - if (!__ieee80211_queue_stopped(local, control->queue)) { - clear_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[control->queue]); + * possible to have happened. + */ + if (!__netif_subqueue_stopped(local->mdev, queue)) { + clear_bit(queue, local->queues_pending); goto retry; } - memcpy(&store->control, control, - sizeof(struct ieee80211_tx_control)); store->skb = skb; store->extra_frag = tx.extra_frag; store->num_extra_frag = tx.num_extra_frag; - store->last_frag_rate = tx.last_frag_rate; + store->last_frag_rate_idx = tx.last_frag_rate_idx; store->last_frag_rate_ctrl_probe = !!(tx.flags & IEEE80211_TX_PROBE_LAST_FRAG); } + out: rcu_read_unlock(); return 0; @@ -1243,24 +1257,57 @@ retry: /* device xmit handlers */ +static int ieee80211_skb_resize(struct ieee80211_local *local, + struct sk_buff *skb, + int head_need, bool may_encrypt) +{ + int tail_need = 0; + + /* + * This could be optimised, devices that do full hardware + * crypto (including TKIP MMIC) need no tailroom... But we + * have no drivers for such devices currently. + */ + if (may_encrypt) { + tail_need = IEEE80211_ENCRYPT_TAILROOM; + tail_need -= skb_tailroom(skb); + tail_need = max_t(int, tail_need, 0); + } + + if (head_need || tail_need) { + /* Sorry. Can't account for this any more */ + skb_orphan(skb); + } + + if (skb_header_cloned(skb)) + I802_DEBUG_INC(local->tx_expand_skb_head_cloned); + else + I802_DEBUG_INC(local->tx_expand_skb_head); + + if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) { + printk(KERN_DEBUG "%s: failed to reallocate TX buffer\n", + wiphy_name(local->hw.wiphy)); + return -ENOMEM; + } + + /* update truesize too */ + skb->truesize += head_need + tail_need; + + return 0; +} + int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ieee80211_tx_control control; - struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct net_device *odev = NULL; struct ieee80211_sub_if_data *osdata; int headroom; + bool may_encrypt; int ret; - /* - * copy control out of the skb so other people can use skb->cb - */ - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(&control, 0, sizeof(struct ieee80211_tx_control)); - - if (pkt_data->ifindex) - odev = dev_get_by_index(&init_net, pkt_data->ifindex); + if (info->control.ifindex) + odev = dev_get_by_index(&init_net, info->control.ifindex); if (unlikely(odev && !is_ieee80211_device(odev, dev))) { dev_put(odev); odev = NULL; @@ -1273,32 +1320,25 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, dev_kfree_skb(skb); return 0; } + osdata = IEEE80211_DEV_TO_SUB_IF(odev); - headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM; - if (skb_headroom(skb) < headroom) { - if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - dev_put(odev); - return 0; - } + may_encrypt = !(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT); + + headroom = osdata->local->tx_headroom; + if (may_encrypt) + headroom += IEEE80211_ENCRYPT_HEADROOM; + headroom -= skb_headroom(skb); + headroom = max_t(int, 0, headroom); + + if (ieee80211_skb_resize(osdata->local, skb, headroom, may_encrypt)) { + dev_kfree_skb(skb); + dev_put(odev); + return 0; } - control.vif = &osdata->vif; - control.type = osdata->vif.type; - if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS) - control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS; - if (pkt_data->flags & IEEE80211_TXPD_DO_NOT_ENCRYPT) - control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; - if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) - control.flags |= IEEE80211_TXCTL_REQUEUE; - if (pkt_data->flags & IEEE80211_TXPD_EAPOL_FRAME) - control.flags |= IEEE80211_TXCTL_EAPOL_FRAME; - if (pkt_data->flags & IEEE80211_TXPD_AMPDU) - control.flags |= IEEE80211_TXCTL_AMPDU; - control.queue = pkt_data->queue; - - ret = ieee80211_tx(odev, skb, &control); + info->control.vif = &osdata->vif; + ret = ieee80211_tx(odev, skb); dev_put(odev); return ret; @@ -1308,7 +1348,7 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_radiotap_header *prthdr = (struct ieee80211_radiotap_header *)skb->data; u16 len_rthdr; @@ -1330,12 +1370,12 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, skb->dev = local->mdev; - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(*pkt_data)); /* needed because we set skb device to master */ - pkt_data->ifindex = dev->ifindex; + info->control.ifindex = dev->ifindex; - pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + /* Interfaces should always request a status report */ + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; /* * fix up the pointers accounting for the radiotap @@ -1379,10 +1419,11 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata; int ret = 1, head_need; - u16 ethertype, hdrlen, meshhdrlen = 0, fc; + u16 ethertype, hdrlen, meshhdrlen = 0; + __le16 fc; struct ieee80211_hdr hdr; struct ieee80211s_hdr mesh_hdr; const u8 *encaps_data; @@ -1393,8 +1434,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (unlikely(skb->len < ETH_HLEN)) { - printk(KERN_DEBUG "%s: short skb (len=%d)\n", - dev->name, skb->len); ret = 0; goto fail; } @@ -1405,12 +1444,12 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, /* convert Ethernet header to proper 802.11 header (based on * operation mode) */ ethertype = (skb->data[12] << 8) | skb->data[13]; - fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); switch (sdata->vif.type) { case IEEE80211_IF_TYPE_AP: case IEEE80211_IF_TYPE_VLAN: - fc |= IEEE80211_FCTL_FROMDS; + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); @@ -1418,7 +1457,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, hdrlen = 24; break; case IEEE80211_IF_TYPE_WDS: - fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); @@ -1428,7 +1467,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, break; #ifdef CONFIG_MAC80211_MESH case IEEE80211_IF_TYPE_MESH_POINT: - fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ if (is_multicast_ether_addr(skb->data)) memcpy(hdr.addr1, skb->data, ETH_ALEN); @@ -1458,7 +1497,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, break; #endif case IEEE80211_IF_TYPE_STA: - fc |= IEEE80211_FCTL_TODS; + fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); @@ -1486,13 +1525,14 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, rcu_read_lock(); sta = sta_info_get(local, hdr.addr1); if (sta) - sta_flags = sta->flags; + sta_flags = get_sta_flags(sta); rcu_read_unlock(); } - /* receiver is QoS enabled, use a QoS type frame */ - if (sta_flags & WLAN_STA_WME) { - fc |= IEEE80211_STYPE_QOS_DATA; + /* receiver and we are QoS enabled, use a QoS type frame */ + if (sta_flags & WLAN_STA_WME && + ieee80211_num_regular_queues(&local->hw) >= 4) { + fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); hdrlen += 2; } @@ -1520,7 +1560,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } - hdr.frame_control = cpu_to_le16(fc); + hdr.frame_control = fc; hdr.duration_id = 0; hdr.seq_ctrl = 0; @@ -1555,32 +1595,26 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and * alloc_skb() (net/core/skbuff.c) */ - head_need = hdrlen + encaps_len + meshhdrlen + local->tx_headroom; - head_need -= skb_headroom(skb); + head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb); - /* We are going to modify skb data, so make a copy of it if happens to - * be cloned. This could happen, e.g., with Linux bridge code passing - * us broadcast frames. */ + /* + * So we need to modify the skb header and hence need a copy of + * that. The head_need variable above doesn't, so far, include + * the needed header space that we don't need right away. If we + * can, then we don't reallocate right now but only after the + * frame arrives at the master device (if it does...) + * + * If we cannot, however, then we will reallocate to include all + * the ever needed space. Also, if we need to reallocate it anyway, + * make it big enough for everything we may ever need. + */ if (head_need > 0 || skb_cloned(skb)) { -#if 0 - printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes " - "of headroom\n", dev->name, head_need); -#endif - - if (skb_cloned(skb)) - I802_DEBUG_INC(local->tx_expand_skb_head_cloned); - else - I802_DEBUG_INC(local->tx_expand_skb_head); - /* Since we have to reallocate the buffer, make sure that there - * is enough room for possible WEP IV/ICV and TKIP (8 bytes - * before payload and 12 after). */ - if (pskb_expand_head(skb, (head_need > 0 ? head_need + 8 : 8), - 12, GFP_ATOMIC)) { - printk(KERN_DEBUG "%s: failed to reallocate TX buffer" - "\n", dev->name); + head_need += IEEE80211_ENCRYPT_HEADROOM; + head_need += local->tx_headroom; + head_need = max_t(int, 0, head_need); + if (ieee80211_skb_resize(local, skb, head_need, true)) goto fail; - } } if (encaps_data) { @@ -1595,7 +1629,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, h_pos += meshhdrlen; } - if (fc & IEEE80211_STYPE_QOS_DATA) { + if (ieee80211_is_data_qos(fc)) { __le16 *qos_control; qos_control = (__le16*) skb_push(skb, 2); @@ -1611,11 +1645,14 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, nh_pos += hdrlen; h_pos += hdrlen; - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = dev->ifindex; + info = IEEE80211_SKB_CB(skb); + memset(info, 0, sizeof(*info)); + info->control.ifindex = dev->ifindex; if (ethertype == ETH_P_PAE) - pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME; + info->flags |= IEEE80211_TX_CTL_EAPOL_FRAME; + + /* Interfaces should always request a status report */ + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; skb->dev = local->mdev; dev->stats.tx_packets++; @@ -1640,46 +1677,55 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, return ret; } -/* helper functions for pending packets for when queues are stopped */ +/* + * ieee80211_clear_tx_pending may not be called in a context where + * it is possible that it packets could come in again. + */ void ieee80211_clear_tx_pending(struct ieee80211_local *local) { int i, j; struct ieee80211_tx_stored_packet *store; - for (i = 0; i < local->hw.queues; i++) { - if (!__ieee80211_queue_pending(local, i)) + for (i = 0; i < ieee80211_num_regular_queues(&local->hw); i++) { + if (!test_bit(i, local->queues_pending)) continue; store = &local->pending_packet[i]; kfree_skb(store->skb); for (j = 0; j < store->num_extra_frag; j++) kfree_skb(store->extra_frag[j]); kfree(store->extra_frag); - clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[i]); + clear_bit(i, local->queues_pending); } } +/* + * Transmit all pending packets. Called from tasklet, locks master device + * TX lock so that no new packets can come in. + */ void ieee80211_tx_pending(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *)data; struct net_device *dev = local->mdev; struct ieee80211_tx_stored_packet *store; struct ieee80211_tx_data tx; - int i, ret, reschedule = 0; + int i, ret; netif_tx_lock_bh(dev); - for (i = 0; i < local->hw.queues; i++) { - if (__ieee80211_queue_stopped(local, i)) + for (i = 0; i < ieee80211_num_regular_queues(&local->hw); i++) { + /* Check that this queue is ok */ + if (__netif_subqueue_stopped(local->mdev, i)) continue; - if (!__ieee80211_queue_pending(local, i)) { - reschedule = 1; + + if (!test_bit(i, local->queues_pending)) { + ieee80211_wake_queue(&local->hw, i); continue; } + store = &local->pending_packet[i]; - tx.control = &store->control; tx.extra_frag = store->extra_frag; tx.num_extra_frag = store->num_extra_frag; - tx.last_frag_rate = store->last_frag_rate; + tx.last_frag_rate_idx = store->last_frag_rate_idx; tx.flags = 0; if (store->last_frag_rate_ctrl_probe) tx.flags |= IEEE80211_TX_PROBE_LAST_FRAG; @@ -1688,19 +1734,11 @@ void ieee80211_tx_pending(unsigned long data) if (ret == IEEE80211_TX_FRAG_AGAIN) store->skb = NULL; } else { - clear_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[i]); - reschedule = 1; + clear_bit(i, local->queues_pending); + ieee80211_wake_queue(&local->hw, i); } } netif_tx_unlock_bh(dev); - if (reschedule) { - if (!ieee80211_qdisc_installed(dev)) { - if (!__ieee80211_queue_stopped(local, 0)) - netif_wake_queue(dev); - } else - netif_schedule(dev); - } } /* functions for drivers to get certain frames */ @@ -1769,23 +1807,24 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local, } struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_tx_control *control) + struct ieee80211_vif *vif) { struct ieee80211_local *local = hw_to_local(hw); - struct sk_buff *skb; + struct sk_buff *skb = NULL; + struct ieee80211_tx_info *info; struct net_device *bdev; struct ieee80211_sub_if_data *sdata = NULL; struct ieee80211_if_ap *ap = NULL; + struct ieee80211_if_sta *ifsta = NULL; struct rate_selection rsel; struct beacon_data *beacon; struct ieee80211_supported_band *sband; struct ieee80211_mgmt *mgmt; int *num_beacons; - bool err = true; + enum ieee80211_band band = local->hw.conf.channel->band; u8 *pos; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[band]; rcu_read_lock(); @@ -1810,9 +1849,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, memcpy(skb_put(skb, beacon->head_len), beacon->head, beacon->head_len); - ieee80211_include_sequence(sdata, - (struct ieee80211_hdr *)skb->data); - /* * Not very nice, but we want to allow the driver to call * ieee80211_beacon_get() as a response to the set_tim() @@ -1835,9 +1871,24 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, beacon->tail, beacon->tail_len); num_beacons = &ap->num_beacons; + } else + goto out; + } else if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + struct ieee80211_hdr *hdr; + ifsta = &sdata->u.sta; - err = false; - } + if (!ifsta->probe_resp) + goto out; + + skb = skb_copy(ifsta->probe_resp, GFP_ATOMIC); + if (!skb) + goto out; + + hdr = (struct ieee80211_hdr *) skb->data; + hdr->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, + IEEE80211_STYPE_BEACON); + + num_beacons = &ifsta->num_beacons; } else if (ieee80211_vif_is_mesh(&sdata->vif)) { /* headroom, head length, tail length and maximum TIM length */ skb = dev_alloc_skb(local->tx_headroom + 400); @@ -1848,8 +1899,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + sizeof(mgmt->u.beacon)); memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_BEACON); + mgmt->frame_control = + cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); memset(mgmt->da, 0xff, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ @@ -1864,43 +1915,41 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, mesh_mgmt_ies_add(skb, sdata->dev); num_beacons = &sdata->u.sta.num_beacons; - - err = false; + } else { + WARN_ON(1); + goto out; } - if (err) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "no beacon data avail for %s\n", - bdev->name); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + info = IEEE80211_SKB_CB(skb); + + info->band = band; + rate_control_get_rate(local->mdev, sband, skb, &rsel); + + if (unlikely(rsel.rate_idx < 0)) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: ieee80211_beacon_get: " + "no rate found\n", + wiphy_name(local->hw.wiphy)); + } + dev_kfree_skb(skb); skb = NULL; goto out; } - if (control) { - rate_control_get_rate(local->mdev, sband, skb, &rsel); - if (!rsel.rate) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: ieee80211_beacon_get: " - "no rate found\n", - wiphy_name(local->hw.wiphy)); - } - dev_kfree_skb(skb); - skb = NULL; - goto out; - } + info->control.vif = vif; + info->tx_rate_idx = rsel.rate_idx; + + info->flags |= IEEE80211_TX_CTL_NO_ACK; + info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; + info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + if (sdata->bss_conf.use_short_preamble && + sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) + info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE; + + info->antenna_sel_tx = local->hw.conf.antenna_sel_tx; + info->control.retry_limit = 1; - control->vif = vif; - control->tx_rate = rsel.rate; - if (sdata->bss_conf.use_short_preamble && - rsel.rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) - control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE; - control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - control->flags |= IEEE80211_TXCTL_NO_ACK; - control->retry_limit = 1; - control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT; - } (*num_beacons)++; out: rcu_read_unlock(); @@ -1910,14 +1959,13 @@ EXPORT_SYMBOL(ieee80211_beacon_get); void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl, + const struct ieee80211_tx_info *frame_txctl, struct ieee80211_rts *rts) { const struct ieee80211_hdr *hdr = frame; - u16 fctl; - fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS; - rts->frame_control = cpu_to_le16(fctl); + rts->frame_control = + cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS); rts->duration = ieee80211_rts_duration(hw, vif, frame_len, frame_txctl); memcpy(rts->ra, hdr->addr1, sizeof(rts->ra)); @@ -1927,14 +1975,13 @@ EXPORT_SYMBOL(ieee80211_rts_get); void ieee80211_ctstoself_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl, + const struct ieee80211_tx_info *frame_txctl, struct ieee80211_cts *cts) { const struct ieee80211_hdr *hdr = frame; - u16 fctl; - fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS; - cts->frame_control = cpu_to_le16(fctl); + cts->frame_control = + cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS); cts->duration = ieee80211_ctstoself_duration(hw, vif, frame_len, frame_txctl); memcpy(cts->ra, hdr->addr1, sizeof(cts->ra)); @@ -1943,23 +1990,21 @@ EXPORT_SYMBOL(ieee80211_ctstoself_get); struct sk_buff * ieee80211_get_buffered_bc(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_tx_control *control) + struct ieee80211_vif *vif) { struct ieee80211_local *local = hw_to_local(hw); - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct sta_info *sta; - ieee80211_tx_handler *handler; struct ieee80211_tx_data tx; - ieee80211_tx_result res = TX_DROP; struct net_device *bdev; struct ieee80211_sub_if_data *sdata; struct ieee80211_if_ap *bss = NULL; struct beacon_data *beacon; + struct ieee80211_tx_info *info; sdata = vif_to_sdata(vif); bdev = sdata->dev; - + bss = &sdata->u.ap; if (!bss) return NULL; @@ -1967,19 +2012,16 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, rcu_read_lock(); beacon = rcu_dereference(bss->beacon); - if (sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon || - !beacon->head) { - rcu_read_unlock(); - return NULL; - } + if (sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon || !beacon->head) + goto out; if (bss->dtim_count != 0) - return NULL; /* send buffered bc/mc only after DTIM beacon */ - memset(control, 0, sizeof(*control)); + goto out; /* send buffered bc/mc only after DTIM beacon */ + while (1) { skb = skb_dequeue(&bss->ps_bc_buf); if (!skb) - return NULL; + goto out; local->total_ps_buffered--; if (!skb_queue_empty(&bss->ps_bc_buf) && skb->len >= 2) { @@ -1992,30 +2034,21 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FCTL_MOREDATA); } - if (!ieee80211_tx_prepare(&tx, skb, local->mdev, control)) + if (!ieee80211_tx_prepare(&tx, skb, local->mdev)) break; dev_kfree_skb_any(skb); } + + info = IEEE80211_SKB_CB(skb); + sta = tx.sta; tx.flags |= IEEE80211_TX_PS_BUFFERED; tx.channel = local->hw.conf.channel; + info->band = tx.channel->band; - for (handler = ieee80211_tx_handlers; *handler != NULL; handler++) { - res = (*handler)(&tx); - if (res == TX_DROP || res == TX_QUEUED) - break; - } - skb = tx.skb; /* handlers are allowed to change skb */ - - if (res == TX_DROP) { - I802_DEBUG_INC(local->tx_handlers_drop); - dev_kfree_skb(skb); - skb = NULL; - } else if (res == TX_QUEUED) { - I802_DEBUG_INC(local->tx_handlers_queued); + if (invoke_tx_handlers(&tx)) skb = NULL; - } - + out: rcu_read_unlock(); return skb; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index cc9f715c7bfc..19f85e1b3695 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -34,49 +34,48 @@ void *mac80211_wiphy_privid = &mac80211_wiphy_privid; /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -const unsigned char rfc1042_header[] = +const unsigned char rfc1042_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; /* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ -const unsigned char bridge_tunnel_header[] = +const unsigned char bridge_tunnel_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, enum ieee80211_if_types type) { - u16 fc; + __le16 fc = hdr->frame_control; /* drop ACK/CTS frames and incorrect hdr len (ctrl) */ if (len < 16) return NULL; - fc = le16_to_cpu(hdr->frame_control); - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_DATA: + if (ieee80211_is_data(fc)) { if (len < 24) /* drop incorrect hdr len (data) */ return NULL; - switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case IEEE80211_FCTL_TODS: - return hdr->addr1; - case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + + if (ieee80211_has_a4(fc)) return NULL; - case IEEE80211_FCTL_FROMDS: + if (ieee80211_has_tods(fc)) + return hdr->addr1; + if (ieee80211_has_fromds(fc)) return hdr->addr2; - case 0: - return hdr->addr3; - } - break; - case IEEE80211_FTYPE_MGMT: + + return hdr->addr3; + } + + if (ieee80211_is_mgmt(fc)) { if (len < 24) /* drop incorrect hdr len (mgmt) */ return NULL; return hdr->addr3; - case IEEE80211_FTYPE_CTL: - if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL) + } + + if (ieee80211_is_ctl(fc)) { + if(ieee80211_is_pspoll(fc)) return hdr->addr1; - else if ((fc & IEEE80211_FCTL_STYPE) == - IEEE80211_STYPE_BACK_REQ) { + + if (ieee80211_is_back_req(fc)) { switch (type) { case IEEE80211_IF_TYPE_STA: return hdr->addr2; @@ -84,11 +83,9 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, case IEEE80211_IF_TYPE_VLAN: return hdr->addr1; default: - return NULL; + break; /* fall through to the return */ } } - else - return NULL; } return NULL; @@ -133,14 +130,46 @@ int ieee80211_get_hdrlen(u16 fc) } EXPORT_SYMBOL(ieee80211_get_hdrlen); -int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) +unsigned int ieee80211_hdrlen(__le16 fc) { - const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *) skb->data; - int hdrlen; + unsigned int hdrlen = 24; + + if (ieee80211_is_data(fc)) { + if (ieee80211_has_a4(fc)) + hdrlen = 30; + if (ieee80211_is_data_qos(fc)) + hdrlen += IEEE80211_QOS_CTL_LEN; + goto out; + } + + if (ieee80211_is_ctl(fc)) { + /* + * ACK and CTS are 10 bytes, all others 16. To see how + * to get this condition consider + * subtype mask: 0b0000000011110000 (0x00F0) + * ACK subtype: 0b0000000011010000 (0x00D0) + * CTS subtype: 0b0000000011000000 (0x00C0) + * bits that matter: ^^^ (0x00E0) + * value of those: 0b0000000011000000 (0x00C0) + */ + if ((fc & cpu_to_le16(0x00E0)) == cpu_to_le16(0x00C0)) + hdrlen = 10; + else + hdrlen = 16; + } +out: + return hdrlen; +} +EXPORT_SYMBOL(ieee80211_hdrlen); + +unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) +{ + const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; if (unlikely(skb->len < 10)) return 0; - hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (unlikely(hdrlen > skb->len)) return 0; return hdrlen; @@ -153,15 +182,15 @@ int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) /* 7.1.3.5a.2 */ switch (ae) { case 0: - return 5; + return 6; case 1: - return 11; + return 12; case 2: - return 17; + return 18; case 3: - return 23; + return 24; default: - return 5; + return 6; } } @@ -258,7 +287,7 @@ EXPORT_SYMBOL(ieee80211_generic_frame_duration); __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, struct ieee80211_vif *vif, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl) + const struct ieee80211_tx_info *frame_txctl) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate; @@ -266,10 +295,13 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, bool short_preamble; int erp; u16 dur; + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; short_preamble = sdata->bss_conf.use_short_preamble; - rate = frame_txctl->rts_cts_rate; + rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx]; erp = 0; if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) @@ -292,7 +324,7 @@ EXPORT_SYMBOL(ieee80211_rts_duration); __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, struct ieee80211_vif *vif, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl) + const struct ieee80211_tx_info *frame_txctl) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate; @@ -300,10 +332,13 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, bool short_preamble; int erp; u16 dur; + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; short_preamble = sdata->bss_conf.use_short_preamble; - rate = frame_txctl->rts_cts_rate; + rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx]; erp = 0; if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) erp = rate->flags & IEEE80211_RATE_ERP_G; @@ -311,7 +346,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, /* Data frame duration */ dur = ieee80211_frame_duration(local, frame_len, rate->bitrate, erp, short_preamble); - if (!(frame_txctl->flags & IEEE80211_TXCTL_NO_ACK)) { + if (!(frame_txctl->flags & IEEE80211_TX_CTL_NO_ACK)) { /* ACK duration */ dur += ieee80211_frame_duration(local, 10, rate->bitrate, erp, short_preamble); @@ -325,17 +360,10 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue) { struct ieee80211_local *local = hw_to_local(hw); - if (test_and_clear_bit(IEEE80211_LINK_STATE_XOFF, - &local->state[queue])) { - if (test_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[queue])) - tasklet_schedule(&local->tx_pending_tasklet); - else - if (!ieee80211_qdisc_installed(local->mdev)) { - if (queue == 0) - netif_wake_queue(local->mdev); - } else - __netif_schedule(local->mdev); + if (test_bit(queue, local->queues_pending)) { + tasklet_schedule(&local->tx_pending_tasklet); + } else { + netif_wake_subqueue(local->mdev, queue); } } EXPORT_SYMBOL(ieee80211_wake_queue); @@ -344,29 +372,15 @@ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue) { struct ieee80211_local *local = hw_to_local(hw); - if (!ieee80211_qdisc_installed(local->mdev) && queue == 0) - netif_stop_queue(local->mdev); - set_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); + netif_stop_subqueue(local->mdev, queue); } EXPORT_SYMBOL(ieee80211_stop_queue); -void ieee80211_start_queues(struct ieee80211_hw *hw) -{ - struct ieee80211_local *local = hw_to_local(hw); - int i; - - for (i = 0; i < local->hw.queues; i++) - clear_bit(IEEE80211_LINK_STATE_XOFF, &local->state[i]); - if (!ieee80211_qdisc_installed(local->mdev)) - netif_start_queue(local->mdev); -} -EXPORT_SYMBOL(ieee80211_start_queues); - void ieee80211_stop_queues(struct ieee80211_hw *hw) { int i; - for (i = 0; i < hw->queues; i++) + for (i = 0; i < ieee80211_num_queues(hw); i++) ieee80211_stop_queue(hw, i); } EXPORT_SYMBOL(ieee80211_stop_queues); @@ -375,7 +389,7 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw) { int i; - for (i = 0; i < hw->queues; i++) + for (i = 0; i < hw->queues + hw->ampdu_queues; i++) ieee80211_wake_queue(hw, i); } EXPORT_SYMBOL(ieee80211_wake_queues); @@ -389,6 +403,39 @@ void ieee80211_iterate_active_interfaces( struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; + rtnl_lock(); + + list_for_each_entry(sdata, &local->interfaces, list) { + switch (sdata->vif.type) { + case IEEE80211_IF_TYPE_INVALID: + case IEEE80211_IF_TYPE_MNTR: + case IEEE80211_IF_TYPE_VLAN: + continue; + case IEEE80211_IF_TYPE_AP: + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_IBSS: + case IEEE80211_IF_TYPE_WDS: + case IEEE80211_IF_TYPE_MESH_POINT: + break; + } + if (netif_running(sdata->dev)) + iterator(data, sdata->dev->dev_addr, + &sdata->vif); + } + + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); + +void ieee80211_iterate_active_interfaces_atomic( + struct ieee80211_hw *hw, + void (*iterator)(void *data, u8 *mac, + struct ieee80211_vif *vif), + void *data) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -404,8 +451,6 @@ void ieee80211_iterate_active_interfaces( case IEEE80211_IF_TYPE_MESH_POINT: break; } - if (sdata->dev == local->mdev) - continue; if (netif_running(sdata->dev)) iterator(data, sdata->dev->dev_addr, &sdata->vif); @@ -413,4 +458,4 @@ void ieee80211_iterate_active_interfaces( rcu_read_unlock(); } -EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); +EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic); diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index affcecd78c10..872d2fcd1a5b 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -84,24 +84,17 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; u8 *newhdr; - fc = le16_to_cpu(hdr->frame_control); - fc |= IEEE80211_FCTL_PROTECTED; - hdr->frame_control = cpu_to_le16(fc); + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if ((skb_headroom(skb) < WEP_IV_LEN || - skb_tailroom(skb) < WEP_ICV_LEN)) { - I802_DEBUG_INC(local->tx_expand_skb_head); - if (unlikely(pskb_expand_head(skb, WEP_IV_LEN, WEP_ICV_LEN, - GFP_ATOMIC))) - return NULL; - } + if (WARN_ON(skb_tailroom(skb) < WEP_ICV_LEN || + skb_headroom(skb) < WEP_IV_LEN)) + return NULL; - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); newhdr = skb_push(skb, WEP_IV_LEN); memmove(newhdr, newhdr + WEP_IV_LEN, hdrlen); ieee80211_wep_get_iv(local, key, newhdr + hdrlen); @@ -113,12 +106,10 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); skb_pull(skb, WEP_IV_LEN); } @@ -228,17 +219,15 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, u32 klen; u8 *rc4key; u8 keyidx; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; size_t len; int ret = 0; - fc = le16_to_cpu(hdr->frame_control); - if (!(fc & IEEE80211_FCTL_PROTECTED)) + if (!ieee80211_has_protected(hdr->frame_control)) return -1; - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (skb->len < 8 + hdrlen) return -1; @@ -264,11 +253,8 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, skb->data + hdrlen + WEP_IV_LEN, - len)) { - if (net_ratelimit()) - printk(KERN_DEBUG "WEP decrypt failed (ICV)\n"); + len)) ret = -1; - } kfree(rc4key); @@ -285,17 +271,15 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; u8 *ivpos; u32 iv; - fc = le16_to_cpu(hdr->frame_control); - if (!(fc & IEEE80211_FCTL_PROTECTED)) + if (!ieee80211_has_protected(hdr->frame_control)) return NULL; - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); ivpos = skb->data + hdrlen; iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2]; @@ -314,14 +298,8 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (!(rx->status->flag & RX_FLAG_DECRYPTED)) { - if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) { -#ifdef CONFIG_MAC80211_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: RX WEP frame, decrypt " - "failed\n", rx->dev->name); -#endif /* CONFIG_MAC80211_DEBUG */ + if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) return RX_DROP_UNUSABLE; - } } else if (!(rx->status->flag & RX_FLAG_IV_STRIPPED)) { ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ @@ -333,11 +311,16 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + info->control.iv_len = WEP_IV_LEN; + info->control.icv_len = WEP_ICV_LEN; + if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { if (ieee80211_wep_encrypt(tx->local, skb, tx->key)) return -1; } else { - tx->control->key_idx = tx->key->conf.hw_key_idx; + info->control.hw_key = &tx->key->conf; if (tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) { if (!ieee80211_wep_add_iv(tx->local, skb, tx->key)) return -1; @@ -349,8 +332,6 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) ieee80211_tx_result ieee80211_crypto_wep_encrypt(struct ieee80211_tx_data *tx) { - tx->control->iv_len = WEP_IV_LEN; - tx->control->icv_len = WEP_ICV_LEN; ieee80211_tx_set_protected(tx); if (wep_encrypt_skb(tx, tx->skb) < 0) { diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h index 363779c50658..e587172115b8 100644 --- a/net/mac80211/wep.h +++ b/net/mac80211/wep.h @@ -26,7 +26,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key); int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key); -u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key); +u8 *ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key); ieee80211_rx_result ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 76e1de1dc735..34fa8ed1e784 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -95,6 +95,13 @@ static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, } } + if (alg == ALG_WEP && + key_len != LEN_WEP40 && key_len != LEN_WEP104) { + ieee80211_key_free(key); + err = -EINVAL; + goto out_unlock; + } + ieee80211_key_link(key, sdata, sta); if (set_tx_key || (!sta && !sdata->default_key && key)) @@ -135,7 +142,39 @@ static int ieee80211_ioctl_giwname(struct net_device *dev, struct iw_request_info *info, char *name, char *extra) { + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_supported_band *sband; + u8 is_ht = 0, is_a = 0, is_b = 0, is_g = 0; + + + sband = local->hw.wiphy->bands[IEEE80211_BAND_5GHZ]; + if (sband) { + is_a = 1; + is_ht |= sband->ht_info.ht_supported; + } + + sband = local->hw.wiphy->bands[IEEE80211_BAND_2GHZ]; + if (sband) { + int i; + /* Check for mandatory rates */ + for (i = 0; i < sband->n_bitrates; i++) { + if (sband->bitrates[i].bitrate == 10) + is_b = 1; + if (sband->bitrates[i].bitrate == 60) + is_g = 1; + } + is_ht |= sband->ht_info.ht_supported; + } + strcpy(name, "IEEE 802.11"); + if (is_a) + strcat(name, "a"); + if (is_b) + strcat(name, "b"); + if (is_g) + strcat(name, "g"); + if (is_ht) + strcat(name, "n"); return 0; } @@ -169,14 +208,26 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, range->num_encoding_sizes = 2; range->max_encoding_tokens = NUM_DEFAULT_KEYS; - range->max_qual.qual = local->hw.max_signal; - range->max_qual.level = local->hw.max_rssi; - range->max_qual.noise = local->hw.max_noise; + if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC || + local->hw.flags & IEEE80211_HW_SIGNAL_DB) + range->max_qual.level = local->hw.max_signal; + else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + range->max_qual.level = -110; + else + range->max_qual.level = 0; + + if (local->hw.flags & IEEE80211_HW_NOISE_DBM) + range->max_qual.noise = -110; + else + range->max_qual.noise = 0; + + range->max_qual.qual = 100; range->max_qual.updated = local->wstats_flags; - range->avg_qual.qual = local->hw.max_signal/2; - range->avg_qual.level = 0; - range->avg_qual.noise = 0; + range->avg_qual.qual = 50; + /* not always true but better than nothing */ + range->avg_qual.level = range->max_qual.level / 2; + range->avg_qual.noise = range->max_qual.noise / 2; range->avg_qual.updated = local->wstats_flags; range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | @@ -209,7 +260,6 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, range->num_frequency = c; IW_EVENT_CAPA_SET_KERNEL(range->event_capa); - IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWTHRSPY); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); @@ -246,15 +296,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, return -EINVAL; } - if (type == sdata->vif.type) - return 0; - if (netif_running(dev)) - return -EBUSY; - - ieee80211_if_reinit(dev); - ieee80211_if_set_type(dev, type); - - return 0; + return ieee80211_if_change_type(sdata, type); } @@ -291,14 +333,22 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev, return 0; } -int ieee80211_set_freq(struct ieee80211_local *local, int freqMHz) +int ieee80211_set_freq(struct net_device *dev, int freqMHz) { int ret = -EINVAL; struct ieee80211_channel *chan; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { + if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && + chan->flags & IEEE80211_CHAN_NO_IBSS) { + printk(KERN_DEBUG "%s: IBSS not allowed on frequency " + "%d MHz\n", dev->name, chan->center_freq); + return ret; + } local->oper_channel = chan; if (local->sta_sw_scanning || local->sta_hw_scanning) @@ -316,7 +366,6 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type == IEEE80211_IF_TYPE_STA) @@ -330,14 +379,14 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else - return ieee80211_set_freq(local, + return ieee80211_set_freq(dev, ieee80211_channel_to_frequency(freq->m)); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) div /= 10; if (div > 0) - return ieee80211_set_freq(local, freq->m / div); + return ieee80211_set_freq(dev, freq->m / div); else return -EINVAL; } @@ -395,7 +444,7 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, memset(sdata->u.ap.ssid + len, 0, IEEE80211_MAX_SSID_LEN - len); sdata->u.ap.ssid_len = len; - return ieee80211_if_config(dev); + return ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); } return -EOPNOTSUPP; } @@ -490,9 +539,15 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type == IEEE80211_IF_TYPE_STA || sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - ap_addr->sa_family = ARPHRD_ETHER; - memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); - return 0; + if (sdata->u.sta.state == IEEE80211_ASSOCIATED || + sdata->u.sta.state == IEEE80211_IBSS_JOINED) { + ap_addr->sa_family = ARPHRD_ETHER; + memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); + return 0; + } else { + memset(&ap_addr->sa_data, 0, ETH_ALEN); + return 0; + } } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { ap_addr->sa_family = ARPHRD_ETHER; memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); @@ -543,7 +598,7 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev, if (local->sta_sw_scanning || local->sta_hw_scanning) return -EAGAIN; - res = ieee80211_sta_scan_results(dev, extra, data->length); + res = ieee80211_sta_scan_results(dev, info, extra, data->length); if (res >= 0) { data->length = res; return 0; @@ -564,16 +619,14 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev, struct ieee80211_supported_band *sband; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (!sdata->bss) - return -ENODEV; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; /* target_rate = -1, rate->fixed = 0 means auto only, so use all rates * target_rate = X, rate->fixed = 1 means only rate X * target_rate = X, rate->fixed = 0 means all rates <= X */ - sdata->bss->max_ratectrl_rateidx = -1; - sdata->bss->force_unicast_rateidx = -1; + sdata->max_ratectrl_rateidx = -1; + sdata->force_unicast_rateidx = -1; if (rate->value < 0) return 0; @@ -582,9 +635,9 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev, int this_rate = brate->bitrate; if (target_rate == this_rate) { - sdata->bss->max_ratectrl_rateidx = i; + sdata->max_ratectrl_rateidx = i; if (rate->fixed) - sdata->bss->force_unicast_rateidx = i; + sdata->force_unicast_rateidx = i; err = 0; break; } @@ -697,6 +750,9 @@ static int ieee80211_ioctl_siwrts(struct net_device *dev, if (rts->disabled) local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; + else if (!rts->fixed) + /* if the rts value is not fixed, then take default */ + local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; else if (rts->value < 0 || rts->value > IEEE80211_MAX_RTS_THRESHOLD) return -EINVAL; else @@ -734,6 +790,8 @@ static int ieee80211_ioctl_siwfrag(struct net_device *dev, if (frag->disabled) local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; + else if (!frag->fixed) + local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; else if (frag->value < 256 || frag->value > IEEE80211_MAX_FRAG_THRESHOLD) return -EINVAL; @@ -925,6 +983,58 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev, erq->length = sdata->keys[idx]->conf.keylen; erq->flags |= IW_ENCODE_ENABLED; + if (sdata->vif.type == IEEE80211_IF_TYPE_STA) { + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + switch (ifsta->auth_alg) { + case WLAN_AUTH_OPEN: + case WLAN_AUTH_LEAP: + erq->flags |= IW_ENCODE_OPEN; + break; + case WLAN_AUTH_SHARED_KEY: + erq->flags |= IW_ENCODE_RESTRICTED; + break; + } + } + + return 0; +} + +static int ieee80211_ioctl_siwpower(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *wrq, + char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_conf *conf = &local->hw.conf; + + if (wrq->disabled) { + conf->flags &= ~IEEE80211_CONF_PS; + return ieee80211_hw_config(local); + } + + switch (wrq->flags & IW_POWER_MODE) { + case IW_POWER_ON: /* If not specified */ + case IW_POWER_MODE: /* If set all mask */ + case IW_POWER_ALL_R: /* If explicitely state all */ + conf->flags |= IEEE80211_CONF_PS; + break; + default: /* Otherwise we don't support it */ + return -EINVAL; + } + + return ieee80211_hw_config(local); +} + +static int ieee80211_ioctl_giwpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, + char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_conf *conf = &local->hw.conf; + + wrqu->power.disabled = !(conf->flags & IEEE80211_CONF_PS); + return 0; } @@ -996,8 +1106,8 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev wstats->qual.noise = 0; wstats->qual.updated = IW_QUAL_ALL_INVALID; } else { - wstats->qual.level = sta->last_rssi; - wstats->qual.qual = sta->last_signal; + wstats->qual.level = sta->last_signal; + wstats->qual.qual = sta->last_qual; wstats->qual.noise = sta->last_noise; wstats->qual.updated = local->wstats_flags; } @@ -1130,8 +1240,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) ieee80211_ioctl_giwretry, /* SIOCGIWRETRY */ (iw_handler) ieee80211_ioctl_siwencode, /* SIOCSIWENCODE */ (iw_handler) ieee80211_ioctl_giwencode, /* SIOCGIWENCODE */ - (iw_handler) NULL, /* SIOCSIWPOWER */ - (iw_handler) NULL, /* SIOCGIWPOWER */ + (iw_handler) ieee80211_ioctl_siwpower, /* SIOCSIWPOWER */ + (iw_handler) ieee80211_ioctl_giwpower, /* SIOCGIWPOWER */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) ieee80211_ioctl_siwgenie, /* SIOCSIWGENIE */ diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 64faa3dc488f..07edda0b8a5c 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -18,61 +18,42 @@ #include "ieee80211_i.h" #include "wme.h" -/* maximum number of hardware queues we support. */ -#define TC_80211_MAX_QUEUES 16 - +/* Default mapping in classifier to work with default + * queue setup. + */ const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 }; -struct ieee80211_sched_data -{ - unsigned long qdisc_pool[BITS_TO_LONGS(TC_80211_MAX_QUEUES)]; - struct tcf_proto *filter_list; - struct Qdisc *queues[TC_80211_MAX_QUEUES]; - struct sk_buff_head requeued[TC_80211_MAX_QUEUES]; -}; - static const char llc_ip_hdr[8] = {0xAA, 0xAA, 0x3, 0, 0, 0, 0x08, 0}; -/* given a data frame determine the 802.1p/1d tag to use */ -static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd) +/* Given a data frame determine the 802.1p/1d tag to use. */ +static unsigned int classify_1d(struct sk_buff *skb) { - struct iphdr *ip; - int dscp; - int offset; - - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct tcf_result res = { -1, 0 }; - - /* if there is a user set filter list, call out to that */ - if (q->filter_list) { - tc_classify(skb, q->filter_list, &res); - if (res.class != -1) - return res.class; - } + unsigned int dscp; /* skb->priority values from 256->263 are magic values to - * directly indicate a specific 802.1d priority. - * This is used to allow 802.1d priority to be passed directly in - * from VLAN tags, etc. */ + * directly indicate a specific 802.1d priority. This is used + * to allow 802.1d priority to be passed directly in from VLAN + * tags, etc. + */ if (skb->priority >= 256 && skb->priority <= 263) return skb->priority - 256; - /* check there is a valid IP header present */ - offset = ieee80211_get_hdrlen_from_skb(skb); - if (skb->len < offset + sizeof(llc_ip_hdr) + sizeof(*ip) || - memcmp(skb->data + offset, llc_ip_hdr, sizeof(llc_ip_hdr))) - return 0; + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + dscp = ip_hdr(skb)->tos & 0xfc; + break; - ip = (struct iphdr *) (skb->data + offset + sizeof(llc_ip_hdr)); + default: + return 0; + } - dscp = ip->tos & 0xfc; if (dscp & 0x1c) return 0; return dscp >> 5; } -static inline int wme_downgrade_ac(struct sk_buff *skb) +static int wme_downgrade_ac(struct sk_buff *skb) { switch (skb->priority) { case 6: @@ -93,43 +74,38 @@ static inline int wme_downgrade_ac(struct sk_buff *skb) } -/* positive return value indicates which queue to use - * negative return value indicates to drop the frame */ -static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd) +/* Indicate which queue to use. */ +static u16 classify80211(struct sk_buff *skb, struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - unsigned short fc = le16_to_cpu(hdr->frame_control); - int qos; - /* see if frame is data or non data frame */ - if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) { + if (!ieee80211_is_data(hdr->frame_control)) { /* management frames go on AC_VO queue, but are sent * without QoS control fields */ - return IEEE80211_TX_QUEUE_DATA0; + return 0; } if (0 /* injected */) { /* use AC from radiotap */ } - /* is this a QoS frame? */ - qos = fc & IEEE80211_STYPE_QOS_DATA; - - if (!qos) { + if (!ieee80211_is_data_qos(hdr->frame_control)) { skb->priority = 0; /* required for correct WPA/11i MIC */ return ieee802_1d_to_ac[skb->priority]; } /* use the data classifier to determine what 802.1d tag the * data frame has */ - skb->priority = classify_1d(skb, qd); + skb->priority = classify_1d(skb); /* in case we are a client verify acm is not set for this ac */ while (unlikely(local->wmm_acm & BIT(skb->priority))) { if (wme_downgrade_ac(skb)) { - /* No AC with lower priority has acm=0, drop packet. */ - return -1; + /* The old code would drop the packet in this + * case. + */ + return 0; } } @@ -137,55 +113,52 @@ static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd) return ieee802_1d_to_ac[skb->priority]; } - -static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd) +u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_tx_packet_data *pkt_data = - (struct ieee80211_tx_packet_data *) skb->cb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - unsigned short fc = le16_to_cpu(hdr->frame_control); - struct Qdisc *qdisc; - int err, queue; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct sta_info *sta; + u16 queue; u8 tid; - if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) { - queue = pkt_data->queue; + queue = classify80211(skb, dev); + if (unlikely(queue >= local->hw.queues)) + queue = local->hw.queues - 1; + + if (info->flags & IEEE80211_TX_CTL_REQUEUE) { rcu_read_lock(); sta = sta_info_get(local, hdr->addr1); - tid = skb->priority & QOS_CONTROL_TAG1D_MASK; + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; if (sta) { + struct ieee80211_hw *hw = &local->hw; int ampdu_queue = sta->tid_to_tx_q[tid]; - if ((ampdu_queue < local->hw.queues) && - test_bit(ampdu_queue, q->qdisc_pool)) { + + if ((ampdu_queue < ieee80211_num_queues(hw)) && + test_bit(ampdu_queue, local->queue_pool)) { queue = ampdu_queue; - pkt_data->flags |= IEEE80211_TXPD_AMPDU; + info->flags |= IEEE80211_TX_CTL_AMPDU; } else { - pkt_data->flags &= ~IEEE80211_TXPD_AMPDU; + info->flags &= ~IEEE80211_TX_CTL_AMPDU; } } rcu_read_unlock(); - skb_queue_tail(&q->requeued[queue], skb); - qd->q.qlen++; - return 0; - } - queue = classify80211(skb, qd); + return queue; + } - /* now we know the 1d priority, fill in the QoS header if there is one + /* Now we know the 1d priority, fill in the QoS header if + * there is one. */ - if (WLAN_FC_IS_QOS_DATA(fc)) { - u8 *p = skb->data + ieee80211_get_hdrlen(fc) - 2; + if (ieee80211_is_data_qos(hdr->frame_control)) { + u8 *p = ieee80211_get_qos_ctl(hdr); u8 ack_policy = 0; - tid = skb->priority & QOS_CONTROL_TAG1D_MASK; + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; if (local->wifi_wme_noack_test) ack_policy |= QOS_CONTROL_ACK_POLICY_NOACK << QOS_CONTROL_ACK_POLICY_SHIFT; /* qos header is 2 bytes, second reserved */ - *p = ack_policy | tid; - p++; + *p++ = ack_policy | tid; *p = 0; rcu_read_lock(); @@ -193,475 +166,37 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd) sta = sta_info_get(local, hdr->addr1); if (sta) { int ampdu_queue = sta->tid_to_tx_q[tid]; - if ((ampdu_queue < local->hw.queues) && - test_bit(ampdu_queue, q->qdisc_pool)) { + struct ieee80211_hw *hw = &local->hw; + + if ((ampdu_queue < ieee80211_num_queues(hw)) && + test_bit(ampdu_queue, local->queue_pool)) { queue = ampdu_queue; - pkt_data->flags |= IEEE80211_TXPD_AMPDU; + info->flags |= IEEE80211_TX_CTL_AMPDU; } else { - pkt_data->flags &= ~IEEE80211_TXPD_AMPDU; + info->flags &= ~IEEE80211_TX_CTL_AMPDU; } } rcu_read_unlock(); } - if (unlikely(queue >= local->hw.queues)) { -#if 0 - if (net_ratelimit()) { - printk(KERN_DEBUG "%s - queue=%d (hw does not " - "support) -> %d\n", - __func__, queue, local->hw.queues - 1); - } -#endif - queue = local->hw.queues - 1; - } - - if (unlikely(queue < 0)) { - kfree_skb(skb); - err = NET_XMIT_DROP; - } else { - tid = skb->priority & QOS_CONTROL_TAG1D_MASK; - pkt_data->queue = (unsigned int) queue; - qdisc = q->queues[queue]; - err = qdisc->enqueue(skb, qdisc); - if (err == NET_XMIT_SUCCESS) { - qd->q.qlen++; - qd->bstats.bytes += skb->len; - qd->bstats.packets++; - return NET_XMIT_SUCCESS; - } - } - qd->qstats.drops++; - return err; -} - - -/* TODO: clean up the cases where master_hard_start_xmit - * returns non 0 - it shouldn't ever do that. Once done we - * can remove this function */ -static int wme_qdiscop_requeue(struct sk_buff *skb, struct Qdisc* qd) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_tx_packet_data *pkt_data = - (struct ieee80211_tx_packet_data *) skb->cb; - struct Qdisc *qdisc; - int err; - - /* we recorded which queue to use earlier! */ - qdisc = q->queues[pkt_data->queue]; - - if ((err = qdisc->ops->requeue(skb, qdisc)) == 0) { - qd->q.qlen++; - return 0; - } - qd->qstats.drops++; - return err; -} - - -static struct sk_buff *wme_qdiscop_dequeue(struct Qdisc* qd) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct net_device *dev = qd->dev; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - struct sk_buff *skb; - struct Qdisc *qdisc; - int queue; - - /* check all the h/w queues in numeric/priority order */ - for (queue = 0; queue < hw->queues; queue++) { - /* see if there is room in this hardware queue */ - if ((test_bit(IEEE80211_LINK_STATE_XOFF, - &local->state[queue])) || - (test_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[queue])) || - (!test_bit(queue, q->qdisc_pool))) - continue; - - /* there is space - try and get a frame */ - skb = skb_dequeue(&q->requeued[queue]); - if (skb) { - qd->q.qlen--; - return skb; - } - - qdisc = q->queues[queue]; - skb = qdisc->dequeue(qdisc); - if (skb) { - qd->q.qlen--; - return skb; - } - } - /* returning a NULL here when all the h/w queues are full means we - * never need to call netif_stop_queue in the driver */ - return NULL; -} - - -static void wme_qdiscop_reset(struct Qdisc* qd) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - int queue; - - /* QUESTION: should we have some hardware flush functionality here? */ - - for (queue = 0; queue < hw->queues; queue++) { - skb_queue_purge(&q->requeued[queue]); - qdisc_reset(q->queues[queue]); - } - qd->q.qlen = 0; -} - - -static void wme_qdiscop_destroy(struct Qdisc* qd) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - int queue; - - tcf_destroy_chain(q->filter_list); - q->filter_list = NULL; - - for (queue=0; queue < hw->queues; queue++) { - skb_queue_purge(&q->requeued[queue]); - qdisc_destroy(q->queues[queue]); - q->queues[queue] = &noop_qdisc; - } -} - - -/* called whenever parameters are updated on existing qdisc */ -static int wme_qdiscop_tune(struct Qdisc *qd, struct nlattr *opt) -{ -/* struct ieee80211_sched_data *q = qdisc_priv(qd); -*/ - /* check our options block is the right size */ - /* copy any options to our local structure */ -/* Ignore options block for now - always use static mapping - struct tc_ieee80211_qopt *qopt = nla_data(opt); - - if (opt->nla_len < nla_attr_size(sizeof(*qopt))) - return -EINVAL; - memcpy(q->tag2queue, qopt->tag2queue, sizeof(qopt->tag2queue)); -*/ - return 0; -} - - -/* called during initial creation of qdisc on device */ -static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct net_device *dev = qd->dev; - struct ieee80211_local *local; - int queues; - int err = 0, i; - - /* check that device is a mac80211 device */ - if (!dev->ieee80211_ptr || - dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid) - return -EINVAL; - - /* check this device is an ieee80211 master type device */ - if (dev->type != ARPHRD_IEEE80211) - return -EINVAL; - - /* check that there is no qdisc currently attached to device - * this ensures that we will be the root qdisc. (I can't find a better - * way to test this explicitly) */ - if (dev->qdisc_sleeping != &noop_qdisc) - return -EINVAL; - - if (qd->flags & TCQ_F_INGRESS) - return -EINVAL; - - local = wdev_priv(dev->ieee80211_ptr); - queues = local->hw.queues; - - /* if options were passed in, set them */ - if (opt) { - err = wme_qdiscop_tune(qd, opt); - } - - /* create child queues */ - for (i = 0; i < queues; i++) { - skb_queue_head_init(&q->requeued[i]); - q->queues[i] = qdisc_create_dflt(qd->dev, &pfifo_qdisc_ops, - qd->handle); - if (!q->queues[i]) { - q->queues[i] = &noop_qdisc; - printk(KERN_ERR "%s child qdisc %i creation failed", dev->name, i); - } - } - - /* reserve all legacy QoS queues */ - for (i = 0; i < min(IEEE80211_TX_QUEUE_DATA4, queues); i++) - set_bit(i, q->qdisc_pool); - - return err; -} - -static int wme_qdiscop_dump(struct Qdisc *qd, struct sk_buff *skb) -{ -/* struct ieee80211_sched_data *q = qdisc_priv(qd); - unsigned char *p = skb->tail; - struct tc_ieee80211_qopt opt; - - memcpy(&opt.tag2queue, q->tag2queue, TC_80211_MAX_TAG + 1); - NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); -*/ return skb->len; -/* -nla_put_failure: - skb_trim(skb, p - skb->data);*/ - return -1; -} - - -static int wme_classop_graft(struct Qdisc *qd, unsigned long arg, - struct Qdisc *new, struct Qdisc **old) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - unsigned long queue = arg - 1; - - if (queue >= hw->queues) - return -EINVAL; - - if (!new) - new = &noop_qdisc; - - sch_tree_lock(qd); - *old = q->queues[queue]; - q->queues[queue] = new; - qdisc_reset(*old); - sch_tree_unlock(qd); - - return 0; -} - - -static struct Qdisc * -wme_classop_leaf(struct Qdisc *qd, unsigned long arg) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - unsigned long queue = arg - 1; - - if (queue >= hw->queues) - return NULL; - - return q->queues[queue]; -} - - -static unsigned long wme_classop_get(struct Qdisc *qd, u32 classid) -{ - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - unsigned long queue = TC_H_MIN(classid); - - if (queue - 1 >= hw->queues) - return 0; - return queue; } - -static unsigned long wme_classop_bind(struct Qdisc *qd, unsigned long parent, - u32 classid) -{ - return wme_classop_get(qd, classid); -} - - -static void wme_classop_put(struct Qdisc *q, unsigned long cl) -{ -} - - -static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent, - struct nlattr **tca, unsigned long *arg) -{ - unsigned long cl = *arg; - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - - if (cl - 1 > hw->queues) - return -ENOENT; - - /* TODO: put code to program hardware queue parameters here, - * to allow programming from tc command line */ - - return 0; -} - - -/* we don't support deleting hardware queues - * when we add WMM-SA support - TSPECs may be deleted here */ -static int wme_classop_delete(struct Qdisc *qd, unsigned long cl) -{ - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - - if (cl - 1 > hw->queues) - return -ENOENT; - return 0; -} - - -static int wme_classop_dump_class(struct Qdisc *qd, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - - if (cl - 1 > hw->queues) - return -ENOENT; - tcm->tcm_handle = TC_H_MIN(cl); - tcm->tcm_parent = qd->handle; - tcm->tcm_info = q->queues[cl-1]->handle; /* do we need this? */ - return 0; -} - - -static void wme_classop_walk(struct Qdisc *qd, struct qdisc_walker *arg) -{ - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - int queue; - - if (arg->stop) - return; - - for (queue = 0; queue < hw->queues; queue++) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - /* we should return classids for our internal queues here - * as well as the external ones */ - if (arg->fn(qd, queue+1, arg) < 0) { - arg->stop = 1; - break; - } - arg->count++; - } -} - - -static struct tcf_proto ** wme_classop_find_tcf(struct Qdisc *qd, - unsigned long cl) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - - if (cl) - return NULL; - - return &q->filter_list; -} - - -/* this qdisc is classful (i.e. has classes, some of which may have leaf qdiscs attached) - * - these are the operations on the classes */ -static const struct Qdisc_class_ops class_ops = -{ - .graft = wme_classop_graft, - .leaf = wme_classop_leaf, - - .get = wme_classop_get, - .put = wme_classop_put, - .change = wme_classop_change, - .delete = wme_classop_delete, - .walk = wme_classop_walk, - - .tcf_chain = wme_classop_find_tcf, - .bind_tcf = wme_classop_bind, - .unbind_tcf = wme_classop_put, - - .dump = wme_classop_dump_class, -}; - - -/* queueing discipline operations */ -static struct Qdisc_ops wme_qdisc_ops __read_mostly = -{ - .next = NULL, - .cl_ops = &class_ops, - .id = "ieee80211", - .priv_size = sizeof(struct ieee80211_sched_data), - - .enqueue = wme_qdiscop_enqueue, - .dequeue = wme_qdiscop_dequeue, - .requeue = wme_qdiscop_requeue, - .drop = NULL, /* drop not needed since we are always the root qdisc */ - - .init = wme_qdiscop_init, - .reset = wme_qdiscop_reset, - .destroy = wme_qdiscop_destroy, - .change = wme_qdiscop_tune, - - .dump = wme_qdiscop_dump, -}; - - -void ieee80211_install_qdisc(struct net_device *dev) -{ - struct Qdisc *qdisc; - - qdisc = qdisc_create_dflt(dev, &wme_qdisc_ops, TC_H_ROOT); - if (!qdisc) { - printk(KERN_ERR "%s: qdisc installation failed\n", dev->name); - return; - } - - /* same handle as would be allocated by qdisc_alloc_handle() */ - qdisc->handle = 0x80010000; - - qdisc_lock_tree(dev); - list_add_tail(&qdisc->list, &dev->qdisc_list); - dev->qdisc_sleeping = qdisc; - qdisc_unlock_tree(dev); -} - - -int ieee80211_qdisc_installed(struct net_device *dev) -{ - return dev->qdisc_sleeping->ops == &wme_qdisc_ops; -} - - -int ieee80211_wme_register(void) -{ - return register_qdisc(&wme_qdisc_ops); -} - - -void ieee80211_wme_unregister(void) -{ - unregister_qdisc(&wme_qdisc_ops); -} - int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, - struct sta_info *sta, u16 tid) + struct sta_info *sta, u16 tid) { int i; - struct ieee80211_sched_data *q = - qdisc_priv(local->mdev->qdisc_sleeping); - DECLARE_MAC_BUF(mac); /* prepare the filter and save it for the SW queue - * matching the recieved HW queue */ + * matching the received HW queue */ + + if (!local->hw.ampdu_queues) + return -EPERM; /* try to get a Qdisc from the pool */ - for (i = IEEE80211_TX_QUEUE_BEACON; i < local->hw.queues; i++) - if (!test_and_set_bit(i, q->qdisc_pool)) { + for (i = local->hw.queues; i < ieee80211_num_queues(&local->hw); i++) + if (!test_and_set_bit(i, local->queue_pool)) { ieee80211_stop_queue(local_to_hw(local), i); sta->tid_to_tx_q[tid] = i; @@ -670,11 +205,13 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, * on the previous queue * since HT is strict in order */ #ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) + if (net_ratelimit()) { + DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "allocated aggregation queue" - " %d tid %d addr %s pool=0x%lX", + " %d tid %d addr %s pool=0x%lX\n", i, tid, print_mac(mac, sta->addr), - q->qdisc_pool[0]); + local->queue_pool[0]); + } #endif /* CONFIG_MAC80211_HT_DEBUG */ return 0; } @@ -683,44 +220,79 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, } /** - * the caller needs to hold local->mdev->queue_lock + * the caller needs to hold netdev_get_tx_queue(local->mdev, X)->lock */ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, struct sta_info *sta, u16 tid, u8 requeue) { - struct ieee80211_sched_data *q = - qdisc_priv(local->mdev->qdisc_sleeping); int agg_queue = sta->tid_to_tx_q[tid]; + struct ieee80211_hw *hw = &local->hw; /* return the qdisc to the pool */ - clear_bit(agg_queue, q->qdisc_pool); - sta->tid_to_tx_q[tid] = local->hw.queues; + clear_bit(agg_queue, local->queue_pool); + sta->tid_to_tx_q[tid] = ieee80211_num_queues(hw); - if (requeue) + if (requeue) { ieee80211_requeue(local, agg_queue); - else - q->queues[agg_queue]->ops->reset(q->queues[agg_queue]); + } else { + struct netdev_queue *txq; + spinlock_t *root_lock; + + txq = netdev_get_tx_queue(local->mdev, agg_queue); + root_lock = qdisc_root_lock(txq->qdisc); + + spin_lock_bh(root_lock); + qdisc_reset(txq->qdisc); + spin_unlock_bh(root_lock); + } } void ieee80211_requeue(struct ieee80211_local *local, int queue) { - struct Qdisc *root_qd = local->mdev->qdisc_sleeping; - struct ieee80211_sched_data *q = qdisc_priv(root_qd); - struct Qdisc *qdisc = q->queues[queue]; - struct sk_buff *skb = NULL; + struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, queue); + struct sk_buff_head list; + spinlock_t *root_lock; + struct Qdisc *qdisc; u32 len; + rcu_read_lock_bh(); + + qdisc = rcu_dereference(txq->qdisc); if (!qdisc || !qdisc->dequeue) - return; + goto out_unlock; + + skb_queue_head_init(&list); - printk(KERN_DEBUG "requeue: qlen = %d\n", qdisc->q.qlen); + root_lock = qdisc_root_lock(qdisc); + spin_lock(root_lock); for (len = qdisc->q.qlen; len > 0; len--) { - skb = qdisc->dequeue(qdisc); - root_qd->q.qlen--; - /* packet will be classified again and */ - /* skb->packet_data->queue will be overridden if needed */ + struct sk_buff *skb = qdisc->dequeue(qdisc); + if (skb) - wme_qdiscop_enqueue(skb, root_qd); + __skb_queue_tail(&list, skb); + } + spin_unlock(root_lock); + + for (len = list.qlen; len > 0; len--) { + struct sk_buff *skb = __skb_dequeue(&list); + u16 new_queue; + + BUG_ON(!skb); + new_queue = ieee80211_select_queue(local->mdev, skb); + skb_set_queue_mapping(skb, new_queue); + + txq = netdev_get_tx_queue(local->mdev, new_queue); + + + qdisc = rcu_dereference(txq->qdisc); + root_lock = qdisc_root_lock(qdisc); + + spin_lock(root_lock); + qdisc_enqueue_root(skb, qdisc); + spin_unlock(root_lock); } + +out_unlock: + rcu_read_unlock_bh(); } diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index fcc6b05508cc..04de28c071a6 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -19,57 +19,16 @@ #define QOS_CONTROL_ACK_POLICY_NORMAL 0 #define QOS_CONTROL_ACK_POLICY_NOACK 1 -#define QOS_CONTROL_TID_MASK 0x0f #define QOS_CONTROL_ACK_POLICY_SHIFT 5 -#define QOS_CONTROL_TAG1D_MASK 0x07 - extern const int ieee802_1d_to_ac[8]; -static inline int WLAN_FC_IS_QOS_DATA(u16 fc) -{ - return (fc & 0x8C) == 0x88; -} - -#ifdef CONFIG_NET_SCHED -void ieee80211_install_qdisc(struct net_device *dev); -int ieee80211_qdisc_installed(struct net_device *dev); +u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb); int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, struct sta_info *sta, u16 tid); void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, struct sta_info *sta, u16 tid, u8 requeue); void ieee80211_requeue(struct ieee80211_local *local, int queue); -int ieee80211_wme_register(void); -void ieee80211_wme_unregister(void); -#else -static inline void ieee80211_install_qdisc(struct net_device *dev) -{ -} -static inline int ieee80211_qdisc_installed(struct net_device *dev) -{ - return 0; -} -static inline int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, - struct sta_info *sta, u16 tid) -{ - return -EAGAIN; -} -static inline void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, - struct sta_info *sta, u16 tid, - u8 requeue) -{ -} -static inline void ieee80211_requeue(struct ieee80211_local *local, int queue) -{ -} -static inline int ieee80211_wme_register(void) -{ - return 0; -} -static inline void ieee80211_wme_unregister(void) -{ -} -#endif /* CONFIG_NET_SCHED */ #endif /* _WME_H */ diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 45709ada8fee..2f33df0dcccf 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -11,6 +11,8 @@ #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/compiler.h> +#include <linux/ieee80211.h> +#include <asm/unaligned.h> #include <net/mac80211.h> #include "ieee80211_i.h" @@ -19,76 +21,30 @@ #include "aes_ccm.h" #include "wpa.h" -static int ieee80211_get_hdr_info(const struct sk_buff *skb, u8 **sa, u8 **da, - u8 *qos_tid, u8 **data, size_t *data_len) -{ - struct ieee80211_hdr *hdr; - size_t hdrlen; - u16 fc; - int a4_included; - u8 *pos; - - hdr = (struct ieee80211_hdr *) skb->data; - fc = le16_to_cpu(hdr->frame_control); - - hdrlen = 24; - if ((fc & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) == - (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { - hdrlen += ETH_ALEN; - *sa = hdr->addr4; - *da = hdr->addr3; - } else if (fc & IEEE80211_FCTL_FROMDS) { - *sa = hdr->addr3; - *da = hdr->addr1; - } else if (fc & IEEE80211_FCTL_TODS) { - *sa = hdr->addr2; - *da = hdr->addr3; - } else { - *sa = hdr->addr2; - *da = hdr->addr1; - } - - if (fc & 0x80) - hdrlen += 2; - - *data = skb->data + hdrlen; - *data_len = skb->len - hdrlen; - - a4_included = (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS); - if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - fc & IEEE80211_STYPE_QOS_DATA) { - pos = (u8 *) &hdr->addr4; - if (a4_included) - pos += 6; - *qos_tid = pos[0] & 0x0f; - *qos_tid |= 0x80; /* qos_included flag */ - } else - *qos_tid = 0; - - return skb->len < hdrlen ? -1 : 0; -} - - ieee80211_tx_result ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) { - u8 *data, *sa, *da, *key, *mic, qos_tid; + u8 *data, *key, *mic, key_offset; size_t data_len; - u16 fc; + unsigned int hdrlen; + struct ieee80211_hdr *hdr; struct sk_buff *skb = tx->skb; int authenticator; int wpa_test = 0; + int tail; - fc = tx->fc; - + hdr = (struct ieee80211_hdr *)skb->data; if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || - !WLAN_FC_DATA_PRESENT(fc)) + !ieee80211_is_data_present(hdr->frame_control)) return TX_CONTINUE; - if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len)) + hdrlen = ieee80211_hdrlen(hdr->frame_control); + if (skb->len < hdrlen) return TX_DROP; + data = skb->data + hdrlen; + data_len = skb->len - hdrlen; + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !(tx->flags & IEEE80211_TX_FRAGMENTED) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) && @@ -98,26 +54,27 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) return TX_CONTINUE; } - if (skb_tailroom(skb) < MICHAEL_MIC_LEN) { - I802_DEBUG_INC(tx->local->tx_expand_skb_head); - if (unlikely(pskb_expand_head(skb, TKIP_IV_LEN, - MICHAEL_MIC_LEN + TKIP_ICV_LEN, - GFP_ATOMIC))) { - printk(KERN_DEBUG "%s: failed to allocate more memory " - "for Michael MIC\n", tx->dev->name); - return TX_DROP; - } - } + tail = MICHAEL_MIC_LEN; + if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + tail += TKIP_ICV_LEN; + + if (WARN_ON(skb_tailroom(skb) < tail || + skb_headroom(skb) < TKIP_IV_LEN)) + return TX_DROP; #if 0 authenticator = fc & IEEE80211_FCTL_FROMDS; /* FIX */ #else authenticator = 1; #endif - key = &tx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_TX_MIC_KEY : - ALG_TKIP_TEMP_AUTH_RX_MIC_KEY]; + /* At this point we know we're using ALG_TKIP. To get the MIC key + * we now will rely on the offset from the ieee80211_key_conf::key */ + key_offset = authenticator ? + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY : + NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY; + key = &tx->key->conf.key[key_offset]; mic = skb_put(skb, MICHAEL_MIC_LEN); - michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic); + michael_mic(key, hdr, data, data_len, mic); return TX_CONTINUE; } @@ -126,47 +83,50 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) ieee80211_rx_result ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) { - u8 *data, *sa, *da, *key = NULL, qos_tid; + u8 *data, *key = NULL, key_offset; size_t data_len; - u16 fc; + unsigned int hdrlen; + struct ieee80211_hdr *hdr; u8 mic[MICHAEL_MIC_LEN]; struct sk_buff *skb = rx->skb; int authenticator = 1, wpa_test = 0; DECLARE_MAC_BUF(mac); - fc = rx->fc; - /* * No way to verify the MIC if the hardware stripped it */ if (rx->status->flag & RX_FLAG_MMIC_STRIPPED) return RX_CONTINUE; + hdr = (struct ieee80211_hdr *)skb->data; if (!rx->key || rx->key->conf.alg != ALG_TKIP || - !(rx->fc & IEEE80211_FCTL_PROTECTED) || !WLAN_FC_DATA_PRESENT(fc)) + !ieee80211_has_protected(hdr->frame_control) || + !ieee80211_is_data_present(hdr->frame_control)) return RX_CONTINUE; - if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len) - || data_len < MICHAEL_MIC_LEN) + hdrlen = ieee80211_hdrlen(hdr->frame_control); + if (skb->len < hdrlen + MICHAEL_MIC_LEN) return RX_DROP_UNUSABLE; - data_len -= MICHAEL_MIC_LEN; + data = skb->data + hdrlen; + data_len = skb->len - hdrlen - MICHAEL_MIC_LEN; #if 0 authenticator = fc & IEEE80211_FCTL_TODS; /* FIX */ #else authenticator = 1; #endif - key = &rx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_RX_MIC_KEY : - ALG_TKIP_TEMP_AUTH_TX_MIC_KEY]; - michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic); + /* At this point we know we're using ALG_TKIP. To get the MIC key + * we now will rely on the offset from the ieee80211_key_conf::key */ + key_offset = authenticator ? + NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY : + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY; + key = &rx->key->conf.key[key_offset]; + michael_mic(key, hdr, data, data_len, mic); if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; - printk(KERN_DEBUG "%s: invalid Michael MIC in data frame from " - "%s\n", rx->dev->name, print_mac(mac, sa)); - mac80211_ev_michael_mic_failure(rx->dev, rx->key->conf.keyidx, (void *) skb->data); return RX_DROP_UNUSABLE; @@ -176,59 +136,58 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) skb_trim(skb, skb->len - MICHAEL_MIC_LEN); /* update IV in key information to be able to detect replays */ - rx->key->u.tkip.iv32_rx[rx->queue] = rx->tkip_iv32; - rx->key->u.tkip.iv16_rx[rx->queue] = rx->tkip_iv16; + rx->key->u.tkip.rx[rx->queue].iv32 = rx->tkip_iv32; + rx->key->u.tkip.rx[rx->queue].iv16 = rx->tkip_iv16; return RX_CONTINUE; } -static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, - struct sk_buff *skb, int test) +static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; - int hdrlen, len, tailneed; - u16 fc; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + unsigned int hdrlen; + int len, tail; u8 *pos; - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); + info->control.icv_len = TKIP_ICV_LEN; + info->control.iv_len = TKIP_IV_LEN; + + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + /* hwaccel - with no need for preallocated room for IV/ICV */ + info->control.hw_key = &tx->key->conf; + return 0; + } + + hdrlen = ieee80211_hdrlen(hdr->frame_control); len = skb->len - hdrlen; if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) - tailneed = 0; + tail = 0; else - tailneed = TKIP_ICV_LEN; - - if ((skb_headroom(skb) < TKIP_IV_LEN || - skb_tailroom(skb) < tailneed)) { - I802_DEBUG_INC(tx->local->tx_expand_skb_head); - if (unlikely(pskb_expand_head(skb, TKIP_IV_LEN, tailneed, - GFP_ATOMIC))) - return -1; - } + tail = TKIP_ICV_LEN; + + if (WARN_ON(skb_tailroom(skb) < tail || + skb_headroom(skb) < TKIP_IV_LEN)) + return -1; pos = skb_push(skb, TKIP_IV_LEN); memmove(pos, pos + TKIP_IV_LEN, hdrlen); pos += hdrlen; /* Increase IV for the frame */ - key->u.tkip.iv16++; - if (key->u.tkip.iv16 == 0) - key->u.tkip.iv32++; + key->u.tkip.tx.iv16++; + if (key->u.tkip.tx.iv16 == 0) + key->u.tkip.tx.iv32++; if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { - hdr = (struct ieee80211_hdr *)skb->data; - /* hwaccel - with preallocated room for IV */ - ieee80211_tkip_add_iv(pos, key, - (u8) (key->u.tkip.iv16 >> 8), - (u8) (((key->u.tkip.iv16 >> 8) | 0x20) & - 0x7f), - (u8) key->u.tkip.iv16); + ieee80211_tkip_add_iv(pos, key, key->u.tkip.tx.iv16); - tx->control->key_idx = tx->key->conf.hw_key_idx; + info->control.hw_key = &tx->key->conf; return 0; } @@ -246,28 +205,16 @@ ieee80211_tx_result ieee80211_crypto_tkip_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; - int wpa_test = 0, test = 0; - tx->control->icv_len = TKIP_ICV_LEN; - tx->control->iv_len = TKIP_IV_LEN; ieee80211_tx_set_protected(tx); - if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && - !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) && - !wpa_test) { - /* hwaccel - with no need for preallocated room for IV/ICV */ - tx->control->key_idx = tx->key->conf.hw_key_idx; - return TX_CONTINUE; - } - - if (tkip_encrypt_skb(tx, skb, test) < 0) + if (tkip_encrypt_skb(tx, skb) < 0) return TX_DROP; if (tx->extra_frag) { int i; for (i = 0; i < tx->num_extra_frag; i++) { - if (tkip_encrypt_skb(tx, tx->extra_frag[i], test) - < 0) + if (tkip_encrypt_skb(tx, tx->extra_frag[i]) < 0) return TX_DROP; } } @@ -280,16 +227,14 @@ ieee80211_rx_result ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - u16 fc; int hdrlen, res, hwaccel = 0, wpa_test = 0; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; DECLARE_MAC_BUF(mac); - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + if (!ieee80211_is_data(hdr->frame_control)) return RX_CONTINUE; if (!rx->sta || skb->len - hdrlen < 12) @@ -315,15 +260,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) hdr->addr1, hwaccel, rx->queue, &rx->tkip_iv32, &rx->tkip_iv16); - if (res != TKIP_DECRYPT_OK || wpa_test) { -#ifdef CONFIG_MAC80211_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: TKIP decrypt failed for RX " - "frame from %s (res=%d)\n", rx->dev->name, - print_mac(mac, rx->sta->addr), res); -#endif /* CONFIG_MAC80211_DEBUG */ + if (res != TKIP_DECRYPT_OK || wpa_test) return RX_DROP_UNUSABLE; - } /* Trim ICV */ skb_trim(skb, skb->len - TKIP_ICV_LEN); @@ -336,70 +274,68 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) } -static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad, +static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, int encrypted) { - u16 fc; - int a4_included, qos_included; - u8 qos_tid, *fc_pos, *data, *sa, *da; - int len_a; - size_t data_len; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + __le16 mask_fc; + int a4_included; + u8 qos_tid; + u8 *b_0, *aad; + u16 data_len, len_a; + unsigned int hdrlen; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - fc_pos = (u8 *) &hdr->frame_control; - fc = fc_pos[0] ^ (fc_pos[1] << 8); - a4_included = (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS); - - ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len); - data_len -= CCMP_HDR_LEN + (encrypted ? CCMP_MIC_LEN : 0); - if (qos_tid & 0x80) { - qos_included = 1; - qos_tid &= 0x0f; - } else - qos_included = 0; - /* First block, b_0 */ + b_0 = scratch + 3 * AES_BLOCK_LEN; + aad = scratch + 4 * AES_BLOCK_LEN; + + /* + * Mask FC: zero subtype b4 b5 b6 + * Retry, PwrMgt, MoreData; set Protected + */ + mask_fc = hdr->frame_control; + mask_fc &= ~cpu_to_le16(0x0070 | IEEE80211_FCTL_RETRY | + IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); + mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + len_a = hdrlen - 2; + a4_included = ieee80211_has_a4(hdr->frame_control); + + if (ieee80211_is_data_qos(hdr->frame_control)) + qos_tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + else + qos_tid = 0; + + data_len = skb->len - hdrlen - CCMP_HDR_LEN; + if (encrypted) + data_len -= CCMP_MIC_LEN; + /* First block, b_0 */ b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */ /* Nonce: QoS Priority | A2 | PN */ b_0[1] = qos_tid; - memcpy(&b_0[2], hdr->addr2, 6); + memcpy(&b_0[2], hdr->addr2, ETH_ALEN); memcpy(&b_0[8], pn, CCMP_PN_LEN); /* l(m) */ - b_0[14] = (data_len >> 8) & 0xff; - b_0[15] = data_len & 0xff; - + put_unaligned_be16(data_len, &b_0[14]); /* AAD (extra authenticate-only data) / masked 802.11 header * FC | A1 | A2 | A3 | SC | [A4] | [QC] */ - - len_a = a4_included ? 28 : 22; - if (qos_included) - len_a += 2; - - aad[0] = 0; /* (len_a >> 8) & 0xff; */ - aad[1] = len_a & 0xff; - /* Mask FC: zero subtype b4 b5 b6 */ - aad[2] = fc_pos[0] & ~(BIT(4) | BIT(5) | BIT(6)); - /* Retry, PwrMgt, MoreData; set Protected */ - aad[3] = (fc_pos[1] & ~(BIT(3) | BIT(4) | BIT(5))) | BIT(6); - memcpy(&aad[4], &hdr->addr1, 18); + put_unaligned_be16(len_a, &aad[0]); + put_unaligned(mask_fc, (__le16 *)&aad[2]); + memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN); /* Mask Seq#, leave Frag# */ aad[22] = *((u8 *) &hdr->seq_ctrl) & 0x0f; aad[23] = 0; + if (a4_included) { - memcpy(&aad[24], hdr->addr4, 6); - aad[30] = 0; + memcpy(&aad[24], hdr->addr4, ETH_ALEN); + aad[30] = qos_tid; aad[31] = 0; - } else - memset(&aad[24], 0, 8); - if (qos_included) { - u8 *dpos = &aad[a4_included ? 30 : 24]; - - /* Mask QoS Control field */ - dpos[0] = qos_tid; - dpos[1] = 0; + } else { + memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN); + aad[24] = qos_tid; } } @@ -429,36 +365,37 @@ static inline int ccmp_hdr2pn(u8 *pn, u8 *hdr) } -static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, - struct sk_buff *skb, int test) +static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; - int hdrlen, len, tailneed; - u16 fc; - u8 *pos, *pn, *b_0, *aad, *scratch; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + int hdrlen, len, tail; + u8 *pos, *pn; int i; - scratch = key->u.ccmp.tx_crypto_buf; - b_0 = scratch + 3 * AES_BLOCK_LEN; - aad = scratch + 4 * AES_BLOCK_LEN; + info->control.icv_len = CCMP_MIC_LEN; + info->control.iv_len = CCMP_HDR_LEN; - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + /* hwaccel - with no need for preallocated room for CCMP " + * header or MIC fields */ + info->control.hw_key = &tx->key->conf; + return 0; + } + + hdrlen = ieee80211_hdrlen(hdr->frame_control); len = skb->len - hdrlen; if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) - tailneed = 0; + tail = 0; else - tailneed = CCMP_MIC_LEN; - - if ((skb_headroom(skb) < CCMP_HDR_LEN || - skb_tailroom(skb) < tailneed)) { - I802_DEBUG_INC(tx->local->tx_expand_skb_head); - if (unlikely(pskb_expand_head(skb, CCMP_HDR_LEN, tailneed, - GFP_ATOMIC))) - return -1; - } + tail = CCMP_MIC_LEN; + + if (WARN_ON(skb_tailroom(skb) < tail || + skb_headroom(skb) < CCMP_HDR_LEN)) + return -1; pos = skb_push(skb, CCMP_HDR_LEN); memmove(pos, pos + CCMP_HDR_LEN, hdrlen); @@ -478,13 +415,13 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { /* hwaccel - with preallocated room for CCMP header */ - tx->control->key_idx = key->conf.hw_key_idx; + info->control.hw_key = &tx->key->conf; return 0; } pos += CCMP_HDR_LEN; - ccmp_special_blocks(skb, pn, b_0, aad, 0); - ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, b_0, aad, pos, len, + ccmp_special_blocks(skb, pn, key->u.ccmp.tx_crypto_buf, 0); + ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, key->u.ccmp.tx_crypto_buf, pos, len, pos, skb_put(skb, CCMP_MIC_LEN)); return 0; @@ -495,28 +432,16 @@ ieee80211_tx_result ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; - int test = 0; - tx->control->icv_len = CCMP_MIC_LEN; - tx->control->iv_len = CCMP_HDR_LEN; ieee80211_tx_set_protected(tx); - if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && - !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { - /* hwaccel - with no need for preallocated room for CCMP " - * header or MIC fields */ - tx->control->key_idx = tx->key->conf.hw_key_idx; - return TX_CONTINUE; - } - - if (ccmp_encrypt_skb(tx, skb, test) < 0) + if (ccmp_encrypt_skb(tx, skb) < 0) return TX_DROP; if (tx->extra_frag) { int i; for (i = 0; i < tx->num_extra_frag; i++) { - if (ccmp_encrypt_skb(tx, tx->extra_frag[i], test) - < 0) + if (ccmp_encrypt_skb(tx, tx->extra_frag[i]) < 0) return TX_DROP; } } @@ -528,8 +453,7 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) ieee80211_rx_result ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - u16 fc; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int hdrlen; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; @@ -537,10 +461,9 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) int data_len; DECLARE_MAC_BUF(mac); - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + if (!ieee80211_is_data(hdr->frame_control)) return RX_CONTINUE; data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; @@ -554,41 +477,19 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) (void) ccmp_hdr2pn(pn, skb->data + hdrlen); if (memcmp(pn, key->u.ccmp.rx_pn[rx->queue], CCMP_PN_LEN) <= 0) { -#ifdef CONFIG_MAC80211_DEBUG - u8 *ppn = key->u.ccmp.rx_pn[rx->queue]; - - printk(KERN_DEBUG "%s: CCMP replay detected for RX frame from " - "%s (RX PN %02x%02x%02x%02x%02x%02x <= prev. PN " - "%02x%02x%02x%02x%02x%02x)\n", rx->dev->name, - print_mac(mac, rx->sta->addr), - pn[0], pn[1], pn[2], pn[3], pn[4], pn[5], - ppn[0], ppn[1], ppn[2], ppn[3], ppn[4], ppn[5]); -#endif /* CONFIG_MAC80211_DEBUG */ key->u.ccmp.replays++; return RX_DROP_UNUSABLE; } if (!(rx->status->flag & RX_FLAG_DECRYPTED)) { /* hardware didn't decrypt/verify MIC */ - u8 *scratch, *b_0, *aad; - - scratch = key->u.ccmp.rx_crypto_buf; - b_0 = scratch + 3 * AES_BLOCK_LEN; - aad = scratch + 4 * AES_BLOCK_LEN; - - ccmp_special_blocks(skb, pn, b_0, aad, 1); + ccmp_special_blocks(skb, pn, key->u.ccmp.rx_crypto_buf, 1); if (ieee80211_aes_ccm_decrypt( - key->u.ccmp.tfm, scratch, b_0, aad, + key->u.ccmp.tfm, key->u.ccmp.rx_crypto_buf, skb->data + hdrlen + CCMP_HDR_LEN, data_len, skb->data + skb->len - CCMP_MIC_LEN, skb->data + hdrlen + CCMP_HDR_LEN)) { -#ifdef CONFIG_MAC80211_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: CCMP decrypt failed " - "for RX frame from %s\n", rx->dev->name, - print_mac(mac, rx->sta->addr)); -#endif /* CONFIG_MAC80211_DEBUG */ return RX_DROP_UNUSABLE; } } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c1fc0f1a641c..ee898e74808d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -33,9 +33,8 @@ config NF_CONNTRACK into connections. This is required to do Masquerading or other kinds of Network - Address Translation (except for Fast NAT). It can also be used to - enhance packet filtering (see `Connection state match support' - below). + Address Translation. It can also be used to enhance packet + filtering (see `Connection state match support' below). To compile it as a module, choose M here. If unsure, say N. @@ -50,6 +49,15 @@ config NF_CT_ACCT Those counters can be used for flow-based accounting or the `connbytes' match. + Please note that currently this option only sets a default state. + You may change it at boot time with nf_conntrack.acct=0/1 kernel + paramater or by loading the nf_conntrack module with acct=0/1. + + You may also disable/enable it on a running system with: + sysctl net.netfilter.nf_conntrack_acct=0/1 + + This option will be removed in 2.6.29. + If unsure, say `N'. config NF_CONNTRACK_MARK @@ -90,6 +98,7 @@ config NF_CT_PROTO_DCCP tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' depends on EXPERIMENTAL && NF_CONNTRACK depends on NETFILTER_ADVANCED + default IP_DCCP help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on DCCP connections. @@ -104,6 +113,7 @@ config NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' depends on EXPERIMENTAL && NF_CONNTRACK depends on NETFILTER_ADVANCED + default IP_SCTP help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on SCTP connections. @@ -532,6 +542,7 @@ config NETFILTER_XT_MATCH_DCCP tristate '"dccp" protocol match support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + default IP_DCCP help With this option enabled, you will be able to use the iptables `dccp' match in order to match on DCCP source/destination ports @@ -725,6 +736,7 @@ config NETFILTER_XT_MATCH_SCTP tristate '"sctp" protocol match support (EXPERIMENTAL)' depends on NETFILTER_XTABLES && EXPERIMENTAL depends on NETFILTER_ADVANCED + default IP_SCTP help With this option enabled, you will be able to use the `sctp' match in order to match on SCTP source/destination ports diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5c4b183f6422..3bd2cc556aea 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,6 +1,6 @@ netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o -nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o +nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c new file mode 100644 index 000000000000..59bd8b903a19 --- /dev/null +++ b/net/netfilter/nf_conntrack_acct.c @@ -0,0 +1,104 @@ +/* Accouting handling for netfilter. */ + +/* + * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/netfilter.h> +#include <linux/kernel.h> +#include <linux/moduleparam.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_acct.h> + +#ifdef CONFIG_NF_CT_ACCT +#define NF_CT_ACCT_DEFAULT 1 +#else +#define NF_CT_ACCT_DEFAULT 0 +#endif + +int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; +EXPORT_SYMBOL_GPL(nf_ct_acct); + +module_param_named(acct, nf_ct_acct, bool, 0644); +MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *acct_sysctl_header; +static struct ctl_table acct_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_acct", + .data = &nf_ct_acct, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + {} +}; +#endif /* CONFIG_SYSCTL */ + +unsigned int +seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) +{ + struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; + + return seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)acct[dir].packets, + (unsigned long long)acct[dir].bytes); +}; +EXPORT_SYMBOL_GPL(seq_print_acct); + +static struct nf_ct_ext_type acct_extend __read_mostly = { + .len = sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]), + .align = __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]), + .id = NF_CT_EXT_ACCT, +}; + +int nf_conntrack_acct_init(void) +{ + int ret; + +#ifdef CONFIG_NF_CT_ACCT + printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n"); + printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); + printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); +#endif + + ret = nf_ct_extend_register(&acct_extend); + if (ret < 0) { + printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); + return ret; + } + +#ifdef CONFIG_SYSCTL + acct_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path, + acct_sysctl_table); + + if (!acct_sysctl_header) { + nf_ct_extend_unregister(&acct_extend); + + printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); + return -ENOMEM; + } +#endif + + return 0; +} + +void nf_conntrack_acct_fini(void) +{ +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(acct_sysctl_header); +#endif + nf_ct_extend_unregister(&acct_extend); +} diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c4b1799da5d7..c519d090bdb9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -37,6 +37,7 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_acct.h> #define NF_CONNTRACK_VERSION "0.5.0" @@ -196,8 +197,6 @@ destroy_conntrack(struct nf_conntrack *nfct) if (l4proto && l4proto->destroy) l4proto->destroy(ct); - nf_ct_ext_destroy(ct); - rcu_read_unlock(); spin_lock_bh(&nf_conntrack_lock); @@ -466,7 +465,8 @@ static noinline int early_drop(unsigned int hash) } struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_tuple *repl) + const struct nf_conntrack_tuple *repl, + gfp_t gfp) { struct nf_conn *ct = NULL; @@ -491,7 +491,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, } } - ct = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC); + ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&nf_conntrack_count); @@ -520,6 +520,7 @@ static void nf_conntrack_free_rcu(struct rcu_head *head) void nf_conntrack_free(struct nf_conn *ct) { + nf_ct_ext_destroy(ct); call_rcu(&ct->rcu, nf_conntrack_free_rcu); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -543,7 +544,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } - ct = nf_conntrack_alloc(tuple, &repl_tuple); + ct = nf_conntrack_alloc(tuple, &repl_tuple, GFP_ATOMIC); if (ct == NULL || IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; @@ -555,6 +556,8 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } + nf_ct_acct_ext_add(ct, GFP_ATOMIC); + spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(tuple); if (exp) { @@ -828,17 +831,16 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, } acct: -#ifdef CONFIG_NF_CT_ACCT if (do_acct) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); + struct nf_conn_counter *acct; - if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) - || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) - event |= IPCT_COUNTER_FILLING; + acct = nf_conn_acct_find(ct); + if (acct) { + acct[CTINFO2DIR(ctinfo)].packets++; + acct[CTINFO2DIR(ctinfo)].bytes += + skb->len - skb_network_offset(skb); + } } -#endif spin_unlock_bh(&nf_conntrack_lock); @@ -848,6 +850,32 @@ acct: } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); +bool __nf_ct_kill_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + int do_acct) +{ + if (do_acct) { + struct nf_conn_counter *acct; + + spin_lock_bh(&nf_conntrack_lock); + acct = nf_conn_acct_find(ct); + if (acct) { + acct[CTINFO2DIR(ctinfo)].packets++; + acct[CTINFO2DIR(ctinfo)].bytes += + skb->len - skb_network_offset(skb); + } + spin_unlock_bh(&nf_conntrack_lock); + } + + if (del_timer(&ct->timeout)) { + ct->timeout.function((unsigned long)ct); + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); + #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include <linux/netfilter/nfnetlink.h> @@ -1007,6 +1035,7 @@ void nf_conntrack_cleanup(void) nf_conntrack_proto_fini(); nf_conntrack_helper_fini(); nf_conntrack_expect_fini(); + nf_conntrack_acct_fini(); } struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced) @@ -1146,6 +1175,10 @@ int __init nf_conntrack_init(void) if (ret < 0) goto out_fini_expect; + ret = nf_conntrack_acct_init(); + if (ret < 0) + goto out_fini_helper; + /* For use by REJECT target */ rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); @@ -1158,6 +1191,8 @@ int __init nf_conntrack_init(void) return ret; +out_fini_helper: + nf_conntrack_helper_fini(); out_fini_expect: nf_conntrack_expect_fini(); out_fini_proto: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e31beeb33b2b..e8f0dead267f 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -587,10 +587,10 @@ int __init nf_conntrack_expect_init(void) return 0; err3: + kmem_cache_destroy(nf_ct_expect_cachep); +err2: nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, nf_ct_expect_hsize); -err2: - kmem_cache_destroy(nf_ct_expect_cachep); err1: return err; } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index bcc19fa4ed1e..4b2c769d555f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -59,12 +59,19 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) if (!*ext) return NULL; + INIT_RCU_HEAD(&(*ext)->rcu); (*ext)->offset[id] = off; (*ext)->len = len; return (void *)(*ext) + off; } +static void __nf_ct_ext_free_rcu(struct rcu_head *head) +{ + struct nf_ct_ext *ext = container_of(head, struct nf_ct_ext, rcu); + kfree(ext); +} + void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) { struct nf_ct_ext *new; @@ -88,13 +95,11 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) newlen = newoff + t->len; rcu_read_unlock(); - if (newlen >= ksize(ct->ext)) { - new = kmalloc(newlen, gfp); - if (!new) - return NULL; - - memcpy(new, ct->ext, ct->ext->len); + new = __krealloc(ct->ext, newlen, gfp); + if (!new) + return NULL; + if (new != ct->ext) { for (i = 0; i < NF_CT_EXT_NUM; i++) { if (!nf_ct_ext_exist(ct, i)) continue; @@ -106,14 +111,14 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) (void *)ct->ext + ct->ext->offset[i]); rcu_read_unlock(); } - kfree(ct->ext); + call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu); ct->ext = new; } - ct->ext->offset[id] = newoff; - ct->ext->len = newlen; - memset((void *)ct->ext + newoff, 0, newlen - newoff); - return (void *)ct->ext + newoff; + new->offset[id] = newoff; + new->len = newlen; + memset((void *)new + newoff, 0, newlen - newoff); + return (void *)new + newoff; } EXPORT_SYMBOL(__nf_ct_ext_add); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 95da1a24aab7..2f83c158934d 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -619,6 +619,7 @@ static const struct nf_conntrack_expect_policy h245_exp_policy = { static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { .name = "H.245", .me = THIS_MODULE, + .tuple.src.l3num = AF_UNSPEC, .tuple.dst.protonum = IPPROTO_UDP, .help = h245_help, .expect_policy = &h245_exp_policy, @@ -1765,6 +1766,7 @@ static void __exit nf_conntrack_h323_fini(void) nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); + nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); kfree(h323_buffer); pr_debug("nf_ct_h323: fini\n"); } @@ -1777,28 +1779,34 @@ static int __init nf_conntrack_h323_init(void) h323_buffer = kmalloc(65536, GFP_KERNEL); if (!h323_buffer) return -ENOMEM; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245); if (ret < 0) goto err1; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]); if (ret < 0) goto err2; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]); if (ret < 0) goto err3; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]); if (ret < 0) goto err4; + ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); + if (ret < 0) + goto err5; pr_debug("nf_ct_h323: init success\n"); return 0; -err4: +err5: nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); -err3: +err4: nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); -err2: +err3: nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); +err2: + nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); err1: + kfree(h323_buffer); return ret; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 7d1b11703741..8e0b4c8f62a8 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -20,6 +20,7 @@ #include <linux/err.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/rculist.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l3proto.h> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 16774ecd1c4e..105a616c5c78 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -4,7 +4,7 @@ * (C) 2001 by Jay Schulist <jschlst@samba.org> * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org> * (C) 2003 by Patrick Mchardy <kaber@trash.net> - * (C) 2005-2007 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2005-2008 by Pablo Neira Ayuso <pablo@netfilter.org> * * Initial connection tracking via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> +#include <linux/rculist.h> #include <linux/types.h> #include <linux/timer.h> #include <linux/skbuff.h> @@ -36,6 +37,7 @@ #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> +#include <net/netfilter/nf_conntrack_acct.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_protocol.h> @@ -205,22 +207,26 @@ nla_put_failure: return -1; } -#ifdef CONFIG_NF_CT_ACCT static int ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, enum ip_conntrack_dir dir) { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nlattr *nest_count; + const struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; nest_count = nla_nest_start(skb, type | NLA_F_NESTED); if (!nest_count) goto nla_put_failure; - NLA_PUT_BE32(skb, CTA_COUNTERS32_PACKETS, - htonl(ct->counters[dir].packets)); - NLA_PUT_BE32(skb, CTA_COUNTERS32_BYTES, - htonl(ct->counters[dir].bytes)); + NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, + cpu_to_be64(acct[dir].packets)); + NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, + cpu_to_be64(acct[dir].bytes)); nla_nest_end(skb, nest_count); @@ -229,9 +235,6 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, nla_put_failure: return -1; } -#else -#define ctnetlink_dump_counters(a, b, c) (0) -#endif #ifdef CONFIG_NF_CONNTRACK_MARK static inline int @@ -472,14 +475,17 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, goto nla_put_failure; nla_nest_end(skb, nest_parms); + if (ctnetlink_dump_id(skb, ct) < 0) + goto nla_put_failure; + + if (ctnetlink_dump_status(skb, ct) < 0) + goto nla_put_failure; + if (events & IPCT_DESTROY) { if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) goto nla_put_failure; } else { - if (ctnetlink_dump_status(skb, ct) < 0) - goto nla_put_failure; - if (ctnetlink_dump_timeout(skb, ct) < 0) goto nla_put_failure; @@ -497,11 +503,6 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, goto nla_put_failure; #endif - if (events & IPCT_COUNTER_FILLING && - (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)) - goto nla_put_failure; - if (events & IPCT_RELATED && ctnetlink_dump_master(skb, ct) < 0) goto nla_put_failure; @@ -572,11 +573,15 @@ restart: cb->args[1] = (unsigned long)ct; goto out; } -#ifdef CONFIG_NF_CT_ACCT + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) - memset(&ct->counters, 0, sizeof(ct->counters)); -#endif + IPCTNL_MSG_CT_GET_CTRZERO) { + struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (acct) + memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); + } } if (cb->args[1]) { cb->args[1] = 0; @@ -809,9 +814,8 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, return -ENOENT; } } - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill(ct); nf_ct_put(ct); return 0; @@ -829,14 +833,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nlh->nlmsg_flags & NLM_F_DUMP) { -#ifndef CONFIG_NF_CT_ACCT - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) - return -ENOTSUPP; -#endif + if (nlh->nlmsg_flags & NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, ctnetlink_done); - } if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); @@ -888,20 +887,19 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) /* unchangeable */ - return -EINVAL; + return -EBUSY; if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) /* SEEN_REPLY bit can only be set */ - return -EINVAL; - + return -EBUSY; if (d & IPS_ASSURED && !(status & IPS_ASSURED)) /* ASSURED bit can only be set */ - return -EINVAL; + return -EBUSY; if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { #ifndef CONFIG_NF_NAT_NEEDED - return -EINVAL; + return -EOPNOTSUPP; #else struct nf_nat_range range; @@ -942,7 +940,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) /* don't change helper of sibling connections */ if (ct->master) - return -EINVAL; + return -EBUSY; err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); if (err < 0) @@ -960,7 +958,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) helper = __nf_conntrack_helper_find_byname(helpname); if (helper == NULL) - return -EINVAL; + return -EOPNOTSUPP; if (help) { if (help->helper == helper) @@ -1127,7 +1125,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conn_help *help; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(otuple, rtuple); + ct = nf_conntrack_alloc(otuple, rtuple, GFP_KERNEL); if (ct == NULL || IS_ERR(ct)) return -ENOMEM; @@ -1150,6 +1148,8 @@ ctnetlink_create_conntrack(struct nlattr *cda[], goto err; } + nf_ct_acct_ext_add(ct, GFP_KERNEL); + #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); @@ -1255,12 +1255,12 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { /* we only allow nat config for new conntracks */ if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { - err = -EINVAL; + err = -EOPNOTSUPP; goto out_unlock; } /* can't link an existing conntrack to a master */ if (cda[CTA_TUPLE_MASTER]) { - err = -EINVAL; + err = -EOPNOTSUPP; goto out_unlock; } err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), @@ -1605,7 +1605,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, h = __nf_conntrack_helper_find_byname(name); if (!h) { spin_unlock_bh(&nf_conntrack_lock); - return -EINVAL; + return -EOPNOTSUPP; } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index afb4a1861d2c..e7866dd3cde6 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -475,8 +475,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, if (type == DCCP_PKT_RESET && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* Tear down connection immediately if only reply is a RESET */ - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index cbf2e27a22b2..30aa5b94a771 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -463,6 +463,82 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, + const struct nf_conn *ct) +{ + struct nlattr *nest_parms; + + read_lock_bh(&sctp_lock); + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + NLA_PUT_U8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state); + + NLA_PUT_BE32(skb, + CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, + ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]); + + NLA_PUT_BE32(skb, + CTA_PROTOINFO_SCTP_VTAG_REPLY, + ct->proto.sctp.vtag[IP_CT_DIR_REPLY]); + + read_unlock_bh(&sctp_lock); + + nla_nest_end(skb, nest_parms); + + return 0; + +nla_put_failure: + read_unlock_bh(&sctp_lock); + return -1; +} + +static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = { + [CTA_PROTOINFO_SCTP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NLA_U32 }, + [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 }, +}; + +static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) +{ + struct nlattr *attr = cda[CTA_PROTOINFO_SCTP]; + struct nlattr *tb[CTA_PROTOINFO_SCTP_MAX+1]; + int err; + + /* updates may not contain the internal protocol info, skip parsing */ + if (!attr) + return 0; + + err = nla_parse_nested(tb, + CTA_PROTOINFO_SCTP_MAX, + attr, + sctp_nla_policy); + if (err < 0) + return err; + + if (!tb[CTA_PROTOINFO_SCTP_STATE] || + !tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] || + !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]) + return -EINVAL; + + write_lock_bh(&sctp_lock); + ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); + ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = + nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); + ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = + nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); + write_unlock_bh(&sctp_lock); + + return 0; +} +#endif + #ifdef CONFIG_SYSCTL static unsigned int sctp_sysctl_table_users; static struct ctl_table_header *sctp_sysctl_header; @@ -591,6 +667,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .new = sctp_new, .me = THIS_MODULE, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nlattr = sctp_to_nlattr, + .from_nlattr = nlattr_to_sctp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, @@ -617,6 +695,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .new = sctp_new, .me = THIS_MODULE, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nlattr = sctp_to_nlattr, + .from_nlattr = nlattr_to_sctp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ba94004fe323..420a10d8eb1e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -331,12 +331,13 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph) I. Upper bound for valid data: seq <= sender.td_maxend II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin - III. Upper bound for valid ack: sack <= receiver.td_end - IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW + III. Upper bound for valid (s)ack: sack <= receiver.td_end + IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW - where sack is the highest right edge of sack block found in the packet. + where sack is the highest right edge of sack block found in the packet + or ack in the case of packet without SACK option. - The upper bound limit for a valid ack is not ignored - + The upper bound limit for a valid (s)ack is not ignored - we doesn't have to deal with fragments. */ @@ -606,12 +607,12 @@ static bool tcp_in_window(const struct nf_conn *ct, before(seq, sender->td_maxend + 1), after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), - after(ack, receiver->td_end - MAXACKWINDOW(sender))); + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender))) { + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { /* * Take into account window scaling (RFC 1323). */ @@ -843,9 +844,14 @@ static int tcp_packet(struct nf_conn *ct, /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ write_unlock_bh(&tcp_lock); - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); - return -NF_REPEAT; + + /* Only repeat if we can actually remove the timer. + * Destruction may already be in progress in process + * context and we must give it a chance to terminate. + */ + if (nf_ct_kill(ct)) + return -NF_REPEAT; + return -NF_DROP; } /* Fall through */ case TCP_CONNTRACK_IGNORE: @@ -877,8 +883,7 @@ static int tcp_packet(struct nf_conn *ct, if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill(ct); return -NF_DROP; } ct->proto.tcp.last_index = index; @@ -961,8 +966,7 @@ static int tcp_packet(struct nf_conn *ct, problem case, so we can delete the conntrack immediately. --RR */ if (th->rst) { - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 9f4900069561..2f9bbc058b48 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -870,6 +870,7 @@ static int process_sdp(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); unsigned int matchoff, matchlen; unsigned int mediaoff, medialen; unsigned int sdpoff; @@ -959,6 +960,9 @@ static int process_sdp(struct sk_buff *skb, if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr); + if (ret == NF_ACCEPT && i > 0) + help->help.ct_sip_info.invite_cseq = cseq; + return ret; } static int process_invite_response(struct sk_buff *skb, @@ -967,14 +971,14 @@ static int process_invite_response(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); - else { + else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); - return NF_ACCEPT; - } + return NF_ACCEPT; } static int process_update_response(struct sk_buff *skb, @@ -983,14 +987,14 @@ static int process_update_response(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); - else { + else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); - return NF_ACCEPT; - } + return NF_ACCEPT; } static int process_prack_response(struct sk_buff *skb, @@ -999,14 +1003,14 @@ static int process_prack_response(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); - else { + else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); - return NF_ACCEPT; - } + return NF_ACCEPT; } static int process_bye_request(struct sk_buff *skb, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 46ea542d0df9..869ef9349d0f 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -25,6 +25,7 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_acct.h> MODULE_LICENSE("GPL"); @@ -38,19 +39,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL_GPL(print_tuple); -#ifdef CONFIG_NF_CT_ACCT -static unsigned int -seq_print_counters(struct seq_file *s, - const struct ip_conntrack_counter *counter) -{ - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)counter->packets, - (unsigned long long)counter->bytes); -} -#else -#define seq_print_counters(x, y) 0 -#endif - struct ct_iter_state { unsigned int bucket; }; @@ -146,7 +134,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL])) + if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) return -ENOSPC; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) @@ -157,7 +145,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY])) + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) return -ENOSPC; if (test_bit(IPS_ASSURED_BIT, &ct->status)) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index bc11d7092032..9fda6ee95a31 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -92,10 +92,6 @@ void nf_log_packet(int pf, vsnprintf(prefix, sizeof(prefix), fmt, args); va_end(args); logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); - } else if (net_ratelimit()) { - printk(KERN_WARNING "nf_log_packet: can\'t log since " - "no backend logging module loaded in! Please either " - "load one, or disable logging explicitly\n"); } rcu_read_unlock(); } diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 69d699f95f4c..01489681fa96 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -65,7 +65,7 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, int pf, { struct nf_sockopt_ops *ops; - if (sock_net(sk) != &init_net) + if (!net_eq(sock_net(sk), &init_net)) return ERR_PTR(-ENOPROTOOPT); if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b8173af8c24a..9a35b57ab76d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -453,6 +453,14 @@ __build_packet_message(struct nfulnl_instance *inst, } } + if (indev && skb_mac_header_was_set(skb)) { + NLA_PUT_BE16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)); + NLA_PUT_BE16(inst->skb, NFULA_HWLEN, + htons(skb->dev->hard_header_len)); + NLA_PUT(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, + skb_mac_header(skb)); + } + if (skb->tstamp.tv64) { struct nfulnl_msg_packet_timestamp ts; struct timeval tv = ktime_to_timeval(skb->tstamp); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3447025ce068..8c860112ce05 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -243,7 +243,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, switch ((enum nfqnl_config_mode)queue->copy_mode) { case NFQNL_COPY_META: case NFQNL_COPY_NONE: - data_len = 0; break; case NFQNL_COPY_PACKET: @@ -556,7 +555,7 @@ nfqnl_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* Drop any packets associated with the downed device */ diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 211189eb2b67..76ca1f2421eb 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -8,7 +8,7 @@ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> * by Henrik Nordstrom <hno@marasystems.com> * - * (C) 2006 Red Hat, Inc., James Morris <jmorris@redhat.com> + * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -94,6 +94,12 @@ connsecmark_tg_check(const char *tablename, const void *entry, { const struct xt_connsecmark_target_info *info = targinfo; + if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) { + printk(KERN_INFO PFX "target only valid in the \'mangle\' " + "or \'security\' tables, not \'%s\'.\n", tablename); + return false; + } + switch (info->mode) { case CONNSECMARK_SAVE: case CONNSECMARK_RESTORE: @@ -126,7 +132,6 @@ static struct xt_target connsecmark_tg_reg[] __read_mostly = { .destroy = connsecmark_tg_destroy, .target = connsecmark_tg, .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", .me = THIS_MODULE, }, { @@ -136,7 +141,6 @@ static struct xt_target connsecmark_tg_reg[] __read_mostly = { .destroy = connsecmark_tg_destroy, .target = connsecmark_tg, .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", .me = THIS_MODULE, }, }; diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index c0284856ccd4..94f87ee7552b 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -5,7 +5,7 @@ * Based on the nfmark match by: * (C) 1999-2001 Marc Boucher <marc@mbsi.ca> * - * (C) 2006 Red Hat, Inc., James Morris <jmorris@redhat.com> + * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -89,6 +89,12 @@ secmark_tg_check(const char *tablename, const void *entry, { struct xt_secmark_target_info *info = targinfo; + if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) { + printk(KERN_INFO PFX "target only valid in the \'mangle\' " + "or \'security\' tables, not \'%s\'.\n", tablename); + return false; + } + if (mode && mode != info->mode) { printk(KERN_INFO PFX "mode already set to %hu cannot mix with " "rules for mode %hu\n", mode, info->mode); @@ -127,7 +133,6 @@ static struct xt_target secmark_tg_reg[] __read_mostly = { .destroy = secmark_tg_destroy, .target = secmark_tg, .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", .me = THIS_MODULE, }, { @@ -137,7 +142,6 @@ static struct xt_target secmark_tg_reg[] __read_mostly = { .destroy = secmark_tg_destroy, .target = secmark_tg, .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", .me = THIS_MODULE, }, }; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 217e2b686322..beb5094703cb 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -147,17 +147,21 @@ tcpmss_mangle_packet(struct sk_buff *skb, return TCPOLEN_MSS; } -static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph) +static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, + unsigned int family) { - struct flowi fl = { - .fl4_dst = iph->saddr, - }; + struct flowi fl = {}; const struct nf_afinfo *ai; struct rtable *rt = NULL; u_int32_t mtu = ~0U; + if (family == PF_INET) + fl.fl4_dst = ip_hdr(skb)->saddr; + else + fl.fl6_dst = ipv6_hdr(skb)->saddr; + rcu_read_lock(); - ai = nf_get_afinfo(AF_INET); + ai = nf_get_afinfo(family); if (ai != NULL) ai->route((struct dst_entry **)&rt, &fl); rcu_read_unlock(); @@ -178,7 +182,8 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in, __be16 newlen; int ret; - ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu4(iph), + ret = tcpmss_mangle_packet(skb, targinfo, + tcpmss_reverse_mtu(skb, PF_INET), iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) @@ -193,28 +198,6 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in, } #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static u_int32_t tcpmss_reverse_mtu6(const struct ipv6hdr *iph) -{ - struct flowi fl = { - .fl6_dst = iph->saddr, - }; - const struct nf_afinfo *ai; - struct rtable *rt = NULL; - u_int32_t mtu = ~0U; - - rcu_read_lock(); - ai = nf_get_afinfo(AF_INET6); - if (ai != NULL) - ai->route((struct dst_entry **)&rt, &fl); - rcu_read_unlock(); - - if (rt != NULL) { - mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); - } - return mtu; -} - static unsigned int tcpmss_tg6(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -229,7 +212,8 @@ tcpmss_tg6(struct sk_buff *skb, const struct net_device *in, tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); if (tcphoff < 0) return NF_DROP; - ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu6(ipv6h), + ret = tcpmss_mangle_packet(skb, targinfo, + tcpmss_reverse_mtu(skb, PF_INET6), tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index d7e8983cd37f..3e39c4fe1931 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -8,6 +8,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connbytes.h> #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_acct.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); @@ -27,12 +28,15 @@ connbytes_mt(const struct sk_buff *skb, const struct net_device *in, u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; u_int64_t pkts = 0; - const struct ip_conntrack_counter *counters; + const struct nf_conn_counter *counters; ct = nf_ct_get(skb, &ctinfo); if (!ct) return false; - counters = ct->counters; + + counters = nf_conn_acct_find(ct); + if (!counters) + return false; switch (sinfo->what) { case XT_CONNBYTES_PKTS: diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 2e89a00df92c..70907f6baac3 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -73,7 +73,8 @@ connlimit_iphash6(const union nf_inet_addr *addr, static inline bool already_closed(const struct nf_conn *conn) { if (nf_ct_protonum(conn) == IPPROTO_TCP) - return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT; + return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT || + conn->proto.tcp.state == TCP_CONNTRACK_CLOSE; else return 0; } diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 500528d60cd7..c63e9333c755 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -179,3 +179,5 @@ module_exit(iprange_mt_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>, Jan Engelhardt <jengelh@computergmbh.de>"); MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching"); +MODULE_ALIAS("ipt_iprange"); +MODULE_ALIAS("ip6t_iprange"); diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 72f694d947f4..4903182a062b 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -29,12 +29,16 @@ string_mt(const struct sk_buff *skb, const struct net_device *in, { const struct xt_string_info *conf = matchinfo; struct ts_state state; + int invert; memset(&state, 0, sizeof(struct ts_state)); + invert = (match->revision == 0 ? conf->u.v0.invert : + conf->u.v1.flags & XT_STRING_FLAG_INVERT); + return (skb_find_text((struct sk_buff *)skb, conf->from_offset, conf->to_offset, conf->config, &state) - != UINT_MAX) ^ conf->invert; + != UINT_MAX) ^ invert; } #define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m)) @@ -46,6 +50,7 @@ string_mt_check(const char *tablename, const void *ip, { struct xt_string_info *conf = matchinfo; struct ts_config *ts_conf; + int flags = TS_AUTOLOAD; /* Damn, can't handle this case properly with iptables... */ if (conf->from_offset > conf->to_offset) @@ -54,8 +59,15 @@ string_mt_check(const char *tablename, const void *ip, return false; if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) return false; + if (match->revision == 1) { + if (conf->u.v1.flags & + ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT)) + return false; + if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE) + flags |= TS_IGNORECASE; + } ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, - GFP_KERNEL, TS_AUTOLOAD); + GFP_KERNEL, flags); if (IS_ERR(ts_conf)) return false; @@ -72,6 +84,17 @@ static void string_mt_destroy(const struct xt_match *match, void *matchinfo) static struct xt_match string_mt_reg[] __read_mostly = { { .name = "string", + .revision = 0, + .family = AF_INET, + .checkentry = string_mt_check, + .match = string_mt, + .destroy = string_mt_destroy, + .matchsize = sizeof(struct xt_string_info), + .me = THIS_MODULE + }, + { + .name = "string", + .revision = 1, .family = AF_INET, .checkentry = string_mt_check, .match = string_mt, @@ -81,6 +104,17 @@ static struct xt_match string_mt_reg[] __read_mostly = { }, { .name = "string", + .revision = 0, + .family = AF_INET6, + .checkentry = string_mt_check, + .match = string_mt, + .destroy = string_mt_destroy, + .matchsize = sizeof(struct xt_string_info), + .me = THIS_MODULE + }, + { + .name = "string", + .revision = 1, .family = AF_INET6, .checkentry = string_mt_check, .match = string_mt, diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index ed76baab4734..9f328593287e 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -173,7 +173,7 @@ time_mt(const struct sk_buff *skb, const struct net_device *in, __net_timestamp((struct sk_buff *)skb); stamp = ktime_to_ns(skb->tstamp); - do_div(stamp, NSEC_PER_SEC); + stamp = div_s64(stamp, NSEC_PER_SEC); if (info->flags & XT_TIME_LOCAL_TZ) /* Adjust for local timezone */ diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index fdc14a0d21af..0aec318bf0ef 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -584,19 +584,14 @@ list_start: rcu_read_unlock(); genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto list_failure; - - return 0; + return genlmsg_reply(ans_skb, info); list_retry: /* XXX - this limit is a guesstimate */ if (nlsze_mult < 4) { rcu_read_unlock(); kfree_skb(ans_skb); - nlsze_mult++; + nlsze_mult *= 2; goto list_start; } list_failure_lock: diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 02c2f7c0b255..643c032a3a57 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -30,8 +30,7 @@ */ #include <linux/types.h> -#include <linux/rcupdate.h> -#include <linux/list.h> +#include <linux/rculist.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/string.h> diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 22c191267808..44be5d5261f4 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -386,11 +386,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto listdef_failure; - return 0; + return genlmsg_reply(ans_skb, info); listdef_failure_lock: rcu_read_unlock(); @@ -501,11 +497,7 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) goto version_failure; genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto version_failure; - return 0; + return genlmsg_reply(ans_skb, info); version_failure: kfree_skb(ans_skb); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 0099da5b2591..921c118ead89 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -954,7 +954,7 @@ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, struct net_device *dev = ptr; struct netlbl_unlhsh_iface *iface = NULL; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* XXX - should this be a check for NETDEV_DOWN or _UNREGISTER? */ @@ -1107,11 +1107,7 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) goto list_failure; genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto list_failure; - return 0; + return genlmsg_reply(ans_skb, info); list_failure: kfree_skb(ans_skb); @@ -1534,7 +1530,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, } } list_for_each_entry_rcu(addr6, &iface->addr6_list, list) { - if (addr6->valid || iter_addr6++ < skip_addr6) + if (!addr6->valid || iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9b97f8006c9c..b0eacc0007cc 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -158,9 +158,10 @@ static void netlink_sock_destruct(struct sock *sk) printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); return; } - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(!nlk_sk(sk)->groups); + + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(nlk_sk(sk)->groups); } /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on @@ -759,7 +760,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp) * 0: continue * 1: repeat lookup - reference dropped while waiting for socket memory. */ -int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, +int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk) { struct netlink_sock *nlk; @@ -886,13 +887,13 @@ retry: return netlink_unicast_kernel(sk, skb); if (sk_filter(sk, skb)) { - int err = skb->len; + err = skb->len; kfree_skb(skb); sock_put(sk); return err; } - err = netlink_attachskb(sk, skb, nonblock, &timeo, ssk); + err = netlink_attachskb(sk, skb, &timeo, ssk); if (err == 1) goto retry; if (err) diff --git a/net/netlink/attr.c b/net/netlink/attr.c index feb326f4a752..2d106cfe1d27 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -132,6 +132,7 @@ errout: * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream + * @policy: validation policy * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessable via the attribute type. Attributes with a type @@ -194,7 +195,7 @@ struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) /** * nla_strlcpy - Copy string attribute payload into a sized buffer * @dst: where to copy the string to - * @src: attribute to copy the string from + * @nla: attribute to copy the string from * @dstsize: size of destination buffer * * Copies at most dstsize - 1 bytes into the destination buffer. @@ -340,9 +341,9 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) } /** - * nla_reserve - reserve room for attribute without header + * nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on - * @len: length of attribute payload + * @attrlen: length of attribute payload * * Reserves room for attribute payload without a header. * @@ -400,13 +401,13 @@ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute header and payload. */ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) - return -1; + return -EMSGSIZE; __nla_put(skb, attrtype, attrlen, data); return 0; @@ -418,13 +419,13 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -1; + return -EMSGSIZE; __nla_put_nohdr(skb, attrlen, data); return 0; @@ -436,13 +437,13 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_append(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -1; + return -EMSGSIZE; memcpy(skb_put(skb, attrlen), data, attrlen); return 0; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index d16929c9b4bc..3e1191cecaf0 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -444,8 +444,11 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (ops->dumpit == NULL) return -EOPNOTSUPP; - return netlink_dump_start(genl_sock, skb, nlh, - ops->dumpit, ops->done); + genl_unlock(); + err = netlink_dump_start(genl_sock, skb, nlh, + ops->dumpit, ops->done); + genl_lock(); + return err; } if (ops->doit == NULL) @@ -554,7 +557,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, return genlmsg_end(skb, hdr); nla_put_failure: - return genlmsg_cancel(skb, hdr); + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; } static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, @@ -590,7 +594,8 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, return genlmsg_end(skb, hdr); nla_put_failure: - return genlmsg_cancel(skb, hdr); + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; } static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) @@ -601,9 +606,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; - if (chains_to_skip != 0) - genl_lock(); - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { if (i < chains_to_skip) continue; @@ -621,9 +623,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) } errout: - if (chains_to_skip != 0) - genl_unlock(); - cb->args[0] = i; cb->args[1] = n; @@ -768,7 +767,7 @@ static int __init genl_init(void) /* we'll bump the group number right afterwards */ genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, 0, - genl_rcv, NULL, THIS_MODULE); + genl_rcv, &genl_mutex, THIS_MODULE); if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4bae8b998cab..532e4faa29f7 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -73,6 +73,20 @@ static const struct proto_ops nr_proto_ops; * separate class since they always nest. */ static struct lock_class_key nr_netdev_xmit_lock_key; +static struct lock_class_key nr_netdev_addr_lock_key; + +static void nr_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key); +} + +static void nr_set_lockdep_key(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key); + netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL); +} /* * Socket removal during an interrupt is now safe. @@ -106,7 +120,7 @@ static int nr_device_event(struct notifier_block *this, unsigned long event, voi { struct net_device *dev = (struct net_device *)ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event != NETDEV_DOWN) @@ -475,13 +489,11 @@ static struct sock *nr_make_new(struct sock *osk) sock_init_data(NULL, sk); sk->sk_type = osk->sk_type; - sk->sk_socket = osk->sk_socket; sk->sk_priority = osk->sk_priority; sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; - sk->sk_sleep = osk->sk_sleep; sock_copy_flags(sk, osk); skb_queue_head_init(&nr->ack_queue); @@ -538,11 +550,9 @@ static int nr_release(struct socket *sock) sk->sk_state_change(sk); sock_orphan(sk); sock_set_flag(sk, SOCK_DESTROY); - sk->sk_socket = NULL; break; default: - sk->sk_socket = NULL; break; } @@ -810,13 +820,11 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) goto out_release; newsk = skb->sk; - newsk->sk_socket = newsock; - newsk->sk_sleep = &newsock->wait; + sock_graft(newsk, newsock); /* Now attach up the new socket */ kfree_skb(skb); sk_acceptq_removed(sk); - newsock->sk = newsk; out_release: release_sock(sk); @@ -1436,7 +1444,7 @@ static int __init nr_proto_init(void) free_netdev(dev); goto fail; } - lockdep_set_class(&dev->_xmit_lock, &nr_netdev_xmit_lock_key); + nr_set_lockdep_key(dev); dev_nr[i] = dev; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 25070240d4ae..c718e7e3f7de 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -5,8 +5,6 @@ * * PACKET - implements raw packet sockets. * - * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> @@ -188,6 +186,9 @@ struct packet_sock { unsigned int pg_vec_order; unsigned int pg_vec_pages; unsigned int pg_vec_len; + enum tpacket_versions tp_version; + unsigned int tp_hdrlen; + unsigned int tp_reserve; #endif }; @@ -203,14 +204,52 @@ struct packet_skb_cb { #ifdef CONFIG_PACKET_MMAP -static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position) +static void *packet_lookup_frame(struct packet_sock *po, unsigned int position, + int status) { unsigned int pg_vec_pos, frame_offset; + union { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + void *raw; + } h; pg_vec_pos = position / po->frames_per_block; frame_offset = position % po->frames_per_block; - return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size)); + h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size); + switch (po->tp_version) { + case TPACKET_V1: + if (status != h.h1->tp_status ? TP_STATUS_USER : + TP_STATUS_KERNEL) + return NULL; + break; + case TPACKET_V2: + if (status != h.h2->tp_status ? TP_STATUS_USER : + TP_STATUS_KERNEL) + return NULL; + break; + } + return h.raw; +} + +static void __packet_set_status(struct packet_sock *po, void *frame, int status) +{ + union { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + void *raw; + } h; + + h.raw = frame; + switch (po->tp_version) { + case TPACKET_V1: + h.h1->tp_status = status; + break; + case TPACKET_V2: + h.h2->tp_status = status; + break; + } } #endif @@ -221,8 +260,8 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) static void packet_sock_destruct(struct sock *sk) { - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive packet socket: %p\n", sk); @@ -553,14 +592,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe struct sock *sk; struct packet_sock *po; struct sockaddr_ll *sll; - struct tpacket_hdr *h; + union { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + void *raw; + } h; u8 * skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; - unsigned short macoff, netoff; + unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; struct timeval tv; + struct timespec ts; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -592,10 +636,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe snaplen = res; if (sk->sk_type == SOCK_DGRAM) { - macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; + macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 + + po->tp_reserve; } else { unsigned maclen = skb_network_offset(skb); - netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen)); + netoff = TPACKET_ALIGN(po->tp_hdrlen + + (maclen < 16 ? 16 : maclen)) + + po->tp_reserve; macoff = netoff - maclen; } @@ -618,9 +665,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe } spin_lock(&sk->sk_receive_queue.lock); - h = packet_lookup_frame(po, po->head); - - if (h->tp_status) + h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL); + if (!h.raw) goto ring_is_full; po->head = po->head != po->frame_max ? po->head+1 : 0; po->stats.tp_packets++; @@ -632,20 +678,41 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe status &= ~TP_STATUS_LOSING; spin_unlock(&sk->sk_receive_queue.lock); - skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen); + skb_copy_bits(skb, 0, h.raw + macoff, snaplen); - h->tp_len = skb->len; - h->tp_snaplen = snaplen; - h->tp_mac = macoff; - h->tp_net = netoff; - if (skb->tstamp.tv64) - tv = ktime_to_timeval(skb->tstamp); - else - do_gettimeofday(&tv); - h->tp_sec = tv.tv_sec; - h->tp_usec = tv.tv_usec; + switch (po->tp_version) { + case TPACKET_V1: + h.h1->tp_len = skb->len; + h.h1->tp_snaplen = snaplen; + h.h1->tp_mac = macoff; + h.h1->tp_net = netoff; + if (skb->tstamp.tv64) + tv = ktime_to_timeval(skb->tstamp); + else + do_gettimeofday(&tv); + h.h1->tp_sec = tv.tv_sec; + h.h1->tp_usec = tv.tv_usec; + hdrlen = sizeof(*h.h1); + break; + case TPACKET_V2: + h.h2->tp_len = skb->len; + h.h2->tp_snaplen = snaplen; + h.h2->tp_mac = macoff; + h.h2->tp_net = netoff; + if (skb->tstamp.tv64) + ts = ktime_to_timespec(skb->tstamp); + else + getnstimeofday(&ts); + h.h2->tp_sec = ts.tv_sec; + h.h2->tp_nsec = ts.tv_nsec; + h.h2->tp_vlan_tci = skb->vlan_tci; + hdrlen = sizeof(*h.h2); + break; + default: + BUG(); + } - sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); + sll = h.raw + TPACKET_ALIGN(hdrlen); sll->sll_halen = dev_parse_header(skb, sll->sll_addr); sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; @@ -656,14 +723,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe else sll->sll_ifindex = dev->ifindex; - h->tp_status = status; + __packet_set_status(po, h.raw, status); smp_mb(); { struct page *p_start, *p_end; - u8 *h_end = (u8 *)h + macoff + snaplen - 1; + u8 *h_end = h.raw + macoff + snaplen - 1; - p_start = virt_to_page(h); + p_start = virt_to_page(h.raw); p_end = virt_to_page(h_end); while (p_start <= p_end) { flush_dcache_page(p_start); @@ -743,7 +810,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, if (len > dev->mtu+reserve) goto out_unlock; - skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), msg->msg_flags & MSG_DONTWAIT, &err); if (skb==NULL) goto out_unlock; @@ -1109,6 +1176,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); + aux.tp_vlan_tci = skb->vlan_tci; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } @@ -1175,7 +1243,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, return 0; } -static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what) +static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, + int what) { switch (i->type) { case PACKET_MR_MULTICAST: @@ -1185,13 +1254,14 @@ static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int w dev_mc_delete(dev, i->addr, i->alen, 0); break; case PACKET_MR_PROMISC: - dev_set_promiscuity(dev, what); + return dev_set_promiscuity(dev, what); break; case PACKET_MR_ALLMULTI: - dev_set_allmulti(dev, what); + return dev_set_allmulti(dev, what); break; default:; } + return 0; } static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what) @@ -1245,7 +1315,11 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) i->count = 1; i->next = po->mclist; po->mclist = i; - packet_dev_mc(dev, i, +1); + err = packet_dev_mc(dev, i, 1); + if (err) { + po->mclist = i->next; + kfree(i); + } done: rtnl_unlock(); @@ -1358,6 +1432,38 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv pkt_sk(sk)->copy_thresh = val; return 0; } + case PACKET_VERSION: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + switch (val) { + case TPACKET_V1: + case TPACKET_V2: + po->tp_version = val; + return 0; + default: + return -EINVAL; + } + } + case PACKET_RESERVE: + { + unsigned int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + po->tp_reserve = val; + return 0; + } #endif case PACKET_AUXDATA: { @@ -1433,6 +1539,37 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, data = &val; break; +#ifdef CONFIG_PACKET_MMAP + case PACKET_VERSION: + if (len > sizeof(int)) + len = sizeof(int); + val = po->tp_version; + data = &val; + break; + case PACKET_HDRLEN: + if (len > sizeof(int)) + len = sizeof(int); + if (copy_from_user(&val, optval, len)) + return -EFAULT; + switch (val) { + case TPACKET_V1: + val = sizeof(struct tpacket_hdr); + break; + case TPACKET_V2: + val = sizeof(struct tpacket2_hdr); + break; + default: + return -EINVAL; + } + data = &val; + break; + case PACKET_RESERVE: + if (len > sizeof(unsigned int)) + len = sizeof(unsigned int); + val = po->tp_reserve; + data = &val; + break; +#endif default: return -ENOPROTOOPT; } @@ -1540,7 +1677,7 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: - if (sock_net(sk) != &init_net) + if (!net_eq(sock_net(sk), &init_net)) return -ENOIOCTLCMD; return inet_dgram_ops.ioctl(sock, cmd, arg); #endif @@ -1566,11 +1703,8 @@ static unsigned int packet_poll(struct file * file, struct socket *sock, spin_lock_bh(&sk->sk_receive_queue.lock); if (po->pg_vec) { unsigned last = po->head ? po->head-1 : po->frame_max; - struct tpacket_hdr *h; - - h = packet_lookup_frame(po, last); - if (h->tp_status) + if (packet_lookup_frame(po, last, TP_STATUS_USER)) mask |= POLLIN | POLLRDNORM; } spin_unlock_bh(&sk->sk_receive_queue.lock); @@ -1665,11 +1799,21 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing if (unlikely(po->pg_vec)) return -EBUSY; + switch (po->tp_version) { + case TPACKET_V1: + po->tp_hdrlen = TPACKET_HDRLEN; + break; + case TPACKET_V2: + po->tp_hdrlen = TPACKET2_HDRLEN; + break; + } + if (unlikely((int)req->tp_block_size <= 0)) return -EINVAL; if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) return -EINVAL; - if (unlikely(req->tp_frame_size < TPACKET_HDRLEN)) + if (unlikely(req->tp_frame_size < po->tp_hdrlen + + po->tp_reserve)) return -EINVAL; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) return -EINVAL; @@ -1688,13 +1832,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing goto out; for (i = 0; i < req->tp_block_nr; i++) { - char *ptr = pg_vec[i]; - struct tpacket_hdr *header; + void *ptr = pg_vec[i]; int k; for (k = 0; k < po->frames_per_block; k++) { - header = (struct tpacket_hdr *) ptr; - header->tp_status = TP_STATUS_KERNEL; + __packet_set_status(po, ptr, TP_STATUS_KERNEL); ptr += req->tp_frame_size; } } diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index e4b051dbed61..8aa822730145 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c @@ -30,39 +30,62 @@ struct rfkill_task { spinlock_t lock; /* for accessing last and desired state */ unsigned long last; /* last schedule */ enum rfkill_state desired_state; /* on/off */ - enum rfkill_state current_state; /* on/off */ }; static void rfkill_task_handler(struct work_struct *work) { struct rfkill_task *task = container_of(work, struct rfkill_task, work); - enum rfkill_state state; mutex_lock(&task->mutex); - /* - * Use temp variable to fetch desired state to keep it - * consistent even if rfkill_schedule_toggle() runs in - * another thread or interrupts us. - */ - state = task->desired_state; + rfkill_switch_all(task->type, task->desired_state); - if (state != task->current_state) { - rfkill_switch_all(task->type, state); - task->current_state = state; + mutex_unlock(&task->mutex); +} + +static void rfkill_task_epo_handler(struct work_struct *work) +{ + rfkill_epo(); +} + +static DECLARE_WORK(epo_work, rfkill_task_epo_handler); + +static void rfkill_schedule_epo(void) +{ + schedule_work(&epo_work); +} + +static void rfkill_schedule_set(struct rfkill_task *task, + enum rfkill_state desired_state) +{ + unsigned long flags; + + if (unlikely(work_pending(&epo_work))) + return; + + spin_lock_irqsave(&task->lock, flags); + + if (time_after(jiffies, task->last + msecs_to_jiffies(200))) { + task->desired_state = desired_state; + task->last = jiffies; + schedule_work(&task->work); } - mutex_unlock(&task->mutex); + spin_unlock_irqrestore(&task->lock, flags); } static void rfkill_schedule_toggle(struct rfkill_task *task) { unsigned long flags; + if (unlikely(work_pending(&epo_work))) + return; + spin_lock_irqsave(&task->lock, flags); if (time_after(jiffies, task->last + msecs_to_jiffies(200))) { - task->desired_state = !task->desired_state; + task->desired_state = + rfkill_state_complement(task->desired_state); task->last = jiffies; schedule_work(&task->work); } @@ -70,26 +93,26 @@ static void rfkill_schedule_toggle(struct rfkill_task *task) spin_unlock_irqrestore(&task->lock, flags); } -#define DEFINE_RFKILL_TASK(n, t) \ - struct rfkill_task n = { \ - .work = __WORK_INITIALIZER(n.work, \ - rfkill_task_handler), \ - .type = t, \ - .mutex = __MUTEX_INITIALIZER(n.mutex), \ - .lock = __SPIN_LOCK_UNLOCKED(n.lock), \ - .desired_state = RFKILL_STATE_ON, \ - .current_state = RFKILL_STATE_ON, \ +#define DEFINE_RFKILL_TASK(n, t) \ + struct rfkill_task n = { \ + .work = __WORK_INITIALIZER(n.work, \ + rfkill_task_handler), \ + .type = t, \ + .mutex = __MUTEX_INITIALIZER(n.mutex), \ + .lock = __SPIN_LOCK_UNLOCKED(n.lock), \ + .desired_state = RFKILL_STATE_UNBLOCKED, \ } static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN); static DEFINE_RFKILL_TASK(rfkill_bt, RFKILL_TYPE_BLUETOOTH); static DEFINE_RFKILL_TASK(rfkill_uwb, RFKILL_TYPE_UWB); static DEFINE_RFKILL_TASK(rfkill_wimax, RFKILL_TYPE_WIMAX); +static DEFINE_RFKILL_TASK(rfkill_wwan, RFKILL_TYPE_WWAN); static void rfkill_event(struct input_handle *handle, unsigned int type, - unsigned int code, int down) + unsigned int code, int data) { - if (type == EV_KEY && down == 1) { + if (type == EV_KEY && data == 1) { switch (code) { case KEY_WLAN: rfkill_schedule_toggle(&rfkill_wlan); @@ -106,6 +129,28 @@ static void rfkill_event(struct input_handle *handle, unsigned int type, default: break; } + } else if (type == EV_SW) { + switch (code) { + case SW_RFKILL_ALL: + /* EVERY radio type. data != 0 means radios ON */ + /* handle EPO (emergency power off) through shortcut */ + if (data) { + rfkill_schedule_set(&rfkill_wwan, + RFKILL_STATE_UNBLOCKED); + rfkill_schedule_set(&rfkill_wimax, + RFKILL_STATE_UNBLOCKED); + rfkill_schedule_set(&rfkill_uwb, + RFKILL_STATE_UNBLOCKED); + rfkill_schedule_set(&rfkill_bt, + RFKILL_STATE_UNBLOCKED); + rfkill_schedule_set(&rfkill_wlan, + RFKILL_STATE_UNBLOCKED); + } else + rfkill_schedule_epo(); + break; + default: + break; + } } } @@ -168,6 +213,11 @@ static const struct input_device_id rfkill_ids[] = { .evbit = { BIT_MASK(EV_KEY) }, .keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) }, }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT, + .evbit = { BIT(EV_SW) }, + .swbit = { [BIT_WORD(SW_RFKILL_ALL)] = BIT_MASK(SW_RFKILL_ALL) }, + }, { } }; diff --git a/net/rfkill/rfkill-input.h b/net/rfkill/rfkill-input.h index 4dae5006fc77..f63d05045685 100644 --- a/net/rfkill/rfkill-input.h +++ b/net/rfkill/rfkill-input.h @@ -12,5 +12,6 @@ #define __RFKILL_INPUT_H void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state); +void rfkill_epo(void); #endif /* __RFKILL_INPUT_H */ diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 4e10a95de832..7a560b785097 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -39,8 +39,56 @@ MODULE_LICENSE("GPL"); static LIST_HEAD(rfkill_list); /* list of registered rf switches */ static DEFINE_MUTEX(rfkill_mutex); +static unsigned int rfkill_default_state = RFKILL_STATE_UNBLOCKED; +module_param_named(default_state, rfkill_default_state, uint, 0444); +MODULE_PARM_DESC(default_state, + "Default initial state for all radio types, 0 = radio off"); + static enum rfkill_state rfkill_states[RFKILL_TYPE_MAX]; +static BLOCKING_NOTIFIER_HEAD(rfkill_notifier_list); + + +/** + * register_rfkill_notifier - Add notifier to rfkill notifier chain + * @nb: pointer to the new entry to add to the chain + * + * See blocking_notifier_chain_register() for return value and further + * observations. + * + * Adds a notifier to the rfkill notifier chain. The chain will be + * called with a pointer to the relevant rfkill structure as a parameter, + * refer to include/linux/rfkill.h for the possible events. + * + * Notifiers added to this chain are to always return NOTIFY_DONE. This + * chain is a blocking notifier chain: notifiers can sleep. + * + * Calls to this chain may have been done through a workqueue. One must + * assume unordered asynchronous behaviour, there is no way to know if + * actions related to the event that generated the notification have been + * carried out already. + */ +int register_rfkill_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&rfkill_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(register_rfkill_notifier); + +/** + * unregister_rfkill_notifier - remove notifier from rfkill notifier chain + * @nb: pointer to the entry to remove from the chain + * + * See blocking_notifier_chain_unregister() for return value and further + * observations. + * + * Removes a notifier from the rfkill notifier chain. + */ +int unregister_rfkill_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&rfkill_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(unregister_rfkill_notifier); + static void rfkill_led_trigger(struct rfkill *rfkill, enum rfkill_state state) @@ -50,24 +98,101 @@ static void rfkill_led_trigger(struct rfkill *rfkill, if (!led->name) return; - if (state == RFKILL_STATE_OFF) + if (state != RFKILL_STATE_UNBLOCKED) led_trigger_event(led, LED_OFF); else led_trigger_event(led, LED_FULL); #endif /* CONFIG_RFKILL_LEDS */ } +static void notify_rfkill_state_change(struct rfkill *rfkill) +{ + blocking_notifier_call_chain(&rfkill_notifier_list, + RFKILL_STATE_CHANGED, + rfkill); +} + +static void update_rfkill_state(struct rfkill *rfkill) +{ + enum rfkill_state newstate, oldstate; + + if (rfkill->get_state) { + mutex_lock(&rfkill->mutex); + if (!rfkill->get_state(rfkill->data, &newstate)) { + oldstate = rfkill->state; + rfkill->state = newstate; + if (oldstate != newstate) + notify_rfkill_state_change(rfkill); + } + mutex_unlock(&rfkill->mutex); + } +} + +/** + * rfkill_toggle_radio - wrapper for toggle_radio hook + * + * @rfkill: the rfkill struct to use + * @force: calls toggle_radio even if cache says it is not needed, + * and also makes sure notifications of the state will be + * sent even if it didn't change + * @state: the new state to call toggle_radio() with + * + * Calls rfkill->toggle_radio, enforcing the API for toggle_radio + * calls and handling all the red tape such as issuing notifications + * if the call is successful. + * + * Note that @force cannot override a (possibly cached) state of + * RFKILL_STATE_HARD_BLOCKED. Any device making use of + * RFKILL_STATE_HARD_BLOCKED implements either get_state() or + * rfkill_force_state(), so the cache either is bypassed or valid. + * + * Note that we do call toggle_radio for RFKILL_STATE_SOFT_BLOCKED + * even if the radio is in RFKILL_STATE_HARD_BLOCKED state, so as to + * give the driver a hint that it should double-BLOCK the transmitter. + * + * Caller must have aquired rfkill_mutex. + */ static int rfkill_toggle_radio(struct rfkill *rfkill, - enum rfkill_state state) + enum rfkill_state state, + int force) { int retval = 0; + enum rfkill_state oldstate, newstate; + + oldstate = rfkill->state; + + if (rfkill->get_state && !force && + !rfkill->get_state(rfkill->data, &newstate)) + rfkill->state = newstate; + + switch (state) { + case RFKILL_STATE_HARD_BLOCKED: + /* typically happens when refreshing hardware state, + * such as on resume */ + state = RFKILL_STATE_SOFT_BLOCKED; + break; + case RFKILL_STATE_UNBLOCKED: + /* force can't override this, only rfkill_force_state() can */ + if (rfkill->state == RFKILL_STATE_HARD_BLOCKED) + return -EPERM; + break; + case RFKILL_STATE_SOFT_BLOCKED: + /* nothing to do, we want to give drivers the hint to double + * BLOCK even a transmitter that is already in state + * RFKILL_STATE_HARD_BLOCKED */ + break; + } - if (state != rfkill->state) { + if (force || state != rfkill->state) { retval = rfkill->toggle_radio(rfkill->data, state); - if (!retval) { + /* never allow a HARD->SOFT downgrade! */ + if (!retval && rfkill->state != RFKILL_STATE_HARD_BLOCKED) rfkill->state = state; - rfkill_led_trigger(rfkill, state); - } + } + + if (force || rfkill->state != oldstate) { + rfkill_led_trigger(rfkill, rfkill->state); + notify_rfkill_state_change(rfkill); } return retval; @@ -82,7 +207,6 @@ static int rfkill_toggle_radio(struct rfkill *rfkill, * a specific switch is claimed by userspace in which case it is * left alone. */ - void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state) { struct rfkill *rfkill; @@ -93,13 +217,66 @@ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state) list_for_each_entry(rfkill, &rfkill_list, node) { if ((!rfkill->user_claim) && (rfkill->type == type)) - rfkill_toggle_radio(rfkill, state); + rfkill_toggle_radio(rfkill, state, 0); } mutex_unlock(&rfkill_mutex); } EXPORT_SYMBOL(rfkill_switch_all); +/** + * rfkill_epo - emergency power off all transmitters + * + * This kicks all rfkill devices to RFKILL_STATE_SOFT_BLOCKED, ignoring + * everything in its path but rfkill_mutex. + */ +void rfkill_epo(void) +{ + struct rfkill *rfkill; + + mutex_lock(&rfkill_mutex); + list_for_each_entry(rfkill, &rfkill_list, node) { + rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); + } + mutex_unlock(&rfkill_mutex); +} +EXPORT_SYMBOL_GPL(rfkill_epo); + +/** + * rfkill_force_state - Force the internal rfkill radio state + * @rfkill: pointer to the rfkill class to modify. + * @state: the current radio state the class should be forced to. + * + * This function updates the internal state of the radio cached + * by the rfkill class. It should be used when the driver gets + * a notification by the firmware/hardware of the current *real* + * state of the radio rfkill switch. + * + * It may not be called from an atomic context. + */ +int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state) +{ + enum rfkill_state oldstate; + + if (state != RFKILL_STATE_SOFT_BLOCKED && + state != RFKILL_STATE_UNBLOCKED && + state != RFKILL_STATE_HARD_BLOCKED) + return -EINVAL; + + mutex_lock(&rfkill->mutex); + + oldstate = rfkill->state; + rfkill->state = state; + + if (state != oldstate) + notify_rfkill_state_change(rfkill); + + mutex_unlock(&rfkill->mutex); + + return 0; +} +EXPORT_SYMBOL(rfkill_force_state); + static ssize_t rfkill_name_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -109,31 +286,31 @@ static ssize_t rfkill_name_show(struct device *dev, return sprintf(buf, "%s\n", rfkill->name); } -static ssize_t rfkill_type_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static const char *rfkill_get_type_str(enum rfkill_type type) { - struct rfkill *rfkill = to_rfkill(dev); - const char *type; - - switch (rfkill->type) { + switch (type) { case RFKILL_TYPE_WLAN: - type = "wlan"; - break; + return "wlan"; case RFKILL_TYPE_BLUETOOTH: - type = "bluetooth"; - break; + return "bluetooth"; case RFKILL_TYPE_UWB: - type = "ultrawideband"; - break; + return "ultrawideband"; case RFKILL_TYPE_WIMAX: - type = "wimax"; - break; + return "wimax"; + case RFKILL_TYPE_WWAN: + return "wwan"; default: BUG(); } +} + +static ssize_t rfkill_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); - return sprintf(buf, "%s\n", type); + return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type)); } static ssize_t rfkill_state_show(struct device *dev, @@ -142,6 +319,7 @@ static ssize_t rfkill_state_show(struct device *dev, { struct rfkill *rfkill = to_rfkill(dev); + update_rfkill_state(rfkill); return sprintf(buf, "%d\n", rfkill->state); } @@ -156,10 +334,14 @@ static ssize_t rfkill_state_store(struct device *dev, if (!capable(CAP_NET_ADMIN)) return -EPERM; + /* RFKILL_STATE_HARD_BLOCKED is illegal here... */ + if (state != RFKILL_STATE_UNBLOCKED && + state != RFKILL_STATE_SOFT_BLOCKED) + return -EINVAL; + if (mutex_lock_interruptible(&rfkill->mutex)) return -ERESTARTSYS; - error = rfkill_toggle_radio(rfkill, - state ? RFKILL_STATE_ON : RFKILL_STATE_OFF); + error = rfkill_toggle_radio(rfkill, state, 0); mutex_unlock(&rfkill->mutex); return error ? error : count; @@ -200,7 +382,8 @@ static ssize_t rfkill_claim_store(struct device *dev, if (rfkill->user_claim != claim) { if (!claim) rfkill_toggle_radio(rfkill, - rfkill_states[rfkill->type]); + rfkill_states[rfkill->type], + 0); rfkill->user_claim = claim; } @@ -233,12 +416,12 @@ static int rfkill_suspend(struct device *dev, pm_message_t state) if (dev->power.power_state.event != state.event) { if (state.event & PM_EVENT_SLEEP) { - mutex_lock(&rfkill->mutex); - - if (rfkill->state == RFKILL_STATE_ON) - rfkill->toggle_radio(rfkill->data, - RFKILL_STATE_OFF); + /* Stop transmitter, keep state, no notifies */ + update_rfkill_state(rfkill); + mutex_lock(&rfkill->mutex); + rfkill->toggle_radio(rfkill->data, + RFKILL_STATE_SOFT_BLOCKED); mutex_unlock(&rfkill->mutex); } @@ -255,8 +438,8 @@ static int rfkill_resume(struct device *dev) if (dev->power.power_state.event != PM_EVENT_ON) { mutex_lock(&rfkill->mutex); - if (rfkill->state == RFKILL_STATE_ON) - rfkill->toggle_radio(rfkill->data, RFKILL_STATE_ON); + /* restore radio state AND notify everybody */ + rfkill_toggle_radio(rfkill, rfkill->state, 1); mutex_unlock(&rfkill->mutex); } @@ -269,34 +452,71 @@ static int rfkill_resume(struct device *dev) #define rfkill_resume NULL #endif +static int rfkill_blocking_uevent_notifier(struct notifier_block *nb, + unsigned long eventid, + void *data) +{ + struct rfkill *rfkill = (struct rfkill *)data; + + switch (eventid) { + case RFKILL_STATE_CHANGED: + kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); + break; + default: + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block rfkill_blocking_uevent_nb = { + .notifier_call = rfkill_blocking_uevent_notifier, + .priority = 0, +}; + +static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct rfkill *rfkill = to_rfkill(dev); + int error; + + error = add_uevent_var(env, "RFKILL_NAME=%s", rfkill->name); + if (error) + return error; + error = add_uevent_var(env, "RFKILL_TYPE=%s", + rfkill_get_type_str(rfkill->type)); + if (error) + return error; + error = add_uevent_var(env, "RFKILL_STATE=%d", rfkill->state); + return error; +} + static struct class rfkill_class = { .name = "rfkill", .dev_release = rfkill_release, .dev_attrs = rfkill_dev_attrs, .suspend = rfkill_suspend, .resume = rfkill_resume, + .dev_uevent = rfkill_dev_uevent, }; static int rfkill_add_switch(struct rfkill *rfkill) { - int error; - mutex_lock(&rfkill_mutex); - error = rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type]); - if (!error) - list_add_tail(&rfkill->node, &rfkill_list); + rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type], 0); + + list_add_tail(&rfkill->node, &rfkill_list); mutex_unlock(&rfkill_mutex); - return error; + return 0; } static void rfkill_remove_switch(struct rfkill *rfkill) { mutex_lock(&rfkill_mutex); list_del_init(&rfkill->node); - rfkill_toggle_radio(rfkill, RFKILL_STATE_OFF); + rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); mutex_unlock(&rfkill_mutex); } @@ -412,7 +632,7 @@ int rfkill_register(struct rfkill *rfkill) EXPORT_SYMBOL(rfkill_register); /** - * rfkill_unregister - Uegister a rfkill structure. + * rfkill_unregister - Unregister a rfkill structure. * @rfkill: rfkill structure to be unregistered * * This function should be called by the network driver during device @@ -436,8 +656,13 @@ static int __init rfkill_init(void) int error; int i; + /* RFKILL_STATE_HARD_BLOCKED is illegal here... */ + if (rfkill_default_state != RFKILL_STATE_SOFT_BLOCKED && + rfkill_default_state != RFKILL_STATE_UNBLOCKED) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(rfkill_states); i++) - rfkill_states[i] = RFKILL_STATE_ON; + rfkill_states[i] = rfkill_default_state; error = class_register(&rfkill_class); if (error) { @@ -445,11 +670,14 @@ static int __init rfkill_init(void) return error; } + register_rfkill_notifier(&rfkill_blocking_uevent_nb); + return 0; } static void __exit rfkill_exit(void) { + unregister_rfkill_notifier(&rfkill_blocking_uevent_nb); class_unregister(&rfkill_class); } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 1ebf65294405..a7f1ce11bc22 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -74,6 +74,20 @@ ax25_address rose_callsign; * separate class since they always nest. */ static struct lock_class_key rose_netdev_xmit_lock_key; +static struct lock_class_key rose_netdev_addr_lock_key; + +static void rose_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key); +} + +static void rose_set_lockdep_key(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key); + netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL); +} /* * Convert a ROSE address into text. @@ -197,7 +211,7 @@ static int rose_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event != NETDEV_DOWN) @@ -566,13 +580,11 @@ static struct sock *rose_make_new(struct sock *osk) #endif sk->sk_type = osk->sk_type; - sk->sk_socket = osk->sk_socket; sk->sk_priority = osk->sk_priority; sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; - sk->sk_sleep = osk->sk_sleep; sock_copy_flags(sk, osk); init_timer(&rose->timer); @@ -759,7 +771,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le sock->state = SS_UNCONNECTED; rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, - &diagnostic); + &diagnostic, 0); if (!rose->neighbour) { err = -ENETUNREACH; goto out_release; @@ -855,7 +867,7 @@ rose_try_next_neigh: if (sk->sk_state != TCP_ESTABLISHED) { /* Try next neighbour */ - rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic); + rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic, 0); if (rose->neighbour) goto rose_try_next_neigh; @@ -924,14 +936,12 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags) goto out_release; newsk = skb->sk; - newsk->sk_socket = newsock; - newsk->sk_sleep = &newsock->wait; + sock_graft(newsk, newsock); /* Now attach up the new socket */ skb->sk = NULL; kfree_skb(skb); sk->sk_ack_backlog--; - newsock->sk = newsk; out_release: release_sock(sk); @@ -1580,7 +1590,7 @@ static int __init rose_proto_init(void) free_netdev(dev); goto fail; } - lockdep_set_class(&dev->_xmit_lock, &rose_netdev_xmit_lock_key); + rose_set_lockdep_key(dev); dev_rose[i] = dev; } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index bd593871c81e..a81066a1010a 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -662,27 +662,34 @@ struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neig } /* - * Find a neighbour given a ROSE address. + * Find a neighbour or a route given a ROSE address. */ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, - unsigned char *diagnostic) + unsigned char *diagnostic, int new) { struct rose_neigh *res = NULL; struct rose_node *node; int failed = 0; int i; - spin_lock_bh(&rose_node_list_lock); + if (!new) spin_lock_bh(&rose_node_list_lock); for (node = rose_node_list; node != NULL; node = node->next) { if (rosecmpm(addr, &node->address, node->mask) == 0) { for (i = 0; i < node->count; i++) { - if (!rose_ftimer_running(node->neighbour[i])) { - res = node->neighbour[i]; - goto out; - } else - failed = 1; + if (new) { + if (node->neighbour[i]->restarted) { + res = node->neighbour[i]; + goto out; + } + } + else { + if (!rose_ftimer_running(node->neighbour[i])) { + res = node->neighbour[i]; + goto out; + } else + failed = 1; + } } - break; } } @@ -695,7 +702,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, } out: - spin_unlock_bh(&rose_node_list_lock); + if (!new) spin_unlock_bh(&rose_node_list_lock); return res; } @@ -1018,7 +1025,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) rose_route = rose_route->next; } - if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic)) == NULL) { + if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic, 1)) == NULL) { rose_transmit_clear_request(rose_neigh, lci, cause, diagnostic); goto out; } diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 4b2682feeedc..32e489118beb 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -660,9 +660,9 @@ static void rxrpc_sock_destructor(struct sock *sk) rxrpc_purge_queue(&sk->sk_receive_queue); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(sk_unhashed(sk)); - BUG_TRAP(!sk->sk_socket); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(!sk_unhashed(sk)); + WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive rxrpc socket: %p\n", sk); diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index f8a699e92962..f98c8027e5c1 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -21,6 +21,7 @@ #include <net/af_rxrpc.h> #include <net/ip.h> #include <net/udp.h> +#include <net/net_namespace.h> #include "ar-internal.h" unsigned long rxrpc_ack_timeout = 1; @@ -708,12 +709,12 @@ void rxrpc_data_ready(struct sock *sk, int count) if (skb_checksum_complete(skb)) { rxrpc_free_skb(skb); rxrpc_put_local(local); - UDP_INC_STATS_BH(UDP_MIB_INERRORS, 0); + UDP_INC_STATS_BH(&init_net, UDP_MIB_INERRORS, 0); _leave(" [CSUM failed]"); return; } - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, 0); + UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0); /* the socket buffer we have is owned by UDP, with UDP's data all over * it, but we really want our own */ diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 82adfe6447d7..9437b27ff84d 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -106,17 +106,6 @@ config NET_SCH_PRIO To compile this code as a module, choose M here: the module will be called sch_prio. -config NET_SCH_RR - tristate "Multi Band Round Robin Queuing (RR)" - select NET_SCH_PRIO - ---help--- - Say Y here if you want to use an n-band round robin packet - scheduler. - - The module uses sch_prio for its framework and is aliased as - sch_rr, so it will load sch_prio, although it is referred - to using sch_rr. - config NET_SCH_RED tristate "Random Early Detection (RED)" ---help--- diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 74e662cbb2c5..d308c19aa3f9 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -41,7 +41,7 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) return; } } - BUG_TRAP(0); + WARN_ON(1); } EXPORT_SYMBOL(tcf_hash_destroy); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 422872c4f14b..ac04289da5d7 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -139,7 +139,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result #else action = gact->tcf_action; #endif - gact->tcf_bstats.bytes += skb->len; + gact->tcf_bstats.bytes += qdisc_pkt_len(skb); gact->tcf_bstats.packets++; if (action == TC_ACT_SHOT) gact->tcf_qstats.drops++; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index da696fd3e341..d1263b3c96c3 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -205,7 +205,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, spin_lock(&ipt->tcf_lock); ipt->tcf_tm.lastuse = jiffies; - ipt->tcf_bstats.bytes += skb->len; + ipt->tcf_bstats.bytes += qdisc_pkt_len(skb); ipt->tcf_bstats.packets++; /* yes, we have to worry about both in and out dev diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1aff005d95cd..70341c020b6d 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -164,7 +164,7 @@ bad_mirred: if (skb2 != NULL) kfree_skb(skb2); m->tcf_qstats.overlimits++; - m->tcf_bstats.bytes += skb->len; + m->tcf_bstats.bytes += qdisc_pkt_len(skb); m->tcf_bstats.packets++; spin_unlock(&m->tcf_lock); /* should we be asking for packet to be dropped? @@ -184,7 +184,7 @@ bad_mirred: goto bad_mirred; } - m->tcf_bstats.bytes += skb2->len; + m->tcf_bstats.bytes += qdisc_pkt_len(skb2); m->tcf_bstats.packets++; if (!(at & AT_EGRESS)) if (m->tcfm_ok_push) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 0a3c8339767a..7b39ed485bca 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -124,7 +124,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, egress = p->flags & TCA_NAT_FLAG_EGRESS; action = p->tcf_action; - p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.bytes += qdisc_pkt_len(skb); p->tcf_bstats.packets++; spin_unlock(&p->tcf_lock); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 3cc4cb9e500e..d5f4e3404864 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -182,7 +182,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, bad: p->tcf_qstats.overlimits++; done: - p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.bytes += qdisc_pkt_len(skb); p->tcf_bstats.packets++; spin_unlock(&p->tcf_lock); return p->tcf_action; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0898120bbcc0..38015b493947 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -116,7 +116,7 @@ static void tcf_police_destroy(struct tcf_police *p) return; } } - BUG_TRAP(0); + WARN_ON(1); } static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { @@ -272,7 +272,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, spin_lock(&police->tcf_lock); - police->tcf_bstats.bytes += skb->len; + police->tcf_bstats.bytes += qdisc_pkt_len(skb); police->tcf_bstats.packets++; if (police->tcfp_ewma_rate && @@ -282,7 +282,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, return police->tcf_action; } - if (skb->len <= police->tcfp_mtu) { + if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { if (police->tcfp_R_tab == NULL) { spin_unlock(&police->tcf_lock); return police->tcfp_result; @@ -295,12 +295,12 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, ptoks = toks + police->tcfp_ptoks; if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, skb->len); + ptoks -= L2T_P(police, qdisc_pkt_len(skb)); } toks += police->tcfp_toks; if (toks > (long)police->tcfp_burst) toks = police->tcfp_burst; - toks -= L2T(police, skb->len); + toks -= L2T(police, qdisc_pkt_len(skb)); if ((toks|ptoks) >= 0) { police->tcfp_t_c = now; police->tcfp_toks = toks; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 64b2d136c78e..e7851ce92cfe 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -6,7 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Authors: Jamal Hadi Salim (2005) + * Authors: Jamal Hadi Salim (2005-8) * */ @@ -34,13 +34,14 @@ static struct tcf_hashinfo simp_hash_info = { .lock = &simp_lock, }; +#define SIMP_MAX_DATA 32 static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { struct tcf_defact *d = a->priv; spin_lock(&d->tcf_lock); d->tcf_tm.lastuse = jiffies; - d->tcf_bstats.bytes += skb->len; + d->tcf_bstats.bytes += qdisc_pkt_len(skb); d->tcf_bstats.packets++; /* print policy string followed by _ then packet count @@ -69,23 +70,28 @@ static int tcf_simp_release(struct tcf_defact *d, int bind) return ret; } -static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) +static int alloc_defdata(struct tcf_defact *d, char *defdata) { - d->tcfd_defdata = kmemdup(defdata, datalen, GFP_KERNEL); + d->tcfd_defdata = kstrndup(defdata, SIMP_MAX_DATA, GFP_KERNEL); if (unlikely(!d->tcfd_defdata)) return -ENOMEM; - d->tcfd_datalen = datalen; + return 0; } -static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) +static void reset_policy(struct tcf_defact *d, char *defdata, + struct tc_defact *p) { - kfree(d->tcfd_defdata); - return alloc_defdata(d, datalen, defdata); + spin_lock_bh(&d->tcf_lock); + d->tcf_action = p->action; + memset(d->tcfd_defdata, 0, SIMP_MAX_DATA); + strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); + spin_unlock_bh(&d->tcf_lock); } static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) }, + [TCA_DEF_DATA] = { .type = NLA_STRING, .len = SIMP_MAX_DATA }, }; static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, @@ -95,28 +101,24 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, struct tc_defact *parm; struct tcf_defact *d; struct tcf_common *pc; - void *defdata; - u32 datalen = 0; + char *defdata; int ret = 0, err; if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_DEF_MAX, nla, NULL); + err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy); if (err < 0) return err; if (tb[TCA_DEF_PARMS] == NULL) return -EINVAL; - parm = nla_data(tb[TCA_DEF_PARMS]); - defdata = nla_data(tb[TCA_DEF_DATA]); - if (defdata == NULL) + if (tb[TCA_DEF_DATA] == NULL) return -EINVAL; - datalen = nla_len(tb[TCA_DEF_DATA]); - if (datalen == 0) - return -EINVAL; + parm = nla_data(tb[TCA_DEF_PARMS]); + defdata = nla_data(tb[TCA_DEF_DATA]); pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info); if (!pc) { @@ -126,11 +128,12 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, return -ENOMEM; d = to_defact(pc); - ret = alloc_defdata(d, datalen, defdata); + ret = alloc_defdata(d, defdata); if (ret < 0) { kfree(pc); return ret; } + d->tcf_action = parm->action; ret = ACT_P_CREATED; } else { d = to_defact(pc); @@ -138,13 +141,9 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, tcf_simp_release(d, bind); return -EEXIST; } - realloc_defdata(d, datalen, defdata); + reset_policy(d, defdata, parm); } - spin_lock_bh(&d->tcf_lock); - d->tcf_action = parm->action; - spin_unlock_bh(&d->tcf_lock); - if (ret == ACT_P_CREATED) tcf_hash_insert(pc, &simp_hash_info); return ret; @@ -172,7 +171,7 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, opt.bindcnt = d->tcf_bindcnt - bind; opt.action = d->tcf_action; NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); - NLA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata); + NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata); t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(d->tcf_tm.expires); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1086df7478bc..d2b6f54a6261 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -120,6 +120,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; + spinlock_t *root_lock; struct tcmsg *t; u32 protocol; u32 prio; @@ -166,7 +167,8 @@ replay: /* Find qdisc */ if (!parent) { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); + q = dev_queue->qdisc_sleeping; parent = q->handle; } else { q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); @@ -203,6 +205,8 @@ replay: } } + root_lock = qdisc_root_lock(q); + if (tp == NULL) { /* Proto-tcf does not exist, create new one */ @@ -220,7 +224,7 @@ replay: tp = kzalloc(sizeof(*tp), GFP_KERNEL); if (tp == NULL) goto errout; - err = -EINVAL; + err = -ENOENT; tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]); if (tp_ops == NULL) { #ifdef CONFIG_KMOD @@ -262,10 +266,10 @@ replay: goto errout; } - qdisc_lock_tree(dev); + spin_lock_bh(root_lock); tp->next = *back; *back = tp; - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) goto errout; @@ -274,9 +278,9 @@ replay: if (fh == 0) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { - qdisc_lock_tree(dev); + spin_lock_bh(root_lock); *back = tp->next; - qdisc_unlock_tree(dev); + spin_lock_bh(root_lock); tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); tcf_destroy(tp); @@ -334,7 +338,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad1 = 0; - tcm->tcm_ifindex = tp->q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; tcm->tcm_parent = tp->classid; tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind); @@ -390,6 +394,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct netdev_queue *dev_queue; int t; int s_t; struct net_device *dev; @@ -408,8 +413,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; + dev_queue = netdev_get_tx_queue(dev, 0); if (!tcm->tcm_parent) - q = dev->qdisc_sleeping; + q = dev_queue->qdisc_sleeping; else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 971b867e0484..8f63a1a94014 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -36,6 +36,8 @@ struct flow_filter { struct list_head list; struct tcf_exts exts; struct tcf_ematch_tree ematches; + struct timer_list perturb_timer; + u32 perturb_period; u32 handle; u32 nkeys; @@ -47,11 +49,9 @@ struct flow_filter { u32 addend; u32 divisor; u32 baseclass; + u32 hashrnd; }; -static u32 flow_hashrnd __read_mostly; -static int flow_hashrnd_initted __read_mostly; - static const struct tcf_ext_map flow_ext_map = { .action = TCA_FLOW_ACT, .police = TCA_FLOW_POLICE, @@ -348,7 +348,7 @@ static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp, } if (f->mode == FLOW_MODE_HASH) - classid = jhash2(keys, f->nkeys, flow_hashrnd); + classid = jhash2(keys, f->nkeys, f->hashrnd); else { classid = keys[0]; classid = (classid & f->mask) ^ f->xor; @@ -369,6 +369,15 @@ static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp, return -1; } +static void flow_perturbation(unsigned long arg) +{ + struct flow_filter *f = (struct flow_filter *)arg; + + get_random_bytes(&f->hashrnd, 4); + if (f->perturb_period) + mod_timer(&f->perturb_timer, jiffies + f->perturb_period); +} + static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_KEYS] = { .type = NLA_U32 }, [TCA_FLOW_MODE] = { .type = NLA_U32 }, @@ -381,6 +390,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_ACT] = { .type = NLA_NESTED }, [TCA_FLOW_POLICE] = { .type = NLA_NESTED }, [TCA_FLOW_EMATCHES] = { .type = NLA_NESTED }, + [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; static int flow_change(struct tcf_proto *tp, unsigned long base, @@ -394,6 +404,7 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, struct tcf_exts e; struct tcf_ematch_tree t; unsigned int nkeys = 0; + unsigned int perturb_period = 0; u32 baseclass = 0; u32 keymask = 0; u32 mode; @@ -442,6 +453,14 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; + + if (mode == FLOW_MODE_HASH) + perturb_period = f->perturb_period; + if (tb[TCA_FLOW_PERTURB]) { + if (mode != FLOW_MODE_HASH) + goto err2; + perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; + } } else { err = -EINVAL; if (!handle) @@ -455,6 +474,12 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; + if (tb[TCA_FLOW_PERTURB]) { + if (mode != FLOW_MODE_HASH) + goto err2; + perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; + } + if (TC_H_MAJ(baseclass) == 0) baseclass = TC_H_MAKE(tp->q->handle, baseclass); if (TC_H_MIN(baseclass) == 0) @@ -467,6 +492,11 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, f->handle = handle; f->mask = ~0U; + + get_random_bytes(&f->hashrnd, 4); + f->perturb_timer.function = flow_perturbation; + f->perturb_timer.data = (unsigned long)f; + init_timer_deferrable(&f->perturb_timer); } tcf_exts_change(tp, &f->exts, &e); @@ -495,6 +525,11 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, if (baseclass) f->baseclass = baseclass; + f->perturb_period = perturb_period; + del_timer(&f->perturb_timer); + if (perturb_period) + mod_timer(&f->perturb_timer, jiffies + perturb_period); + if (*arg == 0) list_add_tail(&f->list, &head->filters); @@ -512,6 +547,7 @@ err1: static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f) { + del_timer_sync(&f->perturb_timer); tcf_exts_destroy(tp, &f->exts); tcf_em_tree_destroy(tp, &f->ematches); kfree(f); @@ -532,11 +568,6 @@ static int flow_init(struct tcf_proto *tp) { struct flow_head *head; - if (!flow_hashrnd_initted) { - get_random_bytes(&flow_hashrnd, 4); - flow_hashrnd_initted = 1; - } - head = kzalloc(sizeof(*head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; @@ -605,6 +636,9 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh, if (f->baseclass) NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass); + if (f->perturb_period) + NLA_PUT_U32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ); + if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0) goto nla_put_failure; #ifdef CONFIG_NET_EMATCH diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 784dcb870b98..481260a4f10f 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -73,11 +73,13 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif) } static inline -void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id) +void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) { - qdisc_lock_tree(dev); + spinlock_t *root_lock = qdisc_root_lock(q); + + spin_lock_bh(root_lock); memset(head->fastmap, 0, sizeof(head->fastmap)); - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } static inline void @@ -302,7 +304,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) *fp = f->next; tcf_tree_unlock(tp); - route4_reset_fastmap(tp->q->dev, head, f->id); + route4_reset_fastmap(tp->q, head, f->id); route4_delete_filter(tp, f); /* Strip tree */ @@ -500,7 +502,7 @@ reinsert: } tcf_tree_unlock(tp); - route4_reset_fastmap(tp->q->dev, head, f->id); + route4_reset_fastmap(tp->q, head, f->id); *arg = (unsigned long)f; return 0; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4d755444c449..246f9065ce34 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -75,7 +75,6 @@ struct tc_u_hnode struct tc_u_common { - struct tc_u_common *next; struct tc_u_hnode *hlist; struct Qdisc *q; int refcnt; @@ -87,8 +86,6 @@ static const struct tcf_ext_map u32_ext_map = { .police = TCA_U32_POLICE }; -static struct tc_u_common *u32_list; - static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift) { unsigned h = ntohl(key & sel->hmask)>>fshift; @@ -287,9 +284,7 @@ static int u32_init(struct tcf_proto *tp) struct tc_u_hnode *root_ht; struct tc_u_common *tp_c; - for (tp_c = u32_list; tp_c; tp_c = tp_c->next) - if (tp_c->q == tp->q) - break; + tp_c = tp->q->u32_node; root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); if (root_ht == NULL) @@ -307,8 +302,7 @@ static int u32_init(struct tcf_proto *tp) return -ENOBUFS; } tp_c->q = tp->q; - tp_c->next = u32_list; - u32_list = tp_c; + tp->q->u32_node = tp_c; } tp_c->refcnt++; @@ -351,7 +345,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key) } } } - BUG_TRAP(0); + WARN_ON(1); return 0; } @@ -374,7 +368,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) struct tc_u_common *tp_c = tp->data; struct tc_u_hnode **hn; - BUG_TRAP(!ht->refcnt); + WARN_ON(ht->refcnt); u32_clear_hnode(tp, ht); @@ -386,7 +380,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) } } - BUG_TRAP(0); + WARN_ON(1); return -ENOENT; } @@ -395,21 +389,15 @@ static void u32_destroy(struct tcf_proto *tp) struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = xchg(&tp->root, NULL); - BUG_TRAP(root_ht != NULL); + WARN_ON(root_ht == NULL); if (root_ht && --root_ht->refcnt == 0) u32_destroy_hnode(tp, root_ht); if (--tp_c->refcnt == 0) { struct tc_u_hnode *ht; - struct tc_u_common **tp_cp; - for (tp_cp = &u32_list; *tp_cp; tp_cp = &(*tp_cp)->next) { - if (*tp_cp == tp_c) { - *tp_cp = tp_c->next; - break; - } - } + tp->q->u32_node = NULL; for (ht = tp_c->hlist; ht; ht = ht->next) { ht->refcnt--; @@ -419,7 +407,7 @@ static void u32_destroy(struct tcf_proto *tp) while ((ht = tp_c->hlist) != NULL) { tp_c->hlist = ht->next; - BUG_TRAP(ht->refcnt == 0); + WARN_ON(ht->refcnt != 0); kfree(ht); } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c40773cdbe45..b0601642e227 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -99,7 +99,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, ---requeue requeues once dequeued packet. It is used for non-standard or - just buggy devices, which can defer output even if dev->tbusy=0. + just buggy devices, which can defer output even if netif_queue_stopped()=0. ---reset @@ -185,11 +185,20 @@ EXPORT_SYMBOL(unregister_qdisc); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { - struct Qdisc *q; + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + struct Qdisc *q, *txq_root = txq->qdisc; + + if (!(txq_root->flags & TCQ_F_BUILTIN) && + txq_root->handle == handle) + return txq_root; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (q->handle == handle) - return q; + list_for_each_entry(q, &txq_root->list, list) { + if (q->handle == handle) + return q; + } } return NULL; } @@ -277,15 +286,137 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) } EXPORT_SYMBOL(qdisc_put_rtab); +static LIST_HEAD(qdisc_stab_list); +static DEFINE_SPINLOCK(qdisc_stab_lock); + +static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { + [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, + [TCA_STAB_DATA] = { .type = NLA_BINARY }, +}; + +static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) +{ + struct nlattr *tb[TCA_STAB_MAX + 1]; + struct qdisc_size_table *stab; + struct tc_sizespec *s; + unsigned int tsize = 0; + u16 *tab = NULL; + int err; + + err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy); + if (err < 0) + return ERR_PTR(err); + if (!tb[TCA_STAB_BASE]) + return ERR_PTR(-EINVAL); + + s = nla_data(tb[TCA_STAB_BASE]); + + if (s->tsize > 0) { + if (!tb[TCA_STAB_DATA]) + return ERR_PTR(-EINVAL); + tab = nla_data(tb[TCA_STAB_DATA]); + tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); + } + + if (!s || tsize != s->tsize || (!tab && tsize > 0)) + return ERR_PTR(-EINVAL); + + spin_lock(&qdisc_stab_lock); + + list_for_each_entry(stab, &qdisc_stab_list, list) { + if (memcmp(&stab->szopts, s, sizeof(*s))) + continue; + if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) + continue; + stab->refcnt++; + spin_unlock(&qdisc_stab_lock); + return stab; + } + + spin_unlock(&qdisc_stab_lock); + + stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); + if (!stab) + return ERR_PTR(-ENOMEM); + + stab->refcnt = 1; + stab->szopts = *s; + if (tsize > 0) + memcpy(stab->data, tab, tsize * sizeof(u16)); + + spin_lock(&qdisc_stab_lock); + list_add_tail(&stab->list, &qdisc_stab_list); + spin_unlock(&qdisc_stab_lock); + + return stab; +} + +void qdisc_put_stab(struct qdisc_size_table *tab) +{ + if (!tab) + return; + + spin_lock(&qdisc_stab_lock); + + if (--tab->refcnt == 0) { + list_del(&tab->list); + kfree(tab); + } + + spin_unlock(&qdisc_stab_lock); +} +EXPORT_SYMBOL(qdisc_put_stab); + +static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_STAB); + NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); + nla_nest_end(skb, nest); + + return skb->len; + +nla_put_failure: + return -1; +} + +void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + int pkt_len, slot; + + pkt_len = skb->len + stab->szopts.overhead; + if (unlikely(!stab->szopts.tsize)) + goto out; + + slot = pkt_len + stab->szopts.cell_align; + if (unlikely(slot < 0)) + slot = 0; + + slot >>= stab->szopts.cell_log; + if (likely(slot < stab->szopts.tsize)) + pkt_len = stab->data[slot]; + else + pkt_len = stab->data[stab->szopts.tsize - 1] * + (slot / stab->szopts.tsize) + + stab->data[slot % stab->szopts.tsize]; + + pkt_len <<= stab->szopts.size_log; +out: + if (unlikely(pkt_len < 1)) + pkt_len = 1; + qdisc_skb_cb(skb)->pkt_len = pkt_len; +} +EXPORT_SYMBOL(qdisc_calculate_pkt_len); + static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); - struct net_device *dev = wd->qdisc->dev; wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - netif_schedule(dev); + __netif_schedule(wd->qdisc); return HRTIMER_NORESTART; } @@ -316,6 +447,110 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) } EXPORT_SYMBOL(qdisc_watchdog_cancel); +static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) +{ + unsigned int size = n * sizeof(struct hlist_head), i; + struct hlist_head *h; + + if (size <= PAGE_SIZE) + h = kmalloc(size, GFP_KERNEL); + else + h = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(size)); + + if (h != NULL) { + for (i = 0; i < n; i++) + INIT_HLIST_HEAD(&h[i]); + } + return h; +} + +static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n) +{ + unsigned int size = n * sizeof(struct hlist_head); + + if (size <= PAGE_SIZE) + kfree(h); + else + free_pages((unsigned long)h, get_order(size)); +} + +void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) +{ + struct Qdisc_class_common *cl; + struct hlist_node *n, *next; + struct hlist_head *nhash, *ohash; + unsigned int nsize, nmask, osize; + unsigned int i, h; + + /* Rehash when load factor exceeds 0.75 */ + if (clhash->hashelems * 4 <= clhash->hashsize * 3) + return; + nsize = clhash->hashsize * 2; + nmask = nsize - 1; + nhash = qdisc_class_hash_alloc(nsize); + if (nhash == NULL) + return; + + ohash = clhash->hash; + osize = clhash->hashsize; + + sch_tree_lock(sch); + for (i = 0; i < osize; i++) { + hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) { + h = qdisc_class_hash(cl->classid, nmask); + hlist_add_head(&cl->hnode, &nhash[h]); + } + } + clhash->hash = nhash; + clhash->hashsize = nsize; + clhash->hashmask = nmask; + sch_tree_unlock(sch); + + qdisc_class_hash_free(ohash, osize); +} +EXPORT_SYMBOL(qdisc_class_hash_grow); + +int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) +{ + unsigned int size = 4; + + clhash->hash = qdisc_class_hash_alloc(size); + if (clhash->hash == NULL) + return -ENOMEM; + clhash->hashsize = size; + clhash->hashmask = size - 1; + clhash->hashelems = 0; + return 0; +} +EXPORT_SYMBOL(qdisc_class_hash_init); + +void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) +{ + qdisc_class_hash_free(clhash->hash, clhash->hashsize); +} +EXPORT_SYMBOL(qdisc_class_hash_destroy); + +void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash, + struct Qdisc_class_common *cl) +{ + unsigned int h; + + INIT_HLIST_NODE(&cl->hnode); + h = qdisc_class_hash(cl->classid, clhash->hashmask); + hlist_add_head(&cl->hnode, &clhash->hash[h]); + clhash->hashelems++; +} +EXPORT_SYMBOL(qdisc_class_hash_insert); + +void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash, + struct Qdisc_class_common *cl) +{ + hlist_del(&cl->hnode); + clhash->hashelems--; +} +EXPORT_SYMBOL(qdisc_class_hash_remove); + /* Allocate an unique handle from space managed by kernel */ static u32 qdisc_alloc_handle(struct net_device *dev) @@ -332,32 +567,39 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return i>0 ? autohandle : 0; } -/* Attach toplevel qdisc to device dev */ +/* Attach toplevel qdisc to device queue. */ -static struct Qdisc * -dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) +static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, + struct Qdisc *qdisc) { + spinlock_t *root_lock; struct Qdisc *oqdisc; + int ingress; + + ingress = 0; + if (qdisc && qdisc->flags&TCQ_F_INGRESS) + ingress = 1; + + if (ingress) { + oqdisc = dev_queue->qdisc; + } else { + oqdisc = dev_queue->qdisc_sleeping; + } - if (dev->flags & IFF_UP) - dev_deactivate(dev); + root_lock = qdisc_root_lock(oqdisc); + spin_lock_bh(root_lock); - qdisc_lock_tree(dev); - if (qdisc && qdisc->flags&TCQ_F_INGRESS) { - oqdisc = dev->qdisc_ingress; + if (ingress) { /* Prune old scheduler */ if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) { /* delete */ qdisc_reset(oqdisc); - dev->qdisc_ingress = NULL; + dev_queue->qdisc = NULL; } else { /* new */ - dev->qdisc_ingress = qdisc; + dev_queue->qdisc = qdisc; } } else { - - oqdisc = dev->qdisc_sleeping; - /* Prune old scheduler */ if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) qdisc_reset(oqdisc); @@ -365,14 +607,11 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) /* ... and graft new one */ if (qdisc == NULL) qdisc = &noop_qdisc; - dev->qdisc_sleeping = qdisc; - dev->qdisc = &noop_qdisc; + dev_queue->qdisc_sleeping = qdisc; + dev_queue->qdisc = &noop_qdisc; } - qdisc_unlock_tree(dev); - - if (dev->flags & IFF_UP) - dev_activate(dev); + spin_unlock_bh(root_lock); return oqdisc; } @@ -389,7 +628,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) return; - sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); + sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { WARN_ON(parentid != TC_H_ROOT); return; @@ -405,26 +644,66 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); -/* Graft qdisc "new" to class "classid" of qdisc "parent" or - to device "dev". +static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) +{ + if (new || old) + qdisc_notify(skb, n, clid, old, new); + + if (old) { + spin_lock_bh(&old->q.lock); + qdisc_destroy(old); + spin_unlock_bh(&old->q.lock); + } +} - Old qdisc is not destroyed but returned in *old. +/* Graft qdisc "new" to class "classid" of qdisc "parent" or + * to device "dev". + * + * When appropriate send a netlink notification using 'skb' + * and "n". + * + * On success, destroy old qdisc. */ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, - u32 classid, - struct Qdisc *new, struct Qdisc **old) + struct sk_buff *skb, struct nlmsghdr *n, u32 classid, + struct Qdisc *new, struct Qdisc *old) { + struct Qdisc *q = old; int err = 0; - struct Qdisc *q = *old; - if (parent == NULL) { - if (q && q->flags&TCQ_F_INGRESS) { - *old = dev_graft_qdisc(dev, q); - } else { - *old = dev_graft_qdisc(dev, new); + unsigned int i, num_q, ingress; + + ingress = 0; + num_q = dev->num_tx_queues; + if (q && q->flags & TCQ_F_INGRESS) { + num_q = 1; + ingress = 1; + } + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + for (i = 0; i < num_q; i++) { + struct netdev_queue *dev_queue = &dev->rx_queue; + + if (!ingress) + dev_queue = netdev_get_tx_queue(dev, i); + + if (ingress) { + old = dev_graft_qdisc(dev_queue, q); + } else { + old = dev_graft_qdisc(dev_queue, new); + if (new && i > 0) + atomic_inc(&new->refcnt); + } + notify_and_destroy(skb, n, classid, old, new); } + + if (dev->flags & IFF_UP) + dev_activate(dev); } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; @@ -433,10 +712,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (cops) { unsigned long cl = cops->get(parent, classid); if (cl) { - err = cops->graft(parent, cl, new, old); + err = cops->graft(parent, cl, new, &old); cops->put(parent, cl); } } + if (!err) + notify_and_destroy(skb, n, classid, old, new); } return err; } @@ -448,13 +729,14 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, */ static struct Qdisc * -qdisc_create(struct net_device *dev, u32 parent, u32 handle, - struct nlattr **tca, int *errp) +qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, + u32 parent, u32 handle, struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; struct Qdisc *sch; struct Qdisc_ops *ops; + struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); #ifdef CONFIG_KMOD @@ -489,7 +771,7 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, if (ops == NULL) goto err_out; - sch = qdisc_alloc(dev, ops); + sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) { err = PTR_ERR(sch); goto err_out2; @@ -499,10 +781,8 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; - sch->stats_lock = &dev->ingress_lock; handle = TC_H_MAKE(TC_H_INGRESS, 0); } else { - sch->stats_lock = &dev->queue_lock; if (handle == 0) { handle = qdisc_alloc_handle(dev); err = -ENOMEM; @@ -514,9 +794,17 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, sch->handle = handle; if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) { + err = PTR_ERR(stab); + goto err_out3; + } + sch->stab = stab; + } if (tca[TCA_RATE]) { err = gen_new_estimator(&sch->bstats, &sch->rate_est, - sch->stats_lock, + qdisc_root_lock(sch), tca[TCA_RATE]); if (err) { /* @@ -529,13 +817,13 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, goto err_out3; } } - qdisc_lock_tree(dev); - list_add_tail(&sch->list, &dev->qdisc_list); - qdisc_unlock_tree(dev); + if (parent) + list_add_tail(&sch->list, &dev_queue->qdisc->list); return sch; } err_out3: + qdisc_put_stab(sch->stab); dev_put(dev); kfree((char *) sch - sch->padded); err_out2: @@ -547,18 +835,29 @@ err_out: static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) { - if (tca[TCA_OPTIONS]) { - int err; + struct qdisc_size_table *stab = NULL; + int err = 0; + if (tca[TCA_OPTIONS]) { if (sch->ops->change == NULL) return -EINVAL; err = sch->ops->change(sch, tca[TCA_OPTIONS]); if (err) return err; } + + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) + return PTR_ERR(stab); + } + + qdisc_put_stab(sch->stab); + sch->stab = stab; + if (tca[TCA_RATE]) gen_replace_estimator(&sch->bstats, &sch->rate_est, - sch->stats_lock, tca[TCA_RATE]); + qdisc_root_lock(sch), tca[TCA_RATE]); return 0; } @@ -634,10 +933,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -ENOENT; q = qdisc_leaf(p, clid); } else { /* ingress */ - q = dev->qdisc_ingress; + q = dev->rx_queue.qdisc; } } else { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue; + dev_queue = netdev_get_tx_queue(dev, 0); + q = dev_queue->qdisc_sleeping; } if (!q) return -ENOENT; @@ -657,14 +958,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -EINVAL; if (q->handle == 0) return -ENOENT; - if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0) + if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) return err; - if (q) { - qdisc_notify(skb, n, clid, q, NULL); - qdisc_lock_tree(dev); - qdisc_destroy(q); - qdisc_unlock_tree(dev); - } } else { qdisc_notify(skb, n, clid, NULL, q); } @@ -708,10 +1003,12 @@ replay: return -ENOENT; q = qdisc_leaf(p, clid); } else { /*ingress */ - q = dev->qdisc_ingress; + q = dev->rx_queue.qdisc; } } else { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue; + dev_queue = netdev_get_tx_queue(dev, 0); + q = dev_queue->qdisc_sleeping; } /* It may be default qdisc, ignore it */ @@ -788,10 +1085,12 @@ create_n_graft: if (!(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; if (clid == TC_H_INGRESS) - q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent, + q = qdisc_create(dev, &dev->rx_queue, + tcm->tcm_parent, tcm->tcm_parent, tca, &err); else - q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle, + q = qdisc_create(dev, netdev_get_tx_queue(dev, 0), + tcm->tcm_parent, tcm->tcm_handle, tca, &err); if (q == NULL) { if (err == -EAGAIN) @@ -801,22 +1100,18 @@ create_n_graft: graft: if (1) { - struct Qdisc *old_q = NULL; - err = qdisc_graft(dev, p, clid, q, &old_q); + spinlock_t *root_lock; + + err = qdisc_graft(dev, p, skb, n, clid, q, NULL); if (err) { if (q) { - qdisc_lock_tree(dev); + root_lock = qdisc_root_lock(q); + spin_lock_bh(root_lock); qdisc_destroy(q); - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } return err; } - qdisc_notify(skb, n, clid, old_q, q); - if (old_q) { - qdisc_lock_tree(dev); - qdisc_destroy(old_q); - qdisc_unlock_tree(dev); - } } return 0; } @@ -834,7 +1129,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = clid; tcm->tcm_handle = q->handle; tcm->tcm_info = atomic_read(&q->refcnt); @@ -843,8 +1138,11 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; q->qstats.qlen = q->q.qlen; + if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) + goto nla_put_failure; + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, q->stats_lock, &d) < 0) + TCA_XSTATS, qdisc_root_lock(q), &d) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -894,13 +1192,57 @@ err_out: return -EINVAL; } +static bool tc_qdisc_dump_ignore(struct Qdisc *q) +{ + return (q->flags & TCQ_F_BUILTIN) ? true : false; +} + +static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, + struct netlink_callback *cb, + int *q_idx_p, int s_q_idx) +{ + int ret = 0, q_idx = *q_idx_p; + struct Qdisc *q; + + if (!root) + return 0; + + q = root; + if (q_idx < s_q_idx) { + q_idx++; + } else { + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + list_for_each_entry(q, &root->list, list) { + if (q_idx < s_q_idx) { + q_idx++; + continue; + } + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + +out: + *q_idx_p = q_idx; + return ret; +done: + ret = -1; + goto out; +} + static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; - struct Qdisc *q; if (net != &init_net) return 0; @@ -910,21 +1252,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) read_lock(&dev_base_lock); idx = 0; for_each_netdev(&init_net, dev) { + struct netdev_queue *dev_queue; + if (idx < s_idx) goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (q_idx < s_q_idx) { - q_idx++; - continue; - } - if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) - goto done; - q_idx++; - } + + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + + dev_queue = &dev->rx_queue; + if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + cont: idx++; } @@ -949,6 +1292,7 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); + struct netdev_queue *dev_queue; struct tcmsg *tcm = NLMSG_DATA(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; @@ -986,6 +1330,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ + dev_queue = netdev_get_tx_queue(dev, 0); if (pid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(pid); @@ -996,7 +1341,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } else if (qid1) { qid = qid1; } else if (qid == 0) - qid = dev->qdisc_sleeping->handle; + qid = dev_queue->qdisc_sleeping->handle; /* Now qid is genuine qdisc handle consistent both with parent and child. @@ -1007,7 +1352,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) pid = TC_H_MAKE(qid, pid); } else { if (qid == 0) - qid = dev->qdisc_sleeping->handle; + qid = dev_queue->qdisc_sleeping->handle; } /* OK. Locate qdisc */ @@ -1080,7 +1425,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; - tcm->tcm_ifindex = q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = q->handle; tcm->tcm_handle = q->handle; tcm->tcm_info = 0; @@ -1089,7 +1434,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, q->stats_lock, &d) < 0) + TCA_XSTATS, qdisc_root_lock(q), &d) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) @@ -1140,15 +1485,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } +static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct qdisc_dump_args arg; + + if (tc_qdisc_dump_ignore(q) || + *t_p < s_t || !q->ops->cl_ops || + (tcm->tcm_parent && + TC_H_MAJ(tcm->tcm_parent) != q->handle)) { + (*t_p)++; + return 0; + } + if (*t_p > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); + arg.w.fn = qdisc_class_dump; + arg.skb = skb; + arg.cb = cb; + arg.w.stop = 0; + arg.w.skip = cb->args[1]; + arg.w.count = 0; + q->ops->cl_ops->walk(q, &arg.w); + cb->args[1] = arg.w.count; + if (arg.w.stop) + return -1; + (*t_p)++; + return 0; +} + +static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct Qdisc *q; + + if (!root) + return 0; + + if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) + return -1; + + list_for_each_entry(q, &root->list, list) { + if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + return -1; + } + + return 0; +} + static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); struct net *net = sock_net(skb->sk); - int t; - int s_t; + struct netdev_queue *dev_queue; struct net_device *dev; - struct Qdisc *q; - struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); - struct qdisc_dump_args arg; + int t, s_t; if (net != &init_net) return 0; @@ -1161,28 +1553,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (t < s_t || !q->ops->cl_ops || - (tcm->tcm_parent && - TC_H_MAJ(tcm->tcm_parent) != q->handle)) { - t++; - continue; - } - if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); - arg.w.fn = qdisc_class_dump; - arg.skb = skb; - arg.cb = cb; - arg.w.stop = 0; - arg.w.skip = cb->args[1]; - arg.w.count = 0; - q->ops->cl_ops->walk(q, &arg.w); - cb->args[1] = arg.w.count; - if (arg.w.stop) - break; - t++; - } + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + goto done; + dev_queue = &dev->rx_queue; + if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + goto done; + +done: cb->args[0] = t; dev_put(dev); @@ -1252,12 +1631,12 @@ void tcf_destroy(struct tcf_proto *tp) kfree(tp); } -void tcf_destroy_chain(struct tcf_proto *fl) +void tcf_destroy_chain(struct tcf_proto **fl) { struct tcf_proto *tp; - while ((tp = fl) != NULL) { - fl = tp->next; + while ((tp = *fl) != NULL) { + *fl = tp->next; tcf_destroy(tp); } } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 335273416384..6b517b9dac5b 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -160,9 +160,9 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) *prev = flow->next; pr_debug("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); - tcf_destroy_chain(flow->filter_list); + tcf_destroy_chain(&flow->filter_list); if (flow->sock) { - pr_debug("atm_tc_put: f_count %d\n", + pr_debug("atm_tc_put: f_count %ld\n", file_count(flow->sock->file)); flow->vcc->pop = flow->old_pop; sockfd_put(flow->sock); @@ -259,7 +259,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, sock = sockfd_lookup(fd, &error); if (!sock) return error; /* f_count++ */ - pr_debug("atm_tc_change: f_count %d\n", file_count(sock->file)); + pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file)); if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { error = -EPROTOTYPE; goto err_out; @@ -296,7 +296,8 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, goto err_out; } flow->filter_list = NULL; - flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); if (!flow->q) flow->q = &noop_qdisc; pr_debug("atm_tc_change: qdisc %p\n", flow->q); @@ -428,7 +429,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) #endif } - ret = flow->q->enqueue(skb, flow->q); + ret = qdisc_enqueue(skb, flow->q); if (ret != 0) { drop: __maybe_unused sch->qstats.drops++; @@ -436,9 +437,9 @@ drop: __maybe_unused flow->qstats.drops++; return ret; } - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - flow->bstats.bytes += skb->len; + flow->bstats.bytes += qdisc_pkt_len(skb); flow->bstats.packets++; /* * Okay, this may seem weird. We pretend we've dropped the packet if @@ -555,7 +556,8 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); p->flows = &p->link; - p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); + p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (!p->link.q) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); @@ -586,10 +588,11 @@ static void atm_tc_destroy(struct Qdisc *sch) struct atm_flow_data *flow; pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); + for (flow = p->flows; flow; flow = flow->next) + tcf_destroy_chain(&flow->filter_list); + /* races ? */ while ((flow = p->flows)) { - tcf_destroy_chain(flow->filter_list); - flow->filter_list = NULL; if (flow->ref > 1) printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, flow->ref); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 09969c1fbc08..14954bf4a683 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -73,11 +73,10 @@ struct cbq_sched_data; struct cbq_class { - struct cbq_class *next; /* hash table link */ + struct Qdisc_class_common common; struct cbq_class *next_alive; /* next class with backlog in this priority band */ /* Parameters */ - u32 classid; unsigned char priority; /* class priority */ unsigned char priority2; /* priority to be used after overlimit */ unsigned char ewma_log; /* time constant for idle time calculation */ @@ -144,7 +143,7 @@ struct cbq_class struct cbq_sched_data { - struct cbq_class *classes[16]; /* Hash table of all classes */ + struct Qdisc_class_hash clhash; /* Hash table of all classes */ int nclasses[TC_CBQ_MAXPRIO+1]; unsigned quanta[TC_CBQ_MAXPRIO+1]; @@ -177,23 +176,15 @@ struct cbq_sched_data #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) - -static __inline__ unsigned cbq_hash(u32 h) -{ - h ^= h>>8; - h ^= h>>4; - return h&0xF; -} - static __inline__ struct cbq_class * cbq_class_lookup(struct cbq_sched_data *q, u32 classid) { - struct cbq_class *cl; + struct Qdisc_class_common *clc; - for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next) - if (cl->classid == classid) - return cl; - return NULL; + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct cbq_class, common); } #ifdef CONFIG_NET_CLS_ACT @@ -379,7 +370,6 @@ static int cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); - int len = skb->len; int uninitialized_var(ret); struct cbq_class *cl = cbq_classify(skb, sch, &ret); @@ -396,10 +386,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) #ifdef CONFIG_NET_CLS_ACT cl->q->__parent = sch; #endif - if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) { + ret = qdisc_enqueue(skb, cl->q); + if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; sch->bstats.packets++; - sch->bstats.bytes+=len; + sch->bstats.bytes += qdisc_pkt_len(skb); cbq_mark_toplevel(q, cl); if (!cl->next_alive) cbq_activate_class(cl); @@ -659,14 +650,13 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) } sch->flags &= ~TCQ_F_THROTTLED; - netif_schedule(sch->dev); + __netif_schedule(sch); return HRTIMER_NORESTART; } #ifdef CONFIG_NET_CLS_ACT static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) { - int len = skb->len; struct Qdisc *sch = child->__parent; struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = q->rx_class; @@ -680,10 +670,10 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) q->rx_class = cl; cl->q->__parent = sch; - if (cl->q->enqueue(skb, cl->q) == 0) { + if (qdisc_enqueue(skb, cl->q) == 0) { sch->q.qlen++; sch->bstats.packets++; - sch->bstats.bytes+=len; + sch->bstats.bytes += qdisc_pkt_len(skb); if (!cl->next_alive) cbq_activate_class(cl); return 0; @@ -889,7 +879,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) if (skb == NULL) goto skip_class; - cl->deficit -= skb->len; + cl->deficit -= qdisc_pkt_len(skb); q->tx_class = cl; q->tx_borrowed = borrow; if (borrow != cl) { @@ -897,11 +887,11 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) borrow->xstats.borrows++; cl->xstats.borrows++; #else - borrow->xstats.borrows += skb->len; - cl->xstats.borrows += skb->len; + borrow->xstats.borrows += qdisc_pkt_len(skb); + cl->xstats.borrows += qdisc_pkt_len(skb); #endif } - q->tx_len = skb->len; + q->tx_len = qdisc_pkt_len(skb); if (cl->deficit <= 0) { q->active[prio] = cl; @@ -1071,13 +1061,14 @@ static void cbq_adjust_levels(struct cbq_class *this) static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) { struct cbq_class *cl; - unsigned h; + struct hlist_node *n; + unsigned int h; if (q->quanta[prio] == 0) return; - for (h=0; h<16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { /* BUGGGG... Beware! This expression suffer of arithmetic overflows! */ @@ -1085,9 +1076,9 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ q->quanta[prio]; } - if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) { - printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum); - cl->quantum = cl->qdisc->dev->mtu/2 + 1; + if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { + printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum); + cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; } } } @@ -1114,10 +1105,12 @@ static void cbq_sync_defmap(struct cbq_class *cl) if (split->defaults[i]) continue; - for (h=0; h<16; h++) { + for (h = 0; h < q->clhash.hashsize; h++) { + struct hlist_node *n; struct cbq_class *c; - for (c = q->classes[h]; c; c = c->next) { + hlist_for_each_entry(c, n, &q->clhash.hash[h], + common.hnode) { if (c->split == split && c->level < level && c->defmap&(1<<i)) { split->defaults[i] = c; @@ -1135,12 +1128,12 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma if (splitid == 0) { if ((split = cl->split) == NULL) return; - splitid = split->classid; + splitid = split->common.classid; } - if (split == NULL || split->classid != splitid) { + if (split == NULL || split->common.classid != splitid) { for (split = cl->tparent; split; split = split->tparent) - if (split->classid == splitid) + if (split->common.classid == splitid) break; } @@ -1163,13 +1156,7 @@ static void cbq_unlink_class(struct cbq_class *this) struct cbq_class *cl, **clp; struct cbq_sched_data *q = qdisc_priv(this->qdisc); - for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) { - if (cl == this) { - *clp = cl->next; - cl->next = NULL; - break; - } - } + qdisc_class_hash_remove(&q->clhash, &this->common); if (this->tparent) { clp=&this->sibling; @@ -1188,19 +1175,17 @@ static void cbq_unlink_class(struct cbq_class *this) this->tparent->children = NULL; } } else { - BUG_TRAP(this->sibling == this); + WARN_ON(this->sibling != this); } } static void cbq_link_class(struct cbq_class *this) { struct cbq_sched_data *q = qdisc_priv(this->qdisc); - unsigned h = cbq_hash(this->classid); struct cbq_class *parent = this->tparent; this->sibling = this; - this->next = q->classes[h]; - q->classes[h] = this; + qdisc_class_hash_insert(&q->clhash, &this->common); if (parent == NULL) return; @@ -1242,6 +1227,7 @@ cbq_reset(struct Qdisc* sch) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; + struct hlist_node *n; int prio; unsigned h; @@ -1258,8 +1244,8 @@ cbq_reset(struct Qdisc* sch) for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) q->active[prio] = NULL; - for (h = 0; h < 16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { qdisc_reset(cl->q); cl->next_alive = NULL; @@ -1406,11 +1392,16 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL) return -EINVAL; + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + goto put_rtab; + q->link.refcnt = 1; q->link.sibling = &q->link; - q->link.classid = sch->handle; + q->link.common.classid = sch->handle; q->link.qdisc = sch; - if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle))) q->link.q = &noop_qdisc; @@ -1419,7 +1410,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.cpriority = TC_CBQ_MAXPRIO-1; q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; q->link.overlimit = cbq_ovl_classic; - q->link.allot = psched_mtu(sch->dev); + q->link.allot = psched_mtu(qdisc_dev(sch)); q->link.quantum = q->link.allot; q->link.weight = q->link.R_tab->rate.rate; @@ -1441,6 +1432,10 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) cbq_addprio(q, &q->link); return 0; + +put_rtab: + qdisc_put_rtab(q->link.R_tab); + return err; } static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) @@ -1521,7 +1516,7 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) struct tc_cbq_fopt opt; if (cl->split || cl->defmap) { - opt.split = cl->split ? cl->split->classid : 0; + opt.split = cl->split ? cl->split->common.classid : 0; opt.defmap = cl->defmap; opt.defchange = ~0; NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt); @@ -1602,10 +1597,10 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, struct nlattr *nest; if (cl->tparent) - tcm->tcm_parent = cl->tparent->classid; + tcm->tcm_parent = cl->tparent->common.classid; else tcm->tcm_parent = TC_H_ROOT; - tcm->tcm_handle = cl->classid; + tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->q->handle; nest = nla_nest_start(skb, TCA_OPTIONS); @@ -1650,8 +1645,10 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl) { if (new == NULL) { - if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->classid)) == NULL) + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->common.classid); + if (new == NULL) return -ENOBUFS; } else { #ifdef CONFIG_NET_CLS_ACT @@ -1702,9 +1699,9 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) { struct cbq_sched_data *q = qdisc_priv(sch); - BUG_TRAP(!cl->filters); + WARN_ON(cl->filters); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); gen_kill_estimator(&cl->bstats, &cl->rate_est); @@ -1716,6 +1713,7 @@ static void cbq_destroy(struct Qdisc* sch) { struct cbq_sched_data *q = qdisc_priv(sch); + struct hlist_node *n, *next; struct cbq_class *cl; unsigned h; @@ -1727,20 +1725,16 @@ cbq_destroy(struct Qdisc* sch) * classes from root to leafs which means that filters can still * be bound to classes which have been destroyed already. --TGR '04 */ - for (h = 0; h < 16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { - tcf_destroy_chain(cl->filter_list); - cl->filter_list = NULL; - } + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) + tcf_destroy_chain(&cl->filter_list); } - for (h = 0; h < 16; h++) { - struct cbq_class *next; - - for (cl = q->classes[h]; cl; cl = next) { - next = cl->next; + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h], + common.hnode) cbq_destroy_class(sch, cl); - } } + qdisc_class_hash_destroy(&q->clhash); } static void cbq_put(struct Qdisc *sch, unsigned long arg) @@ -1749,12 +1743,13 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) if (--cl->refcnt == 0) { #ifdef CONFIG_NET_CLS_ACT + spinlock_t *root_lock = qdisc_root_lock(sch); struct cbq_sched_data *q = qdisc_priv(sch); - spin_lock_bh(&sch->dev->queue_lock); + spin_lock_bh(root_lock); if (q->rx_class == cl) q->rx_class = NULL; - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); #endif cbq_destroy_class(sch, cl); @@ -1783,7 +1778,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl) { /* Check parent */ if (parentid) { - if (cl->tparent && cl->tparent->classid != parentid) + if (cl->tparent && + cl->tparent->common.classid != parentid) return -EINVAL; if (!cl->tparent && parentid != TC_H_ROOT) return -EINVAL; @@ -1832,7 +1828,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_lock(sch), tca[TCA_RATE]); return 0; } @@ -1883,9 +1879,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; - if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) + if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid))) cl->q = &noop_qdisc; - cl->classid = classid; + cl->common.classid = classid; cl->tparent = parent; cl->qdisc = sch; cl->allot = parent->allot; @@ -1918,9 +1915,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); sch_tree_unlock(sch); + qdisc_class_hash_grow(sch, &q->clhash); + if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, tca[TCA_RATE]); + qdisc_root_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; @@ -2010,15 +2009,15 @@ static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct cbq_sched_data *q = qdisc_priv(sch); + struct cbq_class *cl; + struct hlist_node *n; unsigned h; if (arg->stop) return; - for (h = 0; h < 16; h++) { - struct cbq_class *cl; - - for (cl = q->classes[h]; cl; cl = cl->next) { + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 0df911fd67b1..a935676987e2 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -60,7 +60,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, sch, p, new, old); if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (new == NULL) new = &noop_qdisc; @@ -251,13 +252,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) } } - err = p->q->enqueue(skb, p->q); + err = qdisc_enqueue(skb, p->q); if (err != NET_XMIT_SUCCESS) { sch->qstats.drops++; return err; } - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; sch->q.qlen++; @@ -390,7 +391,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) p->default_index = default_index; p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); + p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; @@ -416,7 +418,7 @@ static void dsmark_destroy(struct Qdisc *sch) pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); - tcf_destroy_chain(p->filter_list); + tcf_destroy_chain(&p->filter_list); qdisc_destroy(p->q); kfree(p->mask); } @@ -444,7 +446,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -466,7 +469,8 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static const struct Qdisc_class_ops dsmark_class_ops = { diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 95ed48221652..23d258bfe8ac 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -27,7 +27,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) { struct fifo_sched_data *q = qdisc_priv(sch); - if (likely(sch->qstats.backlog + skb->len <= q->limit)) + if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_reshape_fail(skb, sch); @@ -48,10 +48,10 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) struct fifo_sched_data *q = qdisc_priv(sch); if (opt == NULL) { - u32 limit = sch->dev->tx_queue_len ? : 1; + u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; if (sch->ops == &bfifo_qdisc_ops) - limit *= sch->dev->mtu; + limit *= qdisc_dev(sch)->mtu; q->limit = limit; } else { @@ -107,3 +107,46 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; EXPORT_SYMBOL(bfifo_qdisc_ops); + +/* Pass size change message down to embedded FIFO */ +int fifo_set_limit(struct Qdisc *q, unsigned int limit) +{ + struct nlattr *nla; + int ret = -ENOMEM; + + /* Hack to avoid sending change message to non-FIFO */ + if (strncmp(q->ops->id + 1, "fifo", 4) != 0) + return 0; + + nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); + if (nla) { + nla->nla_type = RTM_NEWQDISC; + nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); + ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; + + ret = q->ops->change(q, nla); + kfree(nla); + } + return ret; +} +EXPORT_SYMBOL(fifo_set_limit); + +struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, + unsigned int limit) +{ + struct Qdisc *q; + int err = -ENOMEM; + + q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + ops, TC_H_MAKE(sch->handle, 1)); + if (q) { + err = fifo_set_limit(q, limit); + if (err < 0) { + qdisc_destroy(q); + q = NULL; + } + } + + return q ? : ERR_PTR(err); +} +EXPORT_SYMBOL(fifo_create_dflt); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d355e5e47fe3..fd2a6cadb115 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -29,58 +29,36 @@ /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with - * dev->queue_lock spinlock. + * qdisc_root_lock(qdisc) spinlock. * * The idea is the following: - * - enqueue, dequeue are serialized via top level device - * spinlock dev->queue_lock. - * - ingress filtering is serialized via top level device - * spinlock dev->ingress_lock. + * - enqueue, dequeue are serialized via qdisc root lock + * - ingress filtering is also serialized via qdisc root lock * - updates to tree and tree walking are only done under the rtnl mutex. */ -void qdisc_lock_tree(struct net_device *dev) - __acquires(dev->queue_lock) - __acquires(dev->ingress_lock) -{ - spin_lock_bh(&dev->queue_lock); - spin_lock(&dev->ingress_lock); -} -EXPORT_SYMBOL(qdisc_lock_tree); - -void qdisc_unlock_tree(struct net_device *dev) - __releases(dev->ingress_lock) - __releases(dev->queue_lock) -{ - spin_unlock(&dev->ingress_lock); - spin_unlock_bh(&dev->queue_lock); -} -EXPORT_SYMBOL(qdisc_unlock_tree); - static inline int qdisc_qlen(struct Qdisc *q) { return q->q.qlen; } -static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, - struct Qdisc *q) +static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { if (unlikely(skb->next)) - dev->gso_skb = skb; + q->gso_skb = skb; else q->ops->requeue(skb, q); - netif_schedule(dev); + __netif_schedule(q); return 0; } -static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, - struct Qdisc *q) +static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { struct sk_buff *skb; - if ((skb = dev->gso_skb)) - dev->gso_skb = NULL; + if ((skb = q->gso_skb)) + q->gso_skb = NULL; else skb = q->dequeue(q); @@ -88,12 +66,12 @@ static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, } static inline int handle_dev_cpu_collision(struct sk_buff *skb, - struct net_device *dev, + struct netdev_queue *dev_queue, struct Qdisc *q) { int ret; - if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { + if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { /* * Same CPU holding the lock. It may be a transient * configuration error, when hard_start_xmit() recurses. We @@ -103,7 +81,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, kfree_skb(skb); if (net_ratelimit()) printk(KERN_WARNING "Dead loop on netdevice %s, " - "fix it urgently!\n", dev->name); + "fix it urgently!\n", dev_queue->dev->name); ret = qdisc_qlen(q); } else { /* @@ -111,22 +89,22 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * some time. */ __get_cpu_var(netdev_rx_stat).cpu_collision++; - ret = dev_requeue_skb(skb, dev, q); + ret = dev_requeue_skb(skb, q); } return ret; } /* - * NOTE: Called under dev->queue_lock with locally disabled BH. + * NOTE: Called under qdisc_lock(q) with locally disabled BH. * - * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this - * device at a time. dev->queue_lock serializes queue accesses for - * this device AND dev->qdisc pointer itself. + * __QDISC_STATE_RUNNING guarantees only one CPU can process + * this qdisc at a time. qdisc_lock(q) serializes queue accesses for + * this queue. * * netif_tx_lock serializes accesses to device driver. * - * dev->queue_lock and netif_tx_lock are mutually exclusive, + * qdisc_lock(q) and netif_tx_lock are mutually exclusive, * if one is grabbed, another must be free. * * Note, that this procedure can be called by a watchdog timer @@ -136,27 +114,32 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * >0 - queue is not empty. * */ -static inline int qdisc_restart(struct net_device *dev) +static inline int qdisc_restart(struct Qdisc *q) { - struct Qdisc *q = dev->qdisc; - struct sk_buff *skb; + struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; + struct net_device *dev; + spinlock_t *root_lock; + struct sk_buff *skb; /* Dequeue packet */ - if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) + if (unlikely((skb = dequeue_skb(q)) == NULL)) return 0; + root_lock = qdisc_root_lock(q); + + /* And release qdisc */ + spin_unlock(root_lock); - /* And release queue */ - spin_unlock(&dev->queue_lock); + dev = qdisc_dev(q); + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - HARD_TX_LOCK(dev, smp_processor_id()); + HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_subqueue_stopped(dev, skb)) - ret = dev_hard_start_xmit(skb, dev); - HARD_TX_UNLOCK(dev); + ret = dev_hard_start_xmit(skb, dev, txq); + HARD_TX_UNLOCK(dev, txq); - spin_lock(&dev->queue_lock); - q = dev->qdisc; + spin_lock(root_lock); switch (ret) { case NETDEV_TX_OK: @@ -166,7 +149,7 @@ static inline int qdisc_restart(struct net_device *dev) case NETDEV_TX_LOCKED: /* Driver try lock failed */ - ret = handle_dev_cpu_collision(skb, dev, q); + ret = handle_dev_cpu_collision(skb, txq, q); break; default: @@ -175,33 +158,33 @@ static inline int qdisc_restart(struct net_device *dev) printk(KERN_WARNING "BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); - ret = dev_requeue_skb(skb, dev, q); + ret = dev_requeue_skb(skb, q); break; } + if (ret && netif_tx_queue_stopped(txq)) + ret = 0; + return ret; } -void __qdisc_run(struct net_device *dev) +void __qdisc_run(struct Qdisc *q) { unsigned long start_time = jiffies; - while (qdisc_restart(dev)) { - if (netif_queue_stopped(dev)) - break; - + while (qdisc_restart(q)) { /* * Postpone processing if * 1. another process needs the CPU; * 2. we've been doing it for too long. */ if (need_resched() || jiffies != start_time) { - netif_schedule(dev); + __netif_schedule(q); break; } } - clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); + clear_bit(__QDISC_STATE_RUNNING, &q->state); } static void dev_watchdog(unsigned long arg) @@ -209,19 +192,35 @@ static void dev_watchdog(unsigned long arg) struct net_device *dev = (struct net_device *)arg; netif_tx_lock(dev); - if (dev->qdisc != &noop_qdisc) { + if (!qdisc_tx_is_noop(dev)) { if (netif_device_present(dev) && netif_running(dev) && netif_carrier_ok(dev)) { - if (netif_queue_stopped(dev) && - time_after(jiffies, dev->trans_start + dev->watchdog_timeo)) { + int some_queue_stopped = 0; + unsigned int i; - printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", - dev->name); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(dev, i); + if (netif_tx_queue_stopped(txq)) { + some_queue_stopped = 1; + break; + } + } + + if (some_queue_stopped && + time_after(jiffies, (dev->trans_start + + dev->watchdog_timeo))) { + char drivername[64]; + printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", + dev->name, netdev_drivername(dev, drivername, 64)); dev->tx_timeout(dev); WARN_ON_ONCE(1); } - if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) + if (!mod_timer(&dev->watchdog_timer, + round_jiffies(jiffies + + dev->watchdog_timeo))) dev_hold(dev); } } @@ -317,12 +316,18 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct netdev_queue noop_netdev_queue = { + .qdisc = &noop_qdisc, +}; + struct Qdisc noop_qdisc = { .enqueue = noop_enqueue, .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), + .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), + .dev_queue = &noop_netdev_queue, }; EXPORT_SYMBOL(noop_qdisc); @@ -335,12 +340,19 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct Qdisc noqueue_qdisc; +static struct netdev_queue noqueue_netdev_queue = { + .qdisc = &noqueue_qdisc, +}; + static struct Qdisc noqueue_qdisc = { .enqueue = NULL, .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), + .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), + .dev_queue = &noqueue_netdev_queue, }; @@ -364,7 +376,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) { struct sk_buff_head *list = prio2list(skb, qdisc); - if (skb_queue_len(list) < qdisc->dev->tx_queue_len) { + if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { qdisc->q.qlen++; return __qdisc_enqueue_tail(skb, qdisc, list); } @@ -440,7 +452,8 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .owner = THIS_MODULE, }; -struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) +struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, + struct Qdisc_ops *ops) { void *p; struct Qdisc *sch; @@ -462,24 +475,25 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; - sch->dev = dev; - dev_hold(dev); + sch->dev_queue = dev_queue; + dev_hold(qdisc_dev(sch)); atomic_set(&sch->refcnt, 1); return sch; errout: - return ERR_PTR(-err); + return ERR_PTR(err); } -struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, +struct Qdisc * qdisc_create_dflt(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc_ops *ops, unsigned int parentid) { struct Qdisc *sch; - sch = qdisc_alloc(dev, ops); + sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) goto errout; - sch->stats_lock = &dev->queue_lock; sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) @@ -491,7 +505,7 @@ errout: } EXPORT_SYMBOL(qdisc_create_dflt); -/* Under dev->queue_lock and BH! */ +/* Under qdisc_root_lock(qdisc) and BH! */ void qdisc_reset(struct Qdisc *qdisc) { @@ -508,86 +522,161 @@ EXPORT_SYMBOL(qdisc_reset); static void __qdisc_destroy(struct rcu_head *head) { struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); + const struct Qdisc_ops *ops = qdisc->ops; + +#ifdef CONFIG_NET_SCHED + qdisc_put_stab(qdisc->stab); +#endif + gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); + if (ops->reset) + ops->reset(qdisc); + if (ops->destroy) + ops->destroy(qdisc); + + module_put(ops->owner); + dev_put(qdisc_dev(qdisc)); + + kfree_skb(qdisc->gso_skb); + kfree((char *) qdisc - qdisc->padded); } -/* Under dev->queue_lock and BH! */ +/* Under qdisc_root_lock(qdisc) and BH! */ void qdisc_destroy(struct Qdisc *qdisc) { - const struct Qdisc_ops *ops = qdisc->ops; - if (qdisc->flags & TCQ_F_BUILTIN || !atomic_dec_and_test(&qdisc->refcnt)) return; - list_del(&qdisc->list); - gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); - if (ops->reset) - ops->reset(qdisc); - if (ops->destroy) - ops->destroy(qdisc); + if (qdisc->parent) + list_del(&qdisc->list); - module_put(ops->owner); - dev_put(qdisc->dev); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } EXPORT_SYMBOL(qdisc_destroy); +static bool dev_all_qdisc_sleeping_noop(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + if (txq->qdisc_sleeping != &noop_qdisc) + return false; + } + return true; +} + +static void attach_one_default_qdisc(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + + if (dev->tx_queue_len) { + qdisc = qdisc_create_dflt(dev, dev_queue, + &pfifo_fast_ops, TC_H_ROOT); + if (!qdisc) { + printk(KERN_INFO "%s: activation failed\n", dev->name); + return; + } + } else { + qdisc = &noqueue_qdisc; + } + dev_queue->qdisc_sleeping = qdisc; +} + +static void transition_one_qdisc(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_need_watchdog) +{ + struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping; + int *need_watchdog_p = _need_watchdog; + + rcu_assign_pointer(dev_queue->qdisc, new_qdisc); + if (new_qdisc != &noqueue_qdisc) + *need_watchdog_p = 1; +} + void dev_activate(struct net_device *dev) { + int need_watchdog; + /* No queueing discipline is attached to device; create default one i.e. pfifo_fast for devices, which need queueing and noqueue_qdisc for virtual interfaces */ - if (dev->qdisc_sleeping == &noop_qdisc) { - struct Qdisc *qdisc; - if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops, - TC_H_ROOT); - if (qdisc == NULL) { - printk(KERN_INFO "%s: activation failed\n", dev->name); - return; - } - list_add_tail(&qdisc->list, &dev->qdisc_list); - } else { - qdisc = &noqueue_qdisc; - } - dev->qdisc_sleeping = qdisc; - } + if (dev_all_qdisc_sleeping_noop(dev)) + netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ return; - spin_lock_bh(&dev->queue_lock); - rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping); - if (dev->qdisc != &noqueue_qdisc) { + need_watchdog = 0; + netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); + + if (need_watchdog) { dev->trans_start = jiffies; dev_watchdog_up(dev); } - spin_unlock_bh(&dev->queue_lock); } -void dev_deactivate(struct net_device *dev) +static void dev_deactivate_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) { + struct Qdisc *qdisc_default = _qdisc_default; struct Qdisc *qdisc; - struct sk_buff *skb; - int running; - spin_lock_bh(&dev->queue_lock); - qdisc = dev->qdisc; - dev->qdisc = &noop_qdisc; + qdisc = dev_queue->qdisc; + if (qdisc) { + spin_lock_bh(qdisc_lock(qdisc)); - qdisc_reset(qdisc); + dev_queue->qdisc = qdisc_default; + qdisc_reset(qdisc); - skb = dev->gso_skb; - dev->gso_skb = NULL; - spin_unlock_bh(&dev->queue_lock); + spin_unlock_bh(qdisc_lock(qdisc)); + } +} - kfree_skb(skb); +static bool some_qdisc_is_running(struct net_device *dev, int lock) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *dev_queue; + spinlock_t *root_lock; + struct Qdisc *q; + int val; + + dev_queue = netdev_get_tx_queue(dev, i); + q = dev_queue->qdisc; + root_lock = qdisc_root_lock(q); + + if (lock) + spin_lock_bh(root_lock); + + val = test_bit(__QDISC_STATE_RUNNING, &q->state); + + if (lock) + spin_unlock_bh(root_lock); + + if (val) + return true; + } + return false; +} + +void dev_deactivate(struct net_device *dev) +{ + bool running; + + netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); dev_watchdog_down(dev); @@ -596,16 +685,14 @@ void dev_deactivate(struct net_device *dev) /* Wait for outstanding qdisc_run calls. */ do { - while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) + while (some_qdisc_is_running(dev, 0)) yield(); /* * Double-check inside queue lock to ensure that all effects * of the queue run are visible when we return. */ - spin_lock_bh(&dev->queue_lock); - running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); - spin_unlock_bh(&dev->queue_lock); + running = some_qdisc_is_running(dev, 1); /* * The running flag should never be set at this point because @@ -618,32 +705,46 @@ void dev_deactivate(struct net_device *dev) } while (WARN_ON_ONCE(running)); } +static void dev_init_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc) +{ + struct Qdisc *qdisc = _qdisc; + + dev_queue->qdisc = qdisc; + dev_queue->qdisc_sleeping = qdisc; +} + void dev_init_scheduler(struct net_device *dev) { - qdisc_lock_tree(dev); - dev->qdisc = &noop_qdisc; - dev->qdisc_sleeping = &noop_qdisc; - INIT_LIST_HEAD(&dev->qdisc_list); - qdisc_unlock_tree(dev); + netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); + dev_init_scheduler_queue(dev, &dev->rx_queue, NULL); setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); } -void dev_shutdown(struct net_device *dev) +static void shutdown_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) { - struct Qdisc *qdisc; + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc_default = _qdisc_default; + + if (qdisc) { + spinlock_t *root_lock = qdisc_root_lock(qdisc); + + dev_queue->qdisc = qdisc_default; + dev_queue->qdisc_sleeping = qdisc_default; - qdisc_lock_tree(dev); - qdisc = dev->qdisc_sleeping; - dev->qdisc = &noop_qdisc; - dev->qdisc_sleeping = &noop_qdisc; - qdisc_destroy(qdisc); -#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE) - if ((qdisc = dev->qdisc_ingress) != NULL) { - dev->qdisc_ingress = NULL; + spin_lock_bh(root_lock); qdisc_destroy(qdisc); + spin_unlock_bh(root_lock); } -#endif - BUG_TRAP(!timer_pending(&dev->watchdog_timer)); - qdisc_unlock_tree(dev); +} + +void dev_shutdown(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); + shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); + WARN_ON(timer_pending(&dev->watchdog_timer)); } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 3a9d226ff1e4..c1ad6b8de105 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -164,7 +164,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) * if no default DP has been configured. This * allows for DP flows to be left untouched. */ - if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len) + if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len) return qdisc_enqueue_tail(skb, sch); else goto drop; @@ -188,7 +188,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) } q->packetsin++; - q->bytesin += skb->len; + q->bytesin += qdisc_pkt_len(skb); if (gred_wred_mode(t)) gred_load_wred_set(t, q); @@ -226,8 +226,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) break; } - if (q->backlog + skb->len <= q->limit) { - q->backlog += skb->len; + if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { + q->backlog += qdisc_pkt_len(skb); return qdisc_enqueue_tail(skb, sch); } @@ -254,7 +254,7 @@ static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) } else { if (red_is_idling(&q->parms)) red_end_of_idle_period(&q->parms); - q->backlog += skb->len; + q->backlog += qdisc_pkt_len(skb); } return qdisc_requeue(skb, sch); @@ -277,7 +277,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch) "VQ 0x%x after dequeue, screwing up " "backlog.\n", tc_index_to_dp(skb)); } else { - q->backlog -= skb->len; + q->backlog -= qdisc_pkt_len(skb); if (!q->backlog && !gred_wred_mode(t)) red_start_of_idle_period(&q->parms); @@ -299,7 +299,7 @@ static unsigned int gred_drop(struct Qdisc* sch) skb = qdisc_dequeue_tail(sch); if (skb) { - unsigned int len = skb->len; + unsigned int len = qdisc_pkt_len(skb); struct gred_sched_data *q; u16 dp = tc_index_to_dp(skb); @@ -582,7 +582,8 @@ append_opt: return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static void gred_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 87293d0db1d7..0ae7d19dcba8 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -113,7 +113,7 @@ enum hfsc_class_flags struct hfsc_class { - u32 classid; /* class id */ + struct Qdisc_class_common cl_common; unsigned int refcnt; /* usage count */ struct gnet_stats_basic bstats; @@ -134,7 +134,6 @@ struct hfsc_class struct rb_node vt_node; /* parent's vt_tree member */ struct rb_root cf_tree; /* active children sorted by cl_f */ struct rb_node cf_node; /* parent's cf_heap member */ - struct list_head hlist; /* hash list member */ struct list_head dlist; /* drop list member */ u64 cl_total; /* total work in bytes */ @@ -177,13 +176,11 @@ struct hfsc_class unsigned long cl_nactive; /* number of active children */ }; -#define HFSC_HSIZE 16 - struct hfsc_sched { u16 defcls; /* default class id */ struct hfsc_class root; /* root class */ - struct list_head clhash[HFSC_HSIZE]; /* class hash */ + struct Qdisc_class_hash clhash; /* class hash */ struct rb_root eligible; /* eligible tree */ struct list_head droplist; /* active leaf class list (for dropping) */ @@ -898,7 +895,7 @@ qdisc_peek_len(struct Qdisc *sch) printk("qdisc_peek_len: non work-conserving qdisc ?\n"); return 0; } - len = skb->len; + len = qdisc_pkt_len(skb); if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) { if (net_ratelimit()) printk("qdisc_peek_len: failed to requeue\n"); @@ -933,26 +930,16 @@ hfsc_adjust_levels(struct hfsc_class *cl) } while ((cl = cl->cl_parent) != NULL); } -static inline unsigned int -hfsc_hash(u32 h) -{ - h ^= h >> 8; - h ^= h >> 4; - - return h & (HFSC_HSIZE - 1); -} - static inline struct hfsc_class * hfsc_find_class(u32 classid, struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); - struct hfsc_class *cl; + struct Qdisc_class_common *clc; - list_for_each_entry(cl, &q->clhash[hfsc_hash(classid)], hlist) { - if (cl->classid == classid) - return cl; - } - return NULL; + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct hfsc_class, cl_common); } static void @@ -1032,7 +1019,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl != NULL) { if (parentid) { - if (cl->cl_parent && cl->cl_parent->classid != parentid) + if (cl->cl_parent && + cl->cl_parent->cl_common.classid != parentid) return -EINVAL; if (cl->cl_parent == NULL && parentid != TC_H_ROOT) return -EINVAL; @@ -1057,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_lock(sch), tca[TCA_RATE]); return 0; } @@ -1091,11 +1079,12 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (usc != NULL) hfsc_change_usc(cl, usc, 0); + cl->cl_common.classid = classid; cl->refcnt = 1; - cl->classid = classid; cl->sched = q; cl->cl_parent = parent; - cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; INIT_LIST_HEAD(&cl->children); @@ -1103,7 +1092,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cf_tree = RB_ROOT; sch_tree_lock(sch); - list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]); + qdisc_class_hash_insert(&q->clhash, &cl->cl_common); list_add_tail(&cl->siblings, &parent->children); if (parent->level == 0) hfsc_purge_queue(sch, parent); @@ -1111,9 +1100,11 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); + qdisc_class_hash_grow(sch, &q->clhash); + if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, tca[TCA_RATE]); + qdisc_root_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } @@ -1123,7 +1114,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) { struct hfsc_sched *q = qdisc_priv(sch); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->qdisc); gen_kill_estimator(&cl->bstats, &cl->rate_est); if (cl != &q->root) @@ -1145,7 +1136,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) hfsc_adjust_levels(cl->cl_parent); hfsc_purge_queue(sch, cl); - list_del(&cl->hlist); + qdisc_class_hash_remove(&q->clhash, &cl->cl_common); if (--cl->refcnt == 0) hfsc_destroy_class(sch, cl); @@ -1211,8 +1202,9 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level > 0) return -EINVAL; if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->classid); + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->cl_common.classid); if (new == NULL) new = &noop_qdisc; } @@ -1345,8 +1337,9 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct hfsc_class *cl = (struct hfsc_class *)arg; struct nlattr *nest; - tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT; - tcm->tcm_handle = cl->classid; + tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->cl_common.classid : + TC_H_ROOT; + tcm->tcm_handle = cl->cl_common.classid; if (cl->level == 0) tcm->tcm_info = cl->qdisc->handle; @@ -1360,7 +1353,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, nla_put_failure: nla_nest_cancel(skb, nest); - return -1; + return -EMSGSIZE; } static int @@ -1390,14 +1383,16 @@ static void hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct hfsc_sched *q = qdisc_priv(sch); + struct hlist_node *n; struct hfsc_class *cl; unsigned int i; if (arg->stop) return; - for (i = 0; i < HFSC_HSIZE; i++) { - list_for_each_entry(cl, &q->clhash[i], hlist) { + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], + cl_common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; @@ -1433,23 +1428,25 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) { struct hfsc_sched *q = qdisc_priv(sch); struct tc_hfsc_qopt *qopt; - unsigned int i; + int err; if (opt == NULL || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); q->defcls = qopt->defcls; - for (i = 0; i < HFSC_HSIZE; i++) - INIT_LIST_HEAD(&q->clhash[i]); + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); skb_queue_head_init(&q->requeue); + q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; - q->root.classid = sch->handle; q->root.sched = q; - q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; @@ -1457,7 +1454,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) q->root.vt_tree = RB_ROOT; q->root.cf_tree = RB_ROOT; - list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]); + qdisc_class_hash_insert(&q->clhash, &q->root.cl_common); + qdisc_class_hash_grow(sch, &q->clhash); qdisc_watchdog_init(&q->watchdog, sch); @@ -1520,10 +1518,11 @@ hfsc_reset_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; + struct hlist_node *n; unsigned int i; - for (i = 0; i < HFSC_HSIZE; i++) { - list_for_each_entry(cl, &q->clhash[i], hlist) + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } __skb_queue_purge(&q->requeue); @@ -1537,13 +1536,20 @@ static void hfsc_destroy_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); - struct hfsc_class *cl, *next; + struct hlist_node *n, *next; + struct hfsc_class *cl; unsigned int i; - for (i = 0; i < HFSC_HSIZE; i++) { - list_for_each_entry_safe(cl, next, &q->clhash[i], hlist) + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + tcf_destroy_chain(&cl->filter_list); + } + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + cl_common.hnode) hfsc_destroy_class(sch, cl); } + qdisc_class_hash_destroy(&q->clhash); __skb_queue_purge(&q->requeue); qdisc_watchdog_cancel(&q->watchdog); } @@ -1568,7 +1574,6 @@ static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct hfsc_class *cl; - unsigned int len; int err; cl = hfsc_classify(skb, sch, &err); @@ -1579,8 +1584,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; } - len = skb->len; - err = cl->qdisc->enqueue(skb, cl->qdisc); + err = qdisc_enqueue(skb, cl->qdisc); if (unlikely(err != NET_XMIT_SUCCESS)) { cl->qstats.drops++; sch->qstats.drops++; @@ -1588,12 +1592,12 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) } if (cl->qdisc->q.qlen == 1) - set_active(cl, len); + set_active(cl, qdisc_pkt_len(skb)); cl->bstats.packets++; - cl->bstats.bytes += len; + cl->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - sch->bstats.bytes += len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -1643,9 +1647,9 @@ hfsc_dequeue(struct Qdisc *sch) return NULL; } - update_vf(cl, skb->len, cur_time); + update_vf(cl, qdisc_pkt_len(skb), cur_time); if (realtime) - cl->cl_cumul += skb->len; + cl->cl_cumul += qdisc_pkt_len(skb); if (cl->qdisc->q.qlen != 0) { if (cl->cl_flags & HFSC_RSC) { diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 66148cc4759e..75a40951c4f2 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -24,10 +24,9 @@ * Jiri Fojtasek * fixed requeue routine * and many others. thanks. - * - * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ #include <linux/module.h> +#include <linux/moduleparam.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> @@ -52,14 +51,17 @@ one less than their parent. */ -#define HTB_HSIZE 16 /* classid hash size */ -#define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ +static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */ #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ #if HTB_VER >> 16 != TC_HTB_PROTOVER #error "Mismatched sch_htb.c and pkt_sch.h" #endif +/* Module parameter and sysfs export */ +module_param (htb_hysteresis, int, 0640); +MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate"); + /* used internaly to keep status of single class */ enum htb_cmode { HTB_CANT_SEND, /* class can't send and can't borrow */ @@ -69,8 +71,8 @@ enum htb_cmode { /* interior & leaf nodes; props specific to leaves are marked L: */ struct htb_class { + struct Qdisc_class_common common; /* general class parameters */ - u32 classid; struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; @@ -79,10 +81,8 @@ struct htb_class { /* topology */ int level; /* our level (see above) */ + unsigned int children; struct htb_class *parent; /* parent class */ - struct hlist_node hlist; /* classid hash list item */ - struct list_head sibling; /* sibling list item */ - struct list_head children; /* children list */ union { struct htb_class_leaf { @@ -137,8 +137,7 @@ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, } struct htb_sched { - struct list_head root; /* root classes list */ - struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */ + struct Qdisc_class_hash clhash; struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ /* self list - roots of self generating tree */ @@ -160,7 +159,6 @@ struct htb_sched { /* filters for qdisc itself */ struct tcf_proto *filter_list; - int filter_cnt; int rate2quantum; /* quant = rate / rate2quantum */ psched_time_t now; /* cached dequeue time */ @@ -173,32 +171,16 @@ struct htb_sched { long direct_pkts; }; -/* compute hash of size HTB_HSIZE for given handle */ -static inline int htb_hash(u32 h) -{ -#if HTB_HSIZE != 16 -#error "Declare new hash for your HTB_HSIZE" -#endif - h ^= h >> 8; /* stolen from cbq_hash */ - h ^= h >> 4; - return h & 0xf; -} - /* find class in global hash table using given handle */ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *p; - struct htb_class *cl; + struct Qdisc_class_common *clc; - if (TC_H_MAJ(handle) != sch->handle) + clc = qdisc_class_find(&q->clhash, handle); + if (clc == NULL) return NULL; - - hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) { - if (cl->classid == handle) - return cl; - } - return NULL; + return container_of(clc, struct htb_class, common); } /** @@ -279,7 +261,7 @@ static void htb_add_to_id_tree(struct rb_root *root, parent = *p; c = rb_entry(parent, struct htb_class, node[prio]); - if (cl->classid > c->classid) + if (cl->common.classid > c->common.classid) p = &parent->rb_right; else p = &parent->rb_left; @@ -443,7 +425,7 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) /* we are removing child which is pointed to from parent feed - forget the pointer but remember classid */ - p->un.inner.last_ptr_id[prio] = cl->classid; + p->un.inner.last_ptr_id[prio] = cl->common.classid; p->un.inner.ptr[prio] = NULL; } @@ -462,19 +444,21 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) htb_remove_class_from_row(q, cl, mask); } -#if HTB_HYSTERESIS static inline long htb_lowater(const struct htb_class *cl) { - return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; + if (htb_hysteresis) + return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; + else + return 0; } static inline long htb_hiwater(const struct htb_class *cl) { - return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; + if (htb_hysteresis) + return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; + else + return 0; } -#else -#define htb_lowater(cl) (0) -#define htb_hiwater(cl) (0) -#endif + /** * htb_class_mode - computes and returns current class mode @@ -540,7 +524,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) */ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) { - BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen); + WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen); if (!cl->prio_activity) { cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); @@ -558,7 +542,7 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - BUG_TRAP(cl->prio_activity); + WARN_ON(!cl->prio_activity); htb_deactivate_prios(q, cl); cl->prio_activity = 0; @@ -588,21 +572,20 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) kfree_skb(skb); return ret; #endif - } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != - NET_XMIT_SUCCESS) { + } else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { sch->qstats.drops++; cl->qstats.drops++; return NET_XMIT_DROP; } else { cl->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; - cl->bstats.bytes += skb->len; + cl->bstats.bytes += qdisc_pkt_len(skb); htb_activate(q, cl); } sch->q.qlen++; sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); return NET_XMIT_SUCCESS; } @@ -659,7 +642,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, int level, struct sk_buff *skb) { - int bytes = skb->len; + int bytes = qdisc_pkt_len(skb); long toks, diff; enum htb_cmode old_mode; @@ -746,10 +729,10 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, while (n) { struct htb_class *cl = rb_entry(n, struct htb_class, node[prio]); - if (id == cl->classid) + if (id == cl->common.classid) return n; - if (id > cl->classid) { + if (id > cl->common.classid) { n = n->rb_right; } else { r = n; @@ -774,7 +757,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, u32 *pid; } stk[TC_HTB_MAXDEPTH], *sp = stk; - BUG_TRAP(tree->rb_node); + WARN_ON(!tree->rb_node); sp->root = tree->rb_node; sp->pptr = pptr; sp->pid = pid; @@ -794,7 +777,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, *sp->pptr = (*sp->pptr)->rb_left; if (sp > stk) { sp--; - BUG_TRAP(*sp->pptr); + WARN_ON(!*sp->pptr); if (!*sp->pptr) return NULL; htb_next_rb_node(sp->pptr); @@ -809,7 +792,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, sp->pid = cl->un.inner.last_ptr_id + prio; } } - BUG_TRAP(0); + WARN_ON(1); return NULL; } @@ -827,7 +810,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, do { next: - BUG_TRAP(cl); + WARN_ON(!cl); if (!cl) return NULL; @@ -859,7 +842,7 @@ next: if (!cl->warned) { printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n", - cl->classid); + cl->common.classid); cl->warned = 1; } q->nwc_hit++; @@ -872,7 +855,8 @@ next: } while (cl != start); if (likely(skb != NULL)) { - if ((cl->un.leaf.deficit[level] -= skb->len) < 0) { + cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); + if (cl->un.leaf.deficit[level] < 0) { cl->un.leaf.deficit[level] += cl->un.leaf.quantum; htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> ptr[0]) + prio); @@ -970,13 +954,12 @@ static unsigned int htb_drop(struct Qdisc *sch) static void htb_reset(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - int i; - - for (i = 0; i < HTB_HSIZE; i++) { - struct hlist_node *p; - struct htb_class *cl; + struct htb_class *cl; + struct hlist_node *n; + unsigned int i; - hlist_for_each_entry(cl, p, q->hash + i, hlist) { + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { if (cl->level) memset(&cl->un.inner, 0, sizeof(cl->un.inner)); else { @@ -1034,16 +1017,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; } - INIT_LIST_HEAD(&q->root); - for (i = 0; i < HTB_HSIZE; i++) - INIT_HLIST_HEAD(q->hash + i); + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; for (i = 0; i < TC_HTB_NUMPRIO; i++) INIT_LIST_HEAD(q->drops + i); qdisc_watchdog_init(&q->watchdog, sch); skb_queue_head_init(&q->direct_queue); - q->direct_qlen = sch->dev->tx_queue_len; + q->direct_qlen = qdisc_dev(sch)->tx_queue_len; if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; @@ -1056,11 +1039,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { + spinlock_t *root_lock = qdisc_root_lock(sch); struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_glob gopt; - spin_lock_bh(&sch->dev->queue_lock); + spin_lock_bh(root_lock); gopt.direct_pkts = q->direct_pkts; gopt.version = HTB_VER; @@ -1074,11 +1058,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); nla_nest_end(skb, nest); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); return skb->len; nla_put_failure: - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1087,12 +1071,13 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; + spinlock_t *root_lock = qdisc_root_lock(sch); struct nlattr *nest; struct tc_htb_opt opt; - spin_lock_bh(&sch->dev->queue_lock); - tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT; - tcm->tcm_handle = cl->classid; + spin_lock_bh(root_lock); + tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT; + tcm->tcm_handle = cl->common.classid; if (!cl->level && cl->un.leaf.q) tcm->tcm_info = cl->un.leaf.q->handle; @@ -1112,11 +1097,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); nla_nest_end(skb, nest); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); return skb->len; nla_put_failure: - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1146,8 +1131,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl && !cl->level) { if (new == NULL && - (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->classid)) + (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->common.classid)) == NULL) return -ENOBUFS; sch_tree_lock(sch); @@ -1188,20 +1174,21 @@ static inline int htb_parent_last_child(struct htb_class *cl) if (!cl->parent) /* the root class */ return 0; - - if (!(cl->parent->children.next == &cl->sibling && - cl->parent->children.prev == &cl->sibling)) + if (cl->parent->children > 1) /* not the last child */ return 0; - return 1; } -static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q) +static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, + struct Qdisc *new_q) { struct htb_class *parent = cl->parent; - BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity); + WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity); + + if (parent->cmode != HTB_CAN_SEND) + htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level); parent->level = 0; memset(&parent->un.inner, 0, sizeof(parent->un.inner)); @@ -1217,32 +1204,15 @@ static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q) static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { - struct htb_sched *q = qdisc_priv(sch); - if (!cl->level) { - BUG_TRAP(cl->un.leaf.q); + WARN_ON(!cl->un.leaf.q); qdisc_destroy(cl->un.leaf.q); } gen_kill_estimator(&cl->bstats, &cl->rate_est); qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); - tcf_destroy_chain(cl->filter_list); - - while (!list_empty(&cl->children)) - htb_destroy_class(sch, list_entry(cl->children.next, - struct htb_class, sibling)); - - /* note: this delete may happen twice (see htb_delete) */ - hlist_del_init(&cl->hlist); - list_del(&cl->sibling); - - if (cl->prio_activity) - htb_deactivate(q, cl); - - if (cl->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); - + tcf_destroy_chain(&cl->filter_list); kfree(cl); } @@ -1250,18 +1220,27 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) static void htb_destroy(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); + struct hlist_node *n, *next; + struct htb_class *cl; + unsigned int i; qdisc_watchdog_cancel(&q->watchdog); /* This line used to be after htb_destroy_class call below and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call unbind_filter on it (without Oops). */ - tcf_destroy_chain(q->filter_list); - - while (!list_empty(&q->root)) - htb_destroy_class(sch, list_entry(q->root.next, - struct htb_class, sibling)); + tcf_destroy_chain(&q->filter_list); + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) + tcf_destroy_chain(&cl->filter_list); + } + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + common.hnode) + htb_destroy_class(sch, cl); + } + qdisc_class_hash_destroy(&q->clhash); __skb_queue_purge(&q->direct_queue); } @@ -1276,12 +1255,13 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) // TODO: why don't allow to delete subtree ? references ? does // tc subsys quarantee us that in htb_destroy it holds no class // refs so that we can remove children safely there ? - if (!list_empty(&cl->children) || cl->filter_cnt) + if (cl->children || cl->filter_cnt) return -EBUSY; if (!cl->level && htb_parent_last_child(cl)) { - new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->parent->classid); + new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->parent->common.classid); last_child = 1; } @@ -1294,13 +1274,17 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) } /* delete from hash and active; remainder in destroy_class */ - hlist_del_init(&cl->hlist); + qdisc_class_hash_remove(&q->clhash, &cl->common); + cl->parent->children--; if (cl->prio_activity) htb_deactivate(q, cl); + if (cl->cmode != HTB_CAN_SEND) + htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); + if (last_child) - htb_parent_to_leaf(cl, new_q); + htb_parent_to_leaf(q, cl, new_q); if (--cl->refcnt == 0) htb_destroy_class(sch, cl); @@ -1383,12 +1367,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_lock(sch), tca[TCA_RATE] ? : &est.nla); cl->refcnt = 1; - INIT_LIST_HEAD(&cl->sibling); - INIT_HLIST_NODE(&cl->hlist); - INIT_LIST_HEAD(&cl->children); + cl->children = 0; INIT_LIST_HEAD(&cl->un.leaf.drop_list); RB_CLEAR_NODE(&cl->pq_node); @@ -1398,7 +1380,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); sch_tree_lock(sch); if (parent && !parent->level) { unsigned int qlen = parent->un.leaf.q->q.qlen; @@ -1422,7 +1405,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* leaf (we) needs elementary qdisc */ cl->un.leaf.q = new_q ? new_q : &noop_qdisc; - cl->classid = classid; + cl->common.classid = classid; cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ @@ -1433,13 +1416,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ - hlist_add_head(&cl->hlist, q->hash + htb_hash(classid)); - list_add_tail(&cl->sibling, - parent ? &parent->children : &q->root); + qdisc_class_hash_insert(&q->clhash, &cl->common); + if (parent) + parent->children++; } else { if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_lock(sch), tca[TCA_RATE]); sch_tree_lock(sch); } @@ -1451,13 +1434,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->quantum && cl->un.leaf.quantum < 1000) { printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", - cl->classid); + cl->common.classid); cl->un.leaf.quantum = 1000; } if (!hopt->quantum && cl->un.leaf.quantum > 200000) { printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", - cl->classid); + cl->common.classid); cl->un.leaf.quantum = 200000; } if (hopt->quantum) @@ -1480,6 +1463,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->ceil = ctab; sch_tree_unlock(sch); + qdisc_class_hash_grow(sch, &q->clhash); + *arg = (unsigned long)cl; return 0; @@ -1503,7 +1488,6 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg) static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { - struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_find(classid, sch); /*if (cl && !cl->level) return 0; @@ -1517,35 +1501,29 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, */ if (cl) cl->filter_cnt++; - else - q->filter_cnt++; return (unsigned long)cl; } static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) { - struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; if (cl) cl->filter_cnt--; - else - q->filter_cnt--; } static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct htb_sched *q = qdisc_priv(sch); - int i; + struct htb_class *cl; + struct hlist_node *n; + unsigned int i; if (arg->stop) return; - for (i = 0; i < HTB_HSIZE; i++) { - struct hlist_node *p; - struct htb_class *cl; - - hlist_for_each_entry(cl, p, q->hash + i, hlist) { + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 274b1ddb160c..4a2b77374358 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -77,7 +77,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) result = tc_classify(skb, p->filter_list, &res); sch->bstats.packets++; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); switch (result) { case TC_ACT_SHOT: result = TC_ACT_SHOT; @@ -104,7 +104,7 @@ static void ingress_destroy(struct Qdisc *sch) { struct ingress_qdisc_data *p = qdisc_priv(sch); - tcf_destroy_chain(p->filter_list); + tcf_destroy_chain(&p->filter_list); } static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index c9c649b26eaa..a59085700678 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -82,6 +82,13 @@ struct netem_skb_cb { psched_time_t time_to_send; }; +static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(skb->cb) < + sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); + return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; +} + /* init_crandom - initialize correlated random number generator * Use entropy source for initial seed. */ @@ -180,11 +187,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * skb will be queued. */ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = sch->dev->qdisc; + struct Qdisc *rootq = qdisc_root(sch); u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; - rootq->enqueue(skb2, rootq); + qdisc_enqueue_root(skb2, rootq); q->duplicate = dupsave; } @@ -205,7 +212,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); } - cb = (struct netem_skb_cb *)skb->cb; + cb = netem_skb_cb(skb); if (q->gap == 0 /* not doing reordering */ || q->counter < q->gap /* inside last reordering gap */ || q->reorder < get_crandom(&q->reorder_cor)) { @@ -218,7 +225,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) now = psched_get_time(); cb->time_to_send = now + delay; ++q->counter; - ret = q->qdisc->enqueue(skb, q->qdisc); + ret = qdisc_enqueue(skb, q->qdisc); } else { /* * Do re-ordering by putting one out of N packets at the front @@ -231,7 +238,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (likely(ret == NET_XMIT_SUCCESS)) { sch->q.qlen++; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; } else sch->qstats.drops++; @@ -277,8 +284,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) skb = q->qdisc->dequeue(q->qdisc); if (skb) { - const struct netem_skb_cb *cb - = (const struct netem_skb_cb *)skb->cb; + const struct netem_skb_cb *cb = netem_skb_cb(skb); psched_time_t now = psched_get_time(); /* if more time remaining? */ @@ -310,28 +316,6 @@ static void netem_reset(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); } -/* Pass size change message down to embedded FIFO */ -static int set_fifo_limit(struct Qdisc *q, int limit) -{ - struct nlattr *nla; - int ret = -ENOMEM; - - /* Hack to avoid sending change message to non-FIFO */ - if (strncmp(q->ops->id + 1, "fifo", 4) != 0) - return 0; - - nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); - if (nla) { - nla->nla_type = RTM_NEWQDISC; - nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); - ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; - - ret = q->ops->change(q, nla); - kfree(nla); - } - return ret; -} - /* * Distribution data is a variable size payload containing * signed 16 bit values. @@ -341,6 +325,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) struct netem_sched_data *q = qdisc_priv(sch); unsigned long n = nla_len(attr)/sizeof(__s16); const __s16 *data = nla_data(attr); + spinlock_t *root_lock; struct disttable *d; int i; @@ -355,9 +340,11 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) for (i = 0; i < n; i++) d->table[i] = data[i]; - spin_lock_bh(&sch->dev->queue_lock); + root_lock = qdisc_root_lock(sch); + + spin_lock_bh(root_lock); d = xchg(&q->delay_dist, d); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); kfree(d); return 0; @@ -416,7 +403,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (ret < 0) return ret; - ret = set_fifo_limit(q->qdisc, qopt->limit); + ret = fifo_set_limit(q->qdisc, qopt->limit); if (ret) { pr_debug("netem: can't set fifo limit\n"); return ret; @@ -476,7 +463,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct fifo_sched_data *q = qdisc_priv(sch); struct sk_buff_head *list = &sch->q; - psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send; + psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; struct sk_buff *skb; if (likely(skb_queue_len(list) < q->limit)) { @@ -487,8 +474,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) } skb_queue_reverse_walk(list, skb) { - const struct netem_skb_cb *cb - = (const struct netem_skb_cb *)skb->cb; + const struct netem_skb_cb *cb = netem_skb_cb(skb); if (tnext >= cb->time_to_send) break; @@ -496,8 +482,8 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) __skb_queue_after(list, skb, nskb); - sch->qstats.backlog += nskb->len; - sch->bstats.bytes += nskb->len; + sch->qstats.backlog += qdisc_pkt_len(nskb); + sch->bstats.bytes += qdisc_pkt_len(nskb); sch->bstats.packets++; return NET_XMIT_SUCCESS; @@ -517,7 +503,7 @@ static int tfifo_init(struct Qdisc *sch, struct nlattr *opt) q->limit = ctl->limit; } else - q->limit = max_t(u32, sch->dev->tx_queue_len, 1); + q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); q->oldest = PSCHED_PASTPERFECT; return 0; @@ -558,7 +544,8 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); - q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops, + q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &tfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { pr_debug("netem: qdisc create failed\n"); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 4aa2b45dad0a..f849243eb095 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -24,11 +24,9 @@ struct prio_sched_data { int bands; - int curband; /* for round-robin */ struct tcf_proto *filter_list; u8 prio2band[TC_PRIO_MAX+1]; struct Qdisc *queues[TCQ_PRIO_BANDS]; - int mq; }; @@ -55,17 +53,14 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) if (!q->filter_list || err < 0) { if (TC_H_MAJ(band)) band = 0; - band = q->prio2band[band&TC_PRIO_MAX]; - goto out; + return q->queues[q->prio2band[band&TC_PRIO_MAX]]; } band = res.classid; } band = TC_H_MIN(band) - 1; if (band >= q->bands) - band = q->prio2band[0]; -out: - if (q->mq) - skb_set_queue_mapping(skb, band); + return q->queues[q->prio2band[0]]; + return q->queues[band]; } @@ -86,8 +81,9 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) } #endif - if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) { - sch->bstats.bytes += skb->len; + ret = qdisc_enqueue(skb, qdisc); + if (ret == NET_XMIT_SUCCESS) { + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -123,67 +119,23 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch) } -static struct sk_buff * -prio_dequeue(struct Qdisc* sch) +static struct sk_buff *prio_dequeue(struct Qdisc* sch) { - struct sk_buff *skb; struct prio_sched_data *q = qdisc_priv(sch); int prio; - struct Qdisc *qdisc; for (prio = 0; prio < q->bands; prio++) { - /* Check if the target subqueue is available before - * pulling an skb. This way we avoid excessive requeues - * for slower queues. - */ - if (!__netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) { - qdisc = q->queues[prio]; - skb = qdisc->dequeue(qdisc); - if (skb) { - sch->q.qlen--; - return skb; - } + struct Qdisc *qdisc = q->queues[prio]; + struct sk_buff *skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + return skb; } } return NULL; } -static struct sk_buff *rr_dequeue(struct Qdisc* sch) -{ - struct sk_buff *skb; - struct prio_sched_data *q = qdisc_priv(sch); - struct Qdisc *qdisc; - int bandcount; - - /* Only take one pass through the queues. If nothing is available, - * return nothing. - */ - for (bandcount = 0; bandcount < q->bands; bandcount++) { - /* Check if the target subqueue is available before - * pulling an skb. This way we avoid excessive requeues - * for slower queues. If the queue is stopped, try the - * next queue. - */ - if (!__netif_subqueue_stopped(sch->dev, - (q->mq ? q->curband : 0))) { - qdisc = q->queues[q->curband]; - skb = qdisc->dequeue(qdisc); - if (skb) { - sch->q.qlen--; - q->curband++; - if (q->curband >= q->bands) - q->curband = 0; - return skb; - } - } - q->curband++; - if (q->curband >= q->bands) - q->curband = 0; - } - return NULL; -} - static unsigned int prio_drop(struct Qdisc* sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -219,7 +171,7 @@ prio_destroy(struct Qdisc* sch) int prio; struct prio_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); for (prio=0; prio<q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -228,45 +180,22 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); struct tc_prio_qopt *qopt; - struct nlattr *tb[TCA_PRIO_MAX + 1]; - int err; int i; - err = nla_parse_nested_compat(tb, TCA_PRIO_MAX, opt, NULL, qopt, - sizeof(*qopt)); - if (err < 0) - return err; - - q->bands = qopt->bands; - /* If we're multiqueue, make sure the number of incoming bands - * matches the number of queues on the device we're associating with. - * If the number of bands requested is zero, then set q->bands to - * dev->egress_subqueue_count. Also, the root qdisc must be the - * only one that is enabled for multiqueue, since it's the only one - * that interacts with the underlying device. - */ - q->mq = nla_get_flag(tb[TCA_PRIO_MQ]); - if (q->mq) { - if (sch->parent != TC_H_ROOT) - return -EINVAL; - if (netif_is_multiqueue(sch->dev)) { - if (q->bands == 0) - q->bands = sch->dev->egress_subqueue_count; - else if (q->bands != sch->dev->egress_subqueue_count) - return -EINVAL; - } else - return -EOPNOTSUPP; - } + if (nla_len(opt) < sizeof(*qopt)) + return -EINVAL; + qopt = nla_data(opt); - if (q->bands > TCQ_PRIO_BANDS || q->bands < 2) + if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) return -EINVAL; for (i=0; i<=TC_PRIO_MAX; i++) { - if (qopt->priomap[i] >= q->bands) + if (qopt->priomap[i] >= qopt->bands) return -EINVAL; } sch_tree_lock(sch); + q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { @@ -281,7 +210,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) for (i=0; i<q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child; - child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); @@ -331,10 +261,6 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt); if (nest == NULL) goto nla_put_failure; - if (q->mq) { - if (nla_put_flag(skb, TCA_PRIO_MQ) < 0) - goto nla_put_failure; - } nla_nest_compat_end(skb, nest); return skb->len; @@ -507,44 +433,17 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; -static struct Qdisc_ops rr_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &prio_class_ops, - .id = "rr", - .priv_size = sizeof(struct prio_sched_data), - .enqueue = prio_enqueue, - .dequeue = rr_dequeue, - .requeue = prio_requeue, - .drop = prio_drop, - .init = prio_init, - .reset = prio_reset, - .destroy = prio_destroy, - .change = prio_tune, - .dump = prio_dump, - .owner = THIS_MODULE, -}; - static int __init prio_module_init(void) { - int err; - - err = register_qdisc(&prio_qdisc_ops); - if (err < 0) - return err; - err = register_qdisc(&rr_qdisc_ops); - if (err < 0) - unregister_qdisc(&prio_qdisc_ops); - return err; + return register_qdisc(&prio_qdisc_ops); } static void __exit prio_module_exit(void) { unregister_qdisc(&prio_qdisc_ops); - unregister_qdisc(&rr_qdisc_ops); } module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); -MODULE_ALIAS("sch_rr"); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 3dcd493f4f4a..3f2d1d7f3bbd 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -92,9 +92,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) break; } - ret = child->enqueue(skb, child); + ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; sch->q.qlen++; } else { @@ -174,33 +174,6 @@ static void red_destroy(struct Qdisc *sch) qdisc_destroy(q->qdisc); } -static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit) -{ - struct Qdisc *q; - struct nlattr *nla; - int ret; - - q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, - TC_H_MAKE(sch->handle, 1)); - if (q) { - nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), - GFP_KERNEL); - if (nla) { - nla->nla_type = RTM_NEWQDISC; - nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); - ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; - - ret = q->ops->change(q, nla); - kfree(nla); - - if (ret == 0) - return q; - } - qdisc_destroy(q); - } - return NULL; -} - static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) }, [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, @@ -228,9 +201,9 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) ctl = nla_data(tb[TCA_RED_PARMS]); if (ctl->limit > 0) { - child = red_create_dflt(sch, ctl->limit); - if (child == NULL) - return -ENOMEM; + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); + if (IS_ERR(child)) + return PTR_ERR(child); } sch_tree_lock(sch); @@ -281,7 +254,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f0463d757a98..8589da666568 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -245,7 +245,7 @@ static unsigned int sfq_drop(struct Qdisc *sch) if (d > 1) { sfq_index x = q->dep[d + SFQ_DEPTH].next; skb = q->qs[x].prev; - len = skb->len; + len = qdisc_pkt_len(skb); __skb_unlink(skb, &q->qs[x]); kfree_skb(skb); sfq_dec(q, x); @@ -261,7 +261,7 @@ static unsigned int sfq_drop(struct Qdisc *sch) q->next[q->tail] = q->next[d]; q->allot[q->next[d]] += q->quantum; skb = q->qs[d].prev; - len = skb->len; + len = qdisc_pkt_len(skb); __skb_unlink(skb, &q->qs[d]); kfree_skb(skb); sfq_dec(q, d); @@ -305,7 +305,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->qs[x].qlen >= q->limit) return qdisc_drop(skb, sch); - sch->qstats.backlog += skb->len; + sch->qstats.backlog += qdisc_pkt_len(skb); __skb_queue_tail(&q->qs[x], skb); sfq_inc(q, x); if (q->qs[x].qlen == 1) { /* The flow is new */ @@ -320,7 +320,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) } } if (++sch->q.qlen <= q->limit) { - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; return 0; } @@ -352,7 +352,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) q->hash[x] = hash; } - sch->qstats.backlog += skb->len; + sch->qstats.backlog += qdisc_pkt_len(skb); __skb_queue_head(&q->qs[x], skb); /* If selected queue has length q->limit+1, this means that * all another queues are empty and we do simple tail drop. @@ -363,7 +363,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) skb = q->qs[x].prev; __skb_unlink(skb, &q->qs[x]); sch->qstats.drops++; - sch->qstats.backlog -= skb->len; + sch->qstats.backlog -= qdisc_pkt_len(skb); kfree_skb(skb); return NET_XMIT_CN; } @@ -411,7 +411,7 @@ sfq_dequeue(struct Qdisc *sch) skb = __skb_dequeue(&q->qs[a]); sfq_dec(q, a); sch->q.qlen--; - sch->qstats.backlog -= skb->len; + sch->qstats.backlog -= qdisc_pkt_len(skb); /* Is the slot empty? */ if (q->qs[a].qlen == 0) { @@ -423,7 +423,7 @@ sfq_dequeue(struct Qdisc *sch) } q->next[q->tail] = a; q->allot[a] += q->quantum; - } else if ((q->allot[a] -= skb->len) <= 0) { + } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { q->tail = a; a = q->next[a]; q->allot[a] += q->quantum; @@ -461,7 +461,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; sch_tree_lock(sch); - q->quantum = ctl->quantum ? : psched_mtu(sch->dev); + q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); q->perturb_period = ctl->perturb_period * HZ; if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); @@ -502,7 +502,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->max_depth = 0; q->tail = SFQ_DEPTH; if (opt == NULL) { - q->quantum = psched_mtu(sch->dev); + q->quantum = psched_mtu(qdisc_dev(sch)); q->perturb_period = 0; q->perturbation = net_random(); } else { @@ -520,7 +520,7 @@ static void sfq_destroy(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); q->perturb_period = 0; del_timer_sync(&q->perturb_timer); } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 0b7d78f59d8c..b296672f7632 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -123,7 +123,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) struct tbf_sched_data *q = qdisc_priv(sch); int ret; - if (skb->len > q->max_size) { + if (qdisc_pkt_len(skb) > q->max_size) { sch->qstats.drops++; #ifdef CONFIG_NET_CLS_ACT if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) @@ -133,13 +133,14 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) return NET_XMIT_DROP; } - if ((ret = q->qdisc->enqueue(skb, q->qdisc)) != 0) { + ret = qdisc_enqueue(skb, q->qdisc); + if (ret != 0) { sch->qstats.drops++; return ret; } sch->q.qlen++; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; return 0; } @@ -180,7 +181,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) psched_time_t now; long toks; long ptoks = 0; - unsigned int len = skb->len; + unsigned int len = qdisc_pkt_len(skb); now = psched_get_time(); toks = psched_tdiff_bounded(now, q->t_c, q->buffer); @@ -242,34 +243,6 @@ static void tbf_reset(struct Qdisc* sch) qdisc_watchdog_cancel(&q->watchdog); } -static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit) -{ - struct Qdisc *q; - struct nlattr *nla; - int ret; - - q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, - TC_H_MAKE(sch->handle, 1)); - if (q) { - nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), - GFP_KERNEL); - if (nla) { - nla->nla_type = RTM_NEWQDISC; - nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); - ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; - - ret = q->ops->change(q, nla); - kfree(nla); - - if (ret == 0) - return q; - } - qdisc_destroy(q); - } - - return NULL; -} - static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) }, [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, @@ -322,8 +295,11 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) goto done; if (qopt->limit > 0) { - if ((child = tbf_create_dflt_qdisc(sch, qopt->limit)) == NULL) + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); + if (IS_ERR(child)) { + err = PTR_ERR(child); goto done; + } } sch_tree_lock(sch); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 0444fd0f0d22..537223642b6e 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -78,12 +78,12 @@ struct teql_sched_data static int teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) { - struct net_device *dev = sch->dev; + struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); if (q->q.qlen < dev->tx_queue_len) { __skb_queue_tail(&q->q, skb); - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; return 0; } @@ -107,17 +107,19 @@ static struct sk_buff * teql_dequeue(struct Qdisc* sch) { struct teql_sched_data *dat = qdisc_priv(sch); + struct netdev_queue *dat_queue; struct sk_buff *skb; skb = __skb_dequeue(&dat->q); + dat_queue = netdev_get_tx_queue(dat->m->dev, 0); if (skb == NULL) { - struct net_device *m = dat->m->dev->qdisc->dev; + struct net_device *m = qdisc_dev(dat_queue->qdisc); if (m) { dat->m->slaves = sch; netif_wake_queue(m); } } - sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen; + sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; return skb; } @@ -153,10 +155,16 @@ teql_destroy(struct Qdisc* sch) if (q == master->slaves) { master->slaves = NEXT_SLAVE(q); if (q == master->slaves) { + struct netdev_queue *txq; + spinlock_t *root_lock; + + txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - spin_lock_bh(&master->dev->queue_lock); - qdisc_reset(master->dev->qdisc); - spin_unlock_bh(&master->dev->queue_lock); + + root_lock = qdisc_root_lock(txq->qdisc); + spin_lock_bh(root_lock); + qdisc_reset(txq->qdisc); + spin_unlock_bh(root_lock); } } skb_queue_purge(&dat->q); @@ -170,7 +178,7 @@ teql_destroy(struct Qdisc* sch) static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) { - struct net_device *dev = sch->dev; + struct net_device *dev = qdisc_dev(sch); struct teql_master *m = (struct teql_master*)sch->ops; struct teql_sched_data *q = qdisc_priv(sch); @@ -216,7 +224,8 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { - struct teql_sched_data *q = qdisc_priv(dev->qdisc); + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); + struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); struct neighbour *mn = skb->dst->neighbour; struct neighbour *n = q->ncache; @@ -252,7 +261,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * static inline int teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { - if (dev->qdisc == &noop_qdisc) + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); + if (txq->qdisc == &noop_qdisc) return -ENODEV; if (dev->header_ops == NULL || @@ -268,7 +278,6 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) struct Qdisc *start, *q; int busy; int nores; - int len = skb->len; int subq = skb_get_queue_mapping(skb); struct sk_buff *skb_res = NULL; @@ -282,12 +291,13 @@ restart: goto drop; do { - struct net_device *slave = q->dev; + struct net_device *slave = qdisc_dev(q); + struct netdev_queue *slave_txq; - if (slave->qdisc_sleeping != q) + slave_txq = netdev_get_tx_queue(slave, 0); + if (slave_txq->qdisc_sleeping != q) continue; - if (netif_queue_stopped(slave) || - __netif_subqueue_stopped(slave, subq) || + if (__netif_subqueue_stopped(slave, subq) || !netif_running(slave)) { busy = 1; continue; @@ -296,14 +306,14 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: if (netif_tx_trylock(slave)) { - if (!netif_queue_stopped(slave) && - !__netif_subqueue_stopped(slave, subq) && + if (!__netif_subqueue_stopped(slave, subq) && slave->hard_start_xmit(skb, slave) == 0) { netif_tx_unlock(slave); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; - master->stats.tx_bytes += len; + master->stats.tx_bytes += + qdisc_pkt_len(skb); return 0; } netif_tx_unlock(slave); @@ -352,7 +362,7 @@ static int teql_master_open(struct net_device *dev) q = m->slaves; do { - struct net_device *slave = q->dev; + struct net_device *slave = qdisc_dev(q); if (slave == NULL) return -EUNATCH; @@ -403,7 +413,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) q = m->slaves; if (q) { do { - if (new_mtu > q->dev->mtu) + if (new_mtu > qdisc_dev(q)->mtu) return -EINVAL; } while ((q=NEXT_SLAVE(q)) != m->slaves); } diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 0b79f869c4ea..58b3e882a187 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -47,11 +47,11 @@ config SCTP_DBG_MSG config SCTP_DBG_OBJCNT bool "SCTP: Debug object counts" + depends on PROC_FS help If you say Y, this will enable debugging support for counting the type of objects that are currently allocated. This is useful for - identifying memory leaks. If the /proc filesystem is enabled this - debug information can be viewed by + identifying memory leaks. This debug information can be viewed by 'cat /proc/net/sctp/sctp_dbg_objcnt' If unsure, say N diff --git a/net/sctp/Makefile b/net/sctp/Makefile index f5356b9d5ee3..6b794734380a 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -9,10 +9,10 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ transport.o chunk.o sm_make_chunk.o ulpevent.o \ inqueue.o outqueue.o ulpqueue.o command.o \ tsnmap.o bind_addr.o socket.o primitive.o \ - output.o input.o debug.o ssnmap.o proc.o \ - auth.o + output.o input.o debug.o ssnmap.o auth.o sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o +sctp-$(CONFIG_PROC_FS) += proc.o sctp-$(CONFIG_SYSCTL) += sysctl.o sctp-$(subst m,y,$(CONFIG_IPV6)) += ipv6.o diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b4cd2b71953f..8472b8b349c4 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -136,6 +136,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Set association default SACK delay */ asoc->sackdelay = msecs_to_jiffies(sp->sackdelay); + asoc->sackfreq = sp->sackfreq; /* Set the association default flags controlling * Heartbeat, SACK delay, and Path MTU Discovery. @@ -261,6 +262,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * already received one packet.] */ asoc->peer.sack_needed = 1; + asoc->peer.sack_cnt = 0; /* Assume that the peer will tell us if he recognizes ASCONF * as part of INIT exchange. @@ -462,7 +464,7 @@ static void sctp_association_destroy(struct sctp_association *asoc) spin_unlock_bh(&sctp_assocs_id_lock); } - BUG_TRAP(!atomic_read(&asoc->rmem_alloc)); + WARN_ON(atomic_read(&asoc->rmem_alloc)); if (asoc->base.malloced) { kfree(asoc); @@ -474,6 +476,15 @@ static void sctp_association_destroy(struct sctp_association *asoc) void sctp_assoc_set_primary(struct sctp_association *asoc, struct sctp_transport *transport) { + int changeover = 0; + + /* it's a changeover only if we already have a primary path + * that we are changing + */ + if (asoc->peer.primary_path != NULL && + asoc->peer.primary_path != transport) + changeover = 1 ; + asoc->peer.primary_path = transport; /* Set a default msg_name for events. */ @@ -499,12 +510,12 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, * double switch to the same destination address. */ if (transport->cacc.changeover_active) - transport->cacc.cycling_changeover = 1; + transport->cacc.cycling_changeover = changeover; /* 2) The sender MUST set CHANGEOVER_ACTIVE to indicate that * a changeover has occurred. */ - transport->cacc.changeover_active = 1; + transport->cacc.changeover_active = changeover; /* 3) The sender MUST store the next TSN to be sent in * next_tsn_at_change. @@ -615,6 +626,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, * association configured value. */ peer->sackdelay = asoc->sackdelay; + peer->sackfreq = asoc->sackfreq; /* Enable/disable heartbeat, SACK delay, and path MTU discovery * based on association setting. @@ -641,6 +653,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " "%d\n", asoc, asoc->pathmtu); + peer->pmtu_pending = 0; asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); @@ -1203,6 +1216,9 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) struct list_head *head = &asoc->peer.transport_addr_list; struct list_head *pos; + if (asoc->peer.transport_count == 1) + return; + /* Find the next transport in a round-robin fashion. */ t = asoc->peer.retran_path; pos = &t->transports; @@ -1217,6 +1233,15 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) t = list_entry(pos, struct sctp_transport, transports); + /* We have exhausted the list, but didn't find any + * other active transports. If so, use the next + * transport. + */ + if (t == asoc->peer.retran_path) { + t = next; + break; + } + /* Try to find an active transport. */ if ((t->state == SCTP_ACTIVE) || @@ -1229,15 +1254,6 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) if (!next) next = t; } - - /* We have exhausted the list, but didn't find any - * other active transports. If so, use the next - * transport. - */ - if (t == asoc->peer.retran_path) { - t = next; - break; - } } asoc->peer.retran_path = t; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 80e6df06967a..f62bc2468935 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -348,6 +348,43 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp, return match; } +/* Does the address 'addr' conflict with any addresses in + * the bp. + */ +int sctp_bind_addr_conflict(struct sctp_bind_addr *bp, + const union sctp_addr *addr, + struct sctp_sock *bp_sp, + struct sctp_sock *addr_sp) +{ + struct sctp_sockaddr_entry *laddr; + int conflict = 0; + struct sctp_sock *sp; + + /* Pick the IPv6 socket as the basis of comparison + * since it's usually a superset of the IPv4. + * If there is no IPv6 socket, then default to bind_addr. + */ + if (sctp_opt2sk(bp_sp)->sk_family == AF_INET6) + sp = bp_sp; + else if (sctp_opt2sk(addr_sp)->sk_family == AF_INET6) + sp = addr_sp; + else + sp = bp_sp; + + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; + + conflict = sp->pf->cmp_addr(&laddr->a, addr, sp); + if (conflict) + break; + } + rcu_read_unlock(); + + return conflict; +} + /* Get the state of the entry in the bind_addr_list */ int sctp_bind_addr_state(const struct sctp_bind_addr *bp, const union sctp_addr *addr) diff --git a/net/sctp/input.c b/net/sctp/input.c index ca6b022b1df2..a49fa80b57b9 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -61,6 +61,7 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/checksum.h> +#include <net/net_namespace.h> /* Forward declarations for internal helpers. */ static int sctp_rcv_ootb(struct sk_buff *); @@ -82,8 +83,8 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb) { struct sk_buff *list = skb_shinfo(skb)->frag_list; struct sctphdr *sh = sctp_hdr(skb); - __u32 cmp = ntohl(sh->checksum); - __u32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb)); + __be32 cmp = sh->checksum; + __be32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb)); for (; list; list = list->next) val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list), @@ -430,6 +431,9 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, struct sock *sk = NULL; struct sctp_association *asoc; struct sctp_transport *transport = NULL; + struct sctp_init_chunk *chunkhdr; + __u32 vtag = ntohl(sctphdr->vtag); + int len = skb->len - ((void *)sctphdr - (void *)skb->data); *app = NULL; *tpp = NULL; @@ -451,8 +455,28 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, sk = asoc->base.sk; - if (ntohl(sctphdr->vtag) != asoc->c.peer_vtag) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + /* RFC 4960, Appendix C. ICMP Handling + * + * ICMP6) An implementation MUST validate that the Verification Tag + * contained in the ICMP message matches the Verification Tag of + * the peer. If the Verification Tag is not 0 and does NOT + * match, discard the ICMP message. If it is 0 and the ICMP + * message contains enough bytes to verify that the chunk type is + * an INIT chunk and that the Initiate Tag matches the tag of the + * peer, continue with ICMP7. If the ICMP message is too short + * or the chunk type or the Initiate Tag does not match, silently + * discard the packet. + */ + if (vtag == 0) { + chunkhdr = (struct sctp_init_chunk *)((void *)sctphdr + + sizeof(struct sctphdr)); + if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) + + sizeof(__be32) || + chunkhdr->chunk_hdr.type != SCTP_CID_INIT || + ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) { + goto out; + } + } else if (vtag != asoc->c.peer_vtag) { goto out; } @@ -462,7 +486,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(&init_net, LINUX_MIB_LOCKDROPPEDICMPS); *app = asoc; *tpp = transport; @@ -511,7 +535,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) int err; if (skb->len < ihlen + 8) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); return; } @@ -525,7 +549,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); return; } /* Warning: The sock lock is held. Remember to call diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e45e44c60635..a238d6834b33 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -299,7 +299,8 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1, /* Fills in the source address(saddr) based on the destination address(daddr) * and asoc's bind address list. */ -static void sctp_v6_get_saddr(struct sctp_association *asoc, +static void sctp_v6_get_saddr(struct sctp_sock *sk, + struct sctp_association *asoc, struct dst_entry *dst, union sctp_addr *daddr, union sctp_addr *saddr) @@ -318,7 +319,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, if (!asoc) { ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, &daddr->v6.sin6_addr, - inet6_sk(asoc->base.sk)->srcprefs, + inet6_sk(&sk->inet.sk)->srcprefs, &saddr->v6.sin6_addr); SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n", NIP6(saddr->v6.sin6_addr)); @@ -726,6 +727,11 @@ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr)); } +static void sctp_v6_ecn_capable(struct sock *sk) +{ + inet6_sk(sk)->tclass |= INET_ECN_ECT_0; +} + /* Initialize a PF_INET6 socket msg_name. */ static void sctp_inet6_msgname(char *msgname, int *addr_len) { @@ -812,7 +818,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp) return 1; /* v4-mapped-v6 addresses */ case AF_INET: - if (!__ipv6_only_sock(sctp_opt2sk(sp)) && sp->v4mapped) + if (!__ipv6_only_sock(sctp_opt2sk(sp))) return 1; default: return 0; @@ -834,6 +840,11 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, if (!af1 || !af2) return 0; + + /* If the socket is IPv6 only, v4 addrs will not match */ + if (__ipv6_only_sock(sctp_opt2sk(opt)) && af1 != af2) + return 0; + /* Today, wildcard AF_INET/AF_INET6. */ if (sctp_is_any(addr1) || sctp_is_any(addr2)) return 1; @@ -870,7 +881,11 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) return 0; } dev_put(dev); + } else if (type == IPV6_ADDR_MAPPED) { + if (!opt->v4mapped) + return 0; } + af = opt->pf->af; } return af->available(addr, opt); @@ -913,9 +928,12 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, __be16 *types) { - types[0] = SCTP_PARAM_IPV4_ADDRESS; - types[1] = SCTP_PARAM_IPV6_ADDRESS; - return 2; + types[0] = SCTP_PARAM_IPV6_ADDRESS; + if (!opt || !ipv6_only_sock(sctp_opt2sk(opt))) { + types[1] = SCTP_PARAM_IPV4_ADDRESS; + return 2; + } + return 1; } static const struct proto_ops inet6_seqpacket_ops = { @@ -996,6 +1014,7 @@ static struct sctp_af sctp_af_inet6 = { .skb_iif = sctp_v6_skb_iif, .is_ce = sctp_v6_is_ce, .seq_dump_addr = sctp_v6_seq_dump_addr, + .ecn_capable = sctp_v6_ecn_capable, .net_header_len = sizeof(struct ipv6hdr), .sockaddr_len = sizeof(struct sockaddr_in6), #ifdef CONFIG_COMPAT diff --git a/net/sctp/output.c b/net/sctp/output.c index cf4f9fb6819d..45684646b1db 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -50,6 +50,7 @@ #include <linux/init.h> #include <net/inet_ecn.h> #include <net/icmp.h> +#include <net/net_namespace.h> #ifndef TEST_FRAME #include <net/tcp.h> @@ -157,7 +158,8 @@ void sctp_packet_free(struct sctp_packet *packet) * packet can be sent only after receiving the COOKIE_ACK. */ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, - struct sctp_chunk *chunk) + struct sctp_chunk *chunk, + int one_packet) { sctp_xmit_t retval; int error = 0; @@ -175,7 +177,9 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, /* If we have an empty packet, then we can NOT ever * return PMTU_FULL. */ - retval = sctp_packet_append_chunk(packet, chunk); + if (!one_packet) + retval = sctp_packet_append_chunk(packet, + chunk); } break; @@ -361,7 +365,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; struct sctphdr *sh; - __u32 crc32 = 0; + __be32 crc32 = __constant_cpu_to_be32(0); struct sk_buff *nskb; struct sctp_chunk *chunk, *tmp; struct sock *sk; @@ -534,7 +538,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) /* 3) Put the resultant value into the checksum field in the * common header, and leave the rest of the bits unchanged. */ - sh->checksum = htonl(crc32); + sh->checksum = crc32; /* IP layer ECN support * From RFC 2481 @@ -548,7 +552,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * Note: The works for IPv6 layer checks this bit too later * in transmission. See IP6_ECN_flow_xmit(). */ - INET_ECN_xmit(nskb->sk); + (*tp->af_specific->ecn_capable)(nskb->sk); /* Set up the IP options. */ /* BUG: not implemented @@ -592,7 +596,7 @@ out: return err; no_route: kfree_skb(nskb); - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 59edfd25a19c..4328ad5439c9 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -71,6 +71,8 @@ static void sctp_mark_missing(struct sctp_outq *q, static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); +static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout); + /* Add data to the front of the queue. */ static inline void sctp_outq_head_data(struct sctp_outq *q, struct sctp_chunk *ch) @@ -208,6 +210,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); + q->fast_rtx = 0; q->outstanding_bytes = 0; q->empty = 1; q->cork = 0; @@ -500,6 +503,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, case SCTP_RTXR_FAST_RTX: SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); + q->fast_rtx = 1; break; case SCTP_RTXR_PMTUD: SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); @@ -518,9 +522,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by * following the procedures outlined in C1 - C5. */ - sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); + if (reason == SCTP_RTXR_T3_RTX) + sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); - error = sctp_outq_flush(q, /* rtx_timeout */ 1); + /* Flush the queues only on timeout, since fast_rtx is only + * triggered during sack processing and the queue + * will be flushed at the end. + */ + if (reason != SCTP_RTXR_FAST_RTX) + error = sctp_outq_flush(q, /* rtx_timeout */ 1); if (error) q->asoc->base.sk->sk_err = -error; @@ -538,17 +548,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, int rtx_timeout, int *start_timer) { struct list_head *lqueue; - struct list_head *lchunk; struct sctp_transport *transport = pkt->transport; sctp_xmit_t status; struct sctp_chunk *chunk, *chunk1; struct sctp_association *asoc; + int fast_rtx; int error = 0; + int timer = 0; + int done = 0; asoc = q->asoc; lqueue = &q->retransmit; + fast_rtx = q->fast_rtx; - /* RFC 2960 6.3.3 Handle T3-rtx Expiration + /* This loop handles time-out retransmissions, fast retransmissions, + * and retransmissions due to opening of whindow. + * + * RFC 2960 6.3.3 Handle T3-rtx Expiration * * E3) Determine how many of the earliest (i.e., lowest TSN) * outstanding DATA chunks for the address for which the @@ -563,12 +579,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, * [Just to be painfully clear, if we are retransmitting * because a timeout just happened, we should send only ONE * packet of retransmitted data.] + * + * For fast retransmissions we also send only ONE packet. However, + * if we are just flushing the queue due to open window, we'll + * try to send as much as possible. */ - lchunk = sctp_list_dequeue(lqueue); - - while (lchunk) { - chunk = list_entry(lchunk, struct sctp_chunk, - transmitted_list); + list_for_each_entry_safe(chunk, chunk1, lqueue, transmitted_list) { /* Make sure that Gap Acked TSNs are not retransmitted. A * simple approach is just to move such TSNs out of the @@ -576,58 +592,60 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, * next chunk. */ if (chunk->tsn_gap_acked) { - list_add_tail(lchunk, &transport->transmitted); - lchunk = sctp_list_dequeue(lqueue); + list_del(&chunk->transmitted_list); + list_add_tail(&chunk->transmitted_list, + &transport->transmitted); continue; } + /* If we are doing fast retransmit, ignore non-fast_rtransmit + * chunks + */ + if (fast_rtx && !chunk->fast_retransmit) + continue; + /* Attempt to append this chunk to the packet. */ status = sctp_packet_append_chunk(pkt, chunk); switch (status) { case SCTP_XMIT_PMTU_FULL: /* Send this packet. */ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* If we are retransmitting, we should only * send a single packet. */ - if (rtx_timeout) { - list_add(lchunk, lqueue); - lchunk = NULL; - } + if (rtx_timeout || fast_rtx) + done = 1; - /* Bundle lchunk in the next round. */ + /* Bundle next chunk in the next round. */ break; case SCTP_XMIT_RWND_FULL: /* Send this packet. */ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* Stop sending DATA as there is no more room * at the receiver. */ - list_add(lchunk, lqueue); - lchunk = NULL; + done = 1; break; case SCTP_XMIT_NAGLE_DELAY: /* Send this packet. */ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* Stop sending DATA because of nagle delay. */ - list_add(lchunk, lqueue); - lchunk = NULL; + done = 1; break; default: /* The append was successful, so add this chunk to * the transmitted list. */ - list_add_tail(lchunk, &transport->transmitted); + list_del(&chunk->transmitted_list); + list_add_tail(&chunk->transmitted_list, + &transport->transmitted); /* Mark the chunk as ineligible for fast retransmit * after it is retransmitted. @@ -635,27 +653,44 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (chunk->fast_retransmit > 0) chunk->fast_retransmit = -1; - *start_timer = 1; - q->empty = 0; + /* Force start T3-rtx timer when fast retransmitting + * the earliest outstanding TSN + */ + if (!timer && fast_rtx && + ntohl(chunk->subh.data_hdr->tsn) == + asoc->ctsn_ack_point + 1) + timer = 2; - /* Retrieve a new chunk to bundle. */ - lchunk = sctp_list_dequeue(lqueue); + q->empty = 0; break; } - /* If we are here due to a retransmit timeout or a fast - * retransmit and if there are any chunks left in the retransmit - * queue that could not fit in the PMTU sized packet, they need - * to be marked as ineligible for a subsequent fast retransmit. - */ - if (rtx_timeout && !lchunk) { - list_for_each_entry(chunk1, lqueue, transmitted_list) { - if (chunk1->fast_retransmit > 0) - chunk1->fast_retransmit = -1; - } + /* Set the timer if there were no errors */ + if (!error && !timer) + timer = 1; + + if (done) + break; + } + + /* If we are here due to a retransmit timeout or a fast + * retransmit and if there are any chunks left in the retransmit + * queue that could not fit in the PMTU sized packet, they need + * to be marked as ineligible for a subsequent fast retransmit. + */ + if (rtx_timeout || fast_rtx) { + list_for_each_entry(chunk1, lqueue, transmitted_list) { + if (chunk1->fast_retransmit > 0) + chunk1->fast_retransmit = -1; } } + *start_timer = timer; + + /* Clear fast retransmit hint */ + if (fast_rtx) + q->fast_rtx = 0; + return error; } @@ -669,6 +704,7 @@ int sctp_outq_uncork(struct sctp_outq *q) return error; } + /* * Try to flush an outqueue. * @@ -678,7 +714,7 @@ int sctp_outq_uncork(struct sctp_outq *q) * locking concerns must be made. Today we use the sock lock to protect * this function. */ -int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) +static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) { struct sctp_packet *packet; struct sctp_packet singleton; @@ -692,6 +728,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) sctp_xmit_t status; int error = 0; int start_timer = 0; + int one_packet = 0; /* These transports have chunks to send. */ struct list_head transport_list; @@ -797,20 +834,33 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (sctp_test_T_bit(chunk)) { packet->vtag = asoc->c.my_vtag; } - case SCTP_CID_SACK: - case SCTP_CID_HEARTBEAT: + /* The following chunks are "response" chunks, i.e. + * they are generated in response to something we + * received. If we are sending these, then we can + * send only 1 packet containing these chunks. + */ case SCTP_CID_HEARTBEAT_ACK: - case SCTP_CID_SHUTDOWN: case SCTP_CID_SHUTDOWN_ACK: - case SCTP_CID_ERROR: - case SCTP_CID_COOKIE_ECHO: case SCTP_CID_COOKIE_ACK: - case SCTP_CID_ECN_ECNE: + case SCTP_CID_COOKIE_ECHO: + case SCTP_CID_ERROR: case SCTP_CID_ECN_CWR: - case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: + one_packet = 1; + /* Fall throught */ + + case SCTP_CID_SACK: + case SCTP_CID_HEARTBEAT: + case SCTP_CID_SHUTDOWN: + case SCTP_CID_ECN_ECNE: + case SCTP_CID_ASCONF: case SCTP_CID_FWD_TSN: - sctp_packet_transmit_chunk(packet, chunk); + status = sctp_packet_transmit_chunk(packet, chunk, + one_packet); + if (status != SCTP_XMIT_OK) { + /* put the chunk back */ + list_add(&chunk->list, &q->control_chunk_list); + } break; default: @@ -862,7 +912,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) rtx_timeout, &start_timer); if (start_timer) - sctp_transport_reset_timers(transport); + sctp_transport_reset_timers(transport, + start_timer-1); /* This can happen on COOKIE-ECHO resend. Only * one chunk can get bundled with a COOKIE-ECHO. @@ -940,7 +991,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) atomic_read(&chunk->skb->users) : -1); /* Add the chunk to the packet. */ - status = sctp_packet_transmit_chunk(packet, chunk); + status = sctp_packet_transmit_chunk(packet, chunk, 0); switch (status) { case SCTP_XMIT_PMTU_FULL: @@ -977,7 +1028,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport); + sctp_transport_reset_timers(transport, start_timer-1); q->empty = 0; @@ -1205,7 +1256,6 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) * Make sure the empty queue handler will get run later. */ q->empty = (list_empty(&q->out_chunk_list) && - list_empty(&q->control_chunk_list) && list_empty(&q->retransmit)); if (!q->empty) goto finish; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 0aba759cb9b7..f268910620be 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -383,3 +383,139 @@ void sctp_assocs_proc_exit(void) { remove_proc_entry("assocs", proc_net_sctp); } + +static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos >= sctp_assoc_hashsize) + return NULL; + + if (*pos < 0) + *pos = 0; + + if (*pos == 0) + seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX " + "REM_ADDR_RTX START\n"); + + return (void *)pos; +} + +static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + if (++*pos >= sctp_assoc_hashsize) + return NULL; + + return pos; +} + +static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) +{ + return; +} + +static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) +{ + struct sctp_hashbucket *head; + struct sctp_ep_common *epb; + struct sctp_association *assoc; + struct hlist_node *node; + struct sctp_transport *tsp; + int hash = *(loff_t *)v; + + if (hash >= sctp_assoc_hashsize) + return -ENOMEM; + + head = &sctp_assoc_hashtable[hash]; + sctp_local_bh_disable(); + read_lock(&head->lock); + sctp_for_each_hentry(epb, node, &head->chain) { + assoc = sctp_assoc(epb); + list_for_each_entry(tsp, &assoc->peer.transport_addr_list, + transports) { + /* + * The remote address (ADDR) + */ + tsp->af_specific->seq_dump_addr(seq, &tsp->ipaddr); + seq_printf(seq, " "); + + /* + * The association ID (ASSOC_ID) + */ + seq_printf(seq, "%d ", tsp->asoc->assoc_id); + + /* + * If the Heartbeat is active (HB_ACT) + * Note: 1 = Active, 0 = Inactive + */ + seq_printf(seq, "%d ", timer_pending(&tsp->hb_timer)); + + /* + * Retransmit time out (RTO) + */ + seq_printf(seq, "%lu ", tsp->rto); + + /* + * Maximum path retransmit count (PATH_MAX_RTX) + */ + seq_printf(seq, "%d ", tsp->pathmaxrxt); + + /* + * remote address retransmit count (REM_ADDR_RTX) + * Note: We don't have a way to tally this at the moment + * so lets just leave it as zero for the moment + */ + seq_printf(seq, "0 "); + + /* + * remote address start time (START). This is also not + * currently implemented, but we can record it with a + * jiffies marker in a subsequent patch + */ + seq_printf(seq, "0"); + + seq_printf(seq, "\n"); + } + } + + read_unlock(&head->lock); + sctp_local_bh_enable(); + + return 0; + +} + +static const struct seq_operations sctp_remaddr_ops = { + .start = sctp_remaddr_seq_start, + .next = sctp_remaddr_seq_next, + .stop = sctp_remaddr_seq_stop, + .show = sctp_remaddr_seq_show, +}; + +/* Cleanup the proc fs entry for 'remaddr' object. */ +void sctp_remaddr_proc_exit(void) +{ + remove_proc_entry("remaddr", proc_net_sctp); +} + +static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &sctp_remaddr_ops); +} + +static const struct file_operations sctp_remaddr_seq_fops = { + .open = sctp_remaddr_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +int __init sctp_remaddr_proc_init(void) +{ + struct proc_dir_entry *p; + + p = create_proc_entry("remaddr", S_IRUGO, proc_net_sctp); + if (!p) + return -ENOMEM; + p->proc_fops = &sctp_remaddr_seq_fops; + + return 0; +} diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 0ec234b762c2..a6e0818bcff5 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -52,6 +52,8 @@ #include <linux/inetdevice.h> #include <linux/seq_file.h> #include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/swap.h> #include <net/net_namespace.h> #include <net/protocol.h> #include <net/ip.h> @@ -64,9 +66,12 @@ /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; -struct proc_dir_entry *proc_net_sctp; DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; +#ifdef CONFIG_PROC_FS +struct proc_dir_entry *proc_net_sctp; +#endif + struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); @@ -97,6 +102,7 @@ struct sock *sctp_get_ctl_sock(void) /* Set up the proc fs entry for the SCTP protocol. */ static __init int sctp_proc_init(void) { +#ifdef CONFIG_PROC_FS if (!proc_net_sctp) { struct proc_dir_entry *ent; ent = proc_mkdir("sctp", init_net.proc_net); @@ -108,16 +114,32 @@ static __init int sctp_proc_init(void) } if (sctp_snmp_proc_init()) - goto out_nomem; + goto out_snmp_proc_init; if (sctp_eps_proc_init()) - goto out_nomem; + goto out_eps_proc_init; if (sctp_assocs_proc_init()) - goto out_nomem; + goto out_assocs_proc_init; + if (sctp_remaddr_proc_init()) + goto out_remaddr_proc_init; return 0; +out_remaddr_proc_init: + sctp_assocs_proc_exit(); +out_assocs_proc_init: + sctp_eps_proc_exit(); +out_eps_proc_init: + sctp_snmp_proc_exit(); +out_snmp_proc_init: + if (proc_net_sctp) { + proc_net_sctp = NULL; + remove_proc_entry("sctp", init_net.proc_net); + } out_nomem: return -ENOMEM; +#else + return 0; +#endif /* CONFIG_PROC_FS */ } /* Clean up the proc fs entry for the SCTP protocol. @@ -126,14 +148,17 @@ out_nomem: */ static void sctp_proc_exit(void) { +#ifdef CONFIG_PROC_FS sctp_snmp_proc_exit(); sctp_eps_proc_exit(); sctp_assocs_proc_exit(); + sctp_remaddr_proc_exit(); if (proc_net_sctp) { proc_net_sctp = NULL; remove_proc_entry("sctp", init_net.proc_net); } +#endif } /* Private helper to extract ipv4 address and stash them in @@ -358,6 +383,10 @@ static int sctp_v4_addr_valid(union sctp_addr *addr, struct sctp_sock *sp, const struct sk_buff *skb) { + /* IPv4 addresses not allowed */ + if (sp && ipv6_only_sock(sctp_opt2sk(sp))) + return 0; + /* Is this a non-unicast address or a unusable SCTP address? */ if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) return 0; @@ -381,6 +410,9 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) !sysctl_ip_nonlocal_bind) return 0; + if (ipv6_only_sock(sctp_opt2sk(sp))) + return 0; + return 1; } @@ -470,11 +502,11 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, /* Walk through the bind address list and look for a bind * address that matches the source address of the returned dst. */ + sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) continue; - sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; } @@ -519,7 +551,8 @@ out: /* For v4, the source address is cached in the route entry(dst). So no need * to cache it separately and hence this is an empty routine. */ -static void sctp_v4_get_saddr(struct sctp_association *asoc, +static void sctp_v4_get_saddr(struct sctp_sock *sk, + struct sctp_association *asoc, struct dst_entry *dst, union sctp_addr *daddr, union sctp_addr *saddr) @@ -616,6 +649,11 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); } +static void sctp_v4_ecn_capable(struct sock *sk) +{ + INET_ECN_xmit(sk); +} + /* Event handler for inet address addition/deletion events. * The sctp_local_addr_list needs to be protocted by a spin lock since * multiple notifiers (say IPv4 and IPv6) may be running at the same @@ -630,7 +668,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, struct sctp_sockaddr_entry *temp; int found = 0; - if (dev_net(ifa->ifa_dev->dev) != &init_net) + if (!net_eq(dev_net(ifa->ifa_dev->dev), &init_net)) return NOTIFY_DONE; switch (ev) { @@ -934,6 +972,7 @@ static struct sctp_af sctp_af_inet = { .skb_iif = sctp_v4_skb_iif, .is_ce = sctp_v4_is_ce, .seq_dump_addr = sctp_v4_seq_dump_addr, + .ecn_capable = sctp_v4_ecn_capable, .net_header_len = sizeof(struct iphdr), .sockaddr_len = sizeof(struct sockaddr_in), #ifdef CONFIG_COMPAT @@ -1043,6 +1082,7 @@ SCTP_STATIC __init int sctp_init(void) int status = -EINVAL; unsigned long goal; unsigned long limit; + unsigned long nr_pages; int max_share; int order; @@ -1138,8 +1178,9 @@ SCTP_STATIC __init int sctp_init(void) * Note this initalizes the data in sctpv6_prot too * Unabashedly stolen from tcp_init */ - limit = min(num_physpages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (num_physpages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + nr_pages = totalram_pages - totalhigh_pages; + limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); limit = max(limit, 128UL); sysctl_sctp_mem[0] = limit / 4 * 3; sysctl_sctp_mem[1] = limit; @@ -1149,7 +1190,7 @@ SCTP_STATIC __init int sctp_init(void) limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); - sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */ + sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 81b606424e12..e8ca4e54981f 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2364,8 +2364,13 @@ static int sctp_process_param(struct sctp_association *asoc, case SCTP_PARAM_IPV6_ADDRESS: if (PF_INET6 != asoc->base.sk->sk_family) break; - /* Fall through. */ + goto do_addr_param; + case SCTP_PARAM_IPV4_ADDRESS: + /* v4 addresses are not allowed on v6-only socket */ + if (ipv6_only_sock(asoc->base.sk)) + break; +do_addr_param: af = sctp_get_af_specific(param_type2af(param.p->type)); af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0); scope = sctp_scope(peer_addr); @@ -2418,7 +2423,8 @@ static int sctp_process_param(struct sctp_association *asoc, break; case SCTP_PARAM_IPV6_ADDRESS: - asoc->peer.ipv6_address = 1; + if (PF_INET6 == asoc->base.sk->sk_family) + asoc->peer.ipv6_address = 1; break; case SCTP_PARAM_HOST_NAME_ADDRESS: @@ -2829,6 +2835,19 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, addr_param = (union sctp_addr_param *) ((void *)asconf_param + sizeof(sctp_addip_param_t)); + switch (addr_param->v4.param_hdr.type) { + case SCTP_PARAM_IPV6_ADDRESS: + if (!asoc->peer.ipv6_address) + return SCTP_ERROR_INV_PARAM; + break; + case SCTP_PARAM_IPV4_ADDRESS: + if (!asoc->peer.ipv4_address) + return SCTP_ERROR_INV_PARAM; + break; + default: + return SCTP_ERROR_INV_PARAM; + } + af = sctp_get_af_specific(param_type2af(addr_param->v4.param_hdr.type)); if (unlikely(!af)) return SCTP_ERROR_INV_PARAM; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 23a9f1a95b7d..9732c797e8ed 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -190,20 +190,28 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, * unacknowledged DATA chunk. ... */ if (!asoc->peer.sack_needed) { - /* We will need a SACK for the next packet. */ - asoc->peer.sack_needed = 1; + asoc->peer.sack_cnt++; /* Set the SACK delay timeout based on the * SACK delay for the last transport * data was received from, or the default * for the association. */ - if (trans) + if (trans) { + /* We will need a SACK for the next packet. */ + if (asoc->peer.sack_cnt >= trans->sackfreq - 1) + asoc->peer.sack_needed = 1; + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = trans->sackdelay; - else + } else { + /* We will need a SACK for the next packet. */ + if (asoc->peer.sack_cnt >= asoc->sackfreq - 1) + asoc->peer.sack_needed = 1; + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; + } /* Restart the SACK timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, @@ -216,6 +224,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, goto nomem; asoc->peer.sack_needed = 0; + asoc->peer.sack_cnt = 0; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(sack)); @@ -655,7 +664,7 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, struct sctp_association *asoc, struct sctp_sackhdr *sackh) { - int err; + int err = 0; if (sctp_outq_sack(&asoc->outqueue, sackh)) { /* There are no more TSNs awaiting SACK. */ @@ -663,11 +672,6 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN), asoc->state, asoc->ep, asoc, NULL, GFP_ATOMIC); - } else { - /* Windows may have opened, so we need - * to check if we have DATA to transmit - */ - err = sctp_outq_flush(&asoc->outqueue, 0); } return err; @@ -1472,8 +1476,15 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_DISCARD_PACKET: - /* We need to discard the whole packet. */ + /* We need to discard the whole packet. + * Uncork the queue since there might be + * responses pending + */ chunk->pdiscard = 1; + if (asoc) { + sctp_outq_uncork(&asoc->outqueue); + local_cork = 0; + } break; case SCTP_CMD_RTO_PENDING: @@ -1544,8 +1555,15 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, } out: - if (local_cork) - sctp_outq_uncork(&asoc->outqueue); + /* If this is in response to a received chunk, wait until + * we are done with the packet to open the queue so that we don't + * send multiple packets in response to a single request. + */ + if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) { + if (chunk->end_of_packet || chunk->singleton) + sctp_outq_uncork(&asoc->outqueue); + } else if (local_cork) + sctp_outq_uncork(&asoc->outqueue); return error; nomem: error = -ENOMEM; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 0c9d5a6950fe..8848d329aa2c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -795,8 +795,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); - /* This will send the COOKIE ACK */ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); @@ -883,7 +881,6 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, if (asoc->autoclose) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); /* It may also notify its ULP about the successful * establishment of the association with a Communication Up @@ -1781,7 +1778,6 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, goto nomem; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); /* RFC 2960 5.1 Normal Establishment of an Association * @@ -1898,12 +1894,13 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, } } - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem; + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + if (ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); @@ -1911,9 +1908,6 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ai_ev)); - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); - return SCTP_DISPOSITION_CONSUME; nomem: @@ -3970,9 +3964,6 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, return sctp_sf_pdiscard(ep, asoc, type, arg, commands); break; case SCTP_CID_ACTION_DISCARD_ERR: - /* Discard the packet. */ - sctp_sf_pdiscard(ep, asoc, type, arg, commands); - /* Generate an ERROR chunk as response. */ hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, @@ -3982,6 +3973,9 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); } + + /* Discard the packet. */ + sctp_sf_pdiscard(ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; break; case SCTP_CID_ACTION_SKIP: @@ -5899,12 +5893,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, return SCTP_IERROR_NO_DATA; } - /* If definately accepting the DATA chunk, record its TSN, otherwise - * wait for renege processing. - */ - if (SCTP_CMD_CHUNK_ULP == deliver) - sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); - chunk->data_accepted = 1; /* Note: Some chunks may get overcounted (if we drop) or overcounted @@ -5924,6 +5912,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ if (ntohs(data_hdr->stream) >= asoc->c.sinit_max_instreams) { + /* Mark tsn as received even though we drop it */ + sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); + err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM, &data_hdr->stream, sizeof(data_hdr->stream)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e7e3baf7009e..dbb79adf8f3c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -116,7 +116,7 @@ static int sctp_memory_pressure; static atomic_t sctp_memory_allocated; static atomic_t sctp_sockets_allocated; -static void sctp_enter_memory_pressure(void) +static void sctp_enter_memory_pressure(struct sock *sk) { sctp_memory_pressure = 1; } @@ -308,9 +308,16 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, if (len < sizeof (struct sockaddr)) return NULL; - /* Does this PF support this AF? */ - if (!opt->pf->af_supported(addr->sa.sa_family, opt)) - return NULL; + /* V4 mapped address are really of AF_INET family */ + if (addr->sa.sa_family == AF_INET6 && + ipv6_addr_v4mapped(&addr->v6.sin6_addr)) { + if (!opt->pf->af_supported(AF_INET, opt)) + return NULL; + } else { + /* Does this PF support this AF? */ + if (!opt->pf->af_supported(addr->sa.sa_family, opt)) + return NULL; + } /* If we get this far, af is valid. */ af = sctp_get_af_specific(addr->sa.sa_family); @@ -370,18 +377,19 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; + /* See if the address matches any of the addresses we may have + * already bound before checking against other endpoints. + */ + if (sctp_bind_addr_match(bp, addr, sp)) + return -EINVAL; + /* Make sure we are allowed to bind here. * The function sctp_get_port_local() does duplicate address * detection. */ addr->v4.sin_port = htons(snum); if ((ret = sctp_get_port_local(sk, addr))) { - if (ret == (long) sk) { - /* This endpoint has a conflicting address. */ - return -EINVAL; - } else { - return -EADDRINUSE; - } + return -EADDRINUSE; } /* Refresh ephemeral port. */ @@ -956,7 +964,8 @@ out: */ static int __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, - int addrs_size) + int addrs_size, + sctp_assoc_t *assoc_id) { struct sctp_sock *sp; struct sctp_endpoint *ep; @@ -1111,6 +1120,8 @@ static int __sctp_connect(struct sock* sk, timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); err = sctp_wait_for_connect(asoc, &timeo); + if (!err && assoc_id) + *assoc_id = asoc->assoc_id; /* Don't free association on exit. */ asoc = NULL; @@ -1128,7 +1139,8 @@ out_free: /* Helper for tunneling sctp_connectx() requests through sctp_setsockopt() * * API 8.9 - * int sctp_connectx(int sd, struct sockaddr *addrs, int addrcnt); + * int sctp_connectx(int sd, struct sockaddr *addrs, int addrcnt, + * sctp_assoc_t *asoc); * * If sd is an IPv4 socket, the addresses passed must be IPv4 addresses. * If the sd is an IPv6 socket, the addresses passed can either be IPv4 @@ -1144,8 +1156,10 @@ out_free: * representation is termed a "packed array" of addresses). The caller * specifies the number of addresses in the array with addrcnt. * - * On success, sctp_connectx() returns 0. On failure, sctp_connectx() returns - * -1, and sets errno to the appropriate error code. + * On success, sctp_connectx() returns 0. It also sets the assoc_id to + * the association id of the new association. On failure, sctp_connectx() + * returns -1, and sets errno to the appropriate error code. The assoc_id + * is not touched by the kernel. * * For SCTP, the port given in each socket address must be the same, or * sctp_connectx() will fail, setting errno to EINVAL. @@ -1182,11 +1196,12 @@ out_free: * addrs The pointer to the addresses in user land * addrssize Size of the addrs buffer * - * Returns 0 if ok, <0 errno code on error. + * Returns >=0 if ok, <0 errno code on error. */ -SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, +SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, struct sockaddr __user *addrs, - int addrs_size) + int addrs_size, + sctp_assoc_t *assoc_id) { int err = 0; struct sockaddr *kaddrs; @@ -1209,13 +1224,46 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, if (__copy_from_user(kaddrs, addrs, addrs_size)) { err = -EFAULT; } else { - err = __sctp_connect(sk, kaddrs, addrs_size); + err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id); } kfree(kaddrs); + return err; } +/* + * This is an older interface. It's kept for backward compatibility + * to the option that doesn't provide association id. + */ +SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) +{ + return __sctp_setsockopt_connectx(sk, addrs, addrs_size, NULL); +} + +/* + * New interface for the API. The since the API is done with a socket + * option, to make it simple we feed back the association id is as a return + * indication to the call. Error is always negative and association id is + * always positive. + */ +SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) +{ + sctp_assoc_t assoc_id = 0; + int err = 0; + + err = __sctp_setsockopt_connectx(sk, addrs, addrs_size, &assoc_id); + + if (err) + return err; + else + return assoc_id; +} + /* API 3.1.4 close() - UDP Style Syntax * Applications use close() to perform graceful shutdown (as described in * Section 10.1 of [SCTP]) on ALL the associations currently represented @@ -2305,74 +2353,98 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, return 0; } -/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) - * - * This options will get or set the delayed ack timer. The time is set - * in milliseconds. If the assoc_id is 0, then this sets or gets the - * endpoints default delayed ack timer value. If the assoc_id field is - * non-zero, then the set or get effects the specified association. - * - * struct sctp_assoc_value { - * sctp_assoc_t assoc_id; - * uint32_t assoc_value; - * }; +/* + * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) + * + * This option will effect the way delayed acks are performed. This + * option allows you to get or set the delayed ack time, in + * milliseconds. It also allows changing the delayed ack frequency. + * Changing the frequency to 1 disables the delayed sack algorithm. If + * the assoc_id is 0, then this sets or gets the endpoints default + * values. If the assoc_id field is non-zero, then the set or get + * effects the specified association for the one to many model (the + * assoc_id field is ignored by the one to one model). Note that if + * sack_delay or sack_freq are 0 when setting this option, then the + * current values will remain unchanged. + * + * struct sctp_sack_info { + * sctp_assoc_t sack_assoc_id; + * uint32_t sack_delay; + * uint32_t sack_freq; + * }; * - * assoc_id - This parameter, indicates which association the - * user is preforming an action upon. Note that if - * this field's value is zero then the endpoints - * default value is changed (effecting future - * associations only). + * sack_assoc_id - This parameter, indicates which association the user + * is performing an action upon. Note that if this field's value is + * zero then the endpoints default value is changed (effecting future + * associations only). * - * assoc_value - This parameter contains the number of milliseconds - * that the user is requesting the delayed ACK timer - * be set to. Note that this value is defined in - * the standard to be between 200 and 500 milliseconds. + * sack_delay - This parameter contains the number of milliseconds that + * the user is requesting the delayed ACK timer be set to. Note that + * this value is defined in the standard to be between 200 and 500 + * milliseconds. * - * Note: a value of zero will leave the value alone, - * but disable SACK delay. A non-zero value will also - * enable SACK delay. + * sack_freq - This parameter contains the number of packets that must + * be received before a sack is sent without waiting for the delay + * timer to expire. The default value for this is 2, setting this + * value to 1 will disable the delayed sack algorithm. */ -static int sctp_setsockopt_delayed_ack_time(struct sock *sk, +static int sctp_setsockopt_delayed_ack(struct sock *sk, char __user *optval, int optlen) { - struct sctp_assoc_value params; + struct sctp_sack_info params; struct sctp_transport *trans = NULL; struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); - if (optlen != sizeof(struct sctp_assoc_value)) - return - EINVAL; + if (optlen == sizeof(struct sctp_sack_info)) { + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; - if (copy_from_user(¶ms, optval, optlen)) - return -EFAULT; + if (params.sack_delay == 0 && params.sack_freq == 0) + return 0; + } else if (optlen == sizeof(struct sctp_assoc_value)) { + printk(KERN_WARNING "SCTP: Use of struct sctp_sack_info " + "in delayed_ack socket option deprecated\n"); + printk(KERN_WARNING "SCTP: struct sctp_sack_info instead\n"); + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; + + if (params.sack_delay == 0) + params.sack_freq = 1; + else + params.sack_freq = 0; + } else + return - EINVAL; /* Validate value parameter. */ - if (params.assoc_value > 500) + if (params.sack_delay > 500) return -EINVAL; - /* Get association, if assoc_id != 0 and the socket is a one + /* Get association, if sack_assoc_id != 0 and the socket is a one * to many style socket, and an association was not found, then * the id was invalid. */ - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + asoc = sctp_id2assoc(sk, params.sack_assoc_id); + if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - if (params.assoc_value) { + if (params.sack_delay) { if (asoc) { asoc->sackdelay = - msecs_to_jiffies(params.assoc_value); + msecs_to_jiffies(params.sack_delay); asoc->param_flags = (asoc->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_ENABLE; } else { - sp->sackdelay = params.assoc_value; + sp->sackdelay = params.sack_delay; sp->param_flags = (sp->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_ENABLE; } - } else { + } + + if (params.sack_freq == 1) { if (asoc) { asoc->param_flags = (asoc->param_flags & ~SPP_SACKDELAY) | @@ -2382,22 +2454,40 @@ static int sctp_setsockopt_delayed_ack_time(struct sock *sk, (sp->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE; } + } else if (params.sack_freq > 1) { + if (asoc) { + asoc->sackfreq = params.sack_freq; + asoc->param_flags = + (asoc->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; + } else { + sp->sackfreq = params.sack_freq; + sp->param_flags = + (sp->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; + } } /* If change is for association, also apply to each transport. */ if (asoc) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { - if (params.assoc_value) { + if (params.sack_delay) { trans->sackdelay = - msecs_to_jiffies(params.assoc_value); + msecs_to_jiffies(params.sack_delay); trans->param_flags = (trans->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_ENABLE; - } else { + } + if (params.sack_freq == 1) { trans->param_flags = (trans->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE; + } else if (params.sack_freq > 1) { + trans->sackfreq = params.sack_freq; + trans->param_flags = + (trans->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; } } } @@ -3164,10 +3254,18 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, optlen, SCTP_BINDX_REM_ADDR); break; + case SCTP_SOCKOPT_CONNECTX_OLD: + /* 'optlen' is the size of the addresses buffer. */ + retval = sctp_setsockopt_connectx_old(sk, + (struct sockaddr __user *)optval, + optlen); + break; + case SCTP_SOCKOPT_CONNECTX: /* 'optlen' is the size of the addresses buffer. */ - retval = sctp_setsockopt_connectx(sk, (struct sockaddr __user *)optval, - optlen); + retval = sctp_setsockopt_connectx(sk, + (struct sockaddr __user *)optval, + optlen); break; case SCTP_DISABLE_FRAGMENTS: @@ -3186,8 +3284,8 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); break; - case SCTP_DELAYED_ACK_TIME: - retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen); + case SCTP_DELAYED_ACK: + retval = sctp_setsockopt_delayed_ack(sk, optval, optlen); break; case SCTP_PARTIAL_DELIVERY_POINT: retval = sctp_setsockopt_partial_delivery_point(sk, optval, optlen); @@ -3294,7 +3392,7 @@ SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, /* Pass correct addr len to common routine (so it knows there * is only one address being passed. */ - err = __sctp_connect(sk, addr, af->sockaddr_len); + err = __sctp_connect(sk, addr, af->sockaddr_len, NULL); } sctp_release_sock(sk); @@ -3446,6 +3544,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->pathmaxrxt = sctp_max_retrans_path; sp->pathmtu = 0; // allow default discovery sp->sackdelay = sctp_sack_timeout; + sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; @@ -3497,7 +3596,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) } /* Cleanup any SCTP per socket resources. */ -SCTP_STATIC int sctp_destroy_sock(struct sock *sk) +SCTP_STATIC void sctp_destroy_sock(struct sock *sk) { struct sctp_endpoint *ep; @@ -3507,7 +3606,6 @@ SCTP_STATIC int sctp_destroy_sock(struct sock *sk) ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); atomic_dec(&sctp_sockets_allocated); - return 0; } /* API 4.1.7 shutdown() - TCP Style Syntax @@ -3812,7 +3910,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval goto out; /* Map the socket to an unused fd that can be returned to the user. */ - retval = sock_map_fd(newsock); + retval = sock_map_fd(newsock, 0); if (retval < 0) { sock_release(newsock); goto out; @@ -3999,70 +4097,91 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, return 0; } -/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) - * - * This options will get or set the delayed ack timer. The time is set - * in milliseconds. If the assoc_id is 0, then this sets or gets the - * endpoints default delayed ack timer value. If the assoc_id field is - * non-zero, then the set or get effects the specified association. - * - * struct sctp_assoc_value { - * sctp_assoc_t assoc_id; - * uint32_t assoc_value; - * }; +/* + * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) + * + * This option will effect the way delayed acks are performed. This + * option allows you to get or set the delayed ack time, in + * milliseconds. It also allows changing the delayed ack frequency. + * Changing the frequency to 1 disables the delayed sack algorithm. If + * the assoc_id is 0, then this sets or gets the endpoints default + * values. If the assoc_id field is non-zero, then the set or get + * effects the specified association for the one to many model (the + * assoc_id field is ignored by the one to one model). Note that if + * sack_delay or sack_freq are 0 when setting this option, then the + * current values will remain unchanged. + * + * struct sctp_sack_info { + * sctp_assoc_t sack_assoc_id; + * uint32_t sack_delay; + * uint32_t sack_freq; + * }; * - * assoc_id - This parameter, indicates which association the - * user is preforming an action upon. Note that if - * this field's value is zero then the endpoints - * default value is changed (effecting future - * associations only). + * sack_assoc_id - This parameter, indicates which association the user + * is performing an action upon. Note that if this field's value is + * zero then the endpoints default value is changed (effecting future + * associations only). * - * assoc_value - This parameter contains the number of milliseconds - * that the user is requesting the delayed ACK timer - * be set to. Note that this value is defined in - * the standard to be between 200 and 500 milliseconds. + * sack_delay - This parameter contains the number of milliseconds that + * the user is requesting the delayed ACK timer be set to. Note that + * this value is defined in the standard to be between 200 and 500 + * milliseconds. * - * Note: a value of zero will leave the value alone, - * but disable SACK delay. A non-zero value will also - * enable SACK delay. + * sack_freq - This parameter contains the number of packets that must + * be received before a sack is sent without waiting for the delay + * timer to expire. The default value for this is 2, setting this + * value to 1 will disable the delayed sack algorithm. */ -static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len, +static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct sctp_assoc_value params; + struct sctp_sack_info params; struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); - if (len < sizeof(struct sctp_assoc_value)) - return - EINVAL; + if (len >= sizeof(struct sctp_sack_info)) { + len = sizeof(struct sctp_sack_info); - len = sizeof(struct sctp_assoc_value); - - if (copy_from_user(¶ms, optval, len)) - return -EFAULT; + if (copy_from_user(¶ms, optval, len)) + return -EFAULT; + } else if (len == sizeof(struct sctp_assoc_value)) { + printk(KERN_WARNING "SCTP: Use of struct sctp_sack_info " + "in delayed_ack socket option deprecated\n"); + printk(KERN_WARNING "SCTP: struct sctp_sack_info instead\n"); + if (copy_from_user(¶ms, optval, len)) + return -EFAULT; + } else + return - EINVAL; - /* Get association, if assoc_id != 0 and the socket is a one + /* Get association, if sack_assoc_id != 0 and the socket is a one * to many style socket, and an association was not found, then * the id was invalid. */ - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + asoc = sctp_id2assoc(sk, params.sack_assoc_id); + if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { /* Fetch association values. */ - if (asoc->param_flags & SPP_SACKDELAY_ENABLE) - params.assoc_value = jiffies_to_msecs( + if (asoc->param_flags & SPP_SACKDELAY_ENABLE) { + params.sack_delay = jiffies_to_msecs( asoc->sackdelay); - else - params.assoc_value = 0; + params.sack_freq = asoc->sackfreq; + + } else { + params.sack_delay = 0; + params.sack_freq = 1; + } } else { /* Fetch socket values. */ - if (sp->param_flags & SPP_SACKDELAY_ENABLE) - params.assoc_value = sp->sackdelay; - else - params.assoc_value = 0; + if (sp->param_flags & SPP_SACKDELAY_ENABLE) { + params.sack_delay = sp->sackdelay; + params.sack_freq = sp->sackfreq; + } else { + params.sack_delay = 0; + params.sack_freq = 1; + } } if (copy_to_user(optval, ¶ms, len)) @@ -4112,6 +4231,8 @@ static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len, if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) return -EFAULT; + printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_NUM_OLD " + "socket option deprecated\n"); /* For UDP-style sockets, id specifies the association to query. */ asoc = sctp_id2assoc(sk, id); if (!asoc) @@ -4151,6 +4272,9 @@ static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, if (getaddrs.addr_num <= 0) return -EINVAL; + printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_OLD " + "socket option deprecated\n"); + /* For UDP-style sockets, id specifies the association to query. */ asoc = sctp_id2assoc(sk, getaddrs.assoc_id); if (!asoc) @@ -4244,6 +4368,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) return -EFAULT; + printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_NUM_OLD " + "socket option deprecated\n"); + /* * For UDP-style sockets, id specifies the association to query. * If the id field is set to the value '0' then the locally bound @@ -4276,6 +4403,11 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, (AF_INET6 == addr->a.sa.sa_family)) continue; + if ((PF_INET6 == sk->sk_family) && + inet_v6_ipv6only(sk) && + (AF_INET == addr->a.sa.sa_family)) + continue; + cnt++; } rcu_read_unlock(); @@ -4316,6 +4448,10 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; + if ((PF_INET6 == sk->sk_family) && + inet_v6_ipv6only(sk) && + (AF_INET == addr->a.sa.sa_family)) + continue; memcpy(&temp, &addr->a, sizeof(temp)); if (!temp.v4.sin_port) temp.v4.sin_port = htons(port); @@ -4351,6 +4487,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; + if ((PF_INET6 == sk->sk_family) && + inet_v6_ipv6only(sk) && + (AF_INET == addr->a.sa.sa_family)) + continue; memcpy(&temp, &addr->a, sizeof(temp)); if (!temp.v4.sin_port) temp.v4.sin_port = htons(port); @@ -4401,7 +4541,13 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (copy_from_user(&getaddrs, optval, len)) return -EFAULT; - if (getaddrs.addr_num <= 0) return -EINVAL; + if (getaddrs.addr_num <= 0 || + getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr))) + return -EINVAL; + + printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_OLD " + "socket option deprecated\n"); + /* * For UDP-style sockets, id specifies the association to query. * If the id field is set to the value '0' then the locally bound @@ -5218,8 +5364,8 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_peer_addr_params(sk, len, optval, optlen); break; - case SCTP_DELAYED_ACK_TIME: - retval = sctp_getsockopt_delayed_ack_time(sk, len, optval, + case SCTP_DELAYED_ACK: + retval = sctp_getsockopt_delayed_ack(sk, len, optval, optlen); break; case SCTP_INITMSG: @@ -5439,12 +5585,13 @@ pp_found: struct sctp_endpoint *ep2; ep2 = sctp_sk(sk2)->ep; - if (reuse && sk2->sk_reuse && - sk2->sk_state != SCTP_SS_LISTENING) + if (sk == sk2 || + (reuse && sk2->sk_reuse && + sk2->sk_state != SCTP_SS_LISTENING)) continue; - if (sctp_bind_addr_match(&ep2->base.bind_addr, addr, - sctp_sk(sk))) { + if (sctp_bind_addr_conflict(&ep2->base.bind_addr, addr, + sctp_sk(sk2), sctp_sk(sk))) { ret = (long)sk2; goto fail_unlock; } @@ -5557,8 +5704,13 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) if (!ep->base.bind_addr.port) { if (sctp_autobind(sk)) return -EAGAIN; - } else + } else { + if (sctp_get_port(sk, inet_sk(sk)->num)) { + sk->sk_state = SCTP_SS_CLOSED; + return -EADDRINUSE; + } sctp_sk(sk)->bind_hash->fastreuse = 0; + } sctp_hash_endpoint(ep); return 0; @@ -5628,7 +5780,7 @@ int sctp_inet_listen(struct socket *sock, int backlog) goto out; /* Allocate HMAC for generating cookie. */ - if (sctp_hmac_alg) { + if (!sctp_sk(sk)->hmac && sctp_hmac_alg) { tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { if (net_ratelimit()) { @@ -5656,7 +5808,8 @@ int sctp_inet_listen(struct socket *sock, int backlog) goto cleanup; /* Store away the transform reference. */ - sctp_sk(sk)->hmac = tfm; + if (!sctp_sk(sk)->hmac) + sctp_sk(sk)->hmac = tfm; out: sctp_release_sock(sk); return err; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index f4938f6c5abe..e745c118f239 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->rttvar = 0; peer->srtt = 0; peer->rto_pending = 0; + peer->fast_recovery = 0; peer->last_time_heard = jiffies; peer->last_time_used = jiffies; @@ -99,6 +100,9 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, INIT_LIST_HEAD(&peer->send_ready); INIT_LIST_HEAD(&peer->transports); + peer->T3_rtx_timer.expires = 0; + peer->hb_timer.expires = 0; + setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, (unsigned long)peer); setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, @@ -190,7 +194,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport) /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ -void sctp_transport_reset_timers(struct sctp_transport *transport) +void sctp_transport_reset_timers(struct sctp_transport *transport, int force) { /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -200,7 +204,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport) * address. */ - if (!timer_pending(&transport->T3_rtx_timer)) + if (force || !timer_pending(&transport->T3_rtx_timer)) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); @@ -291,7 +295,7 @@ void sctp_transport_route(struct sctp_transport *transport, if (saddr) memcpy(&transport->saddr, saddr, sizeof(union sctp_addr)); else - af->get_saddr(asoc, dst, daddr, &transport->saddr); + af->get_saddr(opt, asoc, dst, daddr, &transport->saddr); transport->dst = dst; if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { @@ -403,11 +407,16 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, cwnd = transport->cwnd; flight_size = transport->flight_size; + /* See if we need to exit Fast Recovery first */ + if (transport->fast_recovery && + TSN_lte(transport->fast_recovery_exit, sack_ctsn)) + transport->fast_recovery = 0; + /* The appropriate cwnd increase algorithm is performed if, and only - * if the cumulative TSN has advanced and the congestion window is + * if the cumulative TSN whould advanced and the congestion window is * being fully utilized. */ - if ((transport->asoc->ctsn_ack_point >= sack_ctsn) || + if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) || (flight_size < cwnd)) return; @@ -416,17 +425,23 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, pmtu = transport->asoc->pathmtu; if (cwnd <= ssthresh) { - /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less - * than or equal to ssthresh an SCTP endpoint MUST use the - * slow start algorithm to increase cwnd only if the current - * congestion window is being fully utilized and an incoming - * SACK advances the Cumulative TSN Ack Point. Only when these - * two conditions are met can the cwnd be increased otherwise - * the cwnd MUST not be increased. If these conditions are met - * then cwnd MUST be increased by at most the lesser of - * 1) the total size of the previously outstanding DATA - * chunk(s) acknowledged, and 2) the destination's path MTU. + /* RFC 4960 7.2.1 + * o When cwnd is less than or equal to ssthresh, an SCTP + * endpoint MUST use the slow-start algorithm to increase + * cwnd only if the current congestion window is being fully + * utilized, an incoming SACK advances the Cumulative TSN + * Ack Point, and the data sender is not in Fast Recovery. + * Only when these three conditions are met can the cwnd be + * increased; otherwise, the cwnd MUST not be increased. + * If these conditions are met, then cwnd MUST be increased + * by, at most, the lesser of 1) the total size of the + * previously outstanding DATA chunk(s) acknowledged, and + * 2) the destination's path MTU. This upper bound protects + * against the ACK-Splitting attack outlined in [SAVAGE99]. */ + if (transport->fast_recovery) + return; + if (bytes_acked > pmtu) cwnd += pmtu; else @@ -502,6 +517,13 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * cwnd = ssthresh * partial_bytes_acked = 0 */ + if (transport->fast_recovery) + return; + + /* Mark Fast recovery */ + transport->fast_recovery = 1; + transport->fast_recovery_exit = transport->asoc->next_tsn - 1; + transport->ssthresh = max(transport->cwnd/2, 4*transport->asoc->pathmtu); transport->cwnd = transport->ssthresh; @@ -586,6 +608,7 @@ void sctp_transport_reset(struct sctp_transport *t) t->flight_size = 0; t->error_count = 0; t->rto_pending = 0; + t->fast_recovery = 0; /* Initialize the state information for SFR-CACC */ t->cacc.changeover_active = 0; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index ce6cda6b6994..a1f654aea268 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -710,6 +710,11 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, if (!skb) goto fail; + /* Now that all memory allocations for this chunk succeeded, we + * can mark it as received so the tsn_map is updated correctly. + */ + sctp_tsnmap_mark(&asoc->peer.tsn_map, ntohl(chunk->subh.data_hdr->tsn)); + /* First calculate the padding, so we don't inadvertently * pass up the wrong length to the user. * diff --git a/net/socket.c b/net/socket.c index 66c4a8cf6db9..8ef8ba81b9e2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -63,11 +63,13 @@ #include <linux/file.h> #include <linux/net.h> #include <linux/interrupt.h> +#include <linux/thread_info.h> #include <linux/rcupdate.h> #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/mutex.h> +#include <linux/thread_info.h> #include <linux/wanrouter.h> #include <linux/if_bridge.h> #include <linux/if_frad.h> @@ -90,6 +92,7 @@ #include <asm/unistd.h> #include <net/compat.h> +#include <net/wext.h> #include <net/sock.h> #include <linux/netfilter.h> @@ -179,9 +182,9 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0; * invalid addresses -EFAULT is returned. On a success 0 is returned. */ -int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) +int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) { - if (ulen < 0 || ulen > MAX_SOCK_ADDR) + if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) return -EINVAL; if (ulen == 0) return 0; @@ -207,7 +210,7 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) * specified. Zero is returned for a success. */ -int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, +int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, int __user *ulen) { int err; @@ -218,7 +221,7 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, return err; if (len > klen) len = klen; - if (len < 0 || len > MAX_SOCK_ADDR) + if (len < 0 || len > sizeof(struct sockaddr_storage)) return -EINVAL; if (len) { if (audit_sockaddr(klen, kaddr)) @@ -262,7 +265,7 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct socket_alloc *ei = (struct socket_alloc *)foo; @@ -348,11 +351,11 @@ static struct dentry_operations sockfs_dentry_operations = { * but we take care of internal coherence yet. */ -static int sock_alloc_fd(struct file **filep) +static int sock_alloc_fd(struct file **filep, int flags) { int fd; - fd = get_unused_fd(); + fd = get_unused_fd_flags(flags); if (likely(fd >= 0)) { struct file *file = get_empty_filp(); @@ -366,7 +369,7 @@ static int sock_alloc_fd(struct file **filep) return fd; } -static int sock_attach_fd(struct socket *sock, struct file *file) +static int sock_attach_fd(struct socket *sock, struct file *file, int flags) { struct dentry *dentry; struct qstr name = { .name = "" }; @@ -388,20 +391,20 @@ static int sock_attach_fd(struct socket *sock, struct file *file) init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE, &socket_file_ops); SOCK_INODE(sock)->i_fop = &socket_file_ops; - file->f_flags = O_RDWR; + file->f_flags = O_RDWR | (flags & O_NONBLOCK); file->f_pos = 0; file->private_data = sock; return 0; } -int sock_map_fd(struct socket *sock) +int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = sock_alloc_fd(&newfile); + int fd = sock_alloc_fd(&newfile, flags); if (likely(fd >= 0)) { - int err = sock_attach_fd(sock, newfile); + int err = sock_attach_fd(sock, newfile, flags); if (unlikely(err < 0)) { put_filp(newfile); @@ -1217,12 +1220,27 @@ asmlinkage long sys_socket(int family, int type, int protocol) { int retval; struct socket *sock; + int flags; + + /* Check the SOCK_* constants for consistency. */ + BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); + BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); + BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); + + flags = type & ~SOCK_TYPE_MASK; + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + type &= SOCK_TYPE_MASK; + + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; retval = sock_create(family, type, protocol, &sock); if (retval < 0) goto out; - retval = sock_map_fd(sock); + retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); if (retval < 0) goto out_release; @@ -1245,6 +1263,15 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, struct socket *sock1, *sock2; int fd1, fd2, err; struct file *newfile1, *newfile2; + int flags; + + flags = type & ~SOCK_TYPE_MASK; + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + type &= SOCK_TYPE_MASK; + + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; /* * Obtain the first socket and check if the underlying protocol @@ -1263,13 +1290,13 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, if (err < 0) goto out_release_both; - fd1 = sock_alloc_fd(&newfile1); + fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC); if (unlikely(fd1 < 0)) { err = fd1; goto out_release_both; } - fd2 = sock_alloc_fd(&newfile2); + fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); if (unlikely(fd2 < 0)) { err = fd2; put_filp(newfile1); @@ -1277,12 +1304,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, goto out_release_both; } - err = sock_attach_fd(sock1, newfile1); + err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); if (unlikely(err < 0)) { goto out_fd2; } - err = sock_attach_fd(sock2, newfile2); + err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK); if (unlikely(err < 0)) { fput(newfile1); goto out_fd1; @@ -1341,20 +1368,20 @@ out_fd: asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) { struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { - err = move_addr_to_kernel(umyaddr, addrlen, address); + err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); if (err >= 0) { err = security_socket_bind(sock, - (struct sockaddr *)address, + (struct sockaddr *)&address, addrlen); if (!err) err = sock->ops->bind(sock, (struct sockaddr *) - address, addrlen); + &address, addrlen); } fput_light(sock->file, fput_needed); } @@ -1400,13 +1427,19 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen) +long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) { struct socket *sock, *newsock; struct file *newfile; int err, len, newfd, fput_needed; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; + + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) @@ -1425,14 +1458,14 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_fd(&newfile); + newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); goto out_put; } - err = sock_attach_fd(newsock, newfile); + err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK); if (err < 0) goto out_fd_simple; @@ -1445,13 +1478,13 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, goto out_fd; if (upeer_sockaddr) { - if (newsock->ops->getname(newsock, (struct sockaddr *)address, + if (newsock->ops->getname(newsock, (struct sockaddr *)&address, &len, 2) < 0) { err = -ECONNABORTED; goto out_fd; } - err = move_addr_to_user(address, len, upeer_sockaddr, - upeer_addrlen); + err = move_addr_to_user((struct sockaddr *)&address, + len, upeer_sockaddr, upeer_addrlen); if (err < 0) goto out_fd; } @@ -1478,6 +1511,66 @@ out_fd: goto out_put; } +#ifdef HAVE_SET_RESTORE_SIGMASK +asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, + const sigset_t __user *sigmask, + size_t sigsetsize, int flags) +{ + sigset_t ksigmask, sigsaved; + int ret; + + if (sigmask) { + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) + return -EFAULT; + + sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); + + if (ret < 0 && signal_pending(current)) { + /* + * Don't restore the signal mask yet. Let do_signal() deliver + * the signal on the way back to userspace, before the signal + * mask is restored. + */ + if (sigmask) { + memcpy(¤t->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_restore_sigmask(); + } + } else if (sigmask) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + return ret; +} +#else +asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, + const sigset_t __user *sigmask, + size_t sigsetsize, int flags) +{ + /* The platform does not support restoring the signal mask in the + * return path. So we do not allow using paccept() with a signal + * mask. */ + if (sigmask) + return -EINVAL; + + return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); +} +#endif + +asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen) +{ + return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); +} + /* * Attempt to connect to a socket with the server address. The address * is in user space so we verify it is OK and move it to kernel space. @@ -1494,22 +1587,22 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) { struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; - err = move_addr_to_kernel(uservaddr, addrlen, address); + err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); if (err < 0) goto out_put; err = - security_socket_connect(sock, (struct sockaddr *)address, addrlen); + security_socket_connect(sock, (struct sockaddr *)&address, addrlen); if (err) goto out_put; - err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, + err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, sock->file->f_flags); out_put: fput_light(sock->file, fput_needed); @@ -1526,7 +1619,7 @@ asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) { struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int len, err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); @@ -1537,10 +1630,10 @@ asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, if (err) goto out_put; - err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0); + err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); if (err) goto out_put; - err = move_addr_to_user(address, len, usockaddr, usockaddr_len); + err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); out_put: fput_light(sock->file, fput_needed); @@ -1557,7 +1650,7 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) { struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int len, err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); @@ -1569,10 +1662,10 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, } err = - sock->ops->getname(sock, (struct sockaddr *)address, &len, + sock->ops->getname(sock, (struct sockaddr *)&address, &len, 1); if (!err) - err = move_addr_to_user(address, len, usockaddr, + err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); fput_light(sock->file, fput_needed); } @@ -1590,7 +1683,7 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, int addr_len) { struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int err; struct msghdr msg; struct iovec iov; @@ -1609,10 +1702,10 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, msg.msg_controllen = 0; msg.msg_namelen = 0; if (addr) { - err = move_addr_to_kernel(addr, addr_len, address); + err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); if (err < 0) goto out_put; - msg.msg_name = address; + msg.msg_name = (struct sockaddr *)&address; msg.msg_namelen = addr_len; } if (sock->file->f_flags & O_NONBLOCK) @@ -1648,7 +1741,7 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, struct socket *sock; struct iovec iov; struct msghdr msg; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int err, err2; int fput_needed; @@ -1662,14 +1755,15 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, msg.msg_iov = &iov; iov.iov_len = size; iov.iov_base = ubuf; - msg.msg_name = address; - msg.msg_namelen = MAX_SOCK_ADDR; + msg.msg_name = (struct sockaddr *)&address; + msg.msg_namelen = sizeof(address); if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, size, flags); if (err >= 0 && addr != NULL) { - err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); + err2 = move_addr_to_user((struct sockaddr *)&address, + msg.msg_namelen, addr, addr_len); if (err2 < 0) err = err2; } @@ -1789,7 +1883,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; unsigned char ctl[sizeof(struct cmsghdr) + 20] __attribute__ ((aligned(sizeof(__kernel_size_t)))); @@ -1827,9 +1921,13 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) /* This will also move the address data into kernel space */ if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); + err = verify_compat_iovec(&msg_sys, iov, + (struct sockaddr *)&address, + VERIFY_READ); } else - err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); + err = verify_iovec(&msg_sys, iov, + (struct sockaddr *)&address, + VERIFY_READ); if (err < 0) goto out_freeiov; total_len = err; @@ -1900,7 +1998,7 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, int fput_needed; /* kernel mode address */ - char addr[MAX_SOCK_ADDR]; + struct sockaddr_storage addr; /* user mode address pointers */ struct sockaddr __user *uaddr; @@ -1938,9 +2036,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, uaddr = (__force void __user *)msg_sys.msg_name; uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); + err = verify_compat_iovec(&msg_sys, iov, + (struct sockaddr *)&addr, + VERIFY_WRITE); } else - err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); + err = verify_iovec(&msg_sys, iov, + (struct sockaddr *)&addr, + VERIFY_WRITE); if (err < 0) goto out_freeiov; total_len = err; @@ -1956,7 +2058,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, len = err; if (uaddr != NULL) { - err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, + err = move_addr_to_user((struct sockaddr *)&addr, + msg_sys.msg_namelen, uaddr, uaddr_len); if (err < 0) goto out_freeiov; @@ -1988,10 +2091,11 @@ out: /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static const unsigned char nargs[18]={ +static const unsigned char nargs[19]={ AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), + AL(6) }; #undef AL @@ -2010,7 +2114,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) unsigned long a0, a1; int err; - if (call < 1 || call > SYS_RECVMSG) + if (call < 1 || call > SYS_PACCEPT) return -EINVAL; /* copy_from_user should be SMP safe. */ @@ -2039,8 +2143,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) break; case SYS_ACCEPT: err = - sys_accept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); + do_accept(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], 0); break; case SYS_GETSOCKNAME: err = @@ -2087,6 +2191,13 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; + case SYS_PACCEPT: + err = + sys_paccept(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], + (const sigset_t __user *) a[3], + a[4], a[5]); + break; default: err = -EINVAL; break; @@ -2210,10 +2321,19 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd, { struct socket *sock = file->private_data; int ret = -ENOIOCTLCMD; + struct sock *sk; + struct net *net; + + sk = sock->sk; + net = sock_net(sk); if (sock->ops->compat_ioctl) ret = sock->ops->compat_ioctl(sock, cmd, arg); + if (ret == -ENOIOCTLCMD && + (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) + ret = compat_wext_handle_ioctl(net, cmd, arg); + return ret; } #endif diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index d927d9f57412..744b79fdcb19 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -17,8 +17,8 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define RPC_ANONYMOUS_USERID ((uid_t)-2) -#define RPC_ANONYMOUS_GROUPID ((gid_t)-2) +#define RPC_MACHINE_CRED_USERID ((uid_t)0) +#define RPC_MACHINE_CRED_GROUPID ((gid_t)0) struct generic_cred { struct rpc_cred gc_base; @@ -44,8 +44,8 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred); struct rpc_cred *rpc_lookup_machine_cred(void) { struct auth_cred acred = { - .uid = RPC_ANONYMOUS_USERID, - .gid = RPC_ANONYMOUS_GROUPID, + .uid = RPC_MACHINE_CRED_USERID, + .gid = RPC_MACHINE_CRED_GROUPID, .machine_cred = 1, }; diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index f3431a7e33da..4de8bcf26fa7 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -5,12 +5,12 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ - gss_mech_switch.o svcauth_gss.o gss_krb5_crypto.o + gss_mech_switch.o svcauth_gss.o obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ - gss_krb5_seqnum.o gss_krb5_wrap.o + gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index cc12d5f5d5da..853a4142cea1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -33,8 +33,6 @@ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Id$ */ @@ -63,22 +61,11 @@ static const struct rpc_credops gss_nullops; # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define NFS_NGROUPS 16 - -#define GSS_CRED_SLACK 1024 /* XXX: unused */ +#define GSS_CRED_SLACK 1024 /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 -/* XXX this define must match the gssd define -* as it is passed to gssd to signal the use of -* machine creds should be part of the shared rpc interface */ - -#define CA_RUN_AS_MACHINE 0x00000200 - -/* dump the buffer in `emacs-hexl' style */ -#define isprint(c) ((c > 0x1f) && (c < 0x7f)) - struct gss_auth { struct kref kref; struct rpc_auth rpc_auth; @@ -146,7 +133,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - dest->data = kmemdup(p, len, GFP_KERNEL); + dest->data = kmemdup(p, len, GFP_NOFS); if (unlikely(dest->data == NULL)) return ERR_PTR(-ENOMEM); dest->len = len; @@ -171,7 +158,7 @@ gss_alloc_context(void) { struct gss_cl_ctx *ctx; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_NOFS); if (ctx != NULL) { ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ @@ -272,7 +259,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) return NULL; } -/* Try to add a upcall to the pipefs queue. +/* Try to add an upcall to the pipefs queue. * If an upcall owned by our uid already exists, then we return a reference * to that upcall instead of adding the new upcall. */ @@ -341,7 +328,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid) { struct gss_upcall_msg *gss_msg; - gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); + gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); if (gss_msg != NULL) { INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); @@ -493,7 +480,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { const void *p, *end; void *buf; - struct rpc_clnt *clnt; struct gss_upcall_msg *gss_msg; struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; @@ -503,11 +489,10 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (mlen > MSG_BUF_MAXSIZE) goto out; err = -ENOMEM; - buf = kmalloc(mlen, GFP_KERNEL); + buf = kmalloc(mlen, GFP_NOFS); if (!buf) goto out; - clnt = RPC_I(inode)->private; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; @@ -806,7 +791,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", acred->uid, auth->au_flavor); - if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) + if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS))) goto out_err; rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 1d52308ca324..c93fca204558 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -83,8 +83,6 @@ out: return ret; } -EXPORT_SYMBOL(krb5_encrypt); - u32 krb5_decrypt( struct crypto_blkcipher *tfm, @@ -118,8 +116,6 @@ out: return ret; } -EXPORT_SYMBOL(krb5_decrypt); - static int checksummer(struct scatterlist *sg, void *data) { @@ -161,8 +157,6 @@ out: return err ? GSS_S_FAILURE : 0; } -EXPORT_SYMBOL(make_checksum); - struct encryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ struct blkcipher_desc desc; @@ -262,8 +256,6 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, return ret; } -EXPORT_SYMBOL(gss_encrypt_xdr_buf); - struct decryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ struct blkcipher_desc desc; @@ -334,5 +326,3 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); } - -EXPORT_SYMBOL(gss_decrypt_xdr_buf); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 60c3dba545d7..ef45eba22485 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -70,7 +70,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_NOFS); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); res->len = len; @@ -131,7 +131,7 @@ gss_import_sec_context_kerberos(const void *p, struct krb5_ctx *ctx; int tmp; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) goto out_err; p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 5f1d36dfbcf7..b8f42ef7178e 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -78,7 +78,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; - unsigned char *ptr, *krb5_hdr, *msg_start; + unsigned char *ptr, *msg_start; s32 now; u32 seq_send; @@ -87,36 +87,36 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, now = get_seconds(); - token->len = g_token_size(&ctx->mech_used, 24); + token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8); ptr = token->data; - g_make_token_header(&ctx->mech_used, 24, &ptr); + g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr); - *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); - *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); + /* ptr now at header described in rfc 1964, section 1.2.1: */ + ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff); + ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff); - /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ - krb5_hdr = ptr - 2; - msg_start = krb5_hdr + 24; + msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8; - *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5); - memset(krb5_hdr + 4, 0xff, 4); + *(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5); + memset(ptr + 4, 0xff, 4); - if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum)) + if (make_checksum("md5", ptr, 8, text, 0, &md5cksum)) return GSS_S_FAILURE; if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); + memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8); spin_lock(&krb5_seq_lock); seq_send = ctx->seq_send++; spin_unlock(&krb5_seq_lock); if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, - seq_send, krb5_hdr + 16, krb5_hdr + 8)) + seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, + ptr + 8)) return GSS_S_FAILURE; return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index d91a5d004803..066ec73c84d6 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -92,30 +92,30 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, read_token->len)) return GSS_S_DEFECTIVE_TOKEN; - if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || - (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) + if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) || + (ptr[1] != (KG_TOK_MIC_MSG & 0xff))) return GSS_S_DEFECTIVE_TOKEN; /* XXX sanity-check bodysize?? */ - signalg = ptr[0] + (ptr[1] << 8); + signalg = ptr[2] + (ptr[3] << 8); if (signalg != SGN_ALG_DES_MAC_MD5) return GSS_S_DEFECTIVE_TOKEN; - sealalg = ptr[2] + (ptr[3] << 8); + sealalg = ptr[4] + (ptr[5] << 8); if (sealalg != SEAL_ALG_NONE) return GSS_S_DEFECTIVE_TOKEN; - if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) + if ((ptr[6] != 0xff) || (ptr[7] != 0xff)) return GSS_S_DEFECTIVE_TOKEN; - if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum)) + if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum)) return GSS_S_FAILURE; if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16)) return GSS_S_FAILURE; - if (memcmp(md5cksum.data + 8, ptr + 14, 8)) + if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8)) return GSS_S_BAD_SIG; /* it got through unscathed. Make sure the context is unexpired */ @@ -127,7 +127,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, /* do sequencing checks */ - if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum)) + if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum)) return GSS_S_FAILURE; if ((ctx->initiate && direction != 0xff) || diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index b00b1b426301..ae8e69b59c4c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -87,8 +87,8 @@ out: return 0; } -static inline void -make_confounder(char *p, int blocksize) +static void +make_confounder(char *p, u32 conflen) { static u64 i = 0; u64 *q = (u64 *)p; @@ -102,8 +102,22 @@ make_confounder(char *p, int blocksize) * uniqueness would mean worrying about atomicity and rollover, and I * don't care enough. */ - BUG_ON(blocksize != 8); - *q = i++; + /* initialize to random value */ + if (i == 0) { + i = random32(); + i = (i << 32) | random32(); + } + + switch (conflen) { + case 16: + *q++ = i++; + /* fall through */ + case 8: + *q++ = i++; + break; + default: + BUG(); + } } /* Assumptions: the head and tail of inbuf are ours to play with. @@ -122,7 +136,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; int blocksize = 0, plainlen; - unsigned char *ptr, *krb5_hdr, *msg_start; + unsigned char *ptr, *msg_start; s32 now; int headlen; struct page **tmp_pages; @@ -149,26 +163,26 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, buf->len += headlen; BUG_ON((buf->len - offset - headlen) % blocksize); - g_make_token_header(&kctx->mech_used, 24 + plainlen, &ptr); + g_make_token_header(&kctx->mech_used, + GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr); - *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); - *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff); + /* ptr now at header described in rfc 1964, section 1.2.1: */ + ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff); + ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff); - /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ - krb5_hdr = ptr - 2; - msg_start = krb5_hdr + 24; + msg_start = ptr + 24; - *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5); - memset(krb5_hdr + 4, 0xff, 4); - *(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES); + *(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5); + memset(ptr + 4, 0xff, 4); + *(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES); make_confounder(msg_start, blocksize); /* XXXJBF: UGH!: */ tmp_pages = buf->pages; buf->pages = pages; - if (make_checksum("md5", krb5_hdr, 8, buf, + if (make_checksum("md5", ptr, 8, buf, offset + headlen - blocksize, &md5cksum)) return GSS_S_FAILURE; buf->pages = tmp_pages; @@ -176,7 +190,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); + memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8); spin_lock(&krb5_seq_lock); seq_send = kctx->seq_send++; @@ -185,7 +199,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, /* XXX would probably be more efficient to compute checksum * and encrypt at the same time: */ if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, - seq_send, krb5_hdr + 16, krb5_hdr + 8))) + seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))) return GSS_S_FAILURE; if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, @@ -219,38 +233,38 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) buf->len - offset)) return GSS_S_DEFECTIVE_TOKEN; - if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || - (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) + if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) || + (ptr[1] != (KG_TOK_WRAP_MSG & 0xff))) return GSS_S_DEFECTIVE_TOKEN; /* XXX sanity-check bodysize?? */ /* get the sign and seal algorithms */ - signalg = ptr[0] + (ptr[1] << 8); + signalg = ptr[2] + (ptr[3] << 8); if (signalg != SGN_ALG_DES_MAC_MD5) return GSS_S_DEFECTIVE_TOKEN; - sealalg = ptr[2] + (ptr[3] << 8); + sealalg = ptr[4] + (ptr[5] << 8); if (sealalg != SEAL_ALG_DES) return GSS_S_DEFECTIVE_TOKEN; - if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) + if ((ptr[6] != 0xff) || (ptr[7] != 0xff)) return GSS_S_DEFECTIVE_TOKEN; if (gss_decrypt_xdr_buf(kctx->enc, buf, - ptr + 22 - (unsigned char *)buf->head[0].iov_base)) + ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base)) return GSS_S_DEFECTIVE_TOKEN; - if (make_checksum("md5", ptr - 2, 8, buf, - ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum)) + if (make_checksum("md5", ptr, 8, buf, + ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum)) return GSS_S_FAILURE; if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - if (memcmp(md5cksum.data + 8, ptr + 14, 8)) + if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8)) return GSS_S_BAD_SIG; /* it got through unscathed. Make sure the context is unexpired */ @@ -262,8 +276,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) /* do sequencing checks */ - if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, - &seqnum)) + if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, + &direction, &seqnum)) return GSS_S_BAD_SIG; if ((kctx->initiate && direction != 0xff) || @@ -274,7 +288,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) * better to copy and encrypt at the same time. */ blocksize = crypto_blkcipher_blocksize(kctx->enc); - data_start = ptr + 22 + blocksize; + data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize; orig_start = buf->head[0].iov_base + offset; data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; memmove(orig_start, data_start, data_len); diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 5deb4b6e4514..035e1dd6af1b 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -76,7 +76,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_NOFS); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); return q; @@ -90,7 +90,7 @@ gss_import_sec_context_spkm3(const void *p, size_t len, struct spkm3_ctx *ctx; int version; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) goto out_err; p = simple_get_bytes(p, end, &version, sizeof(version)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 6cdd241ad267..3308157436d2 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -90,7 +90,7 @@ asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits) int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen) { - if (!(out->data = kzalloc(explen,GFP_KERNEL))) + if (!(out->data = kzalloc(explen,GFP_NOFS))) return 0; out->len = explen; memcpy(out->data, in, enclen); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 5905d56737d6..81ae3d62a0cc 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1144,20 +1144,20 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_SVC_NONE: break; case RPC_GSS_SVC_INTEGRITY: + /* placeholders for length and seq. number: */ + svc_putnl(resv, 0); + svc_putnl(resv, 0); if (unwrap_integ_data(&rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + break; + case RPC_GSS_SVC_PRIVACY: /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); - break; - case RPC_GSS_SVC_PRIVACY: if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; - /* placeholders for length and seq. number: */ - svc_putnl(resv, 0); - svc_putnl(resv, 0); break; default: goto auth_err; @@ -1170,8 +1170,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) goto out; } garbage_args: - /* Restore write pointer to its original value: */ - xdr_ressize_check(rqstp, reject_stat); ret = SVC_GARBAGE; goto out; auth_err: diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 44920b90bdc4..46b2647c5bd2 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", acred->uid, acred->gid); - if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) + if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS))) return ERR_PTR(-ENOMEM); rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8945307556ec..76739e928d0d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -25,6 +25,7 @@ #include <linux/module.h> #include <linux/types.h> +#include <linux/kallsyms.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/smp_lock.h> @@ -58,7 +59,6 @@ static void call_start(struct rpc_task *task); static void call_reserve(struct rpc_task *task); static void call_reserveresult(struct rpc_task *task); static void call_allocate(struct rpc_task *task); -static void call_encode(struct rpc_task *task); static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); @@ -70,9 +70,9 @@ static void call_refreshresult(struct rpc_task *task); static void call_timeout(struct rpc_task *task); static void call_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task); -static __be32 * call_header(struct rpc_task *task); -static __be32 * call_verify(struct rpc_task *task); +static __be32 *rpc_encode_header(struct rpc_task *task); +static __be32 *rpc_verify_header(struct rpc_task *task); static int rpc_ping(struct rpc_clnt *clnt, int flags); static void rpc_register_client(struct rpc_clnt *clnt) @@ -324,6 +324,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) clnt->cl_autobind = 1; if (args->flags & RPC_CLNT_CREATE_DISCRTRY) clnt->cl_discrtry = 1; + if (!(args->flags & RPC_CLNT_CREATE_QUIET)) + clnt->cl_chatty = 1; return clnt; } @@ -690,6 +692,21 @@ rpc_restart_call(struct rpc_task *task) } EXPORT_SYMBOL_GPL(rpc_restart_call); +#ifdef RPC_DEBUG +static const char *rpc_proc_name(const struct rpc_task *task) +{ + const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; + + if (proc) { + if (proc->p_name) + return proc->p_name; + else + return "NULL"; + } else + return "no proc"; +} +#endif + /* * 0. Initial state * @@ -701,9 +718,9 @@ call_start(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - dprintk("RPC: %5u call_start %s%d proc %d (%s)\n", task->tk_pid, + dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, clnt->cl_protname, clnt->cl_vers, - task->tk_msg.rpc_proc->p_proc, + rpc_proc_name(task), (RPC_IS_ASYNC(task) ? "async" : "sync")); /* Increment call count */ @@ -861,7 +878,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) * 3. Encode arguments of an RPC call */ static void -call_encode(struct rpc_task *task) +rpc_xdr_encode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; kxdrproc_t encode; @@ -876,23 +893,19 @@ call_encode(struct rpc_task *task) (char *)req->rq_buffer + req->rq_callsize, req->rq_rcvsize); - /* Encode header and provided arguments */ - encode = task->tk_msg.rpc_proc->p_encode; - if (!(p = call_header(task))) { - printk(KERN_INFO "RPC: call_header failed, exit EIO\n"); + p = rpc_encode_header(task); + if (p == NULL) { + printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n"); rpc_exit(task, -EIO); return; } + + encode = task->tk_msg.rpc_proc->p_encode; if (encode == NULL) return; task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); - if (task->tk_status == -ENOMEM) { - /* XXX: Is this sane? */ - rpc_delay(task, 3*HZ); - task->tk_status = -EAGAIN; - } } /* @@ -929,11 +942,9 @@ call_bind_status(struct rpc_task *task) } switch (task->tk_status) { - case -EAGAIN: - dprintk("RPC: %5u rpcbind waiting for another request " - "to finish\n", task->tk_pid); - /* avoid busy-waiting here -- could be a network outage. */ - rpc_delay(task, 5*HZ); + case -ENOMEM: + dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); + rpc_delay(task, HZ >> 2); goto retry_timeout; case -EACCES: dprintk("RPC: %5u remote rpcbind: RPC program/version " @@ -1046,10 +1057,16 @@ call_transmit(struct rpc_task *task) /* Encode here so that rpcsec_gss can use correct sequence number. */ if (rpc_task_need_encode(task)) { BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); - call_encode(task); + rpc_xdr_encode(task); /* Did the encode result in an error condition? */ - if (task->tk_status != 0) + if (task->tk_status != 0) { + /* Was the error nonfatal? */ + if (task->tk_status == -EAGAIN) + rpc_delay(task, HZ >> 4); + else + rpc_exit(task, task->tk_status); return; + } } xprt_transmit(task); if (task->tk_status < 0) @@ -1132,7 +1149,8 @@ call_status(struct rpc_task *task) rpc_exit(task, status); break; default: - printk("%s: RPC call returned error %d\n", + if (clnt->cl_chatty) + printk("%s: RPC call returned error %d\n", clnt->cl_protname, -status); rpc_exit(task, status); } @@ -1157,7 +1175,8 @@ call_timeout(struct rpc_task *task) task->tk_timeouts++; if (RPC_IS_SOFT(task)) { - printk(KERN_NOTICE "%s: server %s not responding, timed out\n", + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); rpc_exit(task, -EIO); return; @@ -1165,7 +1184,8 @@ call_timeout(struct rpc_task *task) if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; - printk(KERN_NOTICE "%s: server %s not responding, still trying\n", + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_protname, clnt->cl_server); } rpc_force_rebind(clnt); @@ -1196,8 +1216,9 @@ call_decode(struct rpc_task *task) task->tk_pid, task->tk_status); if (task->tk_flags & RPC_CALL_MAJORSEEN) { - printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, clnt->cl_server); + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s OK\n", + clnt->cl_protname, clnt->cl_server); task->tk_flags &= ~RPC_CALL_MAJORSEEN; } @@ -1224,8 +1245,7 @@ call_decode(struct rpc_task *task) goto out_retry; } - /* Verify the RPC header */ - p = call_verify(task); + p = rpc_verify_header(task); if (IS_ERR(p)) { if (p == ERR_PTR(-EAGAIN)) goto out_retry; @@ -1243,7 +1263,7 @@ call_decode(struct rpc_task *task) return; out_retry: task->tk_status = 0; - /* Note: call_verify() may have freed the RPC slot */ + /* Note: rpc_verify_header() may have freed the RPC slot */ if (task->tk_rqstp == req) { req->rq_received = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) @@ -1290,11 +1310,8 @@ call_refreshresult(struct rpc_task *task) return; } -/* - * Call header serialization - */ static __be32 * -call_header(struct rpc_task *task) +rpc_encode_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; @@ -1314,11 +1331,8 @@ call_header(struct rpc_task *task) return p; } -/* - * Reply header verification - */ static __be32 * -call_verify(struct rpc_task *task) +rpc_verify_header(struct rpc_task *task) { struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; @@ -1392,7 +1406,7 @@ call_verify(struct rpc_task *task) task->tk_action = call_bind; goto out_retry; case RPC_AUTH_TOOWEAK: - printk(KERN_NOTICE "call_verify: server %s requires stronger " + printk(KERN_NOTICE "RPC: server %s requires stronger " "authentication.\n", task->tk_client->cl_server); break; default: @@ -1431,10 +1445,10 @@ call_verify(struct rpc_task *task) error = -EPROTONOSUPPORT; goto out_err; case RPC_PROC_UNAVAIL: - dprintk("RPC: %5u %s: proc %p unsupported by program %u, " + dprintk("RPC: %5u %s: proc %s unsupported by program %u, " "version %u on server %s\n", task->tk_pid, __func__, - task->tk_msg.rpc_proc, + rpc_proc_name(task), task->tk_client->cl_prog, task->tk_client->cl_vers, task->tk_client->cl_server); @@ -1517,44 +1531,53 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int EXPORT_SYMBOL_GPL(rpc_call_null); #ifdef RPC_DEBUG +static void rpc_show_header(void) +{ + printk(KERN_INFO "-pid- flgs status -client- --rqstp- " + "-timeout ---ops--\n"); +} + +static void rpc_show_task(const struct rpc_clnt *clnt, + const struct rpc_task *task) +{ + const char *rpc_waitq = "none"; + char *p, action[KSYM_SYMBOL_LEN]; + + if (RPC_IS_QUEUED(task)) + rpc_waitq = rpc_qname(task->tk_waitqueue); + + /* map tk_action pointer to a function name; then trim off + * the "+0x0 [sunrpc]" */ + sprint_symbol(action, (unsigned long)task->tk_action); + p = strchr(action, '+'); + if (p) + *p = '\0'; + + printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n", + task->tk_pid, task->tk_flags, task->tk_status, + clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, + clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task), + action, rpc_waitq); +} + void rpc_show_tasks(void) { struct rpc_clnt *clnt; - struct rpc_task *t; + struct rpc_task *task; + int header = 0; spin_lock(&rpc_client_lock); - if (list_empty(&all_clients)) - goto out; - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- ---ops--\n"); list_for_each_entry(clnt, &all_clients, cl_clients) { - if (list_empty(&clnt->cl_tasks)) - continue; spin_lock(&clnt->cl_lock); - list_for_each_entry(t, &clnt->cl_tasks, tk_task) { - const char *rpc_waitq = "none"; - int proc; - - if (t->tk_msg.rpc_proc) - proc = t->tk_msg.rpc_proc->p_proc; - else - proc = -1; - - if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->tk_waitqueue); - - printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, proc, - t->tk_flags, t->tk_status, - t->tk_client, - (t->tk_client ? t->tk_client->cl_prog : 0), - t->tk_rqstp, t->tk_timeout, - rpc_waitq, - t->tk_action, t->tk_ops); + list_for_each_entry(task, &clnt->cl_tasks, tk_task) { + if (!header) { + rpc_show_header(); + header++; + } + rpc_show_task(clnt, task); } spin_unlock(&clnt->cl_lock); } -out: spin_unlock(&rpc_client_lock); } #endif diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 5a9b0e7828cd..23a2b8f6dc49 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -897,7 +897,7 @@ static struct file_system_type rpc_pipe_fs_type = { }; static void -init_once(struct kmem_cache * cachep, void *foo) +init_once(void *foo) { struct rpc_inode *rpci = (struct rpc_inode *) foo; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 0517967a68bf..24db2b4d12d3 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -32,6 +32,10 @@ #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) +#define RPCBVERS_2 (2u) +#define RPCBVERS_3 (3u) +#define RPCBVERS_4 (4u) + enum { RPCBPROC_NULL, RPCBPROC_SET, @@ -64,6 +68,7 @@ enum { #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); +static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; struct rpcbind_args { @@ -76,41 +81,73 @@ struct rpcbind_args { const char * r_netid; const char * r_addr; const char * r_owner; + + int r_status; }; static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; +static struct rpc_procinfo rpcb_procedures4[]; struct rpcb_info { - int rpc_vers; + u32 rpc_vers; struct rpc_procinfo * rpc_proc; }; static struct rpcb_info rpcb_next_version[]; static struct rpcb_info rpcb_next_version6[]; +static const struct rpc_call_ops rpcb_getport_ops = { + .rpc_call_done = rpcb_getport_done, + .rpc_release = rpcb_map_release, +}; + +static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) +{ + xprt_clear_binding(xprt); + rpc_wake_up_status(&xprt->binding, status); +} + static void rpcb_map_release(void *data) { struct rpcbind_args *map = data; + rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status); xprt_put(map->r_xprt); kfree(map); } -static const struct rpc_call_ops rpcb_getport_ops = { - .rpc_call_done = rpcb_getport_done, - .rpc_release = rpcb_map_release, +static const struct sockaddr_in rpcb_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(RPCBIND_PORT), }; -static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) +static const struct sockaddr_in6 rpcb_in6addr_loopback = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, + .sin6_port = htons(RPCBIND_PORT), +}; + +static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, + size_t addrlen, u32 version) { - xprt_clear_binding(xprt); - rpc_wake_up_status(&xprt->binding, status); + struct rpc_create_args args = { + .protocol = XPRT_TRANSPORT_UDP, + .address = addr, + .addrsize = addrlen, + .servername = "localhost", + .program = &rpcb_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, + .flags = RPC_CLNT_CREATE_NOPING, + }; + + return rpc_create(&args); } static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, - size_t salen, int proto, u32 version, - int privileged) + size_t salen, int proto, u32 version) { struct rpc_create_args args = { .protocol = proto, @@ -120,7 +157,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = RPC_CLNT_CREATE_NOPING, + .flags = (RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_NONPRIVPORT), }; switch (srvaddr->sa_family) { @@ -134,29 +172,72 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return NULL; } - if (!privileged) - args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; return rpc_create(&args); } +static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, + u32 version, struct rpc_message *msg, + int *result) +{ + struct rpc_clnt *rpcb_clnt; + int error = 0; + + *result = 0; + + rpcb_clnt = rpcb_create_local(addr, addrlen, version); + if (!IS_ERR(rpcb_clnt)) { + error = rpc_call_sync(rpcb_clnt, msg, 0); + rpc_shutdown_client(rpcb_clnt); + } else + error = PTR_ERR(rpcb_clnt); + + if (error < 0) + printk(KERN_WARNING "RPC: failed to contact local rpcbind " + "server (errno %d).\n", -error); + dprintk("RPC: registration status %d/%d\n", error, *result); + + return error; +} + /** * rpcb_register - set or unset a port registration with the local rpcbind svc * @prog: RPC program number to bind * @vers: RPC version number to bind - * @prot: transport protocol to use to make this request + * @prot: transport protocol to register * @port: port value to register - * @okay: result code + * @okay: OUT: result code + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon. RPC services + * invoke this function once for each [program, version, transport] + * tuple they wish to advertise. + * + * Callers may also unregister RPC services that are no longer + * available by setting the passed-in port to zero. This removes + * all registered transports for [program, version] from the local + * rpcbind database. + * + * Returns zero if the registration request was dispatched + * successfully and a reply was received. The rpcbind daemon's + * boolean result code is stored in *okay. + * + * Returns an errno value and sets *result to zero if there was + * some problem that prevented the rpcbind request from being + * dispatched, or if the rpcbind daemon did not respond within + * the timeout. * - * port == 0 means unregister, port != 0 means register. + * This function uses rpcbind protocol version 2 to contact the + * local rpcbind daemon. * - * This routine supports only rpcbind version 2. + * Registration works over both AF_INET and AF_INET6, and services + * registered via this function are advertised as available for any + * address. If the local rpcbind daemon is listening on AF_INET6, + * services registered via this function will be advertised on + * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 + * addresses). */ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) { - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - }; struct rpcbind_args map = { .r_prog = prog, .r_vers = vers, @@ -164,32 +245,159 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) .r_port = port, }; struct rpc_message msg = { - .rpc_proc = &rpcb_procedures2[port ? - RPCBPROC_SET : RPCBPROC_UNSET], .rpc_argp = &map, .rpc_resp = okay, }; - struct rpc_clnt *rpcb_clnt; - int error = 0; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), prog, vers, prot, port); - rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1); - if (IS_ERR(rpcb_clnt)) - return PTR_ERR(rpcb_clnt); + msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; + if (port) + msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - error = rpc_call_sync(rpcb_clnt, &msg, 0); + return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_2, &msg, okay); +} - rpc_shutdown_client(rpcb_clnt); - if (error < 0) - printk(KERN_WARNING "RPC: failed to contact local rpcbind " - "server (errno %d).\n", -error); - dprintk("RPC: registration status %d/%d\n", error, *okay); +/* + * Fill in AF_INET family-specific arguments to register + */ +static int rpcb_register_netid4(struct sockaddr_in *address_to_register, + struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + unsigned short port = ntohs(address_to_register->sin_port); + char buf[32]; + + /* Construct AF_INET universal address */ + snprintf(buf, sizeof(buf), + NIPQUAD_FMT".%u.%u", + NIPQUAD(address_to_register->sin_addr.s_addr), + port >> 8, port & 0xff); + map->r_addr = buf; + + dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " + "local rpcbind\n", (port ? "" : "un"), + map->r_prog, map->r_vers, + map->r_addr, map->r_netid); + + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + if (port) + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + + return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_4, msg, msg->rpc_resp); +} - return error; +/* + * Fill in AF_INET6 family-specific arguments to register + */ +static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, + struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + unsigned short port = ntohs(address_to_register->sin6_port); + char buf[64]; + + /* Construct AF_INET6 universal address */ + snprintf(buf, sizeof(buf), + NIP6_FMT".%u.%u", + NIP6(address_to_register->sin6_addr), + port >> 8, port & 0xff); + map->r_addr = buf; + + dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " + "local rpcbind\n", (port ? "" : "un"), + map->r_prog, map->r_vers, + map->r_addr, map->r_netid); + + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + if (port) + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + + return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, + sizeof(rpcb_in6addr_loopback), + RPCBVERS_4, msg, msg->rpc_resp); +} + +/** + * rpcb_v4_register - set or unset a port registration with the local rpcbind + * @program: RPC program number of service to (un)register + * @version: RPC version number of service to (un)register + * @address: address family, IP address, and port to (un)register + * @netid: netid of transport protocol to (un)register + * @result: result code from rpcbind RPC call + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon. RPC services + * invoke this function once for each [program, version, address, + * netid] tuple they wish to advertise. + * + * Callers may also unregister RPC services that are no longer + * available by setting the port number in the passed-in address + * to zero. Callers pass a netid of "" to unregister all + * transport netids associated with [program, version, address]. + * + * Returns zero if the registration request was dispatched + * successfully and a reply was received. The rpcbind daemon's + * result code is stored in *result. + * + * Returns an errno value and sets *result to zero if there was + * some problem that prevented the rpcbind request from being + * dispatched, or if the rpcbind daemon did not respond within + * the timeout. + * + * This function uses rpcbind protocol version 4 to contact the + * local rpcbind daemon. The local rpcbind daemon must support + * version 4 of the rpcbind protocol in order for these functions + * to register a service successfully. + * + * Supported netids include "udp" and "tcp" for UDP and TCP over + * IPv4, and "udp6" and "tcp6" for UDP and TCP over IPv6, + * respectively. + * + * The contents of @address determine the address family and the + * port to be registered. The usual practice is to pass INADDR_ANY + * as the raw address, but specifying a non-zero address is also + * supported by this API if the caller wishes to advertise an RPC + * service on a specific network interface. + * + * Note that passing in INADDR_ANY does not create the same service + * registration as IN6ADDR_ANY. The former advertises an RPC + * service on any IPv4 address, but not on IPv6. The latter + * advertises the service on all IPv4 and IPv6 addresses. + */ +int rpcb_v4_register(const u32 program, const u32 version, + const struct sockaddr *address, const char *netid, + int *result) +{ + struct rpcbind_args map = { + .r_prog = program, + .r_vers = version, + .r_netid = netid, + .r_owner = RPCB_OWNER_STRING, + }; + struct rpc_message msg = { + .rpc_argp = &map, + .rpc_resp = result, + }; + + *result = 0; + + switch (address->sa_family) { + case AF_INET: + return rpcb_register_netid4((struct sockaddr_in *)address, + &msg); + case AF_INET6: + return rpcb_register_netid6((struct sockaddr_in6 *)address, + &msg); + } + + return -EAFNOSUPPORT; } /** @@ -227,7 +435,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) __func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin, - sizeof(*sin), prot, 2, 0); + sizeof(*sin), prot, RPCBVERS_2); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -243,10 +451,10 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) } EXPORT_SYMBOL_GPL(rpcb_getport_sync); -static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version) +static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc) { struct rpc_message msg = { - .rpc_proc = rpcb_next_version[version].rpc_proc, + .rpc_proc = proc, .rpc_argp = map, .rpc_resp = &map->r_port, }; @@ -271,6 +479,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi void rpcb_getport_async(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_procinfo *proc; u32 bind_version; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_clnt *rpcb_clnt; @@ -280,7 +489,6 @@ void rpcb_getport_async(struct rpc_task *task) struct sockaddr *sap = (struct sockaddr *)&addr; size_t salen; int status; - struct rpcb_info *info; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __func__, @@ -289,17 +497,16 @@ void rpcb_getport_async(struct rpc_task *task) /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); + /* Put self on the wait queue to ensure we get notified if + * some other task is already attempting to bind the port */ + rpc_sleep_on(&xprt->binding, task, NULL); + if (xprt_test_and_set_binding(xprt)) { - status = -EAGAIN; /* tell caller to check again */ dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __func__); - goto bailout_nowake; + return; } - /* Put self on queue before sending rpcbind request, in case - * rpcb_getport_done completes before we return from rpc_run_task */ - rpc_sleep_on(&xprt->binding, task, NULL); - /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; @@ -313,10 +520,12 @@ void rpcb_getport_async(struct rpc_task *task) /* Don't ever use rpcbind v2 for AF_INET6 requests */ switch (sap->sa_family) { case AF_INET: - info = rpcb_next_version; + proc = rpcb_next_version[xprt->bind_index].rpc_proc; + bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; break; case AF_INET6: - info = rpcb_next_version6; + proc = rpcb_next_version6[xprt->bind_index].rpc_proc; + bind_version = rpcb_next_version6[xprt->bind_index].rpc_vers; break; default: status = -EAFNOSUPPORT; @@ -324,20 +533,19 @@ void rpcb_getport_async(struct rpc_task *task) task->tk_pid, __func__); goto bailout_nofree; } - if (info[xprt->bind_index].rpc_proc == NULL) { + if (proc == NULL) { xprt->bind_index = 0; status = -EPFNOSUPPORT; dprintk("RPC: %5u %s: no more getport versions available\n", task->tk_pid, __func__); goto bailout_nofree; } - bind_version = info[xprt->bind_index].rpc_vers; dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __func__, bind_version); rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot, - bind_version, 0); + bind_version); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", @@ -360,26 +568,23 @@ void rpcb_getport_async(struct rpc_task *task) map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ + map->r_status = -EIO; - child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index); + child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { - status = -EIO; + /* rpcb_map_release() has freed the arguments */ dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __func__); - goto bailout; + return; } rpc_put_task(child); task->tk_xprt->stat.bind_count++; return; -bailout: - kfree(map); - xprt_put(xprt); bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); -bailout_nowake: task->tk_status = status; } EXPORT_SYMBOL_GPL(rpcb_getport_async); @@ -418,9 +623,13 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", child->tk_pid, status, map->r_port); - rpcb_wake_rpcbind_waiters(xprt, status); + map->r_status = status; } +/* + * XDR functions for rpcbind + */ + static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { @@ -439,7 +648,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { *portp = (unsigned short) ntohl(*p++); - dprintk("RPC: rpcb_decode_getport result %u\n", + dprintk("RPC: rpcb_decode_getport result %u\n", *portp); return 0; } @@ -448,8 +657,8 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) { *boolp = (unsigned int) ntohl(*p++); - dprintk("RPC: rpcb_decode_set result %u\n", - *boolp); + dprintk("RPC: rpcb_decode_set: call %s\n", + (*boolp ? "succeeded" : "failed")); return 0; } @@ -572,52 +781,60 @@ out_err: static struct rpc_procinfo rpcb_procedures2[] = { PROC(SET, mapping, set), PROC(UNSET, mapping, set), - PROC(GETADDR, mapping, getport), + PROC(GETPORT, mapping, getport), }; static struct rpc_procinfo rpcb_procedures3[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), PROC(GETADDR, getaddr, getaddr), }; static struct rpc_procinfo rpcb_procedures4[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), + PROC(GETADDR, getaddr, getaddr), PROC(GETVERSADDR, getaddr, getaddr), }; static struct rpcb_info rpcb_next_version[] = { -#ifdef CONFIG_SUNRPC_BIND34 - { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, - { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, -#endif - { 2, &rpcb_procedures2[RPCBPROC_GETPORT] }, - { 0, NULL }, + { + .rpc_vers = RPCBVERS_2, + .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], + }, + { + .rpc_proc = NULL, + }, }; static struct rpcb_info rpcb_next_version6[] = { -#ifdef CONFIG_SUNRPC_BIND34 - { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, - { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, -#endif - { 0, NULL }, + { + .rpc_vers = RPCBVERS_4, + .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], + }, + { + .rpc_vers = RPCBVERS_3, + .rpc_proc = &rpcb_procedures3[RPCBPROC_GETADDR], + }, + { + .rpc_proc = NULL, + }, }; static struct rpc_version rpcb_version2 = { - .number = 2, + .number = RPCBVERS_2, .nrprocs = RPCB_HIGHPROC_2, .procs = rpcb_procedures2 }; static struct rpc_version rpcb_version3 = { - .number = 3, + .number = RPCBVERS_3, .nrprocs = RPCB_HIGHPROC_3, .procs = rpcb_procedures3 }; static struct rpc_version rpcb_version4 = { - .number = 4, + .number = RPCBVERS_4, .nrprocs = RPCB_HIGHPROC_4, .procs = rpcb_procedures4 }; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6eab9bf94baf..385f427bedad 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -576,9 +576,7 @@ EXPORT_SYMBOL_GPL(rpc_delay); */ static void rpc_prepare_task(struct rpc_task *task) { - lock_kernel(); task->tk_ops->rpc_call_prepare(task, task->tk_calldata); - unlock_kernel(); } /* @@ -588,9 +586,7 @@ void rpc_exit_task(struct rpc_task *task) { task->tk_action = NULL; if (task->tk_ops->rpc_call_done != NULL) { - lock_kernel(); task->tk_ops->rpc_call_done(task, task->tk_calldata); - unlock_kernel(); if (task->tk_action != NULL) { WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ @@ -602,11 +598,8 @@ EXPORT_SYMBOL_GPL(rpc_exit_task); void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) { - if (ops->rpc_release != NULL) { - lock_kernel(); + if (ops->rpc_release != NULL) ops->rpc_release(calldata); - unlock_kernel(); - } } /* @@ -626,19 +619,15 @@ static void __rpc_execute(struct rpc_task *task) /* * Execute any pending callback. */ - if (RPC_DO_CALLBACK(task)) { - /* Define a callback save pointer */ + if (task->tk_callback) { void (*save_callback)(struct rpc_task *); /* - * If a callback exists, save it, reset it, - * call it. - * The save is needed to stop from resetting - * another callback set within the callback handler - * - Dave + * We set tk_callback to NULL before calling it, + * in case it sets the tk_callback field itself: */ - save_callback=task->tk_callback; - task->tk_callback=NULL; + save_callback = task->tk_callback; + task->tk_callback = NULL; save_callback(task); } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index d74c2d269539..5a32cb7c4bb4 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -18,7 +18,7 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/module.h> -#include <linux/sched.h> +#include <linux/kthread.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/xdr.h> @@ -292,15 +292,14 @@ svc_pool_map_put(void) /* - * Set the current thread's cpus_allowed mask so that it + * Set the given thread's cpus_allowed mask so that it * will only run on cpus in the given pool. - * - * Returns 1 and fills in oldmask iff a cpumask was applied. */ -static inline int -svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) +static inline void +svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) { struct svc_pool_map *m = &svc_pool_map; + unsigned int node = m->pool_to[pidx]; /* * The caller checks for sv_nrpools > 1, which @@ -308,26 +307,17 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) */ BUG_ON(m->count == 0); - switch (m->mode) - { - default: - return 0; + switch (m->mode) { case SVC_POOL_PERCPU: { - unsigned int cpu = m->pool_to[pidx]; - - *oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - return 1; + set_cpus_allowed_ptr(task, &cpumask_of_cpu(node)); + break; } case SVC_POOL_PERNODE: { - unsigned int node = m->pool_to[pidx]; node_to_cpumask_ptr(nodecpumask, node); - - *oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, nodecpumask); - return 1; + set_cpus_allowed_ptr(task, nodecpumask); + break; } } } @@ -444,7 +434,7 @@ EXPORT_SYMBOL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, void (*shutdown)(struct svc_serv *serv), - svc_thread_fn func, int sig, struct module *mod) + svc_thread_fn func, struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); @@ -453,7 +443,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, if (serv != NULL) { serv->sv_function = func; - serv->sv_kill_signal = sig; serv->sv_module = mod; } @@ -462,7 +451,8 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, EXPORT_SYMBOL(svc_create_pooled); /* - * Destroy an RPC service. Should be called with the BKL held + * Destroy an RPC service. Should be called with appropriate locking to + * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. */ void svc_destroy(struct svc_serv *serv) @@ -579,46 +569,6 @@ out_enomem: EXPORT_SYMBOL(svc_prepare_thread); /* - * Create a thread in the given pool. Caller must hold BKL. - * On a NUMA or SMP machine, with a multi-pool serv, the thread - * will be restricted to run on the cpus belonging to the pool. - */ -static int -__svc_create_thread(svc_thread_fn func, struct svc_serv *serv, - struct svc_pool *pool) -{ - struct svc_rqst *rqstp; - int error = -ENOMEM; - int have_oldmask = 0; - cpumask_t uninitialized_var(oldmask); - - rqstp = svc_prepare_thread(serv, pool); - if (IS_ERR(rqstp)) { - error = PTR_ERR(rqstp); - goto out; - } - - if (serv->sv_nrpools > 1) - have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); - - error = kernel_thread((int (*)(void *)) func, rqstp, 0); - - if (have_oldmask) - set_cpus_allowed(current, oldmask); - - if (error < 0) - goto out_thread; - svc_sock_update_bufs(serv); - error = 0; -out: - return error; - -out_thread: - svc_exit_thread(rqstp); - goto out; -} - -/* * Choose a pool in which to create a new thread, for svc_set_num_threads */ static inline struct svc_pool * @@ -675,7 +625,7 @@ found_pool: * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between * all pools. Must be called with a svc_get() reference and - * the BKL held. + * the BKL or another lock to protect access to svc_serv fields. * * Destroying threads relies on the service threads filling in * rqstp->rq_task, which only the nfs ones do. Assumes the serv @@ -687,7 +637,9 @@ found_pool: int svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { - struct task_struct *victim; + struct svc_rqst *rqstp; + struct task_struct *task; + struct svc_pool *chosen_pool; int error = 0; unsigned int state = serv->sv_nrthreads-1; @@ -703,18 +655,34 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) /* create new threads */ while (nrservs > 0) { nrservs--; + chosen_pool = choose_pool(serv, pool, &state); + + rqstp = svc_prepare_thread(serv, chosen_pool); + if (IS_ERR(rqstp)) { + error = PTR_ERR(rqstp); + break; + } + __module_get(serv->sv_module); - error = __svc_create_thread(serv->sv_function, serv, - choose_pool(serv, pool, &state)); - if (error < 0) { + task = kthread_create(serv->sv_function, rqstp, serv->sv_name); + if (IS_ERR(task)) { + error = PTR_ERR(task); module_put(serv->sv_module); + svc_exit_thread(rqstp); break; } + + rqstp->rq_task = task; + if (serv->sv_nrpools > 1) + svc_pool_map_set_cpumask(task, chosen_pool->sp_id); + + svc_sock_update_bufs(serv); + wake_up_process(task); } /* destroy old threads */ while (nrservs < 0 && - (victim = choose_victim(serv, pool, &state)) != NULL) { - send_sig(serv->sv_kill_signal, victim, 1); + (task = choose_victim(serv, pool, &state)) != NULL) { + send_sig(SIGINT, task, 1); nrservs++; } @@ -723,7 +691,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) EXPORT_SYMBOL(svc_set_num_threads); /* - * Called from a server thread as it's exiting. Caller must hold BKL. + * Called from a server thread as it's exiting. Caller must hold the BKL or + * the "service mutex", whichever is appropriate for the service. */ void svc_exit_thread(struct svc_rqst *rqstp) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d8e8d79a8451..e46c825f4954 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -6,30 +6,9 @@ #include <linux/sched.h> #include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/net.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/udp.h> -#include <linux/tcp.h> -#include <linux/unistd.h> -#include <linux/slab.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/file.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <net/sock.h> -#include <net/checksum.h> -#include <net/ip.h> -#include <net/ipv6.h> -#include <net/tcp_states.h> -#include <linux/uaccess.h> -#include <asm/ioctls.h> - -#include <linux/sunrpc/types.h> -#include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/xdr.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svc_xprt.h> @@ -296,8 +275,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) if (!(xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) return; - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) - return; cpu = get_cpu(); pool = svc_pool_for_cpu(xprt->xpt_server, cpu); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 3f30ee6006ae..f24800f2c098 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -278,7 +278,7 @@ static int ip_map_show(struct seq_file *m, dom = im->m_client->h.name; if (ipv6_addr_v4mapped(&addr)) { - seq_printf(m, "%s" NIPQUAD_FMT "%s\n", + seq_printf(m, "%s " NIPQUAD_FMT " %s\n", im->m_class, ntohl(addr.s6_addr32[3]) >> 24 & 0xff, ntohl(addr.s6_addr32[3]) >> 16 & 0xff, @@ -286,7 +286,7 @@ static int ip_map_show(struct seq_file *m, ntohl(addr.s6_addr32[3]) >> 0 & 0xff, dom); } else { - seq_printf(m, "%s" NIP6_FMT "%s\n", + seq_printf(m, "%s " NIP6_FMT " %s\n", im->m_class, NIP6(addr), dom); } return 0; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e1770f7ba0b3..99a52aabe332 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -690,7 +690,7 @@ static void xprt_connect_status(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - if (task->tk_status >= 0) { + if (task->tk_status == 0) { xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; dprintk("RPC: %5u xprt_connect_status: connection established\n", @@ -699,12 +699,6 @@ static void xprt_connect_status(struct rpc_task *task) } switch (task->tk_status) { - case -ECONNREFUSED: - case -ECONNRESET: - dprintk("RPC: %5u xprt_connect_status: server %s refused " - "connection\n", task->tk_pid, - task->tk_client->cl_server); - break; case -ENOTCONN: dprintk("RPC: %5u xprt_connect_status: connection broken\n", task->tk_pid); @@ -878,6 +872,7 @@ void xprt_transmit(struct rpc_task *task) return; req->rq_connect_cookie = xprt->connect_cookie; + req->rq_xtime = jiffies; status = xprt->ops->send_request(task); if (status == 0) { dprintk("RPC: %5u xmit complete\n", task->tk_pid); diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 88c0ca20bb1e..87101177825b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -69,6 +69,10 @@ atomic_t rdma_stat_rq_prod; atomic_t rdma_stat_sq_poll; atomic_t rdma_stat_sq_prod; +/* Temporary NFS request map and context caches */ +struct kmem_cache *svc_rdma_map_cachep; +struct kmem_cache *svc_rdma_ctxt_cachep; + /* * This function implements reading and resetting an atomic_t stat * variable through read/write to a proc file. Any write to the file @@ -236,11 +240,14 @@ static ctl_table svcrdma_root_table[] = { void svc_rdma_cleanup(void) { dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); + flush_scheduled_work(); if (svcrdma_table_header) { unregister_sysctl_table(svcrdma_table_header); svcrdma_table_header = NULL; } svc_unreg_xprt_class(&svc_rdma_class); + kmem_cache_destroy(svc_rdma_map_cachep); + kmem_cache_destroy(svc_rdma_ctxt_cachep); } int svc_rdma_init(void) @@ -255,9 +262,37 @@ int svc_rdma_init(void) svcrdma_table_header = register_sysctl_table(svcrdma_root_table); + /* Create the temporary map cache */ + svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache", + sizeof(struct svc_rdma_req_map), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!svc_rdma_map_cachep) { + printk(KERN_INFO "Could not allocate map cache.\n"); + goto err0; + } + + /* Create the temporary context cache */ + svc_rdma_ctxt_cachep = + kmem_cache_create("svc_rdma_ctxt_cache", + sizeof(struct svc_rdma_op_ctxt), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!svc_rdma_ctxt_cachep) { + printk(KERN_INFO "Could not allocate WR ctxt cache.\n"); + goto err1; + } + /* Register RDMA with the SVC transport switch */ svc_reg_xprt_class(&svc_rdma_class); return 0; + err1: + kmem_cache_destroy(svc_rdma_map_cachep); + err0: + unregister_sysctl_table(svcrdma_table_header); + return -ENOMEM; } MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); MODULE_DESCRIPTION("SVC RDMA Transport"); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index c22d6b6f2db4..b4b17f44cb29 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -112,11 +112,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, rqstp->rq_arg.tail[0].iov_len = 0; } -struct chunk_sge { - int start; /* sge no for this chunk */ - int count; /* sge count for this chunk */ -}; - /* Encode a read-chunk-list as an array of IB SGE * * Assumptions: @@ -134,8 +129,8 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, struct rpcrdma_msg *rmsgp, - struct ib_sge *sge, - struct chunk_sge *ch_sge_ary, + struct svc_rdma_req_map *rpl_map, + struct svc_rdma_req_map *chl_map, int ch_count, int byte_count) { @@ -156,22 +151,18 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, head->arg.head[0] = rqstp->rq_arg.head[0]; head->arg.tail[0] = rqstp->rq_arg.tail[0]; head->arg.pages = &head->pages[head->count]; - head->sge[0].length = head->count; /* save count of hdr pages */ + head->hdr_count = head->count; /* save count of hdr pages */ head->arg.page_base = 0; head->arg.page_len = ch_bytes; head->arg.len = rqstp->rq_arg.len + ch_bytes; head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes; head->count++; - ch_sge_ary[0].start = 0; + chl_map->ch[0].start = 0; while (byte_count) { + rpl_map->sge[sge_no].iov_base = + page_address(rqstp->rq_arg.pages[page_no]) + page_off; sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes); - sge[sge_no].addr = - ib_dma_map_page(xprt->sc_cm_id->device, - rqstp->rq_arg.pages[page_no], - page_off, sge_bytes, - DMA_FROM_DEVICE); - sge[sge_no].length = sge_bytes; - sge[sge_no].lkey = xprt->sc_phys_mr->lkey; + rpl_map->sge[sge_no].iov_len = sge_bytes; /* * Don't bump head->count here because the same page * may be used by multiple SGE. @@ -187,11 +178,11 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, * SGE, move to the next SGE */ if (ch_bytes == 0) { - ch_sge_ary[ch_no].count = - sge_no - ch_sge_ary[ch_no].start; + chl_map->ch[ch_no].count = + sge_no - chl_map->ch[ch_no].start; ch_no++; ch++; - ch_sge_ary[ch_no].start = sge_no; + chl_map->ch[ch_no].start = sge_no; ch_bytes = ch->rc_target.rs_length; /* If bytes remaining account for next chunk */ if (byte_count) { @@ -220,18 +211,25 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, return sge_no; } -static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt, - struct ib_sge *sge, +static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, + struct svc_rdma_op_ctxt *ctxt, + struct kvec *vec, u64 *sgl_offset, int count) { int i; ctxt->count = count; + ctxt->direction = DMA_FROM_DEVICE; for (i = 0; i < count; i++) { - ctxt->sge[i].addr = sge[i].addr; - ctxt->sge[i].length = sge[i].length; - *sgl_offset = *sgl_offset + sge[i].length; + atomic_inc(&xprt->sc_dma_used); + ctxt->sge[i].addr = + ib_dma_map_single(xprt->sc_cm_id->device, + vec[i].iov_base, vec[i].iov_len, + DMA_FROM_DEVICE); + ctxt->sge[i].length = vec[i].iov_len; + ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey; + *sgl_offset = *sgl_offset + vec[i].iov_len; } } @@ -260,11 +258,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) * On our side, we need to read into a pagelist. The first page immediately * follows the RPC header. * - * This function returns 1 to indicate success. The data is not yet in + * This function returns: + * 0 - No error and no read-list found. + * + * 1 - Successful read-list processing. The data is not yet in * the pagelist and therefore the RPC request must be deferred. The * I/O completion will enqueue the transport again and * svc_rdma_recvfrom will complete the request. * + * <0 - Error processing/posting read-list. + * * NOTE: The ctxt must not be touched after the last WR has been posted * because the I/O completion processing may occur on another * processor and free / modify the context. Ne touche pas! @@ -277,50 +280,38 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, struct ib_send_wr read_wr; int err = 0; int ch_no; - struct ib_sge *sge; int ch_count; int byte_count; int sge_count; u64 sgl_offset; struct rpcrdma_read_chunk *ch; struct svc_rdma_op_ctxt *ctxt = NULL; - struct svc_rdma_op_ctxt *head; - struct svc_rdma_op_ctxt *tmp_sge_ctxt; - struct svc_rdma_op_ctxt *tmp_ch_ctxt; - struct chunk_sge *ch_sge_ary; + struct svc_rdma_req_map *rpl_map; + struct svc_rdma_req_map *chl_map; /* If no read list is present, return 0 */ ch = svc_rdma_get_read_chunk(rmsgp); if (!ch) return 0; - /* Allocate temporary contexts to keep SGE */ - BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge)); - tmp_sge_ctxt = svc_rdma_get_context(xprt); - sge = tmp_sge_ctxt->sge; - tmp_ch_ctxt = svc_rdma_get_context(xprt); - ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge; + /* Allocate temporary reply and chunk maps */ + rpl_map = svc_rdma_get_req_map(); + chl_map = svc_rdma_get_req_map(); svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); + if (ch_count > RPCSVC_MAXPAGES) + return -EINVAL; sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, - sge, ch_sge_ary, + rpl_map, chl_map, ch_count, byte_count); - head = svc_rdma_get_context(xprt); sgl_offset = 0; ch_no = 0; for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; ch->rc_discrim != 0; ch++, ch_no++) { next_sge: - if (!ctxt) - ctxt = head; - else { - ctxt->next = svc_rdma_get_context(xprt); - ctxt = ctxt->next; - } - ctxt->next = NULL; + ctxt = svc_rdma_get_context(xprt); ctxt->direction = DMA_FROM_DEVICE; - clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); /* Prepare READ WR */ @@ -333,50 +324,46 @@ next_sge: read_wr.wr.rdma.remote_addr = get_unaligned(&(ch->rc_target.rs_offset)) + sgl_offset; - read_wr.sg_list = &sge[ch_sge_ary[ch_no].start]; + read_wr.sg_list = ctxt->sge; read_wr.num_sge = - rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count); - rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start], + rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); + rdma_set_ctxt_sge(xprt, ctxt, + &rpl_map->sge[chl_map->ch[ch_no].start], &sgl_offset, read_wr.num_sge); if (((ch+1)->rc_discrim == 0) && - (read_wr.num_sge == ch_sge_ary[ch_no].count)) { + (read_wr.num_sge == chl_map->ch[ch_no].count)) { /* * Mark the last RDMA_READ with a bit to * indicate all RPC data has been fetched from * the client and the RPC needs to be enqueued. */ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - ctxt->next = hdr_ctxt; - hdr_ctxt->next = head; + ctxt->read_hdr = hdr_ctxt; } /* Post the read */ err = svc_rdma_send(xprt, &read_wr); if (err) { - printk(KERN_ERR "svcrdma: Error posting send = %d\n", + printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", err); - /* - * Break the circular list so free knows when - * to stop if the error happened to occur on - * the last read - */ - ctxt->next = NULL; + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + svc_rdma_put_context(ctxt, 0); goto out; } atomic_inc(&rdma_stat_read); - if (read_wr.num_sge < ch_sge_ary[ch_no].count) { - ch_sge_ary[ch_no].count -= read_wr.num_sge; - ch_sge_ary[ch_no].start += read_wr.num_sge; + if (read_wr.num_sge < chl_map->ch[ch_no].count) { + chl_map->ch[ch_no].count -= read_wr.num_sge; + chl_map->ch[ch_no].start += read_wr.num_sge; goto next_sge; } sgl_offset = 0; - err = 0; + err = 1; } out: - svc_rdma_put_context(tmp_sge_ctxt, 0); - svc_rdma_put_context(tmp_ch_ctxt, 0); + svc_rdma_put_req_map(rpl_map); + svc_rdma_put_req_map(chl_map); /* Detach arg pages. svc_recv will replenish them */ for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) @@ -389,25 +376,12 @@ next_sge: while (rqstp->rq_resused) rqstp->rq_respages[--rqstp->rq_resused] = NULL; - if (err) { - printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err); - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); - /* Free the linked list of read contexts */ - while (head != NULL) { - ctxt = head->next; - svc_rdma_put_context(head, 1); - head = ctxt; - } - return 0; - } - - return 1; + return err; } static int rdma_read_complete(struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *data) + struct svc_rdma_op_ctxt *head) { - struct svc_rdma_op_ctxt *head = data->next; int page_no; int ret; @@ -419,7 +393,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp, rqstp->rq_pages[page_no] = head->pages[page_no]; } /* Point rq_arg.pages past header */ - rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length]; + rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; rqstp->rq_arg.page_len = head->arg.page_len; rqstp->rq_arg.page_base = head->arg.page_base; @@ -433,21 +407,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp, rqstp->rq_arg.len = head->arg.len; rqstp->rq_arg.buflen = head->arg.buflen; + /* Free the context */ + svc_rdma_put_context(head, 0); + /* XXX: What should this be? */ rqstp->rq_prot = IPPROTO_MAX; - - /* - * Free the contexts we used to build the RDMA_READ. We have - * to be careful here because the context list uses the same - * next pointer used to chain the contexts associated with the - * RDMA_READ - */ - data->next = NULL; /* terminate circular list */ - do { - data = head->next; - svc_rdma_put_context(head, 0); - head = data; - } while (head != NULL); + svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt); ret = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len @@ -457,8 +422,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp, ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); - /* Indicate that we've consumed an RQ credit */ - rqstp->rq_xprt_ctxt = rqstp->rq_xprt; svc_xprt_received(rqstp->rq_xprt); return ret; } @@ -480,13 +443,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) dprintk("svcrdma: rqstp=%p\n", rqstp); - /* - * The rq_xprt_ctxt indicates if we've consumed an RQ credit - * or not. It is used in the rdma xpo_release_rqst function to - * determine whether or not to return an RQ WQE to the RQ. - */ - rqstp->rq_xprt_ctxt = NULL; - spin_lock_bh(&rdma_xprt->sc_read_complete_lock); if (!list_empty(&rdma_xprt->sc_read_complete_q)) { ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, @@ -537,21 +493,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) /* If the request is invalid, reply with an error */ if (len < 0) { if (len == -ENOSYS) - (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); + svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); goto close_out; } - /* Read read-list data. If we would need to wait, defer - * it. Not that in this case, we don't return the RQ credit - * until after the read completes. - */ - if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) { + /* Read read-list data. */ + ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt); + if (ret > 0) { + /* read-list posted, defer until data received from client. */ svc_xprt_received(xprt); return 0; } - - /* Indicate we've consumed an RQ credit */ - rqstp->rq_xprt_ctxt = rqstp->rq_xprt; + if (ret < 0) { + /* Post of read-list failed, free context. */ + svc_rdma_put_context(ctxt, 1); + return 0; + } ret = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len @@ -569,11 +526,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) return ret; close_out: - if (ctxt) { + if (ctxt) svc_rdma_put_context(ctxt, 1); - /* Indicate we've consumed an RQ credit */ - rqstp->rq_xprt_ctxt = rqstp->rq_xprt; - } dprintk("svcrdma: transport %p is closing\n", xprt); /* * Set the close bit and enqueue it. svc_recv will see the diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 981f190c1b39..84d328329d98 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -63,52 +63,44 @@ * SGE[2..sge_count-2] data from xdr->pages[] * SGE[sge_count-1] data from xdr->tail. * + * The max SGE we need is the length of the XDR / pagesize + one for + * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES + * reserves a page for both the request and the reply header, and this + * array is only concerned with the reply we are assured that we have + * on extra page for the RPCRMDA header. */ -static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, - struct xdr_buf *xdr, - struct ib_sge *sge, - int *sge_count) +static void xdr_to_sge(struct svcxprt_rdma *xprt, + struct xdr_buf *xdr, + struct svc_rdma_req_map *vec) { - /* Max we need is the length of the XDR / pagesize + one for - * head + one for tail + one for RPCRDMA header - */ int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; int sge_no; - u32 byte_count = xdr->len; u32 sge_bytes; u32 page_bytes; - int page_off; + u32 page_off; int page_no; + BUG_ON(xdr->len != + (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); + /* Skip the first sge, this is for the RPCRDMA header */ sge_no = 1; /* Head SGE */ - sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device, - xdr->head[0].iov_base, - xdr->head[0].iov_len, - DMA_TO_DEVICE); - sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len); - byte_count -= sge_bytes; - sge[sge_no].length = sge_bytes; - sge[sge_no].lkey = xprt->sc_phys_mr->lkey; + vec->sge[sge_no].iov_base = xdr->head[0].iov_base; + vec->sge[sge_no].iov_len = xdr->head[0].iov_len; sge_no++; /* pages SGE */ page_no = 0; page_bytes = xdr->page_len; page_off = xdr->page_base; - while (byte_count && page_bytes) { - sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off)); - sge[sge_no].addr = - ib_dma_map_page(xprt->sc_cm_id->device, - xdr->pages[page_no], page_off, - sge_bytes, DMA_TO_DEVICE); - sge_bytes = min(sge_bytes, page_bytes); - byte_count -= sge_bytes; + while (page_bytes) { + vec->sge[sge_no].iov_base = + page_address(xdr->pages[page_no]) + page_off; + sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); page_bytes -= sge_bytes; - sge[sge_no].length = sge_bytes; - sge[sge_no].lkey = xprt->sc_phys_mr->lkey; + vec->sge[sge_no].iov_len = sge_bytes; sge_no++; page_no++; @@ -116,36 +108,24 @@ static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, } /* Tail SGE */ - if (byte_count && xdr->tail[0].iov_len) { - sge[sge_no].addr = - ib_dma_map_single(xprt->sc_cm_id->device, - xdr->tail[0].iov_base, - xdr->tail[0].iov_len, - DMA_TO_DEVICE); - sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len); - byte_count -= sge_bytes; - sge[sge_no].length = sge_bytes; - sge[sge_no].lkey = xprt->sc_phys_mr->lkey; + if (xdr->tail[0].iov_len) { + vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; + vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; sge_no++; } BUG_ON(sge_no > sge_max); - BUG_ON(byte_count != 0); - - *sge_count = sge_no; - return sge; + vec->count = sge_no; } - /* Assumptions: * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE */ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, u32 rmr, u64 to, u32 xdr_off, int write_len, - struct ib_sge *xdr_sge, int sge_count) + struct svc_rdma_req_map *vec) { - struct svc_rdma_op_ctxt *tmp_sge_ctxt; struct ib_send_wr write_wr; struct ib_sge *sge; int xdr_sge_no; @@ -154,25 +134,23 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, int sge_off; int bc; struct svc_rdma_op_ctxt *ctxt; - int ret = 0; - BUG_ON(sge_count > RPCSVC_MAXPAGES); + BUG_ON(vec->count > RPCSVC_MAXPAGES); dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " - "write_len=%d, xdr_sge=%p, sge_count=%d\n", + "write_len=%d, vec->sge=%p, vec->count=%lu\n", rmr, (unsigned long long)to, xdr_off, - write_len, xdr_sge, sge_count); + write_len, vec->sge, vec->count); ctxt = svc_rdma_get_context(xprt); - ctxt->count = 0; - tmp_sge_ctxt = svc_rdma_get_context(xprt); - sge = tmp_sge_ctxt->sge; + ctxt->direction = DMA_TO_DEVICE; + sge = ctxt->sge; /* Find the SGE associated with xdr_off */ - for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count; + for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count; xdr_sge_no++) { - if (xdr_sge[xdr_sge_no].length > bc) + if (vec->sge[xdr_sge_no].iov_len > bc) break; - bc -= xdr_sge[xdr_sge_no].length; + bc -= vec->sge[xdr_sge_no].iov_len; } sge_off = bc; @@ -180,21 +158,29 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge_no = 0; /* Copy the remaining SGE */ - while (bc != 0 && xdr_sge_no < sge_count) { - sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off; - sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey; + while (bc != 0 && xdr_sge_no < vec->count) { + sge[sge_no].lkey = xprt->sc_phys_mr->lkey; sge_bytes = min((size_t)bc, - (size_t)(xdr_sge[xdr_sge_no].length-sge_off)); + (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off)); sge[sge_no].length = sge_bytes; - + atomic_inc(&xprt->sc_dma_used); + sge[sge_no].addr = + ib_dma_map_single(xprt->sc_cm_id->device, + (void *) + vec->sge[xdr_sge_no].iov_base + sge_off, + sge_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(xprt->sc_cm_id->device->dma_device, + sge[sge_no].addr)) + goto err; sge_off = 0; sge_no++; + ctxt->count++; xdr_sge_no++; bc -= sge_bytes; } BUG_ON(bc != 0); - BUG_ON(xdr_sge_no > sge_count); + BUG_ON(xdr_sge_no > vec->count); /* Prepare WRITE WR */ memset(&write_wr, 0, sizeof write_wr); @@ -209,21 +195,20 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, /* Post It */ atomic_inc(&rdma_stat_write); - if (svc_rdma_send(xprt, &write_wr)) { - svc_rdma_put_context(ctxt, 1); - /* Fatal error, close transport */ - ret = -EIO; - } - svc_rdma_put_context(tmp_sge_ctxt, 0); - return ret; + if (svc_rdma_send(xprt, &write_wr)) + goto err; + return 0; + err: + svc_rdma_put_context(ctxt, 0); + /* Fatal error, close transport */ + return -EIO; } static int send_write_chunks(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rdma_argp, struct rpcrdma_msg *rdma_resp, struct svc_rqst *rqstp, - struct ib_sge *sge, - int sge_count) + struct svc_rdma_req_map *vec) { u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; int write_len; @@ -269,8 +254,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, rs_offset + chunk_off, xdr_off, this_write, - sge, - sge_count); + vec); if (ret) { dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", ret); @@ -292,8 +276,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rdma_argp, struct rpcrdma_msg *rdma_resp, struct svc_rqst *rqstp, - struct ib_sge *sge, - int sge_count) + struct svc_rdma_req_map *vec) { u32 xfer_len = rqstp->rq_res.len; int write_len; @@ -341,8 +324,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, rs_offset + chunk_off, xdr_off, this_write, - sge, - sge_count); + vec); if (ret) { dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", ret); @@ -380,7 +362,7 @@ static int send_reply(struct svcxprt_rdma *rdma, struct page *page, struct rpcrdma_msg *rdma_resp, struct svc_rdma_op_ctxt *ctxt, - int sge_count, + struct svc_rdma_req_map *vec, int byte_count) { struct ib_send_wr send_wr; @@ -389,11 +371,23 @@ static int send_reply(struct svcxprt_rdma *rdma, int page_no; int ret; + /* Post a recv buffer to handle another request. */ + ret = svc_rdma_post_recv(rdma); + if (ret) { + printk(KERN_INFO + "svcrdma: could not post a receive buffer, err=%d." + "Closing transport %p.\n", ret, rdma); + set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + svc_rdma_put_context(ctxt, 0); + return -ENOTCONN; + } + /* Prepare the context */ ctxt->pages[0] = page; ctxt->count = 1; /* Prepare the SGE for the RPCRDMA Header */ + atomic_inc(&rdma->sc_dma_used); ctxt->sge[0].addr = ib_dma_map_page(rdma->sc_cm_id->device, page, 0, PAGE_SIZE, DMA_TO_DEVICE); @@ -402,10 +396,16 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; /* Determine how many of our SGE are to be transmitted */ - for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) { - sge_bytes = min((size_t)ctxt->sge[sge_no].length, - (size_t)byte_count); + for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { + sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; + atomic_inc(&rdma->sc_dma_used); + ctxt->sge[sge_no].addr = + ib_dma_map_single(rdma->sc_cm_id->device, + vec->sge[sge_no].iov_base, + sge_bytes, DMA_TO_DEVICE); + ctxt->sge[sge_no].length = sge_bytes; + ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey; } BUG_ON(byte_count != 0); @@ -417,8 +417,10 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; ctxt->count++; rqstp->rq_respages[page_no] = NULL; + /* If there are more pages than SGE, terminate SGE list */ + if (page_no+1 >= sge_no) + ctxt->sge[page_no+1].length = 0; } - BUG_ON(sge_no > rdma->sc_max_sge); memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; @@ -462,20 +464,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) enum rpcrdma_proc reply_type; int ret; int inline_bytes; - struct ib_sge *sge; - int sge_count = 0; struct page *res_page; struct svc_rdma_op_ctxt *ctxt; + struct svc_rdma_req_map *vec; dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); /* Get the RDMA request header. */ rdma_argp = xdr_start(&rqstp->rq_arg); - /* Build an SGE for the XDR */ + /* Build an req vec for the XDR */ ctxt = svc_rdma_get_context(rdma); ctxt->direction = DMA_TO_DEVICE; - sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count); + vec = svc_rdma_get_req_map(); + xdr_to_sge(rdma, &rqstp->rq_res, vec); inline_bytes = rqstp->rq_res.len; @@ -492,7 +494,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) /* Send any write-chunk data and build resp write-list */ ret = send_write_chunks(rdma, rdma_argp, rdma_resp, - rqstp, sge, sge_count); + rqstp, vec); if (ret < 0) { printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", ret); @@ -502,7 +504,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) /* Send any reply-list data and update resp reply-list */ ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, - rqstp, sge, sge_count); + rqstp, vec); if (ret < 0) { printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", ret); @@ -510,11 +512,13 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) } inline_bytes -= ret; - ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count, + ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec, inline_bytes); + svc_rdma_put_req_map(vec); dprintk("svcrdma: send_reply returns %d\n", ret); return ret; error: + svc_rdma_put_req_map(vec); svc_rdma_put_context(ctxt, 0); put_page(res_page); return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index af408fc12634..19ddc382b777 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -84,67 +84,37 @@ struct svc_xprt_class svc_rdma_class = { .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, }; -static int rdma_bump_context_cache(struct svcxprt_rdma *xprt) +/* WR context cache. Created in svc_rdma.c */ +extern struct kmem_cache *svc_rdma_ctxt_cachep; + +struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { - int target; - int at_least_one = 0; struct svc_rdma_op_ctxt *ctxt; - target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump, - xprt->sc_ctxt_max); - - spin_lock_bh(&xprt->sc_ctxt_lock); - while (xprt->sc_ctxt_cnt < target) { - xprt->sc_ctxt_cnt++; - spin_unlock_bh(&xprt->sc_ctxt_lock); - - ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); - - spin_lock_bh(&xprt->sc_ctxt_lock); - if (ctxt) { - at_least_one = 1; - ctxt->next = xprt->sc_ctxt_head; - xprt->sc_ctxt_head = ctxt; - } else { - /* kmalloc failed...give up for now */ - xprt->sc_ctxt_cnt--; + while (1) { + ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL); + if (ctxt) break; - } + schedule_timeout_uninterruptible(msecs_to_jiffies(500)); } - spin_unlock_bh(&xprt->sc_ctxt_lock); - dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n", - xprt->sc_ctxt_max, xprt->sc_ctxt_cnt); - return at_least_one; + ctxt->xprt = xprt; + INIT_LIST_HEAD(&ctxt->dto_q); + ctxt->count = 0; + atomic_inc(&xprt->sc_ctxt_used); + return ctxt; } -struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) +static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) { - struct svc_rdma_op_ctxt *ctxt; - - while (1) { - spin_lock_bh(&xprt->sc_ctxt_lock); - if (unlikely(xprt->sc_ctxt_head == NULL)) { - /* Try to bump my cache. */ - spin_unlock_bh(&xprt->sc_ctxt_lock); - - if (rdma_bump_context_cache(xprt)) - continue; - - printk(KERN_INFO "svcrdma: sleeping waiting for " - "context memory on xprt=%p\n", - xprt); - schedule_timeout_uninterruptible(msecs_to_jiffies(500)); - continue; - } - ctxt = xprt->sc_ctxt_head; - xprt->sc_ctxt_head = ctxt->next; - spin_unlock_bh(&xprt->sc_ctxt_lock); - ctxt->xprt = xprt; - INIT_LIST_HEAD(&ctxt->dto_q); - ctxt->count = 0; - break; + struct svcxprt_rdma *xprt = ctxt->xprt; + int i; + for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { + atomic_dec(&xprt->sc_dma_used); + ib_dma_unmap_single(xprt->sc_cm_id->device, + ctxt->sge[i].addr, + ctxt->sge[i].length, + ctxt->direction); } - return ctxt; } void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) @@ -158,15 +128,34 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) for (i = 0; i < ctxt->count; i++) put_page(ctxt->pages[i]); - for (i = 0; i < ctxt->count; i++) - dma_unmap_single(xprt->sc_cm_id->device->dma_device, - ctxt->sge[i].addr, - ctxt->sge[i].length, - ctxt->direction); - spin_lock_bh(&xprt->sc_ctxt_lock); - ctxt->next = xprt->sc_ctxt_head; - xprt->sc_ctxt_head = ctxt; - spin_unlock_bh(&xprt->sc_ctxt_lock); + kmem_cache_free(svc_rdma_ctxt_cachep, ctxt); + atomic_dec(&xprt->sc_ctxt_used); +} + +/* Temporary NFS request map cache. Created in svc_rdma.c */ +extern struct kmem_cache *svc_rdma_map_cachep; + +/* + * Temporary NFS req mappings are shared across all transport + * instances. These are short lived and should be bounded by the number + * of concurrent server threads * depth of the SQ. + */ +struct svc_rdma_req_map *svc_rdma_get_req_map(void) +{ + struct svc_rdma_req_map *map; + while (1) { + map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL); + if (map) + break; + schedule_timeout_uninterruptible(msecs_to_jiffies(500)); + } + map->count = 0; + return map; +} + +void svc_rdma_put_req_map(struct svc_rdma_req_map *map) +{ + kmem_cache_free(svc_rdma_map_cachep, map); } /* ib_cq event handler */ @@ -228,23 +217,8 @@ static void dto_tasklet_func(unsigned long data) list_del_init(&xprt->sc_dto_q); spin_unlock_irqrestore(&dto_lock, flags); - if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) { - ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); - rq_cq_reap(xprt); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - /* - * If data arrived before established event, - * don't enqueue. This defers RPC I/O until the - * RDMA connection is complete. - */ - if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) - svc_xprt_enqueue(&xprt->sc_xprt); - } - - if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) { - ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); - sq_cq_reap(xprt); - } + rq_cq_reap(xprt); + sq_cq_reap(xprt); svc_xprt_put(&xprt->sc_xprt); spin_lock_irqsave(&dto_lock, flags); @@ -263,11 +237,15 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context) struct svcxprt_rdma *xprt = cq_context; unsigned long flags; + /* Guard against unconditional flush call for destroyed QP */ + if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0) + return; + /* * Set the bit regardless of whether or not it's on the list * because it may be on the list already due to an SQ * completion. - */ + */ set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags); /* @@ -290,6 +268,8 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context) * * Take all completing WC off the CQE and enqueue the associated DTO * context on the dto_q for the transport. + * + * Note that caller must hold a transport reference. */ static void rq_cq_reap(struct svcxprt_rdma *xprt) { @@ -297,29 +277,48 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) struct ib_wc wc; struct svc_rdma_op_ctxt *ctxt = NULL; + if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) + return; + + ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_rq_poll); - spin_lock_bh(&xprt->sc_rq_dto_lock); while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; ctxt->wc_status = wc.status; ctxt->byte_len = wc.byte_len; + svc_rdma_unmap_dma(ctxt); if (wc.status != IB_WC_SUCCESS) { /* Close the transport */ + dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 1); + svc_xprt_put(&xprt->sc_xprt); continue; } + spin_lock_bh(&xprt->sc_rq_dto_lock); list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); + spin_unlock_bh(&xprt->sc_rq_dto_lock); + svc_xprt_put(&xprt->sc_xprt); } - spin_unlock_bh(&xprt->sc_rq_dto_lock); if (ctxt) atomic_inc(&rdma_stat_rq_prod); + + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + /* + * If data arrived before established event, + * don't enqueue. This defers RPC I/O until the + * RDMA connection is complete. + */ + if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) + svc_xprt_enqueue(&xprt->sc_xprt); } /* * Send Queue Completion Handler - potentially called on interrupt context. + * + * Note that caller must hold a transport reference. */ static void sq_cq_reap(struct svcxprt_rdma *xprt) { @@ -328,11 +327,17 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) struct ib_cq *cq = xprt->sc_sq_cq; int ret; + + if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) + return; + + ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; xprt = ctxt->xprt; + svc_rdma_unmap_dma(ctxt); if (wc.status != IB_WC_SUCCESS) /* Close the transport */ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -343,20 +348,25 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) switch (ctxt->wr_op) { case IB_WR_SEND: - case IB_WR_RDMA_WRITE: svc_rdma_put_context(ctxt, 1); break; + case IB_WR_RDMA_WRITE: + svc_rdma_put_context(ctxt, 0); + break; + case IB_WR_RDMA_READ: if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { + struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; + BUG_ON(!read_hdr); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); spin_lock_bh(&xprt->sc_read_complete_lock); - list_add_tail(&ctxt->dto_q, + list_add_tail(&read_hdr->dto_q, &xprt->sc_read_complete_q); spin_unlock_bh(&xprt->sc_read_complete_lock); svc_xprt_enqueue(&xprt->sc_xprt); } + svc_rdma_put_context(ctxt, 0); break; default: @@ -365,6 +375,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) wc.opcode, wc.status); break; } + svc_xprt_put(&xprt->sc_xprt); } if (ctxt) @@ -376,11 +387,15 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context) struct svcxprt_rdma *xprt = cq_context; unsigned long flags; + /* Guard against unconditional flush call for destroyed QP */ + if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0) + return; + /* * Set the bit regardless of whether or not it's on the list * because it may be on the list already due to an RQ * completion. - */ + */ set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags); /* @@ -398,39 +413,6 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context) tasklet_schedule(&dto_tasklet); } -static void create_context_cache(struct svcxprt_rdma *xprt, - int ctxt_count, int ctxt_bump, int ctxt_max) -{ - struct svc_rdma_op_ctxt *ctxt; - int i; - - xprt->sc_ctxt_max = ctxt_max; - xprt->sc_ctxt_bump = ctxt_bump; - xprt->sc_ctxt_cnt = 0; - xprt->sc_ctxt_head = NULL; - for (i = 0; i < ctxt_count; i++) { - ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); - if (ctxt) { - ctxt->next = xprt->sc_ctxt_head; - xprt->sc_ctxt_head = ctxt; - xprt->sc_ctxt_cnt++; - } - } -} - -static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt) -{ - struct svc_rdma_op_ctxt *next; - if (!ctxt) - return; - - do { - next = ctxt->next; - kfree(ctxt); - ctxt = next; - } while (next); -} - static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, int listener) { @@ -447,7 +429,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_read_complete_lock); - spin_lock_init(&cma_xprt->sc_ctxt_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); cma_xprt->sc_ord = svcrdma_ord; @@ -456,21 +437,9 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, cma_xprt->sc_max_requests = svcrdma_max_requests; cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT; atomic_set(&cma_xprt->sc_sq_count, 0); + atomic_set(&cma_xprt->sc_ctxt_used, 0); - if (!listener) { - int reqs = cma_xprt->sc_max_requests; - create_context_cache(cma_xprt, - reqs << 1, /* starting size */ - reqs, /* bump amount */ - reqs + - cma_xprt->sc_sq_depth + - RPCRDMA_MAX_THREADS + 1); /* max */ - if (!cma_xprt->sc_ctxt_head) { - kfree(cma_xprt); - return NULL; - } - clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); - } else + if (listener) set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); return cma_xprt; @@ -506,6 +475,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) BUG_ON(sge_no >= xprt->sc_max_sge); page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; + atomic_inc(&xprt->sc_dma_used); pa = ib_dma_map_page(xprt->sc_cm_id->device, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); @@ -520,7 +490,12 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) recv_wr.num_sge = ctxt->count; recv_wr.wr_id = (u64)(unsigned long)ctxt; + svc_xprt_get(&xprt->sc_xprt); ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); + if (ret) { + svc_xprt_put(&xprt->sc_xprt); + svc_rdma_put_context(ctxt, 1); + } return ret; } @@ -535,10 +510,11 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) * will call the recvfrom method on the listen xprt which will accept the new * connection. */ -static void handle_connect_req(struct rdma_cm_id *new_cma_id) +static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird) { struct svcxprt_rdma *listen_xprt = new_cma_id->context; struct svcxprt_rdma *newxprt; + struct sockaddr *sa; /* Create a new transport */ newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0); @@ -551,6 +527,15 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id) dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", newxprt, newxprt->sc_cm_id, listen_xprt); + /* Save client advertised inbound read limit for use later in accept. */ + newxprt->sc_ord = client_ird; + + /* Set the local and remote addresses in the transport */ + sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; + svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); + sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; + svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa)); + /* * Enqueue the new transport on the accept queue of the listening * transport @@ -581,7 +566,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, case RDMA_CM_EVENT_CONNECT_REQUEST: dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " "event=%d\n", cma_id, cma_id->context, event->event); - handle_connect_req(cma_id); + handle_connect_req(cma_id, + event->param.conn.responder_resources); break; case RDMA_CM_EVENT_ESTABLISHED: @@ -627,6 +613,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id, if (xprt) { set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + svc_xprt_put(xprt); } break; case RDMA_CM_EVENT_DEVICE_REMOVAL: @@ -661,31 +648,27 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, cma_xprt = rdma_create_xprt(serv, 1); if (!cma_xprt) - return ERR_PTR(ENOMEM); + return ERR_PTR(-ENOMEM); xprt = &cma_xprt->sc_xprt; listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); if (IS_ERR(listen_id)) { - svc_xprt_put(&cma_xprt->sc_xprt); - dprintk("svcrdma: rdma_create_id failed = %ld\n", - PTR_ERR(listen_id)); - return (void *)listen_id; + ret = PTR_ERR(listen_id); + dprintk("svcrdma: rdma_create_id failed = %d\n", ret); + goto err0; } + ret = rdma_bind_addr(listen_id, sa); if (ret) { - rdma_destroy_id(listen_id); - svc_xprt_put(&cma_xprt->sc_xprt); dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); - return ERR_PTR(ret); + goto err1; } cma_xprt->sc_cm_id = listen_id; ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); if (ret) { - rdma_destroy_id(listen_id); - svc_xprt_put(&cma_xprt->sc_xprt); dprintk("svcrdma: rdma_listen failed = %d\n", ret); - return ERR_PTR(ret); + goto err1; } /* @@ -696,6 +679,12 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen); return &cma_xprt->sc_xprt; + + err1: + rdma_destroy_id(listen_id); + err0: + kfree(cma_xprt); + return ERR_PTR(ret); } /* @@ -716,7 +705,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; - struct sockaddr *sa; int ret; int i; @@ -753,8 +741,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) (size_t)svcrdma_max_requests); newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; - newxprt->sc_ord = min((size_t)devattr.max_qp_rd_atom, - (size_t)svcrdma_ord); + /* + * Limit ORD based on client limit, local device limit, and + * configured svcrdma limit. + */ + newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord); + newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord); newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device); if (IS_ERR(newxprt->sc_pd)) { @@ -826,7 +818,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; } - svc_xprt_get(&newxprt->sc_xprt); newxprt->sc_qp = newxprt->sc_cm_id->qp; /* Register all of physical memory */ @@ -850,6 +841,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Swap out the handler */ newxprt->sc_cm_id->event_handler = rdma_cma_handler; + /* + * Arm the CQs for the SQ and RQ before accepting so we can't + * miss the first message + */ + ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP); + /* Accept Connection */ set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); memset(&conn_param, 0, sizeof conn_param); @@ -886,58 +884,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_requests, newxprt->sc_ord); - /* Set the local and remote addresses in the transport */ - sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; - svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); - sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; - svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa)); - - ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP); - ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP); return &newxprt->sc_xprt; errout: dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); /* Take a reference in case the DTO handler runs */ svc_xprt_get(&newxprt->sc_xprt); - if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) { + if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) ib_destroy_qp(newxprt->sc_qp); - svc_xprt_put(&newxprt->sc_xprt); - } rdma_destroy_id(newxprt->sc_cm_id); /* This call to put will destroy the transport */ svc_xprt_put(&newxprt->sc_xprt); return NULL; } -/* - * Post an RQ WQE to the RQ when the rqst is being released. This - * effectively returns an RQ credit to the client. The rq_xprt_ctxt - * will be null if the request is deferred due to an RDMA_READ or the - * transport had no data ready (EAGAIN). Note that an RPC deferred in - * svc_process will still return the credit, this is because the data - * is copied and no longer consume a WQE/WC. - */ static void svc_rdma_release_rqst(struct svc_rqst *rqstp) { - int err; - struct svcxprt_rdma *rdma = - container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt); - if (rqstp->rq_xprt_ctxt) { - BUG_ON(rqstp->rq_xprt_ctxt != rdma); - err = svc_rdma_post_recv(rdma); - if (err) - dprintk("svcrdma: failed to post an RQ WQE error=%d\n", - err); - } - rqstp->rq_xprt_ctxt = NULL; } /* - * When connected, an svc_xprt has at least three references: - * - * - A reference held by the QP. We still hold that here because this - * code deletes the QP and puts the reference. + * When connected, an svc_xprt has at least two references: * * - A reference held by the cm_id between the ESTABLISHED and * DISCONNECTED events. If the remote peer disconnected first, this @@ -946,7 +912,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp) * - A reference held by the svc_recv code that called this function * as part of close processing. * - * At a minimum two references should still be held. + * At a minimum one references should still be held. */ static void svc_rdma_detach(struct svc_xprt *xprt) { @@ -956,23 +922,50 @@ static void svc_rdma_detach(struct svc_xprt *xprt) /* Disconnect and flush posted WQE */ rdma_disconnect(rdma->sc_cm_id); - - /* Destroy the QP if present (not a listener) */ - if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) { - ib_destroy_qp(rdma->sc_qp); - svc_xprt_put(xprt); - } - - /* Destroy the CM ID */ - rdma_destroy_id(rdma->sc_cm_id); } -static void svc_rdma_free(struct svc_xprt *xprt) +static void __svc_rdma_free(struct work_struct *work) { - struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; + struct svcxprt_rdma *rdma = + container_of(work, struct svcxprt_rdma, sc_work); dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); + /* We should only be called from kref_put */ - BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0); + BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0); + + /* + * Destroy queued, but not processed read completions. Note + * that this cleanup has to be done before destroying the + * cm_id because the device ptr is needed to unmap the dma in + * svc_rdma_put_context. + */ + while (!list_empty(&rdma->sc_read_complete_q)) { + struct svc_rdma_op_ctxt *ctxt; + ctxt = list_entry(rdma->sc_read_complete_q.next, + struct svc_rdma_op_ctxt, + dto_q); + list_del_init(&ctxt->dto_q); + svc_rdma_put_context(ctxt, 1); + } + + /* Destroy queued, but not processed recv completions */ + while (!list_empty(&rdma->sc_rq_dto_q)) { + struct svc_rdma_op_ctxt *ctxt; + ctxt = list_entry(rdma->sc_rq_dto_q.next, + struct svc_rdma_op_ctxt, + dto_q); + list_del_init(&ctxt->dto_q); + svc_rdma_put_context(ctxt, 1); + } + + /* Warn if we leaked a resource or under-referenced */ + WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); + WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); + + /* Destroy the QP if present (not a listener) */ + if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) + ib_destroy_qp(rdma->sc_qp); + if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) ib_destroy_cq(rdma->sc_sq_cq); @@ -985,10 +978,20 @@ static void svc_rdma_free(struct svc_xprt *xprt) if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) ib_dealloc_pd(rdma->sc_pd); - destroy_context_cache(rdma->sc_ctxt_head); + /* Destroy the CM ID */ + rdma_destroy_id(rdma->sc_cm_id); + kfree(rdma); } +static void svc_rdma_free(struct svc_xprt *xprt) +{ + struct svcxprt_rdma *rdma = + container_of(xprt, struct svcxprt_rdma, sc_xprt); + INIT_WORK(&rdma->sc_work, __svc_rdma_free); + schedule_work(&rdma->sc_work); +} + static int svc_rdma_has_wspace(struct svc_xprt *xprt) { struct svcxprt_rdma *rdma = @@ -1018,7 +1021,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) int ret; if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) - return 0; + return -ENOTCONN; BUG_ON(wr->send_flags != IB_SEND_SIGNALED); BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != @@ -1029,7 +1032,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { spin_unlock_bh(&xprt->sc_lock); atomic_inc(&rdma_stat_sq_starve); - /* See if we can reap some SQ WR */ + + /* See if we can opportunistically reap SQ WR to make room */ sq_cq_reap(xprt); /* Wait until SQ WR available if SQ still full */ @@ -1041,22 +1045,25 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) continue; } /* Bumped used SQ WR count and post */ + svc_xprt_get(&xprt->sc_xprt); ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); if (!ret) atomic_inc(&xprt->sc_sq_count); - else + else { + svc_xprt_put(&xprt->sc_xprt); dprintk("svcrdma: failed to post SQ WR rc=%d, " "sc_sq_count=%d, sc_sq_depth=%d\n", ret, atomic_read(&xprt->sc_sq_count), xprt->sc_sq_depth); + } spin_unlock_bh(&xprt->sc_lock); break; } return ret; } -int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, - enum rpcrdma_errcode err) +void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, + enum rpcrdma_errcode err) { struct ib_send_wr err_wr; struct ib_sge sge; @@ -1073,6 +1080,7 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); /* Prepare SGE for local address */ + atomic_inc(&xprt->sc_dma_used); sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, p, 0, PAGE_SIZE, DMA_FROM_DEVICE); sge.lkey = xprt->sc_phys_mr->lkey; @@ -1094,9 +1102,8 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, /* Post It */ ret = svc_rdma_send(xprt, &err_wr); if (ret) { - dprintk("svcrdma: Error posting send = %d\n", ret); + dprintk("svcrdma: Error %d posting send for protocol error\n", + ret); svc_rdma_put_context(ctxt, 1); } - - return ret; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ddbe981ab516..4486c59c3aca 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -579,7 +579,6 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_svec->iov_base, req->rq_svec->iov_len); - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, xdr, @@ -671,7 +670,6 @@ static int xs_tcp_send_request(struct rpc_task *task) * to cope with writespace callbacks arriving _after_ we have * called sendmsg(). */ while (1) { - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent); diff --git a/net/sysctl_net.c b/net/sysctl_net.c index b4f0525f91af..972201cd5fa7 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -4,7 +4,6 @@ * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net directories for each protocol family. [MS] * - * $Log: sysctl_net.c,v $ * Revision 1.2 1996/05/08 20:24:40 shaver * Added bits for NET_BRIDGE and the NET_IPV4_ARP stuff and * NET_IPV4_IP_FORWARD. @@ -30,25 +29,59 @@ #include <linux/if_tr.h> #endif -static struct list_head * +static struct ctl_table_set * net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces) { - return &namespaces->net_ns->sysctl_table_headers; + return &namespaces->net_ns->sysctls; +} + +static int is_seen(struct ctl_table_set *set) +{ + return ¤t->nsproxy->net_ns->sysctls == set; +} + +/* Return standard mode bits for table entry. */ +static int net_ctl_permissions(struct ctl_table_root *root, + struct nsproxy *nsproxy, + struct ctl_table *table) +{ + /* Allow network administrator to have same access as root. */ + if (capable(CAP_NET_ADMIN)) { + int mode = (table->mode >> 6) & 7; + return (mode << 6) | (mode << 3) | mode; + } + return table->mode; } static struct ctl_table_root net_sysctl_root = { .lookup = net_ctl_header_lookup, + .permissions = net_ctl_permissions, +}; + +static int net_ctl_ro_header_perms(struct ctl_table_root *root, + struct nsproxy *namespaces, struct ctl_table *table) +{ + if (namespaces->net_ns == &init_net) + return table->mode; + else + return table->mode & ~0222; +} + +static struct ctl_table_root net_sysctl_ro_root = { + .permissions = net_ctl_ro_header_perms, }; static int sysctl_net_init(struct net *net) { - INIT_LIST_HEAD(&net->sysctl_table_headers); + setup_sysctl_set(&net->sysctls, + &net_sysctl_ro_root.default_set, + is_seen); return 0; } static void sysctl_net_exit(struct net *net) { - WARN_ON(!list_empty(&net->sysctl_table_headers)); + WARN_ON(!list_empty(&net->sysctls.list)); return; } @@ -64,6 +97,8 @@ static __init int sysctl_init(void) if (ret) goto out; register_sysctl_root(&net_sysctl_root); + setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL, NULL); + register_sysctl_root(&net_sysctl_ro_root); out: return ret; } @@ -80,6 +115,14 @@ struct ctl_table_header *register_net_sysctl_table(struct net *net, } EXPORT_SYMBOL_GPL(register_net_sysctl_table); +struct ctl_table_header *register_net_sysctl_rotable(const + struct ctl_path *path, struct ctl_table *table) +{ + return __register_sysctl_paths(&net_sysctl_ro_root, + &init_nsproxy, path, table); +} +EXPORT_SYMBOL_GPL(register_net_sysctl_rotable); + void unregister_net_sysctl_table(struct ctl_table_header *header) { unregister_sysctl_table(header); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e7880172ef19..b1ff16aa4bdb 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -276,7 +276,7 @@ static void bclink_send_nack(struct node *n_ptr) if (buf) { msg = buf_msg(buf); msg_init(msg, BCAST_PROTOCOL, STATE_MSG, - TIPC_OK, INT_H_SIZE, n_ptr->addr); + INT_H_SIZE, n_ptr->addr); msg_set_mc_netid(msg, tipc_net_id); msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in)); msg_set_bcgap_after(msg, n_ptr->bclink.gap_after); @@ -571,7 +571,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, assert(tipc_cltr_bcast_nodes.count != 0); bcbuf_set_acks(buf, tipc_cltr_bcast_nodes.count); msg = buf_msg(buf); - msg_set_non_seq(msg); + msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tipc_net_id); } @@ -611,7 +611,7 @@ swap: bcbearer->bpairs[bp_index].secondary = p; update: if (bcbearer->remains_new.count == 0) - return TIPC_OK; + return 0; bcbearer->remains = bcbearer->remains_new; } @@ -620,7 +620,7 @@ update: bcbearer->bearer.publ.blocked = 1; bcl->stats.bearer_congs++; - return ~TIPC_OK; + return 1; } /** @@ -756,7 +756,7 @@ int tipc_bclink_reset_stats(void) spin_lock_bh(&bc_lock); memset(&bcl->stats, 0, sizeof(bcl->stats)); spin_unlock_bh(&bc_lock); - return TIPC_OK; + return 0; } int tipc_bclink_set_queue_limits(u32 limit) @@ -769,7 +769,7 @@ int tipc_bclink_set_queue_limits(u32 limit) spin_lock_bh(&bc_lock); tipc_link_set_queue_limits(bcl, limit); spin_unlock_bh(&bc_lock); - return TIPC_OK; + return 0; } int tipc_bclink_init(void) @@ -810,7 +810,7 @@ int tipc_bclink_init(void) tipc_printbuf_init(&bcl->print_buf, pb, BCLINK_LOG_BUF_SIZE); } - return TIPC_OK; + return 0; } void tipc_bclink_stop(void) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 271a375b49b7..6a9aba3edd08 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -370,7 +370,7 @@ void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest) */ static int bearer_push(struct bearer *b_ptr) { - u32 res = TIPC_OK; + u32 res = 0; struct link *ln, *tln; if (b_ptr->publ.blocked) @@ -607,7 +607,7 @@ int tipc_block_bearer(const char *name) } spin_unlock_bh(&b_ptr->publ.lock); read_unlock_bh(&tipc_net_lock); - return TIPC_OK; + return 0; } /** @@ -645,7 +645,7 @@ static int bearer_disable(const char *name) } spin_unlock_bh(&b_ptr->publ.lock); memset(b_ptr, 0, sizeof(struct bearer)); - return TIPC_OK; + return 0; } int tipc_disable_bearer(const char *name) @@ -668,7 +668,7 @@ int tipc_bearer_init(void) tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC); media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC); if (tipc_bearers && media_list) { - res = TIPC_OK; + res = 0; } else { kfree(tipc_bearers); kfree(media_list); diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c index 4bb3404f610b..46ee6c58532d 100644 --- a/net/tipc/cluster.c +++ b/net/tipc/cluster.c @@ -238,7 +238,7 @@ static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest) if (buf) { msg = buf_msg(buf); memset((char *)msg, 0, size); - msg_init(msg, ROUTE_DISTRIBUTOR, 0, TIPC_OK, INT_H_SIZE, dest); + msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest); } return buf; } @@ -571,6 +571,6 @@ exit: int tipc_cltr_init(void) { tipc_highest_allowed_slave = LOWEST_SLAVE + tipc_max_slaves; - return tipc_cltr_create(tipc_own_addr) ? TIPC_OK : -ENOMEM; + return tipc_cltr_create(tipc_own_addr) ? 0 : -ENOMEM; } diff --git a/net/tipc/config.c b/net/tipc/config.c index c71337a22d33..ca3544d030c7 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2006, Wind River Systems + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -293,7 +293,6 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_mode == TIPC_NET_MODE) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - tipc_own_addr = addr; /* * Must release all spinlocks before calling start_net() because @@ -306,7 +305,7 @@ static struct sk_buff *cfg_set_own_addr(void) */ spin_unlock_bh(&config_lock); - tipc_core_start_net(); + tipc_core_start_net(addr); spin_lock_bh(&config_lock); return tipc_cfg_reply_none(); } @@ -529,7 +528,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area break; #endif case TIPC_CMD_SET_LOG_SIZE: - rep_tlv_buf = tipc_log_resize(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_log_resize_cmd(req_tlv_area, req_tlv_space); break; case TIPC_CMD_DUMP_LOG: rep_tlv_buf = tipc_log_dump(); @@ -602,6 +601,10 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; + case TIPC_CMD_NOT_NET_ADMIN: + rep_tlv_buf = + tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); + break; default: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (unknown command)"); diff --git a/net/tipc/core.c b/net/tipc/core.c index 740aac5cdfb6..3256bd7d398f 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -49,7 +49,7 @@ #include "config.h" -#define TIPC_MOD_VER "1.6.3" +#define TIPC_MOD_VER "1.6.4" #ifndef CONFIG_TIPC_ZONES #define CONFIG_TIPC_ZONES 3 @@ -117,11 +117,11 @@ void tipc_core_stop_net(void) * start_net - start TIPC networking sub-systems */ -int tipc_core_start_net(void) +int tipc_core_start_net(unsigned long addr) { int res; - if ((res = tipc_net_start()) || + if ((res = tipc_net_start(addr)) || (res = tipc_eth_media_start())) { tipc_core_stop_net(); } @@ -164,8 +164,7 @@ int tipc_core_start(void) tipc_mode = TIPC_NODE_MODE; if ((res = tipc_handler_start()) || - (res = tipc_ref_table_init(tipc_max_ports + tipc_max_subscriptions, - tipc_random)) || + (res = tipc_ref_table_init(tipc_max_ports, tipc_random)) || (res = tipc_reg_start()) || (res = tipc_nametbl_init()) || (res = tipc_k_signal((Handler)tipc_subscr_start, 0)) || @@ -182,7 +181,7 @@ static int __init tipc_init(void) { int res; - tipc_log_reinit(CONFIG_TIPC_LOG); + tipc_log_resize(CONFIG_TIPC_LOG); info("Activated (version " TIPC_MOD_VER " compiled " __DATE__ " " __TIME__ ")\n"); @@ -209,7 +208,7 @@ static void __exit tipc_exit(void) tipc_core_stop_net(); tipc_core_stop(); info("Deactivated\n"); - tipc_log_stop(); + tipc_log_resize(0); } module_init(tipc_init); diff --git a/net/tipc/core.h b/net/tipc/core.h index 325404fd4eb5..a881f92a8537 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -2,7 +2,7 @@ * net/tipc/core.h: Include file for TIPC global declarations * * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,84 +59,108 @@ #include <linux/vmalloc.h> /* - * TIPC debugging code + * TIPC sanity test macros */ #define assert(i) BUG_ON(!(i)) -struct tipc_msg; -extern struct print_buf *TIPC_NULL, *TIPC_CONS, *TIPC_LOG; -extern struct print_buf *TIPC_TEE(struct print_buf *, struct print_buf *); -void tipc_msg_print(struct print_buf*,struct tipc_msg *,const char*); -void tipc_printf(struct print_buf *, const char *fmt, ...); -void tipc_dump(struct print_buf*,const char *fmt, ...); - -#ifdef CONFIG_TIPC_DEBUG - /* - * TIPC debug support included: - * - system messages are printed to TIPC_OUTPUT print buffer - * - debug messages are printed to DBG_OUTPUT print buffer + * TIPC system monitoring code */ -#define err(fmt, arg...) tipc_printf(TIPC_OUTPUT, KERN_ERR "TIPC: " fmt, ## arg) -#define warn(fmt, arg...) tipc_printf(TIPC_OUTPUT, KERN_WARNING "TIPC: " fmt, ## arg) -#define info(fmt, arg...) tipc_printf(TIPC_OUTPUT, KERN_NOTICE "TIPC: " fmt, ## arg) +/* + * TIPC's print buffer subsystem supports the following print buffers: + * + * TIPC_NULL : null buffer (i.e. print nowhere) + * TIPC_CONS : system console + * TIPC_LOG : TIPC log buffer + * &buf : user-defined buffer (struct print_buf *) + * + * Note: TIPC_LOG is configured to echo its output to the system console; + * user-defined buffers can be configured to do the same thing. + */ -#define dbg(fmt, arg...) do {if (DBG_OUTPUT != TIPC_NULL) tipc_printf(DBG_OUTPUT, fmt, ## arg);} while(0) -#define msg_dbg(msg, txt) do {if (DBG_OUTPUT != TIPC_NULL) tipc_msg_print(DBG_OUTPUT, msg, txt);} while(0) -#define dump(fmt, arg...) do {if (DBG_OUTPUT != TIPC_NULL) tipc_dump(DBG_OUTPUT, fmt, ##arg);} while(0) +extern struct print_buf *const TIPC_NULL; +extern struct print_buf *const TIPC_CONS; +extern struct print_buf *const TIPC_LOG; +void tipc_printf(struct print_buf *, const char *fmt, ...); /* - * By default, TIPC_OUTPUT is defined to be system console and TIPC log buffer, - * while DBG_OUTPUT is the null print buffer. These defaults can be changed - * here, or on a per .c file basis, by redefining these symbols. The following - * print buffer options are available: - * - * TIPC_NULL : null buffer (i.e. print nowhere) - * TIPC_CONS : system console - * TIPC_LOG : TIPC log buffer - * &buf : user-defined buffer (struct print_buf *) - * TIPC_TEE(&buf_a,&buf_b) : list of buffers (eg. TIPC_TEE(TIPC_CONS,TIPC_LOG)) + * TIPC_OUTPUT is the destination print buffer for system messages. */ #ifndef TIPC_OUTPUT -#define TIPC_OUTPUT TIPC_TEE(TIPC_CONS,TIPC_LOG) -#endif - -#ifndef DBG_OUTPUT -#define DBG_OUTPUT TIPC_NULL +#define TIPC_OUTPUT TIPC_LOG #endif -#else - /* - * TIPC debug support not included: - * - system messages are printed to system console - * - debug messages are not printed + * TIPC can be configured to send system messages to TIPC_OUTPUT + * or to the system console only. */ +#ifdef CONFIG_TIPC_DEBUG + +#define err(fmt, arg...) tipc_printf(TIPC_OUTPUT, \ + KERN_ERR "TIPC: " fmt, ## arg) +#define warn(fmt, arg...) tipc_printf(TIPC_OUTPUT, \ + KERN_WARNING "TIPC: " fmt, ## arg) +#define info(fmt, arg...) tipc_printf(TIPC_OUTPUT, \ + KERN_NOTICE "TIPC: " fmt, ## arg) + +#else + #define err(fmt, arg...) printk(KERN_ERR "TIPC: " fmt , ## arg) #define info(fmt, arg...) printk(KERN_INFO "TIPC: " fmt , ## arg) #define warn(fmt, arg...) printk(KERN_WARNING "TIPC: " fmt , ## arg) -#define dbg(fmt, arg...) do {} while (0) -#define msg_dbg(msg,txt) do {} while (0) -#define dump(fmt,arg...) do {} while (0) +#endif +/* + * DBG_OUTPUT is the destination print buffer for debug messages. + * It defaults to the the null print buffer, but can be redefined + * (typically in the individual .c files being debugged) to allow + * selected debug messages to be generated where needed. + */ + +#ifndef DBG_OUTPUT +#define DBG_OUTPUT TIPC_NULL +#endif /* - * TIPC_OUTPUT is defined to be the system console, while DBG_OUTPUT is - * the null print buffer. Thes ensures that any system or debug messages - * that are generated without using the above macros are handled correctly. + * TIPC can be configured to send debug messages to the specified print buffer + * (typically DBG_OUTPUT) or to suppress them entirely. */ -#undef TIPC_OUTPUT -#define TIPC_OUTPUT TIPC_CONS +#ifdef CONFIG_TIPC_DEBUG -#undef DBG_OUTPUT -#define DBG_OUTPUT TIPC_NULL +#define dbg(fmt, arg...) \ + do { \ + if (DBG_OUTPUT != TIPC_NULL) \ + tipc_printf(DBG_OUTPUT, fmt, ## arg); \ + } while (0) +#define msg_dbg(msg, txt) \ + do { \ + if (DBG_OUTPUT != TIPC_NULL) \ + tipc_msg_dbg(DBG_OUTPUT, msg, txt); \ + } while (0) +#define dump(fmt, arg...) \ + do { \ + if (DBG_OUTPUT != TIPC_NULL) \ + tipc_dump_dbg(DBG_OUTPUT, fmt, ##arg); \ + } while (0) + +void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *); +void tipc_dump_dbg(struct print_buf *, const char *fmt, ...); + +#else + +#define dbg(fmt, arg...) do {} while (0) +#define msg_dbg(msg, txt) do {} while (0) +#define dump(fmt, arg...) do {} while (0) + +#define tipc_msg_dbg(...) do {} while (0) +#define tipc_dump_dbg(...) do {} while (0) #endif @@ -178,7 +202,7 @@ extern atomic_t tipc_user_count; extern int tipc_core_start(void); extern void tipc_core_stop(void); -extern int tipc_core_start_net(void); +extern int tipc_core_start_net(unsigned long addr); extern void tipc_core_stop_net(void); extern int tipc_handler_start(void); extern void tipc_handler_stop(void); @@ -279,15 +303,14 @@ static inline void k_term_timer(struct timer_list *timer) /* * TIPC message buffer code * - * TIPC message buffer headroom reserves space for a link-level header - * (in case the message is sent off-node), - * while ensuring TIPC header is word aligned for quicker access + * TIPC message buffer headroom reserves space for the worst-case + * link-level device header (in case the message is sent off-node). * - * The largest header currently supported is 18 bytes, which is used when - * the standard 14 byte Ethernet header has 4 added bytes for VLAN info + * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields + * are word aligned for quicker access */ -#define BUF_HEADROOM 20u +#define BUF_HEADROOM LL_MAX_HEADER struct tipc_skb_cb { void *handle; diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c index e809d2a2ce06..29ecae851668 100644 --- a/net/tipc/dbg.c +++ b/net/tipc/dbg.c @@ -2,7 +2,7 @@ * net/tipc/dbg.c: TIPC print buffer routines for debugging * * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,17 +38,43 @@ #include "config.h" #include "dbg.h" -static char print_string[TIPC_PB_MAX_STR]; -static DEFINE_SPINLOCK(print_lock); +/* + * TIPC pre-defines the following print buffers: + * + * TIPC_NULL : null buffer (i.e. print nowhere) + * TIPC_CONS : system console + * TIPC_LOG : TIPC log buffer + * + * Additional user-defined print buffers are also permitted. + */ -static struct print_buf null_buf = { NULL, 0, NULL, NULL }; -struct print_buf *TIPC_NULL = &null_buf; +static struct print_buf null_buf = { NULL, 0, NULL, 0 }; +struct print_buf *const TIPC_NULL = &null_buf; -static struct print_buf cons_buf = { NULL, 0, NULL, NULL }; -struct print_buf *TIPC_CONS = &cons_buf; +static struct print_buf cons_buf = { NULL, 0, NULL, 1 }; +struct print_buf *const TIPC_CONS = &cons_buf; -static struct print_buf log_buf = { NULL, 0, NULL, NULL }; -struct print_buf *TIPC_LOG = &log_buf; +static struct print_buf log_buf = { NULL, 0, NULL, 1 }; +struct print_buf *const TIPC_LOG = &log_buf; + +/* + * Locking policy when using print buffers. + * + * 1) tipc_printf() uses 'print_lock' to protect against concurrent access to + * 'print_string' when writing to a print buffer. This also protects against + * concurrent writes to the print buffer being written to. + * + * 2) tipc_dump() and tipc_log_XXX() leverage the aforementioned + * use of 'print_lock' to protect against all types of concurrent operations + * on their associated print buffer (not just write operations). + * + * Note: All routines of the form tipc_printbuf_XXX() are lock-free, and rely + * on the caller to prevent simultaneous use of the print buffer(s) being + * manipulated. + */ + +static char print_string[TIPC_PB_MAX_STR]; +static DEFINE_SPINLOCK(print_lock); #define FORMAT(PTR,LEN,FMT) \ @@ -60,27 +86,14 @@ struct print_buf *TIPC_LOG = &log_buf; *(PTR + LEN) = '\0';\ } -/* - * Locking policy when using print buffers. - * - * The following routines use 'print_lock' for protection: - * 1) tipc_printf() - to protect its print buffer(s) and 'print_string' - * 2) TIPC_TEE() - to protect its print buffer(s) - * 3) tipc_dump() - to protect its print buffer(s) and 'print_string' - * 4) tipc_log_XXX() - to protect TIPC_LOG - * - * All routines of the form tipc_printbuf_XXX() rely on the caller to prevent - * simultaneous use of the print buffer(s) being manipulated. - */ - /** * tipc_printbuf_init - initialize print buffer to empty * @pb: pointer to print buffer structure * @raw: pointer to character array used by print buffer * @size: size of character array * - * Makes the print buffer a null device that discards anything written to it - * if the character array is too small (or absent). + * Note: If the character array is too small (or absent), the print buffer + * becomes a null device that discards anything written to it. */ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size) @@ -88,13 +101,13 @@ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size) pb->buf = raw; pb->crs = raw; pb->size = size; - pb->next = NULL; + pb->echo = 0; if (size < TIPC_PB_MIN_SIZE) { pb->buf = NULL; } else if (raw) { pb->buf[0] = 0; - pb->buf[size-1] = ~0; + pb->buf[size - 1] = ~0; } } @@ -105,7 +118,11 @@ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size) void tipc_printbuf_reset(struct print_buf *pb) { - tipc_printbuf_init(pb, pb->buf, pb->size); + if (pb->buf) { + pb->crs = pb->buf; + pb->buf[0] = 0; + pb->buf[pb->size - 1] = ~0; + } } /** @@ -141,7 +158,7 @@ int tipc_printbuf_validate(struct print_buf *pb) if (pb->buf[pb->size - 1] == 0) { cp_buf = kmalloc(pb->size, GFP_ATOMIC); - if (cp_buf != NULL){ + if (cp_buf) { tipc_printbuf_init(&cb, cp_buf, pb->size); tipc_printbuf_move(&cb, pb); tipc_printbuf_move(pb, &cb); @@ -179,15 +196,16 @@ void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from) } if (pb_to->size < pb_from->size) { - tipc_printbuf_reset(pb_to); - tipc_printf(pb_to, "*** PRINT BUFFER MOVE ERROR ***"); + strcpy(pb_to->buf, "*** PRINT BUFFER MOVE ERROR ***"); + pb_to->buf[pb_to->size - 1] = ~0; + pb_to->crs = strchr(pb_to->buf, 0); return; } /* Copy data from char after cursor to end (if used) */ len = pb_from->buf + pb_from->size - pb_from->crs - 2; - if ((pb_from->buf[pb_from->size-1] == 0) && (len > 0)) { + if ((pb_from->buf[pb_from->size - 1] == 0) && (len > 0)) { strcpy(pb_to->buf, pb_from->crs + 1); pb_to->crs = pb_to->buf + len; } else @@ -203,8 +221,8 @@ void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from) } /** - * tipc_printf - append formatted output to print buffer chain - * @pb: pointer to chain of print buffers (may be NULL) + * tipc_printf - append formatted output to print buffer + * @pb: pointer to print buffer * @fmt: formatted info to be printed */ @@ -213,68 +231,40 @@ void tipc_printf(struct print_buf *pb, const char *fmt, ...) int chars_to_add; int chars_left; char save_char; - struct print_buf *pb_next; spin_lock_bh(&print_lock); + FORMAT(print_string, chars_to_add, fmt); if (chars_to_add >= TIPC_PB_MAX_STR) strcpy(print_string, "*** PRINT BUFFER STRING TOO LONG ***"); - while (pb) { - if (pb == TIPC_CONS) - printk(print_string); - else if (pb->buf) { - chars_left = pb->buf + pb->size - pb->crs - 1; - if (chars_to_add <= chars_left) { - strcpy(pb->crs, print_string); - pb->crs += chars_to_add; - } else if (chars_to_add >= (pb->size - 1)) { - strcpy(pb->buf, print_string + chars_to_add + 1 - - pb->size); - pb->crs = pb->buf + pb->size - 1; - } else { - strcpy(pb->buf, print_string + chars_left); - save_char = print_string[chars_left]; - print_string[chars_left] = 0; - strcpy(pb->crs, print_string); - print_string[chars_left] = save_char; - pb->crs = pb->buf + chars_to_add - chars_left; - } + if (pb->buf) { + chars_left = pb->buf + pb->size - pb->crs - 1; + if (chars_to_add <= chars_left) { + strcpy(pb->crs, print_string); + pb->crs += chars_to_add; + } else if (chars_to_add >= (pb->size - 1)) { + strcpy(pb->buf, print_string + chars_to_add + 1 + - pb->size); + pb->crs = pb->buf + pb->size - 1; + } else { + strcpy(pb->buf, print_string + chars_left); + save_char = print_string[chars_left]; + print_string[chars_left] = 0; + strcpy(pb->crs, print_string); + print_string[chars_left] = save_char; + pb->crs = pb->buf + chars_to_add - chars_left; } - pb_next = pb->next; - pb->next = NULL; - pb = pb_next; } - spin_unlock_bh(&print_lock); -} -/** - * TIPC_TEE - perform next output operation on both print buffers - * @b0: pointer to chain of print buffers (may be NULL) - * @b1: pointer to print buffer to add to chain - * - * Returns pointer to print buffer chain. - */ + if (pb->echo) + printk(print_string); -struct print_buf *TIPC_TEE(struct print_buf *b0, struct print_buf *b1) -{ - struct print_buf *pb = b0; - - if (!b0 || (b0 == b1)) - return b1; - - spin_lock_bh(&print_lock); - while (pb->next) { - if ((pb->next == b1) || (pb->next == b0)) - pb->next = pb->next->next; - else - pb = pb->next; - } - pb->next = b1; spin_unlock_bh(&print_lock); - return b0; } +#ifdef CONFIG_TIPC_DEBUG + /** * print_to_console - write string of bytes to console in multiple chunks */ @@ -321,72 +311,66 @@ static void printbuf_dump(struct print_buf *pb) } /** - * tipc_dump - dump non-console print buffer(s) to console - * @pb: pointer to chain of print buffers + * tipc_dump_dbg - dump (non-console) print buffer to console + * @pb: pointer to print buffer */ -void tipc_dump(struct print_buf *pb, const char *fmt, ...) +void tipc_dump_dbg(struct print_buf *pb, const char *fmt, ...) { - struct print_buf *pb_next; int len; + if (pb == TIPC_CONS) + return; + spin_lock_bh(&print_lock); + FORMAT(print_string, len, fmt); printk(print_string); - for (; pb; pb = pb->next) { - if (pb != TIPC_CONS) { - printk("\n---- Start of %s log dump ----\n\n", - (pb == TIPC_LOG) ? "global" : "local"); - printbuf_dump(pb); - tipc_printbuf_reset(pb); - printk("\n---- End of dump ----\n"); - } - pb_next = pb->next; - pb->next = NULL; - pb = pb_next; - } + printk("\n---- Start of %s log dump ----\n\n", + (pb == TIPC_LOG) ? "global" : "local"); + printbuf_dump(pb); + tipc_printbuf_reset(pb); + printk("\n---- End of dump ----\n"); + spin_unlock_bh(&print_lock); } +#endif + /** - * tipc_log_stop - free up TIPC log print buffer + * tipc_log_resize - change the size of the TIPC log buffer + * @log_size: print buffer size to use */ -void tipc_log_stop(void) +int tipc_log_resize(int log_size) { + int res = 0; + spin_lock_bh(&print_lock); if (TIPC_LOG->buf) { kfree(TIPC_LOG->buf); TIPC_LOG->buf = NULL; } - spin_unlock_bh(&print_lock); -} - -/** - * tipc_log_reinit - (re)initialize TIPC log print buffer - * @log_size: print buffer size to use - */ - -void tipc_log_reinit(int log_size) -{ - tipc_log_stop(); - if (log_size) { if (log_size < TIPC_PB_MIN_SIZE) log_size = TIPC_PB_MIN_SIZE; - spin_lock_bh(&print_lock); + res = TIPC_LOG->echo; tipc_printbuf_init(TIPC_LOG, kmalloc(log_size, GFP_ATOMIC), log_size); - spin_unlock_bh(&print_lock); + TIPC_LOG->echo = res; + res = !TIPC_LOG->buf; } + spin_unlock_bh(&print_lock); + + return res; } /** - * tipc_log_resize - reconfigure size of TIPC log buffer + * tipc_log_resize_cmd - reconfigure size of TIPC log buffer */ -struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area, int req_tlv_space) { u32 value; @@ -397,7 +381,9 @@ struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space) if (value != delimit(value, 0, 32768)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (log size must be 0-32768)"); - tipc_log_reinit(value); + if (tipc_log_resize(value)) + return tipc_cfg_reply_error_string( + "unable to create specified log (log size is now 0)"); return tipc_cfg_reply_none(); } @@ -410,27 +396,32 @@ struct sk_buff *tipc_log_dump(void) struct sk_buff *reply; spin_lock_bh(&print_lock); - if (!TIPC_LOG->buf) + if (!TIPC_LOG->buf) { + spin_unlock_bh(&print_lock); reply = tipc_cfg_reply_ultra_string("log not activated\n"); - else if (tipc_printbuf_empty(TIPC_LOG)) + } else if (tipc_printbuf_empty(TIPC_LOG)) { + spin_unlock_bh(&print_lock); reply = tipc_cfg_reply_ultra_string("log is empty\n"); + } else { struct tlv_desc *rep_tlv; struct print_buf pb; int str_len; str_len = min(TIPC_LOG->size, 32768u); + spin_unlock_bh(&print_lock); reply = tipc_cfg_reply_alloc(TLV_SPACE(str_len)); if (reply) { rep_tlv = (struct tlv_desc *)reply->data; tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), str_len); + spin_lock_bh(&print_lock); tipc_printbuf_move(&pb, TIPC_LOG); + spin_unlock_bh(&print_lock); str_len = strlen(TLV_DATA(rep_tlv)) + 1; skb_put(reply, TLV_SPACE(str_len)); TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); } } - spin_unlock_bh(&print_lock); return reply; } diff --git a/net/tipc/dbg.h b/net/tipc/dbg.h index c01b085000e0..5ef1bc8f64ef 100644 --- a/net/tipc/dbg.h +++ b/net/tipc/dbg.h @@ -2,7 +2,7 @@ * net/tipc/dbg.h: Include file for TIPC print buffer routines * * Copyright (c) 1997-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,14 +42,14 @@ * @buf: pointer to character array containing print buffer contents * @size: size of character array * @crs: pointer to first unused space in character array (i.e. final NUL) - * @next: used to link print buffers when printing to more than one at a time + * @echo: echo output to system console if non-zero */ struct print_buf { char *buf; u32 size; char *crs; - struct print_buf *next; + int echo; }; #define TIPC_PB_MIN_SIZE 64 /* minimum size for a print buffer's array */ @@ -61,10 +61,10 @@ int tipc_printbuf_empty(struct print_buf *pb); int tipc_printbuf_validate(struct print_buf *pb); void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from); -void tipc_log_reinit(int log_size); -void tipc_log_stop(void); +int tipc_log_resize(int log_size); -struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space); +struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area, + int req_tlv_space); struct sk_buff *tipc_log_dump(void); #endif diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 5d643e5721eb..1657f0e795ff 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -120,9 +120,8 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, if (buf) { msg = buf_msg(buf); - msg_init(msg, LINK_CONFIG, type, TIPC_OK, DSC_H_SIZE, - dest_domain); - msg_set_non_seq(msg); + msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain); + msg_set_non_seq(msg, 1); msg_set_req_links(msg, req_links); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tipc_net_id); @@ -156,11 +155,11 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr, /** * tipc_disc_recv_msg - handle incoming link setup message (request or response) * @buf: buffer containing message + * @b_ptr: bearer that message arrived on */ -void tipc_disc_recv_msg(struct sk_buff *buf) +void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) { - struct bearer *b_ptr = (struct bearer *)TIPC_SKB_CB(buf)->handle; struct link *link; struct tipc_media_addr media_addr; struct tipc_msg *msg = buf_msg(buf); @@ -200,9 +199,8 @@ void tipc_disc_recv_msg(struct sk_buff *buf) dbg(" in own cluster\n"); if (n_ptr == NULL) { n_ptr = tipc_node_create(orig); - } - if (n_ptr == NULL) { - return; + if (!n_ptr) + return; } spin_lock_bh(&n_ptr->lock); link = n_ptr->links[b_ptr->identity]; diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 9fd7587b143a..c36eaeb7d5d0 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -48,7 +48,7 @@ struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, void tipc_disc_update_link_req(struct link_req *req); void tipc_disc_stop_link_req(struct link_req *req); -void tipc_disc_recv_msg(struct sk_buff *buf); +void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr); void tipc_disc_link_event(u32 addr, char *name, int up); #if 0 diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 9cd35eec3e7f..fe43ef7dd7e3 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -82,7 +82,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, dev->dev_addr, clone->len); dev_queue_xmit(clone); } - return TIPC_OK; + return 0; } /** @@ -101,7 +101,7 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; u32 size; - if (dev_net(dev) != &init_net) { + if (!net_eq(dev_net(dev), &init_net)) { kfree_skb(buf); return 0; } @@ -113,12 +113,12 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, if (likely(buf->len == size)) { buf->next = NULL; tipc_recv_msg(buf, eb_ptr->bearer); - return TIPC_OK; + return 0; } } } kfree_skb(buf); - return TIPC_OK; + return 0; } /** @@ -198,7 +198,7 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; while ((eb_ptr->dev != dev)) { diff --git a/net/tipc/link.c b/net/tipc/link.c index 2a26a16e269f..d60113ba4b1b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -51,6 +51,12 @@ /* + * Out-of-range value for link session numbers + */ + +#define INVALID_SESSION 0x10000 + +/* * Limit for deferred reception queue: */ @@ -147,9 +153,21 @@ static void link_print(struct link *l_ptr, struct print_buf *buf, #define LINK_LOG_BUF_SIZE 0 -#define dbg_link(fmt, arg...) do {if (LINK_LOG_BUF_SIZE) tipc_printf(&l_ptr->print_buf, fmt, ## arg); } while(0) -#define dbg_link_msg(msg, txt) do {if (LINK_LOG_BUF_SIZE) tipc_msg_print(&l_ptr->print_buf, msg, txt); } while(0) -#define dbg_link_state(txt) do {if (LINK_LOG_BUF_SIZE) link_print(l_ptr, &l_ptr->print_buf, txt); } while(0) +#define dbg_link(fmt, arg...) \ + do { \ + if (LINK_LOG_BUF_SIZE) \ + tipc_printf(&l_ptr->print_buf, fmt, ## arg); \ + } while (0) +#define dbg_link_msg(msg, txt) \ + do { \ + if (LINK_LOG_BUF_SIZE) \ + tipc_msg_dbg(&l_ptr->print_buf, msg, txt); \ + } while (0) +#define dbg_link_state(txt) \ + do { \ + if (LINK_LOG_BUF_SIZE) \ + link_print(l_ptr, &l_ptr->print_buf, txt); \ + } while (0) #define dbg_link_dump() do { \ if (LINK_LOG_BUF_SIZE) { \ tipc_printf(LOG, "\n\nDumping link <%s>:\n", l_ptr->name); \ @@ -450,9 +468,9 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; msg = l_ptr->pmsg; - msg_init(msg, LINK_PROTOCOL, RESET_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); + msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); msg_set_size(msg, sizeof(l_ptr->proto_msg)); - msg_set_session(msg, tipc_random); + msg_set_session(msg, (tipc_random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); @@ -693,10 +711,10 @@ void tipc_link_reset(struct link *l_ptr) u32 checkpoint = l_ptr->next_in_no; int was_active_link = tipc_link_is_active(l_ptr); - msg_set_session(l_ptr->pmsg, msg_session(l_ptr->pmsg) + 1); + msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff)); - /* Link is down, accept any session: */ - l_ptr->peer_session = 0; + /* Link is down, accept any session */ + l_ptr->peer_session = INVALID_SESSION; /* Prepare for max packet size negotiation */ link_init_max_pkt(l_ptr); @@ -1110,7 +1128,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) if (bundler) { msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG, - TIPC_OK, INT_H_SIZE, l_ptr->addr); + INT_H_SIZE, l_ptr->addr); skb_copy_to_linear_data(bundler, &bundler_hdr, INT_H_SIZE); skb_trim(bundler, INT_H_SIZE); @@ -1374,7 +1392,7 @@ again: msg_dbg(hdr, ">FRAGMENTING>"); msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - TIPC_OK, INT_H_SIZE, msg_destnode(hdr)); + INT_H_SIZE, msg_destnode(hdr)); msg_set_link_selector(&fragm_hdr, sender->publ.ref); msg_set_size(&fragm_hdr, max_pkt); msg_set_fragm_no(&fragm_hdr, 1); @@ -1543,7 +1561,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) l_ptr->retransm_queue_head = mod(++r_q_head); l_ptr->retransm_queue_size = --r_q_size; l_ptr->stats.retransmitted++; - return TIPC_OK; + return 0; } else { l_ptr->stats.bearer_congs++; msg_dbg(buf_msg(buf), "|>DEF-RETR>"); @@ -1562,7 +1580,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) l_ptr->unacked_window = 0; buf_discard(buf); l_ptr->proto_msg_queue = NULL; - return TIPC_OK; + return 0; } else { msg_dbg(buf_msg(buf), "|>DEF-PROT>"); l_ptr->stats.bearer_congs++; @@ -1586,7 +1604,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) msg_set_type(msg, CLOSED_MSG); msg_dbg(msg, ">PUSH-DATA>"); l_ptr->next_out = buf->next; - return TIPC_OK; + return 0; } else { msg_dbg(msg, "|PUSH-DATA|"); l_ptr->stats.bearer_congs++; @@ -1610,8 +1628,8 @@ void tipc_link_push_queue(struct link *l_ptr) do { res = tipc_link_push_packet(l_ptr); - } - while (res == TIPC_OK); + } while (!res); + if (res == PUSH_FAILED) tipc_bearer_schedule(l_ptr->b_ptr, l_ptr); } @@ -1651,7 +1669,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) struct tipc_msg *msg = buf_msg(buf); warn("Retransmission failure on link <%s>\n", l_ptr->name); - tipc_msg_print(TIPC_OUTPUT, msg, ">RETR-FAIL>"); + tipc_msg_dbg(TIPC_OUTPUT, msg, ">RETR-FAIL>"); if (l_ptr->addr) { @@ -1748,21 +1766,6 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0; } -/* - * link_recv_non_seq: Receive packets which are outside - * the link sequence flow - */ - -static void link_recv_non_seq(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - - if (msg_user(msg) == LINK_CONFIG) - tipc_disc_recv_msg(buf); - else - tipc_bclink_recv_pkt(buf); -} - /** * link_insert_deferred_queue - insert deferred messages back into receive chain */ @@ -1839,7 +1842,7 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) { read_lock_bh(&tipc_net_lock); while (head) { - struct bearer *b_ptr; + struct bearer *b_ptr = (struct bearer *)tb_ptr; struct node *n_ptr; struct link *l_ptr; struct sk_buff *crs; @@ -1850,9 +1853,6 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) u32 released = 0; int type; - b_ptr = (struct bearer *)tb_ptr; - TIPC_SKB_CB(buf)->handle = b_ptr; - head = head->next; /* Ensure message is well-formed */ @@ -1871,7 +1871,10 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) msg = buf_msg(buf); if (unlikely(msg_non_seq(msg))) { - link_recv_non_seq(buf); + if (msg_user(msg) == LINK_CONFIG) + tipc_disc_recv_msg(buf, b_ptr); + else + tipc_bclink_recv_pkt(buf); continue; } @@ -1978,8 +1981,6 @@ deliver: if (link_recv_changeover_msg(&l_ptr, &buf)) { msg = buf_msg(buf); seq_no = msg_seqno(msg); - TIPC_SKB_CB(buf)->handle - = b_ptr; if (type == ORIGINAL_MSG) goto deliver; goto protocol_check; @@ -2263,7 +2264,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) switch (msg_type(msg)) { case RESET_MSG: - if (!link_working_unknown(l_ptr) && l_ptr->peer_session) { + if (!link_working_unknown(l_ptr) && + (l_ptr->peer_session != INVALID_SESSION)) { if (msg_session(msg) == l_ptr->peer_session) { dbg("Duplicate RESET: %u<->%u\n", msg_session(msg), l_ptr->peer_session); @@ -2424,7 +2426,7 @@ void tipc_link_changeover(struct link *l_ptr) } msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - ORIGINAL_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); + ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); dbg("Link changeover requires %u tunnel messages\n", msgcount); @@ -2479,7 +2481,7 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel) struct tipc_msg tunnel_hdr; msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); + DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); msg_set_msgcnt(&tunnel_hdr, l_ptr->out_queue_size); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); iter = l_ptr->first_out; @@ -2672,10 +2674,12 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) u32 pack_sz = link_max_pkt(l_ptr); u32 fragm_sz = pack_sz - INT_H_SIZE; u32 fragm_no = 1; - u32 destaddr = msg_destnode(inmsg); + u32 destaddr; if (msg_short(inmsg)) destaddr = l_ptr->addr; + else + destaddr = msg_destnode(inmsg); if (msg_routed(inmsg)) msg_set_prevnode(inmsg, tipc_own_addr); @@ -2683,7 +2687,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) /* Prepare reusable fragment header: */ msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - TIPC_OK, INT_H_SIZE, destaddr); + INT_H_SIZE, destaddr); msg_set_link_selector(&fragm_hdr, msg_link_selector(inmsg)); msg_set_long_msgno(&fragm_hdr, mod(l_ptr->long_msg_seq_no++)); msg_set_fragm_no(&fragm_hdr, fragm_no); @@ -2994,7 +2998,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space link_set_supervision_props(l_ptr, new_value); tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, new_value, 0, 0); - res = TIPC_OK; + res = 0; } break; case TIPC_CMD_SET_LINK_PRI: @@ -3003,14 +3007,14 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space l_ptr->priority = new_value; tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, new_value, 0); - res = TIPC_OK; + res = 0; } break; case TIPC_CMD_SET_LINK_WINDOW: if ((new_value >= TIPC_MIN_LINK_WIN) && (new_value <= TIPC_MAX_LINK_WIN)) { tipc_link_set_queue_limits(l_ptr, new_value); - res = TIPC_OK; + res = 0; } break; } @@ -3226,7 +3230,7 @@ int link_control(const char *name, u32 op, u32 val) if (op == TIPC_CMD_UNBLOCK_LINK) { l_ptr->blocked = 0; } - res = TIPC_OK; + res = 0; } tipc_node_unlock(node); } diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 696a8633df75..73dcd00d674e 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -41,7 +41,9 @@ #include "bearer.h" -void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str) +#ifdef CONFIG_TIPC_DEBUG + +void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) { u32 usr = msg_user(msg); tipc_printf(buf, str); @@ -228,13 +230,10 @@ void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str switch (usr) { case CONN_MANAGER: - case NAME_DISTRIBUTOR: case TIPC_LOW_IMPORTANCE: case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: - if (msg_short(msg)) - break; /* No error */ switch (msg_errcode(msg)) { case TIPC_OK: break; @@ -315,9 +314,11 @@ void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str } tipc_printf(buf, "\n"); if ((usr == CHANGEOVER_PROTOCOL) && (msg_msgcnt(msg))) { - tipc_msg_print(buf,msg_get_wrapped(msg)," /"); + tipc_msg_dbg(buf, msg_get_wrapped(msg), " /"); } if ((usr == MSG_FRAGMENTER) && (msg_type(msg) == FIRST_FRAGMENT)) { - tipc_msg_print(buf,msg_get_wrapped(msg)," /"); + tipc_msg_dbg(buf, msg_get_wrapped(msg), " /"); } } + +#endif diff --git a/net/tipc/msg.h b/net/tipc/msg.h index ad487e8abcc2..7ee6ae238147 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -2,7 +2,7 @@ * net/tipc/msg.h: Include file for TIPC message header routines * * Copyright (c) 2000-2007, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2008, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -75,6 +75,14 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w, m->hdr[w] |= htonl(val); } +static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b) +{ + u32 temp = msg->hdr[a]; + + msg->hdr[a] = msg->hdr[b]; + msg->hdr[b] = temp; +} + /* * Word 0 */ @@ -119,9 +127,9 @@ static inline int msg_non_seq(struct tipc_msg *m) return msg_bits(m, 0, 20, 1); } -static inline void msg_set_non_seq(struct tipc_msg *m) +static inline void msg_set_non_seq(struct tipc_msg *m, u32 n) { - msg_set_bits(m, 0, 20, 1, 1); + msg_set_bits(m, 0, 20, 1, n); } static inline int msg_dest_droppable(struct tipc_msg *m) @@ -224,6 +232,25 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n) msg_set_bits(m, 2, 0, 0xffff, n); } +/* + * TIPC may utilize the "link ack #" and "link seq #" fields of a short + * message header to hold the destination node for the message, since the + * normal "dest node" field isn't present. This cache is only referenced + * when required, so populating the cache of a longer message header is + * harmless (as long as the header has the two link sequence fields present). + * + * Note: Host byte order is OK here, since the info never goes off-card. + */ + +static inline u32 msg_destnode_cache(struct tipc_msg *m) +{ + return m->hdr[2]; +} + +static inline void msg_set_destnode_cache(struct tipc_msg *m, u32 dnode) +{ + m->hdr[2] = dnode; +} /* * Words 3-10 @@ -325,7 +352,7 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ w0:|vers |msg usr|hdr sz |n|resrv| packet size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w1:|m typ|rsv=0| sequence gap | broadcast ack no | + w1:|m typ| sequence gap | broadcast ack no | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ w2:| link level ack no/bc_gap_from | seq no / bcast_gap_to | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -388,12 +415,12 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) static inline u32 msg_seq_gap(struct tipc_msg *m) { - return msg_bits(m, 1, 16, 0xff); + return msg_bits(m, 1, 16, 0x1fff); } static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n) { - msg_set_bits(m, 1, 16, 0xff, n); + msg_set_bits(m, 1, 16, 0x1fff, n); } static inline u32 msg_req_links(struct tipc_msg *m) @@ -696,7 +723,7 @@ static inline u32 msg_tot_importance(struct tipc_msg *m) static inline void msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 err, u32 hsize, u32 destnode) + u32 hsize, u32 destnode) { memset(m, 0, hsize); msg_set_version(m); @@ -705,7 +732,6 @@ static inline void msg_init(struct tipc_msg *m, u32 user, u32 type, msg_set_size(m, hsize); msg_set_prevnode(m, tipc_own_addr); msg_set_type(m, type); - msg_set_errcode(m, err); if (!msg_short(m)) { msg_set_orignode(m, tipc_own_addr); msg_set_destnode(m, destnode); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 39fd1619febf..10a69894e2fd 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -41,9 +41,6 @@ #include "msg.h" #include "name_distr.h" -#undef DBG_OUTPUT -#define DBG_OUTPUT NULL - #define ITEM_SIZE sizeof(struct distr_item) /** @@ -106,8 +103,7 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) if (buf != NULL) { msg = buf_msg(buf); - msg_init(msg, NAME_DISTRIBUTOR, type, TIPC_OK, - LONG_H_SIZE, dest); + msg_init(msg, NAME_DISTRIBUTOR, type, LONG_H_SIZE, dest); msg_set_size(msg, LONG_H_SIZE + size); } return buf; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index ac7dfdda7973..cd72e22b132b 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -2,7 +2,7 @@ * net/tipc/name_table.c: TIPC name table code * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2004-2008, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -52,9 +52,16 @@ static int tipc_nametbl_size = 1024; /* must be a power of 2 */ * struct sub_seq - container for all published instances of a name sequence * @lower: name sequence lower bound * @upper: name sequence upper bound - * @node_list: circular list of matching publications with >= node scope - * @cluster_list: circular list of matching publications with >= cluster scope - * @zone_list: circular list of matching publications with >= zone scope + * @node_list: circular list of publications made by own node + * @cluster_list: circular list of publications made by own cluster + * @zone_list: circular list of publications made by own zone + * @node_list_size: number of entries in "node_list" + * @cluster_list_size: number of entries in "cluster_list" + * @zone_list_size: number of entries in "zone_list" + * + * Note: The zone list always contains at least one entry, since all + * publications of the associated name sequence belong to it. + * (The cluster and node lists may be empty.) */ struct sub_seq { @@ -63,6 +70,9 @@ struct sub_seq { struct publication *node_list; struct publication *cluster_list; struct publication *zone_list; + u32 node_list_size; + u32 cluster_list_size; + u32 zone_list_size; }; /** @@ -74,7 +84,7 @@ struct sub_seq { * @first_free: array index of first unused sub-sequence entry * @ns_list: links to adjacent name sequences in hash chain * @subscriptions: list of subscriptions for this 'type' - * @lock: spinlock controlling access to name sequence structure + * @lock: spinlock controlling access to publication lists of all sub-sequences */ struct name_seq { @@ -317,6 +327,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, dbg("inserting publ %p, node=0x%x publ->node=0x%x, subscr->node=%p\n", publ, node, publ->node, publ->subscr.node); + sseq->zone_list_size++; if (!sseq->zone_list) sseq->zone_list = publ->zone_list_next = publ; else { @@ -325,6 +336,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, } if (in_own_cluster(node)) { + sseq->cluster_list_size++; if (!sseq->cluster_list) sseq->cluster_list = publ->cluster_list_next = publ; else { @@ -335,6 +347,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, } if (node == tipc_own_addr) { + sseq->node_list_size++; if (!sseq->node_list) sseq->node_list = publ->node_list_next = publ; else { @@ -411,6 +424,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i } else { sseq->zone_list = NULL; } + sseq->zone_list_size--; /* Remove publication from cluster scope list, if present */ @@ -439,6 +453,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i } else { sseq->cluster_list = NULL; } + sseq->cluster_list_size--; } end_cluster: @@ -469,6 +484,7 @@ end_cluster: } else { sseq->node_list = NULL; } + sseq->node_list_size--; } end_node: @@ -709,15 +725,18 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, if (sseq->lower > upper) break; - publ = sseq->cluster_list; - if (publ && (publ->scope <= limit)) + + publ = sseq->node_list; + if (publ) { do { - if (publ->node == tipc_own_addr) + if (publ->scope <= limit) tipc_port_list_add(dports, publ->ref); - else - res = 1; - publ = publ->cluster_list_next; - } while (publ != sseq->cluster_list); + publ = publ->node_list_next; + } while (publ != sseq->node_list); + } + + if (sseq->cluster_list_size != sseq->node_list_size) + res = 1; } spin_unlock_bh(&seq->lock); @@ -905,6 +924,9 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth, struct sub_seq *sseq; char typearea[11]; + if (seq->first_free == 0) + return; + sprintf(typearea, "%-10u", seq->type); if (depth == 1) { @@ -915,7 +937,9 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth, for (sseq = seq->sseqs; sseq != &seq->sseqs[seq->first_free]; sseq++) { if ((lowbound <= sseq->upper) && (upbound >= sseq->lower)) { tipc_printf(buf, "%s ", typearea); + spin_lock_bh(&seq->lock); subseq_list(sseq, buf, depth, index); + spin_unlock_bh(&seq->lock); sprintf(typearea, "%10s", " "); } } @@ -1050,15 +1074,12 @@ void tipc_nametbl_dump(void) int tipc_nametbl_init(void) { - int array_size = sizeof(struct hlist_head) * tipc_nametbl_size; - - table.types = kzalloc(array_size, GFP_ATOMIC); + table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head), + GFP_ATOMIC); if (!table.types) return -ENOMEM; - write_lock_bh(&tipc_nametbl_lock); table.local_publ_count = 0; - write_unlock_bh(&tipc_nametbl_lock); return 0; } diff --git a/net/tipc/net.c b/net/tipc/net.c index c39c76201e8e..ec7b04fbdc43 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -165,7 +165,7 @@ static int net_init(void) if (!tipc_net.zones) { return -ENOMEM; } - return TIPC_OK; + return 0; } static void net_stop(void) @@ -266,7 +266,7 @@ void tipc_net_route_msg(struct sk_buff *buf) tipc_link_send(buf, dnode, msg_link_selector(msg)); } -int tipc_net_start(void) +int tipc_net_start(u32 addr) { char addr_string[16]; int res; @@ -274,6 +274,10 @@ int tipc_net_start(void) if (tipc_mode != TIPC_NODE_MODE) return -ENOPROTOOPT; + tipc_subscr_stop(); + tipc_cfg_stop(); + + tipc_own_addr = addr; tipc_mode = TIPC_NET_MODE; tipc_named_reinit(); tipc_port_reinit(); @@ -284,14 +288,14 @@ int tipc_net_start(void) (res = tipc_bclink_init())) { return res; } - tipc_subscr_stop(); - tipc_cfg_stop(); + tipc_k_signal((Handler)tipc_subscr_start, 0); tipc_k_signal((Handler)tipc_cfg_init, 0); + info("Started in network mode\n"); info("Own node address %s, network identity %u\n", addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); - return TIPC_OK; + return 0; } void tipc_net_stop(void) diff --git a/net/tipc/net.h b/net/tipc/net.h index a6a0e9976ac9..d154ac2bda9a 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -58,7 +58,7 @@ void tipc_net_route_msg(struct sk_buff *buf); struct node *tipc_net_select_remote_node(u32 addr, u32 ref); u32 tipc_net_select_router(u32 addr, u32 ref); -int tipc_net_start(void); +int tipc_net_start(u32 addr); void tipc_net_stop(void); #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 6a7f7b4c2595..c387217bb230 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -2,7 +2,7 @@ * net/tipc/netlink.c: TIPC configuration handling * * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,15 +45,17 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) struct nlmsghdr *req_nlh = info->nlhdr; struct tipc_genlmsghdr *req_userhdr = info->userhdr; int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN); + u16 cmd; if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) - rep_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); + cmd = TIPC_CMD_NOT_NET_ADMIN; else - rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, - req_userhdr->cmd, - NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, - NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), - hdr_space); + cmd = req_userhdr->cmd; + + rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, + NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, + NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), + hdr_space); if (rep_buf) { skb_push(rep_buf, hdr_space); diff --git a/net/tipc/node.c b/net/tipc/node.c index 598f4d3a0098..ee952ad60218 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -52,16 +52,40 @@ static void node_established_contact(struct node *n_ptr); struct node *tipc_nodes = NULL; /* sorted list of nodes within cluster */ +static DEFINE_SPINLOCK(node_create_lock); + u32 tipc_own_tag = 0; +/** + * tipc_node_create - create neighboring node + * + * Currently, this routine is called by neighbor discovery code, which holds + * net_lock for reading only. We must take node_create_lock to ensure a node + * isn't created twice if two different bearers discover the node at the same + * time. (It would be preferable to switch to holding net_lock in write mode, + * but this is a non-trivial change.) + */ + struct node *tipc_node_create(u32 addr) { struct cluster *c_ptr; struct node *n_ptr; struct node **curr_node; + spin_lock_bh(&node_create_lock); + + for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { + if (addr < n_ptr->addr) + break; + if (addr == n_ptr->addr) { + spin_unlock_bh(&node_create_lock); + return n_ptr; + } + } + n_ptr = kzalloc(sizeof(*n_ptr),GFP_ATOMIC); if (!n_ptr) { + spin_unlock_bh(&node_create_lock); warn("Node creation failed, no memory\n"); return NULL; } @@ -71,6 +95,7 @@ struct node *tipc_node_create(u32 addr) c_ptr = tipc_cltr_create(addr); } if (!c_ptr) { + spin_unlock_bh(&node_create_lock); kfree(n_ptr); return NULL; } @@ -91,6 +116,7 @@ struct node *tipc_node_create(u32 addr) } } (*curr_node) = n_ptr; + spin_unlock_bh(&node_create_lock); return n_ptr; } @@ -574,12 +600,14 @@ u32 tipc_available_nodes(const u32 domain) struct node *n_ptr; u32 cnt = 0; + read_lock_bh(&tipc_net_lock); for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { if (!in_scope(domain, n_ptr->addr)) continue; if (tipc_node_is_up(n_ptr)) cnt++; } + read_unlock_bh(&tipc_net_lock); return cnt; } @@ -599,19 +627,26 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network address)"); - if (!tipc_nodes) + read_lock_bh(&tipc_net_lock); + if (!tipc_nodes) { + read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_none(); + } /* For now, get space for all other nodes (will need to modify this when slave nodes are supported */ payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1); - if (payload_size > 32768u) + if (payload_size > 32768u) { + read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (too many nodes)"); + } buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) + if (!buf) { + read_unlock_bh(&tipc_net_lock); return NULL; + } /* Add TLVs for all nodes in scope */ @@ -624,6 +659,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) &node_info, sizeof(node_info)); } + read_unlock_bh(&tipc_net_lock); return buf; } @@ -646,16 +682,22 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) if (tipc_mode != TIPC_NET_MODE) return tipc_cfg_reply_none(); + read_lock_bh(&tipc_net_lock); + /* Get space for all unicast links + multicast link */ payload_size = TLV_SPACE(sizeof(link_info)) * (tipc_net.zones[tipc_zone(tipc_own_addr)]->links + 1); - if (payload_size > 32768u) + if (payload_size > 32768u) { + read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (too many links)"); + } buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) + if (!buf) { + read_unlock_bh(&tipc_net_lock); return NULL; + } /* Add TLV for broadcast link */ @@ -671,6 +713,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) if (!in_scope(domain, n_ptr->addr)) continue; + tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { if (!n_ptr->links[i]) continue; @@ -680,7 +723,9 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); } + tipc_node_unlock(n_ptr); } + read_unlock_bh(&tipc_net_lock); return buf; } diff --git a/net/tipc/port.c b/net/tipc/port.c index 2f5806410c64..e70d27ea6578 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -2,7 +2,7 @@ * net/tipc/port.c: TIPC port code * * Copyright (c) 1992-2007, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems + * Copyright (c) 2004-2008, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -211,12 +211,12 @@ exit: } /** - * tipc_createport_raw - create a native TIPC port + * tipc_createport_raw - create a generic TIPC port * - * Returns local port reference + * Returns pointer to (locked) TIPC port, or NULL if unable to create it */ -u32 tipc_createport_raw(void *usr_handle, +struct tipc_port *tipc_createport_raw(void *usr_handle, u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), void (*wakeup)(struct tipc_port *), const u32 importance) @@ -228,26 +228,21 @@ u32 tipc_createport_raw(void *usr_handle, p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC); if (!p_ptr) { warn("Port creation failed, no memory\n"); - return 0; + return NULL; } ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock); if (!ref) { warn("Port creation failed, reference table exhausted\n"); kfree(p_ptr); - return 0; + return NULL; } - tipc_port_lock(ref); p_ptr->publ.usr_handle = usr_handle; p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; p_ptr->publ.ref = ref; msg = &p_ptr->publ.phdr; - msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, - 0); - msg_set_orignode(msg, tipc_own_addr); - msg_set_prevnode(msg, tipc_own_addr); + msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0); msg_set_origport(msg, ref); - msg_set_importance(msg,importance); p_ptr->last_in_seqno = 41; p_ptr->sent = 1; INIT_LIST_HEAD(&p_ptr->wait_list); @@ -262,8 +257,7 @@ u32 tipc_createport_raw(void *usr_handle, INIT_LIST_HEAD(&p_ptr->port_list); list_add_tail(&p_ptr->port_list, &ports); spin_unlock_bh(&tipc_port_list_lock); - tipc_port_unlock(p_ptr); - return ref; + return &(p_ptr->publ); } int tipc_deleteport(u32 ref) @@ -297,7 +291,7 @@ int tipc_deleteport(u32 ref) kfree(p_ptr); dbg("Deleted port %u\n", ref); tipc_net_route_msg(buf); - return TIPC_OK; + return 0; } /** @@ -342,7 +336,7 @@ int tipc_portunreliable(u32 ref, unsigned int *isunreliable) return -EINVAL; *isunreliable = port_unreliable(p_ptr); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } int tipc_set_portunreliable(u32 ref, unsigned int isunreliable) @@ -354,7 +348,7 @@ int tipc_set_portunreliable(u32 ref, unsigned int isunreliable) return -EINVAL; msg_set_src_droppable(&p_ptr->publ.phdr, (isunreliable != 0)); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } static int port_unreturnable(struct port *p_ptr) @@ -371,7 +365,7 @@ int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable) return -EINVAL; *isunrejectable = port_unreturnable(p_ptr); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) @@ -383,7 +377,7 @@ int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) return -EINVAL; msg_set_dest_droppable(&p_ptr->publ.phdr, (isunrejectable != 0)); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } /* @@ -402,10 +396,10 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, buf = buf_acquire(LONG_H_SIZE); if (buf) { msg = buf_msg(buf); - msg_init(msg, usr, type, err, LONG_H_SIZE, destnode); + msg_init(msg, usr, type, LONG_H_SIZE, destnode); + msg_set_errcode(msg, err); msg_set_destport(msg, destport); msg_set_origport(msg, origport); - msg_set_destnode(msg, destnode); msg_set_orignode(msg, orignode); msg_set_transp_seqno(msg, seqno); msg_set_msgcnt(msg, ack); @@ -446,17 +440,19 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) return data_sz; } rmsg = buf_msg(rbuf); - msg_init(rmsg, imp, msg_type(msg), err, hdr_sz, msg_orignode(msg)); + msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg)); + msg_set_errcode(rmsg, err); msg_set_destport(rmsg, msg_origport(msg)); - msg_set_prevnode(rmsg, tipc_own_addr); msg_set_origport(rmsg, msg_destport(msg)); - if (msg_short(msg)) + if (msg_short(msg)) { msg_set_orignode(rmsg, tipc_own_addr); - else + /* leave name type & instance as zeroes */ + } else { msg_set_orignode(rmsg, msg_destnode(msg)); + msg_set_nametype(rmsg, msg_nametype(msg)); + msg_set_nameinst(rmsg, msg_nameinst(msg)); + } msg_set_size(rmsg, data_sz + hdr_sz); - msg_set_nametype(rmsg, msg_nametype(msg)); - msg_set_nameinst(rmsg, msg_nameinst(msg)); skb_copy_to_linear_data_offset(rbuf, hdr_sz, msg_data(msg), data_sz); /* send self-abort message when rejecting on a connected port */ @@ -778,6 +774,7 @@ void tipc_port_reinit(void) msg = &p_ptr->publ.phdr; if (msg_orignode(msg) == tipc_own_addr) break; + msg_set_prevnode(msg, tipc_own_addr); msg_set_orignode(msg, tipc_own_addr); } spin_unlock_bh(&tipc_port_list_lock); @@ -838,16 +835,13 @@ static void port_dispatcher_sigh(void *dummy) u32 peer_node = port_peernode(p_ptr); tipc_port_unlock(p_ptr); + if (unlikely(!cb)) + goto reject; if (unlikely(!connected)) { - if (unlikely(published)) + if (tipc_connect2port(dref, &orig)) goto reject; - tipc_connect2port(dref,&orig); - } - if (unlikely(msg_origport(msg) != peer_port)) - goto reject; - if (unlikely(msg_orignode(msg) != peer_node)) - goto reject; - if (unlikely(!cb)) + } else if ((msg_origport(msg) != peer_port) || + (msg_orignode(msg) != peer_node)) goto reject; if (unlikely(++p_ptr->publ.conn_unacked >= TIPC_FLOW_CONTROL_WIN)) @@ -862,9 +856,7 @@ static void port_dispatcher_sigh(void *dummy) tipc_msg_event cb = up_ptr->msg_cb; tipc_port_unlock(p_ptr); - if (unlikely(connected)) - goto reject; - if (unlikely(!cb)) + if (unlikely(!cb || connected)) goto reject; skb_pull(buf, msg_hdr_sz(msg)); cb(usr_handle, dref, &buf, msg_data(msg), @@ -877,11 +869,7 @@ static void port_dispatcher_sigh(void *dummy) tipc_named_msg_event cb = up_ptr->named_msg_cb; tipc_port_unlock(p_ptr); - if (unlikely(connected)) - goto reject; - if (unlikely(!cb)) - goto reject; - if (unlikely(!published)) + if (unlikely(!cb || connected || !published)) goto reject; dseq.type = msg_nametype(msg); dseq.lower = msg_nameinst(msg); @@ -908,11 +896,10 @@ err: u32 peer_node = port_peernode(p_ptr); tipc_port_unlock(p_ptr); - if (!connected || !cb) - break; - if (msg_origport(msg) != peer_port) + if (!cb || !connected) break; - if (msg_orignode(msg) != peer_node) + if ((msg_origport(msg) != peer_port) || + (msg_orignode(msg) != peer_node)) break; tipc_disconnect(dref); skb_pull(buf, msg_hdr_sz(msg)); @@ -924,7 +911,7 @@ err: tipc_msg_err_event cb = up_ptr->err_cb; tipc_port_unlock(p_ptr); - if (connected || !cb) + if (!cb || connected) break; skb_pull(buf, msg_hdr_sz(msg)); cb(usr_handle, dref, &buf, msg_data(msg), @@ -937,7 +924,7 @@ err: up_ptr->named_err_cb; tipc_port_unlock(p_ptr); - if (connected || !cb) + if (!cb || connected) break; dseq.type = msg_nametype(msg); dseq.lower = msg_nameinst(msg); @@ -976,7 +963,7 @@ static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf) tipc_k_signal((Handler)port_dispatcher_sigh, 0); } spin_unlock_bh(&queue_lock); - return TIPC_OK; + return 0; } /* @@ -1053,15 +1040,14 @@ int tipc_createport(u32 user_ref, { struct user_port *up_ptr; struct port *p_ptr; - u32 ref; up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC); if (!up_ptr) { warn("Port creation failed, no memory\n"); return -ENOMEM; } - ref = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, importance); - p_ptr = tipc_port_lock(ref); + p_ptr = (struct port *)tipc_createport_raw(NULL, port_dispatcher, + port_wakeup, importance); if (!p_ptr) { kfree(up_ptr); return -ENOMEM; @@ -1081,16 +1067,15 @@ int tipc_createport(u32 user_ref, INIT_LIST_HEAD(&up_ptr->uport_list); tipc_reg_add_port(up_ptr); *portref = p_ptr->publ.ref; - dbg(" tipc_createport: %x with ref %u\n", p_ptr, p_ptr->publ.ref); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } int tipc_ownidentity(u32 ref, struct tipc_portid *id) { id->ref = ref; id->node = tipc_own_addr; - return TIPC_OK; + return 0; } int tipc_portimportance(u32 ref, unsigned int *importance) @@ -1102,7 +1087,7 @@ int tipc_portimportance(u32 ref, unsigned int *importance) return -EINVAL; *importance = (unsigned int)msg_importance(&p_ptr->publ.phdr); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } int tipc_set_portimportance(u32 ref, unsigned int imp) @@ -1117,7 +1102,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp) return -EINVAL; msg_set_importance(&p_ptr->publ.phdr, (u32)imp); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } @@ -1152,7 +1137,7 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) list_add(&publ->pport_list, &p_ptr->publications); p_ptr->pub_count++; p_ptr->publ.published = 1; - res = TIPC_OK; + res = 0; } exit: tipc_port_unlock(p_ptr); @@ -1175,7 +1160,7 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) tipc_nametbl_withdraw(publ->type, publ->lower, publ->ref, publ->key); } - res = TIPC_OK; + res = 0; } else { list_for_each_entry_safe(publ, tpubl, &p_ptr->publications, pport_list) { @@ -1189,7 +1174,7 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) break; tipc_nametbl_withdraw(publ->type, publ->lower, publ->ref, publ->key); - res = TIPC_OK; + res = 0; break; } } @@ -1233,7 +1218,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer) tipc_nodesub_subscribe(&p_ptr->subscription,peer->node, (void *)(unsigned long)ref, (net_ev_handler)port_handle_node_down); - res = TIPC_OK; + res = 0; exit: tipc_port_unlock(p_ptr); p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref); @@ -1255,7 +1240,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr) /* let timer expire on it's own to avoid deadlock! */ tipc_nodesub_unsubscribe( &((struct port *)tp_ptr)->subscription); - res = TIPC_OK; + res = 0; } else { res = -ENOTCONN; } @@ -1320,7 +1305,7 @@ int tipc_isconnected(u32 ref, int *isconnected) return -EINVAL; *isconnected = p_ptr->publ.connected; tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } int tipc_peer(u32 ref, struct tipc_portid *peer) @@ -1334,7 +1319,7 @@ int tipc_peer(u32 ref, struct tipc_portid *peer) if (p_ptr->publ.connected) { peer->ref = port_peerport(p_ptr); peer->node = port_peernode(p_ptr); - res = TIPC_OK; + res = 0; } else res = -ENOTCONN; tipc_port_unlock(p_ptr); diff --git a/net/tipc/ref.c b/net/tipc/ref.c index 89cbab24d08f..414fc34b8bea 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -123,7 +123,7 @@ int tipc_ref_table_init(u32 requested_size, u32 start) tipc_ref_table.index_mask = actual_size - 1; tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask; - return TIPC_OK; + return 0; } /** @@ -142,9 +142,13 @@ void tipc_ref_table_stop(void) /** * tipc_ref_acquire - create reference to an object * - * Return a unique reference value which can be translated back to the pointer - * 'object' at a later time. Also, pass back a pointer to the lock protecting - * the object, but without locking it. + * Register an object pointer in reference table and lock the object. + * Returns a unique reference value that is used from then on to retrieve the + * object pointer, or to determine that the object has been deregistered. + * + * Note: The object is returned in the locked state so that the caller can + * register a partially initialized object, without running the risk that + * the object will be accessed before initialization is complete. */ u32 tipc_ref_acquire(void *object, spinlock_t **lock) @@ -178,13 +182,13 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock) ref = (next_plus_upper & ~index_mask) + index; entry->ref = ref; entry->object = object; - spin_unlock_bh(&entry->lock); *lock = &entry->lock; } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { index = tipc_ref_table.init_point++; entry = &(tipc_ref_table.entries[index]); spin_lock_init(&entry->lock); + spin_lock_bh(&entry->lock); ref = tipc_ref_table.start_mask + index; entry->ref = ref; entry->object = object; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 230f9ca2ad6b..1848693ebb82 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2,7 +2,7 @@ * net/tipc/socket.c: TIPC socket API * * Copyright (c) 2001-2007, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems + * Copyright (c) 2004-2008, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -63,6 +63,7 @@ struct tipc_sock { struct sock sk; struct tipc_port *p; + struct tipc_portid peer_name; }; #define tipc_sk(sk) ((struct tipc_sock *)(sk)) @@ -188,7 +189,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol) const struct proto_ops *ops; socket_state state; struct sock *sk; - u32 portref; + struct tipc_port *tp_ptr; /* Validate arguments */ @@ -224,9 +225,9 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol) /* Allocate TIPC port for socket to use */ - portref = tipc_createport_raw(sk, &dispatch, &wakeupdispatch, - TIPC_LOW_IMPORTANCE); - if (unlikely(portref == 0)) { + tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch, + TIPC_LOW_IMPORTANCE); + if (unlikely(!tp_ptr)) { sk_free(sk); return -ENOMEM; } @@ -239,12 +240,14 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol) sock_init_data(sock, sk); sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT); sk->sk_backlog_rcv = backlog_rcv; - tipc_sk(sk)->p = tipc_get_port(portref); + tipc_sk(sk)->p = tp_ptr; + + spin_unlock_bh(tp_ptr->lock); if (sock->state == SS_READY) { - tipc_set_portunreturnable(portref, 1); + tipc_set_portunreturnable(tp_ptr->ref, 1); if (sock->type == SOCK_DGRAM) - tipc_set_portunreliable(portref, 1); + tipc_set_portunreliable(tp_ptr->ref, 1); } atomic_inc(&tipc_user_count); @@ -375,27 +378,29 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) * @sock: socket structure * @uaddr: area for returned socket address * @uaddr_len: area for returned length of socket address - * @peer: 0 to obtain socket name, 1 to obtain peer socket name + * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID * * Returns 0 on success, errno otherwise * - * NOTE: This routine doesn't need to take the socket lock since it doesn't - * access any non-constant socket information. + * NOTE: This routine doesn't need to take the socket lock since it only + * accesses socket information that is unchanging (or which changes in + * a completely predictable manner). */ static int get_name(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - u32 portref = tipc_sk_port(sock->sk)->ref; - u32 res; + struct tipc_sock *tsock = tipc_sk(sock->sk); if (peer) { - res = tipc_peer(portref, &addr->addr.id); - if (res) - return res; + if ((sock->state != SS_CONNECTED) && + ((peer != 2) || (sock->state != SS_DISCONNECTING))) + return -ENOTCONN; + addr->addr.id.ref = tsock->peer_name.ref; + addr->addr.id.node = tsock->peer_name.node; } else { - tipc_ownidentity(portref, &addr->addr.id); + tipc_ownidentity(tsock->p->ref, &addr->addr.id); } *uaddr_len = sizeof(*addr); @@ -764,18 +769,17 @@ exit: static int auto_connect(struct socket *sock, struct tipc_msg *msg) { - struct tipc_port *tport = tipc_sk_port(sock->sk); - struct tipc_portid peer; + struct tipc_sock *tsock = tipc_sk(sock->sk); if (msg_errcode(msg)) { sock->state = SS_DISCONNECTING; return -ECONNREFUSED; } - peer.ref = msg_origport(msg); - peer.node = msg_orignode(msg); - tipc_connect2port(tport->ref, &peer); - tipc_set_portimportance(tport->ref, msg_importance(msg)); + tsock->peer_name.ref = msg_origport(msg); + tsock->peer_name.node = msg_orignode(msg); + tipc_connect2port(tsock->p->ref, &tsock->peer_name); + tipc_set_portimportance(tsock->p->ref, msg_importance(msg)); sock->state = SS_CONNECTED; return 0; } @@ -1131,7 +1135,7 @@ restart: /* Loop around if more data is required */ if ((sz_copied < buf_len) /* didn't get all requested data */ - && (!skb_queue_empty(&sock->sk->sk_receive_queue) || + && (!skb_queue_empty(&sk->sk_receive_queue) || (flags & MSG_WAITALL)) /* ... and more is ready or required */ && (!(flags & MSG_PEEK)) /* ... and aren't just peeking at data */ @@ -1527,9 +1531,9 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_create(sock_net(sock->sk), new_sock, 0); if (!res) { struct sock *new_sk = new_sock->sk; - struct tipc_port *new_tport = tipc_sk_port(new_sk); + struct tipc_sock *new_tsock = tipc_sk(new_sk); + struct tipc_port *new_tport = new_tsock->p; u32 new_ref = new_tport->ref; - struct tipc_portid id; struct tipc_msg *msg = buf_msg(buf); lock_sock(new_sk); @@ -1543,9 +1547,9 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) /* Connect new socket to it's peer */ - id.ref = msg_origport(msg); - id.node = msg_orignode(msg); - tipc_connect2port(new_ref, &id); + new_tsock->peer_name.ref = msg_origport(msg); + new_tsock->peer_name.node = msg_orignode(msg); + tipc_connect2port(new_ref, &new_tsock->peer_name); new_sock->state = SS_CONNECTED; tipc_set_portimportance(new_ref, msg_importance(msg)); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 8c01ccd3626c..0326d3060bc7 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -1,8 +1,8 @@ /* - * net/tipc/subscr.c: TIPC subscription service + * net/tipc/subscr.c: TIPC network topology service * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,27 +36,24 @@ #include "core.h" #include "dbg.h" -#include "subscr.h" #include "name_table.h" +#include "port.h" #include "ref.h" +#include "subscr.h" /** * struct subscriber - TIPC network topology subscriber - * @ref: object reference to subscriber object itself - * @lock: pointer to spinlock controlling access to subscriber object + * @port_ref: object reference to server port connecting to subscriber + * @lock: pointer to spinlock controlling access to subscriber's server port * @subscriber_list: adjacent subscribers in top. server's list of subscribers * @subscription_list: list of subscription objects for this subscriber - * @port_ref: object reference to port used to communicate with subscriber - * @swap: indicates if subscriber uses opposite endianness in its messages */ struct subscriber { - u32 ref; + u32 port_ref; spinlock_t *lock; struct list_head subscriber_list; struct list_head subscription_list; - u32 port_ref; - int swap; }; /** @@ -88,13 +85,14 @@ static struct top_srv topsrv = { 0 }; static u32 htohl(u32 in, int swap) { - char *c = (char *)∈ - - return swap ? ((c[3] << 3) + (c[2] << 2) + (c[1] << 1) + c[0]) : in; + return swap ? (u32)___constant_swab32(in) : in; } /** * subscr_send_event - send a message containing a tipc_event to the subscriber + * + * Note: Must not hold subscriber's server port lock, since tipc_send() will + * try to take the lock if the message is rejected and returned! */ static void subscr_send_event(struct subscription *sub, @@ -109,12 +107,12 @@ static void subscr_send_event(struct subscription *sub, msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->owner->swap); - sub->evt.found_lower = htohl(found_lower, sub->owner->swap); - sub->evt.found_upper = htohl(found_upper, sub->owner->swap); - sub->evt.port.ref = htohl(port_ref, sub->owner->swap); - sub->evt.port.node = htohl(node, sub->owner->swap); - tipc_send(sub->owner->port_ref, 1, &msg_sect); + sub->evt.event = htohl(event, sub->swap); + sub->evt.found_lower = htohl(found_lower, sub->swap); + sub->evt.found_upper = htohl(found_upper, sub->swap); + sub->evt.port.ref = htohl(port_ref, sub->swap); + sub->evt.port.node = htohl(node, sub->swap); + tipc_send(sub->server_ref, 1, &msg_sect); } /** @@ -151,13 +149,12 @@ void tipc_subscr_report_overlap(struct subscription *sub, u32 node, int must) { - dbg("Rep overlap %u:%u,%u<->%u,%u\n", sub->seq.type, sub->seq.lower, - sub->seq.upper, found_lower, found_upper); if (!tipc_subscr_overlap(sub, found_lower, found_upper)) return; if (!must && !(sub->filter & TIPC_SUB_PORTS)) return; - subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); + + sub->event_cb(sub, found_lower, found_upper, event, port_ref, node); } /** @@ -166,20 +163,18 @@ void tipc_subscr_report_overlap(struct subscription *sub, static void subscr_timeout(struct subscription *sub) { - struct subscriber *subscriber; - u32 subscriber_ref; + struct port *server_port; - /* Validate subscriber reference (in case subscriber is terminating) */ + /* Validate server port reference (in case subscriber is terminating) */ - subscriber_ref = sub->owner->ref; - subscriber = (struct subscriber *)tipc_ref_lock(subscriber_ref); - if (subscriber == NULL) + server_port = tipc_port_lock(sub->server_ref); + if (server_port == NULL) return; /* Validate timeout (in case subscription is being cancelled) */ if (sub->timeout == TIPC_WAIT_FOREVER) { - tipc_ref_unlock(subscriber_ref); + tipc_port_unlock(server_port); return; } @@ -187,19 +182,21 @@ static void subscr_timeout(struct subscription *sub) tipc_nametbl_unsubscribe(sub); - /* Notify subscriber of timeout, then unlink subscription */ + /* Unlink subscription from subscriber */ - subscr_send_event(sub, - sub->evt.s.seq.lower, - sub->evt.s.seq.upper, - TIPC_SUBSCR_TIMEOUT, - 0, - 0); list_del(&sub->subscription_list); + /* Release subscriber's server port */ + + tipc_port_unlock(server_port); + + /* Notify subscriber of timeout */ + + subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, + TIPC_SUBSCR_TIMEOUT, 0, 0); + /* Now destroy subscription */ - tipc_ref_unlock(subscriber_ref); k_term_timer(&sub->timer); kfree(sub); atomic_dec(&topsrv.subscription_count); @@ -208,7 +205,7 @@ static void subscr_timeout(struct subscription *sub) /** * subscr_del - delete a subscription within a subscription list * - * Called with subscriber locked. + * Called with subscriber port locked. */ static void subscr_del(struct subscription *sub) @@ -222,7 +219,7 @@ static void subscr_del(struct subscription *sub) /** * subscr_terminate - terminate communication with a subscriber * - * Called with subscriber locked. Routine must temporarily release this lock + * Called with subscriber port locked. Routine must temporarily release lock * to enable subscription timeout routine(s) to finish without deadlocking; * the lock is then reclaimed to allow caller to release it upon return. * (This should work even in the unlikely event some other thread creates @@ -232,14 +229,21 @@ static void subscr_del(struct subscription *sub) static void subscr_terminate(struct subscriber *subscriber) { + u32 port_ref; struct subscription *sub; struct subscription *sub_temp; /* Invalidate subscriber reference */ - tipc_ref_discard(subscriber->ref); + port_ref = subscriber->port_ref; + subscriber->port_ref = 0; spin_unlock_bh(subscriber->lock); + /* Sever connection to subscriber */ + + tipc_shutdown(port_ref); + tipc_deleteport(port_ref); + /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, @@ -253,27 +257,25 @@ static void subscr_terminate(struct subscriber *subscriber) subscr_del(sub); } - /* Sever connection to subscriber */ - - tipc_shutdown(subscriber->port_ref); - tipc_deleteport(subscriber->port_ref); - /* Remove subscriber from topology server's subscriber list */ spin_lock_bh(&topsrv.lock); list_del(&subscriber->subscriber_list); spin_unlock_bh(&topsrv.lock); - /* Now destroy subscriber */ + /* Reclaim subscriber lock */ spin_lock_bh(subscriber->lock); + + /* Now destroy subscriber */ + kfree(subscriber); } /** * subscr_cancel - handle subscription cancellation request * - * Called with subscriber locked. Routine must temporarily release this lock + * Called with subscriber port locked. Routine must temporarily release lock * to enable the subscription timeout routine to finish without deadlocking; * the lock is then reclaimed to allow caller to release it upon return. * @@ -316,27 +318,25 @@ static void subscr_cancel(struct tipc_subscr *s, /** * subscr_subscribe - create subscription for subscriber * - * Called with subscriber locked + * Called with subscriber port locked. */ -static void subscr_subscribe(struct tipc_subscr *s, - struct subscriber *subscriber) +static struct subscription *subscr_subscribe(struct tipc_subscr *s, + struct subscriber *subscriber) { struct subscription *sub; + int swap; - /* Determine/update subscriber's endianness */ + /* Determine subscriber's endianness */ - if (s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)) - subscriber->swap = 0; - else - subscriber->swap = 1; + swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)); /* Detect & process a subscription cancellation request */ - if (s->filter & htohl(TIPC_SUB_CANCEL, subscriber->swap)) { - s->filter &= ~htohl(TIPC_SUB_CANCEL, subscriber->swap); + if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { + s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); subscr_cancel(s, subscriber); - return; + return NULL; } /* Refuse subscription if global limit exceeded */ @@ -345,63 +345,66 @@ static void subscr_subscribe(struct tipc_subscr *s, warn("Subscription rejected, subscription limit reached (%u)\n", tipc_max_subscriptions); subscr_terminate(subscriber); - return; + return NULL; } /* Allocate subscription object */ - sub = kzalloc(sizeof(*sub), GFP_ATOMIC); + sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (!sub) { warn("Subscription rejected, no memory\n"); subscr_terminate(subscriber); - return; + return NULL; } /* Initialize subscription object */ - sub->seq.type = htohl(s->seq.type, subscriber->swap); - sub->seq.lower = htohl(s->seq.lower, subscriber->swap); - sub->seq.upper = htohl(s->seq.upper, subscriber->swap); - sub->timeout = htohl(s->timeout, subscriber->swap); - sub->filter = htohl(s->filter, subscriber->swap); + sub->seq.type = htohl(s->seq.type, swap); + sub->seq.lower = htohl(s->seq.lower, swap); + sub->seq.upper = htohl(s->seq.upper, swap); + sub->timeout = htohl(s->timeout, swap); + sub->filter = htohl(s->filter, swap); if ((!(sub->filter & TIPC_SUB_PORTS) == !(sub->filter & TIPC_SUB_SERVICE)) || (sub->seq.lower > sub->seq.upper)) { warn("Subscription rejected, illegal request\n"); kfree(sub); subscr_terminate(subscriber); - return; + return NULL; } - memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); - INIT_LIST_HEAD(&sub->subscription_list); + sub->event_cb = subscr_send_event; INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); + sub->server_ref = subscriber->port_ref; + sub->swap = swap; + memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); atomic_inc(&topsrv.subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { k_init_timer(&sub->timer, (Handler)subscr_timeout, (unsigned long)sub); k_start_timer(&sub->timer, sub->timeout); } - sub->owner = subscriber; - tipc_nametbl_subscribe(sub); + + return sub; } /** * subscr_conn_shutdown_event - handle termination request from subscriber + * + * Called with subscriber's server port unlocked. */ static void subscr_conn_shutdown_event(void *usr_handle, - u32 portref, + u32 port_ref, struct sk_buff **buf, unsigned char const *data, unsigned int size, int reason) { - struct subscriber *subscriber; + struct subscriber *subscriber = usr_handle; spinlock_t *subscriber_lock; - subscriber = tipc_ref_lock((u32)(unsigned long)usr_handle); - if (subscriber == NULL) + if (tipc_port_lock(port_ref) == NULL) return; subscriber_lock = subscriber->lock; @@ -411,6 +414,8 @@ static void subscr_conn_shutdown_event(void *usr_handle, /** * subscr_conn_msg_event - handle new subscription request from subscriber + * + * Called with subscriber's server port unlocked. */ static void subscr_conn_msg_event(void *usr_handle, @@ -419,20 +424,46 @@ static void subscr_conn_msg_event(void *usr_handle, const unchar *data, u32 size) { - struct subscriber *subscriber; + struct subscriber *subscriber = usr_handle; spinlock_t *subscriber_lock; + struct subscription *sub; + + /* + * Lock subscriber's server port (& make a local copy of lock pointer, + * in case subscriber is deleted while processing subscription request) + */ - subscriber = tipc_ref_lock((u32)(unsigned long)usr_handle); - if (subscriber == NULL) + if (tipc_port_lock(port_ref) == NULL) return; subscriber_lock = subscriber->lock; - if (size != sizeof(struct tipc_subscr)) - subscr_terminate(subscriber); - else - subscr_subscribe((struct tipc_subscr *)data, subscriber); - spin_unlock_bh(subscriber_lock); + if (size != sizeof(struct tipc_subscr)) { + subscr_terminate(subscriber); + spin_unlock_bh(subscriber_lock); + } else { + sub = subscr_subscribe((struct tipc_subscr *)data, subscriber); + spin_unlock_bh(subscriber_lock); + if (sub != NULL) { + + /* + * We must release the server port lock before adding a + * subscription to the name table since TIPC needs to be + * able to (re)acquire the port lock if an event message + * issued by the subscription process is rejected and + * returned. The subscription cannot be deleted while + * it is being added to the name table because: + * a) the single-threading of the native API port code + * ensures the subscription cannot be cancelled and + * the subscriber connection cannot be broken, and + * b) the name table lock ensures the subscription + * timeout code cannot delete the subscription, + * so the subscription object is still protected. + */ + + tipc_nametbl_subscribe(sub); + } + } } /** @@ -448,16 +479,10 @@ static void subscr_named_msg_event(void *usr_handle, struct tipc_portid const *orig, struct tipc_name_seq const *dest) { - struct subscriber *subscriber; - struct iovec msg_sect = {NULL, 0}; - spinlock_t *subscriber_lock; + static struct iovec msg_sect = {NULL, 0}; - dbg("subscr_named_msg_event: orig = %x own = %x,\n", - orig->node, tipc_own_addr); - if (size && (size != sizeof(struct tipc_subscr))) { - warn("Subscriber rejected, invalid subscription size\n"); - return; - } + struct subscriber *subscriber; + u32 server_port_ref; /* Create subscriber object */ @@ -468,17 +493,11 @@ static void subscr_named_msg_event(void *usr_handle, } INIT_LIST_HEAD(&subscriber->subscription_list); INIT_LIST_HEAD(&subscriber->subscriber_list); - subscriber->ref = tipc_ref_acquire(subscriber, &subscriber->lock); - if (subscriber->ref == 0) { - warn("Subscriber rejected, reference table exhausted\n"); - kfree(subscriber); - return; - } - /* Establish a connection to subscriber */ + /* Create server port & establish connection to subscriber */ tipc_createport(topsrv.user_ref, - (void *)(unsigned long)subscriber->ref, + subscriber, importance, NULL, NULL, @@ -490,32 +509,36 @@ static void subscr_named_msg_event(void *usr_handle, &subscriber->port_ref); if (subscriber->port_ref == 0) { warn("Subscriber rejected, unable to create port\n"); - tipc_ref_discard(subscriber->ref); kfree(subscriber); return; } tipc_connect2port(subscriber->port_ref, orig); + /* Lock server port (& save lock address for future use) */ + + subscriber->lock = tipc_port_lock(subscriber->port_ref)->publ.lock; /* Add subscriber to topology server's subscriber list */ - tipc_ref_lock(subscriber->ref); spin_lock_bh(&topsrv.lock); list_add(&subscriber->subscriber_list, &topsrv.subscriber_list); spin_unlock_bh(&topsrv.lock); - /* - * Subscribe now if message contains a subscription, - * otherwise send an empty response to complete connection handshaking - */ + /* Unlock server port */ - subscriber_lock = subscriber->lock; - if (size) - subscr_subscribe((struct tipc_subscr *)data, subscriber); - else - tipc_send(subscriber->port_ref, 1, &msg_sect); + server_port_ref = subscriber->port_ref; + spin_unlock_bh(subscriber->lock); - spin_unlock_bh(subscriber_lock); + /* Send an ACK- to complete connection handshaking */ + + tipc_send(server_port_ref, 1, &msg_sect); + + /* Handle optional subscription request */ + + if (size != 0) { + subscr_conn_msg_event(subscriber, server_port_ref, + buf, data, size); + } } int tipc_subscr_start(void) @@ -574,8 +597,8 @@ void tipc_subscr_stop(void) list_for_each_entry_safe(subscriber, subscriber_temp, &topsrv.subscriber_list, subscriber_list) { - tipc_ref_lock(subscriber->ref); subscriber_lock = subscriber->lock; + spin_lock_bh(subscriber_lock); subscr_terminate(subscriber); spin_unlock_bh(subscriber_lock); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 93a8e674fac1..45d89bf4d202 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -1,8 +1,8 @@ /* - * net/tipc/subscr.h: Include file for TIPC subscription service + * net/tipc/subscr.h: Include file for TIPC network topology service * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,34 +37,44 @@ #ifndef _TIPC_SUBSCR_H #define _TIPC_SUBSCR_H +struct subscription; + +typedef void (*tipc_subscr_event) (struct subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port_ref, u32 node); + /** * struct subscription - TIPC network topology subscription object * @seq: name sequence associated with subscription * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription - * @evt: template for events generated by subscription - * @subscription_list: adjacent subscriptions in subscriber's subscription list + * @event_cb: routine invoked when a subscription event is detected + * @timer: timer governing subscription duration (optional) * @nameseq_list: adjacent subscriptions in name sequence's subscription list - * @timer_ref: reference to timer governing subscription duration (may be NULL) - * @owner: pointer to subscriber object associated with this subscription + * @subscription_list: adjacent subscriptions in subscriber's subscription list + * @server_ref: object reference of server port associated with subscription + * @swap: indicates if subscriber uses opposite endianness in its messages + * @evt: template for events generated by subscription */ struct subscription { struct tipc_name_seq seq; u32 timeout; u32 filter; - struct tipc_event evt; - struct list_head subscription_list; - struct list_head nameseq_list; + tipc_subscr_event event_cb; struct timer_list timer; - struct subscriber *owner; + struct list_head nameseq_list; + struct list_head subscription_list; + u32 server_ref; + int swap; + struct tipc_event evt; }; -int tipc_subscr_overlap(struct subscription * sub, +int tipc_subscr_overlap(struct subscription *sub, u32 found_lower, u32 found_upper); -void tipc_subscr_report_overlap(struct subscription * sub, +void tipc_subscr_report_overlap(struct subscription *sub, u32 found_lower, u32 found_upper, u32 event, diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c index 4146c40cd20b..506928803162 100644 --- a/net/tipc/user_reg.c +++ b/net/tipc/user_reg.c @@ -91,7 +91,7 @@ static int reg_init(void) } } spin_unlock_bh(®_lock); - return users ? TIPC_OK : -ENOMEM; + return users ? 0 : -ENOMEM; } /** @@ -129,7 +129,7 @@ int tipc_reg_start(void) tipc_k_signal((Handler)reg_callback, (unsigned long)&users[u]); } - return TIPC_OK; + return 0; } /** @@ -184,7 +184,7 @@ int tipc_attach(u32 *userid, tipc_mode_event cb, void *usr_handle) if (cb && (tipc_mode != TIPC_NOT_RUNNING)) tipc_k_signal((Handler)reg_callback, (unsigned long)user_ptr); - return TIPC_OK; + return 0; } /** @@ -230,7 +230,7 @@ int tipc_reg_add_port(struct user_port *up_ptr) struct tipc_user *user_ptr; if (up_ptr->user_ref == 0) - return TIPC_OK; + return 0; if (up_ptr->user_ref > MAX_USERID) return -EINVAL; if ((tipc_mode == TIPC_NOT_RUNNING) || !users ) @@ -240,7 +240,7 @@ int tipc_reg_add_port(struct user_port *up_ptr) user_ptr = &users[up_ptr->user_ref]; list_add(&up_ptr->uport_list, &user_ptr->ports); spin_unlock_bh(®_lock); - return TIPC_OK; + return 0; } /** @@ -250,7 +250,7 @@ int tipc_reg_add_port(struct user_port *up_ptr) int tipc_reg_remove_port(struct user_port *up_ptr) { if (up_ptr->user_ref == 0) - return TIPC_OK; + return 0; if (up_ptr->user_ref > MAX_USERID) return -EINVAL; if (!users ) @@ -259,6 +259,6 @@ int tipc_reg_remove_port(struct user_port *up_ptr) spin_lock_bh(®_lock); list_del_init(&up_ptr->uport_list); spin_unlock_bh(®_lock); - return TIPC_OK; + return 0; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e18cd3628db4..015606b54d9b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -8,8 +8,6 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $ - * * Fixes: * Linus Torvalds : Assorted bug cures. * Niibe Yutaka : async I/O support. @@ -169,6 +167,11 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk) return (unix_peer(osk) == NULL || unix_our_peer(sk, osk)); } +static inline int unix_recvq_full(struct sock const *sk) +{ + return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; +} + static struct sock *unix_peer_get(struct sock *s) { struct sock *peer; @@ -224,7 +227,7 @@ static void __unix_remove_socket(struct sock *sk) static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) { - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); sk_add_node(sk, list); } @@ -347,9 +350,9 @@ static void unix_sock_destructor(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(sk_unhashed(sk)); - BUG_TRAP(!sk->sk_socket); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(!sk_unhashed(sk)); + WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive unix socket: %p\n", sk); return; @@ -482,6 +485,8 @@ static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int); static int unix_getname(struct socket *, struct sockaddr *, int *, int); static unsigned int unix_poll(struct file *, struct socket *, poll_table *); +static unsigned int unix_dgram_poll(struct file *, struct socket *, + poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct kiocb *, struct socket *, @@ -527,7 +532,7 @@ static const struct proto_ops unix_dgram_ops = { .socketpair = unix_socketpair, .accept = sock_no_accept, .getname = unix_getname, - .poll = datagram_poll, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = sock_no_listen, .shutdown = unix_shutdown, @@ -548,7 +553,7 @@ static const struct proto_ops unix_seqpacket_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll = datagram_poll, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -598,7 +603,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock) u->dentry = NULL; u->mnt = NULL; spin_lock_init(&u->lock); - atomic_set(&u->inflight, 0); + atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); @@ -983,8 +988,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo) sched = !sock_flag(other, SOCK_DEAD) && !(other->sk_shutdown & RCV_SHUTDOWN) && - (skb_queue_len(&other->sk_receive_queue) > - other->sk_max_ack_backlog); + unix_recvq_full(other); unix_state_unlock(other); @@ -1058,8 +1062,7 @@ restart: if (other->sk_state != TCP_LISTEN) goto out_unlock; - if (skb_queue_len(&other->sk_receive_queue) > - other->sk_max_ack_backlog) { + if (unix_recvq_full(other)) { err = -EAGAIN; if (!timeo) goto out_unlock; @@ -1428,9 +1431,7 @@ restart: goto out_unlock; } - if (unix_peer(other) != sk && - (skb_queue_len(&other->sk_receive_queue) > - other->sk_max_ack_backlog)) { + if (unix_peer(other) != sk && unix_recvq_full(other)) { if (!timeo) { err = -EAGAIN; goto out_unlock; @@ -1991,6 +1992,60 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl return mask; } +static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk, *other; + unsigned int mask, writable; + + poll_wait(file, sk->sk_sleep, wait); + mask = 0; + + /* exceptional events? */ + if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + mask |= POLLERR; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= POLLHUP; + + /* readable? */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) + mask |= POLLIN | POLLRDNORM; + + /* Connection-based need to check for termination and startup */ + if (sk->sk_type == SOCK_SEQPACKET) { + if (sk->sk_state == TCP_CLOSE) + mask |= POLLHUP; + /* connection hasn't started yet? */ + if (sk->sk_state == TCP_SYN_SENT) + return mask; + } + + /* writable? */ + writable = unix_writable(sk); + if (writable) { + other = unix_peer_get(sk); + if (other) { + if (unix_peer(other) != sk) { + poll_wait(file, &unix_sk(other)->peer_wait, + wait); + if (unix_recvq_full(other)) + writable = 0; + } + + sock_put(other); + } + } + + if (writable) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + else + set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + + return mask; +} #ifdef CONFIG_PROC_FS static struct sock *first_unix_socket(int *i) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index ebdff3d877a1..2a27b84f740b 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -127,7 +127,7 @@ void unix_inflight(struct file *fp) if(s) { struct unix_sock *u = unix_sk(s); spin_lock(&unix_gc_lock); - if (atomic_inc_return(&u->inflight) == 1) { + if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { @@ -145,7 +145,7 @@ void unix_notinflight(struct file *fp) struct unix_sock *u = unix_sk(s); spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); - if (atomic_dec_and_test(&u->inflight)) + if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; spin_unlock(&unix_gc_lock); @@ -237,17 +237,17 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - atomic_dec(&usk->inflight); + atomic_long_dec(&usk->inflight); } static void inc_inflight(struct unix_sock *usk) { - atomic_inc(&usk->inflight); + atomic_long_inc(&usk->inflight); } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_inc(&u->inflight); + atomic_long_inc(&u->inflight); /* * If this is still a candidate, move it to the end of the * list, so that it's checked even if it was already passed @@ -288,11 +288,11 @@ void unix_gc(void) * before the detach without atomicity guarantees. */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { - int total_refs; - int inflight_refs; + long total_refs; + long inflight_refs; total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_read(&u->inflight); + inflight_refs = atomic_long_read(&u->inflight); BUG_ON(inflight_refs < 1); BUG_ON(total_refs < inflight_refs); @@ -324,7 +324,7 @@ void unix_gc(void) /* Move cursor to after the current position. */ list_move(&cursor, &u->link); - if (atomic_read(&u->inflight) > 0) { + if (atomic_long_read(&u->inflight) > 0) { list_move_tail(&u->link, &gc_inflight_list); u->gc_candidate = 0; scan_children(&u->sk, inc_inflight_move_tail, NULL); diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig index 1debe1cb054e..61ceae0b9566 100644 --- a/net/wanrouter/Kconfig +++ b/net/wanrouter/Kconfig @@ -20,8 +20,6 @@ config WAN_ROUTER wish to use your Linux box as a WAN router, say Y here and also to the WAN driver for your card, below. You will then need the wan-tools package which is available from <ftp://ftp.sangoma.com/>. - Read <file:Documentation/networking/wan-router.txt> for more - information. To compile WAN routing support as a module, choose M here: the module will be called wanrouter. diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 9ab31a3ce3ad..b210a88d0960 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -350,9 +350,9 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) * o execute requested action or pass command to the device driver */ -int wanrouter_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct inode *inode = file->f_path.dentry->d_inode; int err = 0; struct proc_dir_entry *dent; struct wan_device *wandev; @@ -372,6 +372,7 @@ int wanrouter_ioctl(struct inode *inode, struct file *file, if (wandev->magic != ROUTER_MAGIC) return -EINVAL; + lock_kernel(); switch (cmd) { case ROUTER_SETUP: err = wanrouter_device_setup(wandev, data); @@ -403,6 +404,7 @@ int wanrouter_ioctl(struct inode *inode, struct file *file, err = wandev->ioctl(wandev, cmd, arg); else err = -EINVAL; } + unlock_kernel(); return err; } diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 5bebe40bf4e6..267f7ff49827 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -278,7 +278,7 @@ static const struct file_operations wandev_fops = { .read = seq_read, .llseek = seq_lseek, .release = single_release, - .ioctl = wanrouter_ioctl, + .unlocked_ioctl = wanrouter_ioctl, }; /* diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 79270903bda6..ab015c62d561 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -29,3 +29,14 @@ config WIRELESS_EXT Say N (if you can) unless you know you need wireless extensions for external modules. + +config WIRELESS_EXT_SYSFS + bool "Wireless extensions sysfs files" + default y + depends on WIRELESS_EXT && SYSFS + help + This option enables the deprecated wireless statistics + files in /sys/class/net/*/wireless/. The same information + is available via the ioctls as well. + + Say Y if you have programs using it (we don't know of any). diff --git a/net/wireless/core.c b/net/wireless/core.c index 80afacdae46c..f1da0b93bc56 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -143,8 +143,11 @@ void cfg80211_put_dev(struct cfg80211_registered_device *drv) int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname) { + struct cfg80211_registered_device *drv; int idx, taken = -1, result, digits; + mutex_lock(&cfg80211_drv_mutex); + /* prohibit calling the thing phy%d when %d is not its number */ sscanf(newname, PHY_NAME "%d%n", &idx, &taken); if (taken == strlen(newname) && idx != rdev->idx) { @@ -156,14 +159,30 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, * deny the name if it is phy<idx> where <idx> is printed * without leading zeroes. taken == strlen(newname) here */ + result = -EINVAL; if (taken == strlen(PHY_NAME) + digits) - return -EINVAL; + goto out_unlock; + } + + + /* Ignore nop renames */ + result = 0; + if (strcmp(newname, dev_name(&rdev->wiphy.dev)) == 0) + goto out_unlock; + + /* Ensure another device does not already have this name. */ + list_for_each_entry(drv, &cfg80211_drv_list, list) { + result = -EINVAL; + if (strcmp(newname, dev_name(&drv->wiphy.dev)) == 0) + goto out_unlock; } - /* this will check for collisions */ + /* this will only check for collisions in sysfs + * which is not even always compiled in. + */ result = device_rename(&rdev->wiphy.dev, newname); if (result) - return result; + goto out_unlock; if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent, rdev->wiphy.debugfsdir, @@ -172,9 +191,13 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n", newname); - nl80211_notify_dev_rename(rdev); + result = 0; +out_unlock: + mutex_unlock(&cfg80211_drv_mutex); + if (result == 0) + nl80211_notify_dev_rename(rdev); - return 0; + return result; } /* exported functions */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2bdd4dddc0e1..b7fefffd2d0d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -187,7 +187,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) @@ -198,12 +199,14 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) mutex_lock(&cfg80211_drv_mutex); list_for_each_entry(dev, &cfg80211_drv_list, list) { - if (++idx < start) + if (++idx <= start) continue; if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev) < 0) + dev) < 0) { + idx--; break; + } } mutex_unlock(&cfg80211_drv_mutex); @@ -273,7 +276,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb) @@ -928,7 +932,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_station(struct sk_buff *skb, @@ -1267,7 +1272,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_mpath(struct sk_buff *skb, diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 28fbd0b0b568..f591871a7b4f 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -59,23 +59,21 @@ int ieee80211_radiotap_iterator_init( return -EINVAL; /* sanity check for allowed length and radiotap length field */ - if (max_length < le16_to_cpu(get_unaligned(&radiotap_header->it_len))) + if (max_length < get_unaligned_le16(&radiotap_header->it_len)) return -EINVAL; iterator->rtheader = radiotap_header; - iterator->max_length = le16_to_cpu(get_unaligned( - &radiotap_header->it_len)); + iterator->max_length = get_unaligned_le16(&radiotap_header->it_len); iterator->arg_index = 0; - iterator->bitmap_shifter = le32_to_cpu(get_unaligned( - &radiotap_header->it_present)); + iterator->bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present); iterator->arg = (u8 *)radiotap_header + sizeof(*radiotap_header); iterator->this_arg = NULL; /* find payload start allowing for extended bitmap(s) */ if (unlikely(iterator->bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT))) { - while (le32_to_cpu(get_unaligned((__le32 *)iterator->arg)) & - (1<<IEEE80211_RADIOTAP_EXT)) { + while (get_unaligned_le32(iterator->arg) & + (1 << IEEE80211_RADIOTAP_EXT)) { iterator->arg += sizeof(u32); /* @@ -241,8 +239,8 @@ int ieee80211_radiotap_iterator_next( if (iterator->bitmap_shifter & 1) { /* b31 was set, there is more */ /* move to next u32 bitmap */ - iterator->bitmap_shifter = le32_to_cpu( - get_unaligned(iterator->next_bitmap)); + iterator->bitmap_shifter = + get_unaligned_le32(iterator->next_bitmap); iterator->next_bitmap++; } else /* no more bitmaps: end */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 185488da2466..855bff4b3250 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -80,6 +80,23 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = { IEEE80211_CHAN_RADAR), }; +static const struct ieee80211_channel_range ieee80211_EU_channels[] = { + /* IEEE 802.11b/g, channels 1..13 */ + RANGE_PWR(2412, 2472, 20, 6, 0), + /* IEEE 802.11a, channel 36*/ + RANGE_PWR(5180, 5180, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channel 40*/ + RANGE_PWR(5200, 5200, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channel 44*/ + RANGE_PWR(5220, 5220, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channels 48..64 */ + RANGE_PWR(5240, 5320, 23, 6, IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_RADAR), + /* IEEE 802.11a, channels 100..140 */ + RANGE_PWR(5500, 5700, 30, 6, IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_RADAR), +}; + #define REGDOM(_code) \ { \ .code = __stringify(_code), \ @@ -90,6 +107,7 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = { static const struct ieee80211_regdomain ieee80211_regdoms[] = { REGDOM(US), REGDOM(JP), + REGDOM(EU), }; diff --git a/net/wireless/wext.c b/net/wireless/wext.c index 947188a5b937..df5b3886c36b 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -500,7 +500,7 @@ static int call_commit_handler(struct net_device *dev) /* * Calculate size of private arguments */ -static inline int get_priv_size(__u16 args) +static int get_priv_size(__u16 args) { int num = args & IW_PRIV_SIZE_MASK; int type = (args & IW_PRIV_TYPE_MASK) >> 12; @@ -512,10 +512,9 @@ static inline int get_priv_size(__u16 args) /* * Re-calculate the size of private arguments */ -static inline int adjust_priv_size(__u16 args, - union iwreq_data * wrqu) +static int adjust_priv_size(__u16 args, struct iw_point *iwp) { - int num = wrqu->data.length; + int num = iwp->length; int max = args & IW_PRIV_SIZE_MASK; int type = (args & IW_PRIV_TYPE_MASK) >> 12; @@ -695,19 +694,150 @@ void wext_proc_exit(struct net *net) */ /* ---------------------------------------------------------------- */ +static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, + const struct iw_ioctl_description *descr, + iw_handler handler, struct net_device *dev, + struct iw_request_info *info) +{ + int err, extra_size, user_length = 0, essid_compat = 0; + char *extra; + + /* Calculate space needed by arguments. Always allocate + * for max space. + */ + extra_size = descr->max_tokens * descr->token_size; + + /* Check need for ESSID compatibility for WE < 21 */ + switch (cmd) { + case SIOCSIWESSID: + case SIOCGIWESSID: + case SIOCSIWNICKN: + case SIOCGIWNICKN: + if (iwp->length == descr->max_tokens + 1) + essid_compat = 1; + else if (IW_IS_SET(cmd) && (iwp->length != 0)) { + char essid[IW_ESSID_MAX_SIZE + 1]; + + err = copy_from_user(essid, iwp->pointer, + iwp->length * + descr->token_size); + if (err) + return -EFAULT; + + if (essid[iwp->length - 1] == '\0') + essid_compat = 1; + } + break; + default: + break; + } + + iwp->length -= essid_compat; + + /* Check what user space is giving us */ + if (IW_IS_SET(cmd)) { + /* Check NULL pointer */ + if (!iwp->pointer && iwp->length != 0) + return -EFAULT; + /* Check if number of token fits within bounds */ + if (iwp->length > descr->max_tokens) + return -E2BIG; + if (iwp->length < descr->min_tokens) + return -EINVAL; + } else { + /* Check NULL pointer */ + if (!iwp->pointer) + return -EFAULT; + /* Save user space buffer size for checking */ + user_length = iwp->length; + + /* Don't check if user_length > max to allow forward + * compatibility. The test user_length < min is + * implied by the test at the end. + */ + + /* Support for very large requests */ + if ((descr->flags & IW_DESCR_FLAG_NOMAX) && + (user_length > descr->max_tokens)) { + /* Allow userspace to GET more than max so + * we can support any size GET requests. + * There is still a limit : -ENOMEM. + */ + extra_size = user_length * descr->token_size; + + /* Note : user_length is originally a __u16, + * and token_size is controlled by us, + * so extra_size won't get negative and + * won't overflow... + */ + } + } + + /* kzalloc() ensures NULL-termination for essid_compat. */ + extra = kzalloc(extra_size, GFP_KERNEL); + if (!extra) + return -ENOMEM; + + /* If it is a SET, get all the extra data in here */ + if (IW_IS_SET(cmd) && (iwp->length != 0)) { + if (copy_from_user(extra, iwp->pointer, + iwp->length * + descr->token_size)) { + err = -EFAULT; + goto out; + } + } + + err = handler(dev, info, (union iwreq_data *) iwp, extra); + + iwp->length += essid_compat; + + /* If we have something to return to the user */ + if (!err && IW_IS_GET(cmd)) { + /* Check if there is enough buffer up there */ + if (user_length < iwp->length) { + err = -E2BIG; + goto out; + } + + if (copy_to_user(iwp->pointer, extra, + iwp->length * + descr->token_size)) { + err = -EFAULT; + goto out; + } + } + + /* Generate an event to notify listeners of the change */ + if ((descr->flags & IW_DESCR_FLAG_EVENT) && err == -EIWCOMMIT) { + union iwreq_data *data = (union iwreq_data *) iwp; + + if (descr->flags & IW_DESCR_FLAG_RESTRICT) + /* If the event is restricted, don't + * export the payload. + */ + wireless_send_event(dev, cmd, data, NULL); + else + wireless_send_event(dev, cmd, data, extra); + } + +out: + kfree(extra); + return err; +} + /* * Wrapper to call a standard Wireless Extension handler. * We do various checks and also take care of moving data between * user space and kernel space. */ static int ioctl_standard_call(struct net_device * dev, - struct ifreq * ifr, + struct iwreq *iwr, unsigned int cmd, + struct iw_request_info *info, iw_handler handler) { - struct iwreq * iwr = (struct iwreq *) ifr; const struct iw_ioctl_description * descr; - struct iw_request_info info; int ret = -EINVAL; /* Get the description of the IOCTL */ @@ -715,145 +845,19 @@ static int ioctl_standard_call(struct net_device * dev, return -EOPNOTSUPP; descr = &(standard_ioctl[cmd - SIOCIWFIRST]); - /* Prepare the call */ - info.cmd = cmd; - info.flags = 0; - /* Check if we have a pointer to user space data or not */ if (descr->header_type != IW_HEADER_TYPE_POINT) { /* No extra arguments. Trivial to handle */ - ret = handler(dev, &info, &(iwr->u), NULL); + ret = handler(dev, info, &(iwr->u), NULL); /* Generate an event to notify listeners of the change */ if ((descr->flags & IW_DESCR_FLAG_EVENT) && ((ret == 0) || (ret == -EIWCOMMIT))) wireless_send_event(dev, cmd, &(iwr->u), NULL); } else { - char * extra; - int extra_size; - int user_length = 0; - int err; - int essid_compat = 0; - - /* Calculate space needed by arguments. Always allocate - * for max space. Easier, and won't last long... */ - extra_size = descr->max_tokens * descr->token_size; - - /* Check need for ESSID compatibility for WE < 21 */ - switch (cmd) { - case SIOCSIWESSID: - case SIOCGIWESSID: - case SIOCSIWNICKN: - case SIOCGIWNICKN: - if (iwr->u.data.length == descr->max_tokens + 1) - essid_compat = 1; - else if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) { - char essid[IW_ESSID_MAX_SIZE + 1]; - - err = copy_from_user(essid, iwr->u.data.pointer, - iwr->u.data.length * - descr->token_size); - if (err) - return -EFAULT; - - if (essid[iwr->u.data.length - 1] == '\0') - essid_compat = 1; - } - break; - default: - break; - } - - iwr->u.data.length -= essid_compat; - - /* Check what user space is giving us */ - if (IW_IS_SET(cmd)) { - /* Check NULL pointer */ - if ((iwr->u.data.pointer == NULL) && - (iwr->u.data.length != 0)) - return -EFAULT; - /* Check if number of token fits within bounds */ - if (iwr->u.data.length > descr->max_tokens) - return -E2BIG; - if (iwr->u.data.length < descr->min_tokens) - return -EINVAL; - } else { - /* Check NULL pointer */ - if (iwr->u.data.pointer == NULL) - return -EFAULT; - /* Save user space buffer size for checking */ - user_length = iwr->u.data.length; - - /* Don't check if user_length > max to allow forward - * compatibility. The test user_length < min is - * implied by the test at the end. */ - - /* Support for very large requests */ - if ((descr->flags & IW_DESCR_FLAG_NOMAX) && - (user_length > descr->max_tokens)) { - /* Allow userspace to GET more than max so - * we can support any size GET requests. - * There is still a limit : -ENOMEM. */ - extra_size = user_length * descr->token_size; - /* Note : user_length is originally a __u16, - * and token_size is controlled by us, - * so extra_size won't get negative and - * won't overflow... */ - } - } - - /* Create the kernel buffer */ - /* kzalloc ensures NULL-termination for essid_compat */ - extra = kzalloc(extra_size, GFP_KERNEL); - if (extra == NULL) - return -ENOMEM; - - /* If it is a SET, get all the extra data in here */ - if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) { - err = copy_from_user(extra, iwr->u.data.pointer, - iwr->u.data.length * - descr->token_size); - if (err) { - kfree(extra); - return -EFAULT; - } - } - - /* Call the handler */ - ret = handler(dev, &info, &(iwr->u), extra); - - iwr->u.data.length += essid_compat; - - /* If we have something to return to the user */ - if (!ret && IW_IS_GET(cmd)) { - /* Check if there is enough buffer up there */ - if (user_length < iwr->u.data.length) { - kfree(extra); - return -E2BIG; - } - - err = copy_to_user(iwr->u.data.pointer, extra, - iwr->u.data.length * - descr->token_size); - if (err) - ret = -EFAULT; - } - - /* Generate an event to notify listeners of the change */ - if ((descr->flags & IW_DESCR_FLAG_EVENT) && - ((ret == 0) || (ret == -EIWCOMMIT))) { - if (descr->flags & IW_DESCR_FLAG_RESTRICT) - /* If the event is restricted, don't - * export the payload */ - wireless_send_event(dev, cmd, &(iwr->u), NULL); - else - wireless_send_event(dev, cmd, &(iwr->u), - extra); - } - - /* Cleanup - I told you it wasn't that long ;-) */ - kfree(extra); + ret = ioctl_standard_iw_point(&iwr->u.data, cmd, descr, + handler, dev, info); } /* Call commit handler if needed and defined */ @@ -881,25 +885,22 @@ static int ioctl_standard_call(struct net_device * dev, * a iw_handler but process it in your ioctl handler (i.e. use the * old driver API). */ -static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr, - unsigned int cmd, iw_handler handler) +static int get_priv_descr_and_size(struct net_device *dev, unsigned int cmd, + const struct iw_priv_args **descrp) { - struct iwreq * iwr = (struct iwreq *) ifr; - const struct iw_priv_args * descr = NULL; - struct iw_request_info info; - int extra_size = 0; - int i; - int ret = -EINVAL; + const struct iw_priv_args *descr; + int i, extra_size; - /* Get the description of the IOCTL */ - for (i = 0; i < dev->wireless_handlers->num_private_args; i++) + descr = NULL; + for (i = 0; i < dev->wireless_handlers->num_private_args; i++) { if (cmd == dev->wireless_handlers->private_args[i].cmd) { - descr = &(dev->wireless_handlers->private_args[i]); + descr = &dev->wireless_handlers->private_args[i]; break; } + } - /* Compute the size of the set/get arguments */ - if (descr != NULL) { + extra_size = 0; + if (descr) { if (IW_IS_SET(cmd)) { int offset = 0; /* For sub-ioctls */ /* Check for sub-ioctl handler */ @@ -924,72 +925,77 @@ static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr, extra_size = 0; } } + *descrp = descr; + return extra_size; +} - /* Prepare the call */ - info.cmd = cmd; - info.flags = 0; +static int ioctl_private_iw_point(struct iw_point *iwp, unsigned int cmd, + const struct iw_priv_args *descr, + iw_handler handler, struct net_device *dev, + struct iw_request_info *info, int extra_size) +{ + char *extra; + int err; - /* Check if we have a pointer to user space data or not. */ - if (extra_size == 0) { - /* No extra arguments. Trivial to handle */ - ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u)); - } else { - char * extra; - int err; + /* Check what user space is giving us */ + if (IW_IS_SET(cmd)) { + if (!iwp->pointer && iwp->length != 0) + return -EFAULT; - /* Check what user space is giving us */ - if (IW_IS_SET(cmd)) { - /* Check NULL pointer */ - if ((iwr->u.data.pointer == NULL) && - (iwr->u.data.length != 0)) - return -EFAULT; + if (iwp->length > (descr->set_args & IW_PRIV_SIZE_MASK)) + return -E2BIG; + } else if (!iwp->pointer) + return -EFAULT; - /* Does it fits within bounds ? */ - if (iwr->u.data.length > (descr->set_args & - IW_PRIV_SIZE_MASK)) - return -E2BIG; - } else if (iwr->u.data.pointer == NULL) - return -EFAULT; + extra = kmalloc(extra_size, GFP_KERNEL); + if (!extra) + return -ENOMEM; - /* Always allocate for max space. Easier, and won't last - * long... */ - extra = kmalloc(extra_size, GFP_KERNEL); - if (extra == NULL) - return -ENOMEM; - - /* If it is a SET, get all the extra data in here */ - if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) { - err = copy_from_user(extra, iwr->u.data.pointer, - extra_size); - if (err) { - kfree(extra); - return -EFAULT; - } + /* If it is a SET, get all the extra data in here */ + if (IW_IS_SET(cmd) && (iwp->length != 0)) { + if (copy_from_user(extra, iwp->pointer, extra_size)) { + err = -EFAULT; + goto out; } + } - /* Call the handler */ - ret = handler(dev, &info, &(iwr->u), extra); + /* Call the handler */ + err = handler(dev, info, (union iwreq_data *) iwp, extra); - /* If we have something to return to the user */ - if (!ret && IW_IS_GET(cmd)) { + /* If we have something to return to the user */ + if (!err && IW_IS_GET(cmd)) { + /* Adjust for the actual length if it's variable, + * avoid leaking kernel bits outside. + */ + if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) + extra_size = adjust_priv_size(descr->get_args, iwp); - /* Adjust for the actual length if it's variable, - * avoid leaking kernel bits outside. */ - if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) { - extra_size = adjust_priv_size(descr->get_args, - &(iwr->u)); - } + if (copy_to_user(iwp->pointer, extra, extra_size)) + err = -EFAULT; + } - err = copy_to_user(iwr->u.data.pointer, extra, - extra_size); - if (err) - ret = -EFAULT; - } +out: + kfree(extra); + return err; +} - /* Cleanup - I told you it wasn't that long ;-) */ - kfree(extra); - } +static int ioctl_private_call(struct net_device *dev, struct iwreq *iwr, + unsigned int cmd, struct iw_request_info *info, + iw_handler handler) +{ + int extra_size = 0, ret = -EINVAL; + const struct iw_priv_args *descr; + extra_size = get_priv_descr_and_size(dev, cmd, &descr); + + /* Check if we have a pointer to user space data or not. */ + if (extra_size == 0) { + /* No extra arguments. Trivial to handle */ + ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); + } else { + ret = ioctl_private_iw_point(&iwr->u.data, cmd, descr, + handler, dev, info, extra_size); + } /* Call commit handler if needed and defined */ if (ret == -EIWCOMMIT) @@ -999,12 +1005,21 @@ static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr, } /* ---------------------------------------------------------------- */ +typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *, + unsigned int, struct iw_request_info *, + iw_handler); + /* * Main IOCTl dispatcher. * Check the type of IOCTL and call the appropriate wrapper... */ -static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd) +static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, + unsigned int cmd, + struct iw_request_info *info, + wext_ioctl_func standard, + wext_ioctl_func private) { + struct iwreq *iwr = (struct iwreq *) ifr; struct net_device *dev; iw_handler handler; @@ -1019,12 +1034,12 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned i * Note that 'cmd' is already filtered in dev_ioctl() with * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */ if (cmd == SIOCGIWSTATS) - return ioctl_standard_call(dev, ifr, cmd, - &iw_handler_get_iwstats); + return standard(dev, iwr, cmd, info, + &iw_handler_get_iwstats); if (cmd == SIOCGIWPRIV && dev->wireless_handlers) - return ioctl_standard_call(dev, ifr, cmd, - &iw_handler_get_private); + return standard(dev, iwr, cmd, info, + &iw_handler_get_private); /* Basic check */ if (!netif_device_present(dev)) @@ -1035,9 +1050,9 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned i if (handler) { /* Standard and private are not the same */ if (cmd < SIOCIWFIRSTPRIV) - return ioctl_standard_call(dev, ifr, cmd, handler); + return standard(dev, iwr, cmd, info, handler); else - return ioctl_private_call(dev, ifr, cmd, handler); + return private(dev, iwr, cmd, info, handler); } /* Old driver API : call driver ioctl handler */ if (dev->do_ioctl) @@ -1045,27 +1060,154 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned i return -EOPNOTSUPP; } -/* entry point from dev ioctl */ -int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, - void __user *arg) +/* If command is `set a parameter', or `get the encoding parameters', + * check if the user has the right to do it. + */ +static int wext_permission_check(unsigned int cmd) { - int ret; - - /* If command is `set a parameter', or - * `get the encoding parameters', check if - * the user has the right to do it */ if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT) && !capable(CAP_NET_ADMIN)) return -EPERM; + return 0; +} + +/* entry point from dev ioctl */ +static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr, + unsigned int cmd, struct iw_request_info *info, + wext_ioctl_func standard, + wext_ioctl_func private) +{ + int ret = wext_permission_check(cmd); + + if (ret) + return ret; + dev_load(net, ifr->ifr_name); rtnl_lock(); - ret = wireless_process_ioctl(net, ifr, cmd); + ret = wireless_process_ioctl(net, ifr, cmd, info, standard, private); rtnl_unlock(); - if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct iwreq))) + + return ret; +} + +int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, + void __user *arg) +{ + struct iw_request_info info = { .cmd = cmd, .flags = 0 }; + int ret; + + ret = wext_ioctl_dispatch(net, ifr, cmd, &info, + ioctl_standard_call, + ioctl_private_call); + if (ret >= 0 && + IW_IS_GET(cmd) && + copy_to_user(arg, ifr, sizeof(struct iwreq))) + return -EFAULT; + + return ret; +} + +#ifdef CONFIG_COMPAT +static int compat_standard_call(struct net_device *dev, + struct iwreq *iwr, + unsigned int cmd, + struct iw_request_info *info, + iw_handler handler) +{ + const struct iw_ioctl_description *descr; + struct compat_iw_point *iwp_compat; + struct iw_point iwp; + int err; + + descr = standard_ioctl + (cmd - SIOCIWFIRST); + + if (descr->header_type != IW_HEADER_TYPE_POINT) + return ioctl_standard_call(dev, iwr, cmd, info, handler); + + iwp_compat = (struct compat_iw_point *) &iwr->u.data; + iwp.pointer = compat_ptr(iwp_compat->pointer); + iwp.length = iwp_compat->length; + iwp.flags = iwp_compat->flags; + + err = ioctl_standard_iw_point(&iwp, cmd, descr, handler, dev, info); + + iwp_compat->pointer = ptr_to_compat(iwp.pointer); + iwp_compat->length = iwp.length; + iwp_compat->flags = iwp.flags; + + return err; +} + +static int compat_private_call(struct net_device *dev, struct iwreq *iwr, + unsigned int cmd, struct iw_request_info *info, + iw_handler handler) +{ + const struct iw_priv_args *descr; + int ret, extra_size; + + extra_size = get_priv_descr_and_size(dev, cmd, &descr); + + /* Check if we have a pointer to user space data or not. */ + if (extra_size == 0) { + /* No extra arguments. Trivial to handle */ + ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); + } else { + struct compat_iw_point *iwp_compat; + struct iw_point iwp; + + iwp_compat = (struct compat_iw_point *) &iwr->u.data; + iwp.pointer = compat_ptr(iwp_compat->pointer); + iwp.length = iwp_compat->length; + iwp.flags = iwp_compat->flags; + + ret = ioctl_private_iw_point(&iwp, cmd, descr, + handler, dev, info, extra_size); + + iwp_compat->pointer = ptr_to_compat(iwp.pointer); + iwp_compat->length = iwp.length; + iwp_compat->flags = iwp.flags; + } + + /* Call commit handler if needed and defined */ + if (ret == -EIWCOMMIT) + ret = call_commit_handler(dev); + + return ret; +} + +int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct iw_request_info info; + struct iwreq iwr; + char *colon; + int ret; + + if (copy_from_user(&iwr, argp, sizeof(struct iwreq))) + return -EFAULT; + + iwr.ifr_name[IFNAMSIZ-1] = 0; + colon = strchr(iwr.ifr_name, ':'); + if (colon) + *colon = 0; + + info.cmd = cmd; + info.flags = IW_REQUEST_FLAG_COMPAT; + + ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd, &info, + compat_standard_call, + compat_private_call); + + if (ret >= 0 && + IW_IS_GET(cmd) && + copy_to_user(argp, &iwr, sizeof(struct iwreq))) return -EFAULT; + return ret; } +#endif /************************* EVENT PROCESSING *************************/ /* @@ -1157,7 +1299,7 @@ static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len) struct sk_buff *skb; int err; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 6ba67c523c16..9fc5b023d111 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -191,7 +191,7 @@ static int x25_device_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct x25_neigh *nb; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->type == ARPHRD_X25 @@ -555,13 +555,11 @@ static struct sock *x25_make_new(struct sock *osk) x25 = x25_sk(sk); sk->sk_type = osk->sk_type; - sk->sk_socket = osk->sk_socket; sk->sk_priority = osk->sk_priority; sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; - sk->sk_sleep = osk->sk_sleep; sk->sk_backlog_rcv = osk->sk_backlog_rcv; sock_copy_flags(sk, osk); @@ -614,8 +612,7 @@ static int x25_release(struct socket *sock) break; } - sock->sk = NULL; - sk->sk_socket = NULL; /* Not used, but we should do this */ + sock_orphan(sk); out: return 0; } @@ -808,14 +805,12 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags) if (!skb->sk) goto out2; newsk = skb->sk; - newsk->sk_socket = newsock; - newsk->sk_sleep = &newsock->wait; + sock_graft(newsk, newsock); /* Now attach up the new socket */ skb->sk = NULL; kfree_skb(skb); sk->sk_ack_backlog--; - newsock->sk = newsk; newsock->state = SS_CONNECTED; rc = 0; out2: diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 3ff206c0ae94..3e1efe534645 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -95,7 +95,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, struct sk_buff *nskb; struct x25_neigh *nb; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; nskb = skb_copy(skb, GFP_ATOMIC); diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 9201ef8ad90e..6d081674515f 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -46,6 +46,12 @@ config XFRM_STATISTICS If unsure, say N. +config XFRM_IPCOMP + tristate + select XFRM + select CRYPTO + select CRYPTO_DEFLATE + config NET_KEY tristate "PF_KEY sockets" select XFRM diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 332cfb0ff566..0f439a72ccab 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o xfrm_algo.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o +obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index ac765dd9c7f5..96036cf2216d 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -200,8 +200,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "hmac(ripemd160)", - .compat = "ripemd160", + .name = "hmac(rmd160)", + .compat = "rmd160", .uinfo = { .auth = { @@ -718,7 +718,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -748,7 +748,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c new file mode 100644 index 000000000000..c609a4b98e15 --- /dev/null +++ b/net/xfrm/xfrm_ipcomp.c @@ -0,0 +1,384 @@ +/* + * IP Payload Compression Protocol (IPComp) - RFC3173. + * + * Copyright (c) 2003 James Morris <jmorris@intercode.com.au> + * Copyright (c) 2003-2008 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Todo: + * - Tunable compression parameters. + * - Compression stats. + * - Adaptive compression. + */ + +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/gfp.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/smp.h> +#include <linux/vmalloc.h> +#include <net/ip.h> +#include <net/ipcomp.h> +#include <net/xfrm.h> + +struct ipcomp_tfms { + struct list_head list; + struct crypto_comp **tfms; + int users; +}; + +static DEFINE_MUTEX(ipcomp_resource_mutex); +static void **ipcomp_scratches; +static int ipcomp_scratch_users; +static LIST_HEAD(ipcomp_tfms_list); + +static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipcomp_data *ipcd = x->data; + const int plen = skb->len; + int dlen = IPCOMP_SCRATCH_SIZE; + const u8 *start = skb->data; + const int cpu = get_cpu(); + u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); + struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); + int len; + + if (err) + goto out; + + if (dlen < (plen + sizeof(struct ip_comp_hdr))) { + err = -EINVAL; + goto out; + } + + len = dlen - plen; + if (len > skb_tailroom(skb)) + len = skb_tailroom(skb); + + skb->truesize += len; + __skb_put(skb, len); + + len += plen; + skb_copy_to_linear_data(skb, scratch, len); + + while ((scratch += len, dlen -= len) > 0) { + skb_frag_t *frag; + + err = -EMSGSIZE; + if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) + goto out; + + frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; + frag->page = alloc_page(GFP_ATOMIC); + + err = -ENOMEM; + if (!frag->page) + goto out; + + len = PAGE_SIZE; + if (dlen < len) + len = dlen; + + memcpy(page_address(frag->page), scratch, len); + + frag->page_offset = 0; + frag->size = len; + skb->truesize += len; + skb->data_len += len; + skb->len += len; + + skb_shinfo(skb)->nr_frags++; + } + + err = 0; + +out: + put_cpu(); + return err; +} + +int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) +{ + int nexthdr; + int err = -ENOMEM; + struct ip_comp_hdr *ipch; + + if (skb_linearize_cow(skb)) + goto out; + + skb->ip_summed = CHECKSUM_NONE; + + /* Remove ipcomp header and decompress original payload */ + ipch = (void *)skb->data; + nexthdr = ipch->nexthdr; + + skb->transport_header = skb->network_header + sizeof(*ipch); + __skb_pull(skb, sizeof(*ipch)); + err = ipcomp_decompress(x, skb); + if (err) + goto out; + + err = nexthdr; + +out: + return err; +} +EXPORT_SYMBOL_GPL(ipcomp_input); + +static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipcomp_data *ipcd = x->data; + const int plen = skb->len; + int dlen = IPCOMP_SCRATCH_SIZE; + u8 *start = skb->data; + const int cpu = get_cpu(); + u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); + struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + int err; + + local_bh_disable(); + err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); + local_bh_enable(); + if (err) + goto out; + + if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) { + err = -EMSGSIZE; + goto out; + } + + memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); + put_cpu(); + + pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); + return 0; + +out: + put_cpu(); + return err; +} + +int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + struct ip_comp_hdr *ipch; + struct ipcomp_data *ipcd = x->data; + + if (skb->len < ipcd->threshold) { + /* Don't bother compressing */ + goto out_ok; + } + + if (skb_linearize_cow(skb)) + goto out_ok; + + err = ipcomp_compress(x, skb); + + if (err) { + goto out_ok; + } + + /* Install ipcomp header, convert into ipcomp datagram. */ + ipch = ip_comp_hdr(skb); + ipch->nexthdr = *skb_mac_header(skb); + ipch->flags = 0; + ipch->cpi = htons((u16 )ntohl(x->id.spi)); + *skb_mac_header(skb) = IPPROTO_COMP; +out_ok: + skb_push(skb, -skb_network_offset(skb)); + return 0; +} +EXPORT_SYMBOL_GPL(ipcomp_output); + +static void ipcomp_free_scratches(void) +{ + int i; + void **scratches; + + if (--ipcomp_scratch_users) + return; + + scratches = ipcomp_scratches; + if (!scratches) + return; + + for_each_possible_cpu(i) + vfree(*per_cpu_ptr(scratches, i)); + + free_percpu(scratches); +} + +static void **ipcomp_alloc_scratches(void) +{ + int i; + void **scratches; + + if (ipcomp_scratch_users++) + return ipcomp_scratches; + + scratches = alloc_percpu(void *); + if (!scratches) + return NULL; + + ipcomp_scratches = scratches; + + for_each_possible_cpu(i) { + void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); + if (!scratch) + return NULL; + *per_cpu_ptr(scratches, i) = scratch; + } + + return scratches; +} + +static void ipcomp_free_tfms(struct crypto_comp **tfms) +{ + struct ipcomp_tfms *pos; + int cpu; + + list_for_each_entry(pos, &ipcomp_tfms_list, list) { + if (pos->tfms == tfms) + break; + } + + WARN_ON(!pos); + + if (--pos->users) + return; + + list_del(&pos->list); + kfree(pos); + + if (!tfms) + return; + + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); + } + free_percpu(tfms); +} + +static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) +{ + struct ipcomp_tfms *pos; + struct crypto_comp **tfms; + int cpu; + + /* This can be any valid CPU ID so we don't need locking. */ + cpu = raw_smp_processor_id(); + + list_for_each_entry(pos, &ipcomp_tfms_list, list) { + struct crypto_comp *tfm; + + tfms = pos->tfms; + tfm = *per_cpu_ptr(tfms, cpu); + + if (!strcmp(crypto_comp_name(tfm), alg_name)) { + pos->users++; + return tfms; + } + } + + pos = kmalloc(sizeof(*pos), GFP_KERNEL); + if (!pos) + return NULL; + + pos->users = 1; + INIT_LIST_HEAD(&pos->list); + list_add(&pos->list, &ipcomp_tfms_list); + + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); + if (!tfms) + goto error; + + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + *per_cpu_ptr(tfms, cpu) = tfm; + } + + return tfms; + +error: + ipcomp_free_tfms(tfms); + return NULL; +} + +static void ipcomp_free_data(struct ipcomp_data *ipcd) +{ + if (ipcd->tfms) + ipcomp_free_tfms(ipcd->tfms); + ipcomp_free_scratches(); +} + +void ipcomp_destroy(struct xfrm_state *x) +{ + struct ipcomp_data *ipcd = x->data; + if (!ipcd) + return; + xfrm_state_delete_tunnel(x); + mutex_lock(&ipcomp_resource_mutex); + ipcomp_free_data(ipcd); + mutex_unlock(&ipcomp_resource_mutex); + kfree(ipcd); +} +EXPORT_SYMBOL_GPL(ipcomp_destroy); + +int ipcomp_init_state(struct xfrm_state *x) +{ + int err; + struct ipcomp_data *ipcd; + struct xfrm_algo_desc *calg_desc; + + err = -EINVAL; + if (!x->calg) + goto out; + + if (x->encap) + goto out; + + err = -ENOMEM; + ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); + if (!ipcd) + goto out; + + mutex_lock(&ipcomp_resource_mutex); + if (!ipcomp_alloc_scratches()) + goto error; + + ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name); + if (!ipcd->tfms) + goto error; + mutex_unlock(&ipcomp_resource_mutex); + + calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); + BUG_ON(!calg_desc); + ipcd->threshold = calg_desc->uinfo.comp.threshold; + x->data = ipcd; + err = 0; +out: + return err; + +error: + ipcomp_free_data(ipcd); + mutex_unlock(&ipcomp_resource_mutex); + kfree(ipcd); + goto out; +} +EXPORT_SYMBOL_GPL(ipcomp_init_state); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); +MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 09cd9c0c2d80..3f964db908a7 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -25,11 +25,11 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) struct dst_entry *dst = skb->dst; int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) - skb_headroom(skb); + int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); - if (nhead > 0) - return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + if (nhead > 0 || ntail > 0) + return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); - /* Check tail too... */ return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cae9fd815543..841b32a2e680 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2360,7 +2360,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; switch (event) { diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 72fddafd891a..4c6914ef7d92 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -538,7 +538,7 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { - BUG_TRAP(x->km.state == XFRM_STATE_DEAD); + WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_lock); list_del(&x->all); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a1b0fbe3ea35..04c41504f84c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -50,19 +50,8 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) switch (type) { case XFRMA_ALG_AUTH: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "digest_null") != 0) - return -EINVAL; - break; - case XFRMA_ALG_CRYPT: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "cipher_null") != 0) - return -EINVAL; - break; - case XFRMA_ALG_COMP: - /* Zero length keys are legal. */ break; default: @@ -288,9 +277,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; - if (!x->sel.family) + if (!x->sel.family && !(p->flags & XFRM_STATE_AF_UNSPEC)) x->sel.family = p->family; - } /* |