diff options
Diffstat (limited to 'net')
373 files changed, 17005 insertions, 12187 deletions
diff --git a/net/802/fc.c b/net/802/fc.c index 675d9ba8e591..cb3475ea6fda 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -35,7 +35,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) + const void *daddr, const void *saddr, unsigned len) { struct fch_hdr *fch; int hdr_len; @@ -95,11 +95,14 @@ static int fc_rebuild_header(struct sk_buff *skb) #endif } +static const struct header_ops fc_header_ops = { + .create = fc_header, + .rebuild = fc_rebuild_header, +}; + static void fc_setup(struct net_device *dev) { - dev->hard_header = fc_header; - dev->rebuild_header = fc_rebuild_header; - + dev->header_ops = &fc_header_ops; dev->type = ARPHRD_IEEE802; dev->hard_header_len = FC_HLEN; dev->mtu = 2024; diff --git a/net/802/fddi.c b/net/802/fddi.c index 91dde41b5481..0549317b9356 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -52,7 +52,7 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) + const void *daddr, const void *saddr, unsigned len) { int hl = FDDI_K_SNAP_HLEN; struct fddihdr *fddi; @@ -175,11 +175,15 @@ static int fddi_change_mtu(struct net_device *dev, int new_mtu) return(0); } +static const struct header_ops fddi_header_ops = { + .create = fddi_header, + .rebuild = fddi_rebuild_header, +}; + static void fddi_setup(struct net_device *dev) { dev->change_mtu = fddi_change_mtu; - dev->hard_header = fddi_header; - dev->rebuild_header = fddi_rebuild_header; + dev->header_ops = &fddi_header_ops; dev->type = ARPHRD_FDDI; dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ diff --git a/net/802/hippi.c b/net/802/hippi.c index 87ffc12b6891..e35dc1e0915d 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -45,8 +45,8 @@ */ static int hippi_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, - unsigned len) + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN); struct hippi_cb *hcb = (struct hippi_cb *) skb->cb; @@ -182,16 +182,18 @@ static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) return 0; } +static const struct header_ops hippi_header_ops = { + .create = hippi_header, + .rebuild = hippi_rebuild_header, +}; + + static void hippi_setup(struct net_device *dev) { dev->set_multicast_list = NULL; dev->change_mtu = hippi_change_mtu; - dev->hard_header = hippi_header; - dev->rebuild_header = hippi_rebuild_header; + dev->header_ops = &hippi_header_ops; dev->set_mac_address = hippi_mac_addr; - dev->hard_header_parse = NULL; - dev->hard_header_cache = NULL; - dev->header_cache_update = NULL; dev->neigh_setup = hippi_neigh_setup_dev; /* diff --git a/net/802/p8023.c b/net/802/p8023.c index 53cf05709283..6ab1835041a7 100644 --- a/net/802/p8023.c +++ b/net/802/p8023.c @@ -31,7 +31,7 @@ static int p8023_request(struct datalink_proto *dl, { struct net_device *dev = skb->dev; - dev->hard_header(skb, dev, ETH_P_802_3, dest_node, NULL, skb->len); + dev_hard_header(skb, dev, ETH_P_802_3, dest_node, NULL, skb->len); return dev_queue_xmit(skb); } diff --git a/net/802/tr.c b/net/802/tr.c index e56e61a7f545..a2bd0f2e3af8 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -36,6 +36,7 @@ #include <linux/seq_file.h> #include <linux/init.h> #include <net/arp.h> +#include <net/net_namespace.h> static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev); static void rif_check_expire(unsigned long dummy); @@ -99,7 +100,7 @@ static inline unsigned long rif_hash(const unsigned char *addr) static int tr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) + const void *daddr, const void *saddr, unsigned len) { struct trh_hdr *trh; int hdr_len; @@ -141,7 +142,7 @@ static int tr_header(struct sk_buff *skb, struct net_device *dev, if(daddr) { memcpy(trh->daddr,daddr,dev->addr_len); - tr_source_route(skb,trh,dev); + tr_source_route(skb, trh, dev); return(hdr_len); } @@ -246,7 +247,8 @@ __be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev) * We try to do source routing... */ -void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device *dev) +void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh, + struct net_device *dev) { int slack; unsigned int hash; @@ -282,8 +284,10 @@ void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device * if(entry) { #if TR_SR_DEBUG -printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0], - trh->daddr[1],trh->daddr[2],trh->daddr[3],trh->daddr[4],trh->daddr[5]); +{ +DECLARE_MAC_BUF(mac); +printk("source routing for %s\n",print_mac(mac, trh->daddr)); +} #endif if(!entry->local_ring && (ntohs(entry->rcf) & TR_RCF_LEN_MASK) >> 8) { @@ -365,10 +369,9 @@ static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev) if(entry==NULL) { #if TR_SR_DEBUG -printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", - trh->saddr[0],trh->saddr[1],trh->saddr[2], - trh->saddr[3],trh->saddr[4],trh->saddr[5], - ntohs(trh->rcf)); + DECLARE_MAC_BUF(mac); + printk("adding rif_entry: addr:%s rcf:%04X\n", + print_mac(mac, trh->saddr), ntohs(trh->rcf)); #endif /* * Allocate our new entry. A failure to allocate loses @@ -413,10 +416,11 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", !(trh->rcf & htons(TR_RCF_BROADCAST_MASK))) { #if TR_SR_DEBUG -printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", - trh->saddr[0],trh->saddr[1],trh->saddr[2], - trh->saddr[3],trh->saddr[4],trh->saddr[5], - ntohs(trh->rcf)); +{ +DECLARE_MAC_BUF(mac); +printk("updating rif_entry: addr:%s rcf:%04X\n", + print_mac(mac, trh->saddr), ntohs(trh->rcf)); +} #endif entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK); memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short)); @@ -527,19 +531,19 @@ static int rif_seq_show(struct seq_file *seq, void *v) { int j, rcf_len, segment, brdgnmb; struct rif_cache *entry = v; + DECLARE_MAC_BUF(mac); if (v == SEQ_START_TOKEN) seq_puts(seq, "if TR address TTL rcf routing segments\n"); else { - struct net_device *dev = dev_get_by_index(entry->iface); + struct net_device *dev = dev_get_by_index(&init_net, entry->iface); long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout) - (long) jiffies; - seq_printf(seq, "%s %02X:%02X:%02X:%02X:%02X:%02X %7li ", + seq_printf(seq, "%s %s %7li ", dev?dev->name:"?", - entry->addr[0],entry->addr[1],entry->addr[2], - entry->addr[3],entry->addr[4],entry->addr[5], + print_mac(mac, entry->addr), ttl/HZ); if (entry->local_ring) @@ -589,14 +593,18 @@ static const struct file_operations rif_seq_fops = { #endif +static const struct header_ops tr_header_ops = { + .create = tr_header, + .rebuild= tr_rebuild_header, +}; + static void tr_setup(struct net_device *dev) { /* * Configure and register */ - dev->hard_header = tr_header; - dev->rebuild_header = tr_rebuild_header; + dev->header_ops = &tr_header_ops; dev->type = ARPHRD_IEEE802_TR; dev->hard_header_len = TR_HLEN; @@ -639,7 +647,7 @@ static int __init rif_init(void) rif_timer.function = rif_check_expire; add_timer(&rif_timer); - proc_net_fops_create("tr_rif", S_IRUGO, &rif_seq_fops); + proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops); return 0; } diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2a546919d6fb..3fe4fc86055f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -31,6 +31,7 @@ #include <net/arp.h> #include <linux/rtnetlink.h> #include <linux/notifier.h> +#include <net/net_namespace.h> #include <linux/if_vlan.h> #include "vlan.h" @@ -50,7 +51,7 @@ static char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>"; static char vlan_buggyright[] = "David S. Miller <davem@redhat.com>"; static int vlan_device_event(struct notifier_block *, unsigned long, void *); -static int vlan_ioctl_handler(void __user *); +static int vlan_ioctl_handler(struct net *net, void __user *); static int unregister_vlan_dev(struct net_device *, unsigned short ); static struct notifier_block vlan_notifier_block = { @@ -313,6 +314,12 @@ int unregister_vlan_device(struct net_device *dev) */ static struct lock_class_key vlan_netdev_xmit_lock_key; +static const struct header_ops vlan_header_ops = { + .create = vlan_dev_hard_header, + .rebuild = vlan_dev_rebuild_header, + .parse = eth_header_parse, +}; + static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; @@ -324,24 +331,23 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); + /* ipv6 shared card related stuff */ + dev->dev_id = real_dev->dev_id; + if (is_zero_ether_addr(dev->dev_addr)) memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len); if (is_zero_ether_addr(dev->broadcast)) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); if (real_dev->features & NETIF_F_HW_VLAN_TX) { - dev->hard_header = real_dev->hard_header; + dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; - dev->rebuild_header = real_dev->rebuild_header; } else { - dev->hard_header = vlan_dev_hard_header; + dev->header_ops = &vlan_header_ops; dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; dev->hard_start_xmit = vlan_dev_hard_start_xmit; - dev->rebuild_header = vlan_dev_rebuild_header; } - dev->hard_header_parse = real_dev->hard_header_parse; - dev->hard_header_cache = NULL; lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key); return 0; @@ -349,8 +355,6 @@ static int vlan_dev_init(struct net_device *dev) void vlan_setup(struct net_device *new_dev) { - SET_MODULE_OWNER(new_dev); - ether_setup(new_dev); /* new_dev->ifindex = 0; it will be set when added to @@ -603,6 +607,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, int i, flgs; struct net_device *vlandev; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (!grp) goto out; @@ -693,7 +700,7 @@ out: * o execute requested action or pass command to the device driver * arg is really a struct vlan_ioctl_args __user *. */ -static int vlan_ioctl_handler(void __user *arg) +static int vlan_ioctl_handler(struct net *net, void __user *arg) { int err; unsigned short vid = 0; @@ -722,7 +729,7 @@ static int vlan_ioctl_handler(void __user *arg) case GET_VLAN_REALDEV_NAME_CMD: case GET_VLAN_VID_CMD: err = -ENODEV; - dev = __dev_get_by_name(args.device1); + dev = __dev_get_by_name(&init_net, args.device1); if (!dev) goto out; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 7df5b2935579..cf4a80d06b35 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -53,8 +53,8 @@ int vlan_dev_rebuild_header(struct sk_buff *skb); int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev); int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, - unsigned len); + unsigned short type, const void *daddr, + const void *saddr, unsigned len); int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); int vlan_dev_change_mtu(struct net_device *dev, int new_mtu); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 328759c32d61..1a1740aa9a8b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -122,6 +122,11 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, unsigned short vlan_TCI; __be16 proto; + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return -1; + } + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return -1; @@ -338,8 +343,8 @@ static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev * physical devices. */ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, - unsigned len) + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { struct vlan_hdr *vhdr; unsigned short veth_TCI = 0; @@ -429,21 +434,19 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, if (build_vlan_header) { /* Now make the underlying real hard header */ - rc = dev->hard_header(skb, dev, ETH_P_8021Q, daddr, saddr, len + VLAN_HLEN); - - if (rc > 0) { + rc = dev_hard_header(skb, dev, ETH_P_8021Q, daddr, saddr, + len + VLAN_HLEN); + if (rc > 0) rc += VLAN_HLEN; - } else if (rc < 0) { + else if (rc < 0) rc -= VLAN_HLEN; - } - } else { + } else /* If here, then we'll just make a normal looking ethernet frame, * but, the hard_start_xmit method will insert the tag (it has to * be able to do this for bridged and other skbs that don't come * down the protocol stack in an orderly manner. */ - rc = dev->hard_header(skb, dev, type, daddr, saddr, len); - } + rc = dev_hard_header(skb, dev, type, daddr, saddr, len); return rc; } diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 6cdd1e015e2d..0996185e2ed5 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> +#include <net/net_namespace.h> #include <net/netlink.h> #include <net/rtnetlink.h> #include "vlan.h" @@ -112,7 +113,7 @@ static int vlan_newlink(struct net_device *dev, if (!tb[IFLA_LINK]) return -EINVAL; - real_dev = __dev_get_by_index(nla_get_u32(tb[IFLA_LINK])); + real_dev = __dev_get_by_index(&init_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index bd08aa090763..6cefdf8e381a 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -33,6 +33,7 @@ #include <linux/fs.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> +#include <net/net_namespace.h> #include "vlanproc.h" #include "vlan.h" @@ -143,7 +144,7 @@ void vlan_proc_cleanup(void) remove_proc_entry(name_conf, proc_vlan_dir); if (proc_vlan_dir) - proc_net_remove(name_root); + proc_net_remove(&init_net, name_root); /* Dynamically added entries should be cleaned up as their vlan_device * is removed, so we should not have to take care of it here... @@ -156,7 +157,7 @@ void vlan_proc_cleanup(void) int __init vlan_proc_init(void) { - proc_vlan_dir = proc_mkdir(name_root, proc_net); + proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net); if (proc_vlan_dir) { proc_vlan_conf = create_proc_entry(name_conf, S_IFREG|S_IRUSR|S_IWUSR, @@ -253,7 +254,7 @@ static void *vlan_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!is_vlan_dev(dev)) continue; @@ -272,9 +273,9 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) dev = (struct net_device *)v; if (v == SEQ_START_TOKEN) - dev = net_device_entry(&dev_base_head); + dev = net_device_entry(&init_net.dev_base_head); - for_each_netdev_continue(dev) { + for_each_netdev_continue(&init_net, dev) { if (!is_vlan_dev(dev)) continue; diff --git a/net/Kconfig b/net/Kconfig index cdba08ca2efe..ab4e6da5012f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -27,6 +27,14 @@ if NET menu "Networking options" +config NET_NS + bool "Network namespace support" + default n + depends on EXPERIMENTAL && !SYSFS + help + Allow user space to create what appear to be multiple instances + of the network stack. + source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 3d1655f98388..6c5c6dc098ec 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -330,15 +330,19 @@ static void aarp_expire_timeout(unsigned long unused) static int aarp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; int ct; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) { write_lock_bh(&aarp_lock); for (ct = 0; ct < AARP_HASH_SIZE; ct++) { - __aarp_expire_device(&resolved[ct], ptr); - __aarp_expire_device(&unresolved[ct], ptr); - __aarp_expire_device(&proxies[ct], ptr); + __aarp_expire_device(&resolved[ct], dev); + __aarp_expire_device(&unresolved[ct], dev); + __aarp_expire_device(&proxies[ct], dev); } write_unlock_bh(&aarp_lock); @@ -712,6 +716,9 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, struct atalk_addr sa, *ma, da; struct atalk_iface *ifa; + if (dev->nd_net != &init_net) + goto out0; + /* We only do Ethernet SNAP AARP. */ if (dev->type != ARPHRD_ETHER) goto out0; @@ -815,8 +822,6 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, * address. So as a precaution flush any * entries we have for this address. */ - struct aarp_entry *a; - a = __aarp_find_entry(resolved[sa.s_node % (AARP_HASH_SIZE - 1)], skb->dev, &sa); @@ -990,6 +995,7 @@ static int aarp_seq_show(struct seq_file *seq, void *v) struct aarp_iter_state *iter = seq->private; struct aarp_entry *entry = v; unsigned long now = jiffies; + DECLARE_MAC_BUF(mac); if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -1000,13 +1006,7 @@ static int aarp_seq_show(struct seq_file *seq, void *v) ntohs(entry->target_addr.s_net), (unsigned int) entry->target_addr.s_node, entry->dev ? entry->dev->name : "????"); - seq_printf(seq, "%02X:%02X:%02X:%02X:%02X:%02X", - entry->hwaddr[0] & 0xFF, - entry->hwaddr[1] & 0xFF, - entry->hwaddr[2] & 0xFF, - entry->hwaddr[3] & 0xFF, - entry->hwaddr[4] & 0xFF, - entry->hwaddr[5] & 0xFF); + seq_printf(seq, "%s", print_mac(mac, entry->hwaddr)); seq_printf(seq, " %8s", dt2str((long)entry->expires_at - (long)now)); if (iter->table == unresolved) diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 87a582cc8111..05d9652afcb6 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <linux/atalk.h> @@ -271,7 +272,7 @@ int __init atalk_proc_init(void) struct proc_dir_entry *p; int rc = -ENOMEM; - atalk_proc_dir = proc_mkdir("atalk", proc_net); + atalk_proc_dir = proc_mkdir("atalk", init_net.proc_net); if (!atalk_proc_dir) goto out; atalk_proc_dir->owner = THIS_MODULE; @@ -306,7 +307,7 @@ out_socket: out_route: remove_proc_entry("interface", atalk_proc_dir); out_interface: - remove_proc_entry("atalk", proc_net); + remove_proc_entry("atalk", init_net.proc_net); goto out; } @@ -316,5 +317,5 @@ void __exit atalk_proc_exit(void) remove_proc_entry("route", atalk_proc_dir); remove_proc_entry("socket", atalk_proc_dir); remove_proc_entry("arp", atalk_proc_dir); - remove_proc_entry("atalk", proc_net); + remove_proc_entry("atalk", init_net.proc_net); } diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index fbdfb1224ae1..7c0b5151d526 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -647,9 +647,14 @@ static inline void atalk_dev_down(struct net_device *dev) static int ddp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) /* Discard any use of this */ - atalk_dev_down(ptr); + atalk_dev_down(dev); return NOTIFY_DONE; } @@ -672,7 +677,7 @@ static int atif_ioctl(int cmd, void __user *arg) if (copy_from_user(&atreq, arg, sizeof(atreq))) return -EFAULT; - dev = __dev_get_by_name(atreq.ifr_name); + dev = __dev_get_by_name(&init_net, atreq.ifr_name); if (!dev) return -ENODEV; @@ -896,7 +901,7 @@ static int atrtr_ioctl(unsigned int cmd, void __user *arg) if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) return -EFAULT; name[IFNAMSIZ-1] = '\0'; - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(&init_net, name); if (!dev) return -ENODEV; } @@ -1024,11 +1029,14 @@ static struct proto ddp_proto = { * Create a socket. Initialise the socket, blank the addresses * set the state. */ -static int atalk_create(struct socket *sock, int protocol) +static int atalk_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + /* * We permit SOCK_DGRAM and RAW is an extension. It is trivial to do * and gives you the full ELAP frame. Should be handy for CAP 8) @@ -1036,7 +1044,7 @@ static int atalk_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) goto out; rc = -ENOMEM; - sk = sk_alloc(PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); + sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); if (!sk) goto out; rc = 0; @@ -1265,7 +1273,7 @@ static __inline__ int is_ip_over_ddp(struct sk_buff *skb) static int handle_ip_over_ddp(struct sk_buff *skb) { - struct net_device *dev = __dev_get_by_name("ipddp0"); + struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0"); struct net_device_stats *stats; /* This needs to be able to handle ipddp"N" devices */ @@ -1398,6 +1406,9 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, int origlen; __u16 len_hops; + if (dev->nd_net != &init_net) + goto freeit; + /* Don't mangle buffer if shared */ if (!(skb = skb_share_check(skb, GFP_ATOMIC))) goto out; @@ -1483,6 +1494,9 @@ freeit: static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { + if (dev->nd_net != &init_net) + goto freeit; + /* Expand any short form frames */ if (skb_mac_header(skb)[2] == 1) { struct ddpehdr *ddp; diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c index 9e4dffc1e423..d856a62ab50f 100644 --- a/net/appletalk/dev.c +++ b/net/appletalk/dev.c @@ -24,11 +24,7 @@ static void ltalk_setup(struct net_device *dev) /* Fill in the fields of the device structure with localtalk-generic values. */ dev->change_mtu = ltalk_change_mtu; - dev->hard_header = NULL; - dev->rebuild_header = NULL; dev->set_mac_address = ltalk_mac_addr; - dev->hard_header_cache = NULL; - dev->header_cache_update= NULL; dev->type = ARPHRD_LOCALTLK; dev->hard_header_len = LTALK_HLEN; diff --git a/net/atm/br2684.c b/net/atm/br2684.c index c0f6861eefe3..c742d37bfb97 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -34,12 +34,6 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary */ /* #define FASTER_VERSION */ -#ifdef DEBUG -#define DPRINTK(format, args...) printk(KERN_DEBUG "br2684: " format, ##args) -#else -#define DPRINTK(format, args...) -#endif - #ifdef SKB_DEBUG static void skb_debug(const struct sk_buff *skb) { @@ -180,7 +174,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, skb_debug(skb); ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; - DPRINTK("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); if (!atm_may_send(atmvcc, skb->truesize)) { /* we free this here for now, because we cannot know in a higher layer whether the skb point it supplied wasn't freed yet. @@ -209,11 +203,11 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev) struct br2684_dev *brdev = BRPRIV(dev); struct br2684_vcc *brvcc; - DPRINTK("br2684_start_xmit, skb->dst=%p\n", skb->dst); + pr_debug("br2684_start_xmit, skb->dst=%p\n", skb->dst); read_lock(&devs_lock); brvcc = pick_outgoing_vcc(skb, brdev); if (brvcc == NULL) { - DPRINTK("no vcc attached to dev %s\n", dev->name); + pr_debug("no vcc attached to dev %s\n", dev->name); brdev->stats.tx_errors++; brdev->stats.tx_carrier_errors++; /* netif_stop_queue(dev); */ @@ -239,7 +233,7 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev) static struct net_device_stats *br2684_get_stats(struct net_device *dev) { - DPRINTK("br2684_get_stats\n"); + pr_debug("br2684_get_stats\n"); return &BRPRIV(dev)->stats; } @@ -390,7 +384,7 @@ packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb) static void br2684_close_vcc(struct br2684_vcc *brvcc) { - DPRINTK("removing VCC %p from dev %p\n", brvcc, brvcc->device); + pr_debug("removing VCC %p from dev %p\n", brvcc, brvcc->device); write_lock_irq(&devs_lock); list_del(&brvcc->brvccs); write_unlock_irq(&devs_lock); @@ -408,7 +402,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) struct br2684_dev *brdev = BRPRIV(net_dev); int plen = sizeof(llc_oui_pid_pad) + ETH_HLEN; - DPRINTK("br2684_push\n"); + pr_debug("br2684_push\n"); if (unlikely(skb == NULL)) { /* skb==NULL means VCC is being destroyed */ @@ -425,7 +419,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) skb_debug(skb); atm_return(atmvcc, skb->truesize); - DPRINTK("skb from brdev %p\n", brdev); + pr_debug("skb from brdev %p\n", brdev); if (brvcc->encaps == e_llc) { /* let us waste some time for checking the encapsulation. Note, that only 7 char is checked so frames with a valid FCS @@ -474,7 +468,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) #endif /* CONFIG_ATM_BR2684_IPFILTER */ skb->dev = net_dev; ATM_SKB(skb)->vcc = atmvcc; /* needed ? */ - DPRINTK("received packet's protocol: %x\n", ntohs(skb->protocol)); + pr_debug("received packet's protocol: %x\n", ntohs(skb->protocol)); skb_debug(skb); if (unlikely(!(net_dev->flags & IFF_UP))) { /* sigh, interface is down */ @@ -532,7 +526,7 @@ Note: we do not have explicit unassign, but look at _push() err = -EINVAL; goto error; } - DPRINTK("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps, + pr_debug("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps, brvcc); if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) { unsigned char *esi = atmvcc->dev->esi; @@ -612,7 +606,7 @@ static int br2684_create(void __user *arg) struct br2684_dev *brdev; struct atm_newif_br2684 ni; - DPRINTK("br2684_create\n"); + pr_debug("br2684_create\n"); if (copy_from_user(&ni, arg, sizeof ni)) { return -EFAULT; @@ -629,7 +623,7 @@ static int br2684_create(void __user *arg) brdev = BRPRIV(netdev); - DPRINTK("registered netdev %s\n", netdev->name); + pr_debug("registered netdev %s\n", netdev->name); /* open, stop, do_ioctl ? */ err = register_netdev(netdev); if (err < 0) { @@ -715,17 +709,13 @@ static int br2684_seq_show(struct seq_file *seq, void *v) br2684_devs); const struct net_device *net_dev = brdev->net_dev; const struct br2684_vcc *brvcc; + DECLARE_MAC_BUF(mac); - seq_printf(seq, "dev %.16s: num=%d, mac=%02X:%02X:" - "%02X:%02X:%02X:%02X (%s)\n", net_dev->name, - brdev->number, - net_dev->dev_addr[0], - net_dev->dev_addr[1], - net_dev->dev_addr[2], - net_dev->dev_addr[3], - net_dev->dev_addr[4], - net_dev->dev_addr[5], - brdev->mac_was_set ? "set" : "auto"); + seq_printf(seq, "dev %.16s: num=%d, mac=%s (%s)\n", + net_dev->name, + brdev->number, + print_mac(mac, net_dev->dev_addr), + brdev->mac_was_set ? "set" : "auto"); list_for_each_entry(brvcc, &brdev->brvccs, brvccs) { seq_printf(seq, " vcc %d.%d.%d: encaps=%s" diff --git a/net/atm/clip.c b/net/atm/clip.c index ecf0f79b94ae..741742f00797 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -40,14 +40,6 @@ #include "resources.h" #include <net/atmclip.h> - -#if 0 -#define DPRINTK(format,args...) printk(format,##args) -#else -#define DPRINTK(format,args...) -#endif - - static struct net_device *clip_devs; static struct atm_vcc *atmarpd; static struct neigh_table clip_tbl; @@ -59,7 +51,7 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) struct atmarp_ctrl *ctrl; struct sk_buff *skb; - DPRINTK("to_atmarpd(%d)\n", type); + pr_debug("to_atmarpd(%d)\n", type); if (!atmarpd) return -EUNATCH; skb = alloc_skb(sizeof(struct atmarp_ctrl),GFP_ATOMIC); @@ -79,7 +71,7 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry) { - DPRINTK("link_vcc %p to entry %p (neigh %p)\n", clip_vcc, entry, + pr_debug("link_vcc %p to entry %p (neigh %p)\n", clip_vcc, entry, entry->neigh); clip_vcc->entry = entry; clip_vcc->xoff = 0; /* @@@ may overrun buffer by one packet */ @@ -134,7 +126,7 @@ static int neigh_check_cb(struct neighbour *n) unsigned long exp = cv->last_use + cv->idle_timeout; if (cv->idle_timeout && time_after(jiffies, exp)) { - DPRINTK("releasing vcc %p->%p of entry %p\n", + pr_debug("releasing vcc %p->%p of entry %p\n", cv, cv->vcc, entry); vcc_release_async(cv->vcc, -ETIMEDOUT); } @@ -146,7 +138,7 @@ static int neigh_check_cb(struct neighbour *n) if (atomic_read(&n->refcnt) > 1) { struct sk_buff *skb; - DPRINTK("destruction postponed with ref %d\n", + pr_debug("destruction postponed with ref %d\n", atomic_read(&n->refcnt)); while ((skb = skb_dequeue(&n->arp_queue)) != NULL) @@ -155,7 +147,7 @@ static int neigh_check_cb(struct neighbour *n) return 0; } - DPRINTK("expired neigh %p\n", n); + pr_debug("expired neigh %p\n", n); return 1; } @@ -171,14 +163,14 @@ static int clip_arp_rcv(struct sk_buff *skb) { struct atm_vcc *vcc; - DPRINTK("clip_arp_rcv\n"); + pr_debug("clip_arp_rcv\n"); vcc = ATM_SKB(skb)->vcc; if (!vcc || !atm_charge(vcc, skb->truesize)) { dev_kfree_skb_any(skb); return 0; } - DPRINTK("pushing to %p\n", vcc); - DPRINTK("using %p\n", CLIP_VCC(vcc)->old_push); + pr_debug("pushing to %p\n", vcc); + pr_debug("using %p\n", CLIP_VCC(vcc)->old_push); CLIP_VCC(vcc)->old_push(vcc, skb); return 0; } @@ -196,9 +188,9 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) { struct clip_vcc *clip_vcc = CLIP_VCC(vcc); - DPRINTK("clip push\n"); + pr_debug("clip push\n"); if (!skb) { - DPRINTK("removing VCC %p\n", clip_vcc); + pr_debug("removing VCC %p\n", clip_vcc); if (clip_vcc->entry) unlink_clip_vcc(clip_vcc); clip_vcc->old_push(vcc, NULL); /* pass on the bad news */ @@ -247,7 +239,7 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb) int old; unsigned long flags; - DPRINTK("clip_pop(vcc %p)\n", vcc); + pr_debug("clip_pop(vcc %p)\n", vcc); clip_vcc->old_pop(vcc, skb); /* skb->dev == NULL in outbound ARP packets */ if (!dev) @@ -263,7 +255,7 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb) static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb) { - DPRINTK("clip_neigh_solicit (neigh %p, skb %p)\n", neigh, skb); + pr_debug("clip_neigh_solicit (neigh %p, skb %p)\n", neigh, skb); to_atmarpd(act_need, PRIV(neigh->dev)->number, NEIGH2ENTRY(neigh)->ip); } @@ -292,7 +284,7 @@ static int clip_constructor(struct neighbour *neigh) struct in_device *in_dev; struct neigh_parms *parms; - DPRINTK("clip_constructor (neigh %p, entry %p)\n", neigh, entry); + pr_debug("clip_constructor (neigh %p, entry %p)\n", neigh, entry); neigh->type = inet_addr_type(entry->ip); if (neigh->type != RTN_UNICAST) return -EINVAL; @@ -376,7 +368,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) int old; unsigned long flags; - DPRINTK("clip_start_xmit (skb %p)\n", skb); + pr_debug("clip_start_xmit (skb %p)\n", skb); if (!skb->dst) { printk(KERN_ERR "clip_start_xmit: skb->dst == NULL\n"); dev_kfree_skb(skb); @@ -412,9 +404,9 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) } return 0; } - DPRINTK("neigh %p, vccs %p\n", entry, entry->vccs); + pr_debug("neigh %p, vccs %p\n", entry, entry->vccs); ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc; - DPRINTK("using neighbour %p, vcc %p\n", skb->dst->neighbour, vcc); + pr_debug("using neighbour %p, vcc %p\n", skb->dst->neighbour, vcc); if (entry->vccs->encap) { void *here; @@ -425,7 +417,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = vcc->atm_options; entry->vccs->last_use = jiffies; - DPRINTK("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... */ if (old) { printk(KERN_WARNING "clip_start_xmit: XOFF->XOFF transition\n"); @@ -468,7 +460,7 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL); if (!clip_vcc) return -ENOMEM; - DPRINTK("mkip clip_vcc %p vcc %p\n", clip_vcc, vcc); + pr_debug("mkip clip_vcc %p vcc %p\n", clip_vcc, vcc); clip_vcc->vcc = vcc; vcc->user_back = clip_vcc; set_bit(ATM_VF_IS_CLIP, &vcc->flags); @@ -538,7 +530,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) printk(KERN_ERR "hiding hidden ATMARP entry\n"); return 0; } - DPRINTK("setentry: remove\n"); + pr_debug("setentry: remove\n"); unlink_clip_vcc(clip_vcc); return 0; } @@ -552,9 +544,9 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) entry = NEIGH2ENTRY(neigh); if (entry != clip_vcc->entry) { if (!clip_vcc->entry) - DPRINTK("setentry: add\n"); + pr_debug("setentry: add\n"); else { - DPRINTK("setentry: update\n"); + pr_debug("setentry: update\n"); unlink_clip_vcc(clip_vcc); } link_vcc(clip_vcc, entry); @@ -611,7 +603,7 @@ static int clip_create(int number) } clip_priv->next = clip_devs; clip_devs = dev; - DPRINTK("registered (net:%s)\n", dev->name); + pr_debug("registered (net:%s)\n", dev->name); return number; } @@ -620,6 +612,9 @@ static int clip_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = arg; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { neigh_ifdown(&clip_tbl, dev); return NOTIFY_DONE; @@ -631,16 +626,16 @@ static int clip_device_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_UP: - DPRINTK("clip_device_event NETDEV_UP\n"); + pr_debug("clip_device_event NETDEV_UP\n"); to_atmarpd(act_up, PRIV(dev)->number, 0); break; case NETDEV_GOING_DOWN: - DPRINTK("clip_device_event NETDEV_DOWN\n"); + pr_debug("clip_device_event NETDEV_DOWN\n"); to_atmarpd(act_down, PRIV(dev)->number, 0); break; case NETDEV_CHANGE: case NETDEV_CHANGEMTU: - DPRINTK("clip_device_event NETDEV_CHANGE*\n"); + pr_debug("clip_device_event NETDEV_CHANGE*\n"); to_atmarpd(act_change, PRIV(dev)->number, 0); break; } @@ -681,14 +676,14 @@ static struct notifier_block clip_inet_notifier = { static void atmarpd_close(struct atm_vcc *vcc) { - DPRINTK("atmarpd_close\n"); + pr_debug("atmarpd_close\n"); rtnl_lock(); atmarpd = NULL; skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); rtnl_unlock(); - DPRINTK("(done)\n"); + pr_debug("(done)\n"); module_put(THIS_MODULE); } diff --git a/net/atm/common.c b/net/atm/common.c index 282d761454ba..e166d9e0ffd9 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -30,13 +30,6 @@ #include "addr.h" /* address registry */ #include "signaling.h" /* for WAITING and sigd_attach */ - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - struct hlist_head vcc_hash[VCC_HTABLE_SIZE]; DEFINE_RWLOCK(vcc_sklist_lock); @@ -70,13 +63,13 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc,unsigned int size) struct sock *sk = sk_atm(vcc); if (atomic_read(&sk->sk_wmem_alloc) && !atm_may_send(vcc, size)) { - DPRINTK("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", + pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", atomic_read(&sk->sk_wmem_alloc), size, sk->sk_sndbuf); return NULL; } while (!(skb = alloc_skb(size,GFP_KERNEL))) schedule(); - DPRINTK("AlTx %d += %d\n", atomic_read(&sk->sk_wmem_alloc), + pr_debug("AlTx %d += %d\n", atomic_read(&sk->sk_wmem_alloc), skb->truesize); atomic_add(skb->truesize, &sk->sk_wmem_alloc); return skb; @@ -132,7 +125,7 @@ static struct proto vcc_proto = { .obj_size = sizeof(struct atm_vcc), }; -int vcc_create(struct socket *sock, int protocol, int family) +int vcc_create(struct net *net, struct socket *sock, int protocol, int family) { struct sock *sk; struct atm_vcc *vcc; @@ -140,7 +133,7 @@ int vcc_create(struct socket *sock, int protocol, int family) sock->sk = NULL; if (sock->type == SOCK_STREAM) return -EINVAL; - sk = sk_alloc(family, GFP_KERNEL, &vcc_proto, 1); + sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, 1); if (!sk) return -ENOMEM; sock_init_data(sock, sk); @@ -392,10 +385,10 @@ static int __vcc_connect(struct atm_vcc *vcc, struct atm_dev *dev, short vpi, if (!error) error = adjust_tp(&vcc->qos.rxtp,vcc->qos.aal); if (error) goto fail; - DPRINTK("VCC %d.%d, AAL %d\n",vpi,vci,vcc->qos.aal); - DPRINTK(" TX: %d, PCR %d..%d, SDU %d\n",vcc->qos.txtp.traffic_class, + pr_debug("VCC %d.%d, AAL %d\n",vpi,vci,vcc->qos.aal); + pr_debug(" TX: %d, PCR %d..%d, SDU %d\n",vcc->qos.txtp.traffic_class, vcc->qos.txtp.min_pcr,vcc->qos.txtp.max_pcr,vcc->qos.txtp.max_sdu); - DPRINTK(" RX: %d, PCR %d..%d, SDU %d\n",vcc->qos.rxtp.traffic_class, + pr_debug(" RX: %d, PCR %d..%d, SDU %d\n",vcc->qos.rxtp.traffic_class, vcc->qos.rxtp.min_pcr,vcc->qos.rxtp.max_pcr,vcc->qos.rxtp.max_sdu); if (dev->ops->open) { @@ -420,7 +413,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) struct atm_vcc *vcc = ATM_SD(sock); int error; - DPRINTK("vcc_connect (vpi %d, vci %d)\n",vpi,vci); + pr_debug("vcc_connect (vpi %d, vci %d)\n",vpi,vci); if (sock->state == SS_CONNECTED) return -EISCONN; if (sock->state != SS_UNCONNECTED) @@ -433,7 +426,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) else if (test_bit(ATM_VF_PARTIAL,&vcc->flags)) return -EINVAL; - DPRINTK("vcc_connect (TX: cl %d,bw %d-%d,sdu %d; " + pr_debug("vcc_connect (TX: cl %d,bw %d-%d,sdu %d; " "RX: cl %d,bw %d-%d,sdu %d,AAL %s%d)\n", vcc->qos.txtp.traffic_class,vcc->qos.txtp.min_pcr, vcc->qos.txtp.max_pcr,vcc->qos.txtp.max_sdu, @@ -504,7 +497,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (error) return error; sock_recv_timestamp(msg, sk, skb); - DPRINTK("RcvM %d -= %d\n", atomic_read(&sk->rmem_alloc), skb->truesize); + pr_debug("RcvM %d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize); atm_return(vcc, skb->truesize); skb_free_datagram(sk, skb); return copied; diff --git a/net/atm/common.h b/net/atm/common.h index ad78c9e1117d..16f32c1fa1c9 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -10,7 +10,7 @@ #include <linux/poll.h> /* for poll_table */ -int vcc_create(struct socket *sock, int protocol, int family); +int vcc_create(struct net *net, struct socket *sock, int protocol, int family); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, diff --git a/net/atm/lec.c b/net/atm/lec.c index 59d5aa3366f2..7eb1b21a0e94 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -48,12 +48,6 @@ static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 }; #include "lec_arpc.h" #include "resources.h" -#if 0 -#define DPRINTK printk -#else -#define DPRINTK(format,args...) -#endif - #define DUMP_PACKETS 0 /* * 0 = None, * 1 = 30 first bytes @@ -272,8 +266,9 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) char buf[300]; int i = 0; #endif /* DUMP_PACKETS >0 */ + DECLARE_MAC_BUF(mac); - DPRINTK("lec_start_xmit called\n"); + pr_debug("lec_start_xmit called\n"); if (!priv->lecd) { printk("%s:No lecd attached\n", dev->name); priv->stats.tx_errors++; @@ -281,7 +276,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) return -EUNATCH; } - DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n", + pr_debug("skbuff head:%lx data:%lx tail:%lx end:%lx\n", (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb), (long)skb_end_pointer(skb)); #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) @@ -292,7 +287,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Make sure we have room for lec_id */ if (skb_headroom(skb) < 2) { - DPRINTK("lec_start_xmit: reallocating skb\n"); + pr_debug("lec_start_xmit: reallocating skb\n"); skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); kfree_skb(skb); if (skb2 == NULL) @@ -373,25 +368,21 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) #endif entry = NULL; vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry); - DPRINTK("%s:vcc:%p vcc_flags:%x, entry:%p\n", dev->name, + pr_debug("%s:vcc:%p vcc_flags:%lx, entry:%p\n", dev->name, vcc, vcc ? vcc->flags : 0, entry); if (!vcc || !test_bit(ATM_VF_READY, &vcc->flags)) { if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) { - DPRINTK("%s:lec_start_xmit: queuing packet, ", + pr_debug("%s:lec_start_xmit: queuing packet, ", dev->name); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], - lec_h->h_dest[2], lec_h->h_dest[3], - lec_h->h_dest[4], lec_h->h_dest[5]); + pr_debug("MAC address %s\n", + print_mac(mac, lec_h->h_dest)); skb_queue_tail(&entry->tx_wait, skb); } else { - DPRINTK + pr_debug ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", dev->name); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], - lec_h->h_dest[2], lec_h->h_dest[3], - lec_h->h_dest[4], lec_h->h_dest[5]); + pr_debug("MAC address %s\n", + print_mac(mac, lec_h->h_dest)); priv->stats.tx_dropped++; dev_kfree_skb(skb); } @@ -402,10 +393,9 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) #endif /* DUMP_PACKETS > 0 */ while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { - DPRINTK("lec.c: emptying tx queue, "); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], - lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); + pr_debug("lec.c: emptying tx queue, "); + pr_debug("MAC address %s\n", + print_mac(mac, lec_h->h_dest)); lec_send(vcc, skb2, priv); } @@ -459,12 +449,13 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) struct lec_arp_table *entry; int i; char *tmp; /* FIXME */ + DECLARE_MAC_BUF(mac); atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); mesg = (struct atmlec_msg *)skb->data; tmp = skb->data; tmp += sizeof(struct atmlec_msg); - DPRINTK("%s: msg from zeppelin:%d\n", dev->name, mesg->type); + pr_debug("%s: msg from zeppelin:%d\n", dev->name, mesg->type); switch (mesg->type) { case l_set_mac_addr: for (i = 0; i < 6; i++) { @@ -500,9 +491,9 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) mesg->content.normal.atm_addr, mesg->content.normal.flag, mesg->content.normal.targetless_le_arp); - DPRINTK("lec: in l_arp_update\n"); + pr_debug("lec: in l_arp_update\n"); if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */ - DPRINTK("lec: LANE2 3.1.5, got tlvs, size %d\n", + pr_debug("lec: LANE2 3.1.5, got tlvs, size %d\n", mesg->sizeoftlvs); lane2_associate_ind(dev, mesg->content.normal.mac_addr, tmp, mesg->sizeoftlvs); @@ -544,14 +535,10 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) { struct net_bridge_fdb_entry *f; - DPRINTK - ("%s: bridge zeppelin asks about 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - dev->name, mesg->content.proxy.mac_addr[0], - mesg->content.proxy.mac_addr[1], - mesg->content.proxy.mac_addr[2], - mesg->content.proxy.mac_addr[3], - mesg->content.proxy.mac_addr[4], - mesg->content.proxy.mac_addr[5]); + pr_debug + ("%s: bridge zeppelin asks about %s\n", + dev->name, + print_mac(mac, mesg->content.proxy.mac_addr)); if (br_fdb_get_hook == NULL || dev->br_port == NULL) break; @@ -564,7 +551,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) struct sk_buff *skb2; struct sock *sk; - DPRINTK + pr_debug ("%s: entry found, responding to zeppelin\n", dev->name); skb2 = @@ -670,7 +657,7 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, sk->sk_data_ready(sk, skb->len); if (data != NULL) { - DPRINTK("lec: about to send %d bytes of data\n", data->len); + pr_debug("lec: about to send %d bytes of data\n", data->len); atm_force_charge(priv->lecd, data->truesize); skb_queue_tail(&sk->sk_receive_queue, data); sk->sk_data_ready(sk, skb->len); @@ -742,7 +729,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) vcc->vpi, vcc->vci); #endif if (!skb) { - DPRINTK("%s: null skb\n", dev->name); + pr_debug("%s: null skb\n", dev->name); lec_vcc_close(priv, vcc); return; } @@ -766,7 +753,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) if (memcmp(skb->data, lec_ctrl_magic, 4) == 0) { /* Control frame, to daemon */ struct sock *sk = sk_atm(vcc); - DPRINTK("%s: To daemon\n", dev->name); + pr_debug("%s: To daemon\n", dev->name); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); } else { /* Data frame, queue to protocol handlers */ @@ -780,7 +767,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) * Probably looping back, or if lecd is missing, * lecd has gone down */ - DPRINTK("Ignoring frame...\n"); + pr_debug("Ignoring frame...\n"); dev_kfree_skb(skb); return; } @@ -1442,9 +1429,9 @@ static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr, #include <net/route.h> #if 0 -#define DPRINTK(format,args...) +#define pr_debug(format,args...) /* -#define DPRINTK printk +#define pr_debug printk */ #endif #define DEBUG_ARP_TABLE 0 @@ -1513,7 +1500,7 @@ lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry) tmp = &priv->lec_arp_tables[HASH(entry->mac_addr[ETH_ALEN - 1])]; hlist_add_head(&entry->next, tmp); - DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + pr_debug("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", 0xff & entry->mac_addr[0], 0xff & entry->mac_addr[1], 0xff & entry->mac_addr[2], 0xff & entry->mac_addr[3], 0xff & entry->mac_addr[4], 0xff & entry->mac_addr[5]); @@ -1555,7 +1542,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) } skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */ - DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + pr_debug("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", 0xff & to_remove->mac_addr[0], 0xff & to_remove->mac_addr[1], 0xff & to_remove->mac_addr[2], 0xff & to_remove->mac_addr[3], 0xff & to_remove->mac_addr[4], 0xff & to_remove->mac_addr[5]); @@ -1777,7 +1764,7 @@ static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, struct hlist_head *head; struct lec_arp_table *entry; - DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + pr_debug("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", mac_addr[0] & 0xff, mac_addr[1] & 0xff, mac_addr[2] & 0xff, mac_addr[3] & 0xff, mac_addr[4] & 0xff, mac_addr[5] & 0xff); @@ -1819,7 +1806,7 @@ static void lec_arp_expire_arp(unsigned long data) entry = (struct lec_arp_table *)data; - DPRINTK("lec_arp_expire_arp\n"); + pr_debug("lec_arp_expire_arp\n"); if (entry->status == ESI_ARP_PENDING) { if (entry->no_tries <= entry->priv->max_retry_count) { if (entry->is_rdesc) @@ -1843,7 +1830,7 @@ static void lec_arp_expire_vcc(unsigned long data) del_timer(&to_remove->timer); - DPRINTK("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n", + pr_debug("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n", to_remove, priv, to_remove->vcc ? to_remove->recv_vcc->vpi : 0, to_remove->vcc ? to_remove->recv_vcc->vci : 0); @@ -1883,7 +1870,7 @@ static void lec_arp_check_expire(struct work_struct *work) unsigned long time_to_check; int i; - DPRINTK("lec_arp_check_expire %p\n", priv); + pr_debug("lec_arp_check_expire %p\n", priv); now = jiffies; restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); @@ -1895,13 +1882,13 @@ restart: else time_to_check = priv->aging_time; - DPRINTK("About to expire: %lx - %lx > %lx\n", + pr_debug("About to expire: %lx - %lx > %lx\n", now, entry->last_used, time_to_check); if (time_after(now, entry->last_used + time_to_check) && !(entry->flags & LEC_PERMANENT_FLAG) && !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */ /* Remove entry */ - DPRINTK("LEC:Entry timed out\n"); + pr_debug("LEC:Entry timed out\n"); lec_arp_remove(priv, entry); lec_arp_put(entry); } else { @@ -1999,7 +1986,7 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, entry->packets_flooded < priv->maximum_unknown_frame_count) { entry->packets_flooded++; - DPRINTK("LEC_ARP: Flooding..\n"); + pr_debug("LEC_ARP: Flooding..\n"); found = priv->mcast_vcc; goto out; } @@ -2010,13 +1997,13 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, */ lec_arp_hold(entry); *ret_entry = entry; - DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status, + pr_debug("lec: entry->status %d entry->vcc %p\n", entry->status, entry->vcc); found = NULL; } else { /* No matching entry was found */ entry = make_entry(priv, mac_to_find); - DPRINTK("LEC_ARP: Making entry\n"); + pr_debug("LEC_ARP: Making entry\n"); if (!entry) { found = priv->mcast_vcc; goto out; @@ -2053,7 +2040,7 @@ lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, struct lec_arp_table *entry; int i; - DPRINTK("lec_addr_delete\n"); + pr_debug("lec_addr_delete\n"); spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { @@ -2084,8 +2071,8 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, struct lec_arp_table *entry, *tmp; int i; - DPRINTK("lec:%s", (targetless_le_arp) ? "targetless " : " "); - DPRINTK("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", + pr_debug("lec:%s", (targetless_le_arp) ? "targetless " : " "); + pr_debug("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3], mac_addr[4], mac_addr[5]); @@ -2122,7 +2109,7 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, entry->flags |= LEC_REMOTE_FLAG; else entry->flags &= ~LEC_REMOTE_FLAG; - DPRINTK("After update\n"); + pr_debug("After update\n"); dump_arp_table(priv); goto out; } @@ -2166,7 +2153,7 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, entry->status = ESI_VC_PENDING; send_to_lecd(priv, l_svc_setup, entry->mac_addr, atm_addr, NULL); } - DPRINTK("After update2\n"); + pr_debug("After update2\n"); dump_arp_table(priv); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); @@ -2189,7 +2176,7 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, if (ioc_data->receive == 2) { /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */ - DPRINTK("LEC_ARP: Attaching mcast forward\n"); + pr_debug("LEC_ARP: Attaching mcast forward\n"); #if 0 entry = lec_arp_find(priv, bus_mac); if (!entry) { @@ -2214,7 +2201,7 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, * Vcc which we don't want to make default vcc, * attach it anyway. */ - DPRINTK + pr_debug ("LEC_ARP:Attaching data direct, not default: " "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", ioc_data->atm_addr[0], ioc_data->atm_addr[1], @@ -2242,7 +2229,7 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, dump_arp_table(priv); goto out; } - DPRINTK + pr_debug ("LEC_ARP:Attaching data direct, default: " "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", ioc_data->atm_addr[0], ioc_data->atm_addr[1], @@ -2260,8 +2247,8 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, if (memcmp (ioc_data->atm_addr, entry->atm_addr, ATM_ESA_LEN) == 0) { - DPRINTK("LEC_ARP: Attaching data direct\n"); - DPRINTK("Currently -> Vcc: %d, Rvcc:%d\n", + pr_debug("LEC_ARP: Attaching data direct\n"); + pr_debug("Currently -> Vcc: %d, Rvcc:%d\n", entry->vcc ? entry->vcc->vci : 0, entry->recv_vcc ? entry->recv_vcc-> vci : 0); @@ -2303,7 +2290,7 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, } } if (found_entry) { - DPRINTK("After vcc was added\n"); + pr_debug("After vcc was added\n"); dump_arp_table(priv); goto out; } @@ -2323,7 +2310,7 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, entry->timer.expires = jiffies + priv->vcc_timeout_period; entry->timer.function = lec_arp_expire_vcc; add_timer(&entry->timer); - DPRINTK("After vcc was added\n"); + pr_debug("After vcc was added\n"); dump_arp_table(priv); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); @@ -2336,7 +2323,7 @@ static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) struct lec_arp_table *entry; int i; - DPRINTK("LEC:lec_flush_complete %lx\n", tran_id); + pr_debug("LEC:lec_flush_complete %lx\n", tran_id); restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { @@ -2353,7 +2340,7 @@ restart: entry->last_used = jiffies; entry->status = ESI_FORWARD_DIRECT; lec_arp_put(entry); - DPRINTK("LEC_ARP: Flushed\n"); + pr_debug("LEC_ARP: Flushed\n"); goto restart; } } @@ -2376,7 +2363,7 @@ lec_set_flush_tran_id(struct lec_priv *priv, hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { entry->flush_tran_id = tran_id; - DPRINTK("Set flush transaction id to %lx for %p\n", + pr_debug("Set flush transaction id to %lx for %p\n", tran_id, entry); } } @@ -2427,7 +2414,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) struct lec_arp_table *entry; int i; - DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n", vcc->vpi, vcc->vci); + pr_debug("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n", vcc->vpi, vcc->vci); dump_arp_table(priv); spin_lock_irqsave(&priv->lec_arp_lock, flags); @@ -2510,7 +2497,7 @@ lec_arp_check_empties(struct lec_priv *priv, goto out; } } - DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n"); + pr_debug("LEC_ARP: Arp_check_empties: entry not found!\n"); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 7c85aa551d5e..2086396de177 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -244,7 +244,7 @@ static struct net_device *find_lec_by_itfnum(int itf) char name[IFNAMSIZ]; sprintf(name, "lec%d", itf); - dev = dev_get_by_name(name); + dev = dev_get_by_name(&init_net, name); return dev; } @@ -956,6 +956,10 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo struct lec_priv *priv; dev = (struct net_device *)dev_ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (dev->name == NULL || strncmp(dev->name, "lec", 3)) return NOTIFY_DONE; /* we are only interested in lec:s */ diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 19d5dfc0702f..0af84cd4f65b 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -46,13 +46,6 @@ #include "common.h" -#if 0 -#define DPRINTK(format, args...) \ - printk(KERN_DEBUG "pppoatm: " format, ##args) -#else -#define DPRINTK(format, args...) -#endif - enum pppoatm_encaps { e_autodetect = PPPOATM_ENCAPS_AUTODETECT, e_vc = PPPOATM_ENCAPS_VC, @@ -139,9 +132,9 @@ static void pppoatm_unassign_vcc(struct atm_vcc *atmvcc) static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) { struct pppoatm_vcc *pvcc = atmvcc_to_pvcc(atmvcc); - DPRINTK("pppoatm push\n"); + pr_debug("pppoatm push\n"); if (skb == NULL) { /* VCC was closed */ - DPRINTK("removing ATMPPP VCC %p\n", pvcc); + pr_debug("removing ATMPPP VCC %p\n", pvcc); pppoatm_unassign_vcc(atmvcc); atmvcc->push(atmvcc, NULL); /* Pass along bad news */ return; @@ -172,9 +165,8 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) pvcc->chan.mtu += LLC_LEN; break; } - DPRINTK("(unit %d): Couldn't autodetect yet " + pr_debug("Couldn't autodetect yet " "(skb: %02X %02X %02X %02X %02X %02X)\n", - pvcc->chan.unit, skb->data[0], skb->data[1], skb->data[2], skb->data[3], skb->data[4], skb->data[5]); goto error; @@ -202,8 +194,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) { struct pppoatm_vcc *pvcc = chan_to_pvcc(chan); ATM_SKB(skb)->vcc = pvcc->atmvcc; - DPRINTK("(unit %d): pppoatm_send (skb=0x%p, vcc=0x%p)\n", - pvcc->chan.unit, skb, pvcc->atmvcc); + pr_debug("pppoatm_send (skb=0x%p, vcc=0x%p)\n", skb, pvcc->atmvcc); if (skb->data[0] == '\0' && (pvcc->flags & SC_COMP_PROT)) (void) skb_pull(skb, 1); switch (pvcc->encaps) { /* LLC encapsulation needed */ @@ -228,16 +219,14 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) goto nospace; break; case e_autodetect: - DPRINTK("(unit %d): Trying to send without setting encaps!\n", - pvcc->chan.unit); + pr_debug("Trying to send without setting encaps!\n"); kfree_skb(skb); return 1; } atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options; - DPRINTK("(unit %d): atm_skb(%p)->vcc(%p)->dev(%p)\n", - pvcc->chan.unit, skb, ATM_SKB(skb)->vcc, + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); return ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb) ? DROP_PACKET : 1; diff --git a/net/atm/proc.c b/net/atm/proc.c index 99fc1fe950ee..5d9d5ffba145 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -22,6 +22,7 @@ #include <linux/netdevice.h> #include <linux/atmclip.h> #include <linux/init.h> /* for __init */ +#include <net/net_namespace.h> #include <net/atmclip.h> #include <asm/uaccess.h> #include <asm/atomic.h> @@ -175,7 +176,7 @@ static void pvc_info(struct seq_file *seq, struct atm_vcc *vcc) seq_printf(seq, "%3d %3d %5d %-3s %7d %-5s %7d %-6s", vcc->dev->number,vcc->vpi,vcc->vci, - vcc->qos.aal >= sizeof(aal_name)/sizeof(aal_name[0]) ? "err" : + vcc->qos.aal >= ARRAY_SIZE(aal_name) ? "err" : aal_name[vcc->qos.aal],vcc->qos.rxtp.min_pcr, class_name[vcc->qos.rxtp.traffic_class],vcc->qos.txtp.min_pcr, class_name[vcc->qos.txtp.traffic_class]); @@ -475,7 +476,7 @@ static void atm_proc_dirs_remove(void) if (e->dirent) remove_proc_entry(e->name, atm_proc_root); } - remove_proc_entry("net/atm", NULL); + remove_proc_entry("atm", init_net.proc_net); } int __init atm_proc_init(void) @@ -483,7 +484,7 @@ int __init atm_proc_init(void) static struct atm_proc_entry *e; int ret; - atm_proc_root = proc_mkdir("net/atm",NULL); + atm_proc_root = proc_mkdir("atm", init_net.proc_net); if (!atm_proc_root) goto err_out; for (e = atm_proc_ents; e->name; e++) { diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 848e6e191cc7..43e8bf5ed001 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -124,10 +124,13 @@ static const struct proto_ops pvc_proto_ops = { }; -static int pvc_create(struct socket *sock,int protocol) +static int pvc_create(struct net *net, struct socket *sock,int protocol) { + if (net != &init_net) + return -EAFNOSUPPORT; + sock->ops = &pvc_proto_ops; - return vcc_create(sock, protocol, PF_ATMPVC); + return vcc_create(net, sock, protocol, PF_ATMPVC); } diff --git a/net/atm/raw.c b/net/atm/raw.c index 1378f61c5c31..b0a2d8cb6744 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -13,14 +13,6 @@ #include "common.h" #include "protocols.h" - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - - /* * SKB == NULL indicates that the link is being closed */ @@ -40,8 +32,8 @@ static void atm_pop_raw(struct atm_vcc *vcc,struct sk_buff *skb) { struct sock *sk = sk_atm(vcc); - DPRINTK("APopR (%d) %d -= %d\n", vcc->vci, sk->sk_wmem_alloc, - skb->truesize); + pr_debug("APopR (%d) %d -= %d\n", vcc->vci, + atomic_read(&sk->sk_wmem_alloc), skb->truesize); atomic_sub(skb->truesize, &sk->sk_wmem_alloc); dev_kfree_skb_any(skb); sk->sk_write_space(sk); diff --git a/net/atm/signaling.c b/net/atm/signaling.c index d14baaf1f4c3..229921400522 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -23,13 +23,6 @@ Danger: may cause nasty hangs if the demon crashes. */ -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - - struct atm_vcc *sigd = NULL; #ifdef WAIT_FOR_DEMON static DECLARE_WAIT_QUEUE_HEAD(sigd_sleep); @@ -44,14 +37,14 @@ static void sigd_put_skb(struct sk_buff *skb) add_wait_queue(&sigd_sleep,&wait); while (!sigd) { set_current_state(TASK_UNINTERRUPTIBLE); - DPRINTK("atmsvc: waiting for signaling demon...\n"); + pr_debug("atmsvc: waiting for signaling demon...\n"); schedule(); } current->state = TASK_RUNNING; remove_wait_queue(&sigd_sleep,&wait); #else if (!sigd) { - DPRINTK("atmsvc: no signaling demon\n"); + pr_debug("atmsvc: no signaling demon\n"); kfree_skb(skb); return; } @@ -96,9 +89,9 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) msg = (struct atmsvc_msg *) skb->data; atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); - DPRINTK("sigd_send %d (0x%lx)\n",(int) msg->type, - (unsigned long) msg->vcc); vcc = *(struct atm_vcc **) &msg->vcc; + pr_debug("sigd_send %d (0x%lx)\n",(int) msg->type, + (unsigned long) vcc); sk = sk_atm(vcc); switch (msg->type) { @@ -130,7 +123,7 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) case as_indicate: vcc = *(struct atm_vcc **) &msg->listen_vcc; sk = sk_atm(vcc); - DPRINTK("as_indicate!!!\n"); + pr_debug("as_indicate!!!\n"); lock_sock(sk); if (sk_acceptq_is_full(sk)) { sigd_enq(NULL,as_reject,vcc,NULL,NULL); @@ -139,7 +132,7 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) } sk->sk_ack_backlog++; skb_queue_tail(&sk->sk_receive_queue, skb); - DPRINTK("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); + pr_debug("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); sk->sk_state_change(sk); as_indicate_complete: release_sock(sk); @@ -176,7 +169,7 @@ void sigd_enq2(struct atm_vcc *vcc,enum atmsvc_msg_type type, struct atmsvc_msg *msg; static unsigned session = 0; - DPRINTK("sigd_enq %d (0x%p)\n",(int) type,vcc); + pr_debug("sigd_enq %d (0x%p)\n",(int) type,vcc); while (!(skb = alloc_skb(sizeof(struct atmsvc_msg),GFP_KERNEL))) schedule(); msg = (struct atmsvc_msg *) skb_put(skb,sizeof(struct atmsvc_msg)); @@ -226,7 +219,7 @@ static void sigd_close(struct atm_vcc *vcc) struct sock *s; int i; - DPRINTK("sigd_close\n"); + pr_debug("sigd_close\n"); sigd = NULL; if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) printk(KERN_ERR "sigd_close: closing with requests pending\n"); @@ -237,7 +230,7 @@ static void sigd_close(struct atm_vcc *vcc) struct hlist_head *head = &vcc_hash[i]; sk_for_each(s, node, head) { - struct atm_vcc *vcc = atm_sk(s); + vcc = atm_sk(s); purge_vcc(vcc); } @@ -263,7 +256,7 @@ static struct atm_dev sigd_dev = { int sigd_attach(struct atm_vcc *vcc) { if (sigd) return -EADDRINUSE; - DPRINTK("sigd_attach\n"); + pr_debug("sigd_attach\n"); sigd = vcc; vcc->dev = &sigd_dev; vcc_insert_socket(sk_atm(vcc)); diff --git a/net/atm/svc.c b/net/atm/svc.c index 876ec7b47a2f..daf9a48a7db0 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -25,16 +25,7 @@ #include "signaling.h" #include "addr.h" - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - - -static int svc_create(struct socket *sock,int protocol); - +static int svc_create(struct net *net, struct socket *sock,int protocol); /* * Note: since all this is still nicely synchronized with the signaling demon, @@ -55,7 +46,7 @@ static void svc_disconnect(struct atm_vcc *vcc) struct sk_buff *skb; struct sock *sk = sk_atm(vcc); - DPRINTK("svc_disconnect %p\n",vcc); + pr_debug("svc_disconnect %p\n",vcc); if (test_bit(ATM_VF_REGIS,&vcc->flags)) { prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc,as_close,NULL,NULL,NULL); @@ -69,7 +60,7 @@ static void svc_disconnect(struct atm_vcc *vcc) as_indicate has been answered */ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { atm_return(vcc, skb->truesize); - DPRINTK("LISTEN REL\n"); + pr_debug("LISTEN REL\n"); sigd_enq2(NULL,as_reject,vcc,NULL,NULL,&vcc->qos,0); dev_kfree_skb(skb); } @@ -85,7 +76,7 @@ static int svc_release(struct socket *sock) if (sk) { vcc = ATM_SD(sock); - DPRINTK("svc_release %p\n", vcc); + pr_debug("svc_release %p\n", vcc); clear_bit(ATM_VF_READY, &vcc->flags); /* VCC pointer is used as a reference, so we must not free it (thereby subjecting it to re-use) before all pending connections @@ -162,7 +153,7 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, struct atm_vcc *vcc = ATM_SD(sock); int error; - DPRINTK("svc_connect %p\n",vcc); + pr_debug("svc_connect %p\n",vcc); lock_sock(sk); if (sockaddr_len != sizeof(struct sockaddr_atmsvc)) { error = -EINVAL; @@ -224,7 +215,7 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); continue; } - DPRINTK("*ABORT*\n"); + pr_debug("*ABORT*\n"); /* * This is tricky: * Kernel ---close--> Demon @@ -295,7 +286,7 @@ static int svc_listen(struct socket *sock,int backlog) struct atm_vcc *vcc = ATM_SD(sock); int error; - DPRINTK("svc_listen %p\n",vcc); + pr_debug("svc_listen %p\n",vcc); lock_sock(sk); /* let server handle listen on unbound sockets */ if (test_bit(ATM_VF_SESSION,&vcc->flags)) { @@ -335,13 +326,13 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) lock_sock(sk); - error = svc_create(newsock,0); + error = svc_create(sk->sk_net, newsock,0); if (error) goto out; new_vcc = ATM_SD(newsock); - DPRINTK("svc_accept %p -> %p\n",old_vcc,new_vcc); + pr_debug("svc_accept %p -> %p\n",old_vcc,new_vcc); while (1) { DEFINE_WAIT(wait); @@ -545,7 +536,7 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr, error = -EINPROGRESS; goto out; } - DPRINTK("svc_addparty added wait queue\n"); + pr_debug("svc_addparty added wait queue\n"); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); @@ -636,12 +627,15 @@ static const struct proto_ops svc_proto_ops = { }; -static int svc_create(struct socket *sock,int protocol) +static int svc_create(struct net *net, struct socket *sock,int protocol) { int error; + if (net != &init_net) + return -EAFNOSUPPORT; + sock->ops = &svc_proto_ops; - error = vcc_create(sock, protocol, AF_ATMSVC); + error = vcc_create(net, sock, protocol, AF_ATMSVC); if (error) return error; ATM_SD(sock)->local.sas_family = AF_ATMSVC; ATM_SD(sock)->remote.sas_family = AF_ATMSVC; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index dae2a42d3d86..993e5c75e909 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -44,6 +44,7 @@ #include <linux/sysctl.h> #include <linux/init.h> #include <linux/spinlock.h> +#include <net/net_namespace.h> #include <net/tcp_states.h> #include <net/ip.h> #include <net/arp.h> @@ -103,6 +104,9 @@ static int ax25_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Reject non AX.25 devices */ if (dev->type != ARPHRD_AX25) return NOTIFY_DONE; @@ -627,7 +631,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } - dev = dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) { res = -ENODEV; break; @@ -779,11 +783,14 @@ static struct proto ax25_proto = { .obj_size = sizeof(struct sock), }; -static int ax25_create(struct socket *sock, int protocol) +static int ax25_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; ax25_cb *ax25; + if (net != &init_net) + return -EAFNOSUPPORT; + switch (sock->type) { case SOCK_DGRAM: if (protocol == 0 || protocol == PF_AX25) @@ -829,7 +836,7 @@ static int ax25_create(struct socket *sock, int protocol) return -ESOCKTNOSUPPORT; } - if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) return -ENOMEM; ax25 = sk->sk_protinfo = ax25_create_cb(); @@ -854,7 +861,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) struct sock *sk; ax25_cb *ax25, *oax25; - if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) return NULL; if ((ax25 = ax25_create_cb()) == NULL) { @@ -1998,9 +2005,9 @@ static int __init ax25_init(void) register_netdevice_notifier(&ax25_dev_notifier); ax25_register_sysctl(); - proc_net_fops_create("ax25_route", S_IRUGO, &ax25_route_fops); - proc_net_fops_create("ax25", S_IRUGO, &ax25_info_fops); - proc_net_fops_create("ax25_calls", S_IRUGO, &ax25_uid_fops); + proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops); + proc_net_fops_create(&init_net, "ax25", S_IRUGO, &ax25_info_fops); + proc_net_fops_create(&init_net, "ax25_calls", S_IRUGO, &ax25_uid_fops); out: return rc; } @@ -2014,9 +2021,9 @@ MODULE_ALIAS_NETPROTO(PF_AX25); static void __exit ax25_exit(void) { - proc_net_remove("ax25_route"); - proc_net_remove("ax25"); - proc_net_remove("ax25_calls"); + proc_net_remove(&init_net, "ax25_route"); + proc_net_remove(&init_net, "ax25"); + proc_net_remove(&init_net, "ax25_calls"); ax25_rt_free(); ax25_uid_free(); ax25_dev_free(); diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 0ddaff0df217..3b7d1720c2ee 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -451,6 +451,11 @@ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! */ + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if ((*skb->data & 0x0F) != 0) { kfree_skb(skb); /* Not a KISS data frame */ return 0; diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index 930e4918037f..f047a57aa95c 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -46,7 +46,9 @@ #ifdef CONFIG_INET -int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) +int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned len) { unsigned char *buff; @@ -215,7 +217,9 @@ put: #else /* INET */ -int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) +int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned len) { return -AX25_HEADER_LEN; } @@ -227,5 +231,12 @@ int ax25_rebuild_header(struct sk_buff *skb) #endif +const struct header_ops ax25_header_ops = { + .create = ax25_hard_header, + .rebuild = ax25_rebuild_header, +}; + EXPORT_SYMBOL(ax25_hard_header); EXPORT_SYMBOL(ax25_rebuild_header); +EXPORT_SYMBOL(ax25_header_ops); + diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index d942b946ba07..1220d8a41eb5 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -95,10 +95,13 @@ int bt_sock_unregister(int proto) } EXPORT_SYMBOL(bt_sock_unregister); -static int bt_sock_create(struct socket *sock, int proto) +static int bt_sock_create(struct net *net, struct socket *sock, int proto) { int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; @@ -113,7 +116,7 @@ static int bt_sock_create(struct socket *sock, int proto) read_lock(&bt_proto_lock); if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { - err = bt_proto[proto]->create(sock, proto); + err = bt_proto[proto]->create(net, sock, proto); module_put(bt_proto[proto]->owner); } diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 10292e776046..f718965f296c 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -204,7 +204,7 @@ static struct proto bnep_proto = { .obj_size = sizeof(struct bt_sock) }; -static int bnep_sock_create(struct socket *sock, int protocol) +static int bnep_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -213,7 +213,7 @@ static int bnep_sock_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 19be7861e51e..cf700c20d11e 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -195,7 +195,7 @@ static struct proto cmtp_proto = { .obj_size = sizeof(struct bt_sock) }; -static int cmtp_sock_create(struct socket *sock, int protocol) +static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -204,7 +204,7 @@ static int cmtp_sock_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 5ccea5fbd236..43dd6373bff9 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -634,7 +634,7 @@ static struct proto hci_sk_proto = { .obj_size = sizeof(struct hci_pinfo) }; -static int hci_sock_create(struct socket *sock, int protocol) +static int hci_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -645,7 +645,7 @@ static int hci_sock_create(struct socket *sock, int protocol) sock->ops = &hci_sock_ops; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 64d89ca28847..ff5784b440d7 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -625,7 +625,7 @@ static struct device *hidp_get_device(struct hidp_session *session) return conn ? &conn->dev : NULL; } -static inline void hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) +static inline int hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) { struct input_dev *input = session->input; int i; @@ -667,7 +667,7 @@ static inline void hidp_setup_input(struct hidp_session *session, struct hidp_co input->event = hidp_input_event; - input_register_device(input); + return input_register_device(input); } static int hidp_open(struct hid_device *hid) @@ -820,8 +820,11 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); session->idle_to = req->idle_to; - if (session->input) - hidp_setup_input(session, req); + if (session->input) { + err = hidp_setup_input(session, req); + if (err < 0) + goto failed; + } if (session->hid) hidp_setup_hid(session, req); diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 0c185257e55b..1de2b6fbcac0 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -246,7 +246,7 @@ static struct proto hidp_proto = { .obj_size = sizeof(struct bt_sock) }; -static int hidp_sock_create(struct socket *sock, int protocol) +static int hidp_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -255,7 +255,7 @@ static int hidp_sock_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index c4e4ce4ebb2b..36ef27b625db 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -518,11 +518,11 @@ static struct proto l2cap_proto = { .obj_size = sizeof(struct l2cap_pinfo) }; -static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &l2cap_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, 1); if (!sk) return NULL; @@ -543,7 +543,7 @@ static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) return sk; } -static int l2cap_sock_create(struct socket *sock, int protocol) +static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -560,7 +560,7 @@ static int l2cap_sock_create(struct socket *sock, int protocol) sock->ops = &l2cap_sock_ops; - sk = l2cap_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -1425,7 +1425,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd goto response; } - sk = l2cap_sock_alloc(NULL, BTPROTO_L2CAP, GFP_ATOMIC); + sk = l2cap_sock_alloc(parent->sk_net, NULL, BTPROTO_L2CAP, GFP_ATOMIC); if (!sk) goto response; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 30586ab9e878..266b6972667d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -282,12 +282,12 @@ static struct proto rfcomm_proto = { .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) { struct rfcomm_dlc *d; struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &rfcomm_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, 1); if (!sk) return NULL; @@ -323,7 +323,7 @@ static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio return sk; } -static int rfcomm_sock_create(struct socket *sock, int protocol) +static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -336,7 +336,7 @@ static int rfcomm_sock_create(struct socket *sock, int protocol) sock->ops = &rfcomm_sock_ops; - sk = rfcomm_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -868,7 +868,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * goto done; } - sk = rfcomm_sock_alloc(NULL, BTPROTO_RFCOMM, GFP_ATOMIC); + sk = rfcomm_sock_alloc(parent->sk_net, NULL, BTPROTO_RFCOMM, GFP_ATOMIC); if (!sk) goto done; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 3f5163e725ed..65b6fb1c4154 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -414,11 +414,11 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &sco_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, 1); if (!sk) return NULL; @@ -439,7 +439,7 @@ static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) return sk; } -static int sco_sock_create(struct socket *sock, int protocol) +static int sco_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -452,7 +452,7 @@ static int sco_sock_create(struct socket *sock, int protocol) sock->ops = &sco_sock_ops; - sk = sco_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -807,7 +807,7 @@ static void sco_conn_ready(struct sco_conn *conn) bh_lock_sock(parent); - sk = sco_sock_alloc(NULL, BTPROTO_SCO, GFP_ATOMIC); + sk = sco_sock_alloc(parent->sk_net, NULL, BTPROTO_SCO, GFP_ATOMIC); if (!sk) { bh_unlock_sock(parent); goto done; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 99292e8e1d0f..c07bac5e3e10 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -150,11 +150,8 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) static struct ethtool_ops br_ethtool_ops = { .get_drvinfo = br_getinfo, .get_link = ethtool_op_get_link, - .get_sg = ethtool_op_get_sg, .set_sg = br_set_sg, - .get_tx_csum = ethtool_op_get_tx_csum, .set_tx_csum = br_set_tx_csum, - .get_tso = ethtool_op_get_tso, .set_tso = br_set_tso, }; @@ -171,7 +168,6 @@ void br_dev_setup(struct net_device *dev) dev->set_multicast_list = br_dev_set_multicast_list; dev->change_mtu = br_change_mtu; dev->destructor = free_netdev; - SET_MODULE_OWNER(dev); SET_ETHTOOL_OPS(dev, &br_ethtool_ops); dev->stop = br_dev_stop; dev->tx_queue_len = 0; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 9272f12f664c..935784f736b3 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -303,7 +303,7 @@ int br_del_bridge(const char *name) int ret = 0; rtnl_lock(); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(&init_net, name); if (dev == NULL) ret = -ENXIO; /* Could not find device */ @@ -444,7 +444,7 @@ void __exit br_cleanup_bridges(void) struct net_device *dev, *nxt; rtnl_lock(); - for_each_netdev_safe(dev, nxt) + for_each_netdev_safe(&init_net, dev, nxt) if (dev->priv_flags & IFF_EBRIDGE) del_br(dev->priv); rtnl_unlock(); diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index bb15e9e259b1..0655a5f07f58 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -18,6 +18,7 @@ #include <linux/if_bridge.h> #include <linux/netdevice.h> #include <linux/times.h> +#include <net/net_namespace.h> #include <asm/uaccess.h> #include "br_private.h" @@ -27,7 +28,7 @@ static int get_bridge_ifindices(int *indices, int num) struct net_device *dev; int i = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) @@ -90,7 +91,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) return -EINVAL; @@ -364,7 +365,7 @@ static int old_deviceless(void __user *uarg) return -EOPNOTSUPP; } -int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg) +int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg) { switch (cmd) { case SIOCGIFBR: diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fc13130035e7..8245f051ccbb 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -904,7 +904,6 @@ int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp, static ctl_table brnf_table[] = { { - .ctl_name = NET_BRIDGE_NF_CALL_ARPTABLES, .procname = "bridge-nf-call-arptables", .data = &brnf_call_arptables, .maxlen = sizeof(int), @@ -912,7 +911,6 @@ static ctl_table brnf_table[] = { .proc_handler = &brnf_sysctl_call_tables, }, { - .ctl_name = NET_BRIDGE_NF_CALL_IPTABLES, .procname = "bridge-nf-call-iptables", .data = &brnf_call_iptables, .maxlen = sizeof(int), @@ -920,7 +918,6 @@ static ctl_table brnf_table[] = { .proc_handler = &brnf_sysctl_call_tables, }, { - .ctl_name = NET_BRIDGE_NF_CALL_IP6TABLES, .procname = "bridge-nf-call-ip6tables", .data = &brnf_call_ip6tables, .maxlen = sizeof(int), @@ -928,7 +925,6 @@ static ctl_table brnf_table[] = { .proc_handler = &brnf_sysctl_call_tables, }, { - .ctl_name = NET_BRIDGE_NF_FILTER_VLAN_TAGGED, .procname = "bridge-nf-filter-vlan-tagged", .data = &brnf_filter_vlan_tagged, .maxlen = sizeof(int), @@ -936,7 +932,6 @@ static ctl_table brnf_table[] = { .proc_handler = &brnf_sysctl_call_tables, }, { - .ctl_name = NET_BRIDGE_NF_FILTER_PPPOE_TAGGED, .procname = "bridge-nf-filter-pppoe-tagged", .data = &brnf_filter_pppoe_tagged, .maxlen = sizeof(int), diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0fcf6f073064..53ab8e0cb518 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <net/rtnetlink.h> +#include <net/net_namespace.h> #include "br_private.h" static inline size_t br_nlmsg_size(void) @@ -110,7 +111,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) int idx; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { /* not a bridge port */ if (dev->br_port == NULL || idx < cb->args[0]) goto skip; @@ -155,7 +156,7 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (new_state > BR_STATE_BLOCKING) return -EINVAL; - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(&init_net, ifm->ifi_index); if (!dev) return -ENODEV; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index c8451d3a070c..07ac3ae68d8f 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/rtnetlink.h> +#include <net/net_namespace.h> #include "br_private.h" @@ -36,6 +37,9 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_bridge_port *p = dev->br_port; struct net_bridge *br; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* not a port of a bridge */ if (p == NULL) return NOTIFY_DONE; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e6dc6f52990d..f666f7b28ff5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -192,7 +192,7 @@ extern struct sk_buff *br_handle_frame(struct net_bridge_port *p, /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg); +extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg); /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 60112bce6698..0edbd2a1c3f3 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -17,6 +17,7 @@ #include <linux/netfilter_bridge.h> #include <linux/etherdevice.h> #include <linux/llc.h> +#include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> #include <asm/unaligned.h> @@ -64,7 +65,7 @@ static inline int br_get_ticks(const unsigned char *src) { unsigned long ticks = ntohs(get_unaligned((__be16 *)src)); - return (ticks * HZ + STP_HZ - 1) / STP_HZ; + return DIV_ROUND_UP(ticks * HZ, STP_HZ); } /* called under bridge lock */ @@ -141,6 +142,9 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, struct net_bridge *br; const unsigned char *buf; + if (dev->nd_net != &init_net) + goto err; + if (!p) goto err; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 204c968fa86d..e7cfd30bac75 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -300,8 +300,9 @@ static int __init ebt_ulog_init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, - NULL, NULL, THIS_MODULE); + ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, + EBT_ULOG_MAXNLGROUPS, NULL, NULL, + THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/Makefile b/net/core/Makefile index 4751613e1b59..b1332f6d0042 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o + gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o @@ -11,7 +11,7 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o obj-$(CONFIG_XFRM) += flow.o -obj-$(CONFIG_SYSFS) += net-sysfs.o +obj-y += net-sysfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o diff --git a/net/core/dev.c b/net/core/dev.c index a76021c71207..1e169a541ce7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -92,6 +92,7 @@ #include <linux/etherdevice.h> #include <linux/notifier.h> #include <linux/skbuff.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <linux/rtnetlink.h> #include <linux/proc_fs.h> @@ -189,25 +190,50 @@ static struct net_dma net_dma = { * unregister_netdevice(), which must be called with the rtnl * semaphore held. */ -LIST_HEAD(dev_base_head); DEFINE_RWLOCK(dev_base_lock); -EXPORT_SYMBOL(dev_base_head); EXPORT_SYMBOL(dev_base_lock); #define NETDEV_HASHBITS 8 -static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS]; -static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS]; +#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) -static inline struct hlist_head *dev_name_hash(const char *name) +static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); - return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)]; + return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; } -static inline struct hlist_head *dev_index_hash(int ifindex) +static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) { - return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)]; + return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; +} + +/* Device list insertion */ +static int list_netdevice(struct net_device *dev) +{ + struct net *net = dev->nd_net; + + ASSERT_RTNL(); + + write_lock_bh(&dev_base_lock); + list_add_tail(&dev->dev_list, &net->dev_base_head); + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); + write_unlock_bh(&dev_base_lock); + return 0; +} + +/* Device list removal */ +static void unlist_netdevice(struct net_device *dev) +{ + ASSERT_RTNL(); + + /* Unlink dev from the device chain */ + write_lock_bh(&dev_base_lock); + list_del(&dev->dev_list); + hlist_del(&dev->name_hlist); + hlist_del(&dev->index_hlist); + write_unlock_bh(&dev_base_lock); } /* @@ -220,17 +246,12 @@ static RAW_NOTIFIER_HEAD(netdev_chain); * Device drivers call our routines to queue packets here. We empty the * queue in the local softnet handler. */ -DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; -#ifdef CONFIG_SYSFS -extern int netdev_sysfs_init(void); -extern int netdev_register_sysfs(struct net_device *); -extern void netdev_unregister_sysfs(struct net_device *); -#else -#define netdev_sysfs_init() (0) -#define netdev_register_sysfs(dev) (0) -#define netdev_unregister_sysfs(dev) do { } while(0) -#endif +DEFINE_PER_CPU(struct softnet_data, softnet_data); + +extern int netdev_kobject_init(void); +extern int netdev_register_kobject(struct net_device *); +extern void netdev_unregister_kobject(struct net_device *); #ifdef CONFIG_DEBUG_LOCK_ALLOC /* @@ -490,7 +511,7 @@ unsigned long netdev_boot_base(const char *prefix, int unit) * If device already registered then return base of 1 * to indicate not to probe for this interface */ - if (__dev_get_by_name(name)) + if (__dev_get_by_name(&init_net, name)) return 1; for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) @@ -545,11 +566,11 @@ __setup("netdev=", netdev_boot_setup); * careful with locks. */ -struct net_device *__dev_get_by_name(const char *name) +struct net_device *__dev_get_by_name(struct net *net, const char *name) { struct hlist_node *p; - hlist_for_each(p, dev_name_hash(name)) { + hlist_for_each(p, dev_name_hash(net, name)) { struct net_device *dev = hlist_entry(p, struct net_device, name_hlist); if (!strncmp(dev->name, name, IFNAMSIZ)) @@ -569,12 +590,12 @@ struct net_device *__dev_get_by_name(const char *name) * matching device is found. */ -struct net_device *dev_get_by_name(const char *name) +struct net_device *dev_get_by_name(struct net *net, const char *name) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(net, name); if (dev) dev_hold(dev); read_unlock(&dev_base_lock); @@ -592,11 +613,11 @@ struct net_device *dev_get_by_name(const char *name) * or @dev_base_lock. */ -struct net_device *__dev_get_by_index(int ifindex) +struct net_device *__dev_get_by_index(struct net *net, int ifindex) { struct hlist_node *p; - hlist_for_each(p, dev_index_hash(ifindex)) { + hlist_for_each(p, dev_index_hash(net, ifindex)) { struct net_device *dev = hlist_entry(p, struct net_device, index_hlist); if (dev->ifindex == ifindex) @@ -616,12 +637,12 @@ struct net_device *__dev_get_by_index(int ifindex) * dev_put to indicate they have finished with it. */ -struct net_device *dev_get_by_index(int ifindex) +struct net_device *dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev) dev_hold(dev); read_unlock(&dev_base_lock); @@ -642,13 +663,13 @@ struct net_device *dev_get_by_index(int ifindex) * If the API was consistent this would be __dev_get_by_hwaddr */ -struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) +struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) { struct net_device *dev; ASSERT_RTNL(); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) return dev; @@ -658,12 +679,12 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) EXPORT_SYMBOL(dev_getbyhwaddr); -struct net_device *__dev_getfirstbyhwtype(unsigned short type) +struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev; ASSERT_RTNL(); - for_each_netdev(dev) + for_each_netdev(net, dev) if (dev->type == type) return dev; @@ -672,12 +693,12 @@ struct net_device *__dev_getfirstbyhwtype(unsigned short type) EXPORT_SYMBOL(__dev_getfirstbyhwtype); -struct net_device *dev_getfirstbyhwtype(unsigned short type) +struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev; rtnl_lock(); - dev = __dev_getfirstbyhwtype(type); + dev = __dev_getfirstbyhwtype(net, type); if (dev) dev_hold(dev); rtnl_unlock(); @@ -697,13 +718,13 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype); * dev_put to indicate they have finished with it. */ -struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) +struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask) { struct net_device *dev, *ret; ret = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { dev_hold(dev); ret = dev; @@ -740,9 +761,10 @@ int dev_valid_name(const char *name) } /** - * dev_alloc_name - allocate a name for a device - * @dev: device + * __dev_alloc_name - allocate a name for a device + * @net: network namespace to allocate the device name in * @name: name format string + * @buf: scratch buffer and result name string * * Passed a format string - eg "lt%d" it will try and find a suitable * id. It scans list of devices to build up a free map, then chooses @@ -753,13 +775,12 @@ int dev_valid_name(const char *name) * Returns the number of the unit assigned or a negative errno code. */ -int dev_alloc_name(struct net_device *dev, const char *name) +static int __dev_alloc_name(struct net *net, const char *name, char *buf) { int i = 0; - char buf[IFNAMSIZ]; const char *p; const int max_netdevices = 8*PAGE_SIZE; - long *inuse; + unsigned long *inuse; struct net_device *d; p = strnchr(name, IFNAMSIZ-1, '%'); @@ -773,18 +794,18 @@ int dev_alloc_name(struct net_device *dev, const char *name) return -EINVAL; /* Use one page as a bit array of possible slots */ - inuse = (long *) get_zeroed_page(GFP_ATOMIC); + inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); if (!inuse) return -ENOMEM; - for_each_netdev(d) { + for_each_netdev(net, d) { if (!sscanf(d->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) continue; /* avoid cases where sscanf is not exact inverse of printf */ - snprintf(buf, sizeof(buf), name, i); + snprintf(buf, IFNAMSIZ, name, i); if (!strncmp(buf, d->name, IFNAMSIZ)) set_bit(i, inuse); } @@ -793,11 +814,9 @@ int dev_alloc_name(struct net_device *dev, const char *name) free_page((unsigned long) inuse); } - snprintf(buf, sizeof(buf), name, i); - if (!__dev_get_by_name(buf)) { - strlcpy(dev->name, buf, IFNAMSIZ); + snprintf(buf, IFNAMSIZ, name, i); + if (!__dev_get_by_name(net, buf)) return i; - } /* It is possible to run out of possible slots * when the name is long and there isn't enough space left @@ -806,6 +825,34 @@ int dev_alloc_name(struct net_device *dev, const char *name) return -ENFILE; } +/** + * dev_alloc_name - allocate a name for a device + * @dev: device + * @name: name format string + * + * Passed a format string - eg "lt%d" it will try and find a suitable + * id. It scans list of devices to build up a free map, then chooses + * the first empty slot. The caller must hold the dev_base or rtnl lock + * while allocating the name and adding the device in order to avoid + * duplicates. + * Limited to bits_per_byte * page size devices (ie 32K on most platforms). + * Returns the number of the unit assigned or a negative errno code. + */ + +int dev_alloc_name(struct net_device *dev, const char *name) +{ + char buf[IFNAMSIZ]; + struct net *net; + int ret; + + BUG_ON(!dev->nd_net); + net = dev->nd_net; + ret = __dev_alloc_name(net, name, buf); + if (ret >= 0) + strlcpy(dev->name, buf, IFNAMSIZ); + return ret; +} + /** * dev_change_name - change name of a device @@ -820,9 +867,12 @@ int dev_change_name(struct net_device *dev, char *newname) char oldname[IFNAMSIZ]; int err = 0; int ret; + struct net *net; ASSERT_RTNL(); + BUG_ON(!dev->nd_net); + net = dev->nd_net; if (dev->flags & IFF_UP) return -EBUSY; @@ -837,7 +887,7 @@ int dev_change_name(struct net_device *dev, char *newname) return err; strcpy(newname, dev->name); } - else if (__dev_get_by_name(newname)) + else if (__dev_get_by_name(net, newname)) return -EEXIST; else strlcpy(dev->name, newname, IFNAMSIZ); @@ -847,10 +897,10 @@ rollback: write_lock_bh(&dev_base_lock); hlist_del(&dev->name_hlist); - hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); write_unlock_bh(&dev_base_lock); - ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); ret = notifier_to_errno(ret); if (ret) { @@ -876,7 +926,7 @@ rollback: */ void netdev_features_change(struct net_device *dev) { - raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); + call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); } EXPORT_SYMBOL(netdev_features_change); @@ -891,8 +941,7 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGE, dev); + call_netdevice_notifiers(NETDEV_CHANGE, dev); rtmsg_ifinfo(RTM_NEWLINK, dev, 0); } } @@ -906,26 +955,18 @@ void netdev_state_change(struct net_device *dev) * available in this kernel then it becomes a nop. */ -void dev_load(const char *name) +void dev_load(struct net *net, const char *name) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(net, name); read_unlock(&dev_base_lock); if (!dev && capable(CAP_SYS_MODULE)) request_module("%s", name); } -static int default_rebuild_header(struct sk_buff *skb) -{ - printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", - skb->dev ? skb->dev->name : "NULL!!!"); - kfree_skb(skb); - return 1; -} - /** * dev_open - prepare an interface for use. * @dev: device to open @@ -988,7 +1029,7 @@ int dev_open(struct net_device *dev) /* * ... and announce new interface. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev); + call_netdevice_notifiers(NETDEV_UP, dev); } return ret; } @@ -1004,6 +1045,8 @@ int dev_open(struct net_device *dev) */ int dev_close(struct net_device *dev) { + might_sleep(); + if (!(dev->flags & IFF_UP)) return 0; @@ -1011,23 +1054,19 @@ int dev_close(struct net_device *dev) * Tell people we are going down, so that they can * prepare to death, when device is still operating. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); + call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); dev_deactivate(dev); clear_bit(__LINK_STATE_START, &dev->state); /* Synchronize to scheduled poll. We cannot touch poll list, - * it can be even on different cpu. So just clear netif_running(), - * and wait when poll really will happen. Actually, the best place - * for this is inside dev->stop() after device stopped its irq - * engine, but this requires more changes in devices. */ - + * it can be even on different cpu. So just clear netif_running(). + * + * dev->stop() will invoke napi_disable() on all of it's + * napi_struct instances on this device. + */ smp_mb__after_clear_bit(); /* Commit netif_running(). */ - while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { - /* No hurry. */ - msleep(1); - } /* * Call the device specific close. This cannot fail. @@ -1048,12 +1087,14 @@ int dev_close(struct net_device *dev) /* * Tell people we are down */ - raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); + call_netdevice_notifiers(NETDEV_DOWN, dev); return 0; } +static int dev_boot_phase = 1; + /* * Device change register/unregister. These are not inline or static * as we export them to the world. @@ -1077,23 +1118,27 @@ int register_netdevice_notifier(struct notifier_block *nb) { struct net_device *dev; struct net_device *last; + struct net *net; int err; rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); if (err) goto unlock; + if (dev_boot_phase) + goto unlock; + for_each_net(net) { + for_each_netdev(net, dev) { + err = nb->notifier_call(nb, NETDEV_REGISTER, dev); + err = notifier_to_errno(err); + if (err) + goto rollback; + + if (!(dev->flags & IFF_UP)) + continue; - for_each_netdev(dev) { - err = nb->notifier_call(nb, NETDEV_REGISTER, dev); - err = notifier_to_errno(err); - if (err) - goto rollback; - - if (!(dev->flags & IFF_UP)) - continue; - - nb->notifier_call(nb, NETDEV_UP, dev); + nb->notifier_call(nb, NETDEV_UP, dev); + } } unlock: @@ -1102,15 +1147,17 @@ unlock: rollback: last = dev; - for_each_netdev(dev) { - if (dev == last) - break; + for_each_net(net) { + for_each_netdev(net, dev) { + if (dev == last) + break; - if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + if (dev->flags & IFF_UP) { + nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); + nb->notifier_call(nb, NETDEV_DOWN, dev); + } + nb->notifier_call(nb, NETDEV_UNREGISTER, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); } goto unlock; } @@ -1144,9 +1191,9 @@ int unregister_netdevice_notifier(struct notifier_block *nb) * are as for raw_notifier_call_chain(). */ -int call_netdevice_notifiers(unsigned long val, void *v) +int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - return raw_notifier_call_chain(&netdev_chain, val, v); + return raw_notifier_call_chain(&netdev_chain, val, dev); } /* When > 0 there are consumers of rx skb time stamps */ @@ -1233,21 +1280,21 @@ void __netif_schedule(struct net_device *dev) } EXPORT_SYMBOL(__netif_schedule); -void __netif_rx_schedule(struct net_device *dev) +void dev_kfree_skb_irq(struct sk_buff *skb) { - unsigned long flags; + if (atomic_dec_and_test(&skb->users)) { + struct softnet_data *sd; + unsigned long flags; - local_irq_save(flags); - dev_hold(dev); - list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - local_irq_restore(flags); + local_irq_save(flags); + sd = &__get_cpu_var(softnet_data); + skb->next = sd->completion_queue; + sd->completion_queue = skb; + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); + } } -EXPORT_SYMBOL(__netif_rx_schedule); +EXPORT_SYMBOL(dev_kfree_skb_irq); void dev_kfree_skb_any(struct sk_buff *skb) { @@ -1259,7 +1306,12 @@ void dev_kfree_skb_any(struct sk_buff *skb) EXPORT_SYMBOL(dev_kfree_skb_any); -/* Hot-plugging. */ +/** + * netif_device_detach - mark device as removed + * @dev: network device + * + * Mark device as removed from system and therefore no longer available. + */ void netif_device_detach(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && @@ -1269,6 +1321,12 @@ void netif_device_detach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_detach); +/** + * netif_device_attach - mark device as attached + * @dev: network device + * + * Mark device as attached from system and restart if needed. + */ void netif_device_attach(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && @@ -1501,18 +1559,6 @@ out_kfree_skb: return 0; } -#define HARD_TX_LOCK(dev, cpu) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ - netif_tx_lock(dev); \ - } \ -} - -#define HARD_TX_UNLOCK(dev) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ - netif_tx_unlock(dev); \ - } \ -} - /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -1730,7 +1776,7 @@ enqueue: return NET_RX_SUCCESS; } - netif_rx_schedule(&queue->backlog_dev); + napi_schedule(&queue->backlog); goto enqueue; } @@ -1771,6 +1817,7 @@ static inline struct net_device *skb_bond(struct sk_buff *skb) return dev; } + static void net_tx_action(struct softirq_action *h) { struct softnet_data *sd = &__get_cpu_var(softnet_data); @@ -1927,7 +1974,7 @@ int netif_receive_skb(struct sk_buff *skb) __be16 type; /* if we've gotten here through NAPI, check netpoll */ - if (skb->dev->poll && netpoll_rx(skb)) + if (netpoll_receive_skb(skb)) return NET_RX_DROP; if (!skb->tstamp.tv64) @@ -2017,22 +2064,25 @@ out: return ret; } -static int process_backlog(struct net_device *backlog_dev, int *budget) +static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; - int quota = min(backlog_dev->quota, *budget); struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; - backlog_dev->weight = weight_p; - for (;;) { + napi->weight = weight_p; + do { struct sk_buff *skb; struct net_device *dev; local_irq_disable(); skb = __skb_dequeue(&queue->input_pkt_queue); - if (!skb) - goto job_done; + if (!skb) { + __napi_complete(napi); + local_irq_enable(); + break; + } + local_irq_enable(); dev = skb->dev; @@ -2040,67 +2090,86 @@ static int process_backlog(struct net_device *backlog_dev, int *budget) netif_receive_skb(skb); dev_put(dev); + } while (++work < quota && jiffies == start_time); - work++; - - if (work >= quota || jiffies - start_time > 1) - break; - - } - - backlog_dev->quota -= work; - *budget -= work; - return -1; - -job_done: - backlog_dev->quota -= work; - *budget -= work; + return work; +} - list_del(&backlog_dev->poll_list); - smp_mb__before_clear_bit(); - netif_poll_enable(backlog_dev); +/** + * __napi_schedule - schedule for receive + * @napi: entry to schedule + * + * The entry's receive function will be scheduled to run + */ +void fastcall __napi_schedule(struct napi_struct *n) +{ + unsigned long flags; - local_irq_enable(); - return 0; + local_irq_save(flags); + list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + local_irq_restore(flags); } +EXPORT_SYMBOL(__napi_schedule); + static void net_rx_action(struct softirq_action *h) { - struct softnet_data *queue = &__get_cpu_var(softnet_data); + struct list_head *list = &__get_cpu_var(softnet_data).poll_list; unsigned long start_time = jiffies; int budget = netdev_budget; void *have; local_irq_disable(); - while (!list_empty(&queue->poll_list)) { - struct net_device *dev; + while (!list_empty(list)) { + struct napi_struct *n; + int work, weight; - if (budget <= 0 || jiffies - start_time > 1) + /* If softirq window is exhuasted then punt. + * + * Note that this is a slight policy change from the + * previous NAPI code, which would allow up to 2 + * jiffies to pass before breaking out. The test + * used to be "jiffies - start_time > 1". + */ + if (unlikely(budget <= 0 || jiffies != start_time)) goto softnet_break; local_irq_enable(); - dev = list_entry(queue->poll_list.next, - struct net_device, poll_list); - have = netpoll_poll_lock(dev); + /* Even though interrupts have been re-enabled, this + * access is safe because interrupts can only add new + * entries to the tail of this list, and only ->poll() + * calls can remove this head entry from the list. + */ + n = list_entry(list->next, struct napi_struct, poll_list); - if (dev->quota <= 0 || dev->poll(dev, &budget)) { - netpoll_poll_unlock(have); - local_irq_disable(); - list_move_tail(&dev->poll_list, &queue->poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - } else { - netpoll_poll_unlock(have); - dev_put(dev); - local_irq_disable(); - } + have = netpoll_poll_lock(n); + + weight = n->weight; + + work = n->poll(n, weight); + + WARN_ON_ONCE(work > weight); + + budget -= work; + + local_irq_disable(); + + /* Drivers must not modify the NAPI state if they + * consume the entire weight. In such cases this code + * still "owns" the NAPI instance and therefore can + * move the instance around on the list at-will. + */ + if (unlikely(work == weight)) + list_move_tail(&n->poll_list, list); + + netpoll_poll_unlock(have); } out: local_irq_enable(); + #ifdef CONFIG_NET_DMA /* * There may not be any more sk_buffs coming right now, so push @@ -2115,6 +2184,7 @@ out: } } #endif + return; softnet_break: @@ -2154,7 +2224,7 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf) * match. --pb */ -static int dev_ifname(struct ifreq __user *arg) +static int dev_ifname(struct net *net, struct ifreq __user *arg) { struct net_device *dev; struct ifreq ifr; @@ -2167,7 +2237,7 @@ static int dev_ifname(struct ifreq __user *arg) return -EFAULT; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifr.ifr_ifindex); + dev = __dev_get_by_index(net, ifr.ifr_ifindex); if (!dev) { read_unlock(&dev_base_lock); return -ENODEV; @@ -2187,7 +2257,7 @@ static int dev_ifname(struct ifreq __user *arg) * Thus we will need a 'compatibility mode'. */ -static int dev_ifconf(char __user *arg) +static int dev_ifconf(struct net *net, char __user *arg) { struct ifconf ifc; struct net_device *dev; @@ -2211,7 +2281,7 @@ static int dev_ifconf(char __user *arg) */ total = 0; - for_each_netdev(dev) { + for_each_netdev(net, dev) { for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -2245,6 +2315,7 @@ static int dev_ifconf(char __user *arg) */ void *dev_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq->private; loff_t off; struct net_device *dev; @@ -2253,7 +2324,7 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) return SEQ_START_TOKEN; off = 1; - for_each_netdev(dev) + for_each_netdev(net, dev) if (off++ == *pos) return dev; @@ -2262,9 +2333,10 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq->private; ++*pos; return v == SEQ_START_TOKEN ? - first_net_device() : next_net_device((struct net_device *)v); + first_net_device(net) : next_net_device((struct net_device *)v); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -2360,7 +2432,26 @@ static const struct seq_operations dev_seq_ops = { static int dev_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_proc_net(inode); + if (!seq->private) { + seq_release(inode, file); + res = -ENXIO; + } + } + return res; +} + +static int dev_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations dev_seq_fops = { @@ -2368,7 +2459,7 @@ static const struct file_operations dev_seq_fops = { .open = dev_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_seq_release, }; static const struct seq_operations softnet_seq_ops = { @@ -2520,30 +2611,49 @@ static const struct file_operations ptype_seq_fops = { }; -static int __init dev_proc_init(void) +static int __net_init dev_proc_net_init(struct net *net) { int rc = -ENOMEM; - if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) + if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) goto out; - if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) + if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) goto out_dev; - if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops)) - goto out_dev2; - - if (wext_proc_init()) + if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) goto out_softnet; + + if (wext_proc_init(net)) + goto out_ptype; rc = 0; out: return rc; +out_ptype: + proc_net_remove(net, "ptype"); out_softnet: - proc_net_remove("ptype"); -out_dev2: - proc_net_remove("softnet_stat"); + proc_net_remove(net, "softnet_stat"); out_dev: - proc_net_remove("dev"); + proc_net_remove(net, "dev"); goto out; } + +static void __net_exit dev_proc_net_exit(struct net *net) +{ + wext_proc_exit(net); + + proc_net_remove(net, "ptype"); + proc_net_remove(net, "softnet_stat"); + proc_net_remove(net, "dev"); +} + +static struct pernet_operations __net_initdata dev_proc_ops = { + .init = dev_proc_net_init, + .exit = dev_proc_net_exit, +}; + +static int __init dev_proc_init(void) +{ + return register_pernet_subsys(&dev_proc_ops); +} #else #define dev_proc_init() 0 #endif /* CONFIG_PROC_FS */ @@ -2906,8 +3016,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) if (dev->flags & IFF_UP && ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGE, dev); + call_netdevice_notifiers(NETDEV_CHANGE, dev); if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? +1 : -1; @@ -2953,8 +3062,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) else dev->mtu = new_mtu; if (!err && dev->flags & IFF_UP) - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGEMTU, dev); + call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; } @@ -2970,18 +3078,17 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) return -ENODEV; err = dev->set_mac_address(dev, sa); if (!err) - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGEADDR, dev); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } /* - * Perform the SIOCxIFxxx calls. + * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock) */ -static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) +static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) return -ENODEV; @@ -2991,25 +3098,15 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ifr->ifr_flags = dev_get_flags(dev); return 0; - case SIOCSIFFLAGS: /* Set interface flags */ - return dev_change_flags(dev, ifr->ifr_flags); - case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */ ifr->ifr_metric = 0; return 0; - case SIOCSIFMETRIC: /* Set the metric on the interface - (currently unused) */ - return -EOPNOTSUPP; - case SIOCGIFMTU: /* Get the MTU of a device */ ifr->ifr_mtu = dev->mtu; return 0; - case SIOCSIFMTU: /* Set the MTU of a device */ - return dev_set_mtu(dev, ifr->ifr_mtu); - case SIOCGIFHWADDR: if (!dev->addr_len) memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); @@ -3019,17 +3116,9 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ifr->ifr_hwaddr.sa_family = dev->type; return 0; - case SIOCSIFHWADDR: - return dev_set_mac_address(dev, &ifr->ifr_hwaddr); - - case SIOCSIFHWBROADCAST: - if (ifr->ifr_hwaddr.sa_family != dev->type) - return -EINVAL; - memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGEADDR, dev); - return 0; + case SIOCGIFSLAVE: + err = -EINVAL; + break; case SIOCGIFMAP: ifr->ifr_map.mem_start = dev->mem_start; @@ -3040,6 +3129,59 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ifr->ifr_map.port = dev->if_port; return 0; + case SIOCGIFINDEX: + ifr->ifr_ifindex = dev->ifindex; + return 0; + + case SIOCGIFTXQLEN: + ifr->ifr_qlen = dev->tx_queue_len; + return 0; + + default: + /* dev_ioctl() should ensure this case + * is never reached + */ + WARN_ON(1); + err = -EINVAL; + break; + + } + return err; +} + +/* + * Perform the SIOCxIFxxx calls, inside rtnl_lock() + */ +static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) +{ + int err; + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + + if (!dev) + return -ENODEV; + + switch (cmd) { + case SIOCSIFFLAGS: /* Set interface flags */ + return dev_change_flags(dev, ifr->ifr_flags); + + case SIOCSIFMETRIC: /* Set the metric on the interface + (currently unused) */ + return -EOPNOTSUPP; + + case SIOCSIFMTU: /* Set the MTU of a device */ + return dev_set_mtu(dev, ifr->ifr_mtu); + + case SIOCSIFHWADDR: + return dev_set_mac_address(dev, &ifr->ifr_hwaddr); + + case SIOCSIFHWBROADCAST: + if (ifr->ifr_hwaddr.sa_family != dev->type) + return -EINVAL; + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return 0; + case SIOCSIFMAP: if (dev->set_config) { if (!netif_device_present(dev)) @@ -3066,14 +3208,6 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, dev->addr_len, 1); - case SIOCGIFINDEX: - ifr->ifr_ifindex = dev->ifindex; - return 0; - - case SIOCGIFTXQLEN: - ifr->ifr_qlen = dev->tx_queue_len; - return 0; - case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) return -EINVAL; @@ -3134,7 +3268,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) * positive or a negative errno code on error. */ -int dev_ioctl(unsigned int cmd, void __user *arg) +int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct ifreq ifr; int ret; @@ -3147,12 +3281,12 @@ int dev_ioctl(unsigned int cmd, void __user *arg) if (cmd == SIOCGIFCONF) { rtnl_lock(); - ret = dev_ifconf((char __user *) arg); + ret = dev_ifconf(net, (char __user *) arg); rtnl_unlock(); return ret; } if (cmd == SIOCGIFNAME) - return dev_ifname((struct ifreq __user *)arg); + return dev_ifname(net, (struct ifreq __user *)arg); if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; @@ -3182,9 +3316,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); read_lock(&dev_base_lock); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc_locked(net, &ifr, cmd); read_unlock(&dev_base_lock); if (!ret) { if (colon) @@ -3196,9 +3330,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) return ret; case SIOCETHTOOL: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ethtool(&ifr); + ret = dev_ethtool(net, &ifr); rtnl_unlock(); if (!ret) { if (colon) @@ -3220,9 +3354,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) case SIOCSIFNAME: if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret) { if (colon) @@ -3261,9 +3395,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); return ret; @@ -3283,9 +3417,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) if (cmd == SIOCWANDEV || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) @@ -3294,7 +3428,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg) } /* Take care of Wireless Extensions */ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(&ifr, cmd, arg); + return wext_handle_ioctl(net, &ifr, cmd, arg); return -EINVAL; } } @@ -3307,19 +3441,17 @@ int dev_ioctl(unsigned int cmd, void __user *arg) * number. The caller must hold the rtnl semaphore or the * dev_base_lock to be sure it remains unique. */ -static int dev_new_index(void) +static int dev_new_index(struct net *net) { static int ifindex; for (;;) { if (++ifindex <= 0) ifindex = 1; - if (!__dev_get_by_index(ifindex)) + if (!__dev_get_by_index(net, ifindex)) return ifindex; } } -static int dev_boot_phase = 1; - /* Delayed registration/unregisteration */ static DEFINE_SPINLOCK(net_todo_list_lock); static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); @@ -3353,6 +3485,7 @@ int register_netdevice(struct net_device *dev) struct hlist_head *head; struct hlist_node *p; int ret; + struct net *net; BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3361,6 +3494,8 @@ int register_netdevice(struct net_device *dev) /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); + BUG_ON(!dev->nd_net); + net = dev->nd_net; spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); @@ -3385,12 +3520,12 @@ int register_netdevice(struct net_device *dev) goto err_uninit; } - dev->ifindex = dev_new_index(); + dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; /* Check for existence of name */ - head = dev_name_hash(dev->name); + head = dev_name_hash(net, dev->name); hlist_for_each(p, head) { struct net_device *d = hlist_entry(p, struct net_device, name_hlist); @@ -3446,15 +3581,7 @@ int register_netdevice(struct net_device *dev) } } - /* - * nil rebuild_header routine, - * that should be never called and used as just bug trap. - */ - - if (!dev->rebuild_header) - dev->rebuild_header = default_rebuild_header; - - ret = netdev_register_sysfs(dev); + ret = netdev_register_kobject(dev); if (ret) goto err_uninit; dev->reg_state = NETREG_REGISTERED; @@ -3467,15 +3594,11 @@ int register_netdevice(struct net_device *dev) set_bit(__LINK_STATE_PRESENT, &dev->state); dev_init_scheduler(dev); - write_lock_bh(&dev_base_lock); - list_add_tail(&dev->dev_list, &dev_base_head); - hlist_add_head(&dev->name_hlist, head); - hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); dev_hold(dev); - write_unlock_bh(&dev_base_lock); + list_netdevice(dev); /* Notify protocols, that a new device appeared. */ - ret = raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); + ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); if (ret) unregister_netdevice(dev); @@ -3546,8 +3669,7 @@ static void netdev_wait_allrefs(struct net_device *dev) rtnl_lock(); /* Rebroadcast unregister notification */ - raw_notifier_call_chain(&netdev_chain, - NETDEV_UNREGISTER, dev); + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { @@ -3692,6 +3814,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; + dev->nd_net = &init_net; if (sizeof_priv) { dev->priv = ((char *)dev + @@ -3704,6 +3827,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->egress_subqueue_count = queue_count; dev->get_stats = internal_stats; + netpoll_netdev_init(dev); setup(dev); strcpy(dev->name, name); return dev; @@ -3720,7 +3844,6 @@ EXPORT_SYMBOL(alloc_netdev_mq); */ void free_netdev(struct net_device *dev) { -#ifdef CONFIG_SYSFS /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -3732,9 +3855,6 @@ void free_netdev(struct net_device *dev) /* will free via device release */ put_device(&dev->dev); -#else - kfree((char *)dev - dev->padded); -#endif } /* Synchronize with packet receive processing. */ @@ -3773,15 +3893,10 @@ void unregister_netdevice(struct net_device *dev) BUG_ON(dev->reg_state != NETREG_REGISTERED); /* If device is running, close it first. */ - if (dev->flags & IFF_UP) - dev_close(dev); + dev_close(dev); /* And unlink it from device chain. */ - write_lock_bh(&dev_base_lock); - list_del(&dev->dev_list); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); - write_unlock_bh(&dev_base_lock); + unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; @@ -3794,7 +3909,7 @@ void unregister_netdevice(struct net_device *dev) /* Notify protocols, that we are about to destroy this device. They should clean all the things. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); /* * Flush the unicast and multicast chains @@ -3807,8 +3922,8 @@ void unregister_netdevice(struct net_device *dev) /* Notifier chain MUST detach us from master device. */ BUG_TRAP(!dev->master); - /* Remove entries from sysfs */ - netdev_unregister_sysfs(dev); + /* Remove entries from kobject tree */ + netdev_unregister_kobject(dev); /* Finish processing unregister after unlock */ net_set_todo(dev); @@ -3839,6 +3954,121 @@ void unregister_netdev(struct net_device *dev) EXPORT_SYMBOL(unregister_netdev); +/** + * dev_change_net_namespace - move device to different nethost namespace + * @dev: device + * @net: network namespace + * @pat: If not NULL name pattern to try if the current device name + * is already taken in the destination network namespace. + * + * This function shuts down a device interface and moves it + * to a new network namespace. On success 0 is returned, on + * a failure a netagive errno code is returned. + * + * Callers must hold the rtnl semaphore. + */ + +int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) +{ + char buf[IFNAMSIZ]; + const char *destname; + int err; + + ASSERT_RTNL(); + + /* Don't allow namespace local devices to be moved. */ + err = -EINVAL; + if (dev->features & NETIF_F_NETNS_LOCAL) + goto out; + + /* Ensure the device has been registrered */ + err = -EINVAL; + if (dev->reg_state != NETREG_REGISTERED) + goto out; + + /* Get out if there is nothing todo */ + err = 0; + if (dev->nd_net == net) + goto out; + + /* Pick the destination device name, and ensure + * we can use it in the destination network namespace. + */ + err = -EEXIST; + destname = dev->name; + if (__dev_get_by_name(net, destname)) { + /* We get here if we can't use the current device name */ + if (!pat) + goto out; + if (!dev_valid_name(pat)) + goto out; + if (strchr(pat, '%')) { + if (__dev_alloc_name(net, pat, buf) < 0) + goto out; + destname = buf; + } else + destname = pat; + if (__dev_get_by_name(net, destname)) + goto out; + } + + /* + * And now a mini version of register_netdevice unregister_netdevice. + */ + + /* If device is running close it first. */ + dev_close(dev); + + /* And unlink it from device chain */ + err = -ENODEV; + unlist_netdevice(dev); + + synchronize_net(); + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + + /* + * Flush the unicast and multicast chains + */ + dev_addr_discard(dev); + + /* Actually switch the network namespace */ + dev->nd_net = net; + + /* Assign the new device name */ + if (destname != dev->name) + strcpy(dev->name, destname); + + /* If there is an ifindex conflict assign a new one */ + if (__dev_get_by_index(net, dev->ifindex)) { + int iflink = (dev->iflink == dev->ifindex); + dev->ifindex = dev_new_index(net); + if (iflink) + dev->iflink = dev->ifindex; + } + + /* Fixup kobjects */ + err = device_rename(&dev->dev, dev->name); + WARN_ON(err); + + /* Add the device back in the hashes */ + list_netdevice(dev); + + /* Notify protocols, that a new device appeared. */ + call_netdevice_notifiers(NETDEV_REGISTER, dev); + + synchronize_net(); + err = 0; +out: + return err; +} + static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, void *ocpu) @@ -4032,6 +4262,82 @@ int netdev_compute_features(unsigned long all, unsigned long one) } EXPORT_SYMBOL(netdev_compute_features); +static struct hlist_head *netdev_create_hash(void) +{ + int i; + struct hlist_head *hash; + + hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); + if (hash != NULL) + for (i = 0; i < NETDEV_HASHENTRIES; i++) + INIT_HLIST_HEAD(&hash[i]); + + return hash; +} + +/* Initialize per network namespace state */ +static int __net_init netdev_init(struct net *net) +{ + INIT_LIST_HEAD(&net->dev_base_head); + rwlock_init(&dev_base_lock); + + net->dev_name_head = netdev_create_hash(); + if (net->dev_name_head == NULL) + goto err_name; + + net->dev_index_head = netdev_create_hash(); + if (net->dev_index_head == NULL) + goto err_idx; + + return 0; + +err_idx: + kfree(net->dev_name_head); +err_name: + return -ENOMEM; +} + +static void __net_exit netdev_exit(struct net *net) +{ + kfree(net->dev_name_head); + kfree(net->dev_index_head); +} + +static struct pernet_operations __net_initdata netdev_net_ops = { + .init = netdev_init, + .exit = netdev_exit, +}; + +static void __net_exit default_device_exit(struct net *net) +{ + struct net_device *dev, *next; + /* + * Push all migratable of the network devices back to the + * initial network namespace + */ + rtnl_lock(); + for_each_netdev_safe(net, dev, next) { + int err; + + /* Ignore unmoveable devices (i.e. loopback) */ + if (dev->features & NETIF_F_NETNS_LOCAL) + continue; + + /* Push remaing network devices to init_net */ + err = dev_change_net_namespace(dev, &init_net, "dev%d"); + if (err) { + printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n", + __func__, dev->name, err); + unregister_netdevice(dev); + } + } + rtnl_unlock(); +} + +static struct pernet_operations __net_initdata default_device_ops = { + .exit = default_device_exit, +}; + /* * Initialize the DEV module. At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not @@ -4052,18 +4358,18 @@ static int __init net_dev_init(void) if (dev_proc_init()) goto out; - if (netdev_sysfs_init()) + if (netdev_kobject_init()) goto out; INIT_LIST_HEAD(&ptype_all); for (i = 0; i < 16; i++) INIT_LIST_HEAD(&ptype_base[i]); - for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) - INIT_HLIST_HEAD(&dev_name_head[i]); + if (register_pernet_subsys(&netdev_net_ops)) + goto out; - for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) - INIT_HLIST_HEAD(&dev_index_head[i]); + if (register_pernet_device(&default_device_ops)) + goto out; /* * Initialise the packet receive queues. @@ -4076,10 +4382,9 @@ static int __init net_dev_init(void) skb_queue_head_init(&queue->input_pkt_queue); queue->completion_queue = NULL; INIT_LIST_HEAD(&queue->poll_list); - set_bit(__LINK_STATE_START, &queue->backlog_dev.state); - queue->backlog_dev.weight = weight_p; - queue->backlog_dev.poll = process_backlog; - atomic_set(&queue->backlog_dev.refcnt, 1); + + queue->backlog.poll = process_backlog; + queue->backlog.weight = weight_p; } netdev_dma_register(); diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 20330c572610..15241cf48af8 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -41,6 +41,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/route.h> #include <linux/skbuff.h> @@ -186,11 +187,12 @@ EXPORT_SYMBOL(dev_mc_unsync); #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq->private; struct net_device *dev; loff_t off = 0; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (off++ == *pos) return dev; } @@ -239,7 +241,26 @@ static const struct seq_operations dev_mc_seq_ops = { static int dev_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_mc_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_mc_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_proc_net(inode); + if (!seq->private) { + seq_release(inode, file); + res = -ENXIO; + } + } + return res; +} + +static int dev_mc_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations dev_mc_seq_fops = { @@ -247,14 +268,31 @@ static const struct file_operations dev_mc_seq_fops = { .open = dev_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_mc_seq_release, }; #endif +static int __net_init dev_mc_net_init(struct net *net) +{ + if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void __net_exit dev_mc_net_exit(struct net *net) +{ + proc_net_remove(net, "dev_mcast"); +} + +static struct pernet_operations __net_initdata dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + void __init dev_mcast_init(void) { - proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops); + register_pernet_subsys(&dev_mc_net_ops); } EXPORT_SYMBOL(dev_mc_add); diff --git a/net/core/dst.c b/net/core/dst.c index c6a05879d58c..16958e64e577 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -9,59 +9,84 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/workqueue.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/string.h> #include <linux/types.h> +#include <net/net_namespace.h> +#include <net/net_namespace.h> #include <net/dst.h> -/* Locking strategy: - * 1) Garbage collection state of dead destination cache - * entries is protected by dst_lock. - * 2) GC is run only from BH context, and is the only remover - * of entries. - * 3) Entries are added to the garbage list from both BH - * and non-BH context, so local BH disabling is needed. - * 4) All operations modify state, so a spinlock is used. +/* + * Theory of operations: + * 1) We use a list, protected by a spinlock, to add + * new entries from both BH and non-BH context. + * 2) In order to keep spinlock held for a small delay, + * we use a second list where are stored long lived + * entries, that are handled by the garbage collect thread + * fired by a workqueue. + * 3) This list is guarded by a mutex, + * so that the gc_task and dst_dev_event() can be synchronized. */ -static struct dst_entry *dst_garbage_list; #if RT_CACHE_DEBUG >= 2 static atomic_t dst_total = ATOMIC_INIT(0); #endif -static DEFINE_SPINLOCK(dst_lock); -static unsigned long dst_gc_timer_expires; -static unsigned long dst_gc_timer_inc = DST_GC_MAX; -static void dst_run_gc(unsigned long); +/* + * We want to keep lock & list close together + * to dirty as few cache lines as possible in __dst_free(). + * As this is not a very strong hint, we dont force an alignment on SMP. + */ +static struct { + spinlock_t lock; + struct dst_entry *list; + unsigned long timer_inc; + unsigned long timer_expires; +} dst_garbage = { + .lock = __SPIN_LOCK_UNLOCKED(dst_garbage.lock), + .timer_inc = DST_GC_MAX, +}; +static void dst_gc_task(struct work_struct *work); static void ___dst_free(struct dst_entry * dst); -static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0); +static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); -static void dst_run_gc(unsigned long dummy) +static DEFINE_MUTEX(dst_gc_mutex); +/* + * long lived entries are maintained in this list, guarded by dst_gc_mutex + */ +static struct dst_entry *dst_busy_list; + +static void dst_gc_task(struct work_struct *work) { int delayed = 0; - int work_performed; - struct dst_entry * dst, **dstp; + int work_performed = 0; + unsigned long expires = ~0L; + struct dst_entry *dst, *next, head; + struct dst_entry *last = &head; +#if RT_CACHE_DEBUG >= 2 + ktime_t time_start = ktime_get(); + struct timespec elapsed; +#endif - if (!spin_trylock(&dst_lock)) { - mod_timer(&dst_gc_timer, jiffies + HZ/10); - return; - } + mutex_lock(&dst_gc_mutex); + next = dst_busy_list; - del_timer(&dst_gc_timer); - dstp = &dst_garbage_list; - work_performed = 0; - while ((dst = *dstp) != NULL) { - if (atomic_read(&dst->__refcnt)) { - dstp = &dst->next; +loop: + while ((dst = next) != NULL) { + next = dst->next; + prefetch(&next->next); + if (likely(atomic_read(&dst->__refcnt))) { + last->next = dst; + last = dst; delayed++; continue; } - *dstp = dst->next; - work_performed = 1; + work_performed++; dst = dst_destroy(dst); if (dst) { @@ -77,38 +102,56 @@ static void dst_run_gc(unsigned long dummy) continue; ___dst_free(dst); - dst->next = *dstp; - *dstp = dst; - dstp = &dst->next; + dst->next = next; + next = dst; } } - if (!dst_garbage_list) { - dst_gc_timer_inc = DST_GC_MAX; - goto out; + + spin_lock_bh(&dst_garbage.lock); + next = dst_garbage.list; + if (next) { + dst_garbage.list = NULL; + spin_unlock_bh(&dst_garbage.lock); + goto loop; } - if (!work_performed) { - if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) - dst_gc_timer_expires = DST_GC_MAX; - dst_gc_timer_inc += DST_GC_INC; - } else { - dst_gc_timer_inc = DST_GC_INC; - dst_gc_timer_expires = DST_GC_MIN; + last->next = NULL; + dst_busy_list = head.next; + if (!dst_busy_list) + dst_garbage.timer_inc = DST_GC_MAX; + else { + /* + * if we freed less than 1/10 of delayed entries, + * we can sleep longer. + */ + if (work_performed <= delayed/10) { + dst_garbage.timer_expires += dst_garbage.timer_inc; + if (dst_garbage.timer_expires > DST_GC_MAX) + dst_garbage.timer_expires = DST_GC_MAX; + dst_garbage.timer_inc += DST_GC_INC; + } else { + dst_garbage.timer_inc = DST_GC_INC; + dst_garbage.timer_expires = DST_GC_MIN; + } + expires = dst_garbage.timer_expires; + /* + * if the next desired timer is more than 4 seconds in the future + * then round the timer to whole seconds + */ + if (expires > 4*HZ) + expires = round_jiffies_relative(expires); + schedule_delayed_work(&dst_gc_work, expires); } + + spin_unlock_bh(&dst_garbage.lock); + mutex_unlock(&dst_gc_mutex); #if RT_CACHE_DEBUG >= 2 - printk("dst_total: %d/%d %ld\n", - atomic_read(&dst_total), delayed, dst_gc_timer_expires); + elapsed = ktime_to_timespec(ktime_sub(ktime_get(), time_start)); + printk(KERN_DEBUG "dst_total: %d delayed: %d work_perf: %d" + " expires: %lu elapsed: %lu us\n", + atomic_read(&dst_total), delayed, work_performed, + expires, + elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC); #endif - /* if the next desired timer is more than 4 seconds in the future - * then round the timer to whole seconds - */ - if (dst_gc_timer_expires > 4*HZ) - mod_timer(&dst_gc_timer, - round_jiffies(jiffies + dst_gc_timer_expires)); - else - mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); - -out: - spin_unlock(&dst_lock); } static int dst_discard(struct sk_buff *skb) @@ -153,16 +196,16 @@ static void ___dst_free(struct dst_entry * dst) void __dst_free(struct dst_entry * dst) { - spin_lock_bh(&dst_lock); + spin_lock_bh(&dst_garbage.lock); ___dst_free(dst); - dst->next = dst_garbage_list; - dst_garbage_list = dst; - if (dst_gc_timer_inc > DST_GC_INC) { - dst_gc_timer_inc = DST_GC_INC; - dst_gc_timer_expires = DST_GC_MIN; - mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); + dst->next = dst_garbage.list; + dst_garbage.list = dst; + if (dst_garbage.timer_inc > DST_GC_INC) { + dst_garbage.timer_inc = DST_GC_INC; + dst_garbage.timer_expires = DST_GC_MIN; + schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires); } - spin_unlock_bh(&dst_lock); + spin_unlock_bh(&dst_garbage.lock); } struct dst_entry *dst_destroy(struct dst_entry * dst) @@ -236,13 +279,13 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = init_net.loopback_dev; + dev_hold(dst->dev); dev_put(dev); if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = &loopback_dev; + dst->neighbour->dev = init_net.loopback_dev; dev_put(dev); - dev_hold(&loopback_dev); + dev_hold(dst->neighbour->dev); } } } @@ -250,16 +293,33 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct dst_entry *dst; + struct dst_entry *dst, *last = NULL; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; switch (event) { case NETDEV_UNREGISTER: case NETDEV_DOWN: - spin_lock_bh(&dst_lock); - for (dst = dst_garbage_list; dst; dst = dst->next) { + mutex_lock(&dst_gc_mutex); + for (dst = dst_busy_list; dst; dst = dst->next) { + last = dst; + dst_ifdown(dst, dev, event != NETDEV_DOWN); + } + + spin_lock_bh(&dst_garbage.lock); + dst = dst_garbage.list; + dst_garbage.list = NULL; + spin_unlock_bh(&dst_garbage.lock); + + if (last) + last->next = dst; + else + dst_busy_list = dst; + for (; dst; dst = dst->next) { dst_ifdown(dst, dev, event != NETDEV_DOWN); } - spin_unlock_bh(&dst_lock); + mutex_unlock(&dst_gc_mutex); break; } return NOTIFY_DONE; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c5e059352d43..1163eb2256d0 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -109,6 +109,32 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data) return 0; } +/* the following list of flags are the same as their associated + * NETIF_F_xxx values in include/linux/netdevice.h + */ +static const u32 flags_dup_features = + ETH_FLAG_LRO; + +u32 ethtool_op_get_flags(struct net_device *dev) +{ + /* in the future, this function will probably contain additional + * handling for flags which are not so easily handled + * by a simple masking operation + */ + + return dev->features & flags_dup_features; +} + +int ethtool_op_set_flags(struct net_device *dev, u32 data) +{ + if (data & ETH_FLAG_LRO) + dev->features |= NETIF_F_LRO; + else + dev->features &= ~NETIF_F_LRO; + + return 0; +} + /* Handlers for each ethtool command */ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) @@ -153,10 +179,26 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) info.cmd = ETHTOOL_GDRVINFO; ops->get_drvinfo(dev, &info); - if (ops->self_test_count) - info.testinfo_len = ops->self_test_count(dev); - if (ops->get_stats_count) - info.n_stats = ops->get_stats_count(dev); + if (ops->get_sset_count) { + int rc; + + rc = ops->get_sset_count(dev, ETH_SS_TEST); + if (rc >= 0) + info.testinfo_len = rc; + rc = ops->get_sset_count(dev, ETH_SS_STATS); + if (rc >= 0) + info.n_stats = rc; + rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); + if (rc >= 0) + info.n_priv_flags = rc; + } else { + /* code path for obsolete hooks */ + + if (ops->self_test_count) + info.testinfo_len = ops->self_test_count(dev); + if (ops->get_stats_count) + info.n_stats = ops->get_stats_count(dev); + } if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); if (ops->get_eeprom_len) @@ -230,34 +272,6 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_wol(dev, &wol); } -static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GMSGLVL }; - - if (!dev->ethtool_ops->get_msglevel) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_msglevel(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_msglevel) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - dev->ethtool_ops->set_msglevel(dev, edata.data); - return 0; -} - static int ethtool_nway_reset(struct net_device *dev) { if (!dev->ethtool_ops->nway_reset) @@ -266,20 +280,6 @@ static int ethtool_nway_reset(struct net_device *dev) return dev->ethtool_ops->nway_reset(dev); } -static int ethtool_get_link(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GLINK }; - - if (!dev->ethtool_ops->get_link) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_link(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; @@ -447,48 +447,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); } -static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GRXCSUM }; - - if (!dev->ethtool_ops->get_rx_csum) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_rx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_rx_csum) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - dev->ethtool_ops->set_rx_csum(dev, edata.data); - return 0; -} - -static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTXCSUM }; - - if (!dev->ethtool_ops->get_tx_csum) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_tx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; @@ -527,20 +485,6 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tx_csum(dev, edata.data); } -static int ethtool_get_sg(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GSG }; - - if (!dev->ethtool_ops->get_sg) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_sg(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -558,20 +502,6 @@ static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) return __ethtool_set_sg(dev, edata.data); } -static int ethtool_get_tso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTSO }; - - if (!dev->ethtool_ops->get_tso) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_tso(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -588,18 +518,6 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tso(dev, edata.data); } -static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GUFO }; - - if (!dev->ethtool_ops->get_ufo) - return -EOPNOTSUPP; - edata.data = dev->ethtool_ops->get_ufo(dev); - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -643,16 +561,27 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) struct ethtool_test test; const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; - int ret; + int ret, test_len; - if (!ops->self_test || !ops->self_test_count) + if (!ops->self_test) return -EOPNOTSUPP; + if (!ops->get_sset_count && !ops->self_test_count) + return -EOPNOTSUPP; + + if (ops->get_sset_count) + test_len = ops->get_sset_count(dev, ETH_SS_TEST); + else + /* code path for obsolete hook */ + test_len = ops->self_test_count(dev); + if (test_len < 0) + return test_len; + WARN_ON(test_len == 0); if (copy_from_user(&test, useraddr, sizeof(test))) return -EFAULT; - test.len = ops->self_test_count(dev); - data = kmalloc(test.len * sizeof(u64), GFP_USER); + test.len = test_len; + data = kmalloc(test_len * sizeof(u64), GFP_USER); if (!data) return -ENOMEM; @@ -684,19 +613,29 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - switch (gstrings.string_set) { - case ETH_SS_TEST: - if (!ops->self_test_count) - return -EOPNOTSUPP; - gstrings.len = ops->self_test_count(dev); - break; - case ETH_SS_STATS: - if (!ops->get_stats_count) - return -EOPNOTSUPP; - gstrings.len = ops->get_stats_count(dev); - break; - default: - return -EINVAL; + if (ops->get_sset_count) { + ret = ops->get_sset_count(dev, gstrings.string_set); + if (ret < 0) + return ret; + + gstrings.len = ret; + } else { + /* code path for obsolete hooks */ + + switch (gstrings.string_set) { + case ETH_SS_TEST: + if (!ops->self_test_count) + return -EOPNOTSUPP; + gstrings.len = ops->self_test_count(dev); + break; + case ETH_SS_STATS: + if (!ops->get_stats_count) + return -EOPNOTSUPP; + gstrings.len = ops->get_stats_count(dev); + break; + default: + return -EINVAL; + } } data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); @@ -736,16 +675,27 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) struct ethtool_stats stats; const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; - int ret; + int ret, n_stats; - if (!ops->get_ethtool_stats || !ops->get_stats_count) + if (!ops->get_ethtool_stats) + return -EOPNOTSUPP; + if (!ops->get_sset_count && !ops->get_stats_count) return -EOPNOTSUPP; + if (ops->get_sset_count) + n_stats = ops->get_sset_count(dev, ETH_SS_STATS); + else + /* code path for obsolete hook */ + n_stats = ops->get_stats_count(dev); + if (n_stats < 0) + return n_stats; + WARN_ON(n_stats == 0); + if (copy_from_user(&stats, useraddr, sizeof(stats))) return -EFAULT; - stats.n_stats = ops->get_stats_count(dev); - data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER); + stats.n_stats = n_stats; + data = kmalloc(n_stats * sizeof(u64), GFP_USER); if (!data) return -ENOMEM; @@ -783,11 +733,55 @@ static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) return 0; } +static int ethtool_get_value(struct net_device *dev, char __user *useraddr, + u32 cmd, u32 (*actor)(struct net_device *)) +{ + struct ethtool_value edata = { cmd }; + + if (!actor) + return -EOPNOTSUPP; + + edata.data = actor(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_value_void(struct net_device *dev, char __user *useraddr, + void (*actor)(struct net_device *, u32)) +{ + struct ethtool_value edata; + + if (!actor) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + actor(dev, edata.data); + return 0; +} + +static int ethtool_set_value(struct net_device *dev, char __user *useraddr, + int (*actor)(struct net_device *, u32)) +{ + struct ethtool_value edata; + + if (!actor) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + return actor(dev, edata.data); +} + /* The main entry point in this file. Called from net/core/dev.c */ -int dev_ethtool(struct ifreq *ifr) +int dev_ethtool(struct net *net, struct ifreq *ifr) { - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; @@ -817,6 +811,8 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_GPERMADDR: case ETHTOOL_GUFO: case ETHTOOL_GGSO: + case ETHTOOL_GFLAGS: + case ETHTOOL_GPFLAGS: break; default: if (!capable(CAP_NET_ADMIN)) @@ -849,16 +845,19 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_set_wol(dev, useraddr); break; case ETHTOOL_GMSGLVL: - rc = ethtool_get_msglevel(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_msglevel); break; case ETHTOOL_SMSGLVL: - rc = ethtool_set_msglevel(dev, useraddr); + rc = ethtool_set_value_void(dev, useraddr, + dev->ethtool_ops->set_msglevel); break; case ETHTOOL_NWAY_RST: rc = ethtool_nway_reset(dev); break; case ETHTOOL_GLINK: - rc = ethtool_get_link(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_link); break; case ETHTOOL_GEEPROM: rc = ethtool_get_eeprom(dev, useraddr); @@ -885,25 +884,36 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_set_pauseparam(dev, useraddr); break; case ETHTOOL_GRXCSUM: - rc = ethtool_get_rx_csum(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_rx_csum); break; case ETHTOOL_SRXCSUM: - rc = ethtool_set_rx_csum(dev, useraddr); + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_rx_csum); break; case ETHTOOL_GTXCSUM: - rc = ethtool_get_tx_csum(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_tx_csum ? + dev->ethtool_ops->get_tx_csum : + ethtool_op_get_tx_csum)); break; case ETHTOOL_STXCSUM: rc = ethtool_set_tx_csum(dev, useraddr); break; case ETHTOOL_GSG: - rc = ethtool_get_sg(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_sg ? + dev->ethtool_ops->get_sg : + ethtool_op_get_sg)); break; case ETHTOOL_SSG: rc = ethtool_set_sg(dev, useraddr); break; case ETHTOOL_GTSO: - rc = ethtool_get_tso(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_tso ? + dev->ethtool_ops->get_tso : + ethtool_op_get_tso)); break; case ETHTOOL_STSO: rc = ethtool_set_tso(dev, useraddr); @@ -924,7 +934,10 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_get_perm_addr(dev, useraddr); break; case ETHTOOL_GUFO: - rc = ethtool_get_ufo(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_ufo ? + dev->ethtool_ops->get_ufo : + ethtool_op_get_ufo)); break; case ETHTOOL_SUFO: rc = ethtool_set_ufo(dev, useraddr); @@ -935,6 +948,22 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_SGSO: rc = ethtool_set_gso(dev, useraddr); break; + case ETHTOOL_GFLAGS: + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_flags); + break; + case ETHTOOL_SFLAGS: + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_flags); + break; + case ETHTOOL_GPFLAGS: + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_priv_flags); + break; + case ETHTOOL_SPFLAGS: + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_priv_flags); + break; default: rc = -EOPNOTSUPP; } @@ -959,3 +988,5 @@ EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); EXPORT_SYMBOL(ethtool_op_set_ufo); EXPORT_SYMBOL(ethtool_op_get_ufo); +EXPORT_SYMBOL(ethtool_op_set_flags); +EXPORT_SYMBOL(ethtool_op_get_flags); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 8c5474e16683..13de6f53f098 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -11,6 +11,8 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/list.h> +#include <net/net_namespace.h> +#include <net/sock.h> #include <net/fib_rules.h> static LIST_HEAD(rules_ops); @@ -82,7 +84,7 @@ static void cleanup_ops(struct fib_rules_ops *ops) { struct fib_rule *rule, *tmp; - list_for_each_entry_safe(rule, tmp, ops->rules_list, list) { + list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) { list_del_rcu(&rule->list); fib_rule_put(rule); } @@ -137,7 +139,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, rcu_read_lock(); - list_for_each_entry_rcu(rule, ops->rules_list, list) { + list_for_each_entry_rcu(rule, &ops->rules_list, list) { jumped: if (!fib_rule_match(rule, ops, fl, flags)) continue; @@ -197,6 +199,7 @@ errout: static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *r, *last = NULL; @@ -234,7 +237,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->ifindex = -1; nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); - dev = __dev_get_by_name(rule->ifname); + dev = __dev_get_by_name(net, rule->ifname); if (dev) rule->ifindex = dev->ifindex; } @@ -268,7 +271,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (rule->target <= rule->pref) goto errout_free; - list_for_each_entry(r, ops->rules_list, list) { + list_for_each_entry(r, &ops->rules_list, list) { if (r->pref == rule->target) { rule->ctarget = r; break; @@ -284,7 +287,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) goto errout_free; - list_for_each_entry(r, ops->rules_list, list) { + list_for_each_entry(r, &ops->rules_list, list) { if (r->pref > rule->pref) break; last = r; @@ -297,7 +300,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) * There are unresolved goto rules in the list, check if * any of them are pointing to this new rule. */ - list_for_each_entry(r, ops->rules_list, list) { + list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && r->target == rule->pref) { BUG_ON(r->ctarget != NULL); @@ -317,7 +320,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (last) list_add_rcu(&rule->list, &last->list); else - list_add_rcu(&rule->list, ops->rules_list); + list_add_rcu(&rule->list, &ops->rules_list); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); flush_route_cache(ops); @@ -356,7 +359,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) goto errout; - list_for_each_entry(rule, ops->rules_list, list) { + list_for_each_entry(rule, &ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; @@ -399,7 +402,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) * actually been added. */ if (ops->nr_goto_rules > 0) { - list_for_each_entry(tmp, ops->rules_list, list) { + list_for_each_entry(tmp, &ops->rules_list, list) { if (tmp->ctarget == rule) { rcu_assign_pointer(tmp->ctarget, NULL); ops->unresolved_rules++; @@ -495,7 +498,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, int idx = 0; struct fib_rule *rule; - list_for_each_entry(rule, ops->rules_list, list) { + list_for_each_entry(rule, &ops->rules_list, list) { if (idx < cb->args[1]) goto skip; @@ -596,18 +599,21 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct fib_rules_ops *ops; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + ASSERT_RTNL(); rcu_read_lock(); switch (event) { case NETDEV_REGISTER: list_for_each_entry(ops, &rules_ops, list) - attach_rules(ops->rules_list, dev); + attach_rules(&ops->rules_list, dev); break; case NETDEV_UNREGISTER: list_for_each_entry(ops, &rules_ops, list) - detach_rules(ops->rules_list, dev); + detach_rules(&ops->rules_list, dev); break; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f7de8f24d8dd..c52df858d0be 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -25,6 +25,7 @@ #include <linux/sysctl.h> #endif #include <linux/times.h> +#include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/sock.h> @@ -55,9 +56,8 @@ #define PNEIGH_HASHMASK 0xF static void neigh_timer_handler(unsigned long arg); -#ifdef CONFIG_ARPD -static void neigh_app_notify(struct neighbour *n); -#endif +static void __neigh_notify(struct neighbour *n, int type, int flags); +static void neigh_update_notify(struct neighbour *neigh); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); @@ -105,6 +105,15 @@ static int neigh_blackhole(struct sk_buff *skb) return -ENETDOWN; } +static void neigh_cleanup_and_release(struct neighbour *neigh) +{ + if (neigh->parms->neigh_cleanup) + neigh->parms->neigh_cleanup(neigh); + + __neigh_notify(neigh, RTM_DELNEIGH, 0); + neigh_release(neigh); +} + /* * It is random distribution in the interval (1/2)*base...(3/2)*base. * It corresponds to default IPv6 settings and is not overridable, @@ -141,9 +150,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) n->dead = 1; shrunk = 1; write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); continue; } write_unlock(&n->lock); @@ -214,9 +221,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) NEIGH_PRINTK2("neigh %p is stray.\n", n); } write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); } } } @@ -677,9 +682,7 @@ static void neigh_periodic_timer(unsigned long arg) *np = n->next; n->dead = 1; write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); continue; } write_unlock(&n->lock); @@ -828,13 +831,10 @@ static void neigh_timer_handler(unsigned long arg) out: write_unlock(&neigh->lock); } + if (notify) - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); + neigh_update_notify(neigh); -#ifdef CONFIG_ARPD - if (notify && neigh->parms->app_probes) - neigh_app_notify(neigh); -#endif neigh_release(neigh); } @@ -897,8 +897,8 @@ out_unlock_bh: static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; - void (*update)(struct hh_cache*, struct net_device*, unsigned char *) = - neigh->dev->header_cache_update; + void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) + = neigh->dev->header_ops->cache_update; if (update) { for (hh = neigh->hh; hh; hh = hh->hh_next) { @@ -1063,11 +1063,8 @@ out: write_unlock_bh(&neigh->lock); if (notify) - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); -#ifdef CONFIG_ARPD - if (notify && neigh->parms->app_probes) - neigh_app_notify(neigh); -#endif + neigh_update_notify(neigh); + return err; } @@ -1098,7 +1095,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, hh->hh_type = protocol; atomic_set(&hh->hh_refcnt, 0); hh->hh_next = NULL; - if (dev->hard_header_cache(n, hh)) { + + if (dev->header_ops->cache(n, hh)) { kfree(hh); hh = NULL; } else { @@ -1128,10 +1126,9 @@ int neigh_compat_output(struct sk_buff *skb) __skb_pull(skb, skb_network_offset(skb)); - if (dev->hard_header && - dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, - skb->len) < 0 && - dev->rebuild_header(skb)) + if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, + skb->len) < 0 && + dev->header_ops->rebuild(skb)) return 0; return dev_queue_xmit(skb); @@ -1153,17 +1150,17 @@ int neigh_resolve_output(struct sk_buff *skb) if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; - if (dev->hard_header_cache && !dst->hh) { + if (dev->header_ops->cache && !dst->hh) { write_lock_bh(&neigh->lock); if (!dst->hh) neigh_hh_init(neigh, dst, dst->ops->protocol); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); write_unlock_bh(&neigh->lock); } else { read_lock_bh(&neigh->lock); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); read_unlock_bh(&neigh->lock); } if (err >= 0) @@ -1194,8 +1191,8 @@ int neigh_connected_output(struct sk_buff *skb) __skb_pull(skb, skb_network_offset(skb)); read_lock_bh(&neigh->lock); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); read_unlock_bh(&neigh->lock); if (err >= 0) err = neigh->ops->queue_xmit(skb); @@ -1354,7 +1351,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat); + tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat); if (!tbl->pde) panic("cannot create neighbour proc dir entry"); tbl->pde->proc_fops = &neigh_stat_seq_fops; @@ -1444,6 +1441,7 @@ int neigh_table_clear(struct neigh_table *tbl) static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; @@ -1459,7 +1457,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -1509,6 +1507,7 @@ out: static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; @@ -1525,7 +1524,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -2000,6 +1999,11 @@ nla_put_failure: return -EMSGSIZE; } +static void neigh_update_notify(struct neighbour *neigh) +{ + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); + __neigh_notify(neigh, RTM_NEWNEIGH, 0); +} static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) @@ -2095,11 +2099,8 @@ void __neigh_for_each_release(struct neigh_table *tbl, } else np = &n->next; write_unlock(&n->lock); - if (release) { - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); - } + if (release) + neigh_cleanup_and_release(n); } } } @@ -2422,7 +2423,6 @@ static const struct file_operations neigh_stat_seq_fops = { #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_ARPD static inline size_t neigh_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) @@ -2454,16 +2454,11 @@ errout: rtnl_set_sk_err(RTNLGRP_NEIGH, err); } +#ifdef CONFIG_ARPD void neigh_app_ns(struct neighbour *n) { __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); } - -static void neigh_app_notify(struct neighbour *n) -{ - __neigh_notify(n, RTM_NEWNEIGH, 0); -} - #endif /* CONFIG_ARPD */ #ifdef CONFIG_SYSCTL diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5c19b0646d7a..909a03d6c0e9 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -18,6 +18,7 @@ #include <linux/wireless.h> #include <net/iw_handler.h> +#ifdef CONFIG_SYSFS static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; @@ -216,20 +217,6 @@ static ssize_t store_tx_queue_len(struct device *dev, return netdev_store(dev, attr, buf, len, change_tx_queue_len); } -NETDEVICE_SHOW(weight, fmt_dec); - -static int change_weight(struct net_device *net, unsigned long new_weight) -{ - net->weight = new_weight; - return 0; -} - -static ssize_t store_weight(struct device *dev, struct device_attribute *attr, - const char *buf, size_t len) -{ - return netdev_store(dev, attr, buf, len, change_weight); -} - static struct device_attribute net_class_attributes[] = { __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(iflink, S_IRUGO, show_iflink, NULL), @@ -246,7 +233,6 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), - __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight), {} }; @@ -407,6 +393,8 @@ static struct attribute_group wireless_group = { }; #endif +#endif /* CONFIG_SYSFS */ + #ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, char **envp, int num_envp, char *buf, int size) @@ -450,7 +438,9 @@ static void netdev_release(struct device *d) static struct class net_class = { .name = "net", .dev_release = netdev_release, +#ifdef CONFIG_SYSFS .dev_attrs = net_class_attributes, +#endif /* CONFIG_SYSFS */ #ifdef CONFIG_HOTPLUG .dev_uevent = netdev_uevent, #endif @@ -459,7 +449,7 @@ static struct class net_class = { /* Delete sysfs entries but hold kobject reference until after all * netdev references are gone. */ -void netdev_unregister_sysfs(struct net_device * net) +void netdev_unregister_kobject(struct net_device * net) { struct device *dev = &(net->dev); @@ -468,7 +458,7 @@ void netdev_unregister_sysfs(struct net_device * net) } /* Create sysfs entries for network device. */ -int netdev_register_sysfs(struct net_device *net) +int netdev_register_kobject(struct net_device *net) { struct device *dev = &(net->dev); struct attribute_group **groups = net->sysfs_groups; @@ -481,6 +471,7 @@ int netdev_register_sysfs(struct net_device *net) BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); +#ifdef CONFIG_SYSFS if (net->get_stats) *groups++ = &netstat_group; @@ -488,11 +479,12 @@ int netdev_register_sysfs(struct net_device *net) if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) *groups++ = &wireless_group; #endif +#endif /* CONFIG_SYSFS */ return device_add(dev); } -int netdev_sysfs_init(void) +int netdev_kobject_init(void) { return class_register(&net_class); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c new file mode 100644 index 000000000000..6f71db8c4428 --- /dev/null +++ b/net/core/net_namespace.c @@ -0,0 +1,317 @@ +#include <linux/workqueue.h> +#include <linux/rtnetlink.h> +#include <linux/cache.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <net/net_namespace.h> + +/* + * Our network namespace constructor/destructor lists + */ + +static LIST_HEAD(pernet_list); +static struct list_head *first_device = &pernet_list; +static DEFINE_MUTEX(net_mutex); + +LIST_HEAD(net_namespace_list); + +static struct kmem_cache *net_cachep; + +struct net init_net; +EXPORT_SYMBOL_GPL(init_net); + +static struct net *net_alloc(void) +{ + return kmem_cache_zalloc(net_cachep, GFP_KERNEL); +} + +static void net_free(struct net *net) +{ + if (!net) + return; + + if (unlikely(atomic_read(&net->use_count) != 0)) { + printk(KERN_EMERG "network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); + return; + } + + kmem_cache_free(net_cachep, net); +} + +static void cleanup_net(struct work_struct *work) +{ + struct pernet_operations *ops; + struct net *net; + + net = container_of(work, struct net, work); + + mutex_lock(&net_mutex); + + /* Don't let anyone else find us. */ + rtnl_lock(); + list_del(&net->list); + rtnl_unlock(); + + /* Run all of the network namespace exit methods */ + list_for_each_entry_reverse(ops, &pernet_list, list) { + if (ops->exit) + ops->exit(net); + } + + mutex_unlock(&net_mutex); + + /* Ensure there are no outstanding rcu callbacks using this + * network namespace. + */ + rcu_barrier(); + + /* Finally it is safe to free my network namespace structure */ + net_free(net); +} + + +void __put_net(struct net *net) +{ + /* Cleanup the network namespace in process context */ + INIT_WORK(&net->work, cleanup_net); + schedule_work(&net->work); +} +EXPORT_SYMBOL_GPL(__put_net); + +/* + * setup_net runs the initializers for the network namespace object. + */ +static int setup_net(struct net *net) +{ + /* Must be called with net_mutex held */ + struct pernet_operations *ops; + int error; + + atomic_set(&net->count, 1); + atomic_set(&net->use_count, 0); + + error = 0; + list_for_each_entry(ops, &pernet_list, list) { + if (ops->init) { + error = ops->init(net); + if (error < 0) + goto out_undo; + } + } +out: + return error; + +out_undo: + /* Walk through the list backwards calling the exit functions + * for the pernet modules whose init functions did not fail. + */ + list_for_each_entry_continue_reverse(ops, &pernet_list, list) { + if (ops->exit) + ops->exit(net); + } + goto out; +} + +struct net *copy_net_ns(unsigned long flags, struct net *old_net) +{ + struct net *new_net = NULL; + int err; + + get_net(old_net); + + if (!(flags & CLONE_NEWNET)) + return old_net; + +#ifndef CONFIG_NET_NS + return ERR_PTR(-EINVAL); +#endif + + err = -ENOMEM; + new_net = net_alloc(); + if (!new_net) + goto out; + + mutex_lock(&net_mutex); + err = setup_net(new_net); + if (err) + goto out_unlock; + + rtnl_lock(); + list_add_tail(&new_net->list, &net_namespace_list); + rtnl_unlock(); + + +out_unlock: + mutex_unlock(&net_mutex); +out: + put_net(old_net); + if (err) { + net_free(new_net); + new_net = ERR_PTR(err); + } + return new_net; +} + +static int __init net_ns_init(void) +{ + int err; + + printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); + net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), + SMP_CACHE_BYTES, + SLAB_PANIC, NULL); + mutex_lock(&net_mutex); + err = setup_net(&init_net); + + rtnl_lock(); + list_add_tail(&init_net.list, &net_namespace_list); + rtnl_unlock(); + + mutex_unlock(&net_mutex); + if (err) + panic("Could not setup the initial network namespace"); + + return 0; +} + +pure_initcall(net_ns_init); + +static int register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + struct net *net, *undo_net; + int error; + + error = 0; + list_add_tail(&ops->list, list); + for_each_net(net) { + if (ops->init) { + error = ops->init(net); + if (error) + goto out_undo; + } + } +out: + return error; + +out_undo: + /* If I have an error cleanup all namespaces I initialized */ + list_del(&ops->list); + for_each_net(undo_net) { + if (undo_net == net) + goto undone; + if (ops->exit) + ops->exit(undo_net); + } +undone: + goto out; +} + +static void unregister_pernet_operations(struct pernet_operations *ops) +{ + struct net *net; + + list_del(&ops->list); + for_each_net(net) + if (ops->exit) + ops->exit(net); +} + +/** + * register_pernet_subsys - register a network namespace subsystem + * @ops: pernet operations structure for the subsystem + * + * Register a subsystem which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. + * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. + */ +int register_pernet_subsys(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(first_device, ops); + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_subsys); + +/** + * unregister_pernet_subsys - unregister a network namespace subsystem + * @ops: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destoryed. In + * addition run the exit method for all existing network + * namespaces. + */ +void unregister_pernet_subsys(struct pernet_operations *module) +{ + mutex_lock(&net_mutex); + unregister_pernet_operations(module); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_subsys); + +/** + * register_pernet_device - register a network namespace device + * @ops: pernet operations structure for the subsystem + * + * Register a device which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. + * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. + */ +int register_pernet_device(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(&pernet_list, ops); + if (!error && (first_device == &pernet_list)) + first_device = &ops->list; + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_device); + +/** + * unregister_pernet_device - unregister a network namespace netdevice + * @ops: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destoryed. In + * addition run the exit method for all existing network + * namespaces. + */ +void unregister_pernet_device(struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + if (&ops->list == first_device) + first_device = first_device->next; + unregister_pernet_operations(ops); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_device); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index de1b26aa5720..95daba624967 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -119,19 +119,22 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, static void poll_napi(struct netpoll *np) { struct netpoll_info *npinfo = np->dev->npinfo; + struct napi_struct *napi; int budget = 16; - if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && - npinfo->poll_owner != smp_processor_id() && - spin_trylock(&npinfo->poll_lock)) { - npinfo->rx_flags |= NETPOLL_RX_DROP; - atomic_inc(&trapped); + list_for_each_entry(napi, &np->dev->napi_list, dev_list) { + if (test_bit(NAPI_STATE_SCHED, &napi->state) && + napi->poll_owner != smp_processor_id() && + spin_trylock(&napi->poll_lock)) { + npinfo->rx_flags |= NETPOLL_RX_DROP; + atomic_inc(&trapped); - np->dev->poll(np->dev, &budget); + napi->poll(napi, budget); - atomic_dec(&trapped); - npinfo->rx_flags &= ~NETPOLL_RX_DROP; - spin_unlock(&npinfo->poll_lock); + atomic_dec(&trapped); + npinfo->rx_flags &= ~NETPOLL_RX_DROP; + spin_unlock(&napi->poll_lock); + } } } @@ -157,7 +160,7 @@ void netpoll_poll(struct netpoll *np) /* Process pending work on NIC */ np->dev->poll_controller(np->dev); - if (np->dev->poll) + if (!list_empty(&np->dev->napi_list)) poll_napi(np); service_arp_queue(np->dev->npinfo); @@ -233,6 +236,17 @@ repeat: return skb; } +static int netpoll_owner_active(struct net_device *dev) +{ + struct napi_struct *napi; + + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (napi->poll_owner == smp_processor_id()) + return 1; + } + return 0; +} + static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { int status = NETDEV_TX_BUSY; @@ -246,8 +260,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) } /* don't get messages out of order, and no recursion */ - if (skb_queue_len(&npinfo->txq) == 0 && - npinfo->poll_owner != smp_processor_id()) { + if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { unsigned long flags; local_irq_save(flags); @@ -402,11 +415,9 @@ static void arp_reply(struct sk_buff *skb) send_skb->protocol = htons(ETH_P_ARP); /* Fill the device header for the ARP frame */ - - if (np->dev->hard_header && - np->dev->hard_header(send_skb, skb->dev, ptype, - sha, np->local_mac, - send_skb->len) < 0) { + if (dev_hard_header(send_skb, skb->dev, ptype, + sha, np->local_mac, + send_skb->len) < 0) { kfree_skb(send_skb); return; } @@ -519,6 +530,23 @@ out: return 0; } +void netpoll_print_options(struct netpoll *np) +{ + DECLARE_MAC_BUF(mac); + printk(KERN_INFO "%s: local port %d\n", + np->name, np->local_port); + printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", + np->name, HIPQUAD(np->local_ip)); + printk(KERN_INFO "%s: interface %s\n", + np->name, np->dev_name); + printk(KERN_INFO "%s: remote port %d\n", + np->name, np->remote_port); + printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", + np->name, HIPQUAD(np->remote_ip)); + printk(KERN_INFO "%s: remote ethernet address %s\n", + np->name, print_mac(mac, np->remote_mac)); +} + int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; @@ -531,7 +559,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) cur = delim; } cur++; - printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); if (*cur != '/') { if ((delim = strchr(cur, '/')) == NULL) @@ -539,9 +566,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) *delim = 0; np->local_ip = ntohl(in_aton(cur)); cur = delim; - - printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->local_ip)); } cur++; @@ -555,8 +579,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } cur++; - printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name); - if (*cur != '@') { /* dst port */ if ((delim = strchr(cur, '@')) == NULL) @@ -566,7 +588,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) cur = delim; } cur++; - printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); /* dst ip */ if ((delim = strchr(cur, '/')) == NULL) @@ -575,9 +596,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) np->remote_ip = ntohl(in_aton(cur)); cur = delim + 1; - printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->remote_ip)); - if (*cur != 0) { /* MAC address */ if ((delim = strchr(cur, ':')) == NULL) @@ -608,15 +626,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) np->remote_mac[5] = simple_strtol(cur, NULL, 16); } - printk(KERN_INFO "%s: remote ethernet address " - "%02x:%02x:%02x:%02x:%02x:%02x\n", - np->name, - np->remote_mac[0], - np->remote_mac[1], - np->remote_mac[2], - np->remote_mac[3], - np->remote_mac[4], - np->remote_mac[5]); + netpoll_print_options(np); return 0; @@ -635,7 +645,7 @@ int netpoll_setup(struct netpoll *np) int err; if (np->dev_name) - ndev = dev_get_by_name(np->dev_name); + ndev = dev_get_by_name(&init_net, np->dev_name); if (!ndev) { printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); @@ -652,8 +662,6 @@ int netpoll_setup(struct netpoll *np) npinfo->rx_flags = 0; npinfo->rx_np = NULL; - spin_lock_init(&npinfo->poll_lock); - npinfo->poll_owner = -1; spin_lock_init(&npinfo->rx_lock); skb_queue_head_init(&npinfo->arp_tx); @@ -820,6 +828,7 @@ void netpoll_set_trap(int trap) EXPORT_SYMBOL(netpoll_set_trap); EXPORT_SYMBOL(netpoll_trap); +EXPORT_SYMBOL(netpoll_print_options); EXPORT_SYMBOL(netpoll_parse_options); EXPORT_SYMBOL(netpoll_setup); EXPORT_SYMBOL(netpoll_cleanup); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 803d0c8826af..2100c734b102 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -152,6 +152,7 @@ #include <linux/wait.h> #include <linux/etherdevice.h> #include <linux/kthread.h> +#include <net/net_namespace.h> #include <net/checksum.h> #include <net/ipv6.h> #include <net/addrconf.h> @@ -167,7 +168,7 @@ #include <asm/div64.h> /* do_div */ #include <asm/timex.h> -#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n" +#define VERSION "pktgen v2.69: Packet Generator for packet performance testing.\n" /* The buckets are exponential in 'width' */ #define LAT_BUCKETS_MAX 32 @@ -189,6 +190,7 @@ #define F_SVID_RND (1<<10) /* Random SVLAN ID */ #define F_FLOW_SEQ (1<<11) /* Sequential flows */ #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ +#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ /* Thread control flag bits */ #define T_TERMINATE (1<<0) @@ -331,6 +333,7 @@ struct pktgen_dev { __be32 cur_daddr; __u16 cur_udp_dst; __u16 cur_udp_src; + __u16 cur_queue_map; __u32 cur_pkt_size; __u8 hh[14]; @@ -358,6 +361,10 @@ struct pktgen_dev { unsigned lflow; /* Flow length (config) */ unsigned nflows; /* accumulated flows (stats) */ unsigned curfl; /* current sequenced flow (state)*/ + + u16 queue_map_min; + u16 queue_map_max; + #ifdef CONFIG_XFRM __u8 ipsmode; /* IPSEC mode (config) */ __u8 ipsproto; /* IPSEC type (config) */ @@ -378,7 +385,6 @@ struct pktgen_thread { struct list_head th_list; struct task_struct *tsk; char result[512]; - u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ /* Field for thread to receive "posted" events terminate, stop ifs etc. */ @@ -593,11 +599,11 @@ static const struct file_operations pktgen_fops = { static int pktgen_if_show(struct seq_file *seq, void *v) { - int i; struct pktgen_dev *pkt_dev = seq->private; __u64 sa; __u64 stopped; __u64 now = getCurUs(); + DECLARE_MAC_BUF(mac); seq_printf(seq, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", @@ -613,6 +619,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); + seq_printf(seq, + " queue_map_min: %u queue_map_max: %u\n", + pkt_dev->queue_map_min, + pkt_dev->queue_map_max); + if (pkt_dev->flags & F_IPV6) { char b1[128], b2[128], b3[128]; fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); @@ -637,19 +648,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, " src_mac: "); - if (is_zero_ether_addr(pkt_dev->src_mac)) - for (i = 0; i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->odev->dev_addr[i], - i == 5 ? " " : ":"); - else - for (i = 0; i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->src_mac[i], - i == 5 ? " " : ":"); + seq_printf(seq, "%s ", + print_mac(mac, is_zero_ether_addr(pkt_dev->src_mac) ? + pkt_dev->odev->dev_addr : pkt_dev->src_mac)); seq_printf(seq, "dst_mac: "); - for (i = 0; i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->dst_mac[i], - i == 5 ? "\n" : ":"); + seq_printf(seq, "%s\n", print_mac(mac, pkt_dev->dst_mac)); seq_printf(seq, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", @@ -709,6 +713,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_MPLS_RND) seq_printf(seq, "MPLS_RND "); + if (pkt_dev->flags & F_QUEUE_MAP_RND) + seq_printf(seq, "QUEUE_MAP_RND "); + if (pkt_dev->cflows) { if (pkt_dev->flags & F_FLOW_SEQ) seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ @@ -764,6 +771,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n", pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); + seq_printf(seq, " cur_queue_map: %u\n", pkt_dev->cur_queue_map); + seq_printf(seq, " flows: %u\n", pkt_dev->nflows); if (pkt_dev->result[0]) @@ -1215,6 +1224,11 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "FLOW_SEQ") == 0) pkt_dev->flags |= F_FLOW_SEQ; + else if (strcmp(f, "QUEUE_MAP_RND") == 0) + pkt_dev->flags |= F_QUEUE_MAP_RND; + + else if (strcmp(f, "!QUEUE_MAP_RND") == 0) + pkt_dev->flags &= ~F_QUEUE_MAP_RND; #ifdef CONFIG_XFRM else if (strcmp(f, "IPSEC") == 0) pkt_dev->flags |= F_IPSEC_ON; @@ -1526,16 +1540,40 @@ static ssize_t pktgen_if_write(struct file *file, return count; } + if (!strcmp(name, "queue_map_min")) { + len = num_arg(&user_buffer[i], 5, &value); + if (len < 0) { + return len; + } + i += len; + pkt_dev->queue_map_min = value; + sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min); + return count; + } + + if (!strcmp(name, "queue_map_max")) { + len = num_arg(&user_buffer[i], 5, &value); + if (len < 0) { + return len; + } + i += len; + pkt_dev->queue_map_max = value; + sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max); + return count; + } + if (!strcmp(name, "mpls")) { - unsigned n, offset; + unsigned n, cnt; + len = get_labels(&user_buffer[i], pkt_dev); - if (len < 0) { return len; } + if (len < 0) + return len; i += len; - offset = sprintf(pg_result, "OK: mpls="); + cnt = sprintf(pg_result, "OK: mpls="); for (n = 0; n < pkt_dev->nr_labels; n++) - offset += sprintf(pg_result + offset, - "%08x%s", ntohl(pkt_dev->labels[n]), - n == pkt_dev->nr_labels-1 ? "" : ","); + cnt += sprintf(pg_result + cnt, + "%08x%s", ntohl(pkt_dev->labels[n]), + n == pkt_dev->nr_labels-1 ? "" : ","); if (pkt_dev->nr_labels && pkt_dev->vlan_id != 0xffff) { pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */ @@ -1718,9 +1756,6 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); - seq_printf(seq, "Name: %s max_before_softirq: %d\n", - t->tsk->comm, t->max_before_softirq); - seq_printf(seq, "Running: "); if_lock(t); @@ -1753,7 +1788,6 @@ static ssize_t pktgen_thread_write(struct file *file, int i = 0, max, len, ret; char name[40]; char *pg_result; - unsigned long value = 0; if (count < 1) { // sprintf(pg_result, "Wrong command format"); @@ -1827,12 +1861,8 @@ static ssize_t pktgen_thread_write(struct file *file, } if (!strcmp(name, "max_before_softirq")) { - len = num_arg(&user_buffer[i], 10, &value); - mutex_lock(&pktgen_thread_lock); - t->max_before_softirq = value; - mutex_unlock(&pktgen_thread_lock); + sprintf(pg_result, "OK: Note! max_before_softirq is obsoleted -- Do not use"); ret = count; - sprintf(pg_result, "OK: max_before_softirq=%lu", value); goto out; } @@ -1940,6 +1970,9 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* It is OK that we do not hold the group lock right now, * as we run under the RTNL lock. */ @@ -1970,7 +2003,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) pkt_dev->odev = NULL; } - odev = dev_get_by_name(ifname); + odev = dev_get_by_name(&init_net, ifname); if (!odev) { printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); return -ENODEV; @@ -2111,7 +2144,6 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) if (spin_until_us - now > jiffies_to_usecs(1) + 1) schedule_timeout_interruptible(1); else if (spin_until_us - now > 100) { - do_softirq(); if (!pkt_dev->running) return; if (need_resched()) @@ -2387,6 +2419,20 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_pkt_size = t; } + if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { + __u16 t; + if (pkt_dev->flags & F_QUEUE_MAP_RND) { + t = random32() % + (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1) + + pkt_dev->queue_map_min; + } else { + t = pkt_dev->cur_queue_map + 1; + if (t > pkt_dev->queue_map_max) + t = pkt_dev->queue_map_min; + } + pkt_dev->cur_queue_map = t; + } + pkt_dev->flows[flow].count++; } @@ -2557,6 +2603,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); + skb->queue_mapping = pkt_dev->cur_queue_map; iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -2686,6 +2733,7 @@ static unsigned int scan_ip6(const char *s, char ip[16]) unsigned int prefixlen = 0; unsigned int suffixlen = 0; __be32 tmp; + char *pos; for (i = 0; i < 16; i++) ip[i] = 0; @@ -2700,12 +2748,9 @@ static unsigned int scan_ip6(const char *s, char ip[16]) } s++; } - { - char *tmp; - u = simple_strtoul(s, &tmp, 16); - i = tmp - s; - } + u = simple_strtoul(s, &pos, 16); + i = pos - s; if (!i) return 0; if (prefixlen == 12 && s[i] == '.') { @@ -2733,11 +2778,9 @@ static unsigned int scan_ip6(const char *s, char ip[16]) len++; } else if (suffixlen != 0) break; - { - char *tmp; - u = simple_strtol(s, &tmp, 16); - i = tmp - s; - } + + u = simple_strtol(s, &pos, 16); + i = pos - s; if (!i) { if (*s) len--; @@ -2898,6 +2941,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); + skb->queue_mapping = pkt_dev->cur_queue_map; iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -3465,8 +3509,6 @@ static int pktgen_thread_worker(void *arg) struct pktgen_thread *t = arg; struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - u32 max_before_softirq; - u32 tx_since_softirq = 0; BUG_ON(smp_processor_id() != cpu); @@ -3474,8 +3516,6 @@ static int pktgen_thread_worker(void *arg) pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid); - max_before_softirq = t->max_before_softirq; - set_current_state(TASK_INTERRUPTIBLE); set_freezable(); @@ -3494,24 +3534,9 @@ static int pktgen_thread_worker(void *arg) __set_current_state(TASK_RUNNING); - if (pkt_dev) { - + if (pkt_dev) pktgen_xmit(pkt_dev); - /* - * We like to stay RUNNING but must also give - * others fair share. - */ - - tx_since_softirq += pkt_dev->last_ok; - - if (tx_since_softirq > max_before_softirq) { - if (local_softirq_pending()) - do_softirq(); - tx_since_softirq = 0; - } - } - if (t->control & T_STOP) { pktgen_stop(t); t->control &= ~(T_STOP); @@ -3778,7 +3803,7 @@ static int __init pg_init(void) printk(KERN_INFO "%s", version); - pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); + pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); if (!pg_proc_dir) return -ENODEV; pg_proc_dir->owner = THIS_MODULE; @@ -3787,7 +3812,7 @@ static int __init pg_init(void) if (pe == NULL) { printk(KERN_ERR "pktgen: ERROR: cannot create %s " "procfs entry.\n", PGCTRL); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); return -EINVAL; } @@ -3811,7 +3836,7 @@ static int __init pg_init(void) "all threads\n"); unregister_netdevice_notifier(&pktgen_notifier_block); remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); return -ENODEV; } @@ -3838,7 +3863,7 @@ static void __exit pg_cleanup(void) /* Clean up proc file system */ remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); } module_init(pg_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4756d5857abf..1072d16696c3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -35,6 +35,7 @@ #include <linux/security.h> #include <linux/mutex.h> #include <linux/if_addr.h> +#include <linux/nsproxy.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -74,8 +75,6 @@ void __rtnl_unlock(void) void rtnl_unlock(void) { mutex_unlock(&rtnl_mutex); - if (rtnl && rtnl->sk_receive_queue.qlen) - rtnl->sk_data_ready(rtnl, 0); netdev_run_todo(); } @@ -306,10 +305,13 @@ EXPORT_SYMBOL_GPL(rtnl_link_register); void __rtnl_link_unregister(struct rtnl_link_ops *ops) { struct net_device *dev, *n; + struct net *net; - for_each_netdev_safe(dev, n) { - if (dev->rtnl_link_ops == ops) - ops->dellink(dev); + for_each_net(net) { + for_each_netdev_safe(net, dev, n) { + if (dev->rtnl_link_ops == ops) + ops->dellink(dev); + } } list_del(&ops->list); } @@ -634,7 +636,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len); - NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight); NLA_PUT_U8(skb, IFLA_OPERSTATE, netif_running(dev) ? dev->operstate : IF_OPER_DOWN); NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); @@ -694,12 +695,13 @@ nla_put_failure: static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx; int s_idx = cb->args[0]; struct net_device *dev; idx = 0; - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -714,7 +716,7 @@ cont: return skb->len; } -static const struct nla_policy ifla_policy[IFLA_MAX+1] = { +const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, @@ -724,6 +726,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, [IFLA_LINKMODE] = { .type = NLA_U8 }, + [IFLA_NET_NS_PID] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -731,12 +734,45 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; +static struct net *get_net_ns_by_pid(pid_t pid) +{ + struct task_struct *tsk; + struct net *net; + + /* Lookup the network namespace */ + net = ERR_PTR(-ESRCH); + rcu_read_lock(); + tsk = find_task_by_pid(pid); + if (tsk) { + task_lock(tsk); + if (tsk->nsproxy) + net = get_net(tsk->nsproxy->net_ns); + task_unlock(tsk); + } + rcu_read_unlock(); + return net; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { int send_addr_notify = 0; int err; + if (tb[IFLA_NET_NS_PID]) { + struct net *net; + net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + if (IS_ERR(net)) { + err = PTR_ERR(net); + goto errout; + } + err = dev_change_net_namespace(dev, net, ifname); + put_net(net); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; struct ifmap k_map; @@ -834,9 +870,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_WEIGHT]) - dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); - if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); @@ -862,6 +895,7 @@ errout: static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifinfomsg *ifm; struct net_device *dev; int err; @@ -880,9 +914,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = dev_get_by_index(ifm->ifi_index); + dev = dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(ifname); + dev = dev_get_by_name(net, ifname); else goto errout; @@ -908,6 +942,7 @@ errout: static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -924,9 +959,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(ifname); + dev = __dev_get_by_name(net, ifname); else return -EINVAL; @@ -941,8 +976,52 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return 0; } +struct net_device *rtnl_create_link(struct net *net, char *ifname, + const struct rtnl_link_ops *ops, struct nlattr *tb[]) +{ + int err; + struct net_device *dev; + + err = -ENOMEM; + dev = alloc_netdev(ops->priv_size, ifname, ops->setup); + if (!dev) + goto err; + + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto err_free; + } + + dev->nd_net = net; + dev->rtnl_link_ops = ops; + + if (tb[IFLA_MTU]) + dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_ADDRESS]) + memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), + nla_len(tb[IFLA_ADDRESS])); + if (tb[IFLA_BROADCAST]) + memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), + nla_len(tb[IFLA_BROADCAST])); + if (tb[IFLA_TXQLEN]) + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (tb[IFLA_OPERSTATE]) + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + if (tb[IFLA_LINKMODE]) + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + + return dev; + +err_free: + free_netdev(dev); +err: + return ERR_PTR(err); +} + static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -966,9 +1045,9 @@ replay: ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); else if (ifname[0]) - dev = __dev_get_by_name(ifname); + dev = __dev_get_by_name(net, ifname); else dev = NULL; @@ -1053,40 +1132,17 @@ replay: if (!ifname[0]) snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); - dev = alloc_netdev(ops->priv_size, ifname, ops->setup); - if (!dev) - return -ENOMEM; - - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto err_free; - } - dev->rtnl_link_ops = ops; - - if (tb[IFLA_MTU]) - dev->mtu = nla_get_u32(tb[IFLA_MTU]); - if (tb[IFLA_ADDRESS]) - memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), - nla_len(tb[IFLA_ADDRESS])); - if (tb[IFLA_BROADCAST]) - memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), - nla_len(tb[IFLA_BROADCAST])); - if (tb[IFLA_TXQLEN]) - dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_WEIGHT]) - dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); - if (tb[IFLA_OPERSTATE]) - set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); - if (tb[IFLA_LINKMODE]) - dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); - - if (ops->newlink) + + dev = rtnl_create_link(net, ifname, ops, tb); + + if (IS_ERR(dev)) + err = PTR_ERR(dev); + else if (ops->newlink) err = ops->newlink(dev, tb, data); else err = register_netdevice(dev); -err_free: - if (err < 0) + + if (err < 0 && !IS_ERR(dev)) free_netdev(dev); return err; } @@ -1094,6 +1150,7 @@ err_free: static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; @@ -1106,7 +1163,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { - dev = dev_get_by_index(ifm->ifi_index); + dev = dev_get_by_index(net, ifm->ifi_index); if (dev == NULL) return -ENODEV; } else @@ -1255,22 +1312,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return doit(skb, nlh, (void *)&rta_buf[0]); } -static void rtnetlink_rcv(struct sock *sk, int len) +static void rtnetlink_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - mutex_lock(&rtnl_mutex); - netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg); - mutex_unlock(&rtnl_mutex); - - netdev_run_todo(); - } while (qlen); + rtnl_lock(); + netlink_rcv_skb(skb, &rtnetlink_rcv_msg); + rtnl_unlock(); } static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_UNREGISTER: rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); @@ -1308,8 +1363,8 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, - &rtnl_mutex, THIS_MODULE); + rtnl = netlink_kernel_create(&init_net, NETLINK_ROUTE, RTNLGRP_MAX, + rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); @@ -1335,3 +1390,5 @@ EXPORT_SYMBOL(rtnl_unlock); EXPORT_SYMBOL(rtnl_unicast); EXPORT_SYMBOL(rtnl_notify); EXPORT_SYMBOL(rtnl_set_sk_err); +EXPORT_SYMBOL(rtnl_create_link); +EXPORT_SYMBOL(ifla_policy); diff --git a/net/core/scm.c b/net/core/scm.c index 44c4ec2c8769..530bee8d9ed9 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -167,7 +167,8 @@ error: int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { - struct cmsghdr __user *cm = (struct cmsghdr __user *)msg->msg_control; + struct cmsghdr __user *cm + = (__force struct cmsghdr __user *)msg->msg_control; struct cmsghdr cmhdr; int cmlen = CMSG_LEN(len); int err; @@ -202,7 +203,8 @@ out: void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { - struct cmsghdr __user *cm = (struct cmsghdr __user*)msg->msg_control; + struct cmsghdr __user *cm + = (__force struct cmsghdr __user*)msg->msg_control; int fdmax = 0; int fdnum = scm->fp->count; @@ -222,7 +224,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) if (fdnum < fdmax) fdmax = fdnum; - for (i=0, cmfptr=(int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++) + for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax; + i++, cmfptr++) { int new_fd; err = security_file_receive(fp[i]); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 35021eb3ed07..944189d96323 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -472,6 +472,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #ifdef CONFIG_INET new->sp = secpath_get(old->sp); #endif + new->csum_start = old->csum_start; + new->csum_offset = old->csum_offset; + new->ip_summed = old->ip_summed; new->transport_header = old->transport_header; new->network_header = old->network_header; new->mac_header = old->mac_header; @@ -545,8 +548,6 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) skb_reserve(n, headerlen); /* Set the tail pointer and length */ skb_put(n, skb->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) BUG(); @@ -589,8 +590,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) skb_put(n, skb_headlen(skb)); /* Copy the bytes */ skb_copy_from_linear_data(skb, n->data, n->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; n->truesize += skb->data_len; n->data_len = skb->data_len; @@ -686,6 +685,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->transport_header += off; skb->network_header += off; skb->mac_header += off; + skb->csum_start += off; skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; @@ -734,9 +734,6 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) * * You must pass %GFP_ATOMIC as the allocation priority if this function * is called from an interrupt. - * - * BUG ALERT: ip_summed is not copied. Why does this work? Is it used - * only by netfilter in the cases when checksum is recalculated? --ANK */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, @@ -749,7 +746,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, gfp_mask); int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; - int off = 0; + int off; if (!n) return NULL; @@ -773,12 +770,13 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, copy_skb_header(n, skb); -#ifdef NET_SKBUFF_DATA_USES_OFFSET off = newheadroom - oldheadroom; -#endif + n->csum_start += off; +#ifdef NET_SKBUFF_DATA_USES_OFFSET n->transport_header += off; n->network_header += off; n->mac_header += off; +#endif return n; } diff --git a/net/core/sock.c b/net/core/sock.c index 190de61cd648..4ed9b507c1e7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -119,6 +119,7 @@ #include <linux/netdevice.h> #include <net/protocol.h> #include <linux/skbuff.h> +#include <net/net_namespace.h> #include <net/request_sock.h> #include <net/sock.h> #include <net/xfrm.h> @@ -366,6 +367,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES + struct net *net = sk->sk_net; char devname[IFNAMSIZ]; int index; @@ -394,7 +396,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) if (devname[0] == '\0') { index = 0; } else { - struct net_device *dev = dev_get_by_name(devname); + struct net_device *dev = dev_get_by_name(net, devname); ret = -ENODEV; if (!dev) @@ -872,7 +874,7 @@ static inline void sock_lock_init(struct sock *sk) * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, gfp_t priority, +struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; @@ -893,6 +895,7 @@ struct sock *sk_alloc(int family, gfp_t priority, */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); + sk->sk_net = get_net(net); } if (security_sk_alloc(sk, family, priority)) @@ -932,6 +935,7 @@ void sk_free(struct sock *sk) __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + put_net(sk->sk_net); if (sk->sk_prot_creator->slab != NULL) kmem_cache_free(sk->sk_prot_creator->slab, sk); else @@ -941,7 +945,7 @@ void sk_free(struct sock *sk) struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { - struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); + struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0); if (newsk != NULL) { struct sk_filter *filter; @@ -1585,9 +1589,9 @@ void fastcall lock_sock_nested(struct sock *sk, int subclass) { might_sleep(); spin_lock_bh(&sk->sk_lock.slock); - if (sk->sk_lock.owner) + if (sk->sk_lock.owned) __lock_sock(sk); - sk->sk_lock.owner = (void *)1; + sk->sk_lock.owned = 1; spin_unlock(&sk->sk_lock.slock); /* * The sk_lock has mutex_lock() semantics here: @@ -1608,7 +1612,7 @@ void fastcall release_sock(struct sock *sk) spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); - sk->sk_lock.owner = NULL; + sk->sk_lock.owned = 0; if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); spin_unlock_bh(&sk->sk_lock.slock); @@ -1973,7 +1977,7 @@ static const struct file_operations proto_seq_fops = { static int __init proto_init(void) { /* register /proc/net/protocols */ - return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; + return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; } subsys_initcall(proto_init); diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 7ac775f9a64b..83378f379f72 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -69,21 +69,20 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; /* Figure out how many options do we need to represent the ackvec */ - const u16 nr_opts = (av->dccpav_vec_len + - DCCP_MAX_ACKVEC_OPT_LEN - 1) / - DCCP_MAX_ACKVEC_OPT_LEN; + const u16 nr_opts = DIV_ROUND_UP(av->dccpav_vec_len, + DCCP_MAX_ACKVEC_OPT_LEN); u16 len = av->dccpav_vec_len + 2 * nr_opts, i; - struct timeval now; u32 elapsed_time; const unsigned char *tail, *from; unsigned char *to; struct dccp_ackvec_record *avr; + suseconds_t delta; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) return -1; - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; + delta = ktime_us_delta(ktime_get_real(), av->dccpav_time); + elapsed_time = delta / 10; if (elapsed_time != 0 && dccp_insert_option_elapsed_time(sk, skb, elapsed_time)) @@ -159,8 +158,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; av->dccpav_buf_ackno = UINT48_MAX + 1; av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; - av->dccpav_time.tv_sec = 0; - av->dccpav_time.tv_usec = 0; + av->dccpav_time = ktime_set(0, 0); av->dccpav_vec_len = 0; INIT_LIST_HEAD(&av->dccpav_records); } @@ -321,7 +319,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, } av->dccpav_buf_ackno = ackno; - dccp_timestamp(sk, &av->dccpav_time); + av->dccpav_time = ktime_get_real(); out: return 0; diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 96504a3b16e4..9ef0737043ee 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -12,8 +12,8 @@ */ #include <linux/compiler.h> +#include <linux/ktime.h> #include <linux/list.h> -#include <linux/time.h> #include <linux/types.h> /* Read about the ECN nonce to see why it is 253 */ @@ -52,7 +52,7 @@ struct dccp_ackvec { u64 dccpav_buf_ackno; struct list_head dccpav_records; - struct timeval dccpav_time; + ktime_t dccpav_time; u16 dccpav_buf_head; u16 dccpav_vec_len; u8 dccpav_buf_nonce; diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index d29b88fe723c..426008e3b7e3 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -59,7 +59,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) pipe++; /* packets are sent sequentially */ - BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq); + BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq, + prev->ccid2s_seq ) >= 0); BUG_ON(time_before(seqp->ccid2s_sent, prev->ccid2s_sent)); @@ -83,8 +84,7 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) #define ccid2_hc_tx_check_sanity(hctx) #endif -static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, - gfp_t gfp) +static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) { struct ccid2_seq *seqp; int i; @@ -95,16 +95,16 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, return -ENOMEM; /* allocate buffer and initialize linked list */ - seqp = kmalloc(sizeof(*seqp) * num, gfp); + seqp = kmalloc(CCID2_SEQBUF_LEN * sizeof(struct ccid2_seq), gfp_any()); if (seqp == NULL) return -ENOMEM; - for (i = 0; i < (num - 1); i++) { + for (i = 0; i < (CCID2_SEQBUF_LEN - 1); i++) { seqp[i].ccid2s_next = &seqp[i + 1]; seqp[i + 1].ccid2s_prev = &seqp[i]; } - seqp[num - 1].ccid2s_next = seqp; - seqp->ccid2s_prev = &seqp[num - 1]; + seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = seqp; + seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; /* This is the first allocation. Initiate the head and tail. */ if (hctx->ccid2hctx_seqbufc == 0) @@ -114,8 +114,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, hctx->ccid2hctx_seqh->ccid2s_next = seqp; seqp->ccid2s_prev = hctx->ccid2hctx_seqh; - hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1]; - seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt; + hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; + seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt; } /* store the original pointer to the buffer so we can free it */ @@ -127,19 +127,7 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { - struct ccid2_hc_tx_sock *hctx; - - switch (DCCP_SKB_CB(skb)->dccpd_type) { - case 0: /* XXX data packets from userland come through like this */ - case DCCP_PKT_DATA: - case DCCP_PKT_DATAACK: - break; - /* No congestion control on other packets */ - default: - return 0; - } - - hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe, hctx->ccid2hctx_cwnd); @@ -180,16 +168,11 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val) dp->dccps_l_ack_ratio = val; } -static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val) +static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, u32 val) { - if (val == 0) - val = 1; - /* XXX do we need to change ack ratio? */ - ccid2_pr_debug("change cwnd to %d\n", val); - - BUG_ON(val < 1); - hctx->ccid2hctx_cwnd = val; + hctx->ccid2hctx_cwnd = val? : 1; + ccid2_pr_debug("changed cwnd to %u\n", hctx->ccid2hctx_cwnd); } static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) @@ -295,12 +278,11 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) next = hctx->ccid2hctx_seqh->ccid2s_next; /* check if we need to alloc more space */ if (next == hctx->ccid2hctx_seqt) { - int rc; - - ccid2_pr_debug("allocating more space in history\n"); - rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, gfp_any()); - BUG_ON(rc); /* XXX what do we do? */ - + if (ccid2_hc_tx_alloc_seq(hctx)) { + DCCP_CRIT("packet history - out of memory!"); + /* FIXME: find a more graceful way to bail out */ + return; + } next = hctx->ccid2hctx_seqh->ccid2s_next; BUG_ON(next == hctx->ccid2hctx_seqt); } @@ -581,8 +563,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) hctx->ccid2hctx_rpseq = seqno; } else { /* check if packet is consecutive */ - if ((hctx->ccid2hctx_rpseq + 1) == seqno) - hctx->ccid2hctx_rpseq++; + if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1) + hctx->ccid2hctx_rpseq = seqno; /* it's a later packet */ else if (after48(seqno, hctx->ccid2hctx_rpseq)) { hctx->ccid2hctx_rpdupack++; @@ -771,7 +753,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hctx->ccid2hctx_seqbufc = 0; /* XXX init ~ to window size... */ - if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0) + if (ccid2_hc_tx_alloc_seq(hctx)) return -ENOMEM; hctx->ccid2hctx_sent = 0; @@ -835,7 +817,7 @@ static struct ccid_operations ccid2 = { }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -module_param(ccid2_debug, int, 0444); +module_param(ccid2_debug, bool, 0444); MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index ebd79499c85a..d9daa534c9be 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -50,7 +50,7 @@ struct ccid2_seq { * @ccid2hctx_rpdupack - dupacks since rpseq */ struct ccid2_hc_tx_sock { - int ccid2hctx_cwnd; + u32 ccid2hctx_cwnd; int ccid2hctx_ssacks; int ccid2hctx_acks; unsigned int ccid2hctx_ssthresh; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index e91c2b9dc27b..25772c326172 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -113,27 +113,24 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6)); } -/* - * Update X by - * If (p > 0) - * X_calc = calcX(s, R, p); - * X = max(min(X_calc, 2 * X_recv), s / t_mbi); - * Else - * If (now - tld >= R) - * X = max(min(2 * X, 2 * X_recv), s / R); - * tld = now; + +/** + * ccid3_hc_tx_update_x - Update allowed sending rate X + * @stamp: most recent time if available - can be left NULL. + * This function tracks draft rfc3448bis, check there for latest details. * * Note: X and X_recv are both stored in units of 64 * bytes/second, to support * fine-grained resolution of sending rates. This requires scaling by 2^6 * throughout the code. Only X_calc is unscaled (in bytes/second). * */ -static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) +static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); __u64 min_rate = 2 * hctx->ccid3hctx_x_recv; const __u64 old_x = hctx->ccid3hctx_x; + ktime_t now = stamp? *stamp : ktime_get_real(); /* * Handle IDLE periods: do not reduce below RFC3390 initial sending rate @@ -153,14 +150,14 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) (((__u64)hctx->ccid3hctx_s) << 6) / TFRC_T_MBI); - } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) - - (suseconds_t)hctx->ccid3hctx_rtt >= 0) { + } else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld) + - (s64)hctx->ccid3hctx_rtt >= 0) { hctx->ccid3hctx_x = max(min(2 * hctx->ccid3hctx_x, min_rate), scaled_div(((__u64)hctx->ccid3hctx_s) << 6, hctx->ccid3hctx_rtt)); - hctx->ccid3hctx_t_ld = *now; + hctx->ccid3hctx_t_ld = now; } if (hctx->ccid3hctx_x != old_x) { @@ -214,7 +211,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct timeval now; unsigned long t_nfb = USEC_PER_SEC / 5; bh_lock_sock(sk); @@ -265,15 +261,12 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) max(hctx->ccid3hctx_x_recv / 2, (((__u64)hctx->ccid3hctx_s) << 6) / (2 * TFRC_T_MBI)); - - if (hctx->ccid3hctx_p == 0) - dccp_timestamp(sk, &now); } else { hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; hctx->ccid3hctx_x_recv <<= 4; } /* Now recalculate X [RFC 3448, 4.3, step (4)] */ - ccid3_hc_tx_update_x(sk, &now); + ccid3_hc_tx_update_x(sk, NULL); /* * Schedule no feedback timer to expire in * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) @@ -309,8 +302,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) ktime_t now = ktime_get_real(); s64 delay; - BUG_ON(hctx == NULL); - /* * This function is called only for Data and DataAck packets. Sending * zero-sized Data(Ack)s is theoretically possible, but for congestion @@ -341,7 +332,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; hctx->ccid3hctx_x = rfc3390_initial_rate(sk); - hctx->ccid3hctx_t_ld = ktime_to_timeval(now); + hctx->ccid3hctx_t_ld = now; } else { /* Sender does not have RTT sample: X = MSS/second */ hctx->ccid3hctx_x = dp->dccps_mss_cache; @@ -388,11 +379,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct timeval now; struct dccp_tx_hist_entry *packet; - BUG_ON(hctx == NULL); - ccid3_hc_tx_update_s(hctx, len); packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC); @@ -402,8 +390,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, } dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet); - dccp_timestamp(sk, &now); - packet->dccphtx_tstamp = now; + packet->dccphtx_tstamp = ktime_get_real(); packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss; packet->dccphtx_rtt = hctx->ccid3hctx_rtt; packet->dccphtx_sent = 1; @@ -414,12 +401,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; - struct timeval now; + ktime_t now; unsigned long t_nfb; u32 pinv, r_sample; - BUG_ON(hctx == NULL); - /* we are only interested in ACKs */ if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) @@ -452,13 +437,12 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) else /* can not exceed 100% */ hctx->ccid3hctx_p = 1000000 / pinv; - dccp_timestamp(sk, &now); - + now = ktime_get_real(); /* * Calculate new round trip sample as per [RFC 3448, 4.3] by * R_sample = (now - t_recvdata) - t_elapsed */ - r_sample = dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp); + r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, packet->dccphtx_tstamp)); /* * Update RTT estimate by @@ -561,8 +545,6 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; - BUG_ON(hctx == NULL); - opt_recv = &hctx->ccid3hctx_options_received; if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { @@ -636,8 +618,6 @@ static void ccid3_hc_tx_exit(struct sock *sk) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - BUG_ON(hctx == NULL); - ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); @@ -647,14 +627,13 @@ static void ccid3_hc_tx_exit(struct sock *sk) static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) { - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + struct ccid3_hc_tx_sock *hctx; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return; - BUG_ON(hctx == NULL); - + hctx = ccid3_hc_tx_sk(sk); info->tcpi_rto = hctx->ccid3hctx_t_rto; info->tcpi_rtt = hctx->ccid3hctx_rtt; } @@ -662,13 +641,14 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + const struct ccid3_hc_tx_sock *hctx; const void *val; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return -EINVAL; + hctx = ccid3_hc_tx_sk(sk); switch (optname) { case DCCP_SOCKOPT_CCID_TX_INFO: if (len < sizeof(hctx->ccid3hctx_tfrc)) @@ -729,20 +709,20 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_rx_hist_entry *packet; - struct timeval now; + ktime_t now; suseconds_t delta; ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); - dccp_timestamp(sk, &now); + now = ktime_get_real(); switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: hcrx->ccid3hcrx_x_recv = 0; break; case TFRC_RSTATE_DATA: - delta = timeval_delta(&now, - &hcrx->ccid3hcrx_tstamp_last_feedback); + delta = ktime_us_delta(now, + hcrx->ccid3hcrx_tstamp_last_feedback); DCCP_BUG_ON(delta < 0); hcrx->ccid3hcrx_x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); @@ -764,7 +744,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) hcrx->ccid3hcrx_bytes_recv = 0; /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */ - delta = timeval_delta(&now, &packet->dccphrx_tstamp); + delta = ktime_us_delta(now, packet->dccphrx_tstamp); DCCP_BUG_ON(delta < 0); hcrx->ccid3hcrx_elapsed_time = delta / 10; @@ -782,14 +762,13 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const struct ccid3_hc_rx_sock *hcrx; __be32 x_recv, pinv; - BUG_ON(hcrx == NULL); - if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return 0; + hcrx = ccid3_hc_rx_sk(sk); DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter; if (dccp_packet_without_ack(skb)) @@ -839,7 +818,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, dccp_li_update_li(sk, &hcrx->ccid3hcrx_li_hist, &hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_tstamp_last_feedback, + hcrx->ccid3hcrx_tstamp_last_feedback, hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_bytes_recv, hcrx->ccid3hcrx_x_recv, @@ -876,11 +855,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; - struct timeval now; u32 p_prev, r_sample, rtt_prev; int loss, payload_size; - - BUG_ON(hcrx == NULL); + ktime_t now; opt_recv = &dccp_sk(sk)->dccps_options_received; @@ -891,9 +868,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_DATAACK: if (opt_recv->dccpor_timestamp_echo == 0) break; + r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo; rtt_prev = hcrx->ccid3hcrx_rtt; - dccp_timestamp(sk, &now); - r_sample = dccp_sample_rtt(sk, &now, NULL); + r_sample = dccp_sample_rtt(sk, 10 * r_sample); if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) hcrx->ccid3hcrx_rtt = r_sample; @@ -912,7 +889,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, skb, GFP_ATOMIC); if (unlikely(packet == NULL)) { DCCP_WARN("%s(%p), Not enough mem to add rx packet " @@ -941,9 +918,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (loss) break; - dccp_timestamp(sk, &now); - if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) - - (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) { + now = ktime_get_real(); + if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) - + (s64)hcrx->ccid3hcrx_rtt) >= 0) { hcrx->ccid3hcrx_tstamp_last_ack = now; ccid3_hc_rx_send_feedback(sk); } @@ -984,8 +961,8 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); - dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); - hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; + hcrx->ccid3hcrx_tstamp_last_feedback = + hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real(); hcrx->ccid3hcrx_s = 0; hcrx->ccid3hcrx_rtt = 0; return 0; @@ -995,8 +972,6 @@ static void ccid3_hc_rx_exit(struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - BUG_ON(hcrx == NULL); - ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); /* Empty packet history */ @@ -1008,14 +983,13 @@ static void ccid3_hc_rx_exit(struct sock *sk) static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) { - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const struct ccid3_hc_rx_sock *hcrx; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return; - BUG_ON(hcrx == NULL); - + hcrx = ccid3_hc_rx_sk(sk); info->tcpi_ca_state = hcrx->ccid3hcrx_state; info->tcpi_options |= TCPI_OPT_TIMESTAMPS; info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; @@ -1024,13 +998,14 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const struct ccid3_hc_rx_sock *hcrx; const void *val; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return -EINVAL; + hcrx = ccid3_hc_rx_sk(sk); switch (optname) { case DCCP_SOCKOPT_CCID_RX_INFO: if (len < sizeof(hcrx->ccid3hcrx_tfrc)) @@ -1071,7 +1046,7 @@ static struct ccid_operations ccid3 = { }; #ifdef CONFIG_IP_DCCP_CCID3_DEBUG -module_param(ccid3_debug, int, 0444); +module_param(ccid3_debug, bool, 0444); MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 51d4b804e334..0cdc982cfe47 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -38,7 +38,6 @@ #include <linux/ktime.h> #include <linux/list.h> -#include <linux/time.h> #include <linux/types.h> #include <linux/tfrc.h> #include "../ccid.h" @@ -111,13 +110,20 @@ struct ccid3_hc_tx_sock { u8 ccid3hctx_idle; ktime_t ccid3hctx_t_last_win_count; struct timer_list ccid3hctx_no_feedback_timer; - struct timeval ccid3hctx_t_ld; + ktime_t ccid3hctx_t_ld; ktime_t ccid3hctx_t_nom; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; struct ccid3_options_received ccid3hctx_options_received; }; +static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) +{ + struct ccid3_hc_tx_sock *hctx = ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid); + BUG_ON(hctx == NULL); + return hctx; +} + /* TFRC receiver states */ enum ccid3_hc_rx_states { TFRC_RSTATE_NO_DATA = 1, @@ -153,8 +159,8 @@ struct ccid3_hc_rx_sock { ccid3hcrx_ccval_last_counter:4; enum ccid3_hc_rx_states ccid3hcrx_state:8; u32 ccid3hcrx_bytes_recv; - struct timeval ccid3hcrx_tstamp_last_feedback; - struct timeval ccid3hcrx_tstamp_last_ack; + ktime_t ccid3hcrx_tstamp_last_feedback; + ktime_t ccid3hcrx_tstamp_last_ack; struct list_head ccid3hcrx_hist; struct list_head ccid3hcrx_li_hist; u16 ccid3hcrx_s; @@ -162,14 +168,11 @@ struct ccid3_hc_rx_sock { u32 ccid3hcrx_elapsed_time; }; -static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) -{ - return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid); -} - static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) { - return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); + struct ccid3_hc_rx_sock *hcrx = ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); + BUG_ON(hcrx == NULL); + return hcrx; } #endif /* _DCCP_CCID3_H_ */ diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 174d3f13d93f..40ad428a27f5 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -125,14 +125,14 @@ static int dccp_li_hist_interval_new(struct list_head *list, * returns estimated loss interval in usecs */ static u32 dccp_li_calc_first_li(struct sock *sk, struct list_head *hist_list, - struct timeval *last_feedback, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv) { struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 x_recv, p; suseconds_t rtt, delta; - struct timeval tstamp = { 0, 0 }; + ktime_t tstamp = ktime_set(0, 0); int interval = 0; int win_count = 0; int step = 0; @@ -176,7 +176,7 @@ found: return ~0; } - delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); + delta = ktime_us_delta(tstamp, tail->dccphrx_tstamp); DCCP_BUG_ON(delta < 0); rtt = delta * 4 / interval; @@ -196,8 +196,7 @@ found: return ~0; } - dccp_timestamp(sk, &tstamp); - delta = timeval_delta(&tstamp, last_feedback); + delta = ktime_us_delta(ktime_get_real(), last_feedback); DCCP_BUG_ON(delta <= 0); x_recv = scaled_div32(bytes_recv, delta); @@ -226,7 +225,7 @@ found: void dccp_li_update_li(struct sock *sk, struct list_head *li_hist_list, struct list_head *hist_list, - struct timeval *last_feedback, u16 s, u32 bytes_recv, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv, u64 seq_loss, u8 win_loss) { struct dccp_li_hist_entry *head; diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 906c806d6d9d..27bee92dae13 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -13,8 +13,8 @@ * any later version. */ +#include <linux/ktime.h> #include <linux/list.h> -#include <linux/time.h> extern void dccp_li_hist_purge(struct list_head *list); @@ -23,7 +23,7 @@ extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); extern void dccp_li_update_li(struct sock *sk, struct list_head *li_hist_list, struct list_head *hist_list, - struct timeval *last_feedback, u16 s, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv, u64 seq_loss, u8 win_loss); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 60d00f015390..032bb61c6e39 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -37,9 +37,9 @@ #ifndef _DCCP_PKT_HIST_ #define _DCCP_PKT_HIST_ +#include <linux/ktime.h> #include <linux/list.h> #include <linux/slab.h> -#include <linux/time.h> #include "../../dccp.h" @@ -57,7 +57,7 @@ struct dccp_tx_hist_entry { u64 dccphtx_seqno:48, dccphtx_sent:1; u32 dccphtx_rtt; - struct timeval dccphtx_tstamp; + ktime_t dccphtx_tstamp; }; struct dccp_tx_hist { @@ -124,7 +124,7 @@ struct dccp_rx_hist_entry { dccphrx_ccval:4, dccphrx_type:4; u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ - struct timeval dccphrx_tstamp; + ktime_t dccphrx_tstamp; }; struct dccp_rx_hist { @@ -136,7 +136,6 @@ extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); static inline struct dccp_rx_hist_entry * dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, - const struct sock *sk, const u32 ndp, const struct sk_buff *skb, const gfp_t prio) @@ -151,7 +150,7 @@ static inline struct dccp_rx_hist_entry * entry->dccphrx_ccval = dh->dccph_ccval; entry->dccphrx_type = dh->dccph_type; entry->dccphrx_ndp = ndp; - dccp_timestamp(sk, &entry->dccphrx_tstamp); + entry->dccphrx_tstamp = ktime_get_real(); } return entry; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index e2d74cd7eeeb..ee97950d77d1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -13,6 +13,7 @@ */ #include <linux/dccp.h> +#include <linux/ktime.h> #include <net/snmp.h> #include <net/sock.h> #include <net/tcp.h> @@ -91,6 +92,7 @@ extern int sysctl_dccp_feat_ack_ratio; extern int sysctl_dccp_feat_send_ack_vector; extern int sysctl_dccp_feat_send_ndp_count; extern int sysctl_dccp_tx_qlen; +extern int sysctl_dccp_sync_ratelimit; /* * 48-bit sequence number arithmetic (signed and unsigned) @@ -208,7 +210,6 @@ extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); extern void dccp_send_ack(struct sock *sk); -extern void dccp_send_delayed_ack(struct sock *sk); extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk); extern void dccp_send_sync(struct sock *sk, const u64 seq, @@ -293,11 +294,12 @@ extern unsigned int dccp_poll(struct file *file, struct socket *sock, extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +extern struct sk_buff *dccp_ctl_make_reset(struct socket *ctl, + struct sk_buff *skb); extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk, const int active); extern int dccp_invalid_packet(struct sk_buff *skb); -extern u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv, - struct timeval *t_history); +extern u32 dccp_sample_rtt(struct sock *sk, long delta); static inline int dccp_bad_service_code(const struct sock *sk, const __be32 service) @@ -309,10 +311,22 @@ static inline int dccp_bad_service_code(const struct sock *sk, return !dccp_list_has_service(dp->dccps_service_list, service); } +/** + * dccp_skb_cb - DCCP per-packet control information + * @dccpd_type: one of %dccp_pkt_type (or unknown) + * @dccpd_ccval: CCVal field (5.1), see e.g. RFC 4342, 8.1 + * @dccpd_reset_code: one of %dccp_reset_codes + * @dccpd_reset_data: Data1..3 fields (depend on @dccpd_reset_code) + * @dccpd_opt_len: total length of all options (5.8) in the packet + * @dccpd_seq: sequence number + * @dccpd_ack_seq: acknowledgment number subheader field value + * This is used for transmission as well as for reception. + */ struct dccp_skb_cb { __u8 dccpd_type:4; __u8 dccpd_ccval:4; - __u8 dccpd_reset_code; + __u8 dccpd_reset_code, + dccpd_reset_data[3]; __u16 dccpd_opt_len; __u64 dccpd_seq; __u64 dccpd_ack_seq; @@ -395,53 +409,14 @@ extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, u32 elapsed_time); +extern u32 dccp_timestamp(void); +extern void dccp_timestamping_init(void); extern int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb); extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb, unsigned char option, const void *value, unsigned char len); -extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); - -static inline suseconds_t timeval_usecs(const struct timeval *tv) -{ - return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; -} - -static inline suseconds_t timeval_delta(const struct timeval *large, - const struct timeval *small) -{ - time_t secs = large->tv_sec - small->tv_sec; - suseconds_t usecs = large->tv_usec - small->tv_usec; - - if (usecs < 0) { - secs--; - usecs += USEC_PER_SEC; - } - return secs * USEC_PER_SEC + usecs; -} - -static inline void timeval_add_usecs(struct timeval *tv, - const suseconds_t usecs) -{ - tv->tv_usec += usecs; - while (tv->tv_usec >= USEC_PER_SEC) { - tv->tv_sec++; - tv->tv_usec -= USEC_PER_SEC; - } -} - -static inline void timeval_sub_usecs(struct timeval *tv, - const suseconds_t usecs) -{ - tv->tv_usec -= usecs; - while (tv->tv_usec < 0) { - tv->tv_sec--; - tv->tv_usec += USEC_PER_SEC; - } - DCCP_BUG_ON(tv->tv_sec < 0); -} - #ifdef CONFIG_SYSCTL extern int dccp_sysctl_init(void); extern void dccp_sysctl_exit(void); diff --git a/net/dccp/input.c b/net/dccp/input.c index da6ec185ed5b..19d7e1dbd87e 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -68,7 +68,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); struct dccp_sock *dp = dccp_sk(sk); - u64 lswl, lawl; + u64 lswl, lawl, seqno = DCCP_SKB_CB(skb)->dccpd_seq, + ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; /* * Step 5: Prepare sequence numbers for Sync @@ -84,11 +85,9 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) */ if (dh->dccph_type == DCCP_PKT_SYNC || dh->dccph_type == DCCP_PKT_SYNCACK) { - if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, - dp->dccps_awl, dp->dccps_awh) && - dccp_delta_seqno(dp->dccps_swl, - DCCP_SKB_CB(skb)->dccpd_seq) >= 0) - dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + if (between48(ackno, dp->dccps_awl, dp->dccps_awh) && + dccp_delta_seqno(dp->dccps_swl, seqno) >= 0) + dccp_update_gsr(sk, seqno); else return -1; } @@ -103,9 +102,6 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) * Update S.GSR, S.SWL, S.SWH * If P.type != Sync, * Update S.GAR - * Otherwise, - * Send Sync packet acknowledging P.seqno - * Drop packet and return */ lswl = dp->dccps_swl; lawl = dp->dccps_awl; @@ -113,35 +109,52 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) if (dh->dccph_type == DCCP_PKT_CLOSEREQ || dh->dccph_type == DCCP_PKT_CLOSE || dh->dccph_type == DCCP_PKT_RESET) { - lswl = dp->dccps_gsr; - dccp_inc_seqno(&lswl); + lswl = ADD48(dp->dccps_gsr, 1); lawl = dp->dccps_gar; } - if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && - (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || - between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, - lawl, dp->dccps_awh))) { - dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + if (between48(seqno, lswl, dp->dccps_swh) && + (ackno == DCCP_PKT_WITHOUT_ACK_SEQ || + between48(ackno, lawl, dp->dccps_awh))) { + dccp_update_gsr(sk, seqno); if (dh->dccph_type != DCCP_PKT_SYNC && - (DCCP_SKB_CB(skb)->dccpd_ack_seq != - DCCP_PKT_WITHOUT_ACK_SEQ)) - dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; + (ackno != DCCP_PKT_WITHOUT_ACK_SEQ)) + dp->dccps_gar = ackno; } else { + unsigned long now = jiffies; + /* + * Step 6: Check sequence numbers + * Otherwise, + * If P.type == Reset, + * Send Sync packet acknowledging S.GSR + * Otherwise, + * Send Sync packet acknowledging P.seqno + * Drop packet and return + * + * These Syncs are rate-limited as per RFC 4340, 7.5.4: + * at most 1 / (dccp_sync_rate_limit * HZ) Syncs per second. + */ + if (time_before(now, (dp->dccps_rate_last + + sysctl_dccp_sync_ratelimit))) + return 0; + DCCP_WARN("DCCP: Step 6 failed for %s packet, " "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " "sending SYNC...\n", dccp_packet_name(dh->dccph_type), - (unsigned long long) lswl, - (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq, + (unsigned long long) lswl, (unsigned long long) seqno, (unsigned long long) dp->dccps_swh, - (DCCP_SKB_CB(skb)->dccpd_ack_seq == - DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists", - (unsigned long long) lawl, - (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, + (ackno == DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" + : "exists", + (unsigned long long) lawl, (unsigned long long) ackno, (unsigned long long) dp->dccps_awh); - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); + + dp->dccps_rate_last = now; + + if (dh->dccph_type == DCCP_PKT_RESET) + seqno = dp->dccps_gsr; + dccp_send_sync(sk, seqno, DCCP_PKT_SYNC); return -1; } @@ -280,6 +293,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (dh->dccph_type == DCCP_PKT_RESPONSE) { const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); + long tstamp = dccp_timestamp(); /* Stop the REQUEST timer */ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); @@ -300,13 +314,10 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (dccp_parse_options(sk, skb)) goto out_invalid_packet; - /* Obtain RTT sample from SYN exchange (used by CCID 3) */ - if (dp->dccps_options_received.dccpor_timestamp_echo) { - struct timeval now; - - dccp_timestamp(sk, &now); - dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL); - } + /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ + if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) + dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - + dp->dccps_options_received.dccpor_timestamp_echo)); if (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, @@ -540,11 +551,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; } - if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); - goto discard; - } - switch (sk->sk_state) { case DCCP_CLOSED: dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; @@ -575,6 +581,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, sk_wake_async(sk, 0, POLL_OUT); break; } + } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); + goto discard; } if (!queued) { @@ -587,37 +596,22 @@ discard: EXPORT_SYMBOL_GPL(dccp_rcv_state_process); /** - * dccp_sample_rtt - Sample RTT from packet exchange - * - * @sk: connected dccp_sock - * @t_recv: receive timestamp of packet with timestamp echo - * @t_hist: packet history timestamp or NULL + * dccp_sample_rtt - Validate and finalise computation of RTT sample + * @delta: number of microseconds between packet and acknowledgment + * The routine is kept generic to work in different contexts. It should be + * called immediately when the ACK used for the RTT sample arrives. */ -u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv, - struct timeval *t_hist) +u32 dccp_sample_rtt(struct sock *sk, long delta) { - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_options_received *or = &dp->dccps_options_received; - suseconds_t delta; - - if (t_hist == NULL) { - if (!or->dccpor_timestamp_echo) { - DCCP_WARN("packet without timestamp echo\n"); - return DCCP_SANE_RTT_MAX; - } - timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10); - delta = timeval_usecs(t_recv); - } else - delta = timeval_delta(t_recv, t_hist); - - delta -= or->dccpor_elapsed_time * 10; /* either set or 0 */ + /* dccpor_elapsed_time is either zeroed out or set and > 0 */ + delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10; if (unlikely(delta <= 0)) { - DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta); + DCCP_WARN("unusable RTT sample %ld, using min\n", delta); return DCCP_SANE_RTT_MIN; } - if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) { - DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta); + if (unlikely(delta > DCCP_SANE_RTT_MAX)) { + DCCP_WARN("RTT sample %ld too large, using max\n", delta); return DCCP_SANE_RTT_MAX; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 718f2fa923a1..44f6e17e105f 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -381,7 +381,6 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, { struct inet_request_sock *ireq; struct inet_sock *newinet; - struct dccp_sock *newdp; struct sock *newsk; if (sk_acceptq_is_full(sk)) @@ -396,7 +395,6 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, sk_setup_caps(newsk, dst); - newdp = dccp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); newinet->daddr = ireq->rmt_addr; @@ -512,17 +510,12 @@ out: static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { int err; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; const struct iphdr *rxiph; - const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct dst_entry *dst; - u64 seqno = 0; /* Never send a reset in response to a reset. */ - if (rxdh->dccph_type == DCCP_PKT_RESET) + if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) @@ -532,37 +525,14 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) if (dst == NULL) return; - skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, - GFP_ATOMIC); + skb = dccp_ctl_make_reset(dccp_v4_ctl_socket, rxskb); if (skb == NULL) goto out; - /* Reserve space for headers. */ - skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); - skb->dst = dst_clone(dst); - - dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); - - /* Build DCCP header and checksum it. */ - dh->dccph_type = DCCP_PKT_RESET; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_reset_len / 4; - dh->dccph_x = 1; - dccp_hdr_reset(skb)->dccph_reset_code = - DCCP_SKB_CB(rxskb)->dccpd_reset_code; - - /* See "8.3.1. Abnormal Termination" in RFC 4340 */ - if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); - - dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); - - dccp_csum_outgoing(skb); rxiph = ip_hdr(rxskb); - dh->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr, - rxiph->daddr); + dccp_hdr(skb)->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr, + rxiph->daddr); + skb->dst = dst_clone(dst); bh_lock_sock(dccp_v4_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, @@ -598,17 +568,14 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) { - reset_code = DCCP_RESET_CODE_NO_CONNECTION; - goto drop; - } + (RTCF_BROADCAST | RTCF_MULTICAST)) + return 0; /* discard, don't send a reset here */ if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; } /* @@ -616,6 +583,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * limitations, they conserve resources and peer is * evidently real one. */ + dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; if (inet_csk_reqsk_queue_is_full(sk)) goto drop; @@ -668,7 +636,6 @@ drop_and_free: reqsk_free(req); drop: DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; return -1; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b158c661867b..006a3834fbcd 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -301,50 +301,23 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; struct ipv6hdr *rxip6h; - const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct flowi fl; - u64 seqno = 0; - if (rxdh->dccph_type == DCCP_PKT_RESET) + if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; if (!ipv6_unicast_destination(rxskb)) return; - skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header, - GFP_ATOMIC); + skb = dccp_ctl_make_reset(dccp_v6_ctl_socket, rxskb); if (skb == NULL) return; - skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header); - - dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); - - /* Swap the send and the receive. */ - dh->dccph_type = DCCP_PKT_RESET; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_reset_len / 4; - dh->dccph_x = 1; - dccp_hdr_reset(skb)->dccph_reset_code = - DCCP_SKB_CB(rxskb)->dccpd_reset_code; - - /* See "8.3.1. Abnormal Termination" in RFC 4340 */ - if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); - - dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); - - dccp_csum_outgoing(skb); rxip6h = ipv6_hdr(rxskb); - dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, - &rxip6h->daddr); + dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, + &rxip6h->daddr); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); @@ -352,8 +325,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) fl.proto = IPPROTO_DCCP; fl.oif = inet6_iif(rxskb); - fl.fl_ip_dport = dh->dccph_dport; - fl.fl_ip_sport = dh->dccph_sport; + fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport; + fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport; security_skb_classify_flow(rxskb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ @@ -417,21 +390,21 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) - goto drop; + return 0; /* discard, don't send a reset here */ if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; } /* * There are no SYN attacks on IPv6, yet... */ + dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; if (inet_csk_reqsk_queue_is_full(sk)) goto drop; @@ -491,7 +464,6 @@ drop_and_free: reqsk_free(req); drop: DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; return -1; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e18e249ac49b..831b76e08d02 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -42,6 +42,16 @@ struct inet_timewait_death_row dccp_death_row = { EXPORT_SYMBOL_GPL(dccp_death_row); +void dccp_minisock_init(struct dccp_minisock *dmsk) +{ + dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; + dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; + dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; + dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; + dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; + dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; +} + void dccp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw = NULL; @@ -112,7 +122,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_service_list = NULL; newdp->dccps_service = dreq->dreq_service; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; - do_gettimeofday(&newdp->dccps_epoch); if (dccp_feat_clone(sk, newsk)) goto out_free; diff --git a/net/dccp/options.c b/net/dccp/options.c index 34d536d5f1a1..d361b5533309 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -29,16 +29,6 @@ int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO; int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; -void dccp_minisock_init(struct dccp_minisock *dmsk) -{ - dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; - dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; - dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; - dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; - dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; - dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; -} - static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) { u32 value = 0; @@ -158,7 +148,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp = ntohl(*(__be32 *)value); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - dccp_timestamp(sk, &dp->dccps_timestamp_time); + dp->dccps_timestamp_time = ktime_get_real(); dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", dccp_role(sk), opt_recv->dccpor_timestamp, @@ -189,7 +179,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) else elapsed_time = ntohl(*(__be32 *)(value + 4)); - dccp_pr_debug_cat(", ELAPSED_TIME=%d\n", elapsed_time); + dccp_pr_debug_cat(", ELAPSED_TIME=%u\n", elapsed_time); /* Give precedence to the biggest ELAPSED_TIME */ if (elapsed_time > opt_recv->dccpor_elapsed_time) @@ -370,29 +360,9 @@ int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); -void dccp_timestamp(const struct sock *sk, struct timeval *tv) -{ - const struct dccp_sock *dp = dccp_sk(sk); - - do_gettimeofday(tv); - tv->tv_sec -= dp->dccps_epoch.tv_sec; - tv->tv_usec -= dp->dccps_epoch.tv_usec; - - while (tv->tv_usec < 0) { - tv->tv_sec--; - tv->tv_usec += USEC_PER_SEC; - } -} - -EXPORT_SYMBOL_GPL(dccp_timestamp); - int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { - struct timeval tv; - __be32 now; - - dccp_timestamp(sk, &tv); - now = htonl(timeval_usecs(&tv) / 10); + __be32 now = htonl(dccp_timestamp()); /* yes this will overflow but that is the point as we want a * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ @@ -405,14 +375,12 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - struct timeval now; __be32 tstamp_echo; - u32 elapsed_time; int len, elapsed_time_len; unsigned char *to; - - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10; + const suseconds_t delta = ktime_us_delta(ktime_get_real(), + dp->dccps_timestamp_time); + u32 elapsed_time = delta / 10; elapsed_time_len = dccp_elapsed_time_len(elapsed_time); len = 6 + elapsed_time_len; @@ -438,8 +406,7 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk, } dp->dccps_timestamp_echo = 0; - dp->dccps_timestamp_time.tv_sec = 0; - dp->dccps_timestamp_time.tv_usec = 0; + dp->dccps_timestamp_time = ktime_set(0, 0); return 0; } diff --git a/net/dccp/output.c b/net/dccp/output.c index c8d843e983fc..f49544618f20 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -61,6 +61,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) set_ack = 0; /* fall through */ case DCCP_PKT_DATAACK: + case DCCP_PKT_RESET: break; case DCCP_PKT_REQUEST: @@ -69,12 +70,14 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_SYNC: case DCCP_PKT_SYNCACK: - ackno = dcb->dccpd_seq; + ackno = dcb->dccpd_ack_seq; /* fall through */ default: /* - * Only data packets should come through with skb->sk - * set. + * Set owner/destructor: some skbs are allocated via + * alloc_skb (e.g. when retransmission may happen). + * Only Data, DataAck, and Reset packets should come + * through here with skb->sk set. */ WARN_ON(skb->sk); skb_set_owner_w(skb, sk); @@ -174,34 +177,38 @@ void dccp_write_space(struct sock *sk) /** * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet - * @sk: socket to wait for + * @sk: socket to wait for + * @skb: current skb to pass on for waiting + * @delay: sleep timeout in milliseconds (> 0) + * This function is called by default when the socket is closed, and + * when a non-zero linger time is set on the socket. For consistency */ -static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb) +static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) { struct dccp_sock *dp = dccp_sk(sk); DEFINE_WAIT(wait); - unsigned long delay; + unsigned long jiffdelay; int rc; - while (1) { + do { + dccp_pr_debug("delayed send by %d msec\n", delay); + jiffdelay = msecs_to_jiffies(delay); + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + sk->sk_write_pending++; + release_sock(sk); + schedule_timeout(jiffdelay); + lock_sock(sk); + sk->sk_write_pending--; + if (sk->sk_err) goto do_error; if (signal_pending(current)) goto do_interrupted; rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - if (rc <= 0) - break; - dccp_pr_debug("delayed send by %d msec\n", rc); - delay = msecs_to_jiffies(rc); - sk->sk_write_pending++; - release_sock(sk); - schedule_timeout(delay); - lock_sock(sk); - sk->sk_write_pending--; - } + } while ((delay = rc) > 0); out: finish_wait(sk->sk_sleep, &wait); return rc; @@ -228,7 +235,7 @@ void dccp_write_xmit(struct sock *sk, int block) msecs_to_jiffies(err)+jiffies); break; } else - err = dccp_wait_for_ccid(sk, skb); + err = dccp_wait_for_ccid(sk, skb, err); if (err && err != -EINTR) DCCP_BUG("err=%d after dccp_wait_for_ccid", err); } @@ -324,72 +331,81 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, EXPORT_SYMBOL_GPL(dccp_make_response); -static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, - const enum dccp_reset_codes code) +/* answer offending packet in @rcv_skb with Reset from control socket @ctl */ +struct sk_buff *dccp_ctl_make_reset(struct socket *ctl, struct sk_buff *rcv_skb) { - struct dccp_hdr *dh; - struct dccp_sock *dp = dccp_sk(sk); - const u32 dccp_header_size = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); - struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, - GFP_ATOMIC); - if (skb == NULL) - return NULL; - - /* Reserve space for headers. */ - skb_reserve(skb, sk->sk_prot->max_header); - - skb->dst = dst_clone(dst); - - dccp_inc_seqno(&dp->dccps_gss); - - DCCP_SKB_CB(skb)->dccpd_reset_code = code; - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; - DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss; + struct dccp_hdr *rxdh = dccp_hdr(rcv_skb), *dh; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(rcv_skb); + const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_reset); + struct dccp_hdr_reset *dhr; + struct sk_buff *skb; - if (dccp_insert_options(sk, skb)) { - kfree_skb(skb); + skb = alloc_skb(ctl->sk->sk_prot->max_header, GFP_ATOMIC); + if (skb == NULL) return NULL; - } - dh = dccp_zeroed_hdr(skb, dccp_header_size); + skb_reserve(skb, ctl->sk->sk_prot->max_header); - dh->dccph_sport = inet_sk(sk)->sport; - dh->dccph_dport = inet_sk(sk)->dport; - dh->dccph_doff = (dccp_header_size + - DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + /* Swap the send and the receive. */ + dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); dh->dccph_type = DCCP_PKT_RESET; + dh->dccph_sport = rxdh->dccph_dport; + dh->dccph_dport = rxdh->dccph_sport; + dh->dccph_doff = dccp_hdr_reset_len / 4; dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dp->dccps_gss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); - dccp_hdr_reset(skb)->dccph_reset_code = code; - inet_csk(sk)->icsk_af_ops->send_check(sk, 0, skb); + dhr = dccp_hdr_reset(skb); + dhr->dccph_reset_code = dcb->dccpd_reset_code; + + switch (dcb->dccpd_reset_code) { + case DCCP_RESET_CODE_PACKET_ERROR: + dhr->dccph_reset_data[0] = rxdh->dccph_type; + break; + case DCCP_RESET_CODE_OPTION_ERROR: /* fall through */ + case DCCP_RESET_CODE_MANDATORY_ERROR: + memcpy(dhr->dccph_reset_data, dcb->dccpd_reset_data, 3); + break; + } + /* + * From RFC 4340, 8.3.1: + * If P.ackno exists, set R.seqno := P.ackno + 1. + * Else set R.seqno := 0. + */ + if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_hdr_set_seq(dh, ADD48(dcb->dccpd_ack_seq, 1)); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dcb->dccpd_seq); - DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + dccp_csum_outgoing(skb); return skb; } +EXPORT_SYMBOL_GPL(dccp_ctl_make_reset); + +/* send Reset on established socket, to close or abort the connection */ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) { + struct sk_buff *skb; /* * FIXME: what if rebuild_header fails? * Should we be doing a rebuild_header here? */ int err = inet_sk_rebuild_header(sk); - if (err == 0) { - struct sk_buff *skb = dccp_make_reset(sk, sk->sk_dst_cache, - code); - if (skb != NULL) { - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0); - return net_xmit_eval(err); - } - } + if (err != 0) + return err; + + skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, GFP_ATOMIC); + if (skb == NULL) + return -ENOBUFS; - return err; + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, sk->sk_prot->max_header); + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; + DCCP_SKB_CB(skb)->dccpd_reset_code = code; + + return dccp_transmit_skb(sk, skb); } /* @@ -477,6 +493,7 @@ void dccp_send_ack(struct sock *sk) EXPORT_SYMBOL_GPL(dccp_send_ack); +/* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */ void dccp_send_delayed_ack(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -507,7 +524,7 @@ void dccp_send_delayed_ack(struct sock *sk) sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } -void dccp_send_sync(struct sock *sk, const u64 seq, +void dccp_send_sync(struct sock *sk, const u64 ackno, const enum dccp_pkt_type pkt_type) { /* @@ -517,14 +534,16 @@ void dccp_send_sync(struct sock *sk, const u64 seq, */ struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC); - if (skb == NULL) + if (skb == NULL) { /* FIXME: how to make sure the sync is sent? */ + DCCP_CRIT("could not send %s", dccp_packet_name(pkt_type)); return; + } /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); DCCP_SKB_CB(skb)->dccpd_type = pkt_type; - DCCP_SKB_CB(skb)->dccpd_seq = seq; + DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; dccp_transmit_skb(sk, skb); } diff --git a/net/dccp/probe.c b/net/dccp/probe.c index bae10b0f2fc3..7053bb827bc8 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/kfifo.h> #include <linux/vmalloc.h> +#include <net/net_namespace.h> #include "dccp.h" #include "ccid.h" @@ -168,7 +169,7 @@ static __init int dccpprobe_init(void) if (IS_ERR(dccpw.fifo)) return PTR_ERR(dccpw.fifo); - if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; ret = register_jprobe(&dccp_send_probe); @@ -178,7 +179,7 @@ static __init int dccpprobe_init(void) pr_info("DCCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(procname); + proc_net_remove(&init_net, procname); err0: kfifo_free(dccpw.fifo); return ret; @@ -188,7 +189,7 @@ module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { kfifo_free(dccpw.fifo); - proc_net_remove(procname); + proc_net_remove(&init_net, procname); unregister_jprobe(&dccp_send_probe); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 04b59ec4f512..cc9bf1cb2646 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -172,7 +172,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) struct inet_connection_sock *icsk = inet_csk(sk); dccp_minisock_init(&dp->dccps_minisock); - do_gettimeofday(&dp->dccps_epoch); /* * FIXME: We're hardcoding the CCID, and doing this at this point makes @@ -220,6 +219,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) sk->sk_write_space = dccp_write_space; icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; + dp->dccps_rate_last = jiffies; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; @@ -587,6 +587,10 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_SERVICE: return dccp_getsockopt_service(sk, len, (__be32 __user *)optval, optlen); + case DCCP_SOCKOPT_GET_CUR_MPS: + val = dp->dccps_mss_cache; + len = sizeof(val); + break; case DCCP_SOCKOPT_SEND_CSCOV: val = dp->dccps_pcslen; len = sizeof(val); @@ -664,7 +668,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * so that the trick in dccp_rcv_request_sent_state_process. */ /* Wait for a connection to finish. */ - if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) + if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) goto out_release; @@ -988,7 +992,7 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); #ifdef CONFIG_IP_DCCP_DEBUG int dccp_debug; -module_param(dccp_debug, int, 0444); +module_param(dccp_debug, bool, 0444); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); EXPORT_SYMBOL_GPL(dccp_debug); @@ -1077,6 +1081,8 @@ static int __init dccp_init(void) rc = dccp_sysctl_init(); if (rc) goto out_ackvec_exit; + + dccp_timestamping_init(); out: return rc; out_ackvec_exit: diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 1260aabac5e1..9364b2fb4dbd 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -18,6 +18,9 @@ #error This file should not be compiled without CONFIG_SYSCTL defined #endif +/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ +int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; + static struct ctl_table dccp_default_table[] = { { .procname = "seq_window", @@ -89,6 +92,13 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "sync_ratelimit", + .data = &sysctl_dccp_sync_ratelimit, + .maxlen = sizeof(sysctl_dccp_sync_ratelimit), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, { .ctl_name = 0, } }; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 0197a41c256a..3af067354bd4 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -291,3 +291,24 @@ void dccp_init_xmit_timers(struct sock *sk) inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, &dccp_keepalive_timer); } + +static ktime_t dccp_timestamp_seed; +/** + * dccp_timestamp - 10s of microseconds time source + * Returns the number of 10s of microseconds since loading DCCP. This is native + * DCCP time difference format (RFC 4340, sec. 13). + * Please note: This will wrap around about circa every 11.9 hours. + */ +u32 dccp_timestamp(void) +{ + s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); + + do_div(delta, 10); + return delta; +} +EXPORT_SYMBOL_GPL(dccp_timestamp); + +void __init dccp_timestamping_init(void) +{ + dccp_timestamp_seed = ktime_get_real(); +} diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index ed76d4aab4a9..aabe98d9402f 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -128,6 +128,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include <linux/stat.h> #include <linux/init.h> #include <linux/poll.h> +#include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/fib_rules.h> @@ -470,10 +471,10 @@ static struct proto dn_proto = { .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) +static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp) { struct dn_scp *scp; - struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1); + struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, 1); if (!sk) goto out; @@ -674,10 +675,13 @@ char *dn_addr2asc(__u16 addr, char *buf) -static int dn_create(struct socket *sock, int protocol) +static int dn_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; + if (net != &init_net) + return -EAFNOSUPPORT; + switch(sock->type) { case SOCK_SEQPACKET: if (protocol != DNPROTO_NSP) @@ -690,7 +694,7 @@ static int dn_create(struct socket *sock, int protocol) } - if ((sk = dn_alloc_sock(sock, GFP_KERNEL)) == NULL) + if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL) return -ENOBUFS; sk->sk_protocol = protocol; @@ -747,7 +751,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (dn_ntohs(saddr->sdn_nodeaddrl)) { read_lock(&dev_base_lock); ldev = NULL; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!dev->dn_ptr) continue; if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { @@ -1090,7 +1094,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(newsock, sk->sk_allocation); + newsk = dn_alloc_sock(sk->sk_net, newsock, sk->sk_allocation); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); @@ -2085,6 +2089,9 @@ static int dn_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch(event) { case NETDEV_UP: dn_dev_up(dev); @@ -2399,7 +2406,7 @@ static int __init decnet_init(void) dev_add_pack(&dn_dix_packet_type); register_netdevice_notifier(&dn_dev_notifier); - proc_net_fops_create("decnet", S_IRUGO, &dn_socket_seq_fops); + proc_net_fops_create(&init_net, "decnet", S_IRUGO, &dn_socket_seq_fops); dn_register_sysctl(); out: return rc; @@ -2428,7 +2435,7 @@ static void __exit decnet_exit(void) dn_neigh_cleanup(); dn_fib_cleanup(); - proc_net_remove("decnet"); + proc_net_remove(&init_net, "decnet"); proto_unregister(&dn_proto); } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 8def68209edd..26130afd8029 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -42,6 +42,7 @@ #include <linux/notifier.h> #include <asm/uaccess.h> #include <asm/system.h> +#include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> @@ -149,7 +150,7 @@ static struct dn_dev_parms dn_dev_list[] = { } }; -#define DN_DEV_LIST_SIZE (sizeof(dn_dev_list)/sizeof(struct dn_dev_parms)) +#define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list) #define DN_DEV_PARMS_OFFSET(x) ((int) ((char *) &((struct dn_dev_parms *)0)->x)) @@ -512,7 +513,7 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) ifr->ifr_name[IFNAMSIZ-1] = 0; #ifdef CONFIG_KMOD - dev_load(ifr->ifr_name); + dev_load(&init_net, ifr->ifr_name); #endif switch(cmd) { @@ -530,7 +531,7 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); - if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) { + if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) { ret = -ENODEV; goto done; } @@ -628,7 +629,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex) { struct net_device *dev; struct dn_dev *dn_dev = NULL; - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev) { dn_dev = dev->dn_ptr; dev_put(dev); @@ -693,7 +694,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -EINVAL; ifm = nlmsg_data(nlh); - if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL) return -ENODEV; if ((dn_db = dev->dn_ptr) == NULL) { @@ -799,7 +800,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) skip_naddr = cb->args[1]; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < skip_ndevs) goto cont; else if (idx > skip_ndevs) { @@ -868,10 +869,10 @@ last_chance: rv = dn_dev_get_first(dev, addr); read_unlock(&dev_base_lock); dev_put(dev); - if (rv == 0 || dev == &loopback_dev) + if (rv == 0 || dev == init_net.loopback_dev) return rv; } - dev = &loopback_dev; + dev = init_net.loopback_dev; dev_hold(dev); goto last_chance; } @@ -1296,7 +1297,7 @@ void dn_dev_devices_off(void) struct net_device *dev; rtnl_lock(); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) dn_dev_down(dev); rtnl_unlock(); @@ -1307,7 +1308,7 @@ void dn_dev_devices_on(void) struct net_device *dev; rtnl_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (dev->flags & IFF_UP) dn_dev_up(dev); } @@ -1341,7 +1342,7 @@ static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) return SEQ_START_TOKEN; i = 1; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!is_dn_dev(dev)) continue; @@ -1360,9 +1361,9 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) dev = (struct net_device *)v; if (v == SEQ_START_TOKEN) - dev = net_device_entry(&dev_base_head); + dev = net_device_entry(&init_net.dev_base_head); - for_each_netdev_continue(dev) { + for_each_netdev_continue(&init_net, dev) { if (!is_dn_dev(dev)) continue; @@ -1462,7 +1463,7 @@ void __init dn_dev_init(void) rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL); rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr); - proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops); + proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops); #ifdef CONFIG_SYSCTL { @@ -1483,7 +1484,7 @@ void __exit dn_dev_cleanup(void) } #endif /* CONFIG_SYSCTL */ - proc_net_remove("decnet_dev"); + proc_net_remove(&init_net, "decnet_dev"); dn_dev_devices_off(); } diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index d2bc19d47950..3760a20d10d0 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -212,7 +212,7 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct return -EINVAL; if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) + if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) return -ENETDOWN; @@ -255,7 +255,7 @@ out: if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) return -EINVAL; - dev = __dev_get_by_index(nh->nh_oif); + dev = __dev_get_by_index(&init_net, nh->nh_oif); if (dev == NULL || dev->dn_ptr == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) @@ -355,7 +355,7 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; @@ -602,7 +602,7 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) /* Scan device list */ read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { dn_db = dev->dn_ptr; if (dn_db == NULL) continue; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 174d8a7a6dac..e851b143cca3 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -38,6 +38,7 @@ #include <linux/rcupdate.h> #include <linux/jhash.h> #include <asm/atomic.h> +#include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> @@ -210,7 +211,8 @@ static int dn_neigh_output_packet(struct sk_buff *skb) char mac_addr[ETH_ALEN]; dn_dn2eth(mac_addr, rt->rt_local_src); - if (!dev->hard_header || dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, mac_addr, skb->len) >= 0) + if (dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, + mac_addr, skb->len) >= 0) return neigh->ops->queue_xmit(skb); if (net_ratelimit()) @@ -578,24 +580,8 @@ static const struct seq_operations dn_neigh_seq_ops = { static int dn_neigh_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &dn_neigh_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &dn_neigh_seq_ops, + sizeof(struct neigh_seq_state)); } static const struct file_operations dn_neigh_seq_fops = { @@ -611,11 +597,11 @@ static const struct file_operations dn_neigh_seq_fops = { void __init dn_neigh_init(void) { neigh_table_init(&dn_neigh_table); - proc_net_fops_create("decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); + proc_net_fops_create(&init_net, "decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); } void __exit dn_neigh_cleanup(void) { - proc_net_remove("decnet_neigh"); + proc_net_remove(&init_net, "decnet_neigh"); neigh_table_clear(&dn_neigh_table); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index a4a620971ef0..97eee5e8fbbe 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -77,6 +77,7 @@ #include <linux/rcupdate.h> #include <linux/times.h> #include <asm/errno.h> +#include <net/net_namespace.h> #include <net/netlink.h> #include <net/neighbour.h> #include <net/dst.h> @@ -583,6 +584,9 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; unsigned char padlen = 0; + if (dev->nd_net != &init_net) + goto dump_it; + if (dn == NULL) goto dump_it; @@ -883,7 +887,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old .scope = RT_SCOPE_UNIVERSE, } }, .mark = oldflp->mark, - .iif = loopback_dev.ifindex, + .iif = init_net.loopback_dev->ifindex, .oif = oldflp->oif }; struct dn_route *rt = NULL; struct net_device *dev_out = NULL, *dev; @@ -900,11 +904,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst), dn_ntohs(oldflp->fld_src), - oldflp->mark, loopback_dev.ifindex, oldflp->oif); + oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif); /* If we have an output interface, verify its a DECnet device */ if (oldflp->oif) { - dev_out = dev_get_by_index(oldflp->oif); + dev_out = dev_get_by_index(&init_net, oldflp->oif); err = -ENODEV; if (dev_out && dev_out->dn_ptr == NULL) { dev_put(dev_out); @@ -925,7 +929,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old goto out; } read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!dev->dn_ptr) continue; if (!dn_dev_islocal(dev, oldflp->fld_src)) @@ -953,7 +957,7 @@ source_ok: err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; dev_hold(dev_out); if (!fl.fld_dst) { fl.fld_dst = @@ -962,7 +966,7 @@ source_ok: if (!fl.fld_dst) goto out; } - fl.oif = loopback_dev.ifindex; + fl.oif = init_net.loopback_dev->ifindex; res.type = RTN_LOCAL; goto make_route; } @@ -1008,7 +1012,7 @@ source_ok: if (dev_out) dev_put(dev_out); if (dn_dev_islocal(neigh->dev, fl.fld_dst)) { - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; res.type = RTN_LOCAL; } else { dev_out = neigh->dev; @@ -1029,7 +1033,7 @@ source_ok: /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fl.fld_dst)) { dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; dev_hold(dev_out); res.type = RTN_LOCAL; goto select_source; @@ -1065,7 +1069,7 @@ select_source: fl.fld_src = fl.fld_dst; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; dev_hold(dev_out); fl.oif = dev_out->ifindex; if (res.fi) @@ -1552,7 +1556,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (fl.iif) { struct net_device *dev; - if ((dev = dev_get_by_index(fl.iif)) == NULL) { + if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) { kfree_skb(skb); return -ENODEV; } @@ -1735,23 +1739,8 @@ static const struct seq_operations dn_rt_cache_seq_ops = { static int dn_rt_cache_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct dn_rt_cache_iter_state *s; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - rc = seq_open(file, &dn_rt_cache_seq_ops); - if (rc) - goto out_kfree; - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &dn_rt_cache_seq_ops, + sizeof(struct dn_rt_cache_iter_state)); } static const struct file_operations dn_rt_cache_seq_fops = { @@ -1814,7 +1803,7 @@ void __init dn_route_init(void) dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); + proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump); @@ -1829,6 +1818,6 @@ void __exit dn_route_cleanup(void) del_timer(&dn_route_timer); dn_run_flush(0); - proc_net_remove("decnet_cache"); + proc_net_remove(&init_net, "decnet_cache"); } diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 84ff3dd37070..ddd3f04f0919 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -57,8 +57,6 @@ static struct dn_fib_rule default_rule = { }, }; -static LIST_HEAD(dn_fib_rules); - int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) { @@ -228,9 +226,9 @@ static u32 dn_fib_rule_default_pref(void) struct list_head *pos; struct fib_rule *rule; - if (!list_empty(&dn_fib_rules)) { - pos = dn_fib_rules.next; - if (pos->next != &dn_fib_rules) { + if (!list_empty(&dn_fib_rules_ops.rules_list)) { + pos = dn_fib_rules_ops.rules_list.next; + if (pos->next != &dn_fib_rules_ops.rules_list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; @@ -258,13 +256,14 @@ static struct fib_rules_ops dn_fib_rules_ops = { .flush_cache = dn_fib_rule_flush_cache, .nlgroup = RTNLGRP_DECnet_RULE, .policy = dn_fib_rule_policy, - .rules_list = &dn_fib_rules, + .rules_list = LIST_HEAD_INIT(dn_fib_rules_ops.rules_list), .owner = THIS_MODULE, }; void __init dn_fib_rules_init(void) { - list_add_tail(&default_rule.common.list, &dn_fib_rules); + list_add_tail(&default_rule.common.list, + &dn_fib_rules_ops.rules_list); fib_rules_register(&dn_fib_rules_ops); } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 696234688cf6..f7fba7721e63 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -115,17 +115,6 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) RCV_SKB_FAIL(-EINVAL); } -static void dnrmg_receive_user_sk(struct sock *sk, int len) -{ - struct sk_buff *skb; - unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); - - for (; qlen && (skb = skb_dequeue(&sk->sk_receive_queue)); qlen--) { - dnrmg_receive_user_skb(skb); - kfree_skb(skb); - } -} - static struct nf_hook_ops dnrmg_ops = { .hook = dnrmg_hook, .pf = PF_DECnet, @@ -137,8 +126,10 @@ static int __init dn_rtmsg_init(void) { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, - dnrmg_receive_user_sk, NULL, THIS_MODULE); + dnrmg = netlink_kernel_create(&init_net, + NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, + dnrmg_receive_user_skb, + NULL, THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 52e40d7eb22d..ae354a43fb97 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -259,7 +259,7 @@ static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen, devname[newlen] = 0; - dev = dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) return -ENODEV; @@ -299,7 +299,7 @@ static int dn_def_dev_handler(ctl_table *table, int write, devname[*lenp] = 0; strip_it(devname); - dev = dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) return -ENODEV; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 35c96bcc0f32..9cae16b4e0b7 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -336,6 +336,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, /* Real hardware Econet. We're not worthy etc. */ #ifdef CONFIG_ECONET_NATIVE unsigned short proto = 0; + int res; dev_hold(dev); @@ -354,12 +355,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, eb->sec = *saddr; eb->sent = ec_tx_done; - if (dev->hard_header) { - int res; + err = -EINVAL; + res = dev_hard_header(skb, dev, ntohs(proto), &addr, NULL, len); + if (res < 0) + goto out_free; + if (res > 0) { struct ec_framehdr *fh; - err = -EINVAL; - res = dev->hard_header(skb, dev, ntohs(proto), - &addr, NULL, len); /* Poke in our control byte and port number. Hack, hack. */ fh = (struct ec_framehdr *)(skb->data); @@ -368,8 +369,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, if (sock->type != SOCK_DGRAM) { skb_reset_tail_pointer(skb); skb->len = 0; - } else if (res < 0) - goto out_free; + } } /* Copy the data. Returns -EFAULT on error */ @@ -608,12 +608,15 @@ static struct proto econet_proto = { * Create an Econet socket */ -static int econet_create(struct socket *sock, int protocol) +static int econet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct econet_sock *eo; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + /* Econet only provides datagram services. */ if (sock->type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; @@ -621,7 +624,7 @@ static int econet_create(struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(PF_ECONET, GFP_KERNEL, &econet_proto, 1); + sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto, 1); if (sk == NULL) goto out; @@ -659,7 +662,7 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - if ((dev = dev_get_by_name(ifr.ifr_name)) == NULL) + if ((dev = dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) return -ENODEV; sec = (struct sockaddr_ec *)&ifr.ifr_addr; @@ -1062,6 +1065,9 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet struct sock *sk; struct ec_device *edev = dev->ec_ptr; + if (dev->nd_net != &init_net) + goto drop; + if (skb->pkt_type == PACKET_OTHERHOST) goto drop; @@ -1116,6 +1122,9 @@ static int econet_notifier(struct notifier_block *this, unsigned long msg, void struct net_device *dev = (struct net_device *)data; struct ec_device *edev; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (msg) { case NETDEV_UNREGISTER: /* A device has gone down - kill any data we hold for it. */ diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 12c765715acf..ed8a3d49487d 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -75,8 +75,9 @@ __setup("ether=", netdev_boot_setup); * Set the protocol type. For a packet of type ETH_P_802_3 we put the length * in here instead. It is up to the 802.2 layer to carry protocol information. */ -int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) +int eth_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN); @@ -91,10 +92,10 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, if (!saddr) saddr = dev->dev_addr; - memcpy(eth->h_source, saddr, dev->addr_len); + memcpy(eth->h_source, saddr, ETH_ALEN); if (daddr) { - memcpy(eth->h_dest, daddr, dev->addr_len); + memcpy(eth->h_dest, daddr, ETH_ALEN); return ETH_HLEN; } @@ -103,12 +104,13 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, */ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { - memset(eth->h_dest, 0, dev->addr_len); + memset(eth->h_dest, 0, ETH_ALEN); return ETH_HLEN; } return -ETH_HLEN; } +EXPORT_SYMBOL(eth_header); /** * eth_rebuild_header- rebuild the Ethernet MAC header. @@ -135,12 +137,13 @@ int eth_rebuild_header(struct sk_buff *skb) "%s: unable to resolve type %X addresses.\n", dev->name, (int)eth->h_proto); - memcpy(eth->h_source, dev->dev_addr, dev->addr_len); + memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); break; } return 0; } +EXPORT_SYMBOL(eth_rebuild_header); /** * eth_type_trans - determine the packet's protocol ID. @@ -207,12 +210,13 @@ EXPORT_SYMBOL(eth_type_trans); * @skb: packet to extract header from * @haddr: destination buffer */ -static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) +int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr) { - struct ethhdr *eth = eth_hdr(skb); + const struct ethhdr *eth = eth_hdr(skb); memcpy(haddr, eth->h_source, ETH_ALEN); return ETH_ALEN; } +EXPORT_SYMBOL(eth_header_parse); /** * eth_header_cache - fill cache entry from neighbour @@ -220,11 +224,11 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) * @hh: destination cache entry * Create an Ethernet header template from the neighbour. */ -int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) +int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh) { __be16 type = hh->hh_type; struct ethhdr *eth; - struct net_device *dev = neigh->dev; + const struct net_device *dev = neigh->dev; eth = (struct ethhdr *) (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); @@ -233,11 +237,12 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) return -1; eth->h_proto = type; - memcpy(eth->h_source, dev->dev_addr, dev->addr_len); - memcpy(eth->h_dest, neigh->ha, dev->addr_len); + memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); + memcpy(eth->h_dest, neigh->ha, ETH_ALEN); hh->hh_len = ETH_HLEN; return 0; } +EXPORT_SYMBOL(eth_header_cache); /** * eth_header_cache_update - update cache entry @@ -247,12 +252,14 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) * * Called by Address Resolution module to notify changes in address. */ -void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, - unsigned char *haddr) +void eth_header_cache_update(struct hh_cache *hh, + const struct net_device *dev, + const unsigned char *haddr) { memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), - haddr, dev->addr_len); + haddr, ETH_ALEN); } +EXPORT_SYMBOL(eth_header_cache_update); /** * eth_mac_addr - set new Ethernet hardware address @@ -271,7 +278,7 @@ static int eth_mac_addr(struct net_device *dev, void *p) return -EBUSY; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); return 0; } @@ -291,6 +298,14 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu) return 0; } +const struct header_ops eth_header_ops ____cacheline_aligned = { + .create = eth_header, + .parse = eth_header_parse, + .rebuild = eth_rebuild_header, + .cache = eth_header_cache, + .cache_update = eth_header_cache_update, +}; + /** * ether_setup - setup Ethernet network device * @dev: network device @@ -298,13 +313,10 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu) */ void ether_setup(struct net_device *dev) { + dev->header_ops = ð_header_ops; + dev->change_mtu = eth_change_mtu; - dev->hard_header = eth_header; - dev->rebuild_header = eth_rebuild_header; dev->set_mac_address = eth_mac_addr; - dev->hard_header_cache = eth_header_cache; - dev->header_cache_update= eth_header_cache_update; - dev->hard_header_parse = eth_header_parse; dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; @@ -337,3 +349,11 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count) return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count); } EXPORT_SYMBOL(alloc_etherdev_mq); + +char *print_mac(char *buf, const u8 *addr) +{ + sprintf(buf, MAC_FMT, + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + return buf; +} +EXPORT_SYMBOL(print_mac); diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c index 9d57b4fb6440..d60e15d9365e 100644 --- a/net/ethernet/pe2.c +++ b/net/ethernet/pe2.c @@ -12,9 +12,7 @@ static int pEII_request(struct datalink_proto *dl, struct net_device *dev = skb->dev; skb->protocol = htons(ETH_P_IPX); - if (dev->hard_header) - dev->hard_header(skb, dev, ETH_P_IPX, - dest_node, NULL, skb->len); + dev_hard_header(skb, dev, ETH_P_IPX, dest_node, NULL, skb->len); return dev_queue_xmit(skb); } diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c index b016b4104de6..0936a3e0210b 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/ieee80211/ieee80211_crypt_ccmp.c @@ -9,6 +9,7 @@ * more details. */ +#include <linux/kernel.h> #include <linux/err.h> #include <linux/module.h> #include <linux/init.h> @@ -241,7 +242,7 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) hdr = (struct ieee80211_hdr_4addr *)skb->data; ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); - blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last = data_len % AES_BLOCK_LEN; for (i = 1; i <= blocks; i++) { @@ -296,6 +297,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) int i, blocks, last, len; size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN; u8 *mic = skb->data + skb->len - CCMP_MIC_LEN; + DECLARE_MAC_BUF(mac); if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) { key->dot11RSNAStatsCCMPFormatErrors++; @@ -308,7 +310,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (!(keyidx & (1 << 5))) { if (net_ratelimit()) { printk(KERN_DEBUG "CCMP: received packet without ExtIV" - " flag from " MAC_FMT "\n", MAC_ARG(hdr->addr2)); + " flag from %s\n", print_mac(mac, hdr->addr2)); } key->dot11RSNAStatsCCMPFormatErrors++; return -2; @@ -321,9 +323,9 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } if (!key->key_set) { if (net_ratelimit()) { - printk(KERN_DEBUG "CCMP: received packet from " MAC_FMT + printk(KERN_DEBUG "CCMP: received packet from %s" " with keyid=%d that does not have a configured" - " key\n", MAC_ARG(hdr->addr2), keyidx); + " key\n", print_mac(mac, hdr->addr2), keyidx); } return -3; } @@ -338,11 +340,13 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (ccmp_replay_check(pn, key->rx_pn)) { if (net_ratelimit()) { - IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=" MAC_FMT - " previous PN %02x%02x%02x%02x%02x%02x " - "received PN %02x%02x%02x%02x%02x%02x\n", - MAC_ARG(hdr->addr2), MAC_ARG(key->rx_pn), - MAC_ARG(pn)); + IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=%s " + "previous PN %02x%02x%02x%02x%02x%02x " + "received PN %02x%02x%02x%02x%02x%02x\n", + print_mac(mac, hdr->addr2), + key->rx_pn[0], key->rx_pn[1], key->rx_pn[2], + key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], + pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); } key->dot11RSNAStatsCCMPReplays++; return -4; @@ -351,7 +355,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b); xor_block(mic, b, CCMP_MIC_LEN); - blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last = data_len % AES_BLOCK_LEN; for (i = 1; i <= blocks; i++) { @@ -370,7 +374,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (memcmp(mic, a, CCMP_MIC_LEN) != 0) { if (net_ratelimit()) { printk(KERN_DEBUG "CCMP: decrypt failed: STA=" - MAC_FMT "\n", MAC_ARG(hdr->addr2)); + "%s\n", print_mac(mac, hdr->addr2)); } key->dot11RSNAStatsCCMPDecryptErrors++; return -5; @@ -442,12 +446,16 @@ static int ieee80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) static char *ieee80211_ccmp_print_stats(char *p, void *priv) { struct ieee80211_ccmp_data *ccmp = priv; + p += sprintf(p, "key[%d] alg=CCMP key_set=%d " "tx_pn=%02x%02x%02x%02x%02x%02x " "rx_pn=%02x%02x%02x%02x%02x%02x " "format_errors=%d replays=%d decrypt_errors=%d\n", ccmp->key_idx, ccmp->key_set, - MAC_ARG(ccmp->tx_pn), MAC_ARG(ccmp->rx_pn), + ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2], + ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5], + ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2], + ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5], ccmp->dot11RSNAStatsCCMPFormatErrors, ccmp->dot11RSNAStatsCCMPReplays, ccmp->dot11RSNAStatsCCMPDecryptErrors); diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 5a48d8e0aec1..6cc54eeca3ed 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -359,14 +359,15 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) u8 rc4key[16], *pos, *icv; u32 crc; struct scatterlist sg; + DECLARE_MAC_BUF(mac); if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { if (net_ratelimit()) { struct ieee80211_hdr_4addr *hdr = (struct ieee80211_hdr_4addr *)skb->data; printk(KERN_DEBUG ": TKIP countermeasures: dropped " - "TX packet to " MAC_FMT "\n", - MAC_ARG(hdr->addr1)); + "TX packet to %s\n", + print_mac(mac, hdr->addr1)); } return -1; } @@ -421,14 +422,15 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) u32 crc; struct scatterlist sg; int plen; + DECLARE_MAC_BUF(mac); hdr = (struct ieee80211_hdr_4addr *)skb->data; if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { if (net_ratelimit()) { printk(KERN_DEBUG ": TKIP countermeasures: dropped " - "received packet from " MAC_FMT "\n", - MAC_ARG(hdr->addr2)); + "received packet from %s\n", + print_mac(mac, hdr->addr2)); } return -1; } @@ -441,7 +443,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (!(keyidx & (1 << 5))) { if (net_ratelimit()) { printk(KERN_DEBUG "TKIP: received packet without ExtIV" - " flag from " MAC_FMT "\n", MAC_ARG(hdr->addr2)); + " flag from %s\n", print_mac(mac, hdr->addr2)); } return -2; } @@ -453,9 +455,9 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } if (!tkey->key_set) { if (net_ratelimit()) { - printk(KERN_DEBUG "TKIP: received packet from " MAC_FMT + printk(KERN_DEBUG "TKIP: received packet from %s" " with keyid=%d that does not have a configured" - " key\n", MAC_ARG(hdr->addr2), keyidx); + " key\n", print_mac(mac, hdr->addr2), keyidx); } return -3; } @@ -465,9 +467,9 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { if (net_ratelimit()) { - IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=" MAC_FMT + IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=%s" " previous TSC %08x%04x received TSC " - "%08x%04x\n", MAC_ARG(hdr->addr2), + "%08x%04x\n", print_mac(mac, hdr->addr2), tkey->rx_iv32, tkey->rx_iv16, iv32, iv16); } tkey->dot11RSNAStatsTKIPReplays++; @@ -489,8 +491,8 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) { if (net_ratelimit()) { printk(KERN_DEBUG ": TKIP: failed to decrypt " - "received packet from " MAC_FMT "\n", - MAC_ARG(hdr->addr2)); + "received packet from %s\n", + print_mac(mac, hdr->addr2)); } return -7; } @@ -508,7 +510,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } if (net_ratelimit()) { IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA=" - MAC_FMT "\n", MAC_ARG(hdr->addr2)); + "%s\n", print_mac(mac, hdr->addr2)); } tkey->dot11RSNAStatsTKIPICVErrors++; return -5; @@ -639,6 +641,7 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, { struct ieee80211_tkip_data *tkey = priv; u8 mic[8]; + DECLARE_MAC_BUF(mac); if (!tkey->key_set) return -1; @@ -651,8 +654,8 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, struct ieee80211_hdr_4addr *hdr; hdr = (struct ieee80211_hdr_4addr *)skb->data; printk(KERN_DEBUG "%s: Michael MIC verification failed for " - "MSDU from " MAC_FMT " keyidx=%d\n", - skb->dev ? skb->dev->name : "N/A", MAC_ARG(hdr->addr2), + "MSDU from %s keyidx=%d\n", + skb->dev ? skb->dev->name : "N/A", print_mac(mac, hdr->addr2), keyidx); if (skb->dev) ieee80211_michael_mic_failure(skb->dev, hdr, keyidx); diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 17ad278696ed..69cb6aad25be 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -47,6 +47,7 @@ #include <linux/wireless.h> #include <linux/etherdevice.h> #include <asm/uaccess.h> +#include <net/net_namespace.h> #include <net/arp.h> #include <net/ieee80211.h> @@ -264,7 +265,7 @@ static int __init ieee80211_init(void) struct proc_dir_entry *e; ieee80211_debug_level = debug; - ieee80211_proc = proc_mkdir(DRV_NAME, proc_net); + ieee80211_proc = proc_mkdir(DRV_NAME, init_net.proc_net); if (ieee80211_proc == NULL) { IEEE80211_ERROR("Unable to create " DRV_NAME " proc directory\n"); @@ -273,7 +274,7 @@ static int __init ieee80211_init(void) e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR, ieee80211_proc); if (!e) { - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, init_net.proc_net); ieee80211_proc = NULL; return -EIO; } @@ -293,7 +294,7 @@ static void __exit ieee80211_exit(void) #ifdef CONFIG_IEEE80211_DEBUG if (ieee80211_proc) { remove_proc_entry("debug_level", ieee80211_proc); - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, init_net.proc_net); ieee80211_proc = NULL; } #endif /* CONFIG_IEEE80211_DEBUG */ diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 6284c99b456e..21c0fadde03b 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -271,6 +271,7 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, { struct ieee80211_hdr_3addr *hdr; int res, hdrlen; + DECLARE_MAC_BUF(mac); if (crypt == NULL || crypt->ops->decrypt_mpdu == NULL) return 0; @@ -282,8 +283,8 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, res = crypt->ops->decrypt_mpdu(skb, hdrlen, crypt->priv); atomic_dec(&crypt->refcnt); if (res < 0) { - IEEE80211_DEBUG_DROP("decryption failed (SA=" MAC_FMT - ") res=%d\n", MAC_ARG(hdr->addr2), res); + IEEE80211_DEBUG_DROP("decryption failed (SA=%s" + ") res=%d\n", print_mac(mac, hdr->addr2), res); if (res == -2) IEEE80211_DEBUG_DROP("Decryption failed ICV " "mismatch (key %d)\n", @@ -303,6 +304,7 @@ ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, { struct ieee80211_hdr_3addr *hdr; int res, hdrlen; + DECLARE_MAC_BUF(mac); if (crypt == NULL || crypt->ops->decrypt_msdu == NULL) return 0; @@ -315,8 +317,8 @@ ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, atomic_dec(&crypt->refcnt); if (res < 0) { printk(KERN_DEBUG "%s: MSDU decryption/MIC verification failed" - " (SA=" MAC_FMT " keyidx=%d)\n", - ieee->dev->name, MAC_ARG(hdr->addr2), keyidx); + " (SA=%s keyidx=%d)\n", + ieee->dev->name, print_mac(mac, hdr->addr2), keyidx); return -1; } @@ -350,6 +352,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, struct ieee80211_crypt_data *crypt = NULL; int keyidx = 0; int can_be_decrypted = 0; + DECLARE_MAC_BUF(mac); hdr = (struct ieee80211_hdr_4addr *)skb->data; stats = &ieee->stats; @@ -459,8 +462,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, * frames silently instead of filling system log with * these reports. */ IEEE80211_DEBUG_DROP("Decryption failed (not set)" - " (SA=" MAC_FMT ")\n", - MAC_ARG(hdr->addr2)); + " (SA=%s)\n", + print_mac(mac, hdr->addr2)); ieee->ieee_stats.rx_discards_undecryptable++; goto rx_dropped; } @@ -471,8 +474,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, fc & IEEE80211_FCTL_PROTECTED && ieee->host_decrypt && (keyidx = hostap_rx_frame_decrypt(ieee, skb, crypt)) < 0) { printk(KERN_DEBUG "%s: failed to decrypt mgmt::auth " - "from " MAC_FMT "\n", dev->name, - MAC_ARG(hdr->addr2)); + "from %s\n", dev->name, + print_mac(mac, hdr->addr2)); /* TODO: could inform hostapd about this so that it * could send auth failure report */ goto rx_dropped; @@ -650,8 +653,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, * configured */ } else { IEEE80211_DEBUG_DROP("encryption configured, but RX " - "frame not encrypted (SA=" MAC_FMT - ")\n", MAC_ARG(hdr->addr2)); + "frame not encrypted (SA=%s" + ")\n", print_mac(mac, hdr->addr2)); goto rx_dropped; } } @@ -659,9 +662,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep && !ieee80211_is_eapol_frame(ieee, skb)) { IEEE80211_DEBUG_DROP("dropped unencrypted RX data " - "frame from " MAC_FMT + "frame from %s" " (drop_unencrypted=1)\n", - MAC_ARG(hdr->addr2)); + print_mac(mac, hdr->addr2)); goto rx_dropped; } @@ -1411,6 +1414,8 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 struct ieee80211_network *network, struct ieee80211_rx_stats *stats) { + DECLARE_MAC_BUF(mac); + network->qos_data.active = 0; network->qos_data.supported = 0; network->qos_data.param_count = 0; @@ -1457,11 +1462,11 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 } if (network->mode == 0) { - IEEE80211_DEBUG_SCAN("Filtered out '%s (" MAC_FMT ")' " + IEEE80211_DEBUG_SCAN("Filtered out '%s (%s)' " "network.\n", escape_essid(network->ssid, network->ssid_len), - MAC_ARG(network->bssid)); + print_mac(mac, network->bssid)); return 1; } @@ -1490,6 +1495,7 @@ static void update_network(struct ieee80211_network *dst, { int qos_active; u8 old_param; + DECLARE_MAC_BUF(mac); ieee80211_network_reset(dst); dst->ibss_dfs = src->ibss_dfs; @@ -1503,8 +1509,8 @@ static void update_network(struct ieee80211_network *dst, memcpy(&dst->stats, &src->stats, sizeof(struct ieee80211_rx_stats)); else - IEEE80211_DEBUG_SCAN("Network " MAC_FMT " info received " - "off channel (%d vs. %d)\n", MAC_ARG(src->bssid), + IEEE80211_DEBUG_SCAN("Network %s info received " + "off channel (%d vs. %d)\n", print_mac(mac, src->bssid), dst->channel, src->stats.received_channel); dst->capability = src->capability; @@ -1576,12 +1582,13 @@ static void ieee80211_process_probe_response(struct ieee80211_device struct ieee80211_info_element *info_element = beacon->info_element; #endif unsigned long flags; + DECLARE_MAC_BUF(mac); - IEEE80211_DEBUG_SCAN("'%s' (" MAC_FMT + IEEE80211_DEBUG_SCAN("'%s' (%s" "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n", escape_essid(info_element->data, info_element->len), - MAC_ARG(beacon->header.addr3), + print_mac(mac, beacon->header.addr3), (beacon->capability & (1 << 0xf)) ? '1' : '0', (beacon->capability & (1 << 0xe)) ? '1' : '0', (beacon->capability & (1 << 0xd)) ? '1' : '0', @@ -1600,10 +1607,10 @@ static void ieee80211_process_probe_response(struct ieee80211_device (beacon->capability & (1 << 0x0)) ? '1' : '0'); if (ieee80211_network_init(ieee, beacon, &network, stats)) { - IEEE80211_DEBUG_SCAN("Dropped '%s' (" MAC_FMT ") via %s.\n", + IEEE80211_DEBUG_SCAN("Dropped '%s' (%s) via %s.\n", escape_essid(info_element->data, info_element->len), - MAC_ARG(beacon->header.addr3), + print_mac(mac, beacon->header.addr3), is_beacon(beacon->header.frame_ctl) ? "BEACON" : "PROBE RESPONSE"); return; @@ -1637,11 +1644,11 @@ static void ieee80211_process_probe_response(struct ieee80211_device /* If there are no more slots, expire the oldest */ list_del(&oldest->list); target = oldest; - IEEE80211_DEBUG_SCAN("Expired '%s' (" MAC_FMT ") from " + IEEE80211_DEBUG_SCAN("Expired '%s' (%s) from " "network list.\n", escape_essid(target->ssid, target->ssid_len), - MAC_ARG(target->bssid)); + print_mac(mac, target->bssid)); ieee80211_network_reset(target); } else { /* Otherwise just pull from the free list */ @@ -1651,10 +1658,10 @@ static void ieee80211_process_probe_response(struct ieee80211_device } #ifdef CONFIG_IEEE80211_DEBUG - IEEE80211_DEBUG_SCAN("Adding '%s' (" MAC_FMT ") via %s.\n", + IEEE80211_DEBUG_SCAN("Adding '%s' (%s) via %s.\n", escape_essid(network.ssid, network.ssid_len), - MAC_ARG(network.bssid), + print_mac(mac, network.bssid), is_beacon(beacon->header.frame_ctl) ? "BEACON" : "PROBE RESPONSE"); #endif @@ -1662,10 +1669,10 @@ static void ieee80211_process_probe_response(struct ieee80211_device network.ibss_dfs = NULL; list_add_tail(&target->list, &ieee->network_list); } else { - IEEE80211_DEBUG_SCAN("Updating '%s' (" MAC_FMT ") via %s.\n", + IEEE80211_DEBUG_SCAN("Updating '%s' (%s) via %s.\n", escape_essid(target->ssid, target->ssid_len), - MAC_ARG(target->bssid), + print_mac(mac, target->bssid), is_beacon(beacon->header.frame_ctl) ? "BEACON" : "PROBE RESPONSE"); update_network(target, &network); diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 465b73d50532..9b58dd67acb6 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -257,6 +257,7 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, char *ev = extra; char *stop = ev + wrqu->data.length; int i = 0; + DECLARE_MAC_BUF(mac); IEEE80211_DEBUG_WX("Getting scan\n"); @@ -274,10 +275,10 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, ev = ieee80211_translate_scan(ieee, ev, stop, network); else IEEE80211_DEBUG_SCAN("Not showing network '%s (" - MAC_FMT ")' due to age (%dms).\n", + "%s)' due to age (%dms).\n", escape_essid(network->ssid, network->ssid_len), - MAC_ARG(network->bssid), + print_mac(mac, network->bssid), jiffies_to_msecs(jiffies - network-> last_scanned)); diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index e475f2e1be13..c4d122ddd72c 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -53,7 +53,7 @@ ieee80211softmac_assoc(struct ieee80211softmac_device *mac, struct ieee80211soft /* Set a timer for timeout */ /* FIXME: make timeout configurable */ if (likely(mac->running)) - schedule_delayed_work(&mac->associnfo.timeout, 5 * HZ); + queue_delayed_work(mac->wq, &mac->associnfo.timeout, 5 * HZ); spin_unlock_irqrestore(&mac->lock, flags); } @@ -372,6 +372,7 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, u16 status = le16_to_cpup(&resp->status); struct ieee80211softmac_network *network = NULL; unsigned long flags; + DECLARE_MAC_BUF(mac2); if (unlikely(!mac->running)) return -ENODEV; @@ -388,7 +389,8 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, /* someone sending us things without us knowing him? Ignore. */ if (!network) { - dprintk(KERN_INFO PFX "Received unrequested assocation response from " MAC_FMT "\n", MAC_ARG(resp->header.addr3)); + dprintk(KERN_INFO PFX "Received unrequested assocation response from %s\n", + print_mac(mac2, resp->header.addr3)); spin_unlock_irqrestore(&mac->lock, flags); return 0; } @@ -417,7 +419,7 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, network->authenticated = 0; /* we don't want to do this more than once ... */ network->auth_desynced_once = 1; - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); break; } default: @@ -439,7 +441,7 @@ ieee80211softmac_try_reassoc(struct ieee80211softmac_device *mac) spin_lock_irqsave(&mac->lock, flags); mac->associnfo.associating = 1; - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); spin_unlock_irqrestore(&mac->lock, flags); } @@ -481,7 +483,7 @@ ieee80211softmac_handle_reassoc_req(struct net_device * dev, dprintkl(KERN_INFO PFX "reassoc request from unknown network\n"); return 0; } - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); return 0; } diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c index 826c32d24461..a53a751d0702 100644 --- a/net/ieee80211/softmac/ieee80211softmac_auth.c +++ b/net/ieee80211/softmac/ieee80211softmac_auth.c @@ -35,6 +35,7 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac, { struct ieee80211softmac_auth_queue_item *auth; unsigned long flags; + DECLARE_MAC_BUF(mac2); if (net->authenticating || net->authenticated) return 0; @@ -43,7 +44,7 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac, /* Add the network if it's not already added */ ieee80211softmac_add_network(mac, net); - dprintk(KERN_NOTICE PFX "Queueing Authentication Request to "MAC_FMT"\n", MAC_ARG(net->bssid)); + dprintk(KERN_NOTICE PFX "Queueing Authentication Request to %s\n", print_mac(mac2, net->bssid)); /* Queue the auth request */ auth = (struct ieee80211softmac_auth_queue_item *) kmalloc(sizeof(struct ieee80211softmac_auth_queue_item), GFP_KERNEL); @@ -61,7 +62,7 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac, /* add to list */ list_add_tail(&auth->list, &mac->auth_queue); - schedule_delayed_work(&auth->work, 0); + queue_delayed_work(mac->wq, &auth->work, 0); spin_unlock_irqrestore(&mac->lock, flags); return 0; @@ -76,6 +77,7 @@ ieee80211softmac_auth_queue(struct work_struct *work) struct ieee80211softmac_auth_queue_item *auth; struct ieee80211softmac_network *net; unsigned long flags; + DECLARE_MAC_BUF(mac2); auth = container_of(work, struct ieee80211softmac_auth_queue_item, work.work); @@ -95,17 +97,18 @@ ieee80211softmac_auth_queue(struct work_struct *work) } net->authenticated = 0; /* add a timeout call so we eventually give up waiting for an auth reply */ - schedule_delayed_work(&auth->work, IEEE80211SOFTMAC_AUTH_TIMEOUT); + queue_delayed_work(mac->wq, &auth->work, IEEE80211SOFTMAC_AUTH_TIMEOUT); auth->retry--; spin_unlock_irqrestore(&mac->lock, flags); if (ieee80211softmac_send_mgt_frame(mac, auth->net, IEEE80211_STYPE_AUTH, auth->state)) - dprintk(KERN_NOTICE PFX "Sending Authentication Request to "MAC_FMT" failed (this shouldn't happen, wait for the timeout).\n", MAC_ARG(net->bssid)); + dprintk(KERN_NOTICE PFX "Sending Authentication Request to %s failed (this shouldn't happen, wait for the timeout).\n", + print_mac(mac2, net->bssid)); else - dprintk(KERN_NOTICE PFX "Sent Authentication Request to "MAC_FMT".\n", MAC_ARG(net->bssid)); + dprintk(KERN_NOTICE PFX "Sent Authentication Request to %s.\n", print_mac(mac2, net->bssid)); return; } - printkl(KERN_WARNING PFX "Authentication timed out with "MAC_FMT"\n", MAC_ARG(net->bssid)); + printkl(KERN_WARNING PFX "Authentication timed out with %s\n", print_mac(mac2, net->bssid)); /* Remove this item from the queue */ spin_lock_irqsave(&mac->lock, flags); net->authenticating = 0; @@ -142,6 +145,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) struct ieee80211softmac_network *net = NULL; unsigned long flags; u8 * data; + DECLARE_MAC_BUF(mac2); if (unlikely(!mac->running)) return -ENODEV; @@ -161,7 +165,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) /* Make sure that we've got an auth queue item for this request */ if(aq == NULL) { - dprintkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but no queue item exists.\n", MAC_ARG(auth->header.addr2)); + dprintkl(KERN_DEBUG PFX "Authentication response received from %s but no queue item exists.\n", print_mac(mac2, auth->header.addr2)); /* Error #? */ return -1; } @@ -169,7 +173,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) /* Check for out of order authentication */ if(!net->authenticating) { - dprintkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but did not request authentication.\n",MAC_ARG(auth->header.addr2)); + dprintkl(KERN_DEBUG PFX "Authentication response received from %s but did not request authentication.\n",print_mac(mac2, auth->header.addr2)); return -1; } @@ -187,7 +191,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) spin_unlock_irqrestore(&mac->lock, flags); /* Send event */ - printkl(KERN_NOTICE PFX "Open Authentication completed with "MAC_FMT"\n", MAC_ARG(net->bssid)); + printkl(KERN_NOTICE PFX "Open Authentication completed with %s\n", print_mac(mac2, net->bssid)); ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_AUTHENTICATED, net); break; default: @@ -197,8 +201,8 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) net->authenticating = 0; spin_unlock_irqrestore(&mac->lock, flags); - printkl(KERN_NOTICE PFX "Open Authentication with "MAC_FMT" failed, error code: %i\n", - MAC_ARG(net->bssid), le16_to_cpup(&auth->status)); + printkl(KERN_NOTICE PFX "Open Authentication with %s failed, error code: %i\n", + print_mac(mac2, net->bssid), le16_to_cpup(&auth->status)); /* Count the error? */ break; } @@ -238,7 +242,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) * request. */ cancel_delayed_work(&aq->work); INIT_DELAYED_WORK(&aq->work, &ieee80211softmac_auth_challenge_response); - schedule_delayed_work(&aq->work, 0); + queue_delayed_work(mac->wq, &aq->work, 0); spin_unlock_irqrestore(&mac->lock, flags); return 0; case IEEE80211SOFTMAC_AUTH_SHARED_PASS: @@ -253,13 +257,13 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) net->authenticating = 0; net->authenticated = 1; spin_unlock_irqrestore(&mac->lock, flags); - printkl(KERN_NOTICE PFX "Shared Key Authentication completed with "MAC_FMT"\n", - MAC_ARG(net->bssid)); + printkl(KERN_NOTICE PFX "Shared Key Authentication completed with %s\n", + print_mac(mac2, net->bssid)); ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_AUTHENTICATED, net); break; default: - printkl(KERN_NOTICE PFX "Shared Key Authentication with "MAC_FMT" failed, error code: %i\n", - MAC_ARG(net->bssid), le16_to_cpup(&auth->status)); + printkl(KERN_NOTICE PFX "Shared Key Authentication with %s failed, error code: %i\n", + print_mac(mac2, net->bssid), le16_to_cpup(&auth->status)); /* Lock and reset flags */ spin_lock_irqsave(&mac->lock, flags); net->authenticating = 0; @@ -375,6 +379,7 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de struct ieee80211softmac_network *net = NULL; struct ieee80211softmac_device *mac = ieee80211_priv(dev); + DECLARE_MAC_BUF(mac2); if (unlikely(!mac->running)) return -ENODEV; @@ -387,8 +392,8 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de net = ieee80211softmac_get_network_by_bssid(mac, deauth->header.addr2); if (net == NULL) { - dprintkl(KERN_DEBUG PFX "Received deauthentication packet from "MAC_FMT", but that network is unknown.\n", - MAC_ARG(deauth->header.addr2)); + dprintkl(KERN_DEBUG PFX "Received deauthentication packet from %s, but that network is unknown.\n", + print_mac(mac2, deauth->header.addr2)); return 0; } @@ -403,6 +408,6 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de ieee80211softmac_deauth_from_net(mac, net); /* let's try to re-associate */ - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); return 0; } diff --git a/net/ieee80211/softmac/ieee80211softmac_event.c b/net/ieee80211/softmac/ieee80211softmac_event.c index b3e33a4d4869..8cef05b60f16 100644 --- a/net/ieee80211/softmac/ieee80211softmac_event.c +++ b/net/ieee80211/softmac/ieee80211softmac_event.c @@ -172,7 +172,7 @@ ieee80211softmac_call_events_locked(struct ieee80211softmac_device *mac, int eve /* User may have subscribed to ANY event, so * we tell them which event triggered it. */ eventptr->event_type = event; - schedule_delayed_work(&eventptr->work, 0); + queue_delayed_work(mac->wq, &eventptr->work, 0); } } } diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c index 6398e6e67493..07505ca859af 100644 --- a/net/ieee80211/softmac/ieee80211softmac_module.c +++ b/net/ieee80211/softmac/ieee80211softmac_module.c @@ -36,8 +36,13 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv) dev = alloc_ieee80211(sizeof(*softmac) + sizeof_priv); if (!dev) return NULL; - softmac = ieee80211_priv(dev); + softmac->wq = create_freezeable_workqueue("softmac"); + if (!softmac->wq) { + free_ieee80211(dev); + return NULL; + } + softmac->dev = dev; softmac->ieee = netdev_priv(dev); spin_lock_init(&softmac->lock); @@ -105,7 +110,7 @@ ieee80211softmac_clear_pending_work(struct ieee80211softmac_device *sm) cancel_delayed_work(&eventptr->work); spin_unlock_irqrestore(&sm->lock, flags); - flush_scheduled_work(); + flush_workqueue(sm->wq); /* now we should be save and no longer need locking... */ spin_lock_irqsave(&sm->lock, flags); @@ -139,6 +144,7 @@ void free_ieee80211softmac(struct net_device *dev) ieee80211softmac_clear_pending_work(sm); kfree(sm->scaninfo); kfree(sm->wpa.IE); + destroy_workqueue(sm->wq); free_ieee80211(dev); } EXPORT_SYMBOL_GPL(free_ieee80211softmac); diff --git a/net/ieee80211/softmac/ieee80211softmac_scan.c b/net/ieee80211/softmac/ieee80211softmac_scan.c index abea3648680e..bfab8d7db88f 100644 --- a/net/ieee80211/softmac/ieee80211softmac_scan.c +++ b/net/ieee80211/softmac/ieee80211softmac_scan.c @@ -123,7 +123,7 @@ void ieee80211softmac_scan(struct work_struct *work) spin_unlock_irqrestore(&sm->lock, flags); break; } - schedule_delayed_work(&si->softmac_scan, IEEE80211SOFTMAC_PROBE_DELAY); + queue_delayed_work(sm->wq, &si->softmac_scan, IEEE80211SOFTMAC_PROBE_DELAY); spin_unlock_irqrestore(&sm->lock, flags); return; } else { @@ -190,7 +190,7 @@ int ieee80211softmac_start_scan_implementation(struct net_device *dev) sm->scaninfo->started = 1; sm->scaninfo->stop = 0; INIT_COMPLETION(sm->scaninfo->finished); - schedule_delayed_work(&sm->scaninfo->softmac_scan, 0); + queue_delayed_work(sm->wq, &sm->scaninfo->softmac_scan, 0); spin_unlock_irqrestore(&sm->lock, flags); return 0; } diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c index 5742dc803b79..ac36767b56e8 100644 --- a/net/ieee80211/softmac/ieee80211softmac_wx.c +++ b/net/ieee80211/softmac/ieee80211softmac_wx.c @@ -72,6 +72,7 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev, struct ieee80211softmac_device *sm = ieee80211_priv(net_dev); struct ieee80211softmac_auth_queue_item *authptr; int length = 0; + DECLARE_MAC_BUF(mac); check_assoc_again: mutex_lock(&sm->associnfo.mutex); @@ -90,7 +91,7 @@ check_assoc_again: /* We must unlock to avoid deadlocks with the assoc workqueue * on the associnfo.mutex */ mutex_unlock(&sm->associnfo.mutex); - flush_scheduled_work(); + flush_workqueue(sm->wq); /* Avoid race! Check assoc status again. Maybe someone started an * association while we flushed. */ goto check_assoc_again; @@ -113,7 +114,7 @@ check_assoc_again: sm->associnfo.associating = 1; /* queue lower level code to do work (if necessary) */ - schedule_delayed_work(&sm->associnfo.work, 0); + queue_delayed_work(sm->wq, &sm->associnfo.work, 0); mutex_unlock(&sm->associnfo.mutex); @@ -348,7 +349,7 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev, /* force reassociation */ mac->associnfo.bssvalid = 0; if (mac->associnfo.associated) - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); } else if (is_zero_ether_addr(data->ap_addr.sa_data)) { /* the bssid we have is no longer fixed */ mac->associnfo.bssfixed = 0; @@ -365,7 +366,7 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev, /* tell the other code that this bssid should be used no matter what */ mac->associnfo.bssfixed = 1; /* queue associate if new bssid or (old one again and not associated) */ - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); } out: diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index fb7909774254..d894f616c3d6 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -394,6 +394,14 @@ config INET_XFRM_MODE_BEET If unsure, say Y. +config INET_LRO + tristate "Large Receive Offload (ipv4/tcp)" + + ---help--- + Support for Large Receive Offload (ipv4/tcp). + + If unsure, say Y. + config INET_DIAG tristate "INET: socket monitoring interface" default y diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index fbf1674e0c2a..a02c36d0a13e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o +obj-$(CONFIG_INET_LRO) += inet_lro.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e68103475cca..621b128897d7 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -241,7 +241,7 @@ EXPORT_SYMBOL(build_ehash_secret); * Create an inet socket. */ -static int inet_create(struct socket *sock, int protocol) +static int inet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct list_head *p; @@ -253,6 +253,9 @@ static int inet_create(struct socket *sock, int protocol) int try_loading_module = 0; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && !inet_ehash_secret) @@ -320,7 +323,7 @@ lookup_protocol: BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, 1); if (sk == NULL) goto out; @@ -939,7 +942,7 @@ static struct inet_protosw inetsw_array[] = } }; -#define INETSW_ARRAY_LEN (sizeof(inetsw_array) / sizeof(struct inet_protosw)) +#define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array) void inet_register_protosw(struct inet_protosw *p) { @@ -1299,6 +1302,10 @@ static int __init init_ipv4_mibs(void) sizeof(struct icmp_mib), __alignof__(struct icmp_mib)) < 0) goto err_icmp_mib; + if (snmp_mib_init((void **)icmpmsg_statistics, + sizeof(struct icmpmsg_mib), + __alignof__(struct icmpmsg_mib)) < 0) + goto err_icmpmsg_mib; if (snmp_mib_init((void **)tcp_statistics, sizeof(struct tcp_mib), __alignof__(struct tcp_mib)) < 0) @@ -1321,6 +1328,8 @@ err_udplite_mib: err_udp_mib: snmp_mib_free((void **)tcp_statistics); err_tcp_mib: + snmp_mib_free((void **)icmpmsg_statistics); +err_icmpmsg_mib: snmp_mib_free((void **)icmp_statistics); err_icmp_mib: snmp_mib_free((void **)ip_statistics); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 39f6211f1496..4e8e3b079f5b 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -5,6 +5,7 @@ #include <net/ah.h> #include <linux/crypto.h> #include <linux/pfkeyv2.h> +#include <linux/spinlock.h> #include <net/icmp.h> #include <net/protocol.h> #include <asm/scatterlist.h> @@ -65,6 +66,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) char buf[60]; } tmp_iph; + skb_push(skb, -skb_network_offset(skb)); top_iph = ip_hdr(skb); iph = &tmp_iph.iph; @@ -80,28 +82,30 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) goto error; } - ah = (struct ip_auth_hdr *)((char *)top_iph+top_iph->ihl*4); - ah->nexthdr = top_iph->protocol; + ah = ip_auth_hdr(skb); + ah->nexthdr = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_AH; top_iph->tos = 0; top_iph->tot_len = htons(skb->len); top_iph->frag_off = 0; top_iph->ttl = 0; - top_iph->protocol = IPPROTO_AH; top_iph->check = 0; ahp = x->data; - ah->hdrlen = (XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + - ahp->icv_trunc_len) >> 2) - 2; + ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(++x->replay.oseq); - xfrm_aevent_doreplay(x); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq); + + spin_lock_bh(&x->lock); err = ah_mac_digest(ahp, skb, ah->auth_data); + memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); + spin_unlock_bh(&x->lock); + if (err) goto error; - memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; top_iph->ttl = iph->ttl; @@ -111,8 +115,6 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); } - ip_send_check(top_iph); - err = 0; error: @@ -123,21 +125,23 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) { int ah_hlen; int ihl; + int nexthdr; int err = -EINVAL; struct iphdr *iph; struct ip_auth_hdr *ah; struct ah_data *ahp; char work_buf[60]; - if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr))) + if (!pskb_may_pull(skb, sizeof(*ah))) goto out; - ah = (struct ip_auth_hdr*)skb->data; + ah = (struct ip_auth_hdr *)skb->data; ahp = x->data; + nexthdr = ah->nexthdr; ah_hlen = (ah->hdrlen + 2) << 2; - if (ah_hlen != XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_full_len) && - ah_hlen != XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len)) + if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && + ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) goto out; if (!pskb_may_pull(skb, ah_hlen)) @@ -151,7 +155,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; - ah = (struct ip_auth_hdr*)skb->data; + ah = (struct ip_auth_hdr *)skb->data; iph = ip_hdr(skb); ihl = skb->data - skb_network_header(skb); @@ -180,13 +184,12 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } } - ((struct iphdr*)work_buf)->protocol = ah->nexthdr; skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_buf, ihl); skb->transport_header = skb->network_header; __skb_pull(skb, ah_hlen + ihl); - return 0; + return nexthdr; out: return err; @@ -219,10 +222,6 @@ static int ah_init_state(struct xfrm_state *x) if (!x->aalg) goto error; - /* null auth can use a zero length key */ - if (x->aalg->alg_key_len > 512) - goto error; - if (x->encap) goto error; @@ -230,14 +229,13 @@ static int ah_init_state(struct xfrm_state *x) if (ahp == NULL) return -ENOMEM; - ahp->key = x->aalg->alg_key; - ahp->key_len = (x->aalg->alg_key_len+7)/8; tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto error; ahp->tfm = tfm; - if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len)) + if (crypto_hash_setkey(tfm, x->aalg->alg_key, + (x->aalg->alg_key_len + 7) / 8)) goto error; /* @@ -266,7 +264,8 @@ static int ah_init_state(struct xfrm_state *x) if (!ahp->work_icv) goto error; - x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); + x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + + ahp->icv_trunc_len); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; @@ -302,6 +301,7 @@ static struct xfrm_type ah_type = .description = "AH4", .owner = THIS_MODULE, .proto = IPPROTO_AH, + .flags = XFRM_TYPE_REPLAY_PROT, .init_state = ah_init_state, .destructor = ah_destroy, .input = ah_input, diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9ab9d534fbac..36d6798947b5 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -103,6 +103,7 @@ #include <linux/sysctl.h> #endif +#include <net/net_namespace.h> #include <net/ip.h> #include <net/icmp.h> #include <net/route.h> @@ -252,7 +253,7 @@ static int arp_constructor(struct neighbour *neigh) neigh->parms = neigh_parms_clone(parms); rcu_read_unlock(); - if (dev->hard_header == NULL) { + if (!dev->header_ops) { neigh->nud_state = NUD_NOARP; neigh->ops = &arp_direct_ops; neigh->output = neigh->ops->queue_xmit; @@ -309,10 +310,12 @@ static int arp_constructor(struct neighbour *neigh) neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->broadcast, dev->addr_len); } - if (dev->hard_header_cache) + + if (dev->header_ops->cache) neigh->ops = &arp_hh_ops; else neigh->ops = &arp_generic_ops; + if (neigh->nud_state&NUD_VALID) neigh->output = neigh->ops->connected_output; else @@ -590,8 +593,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, /* * Fill the device header for the ARP frame */ - if (dev->hard_header && - dev->hard_header(skb,dev,ptype,dest_hw,src_hw,skb->len) < 0) + if (dev_hard_header(skb, dev, ptype, dest_hw, src_hw, skb->len) < 0) goto out; /* @@ -931,6 +933,9 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, { struct arphdr *arp; + if (dev->nd_net != &init_net) + goto freeskb; + /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ if (!pskb_may_pull(skb, (sizeof(struct arphdr) + (2 * dev->addr_len) + @@ -977,7 +982,7 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev) if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { - dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data); + dev = dev_getbyhwaddr(&init_net, r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; } @@ -1165,7 +1170,7 @@ int arp_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); if (r.arp_dev[0]) { err = -ENODEV; - if ((dev = __dev_get_by_name(r.arp_dev)) == NULL) + if ((dev = __dev_get_by_name(&init_net, r.arp_dev)) == NULL) goto out; /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ @@ -1201,6 +1206,9 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); @@ -1370,24 +1378,8 @@ static const struct seq_operations arp_seq_ops = { static int arp_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &arp_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &arp_seq_ops, + sizeof(struct neigh_seq_state)); } static const struct file_operations arp_seq_fops = { @@ -1400,7 +1392,7 @@ static const struct file_operations arp_seq_fops = { static int __init arp_proc_init(void) { - if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops)) + if (!proc_net_fops_create(&init_net, "arp", S_IRUGO, &arp_seq_fops)) return -ENOMEM; return 0; } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index ab56a052ce31..805a78e6ed55 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1831,68 +1831,75 @@ socket_setattr_failure: } /** - * cipso_v4_sock_getattr - Get the security attributes from a sock - * @sk: the sock + * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions + * @cipso: the CIPSO v4 option * @secattr: the security attributes * * Description: - * Query @sk to see if there is a CIPSO option attached to the sock and if - * there is return the CIPSO security attributes in @secattr. This function - * requires that @sk be locked, or privately held, but it does not do any - * locking itself. Returns zero on success and negative values on failure. + * Inspect @cipso and return the security attributes in @secattr. Returns zero + * on success and negative values on failure. * */ -int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) +static int cipso_v4_getattr(const unsigned char *cipso, + struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; - struct inet_sock *sk_inet; - unsigned char *cipso_ptr; u32 doi; struct cipso_v4_doi *doi_def; - sk_inet = inet_sk(sk); - if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) - return -ENOMSG; - cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - - sizeof(struct iphdr); - ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); - if (ret_val == 0) - return ret_val; + if (cipso_v4_cache_check(cipso, cipso[1], secattr) == 0) + return 0; - doi = ntohl(get_unaligned((__be32 *)&cipso_ptr[2])); + doi = ntohl(get_unaligned((__be32 *)&cipso[2])); rcu_read_lock(); doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) { - rcu_read_unlock(); - return -ENOMSG; - } - + if (doi_def == NULL) + goto getattr_return; /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC * tags right now it is a safe assumption. */ - switch (cipso_ptr[6]) { + switch (cipso[6]) { case CIPSO_V4_TAG_RBITMAP: - ret_val = cipso_v4_parsetag_rbm(doi_def, - &cipso_ptr[6], - secattr); + ret_val = cipso_v4_parsetag_rbm(doi_def, &cipso[6], secattr); break; case CIPSO_V4_TAG_ENUM: - ret_val = cipso_v4_parsetag_enum(doi_def, - &cipso_ptr[6], - secattr); + ret_val = cipso_v4_parsetag_enum(doi_def, &cipso[6], secattr); break; case CIPSO_V4_TAG_RANGE: - ret_val = cipso_v4_parsetag_rng(doi_def, - &cipso_ptr[6], - secattr); + ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr); break; } - rcu_read_unlock(); +getattr_return: + rcu_read_unlock(); return ret_val; } /** + * cipso_v4_sock_getattr - Get the security attributes from a sock + * @sk: the sock + * @secattr: the security attributes + * + * Description: + * Query @sk to see if there is a CIPSO option attached to the sock and if + * there is return the CIPSO security attributes in @secattr. This function + * requires that @sk be locked, or privately held, but it does not do any + * locking itself. Returns zero on success and negative values on failure. + * + */ +int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) +{ + struct ip_options *opt; + + opt = inet_sk(sk)->opt; + if (opt == NULL || opt->cipso == 0) + return -ENOMSG; + + return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr), + secattr); +} + +/** * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option * @skb: the packet * @secattr: the security attributes @@ -1905,45 +1912,7 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) int cipso_v4_skbuff_getattr(const struct sk_buff *skb, struct netlbl_lsm_secattr *secattr) { - int ret_val = -ENOMSG; - unsigned char *cipso_ptr; - u32 doi; - struct cipso_v4_doi *doi_def; - - cipso_ptr = CIPSO_V4_OPTPTR(skb); - if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0) - return 0; - - doi = ntohl(get_unaligned((__be32 *)&cipso_ptr[2])); - rcu_read_lock(); - doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) - goto skbuff_getattr_return; - - /* XXX - This code assumes only one tag per CIPSO option which isn't - * really a good assumption to make but since we only support the MAC - * tags right now it is a safe assumption. */ - switch (cipso_ptr[6]) { - case CIPSO_V4_TAG_RBITMAP: - ret_val = cipso_v4_parsetag_rbm(doi_def, - &cipso_ptr[6], - secattr); - break; - case CIPSO_V4_TAG_ENUM: - ret_val = cipso_v4_parsetag_enum(doi_def, - &cipso_ptr[6], - secattr); - break; - case CIPSO_V4_TAG_RANGE: - ret_val = cipso_v4_parsetag_rng(doi_def, - &cipso_ptr[6], - secattr); - break; - } - -skbuff_getattr_return: - rcu_read_unlock(); - return ret_val; + return cipso_v4_getattr(CIPSO_V4_OPTPTR(skb), secattr); } /* diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5dbe5803b7d5..55d199e4ae21 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -203,8 +203,6 @@ static void inetdev_destroy(struct in_device *in_dev) ASSERT_RTNL(); dev = in_dev->dev; - if (dev == &loopback_dev) - return; in_dev->dead = 1; @@ -420,7 +418,7 @@ struct in_device *inetdev_by_index(int ifindex) struct net_device *dev; struct in_device *in_dev = NULL; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifindex); + dev = __dev_get_by_index(&init_net, ifindex); if (dev) in_dev = in_dev_get(dev); read_unlock(&dev_base_lock); @@ -506,7 +504,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) goto errout; } - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(&init_net, ifm->ifa_index); if (dev == NULL) { err = -ENODEV; goto errout; @@ -628,7 +626,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) *colon = 0; #ifdef CONFIG_KMOD - dev_load(ifr.ifr_name); + dev_load(&init_net, ifr.ifr_name); #endif switch (cmd) { @@ -669,7 +667,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); ret = -ENODEV; - if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) goto done; if (colon) @@ -909,7 +907,7 @@ no_in_dev: */ read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; @@ -988,7 +986,7 @@ __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) @@ -1051,6 +1049,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + ASSERT_RTNL(); if (!in_dev) { @@ -1058,7 +1059,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, in_dev = inetdev_init(dev); if (!in_dev) return notifier_from_errno(-ENOMEM); - if (dev == &loopback_dev) { + if (dev->flags & IFF_LOOPBACK) { IN_DEV_CONF_SET(in_dev, NOXFRM, 1); IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } @@ -1074,7 +1075,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, case NETDEV_UP: if (dev->mtu < 68) break; - if (dev == &loopback_dev) { + if (dev->flags & IFF_LOOPBACK) { struct in_ifaddr *ifa; if ((ifa = inet_alloc_ifa()) != NULL) { ifa->ifa_local = @@ -1182,7 +1183,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) s_ip_idx = ip_idx = cb->args[1]; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -1241,7 +1242,7 @@ static void devinet_copy_dflt_conf(int i) struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -1330,7 +1331,7 @@ void inet_forward_change(void) IPV4_DEVCONF_DFLT(FORWARDING) = on; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 98767a4f1185..6b1a31a74cf2 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/random.h> +#include <linux/spinlock.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/udp.h> @@ -15,7 +16,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - struct iphdr *top_iph; struct ip_esp_hdr *esph; struct crypto_blkcipher *tfm; struct blkcipher_desc desc; @@ -27,9 +27,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int alen; int nfrags; - /* Strip IP+ESP header. */ - __skb_pull(skb, skb_transport_offset(skb)); - /* Now skb is pure payload to encrypt */ + /* skb is pure payload to encrypt */ err = -ENOMEM; @@ -59,12 +57,12 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) tail[clen - skb->len - 2] = (clen - skb->len) - 2; pskb_put(skb, trailer, clen - skb->len); - __skb_push(skb, skb->data - skb_network_header(skb)); - top_iph = ip_hdr(skb); - esph = (struct ip_esp_hdr *)(skb_network_header(skb) + - top_iph->ihl * 4); - top_iph->tot_len = htons(skb->len + alen); - *(skb_tail_pointer(trailer) - 1) = top_iph->protocol; + skb_push(skb, -skb_network_offset(skb)); + esph = ip_esp_hdr(skb); + *(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_ESP; + + spin_lock_bh(&x->lock); /* this is non-NULL only with UDP Encapsulation */ if (x->encap) { @@ -75,7 +73,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) uh = (struct udphdr *)esph; uh->source = encap->encap_sport; uh->dest = encap->encap_dport; - uh->len = htons(skb->len + alen - top_iph->ihl*4); + uh->len = htons(skb->len + alen - skb_transport_offset(skb)); uh->check = 0; switch (encap->encap_type) { @@ -90,13 +88,11 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) break; } - top_iph->protocol = IPPROTO_UDP; - } else - top_iph->protocol = IPPROTO_ESP; + *skb_mac_header(skb) = IPPROTO_UDP; + } esph->spi = x->id.spi; - esph->seq_no = htonl(++x->replay.oseq); - xfrm_aevent_doreplay(x); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); if (esp->conf.ivlen) { if (unlikely(!esp->conf.ivinitted)) { @@ -112,7 +108,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) if (unlikely(nfrags > ESP_NUM_FAST_SG)) { sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); if (!sg) - goto error; + goto unlock; } skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); @@ -121,7 +117,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } while (0); if (unlikely(err)) - goto error; + goto unlock; if (esp->conf.ivlen) { memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); @@ -134,7 +130,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); } - ip_send_check(top_iph); +unlock: + spin_unlock_bh(&x->lock); error: return err; @@ -155,7 +152,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct sk_buff *trailer; int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); int alen = esp->auth.icv_trunc_len; - int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen; + int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen; int nfrags; int ihl; u8 nexthdr[2]; @@ -163,7 +160,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) int padlen; int err; - if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr))) + if (!pskb_may_pull(skb, sizeof(*esph))) goto out; if (elen <= 0 || (elen & (blksize-1))) @@ -191,7 +188,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; - esph = (struct ip_esp_hdr*)skb->data; + esph = (struct ip_esp_hdr *)skb->data; /* Get ivec. This can be wrong, check against another impls. */ if (esp->conf.ivlen) @@ -204,7 +201,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) if (!sg) goto out; } - skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen); + skb_to_sgvec(skb, sg, sizeof(*esph) + esp->conf.ivlen, elen); err = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); @@ -256,17 +253,15 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ - if (x->props.mode == XFRM_MODE_TRANSPORT || - x->props.mode == XFRM_MODE_BEET) + if (x->props.mode == XFRM_MODE_TRANSPORT) skb->ip_summed = CHECKSUM_UNNECESSARY; } - iph->protocol = nexthdr[1]; pskb_trim(skb, skb->len - alen - padlen - 2); __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen); skb_set_transport_header(skb, -ihl); - return 0; + return nexthdr[1]; out: return -EINVAL; @@ -343,11 +338,6 @@ static int esp_init_state(struct xfrm_state *x) struct crypto_blkcipher *tfm; u32 align; - /* null auth and encryption can have zero length keys */ - if (x->aalg) { - if (x->aalg->alg_key_len > 512) - goto error; - } if (x->ealg == NULL) goto error; @@ -359,15 +349,14 @@ static int esp_init_state(struct xfrm_state *x) struct xfrm_algo_desc *aalg_desc; struct crypto_hash *hash; - esp->auth.key = x->aalg->alg_key; - esp->auth.key_len = (x->aalg->alg_key_len+7)/8; hash = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(hash)) goto error; esp->auth.tfm = hash; - if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len)) + if (crypto_hash_setkey(hash, x->aalg->alg_key, + (x->aalg->alg_key_len + 7) / 8)) goto error; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); @@ -389,8 +378,7 @@ static int esp_init_state(struct xfrm_state *x) if (!esp->auth.work_icv) goto error; } - esp->conf.key = x->ealg->alg_key; - esp->conf.key_len = (x->ealg->alg_key_len+7)/8; + tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto error; @@ -403,7 +391,8 @@ static int esp_init_state(struct xfrm_state *x) goto error; esp->conf.ivinitted = 0; } - if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) + if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key, + (x->ealg->alg_key_len + 7) / 8)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; if (x->props.mode == XFRM_MODE_TUNNEL) @@ -443,6 +432,7 @@ static struct xfrm_type esp_type = .description = "ESP4", .owner = THIS_MODULE, .proto = IPPROTO_ESP, + .flags = XFRM_TYPE_REPLAY_PROT, .init_state = esp_init_state, .destructor = esp_destroy, .get_mtu = esp4_get_mtu, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eff6bce453ee..78b514ba1414 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -49,6 +49,8 @@ #define FFprint(a...) printk(KERN_DEBUG a) +static struct sock *fibnl; + #ifndef CONFIG_IP_MULTIPLE_TABLES struct fib_table *ip_fib_local_table; @@ -334,7 +336,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt, colon = strchr(devname, ':'); if (colon) *colon = 0; - dev = __dev_get_by_name(devname); + dev = __dev_get_by_name(&init_net, devname); if (!dev) return -ENODEV; cfg->fc_oif = dev->ifindex; @@ -487,7 +489,7 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, } nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { - switch (attr->nla_type) { + switch (nla_type(attr)) { case RTA_DST: cfg->fc_dst = nla_get_be32(attr); break; @@ -784,17 +786,12 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) } } -static void nl_fib_input(struct sock *sk, int len) +static void nl_fib_input(struct sk_buff *skb) { - struct sk_buff *skb = NULL; - struct nlmsghdr *nlh = NULL; struct fib_result_nl *frn; - u32 pid; + struct nlmsghdr *nlh; struct fib_table *tb; - - skb = skb_dequeue(&sk->sk_receive_queue); - if (skb == NULL) - return; + u32 pid; nlh = nlmsg_hdr(skb); if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || @@ -811,13 +808,13 @@ static void nl_fib_input(struct sock *sk, int len) pid = NETLINK_CB(skb).pid; /* pid of sending process */ NETLINK_CB(skb).pid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(sk, skb, pid, MSG_DONTWAIT); + netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT); } static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL, - THIS_MODULE); + fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, + nl_fib_input, NULL, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) @@ -860,6 +857,9 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2); return NOTIFY_DONE; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 9ad1d9ff9ce7..527a6e0af5b6 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -35,6 +35,7 @@ #include <linux/netlink.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -1038,24 +1039,8 @@ static const struct seq_operations fib_seq_ops = { static int fib_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct fib_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &fib_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &fib_seq_ops, + sizeof(struct fib_iter_state)); } static const struct file_operations fib_seq_fops = { @@ -1068,13 +1053,13 @@ static const struct file_operations fib_seq_fops = { int __init fib_proc_init(void) { - if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops)) + if (!proc_net_fops_create(&init_net, "route", S_IRUGO, &fib_seq_fops)) return -ENOMEM; return 0; } void __init fib_proc_exit(void) { - proc_net_remove("route"); + proc_net_remove(&init_net, "route"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 2a947840210e..f16839c6a721 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -76,8 +76,6 @@ static struct fib4_rule local_rule = { }, }; -static LIST_HEAD(fib4_rules); - #ifdef CONFIG_NET_CLS_ROUTE u32 fib_rules_tclass(struct fib_result *res) { @@ -279,9 +277,9 @@ static u32 fib4_rule_default_pref(void) struct list_head *pos; struct fib_rule *rule; - if (!list_empty(&fib4_rules)) { - pos = fib4_rules.next; - if (pos->next != &fib4_rules) { + if (!list_empty(&fib4_rules_ops.rules_list)) { + pos = fib4_rules_ops.rules_list.next; + if (pos->next != &fib4_rules_ops.rules_list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; @@ -317,15 +315,15 @@ static struct fib_rules_ops fib4_rules_ops = { .flush_cache = fib4_rule_flush_cache, .nlgroup = RTNLGRP_IPV4_RULE, .policy = fib4_rule_policy, - .rules_list = &fib4_rules, + .rules_list = LIST_HEAD_INIT(fib4_rules_ops.rules_list), .owner = THIS_MODULE, }; void __init fib4_rules_init(void) { - list_add_tail(&local_rule.common.list, &fib4_rules); - list_add_tail(&main_rule.common.list, &fib4_rules); - list_add_tail(&default_rule.common.list, &fib4_rules); + list_add_tail(&local_rule.common.list, &fib4_rules_ops.rules_list); + list_add_tail(&main_rule.common.list, &fib4_rules_ops.rules_list); + list_add_tail(&default_rule.common.list, &fib4_rules_ops.rules_list); fib_rules_register(&fib4_rules_ops); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c434119deb52..1351a2617dce 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -533,7 +533,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, return -EINVAL; if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) + if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) return -ENETDOWN; @@ -743,7 +743,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) int remaining; nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla->nla_type; + int type = nla_type(nla); if (type) { if (type > RTAX_MAX) @@ -799,7 +799,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 9ca786a6fd3c..81a8285d6d6a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -73,6 +73,7 @@ #include <linux/netlink.h> #include <linux/init.h> #include <linux/list.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -85,23 +86,14 @@ #define MAX_STAT_DEPTH 32 #define KEYLENGTH (8*sizeof(t_key)) -#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) -#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) typedef unsigned int t_key; #define T_TNODE 0 #define T_LEAF 1 #define NODE_TYPE_MASK 0x1UL -#define NODE_PARENT(node) \ - ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK))) - #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) -#define NODE_SET_PARENT(node, ptr) \ - rcu_assign_pointer((node)->parent, \ - ((unsigned long)(ptr)) | NODE_TYPE(node)) - #define IS_TNODE(n) (!(n->parent & T_LEAF)) #define IS_LEAF(n) (n->parent & T_LEAF) @@ -174,6 +166,19 @@ static void tnode_free(struct tnode *tn); static struct kmem_cache *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; +static inline struct tnode *node_parent(struct node *node) +{ + struct tnode *ret; + + ret = (struct tnode *)(node->parent & ~NODE_TYPE_MASK); + return rcu_dereference(ret); +} + +static inline void node_set_parent(struct node *node, struct tnode *ptr) +{ + rcu_assign_pointer(node->parent, + (unsigned long)ptr | NODE_TYPE(node)); +} /* rcu_read_lock needs to be hold by caller from readside */ @@ -189,6 +194,11 @@ static inline int tnode_child_length(const struct tnode *tn) return 1 << tn->bits; } +static inline t_key mask_pfx(t_key k, unsigned short l) +{ + return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); +} + static inline t_key tkey_extract_bits(t_key a, int offset, int bits) { if (offset < KEYLENGTH) @@ -446,7 +456,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w tn->full_children++; if (n) - NODE_SET_PARENT(n, tn); + node_set_parent(n, tn); rcu_assign_pointer(tn->child[i], n); } @@ -481,7 +491,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) continue; /* compress one level */ - NODE_SET_PARENT(n, NULL); + node_set_parent(n, NULL); tnode_free(tn); return n; } @@ -636,7 +646,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* compress one level */ - NODE_SET_PARENT(n, NULL); + node_set_parent(n, NULL); tnode_free(tn); return n; } @@ -673,7 +683,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) inode->pos == oldtnode->pos + oldtnode->bits && inode->bits > 1) { struct tnode *left, *right; - t_key m = TKEY_GET_MASK(inode->pos, 1); + t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; left = tnode_new(inode->key&(~m), inode->pos + 1, inode->bits - 1); @@ -961,24 +971,21 @@ fib_find_node(struct trie *t, u32 key) static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key; - struct tnode *tp = NULL; - - key = tn->key; - - while (tn != NULL && NODE_PARENT(tn) != NULL) { + t_key cindex, key = tn->key; + struct tnode *tp; - tp = NODE_PARENT(tn); + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); tn = (struct tnode *) resize (t, (struct tnode *)tn); tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); - if (!NODE_PARENT(tn)) + tp = node_parent((struct node *) tn); + if (!tp) break; - - tn = NODE_PARENT(tn); + tn = tp; } + /* Handle last (top) tnode */ if (IS_TNODE(tn)) tn = (struct tnode*) resize(t, (struct tnode *)tn); @@ -1031,7 +1038,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) pos = tn->pos + tn->bits; n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); - BUG_ON(n && NODE_PARENT(n) != tn); + BUG_ON(n && node_parent(n) != tn); } else break; } @@ -1083,7 +1090,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) if (t->trie && n == NULL) { /* Case 2: n is NULL, and will just insert a new leaf */ - NODE_SET_PARENT(l, tp); + node_set_parent((struct node *)l, tp); cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, (struct node *)l); @@ -1114,7 +1121,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) goto err; } - NODE_SET_PARENT(tn, tp); + node_set_parent((struct node *)tn, tp); missbit = tkey_extract_bits(key, newpos, 1); put_child(t, tn, missbit, (struct node *)l); @@ -1364,7 +1371,8 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result bits = pn->bits; if (!chopped_off) - cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits); + cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), + pos, bits); n = tnode_get_child(pn, cindex); @@ -1450,8 +1458,8 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result * to find a matching prefix. */ - node_prefix = MASK_PFX(cn->key, cn->pos); - key_prefix = MASK_PFX(key, cn->pos); + node_prefix = mask_pfx(cn->key, cn->pos); + key_prefix = mask_pfx(key, cn->pos); pref_mismatch = key_prefix^node_prefix; mp = 0; @@ -1495,12 +1503,13 @@ backtrace: if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); } else { - if (NODE_PARENT(pn) == NULL) + struct tnode *parent = node_parent((struct node *) pn); + if (!parent) goto failed; /* Get Child's index */ - cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); - pn = NODE_PARENT(pn); + cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); + pn = parent; chopped_off = 0; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -1536,7 +1545,7 @@ static int trie_leaf_remove(struct trie *t, t_key key) check_tnode(tn); n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); - BUG_ON(n && NODE_PARENT(n) != tn); + BUG_ON(n && node_parent(n) != tn); } l = (struct leaf *) n; @@ -1551,7 +1560,7 @@ static int trie_leaf_remove(struct trie *t, t_key key) t->revision++; t->size--; - tp = NODE_PARENT(n); + tp = node_parent(n); tnode_free((struct tnode *) n); if (tp) { @@ -1703,7 +1712,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) p = (struct tnode*) trie; /* Start */ } else - p = (struct tnode *) NODE_PARENT(c); + p = node_parent(c); while (p) { int pos, last; @@ -1740,7 +1749,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) up: /* No more children go up one step */ c = (struct node *) p; - p = (struct tnode *) NODE_PARENT(p); + p = node_parent(c); } return NULL; /* Ready. Root of trie */ } @@ -2043,7 +2052,7 @@ rescan: } /* Current node exhausted, pop back up */ - p = NODE_PARENT(tn); + p = node_parent((struct node *)tn); if (p) { cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; tn = p; @@ -2317,7 +2326,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) return 0; - if (!NODE_PARENT(n)) { + if (!node_parent(n)) { if (iter->trie == trie_local) seq_puts(seq, "<local>:\n"); else @@ -2326,7 +2335,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) if (IS_TNODE(n)) { struct tnode *tn = (struct tnode *) n; - __be32 prf = htonl(MASK_PFX(tn->key, tn->pos)); + __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); seq_indent(seq, iter->depth-1); seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", @@ -2370,25 +2379,8 @@ static const struct seq_operations fib_trie_seq_ops = { static int fib_trie_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &fib_trie_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; - memset(s, 0, sizeof(*s)); -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &fib_trie_seq_ops, + sizeof(struct fib_trie_iter)); } static const struct file_operations fib_trie_fops = { @@ -2491,25 +2483,8 @@ static const struct seq_operations fib_route_seq_ops = { static int fib_route_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &fib_route_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; - memset(s, 0, sizeof(*s)); -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &fib_route_seq_ops, + sizeof(struct fib_trie_iter)); } static const struct file_operations fib_route_fops = { @@ -2522,30 +2497,30 @@ static const struct file_operations fib_route_fops = { int __init fib_proc_init(void) { - if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops)) + if (!proc_net_fops_create(&init_net, "fib_trie", S_IRUGO, &fib_trie_fops)) goto out1; - if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops)) + if (!proc_net_fops_create(&init_net, "fib_triestat", S_IRUGO, &fib_triestat_fops)) goto out2; - if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops)) + if (!proc_net_fops_create(&init_net, "route", S_IRUGO, &fib_route_fops)) goto out3; return 0; out3: - proc_net_remove("fib_triestat"); + proc_net_remove(&init_net, "fib_triestat"); out2: - proc_net_remove("fib_trie"); + proc_net_remove(&init_net, "fib_trie"); out1: return -ENOMEM; } void __init fib_proc_exit(void) { - proc_net_remove("fib_trie"); - proc_net_remove("fib_triestat"); - proc_net_remove("route"); + proc_net_remove(&init_net, "fib_trie"); + proc_net_remove(&init_net, "fib_triestat"); + proc_net_remove(&init_net, "route"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 02a899bec196..272c69e106e9 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -115,6 +115,7 @@ struct icmp_bxm { * Statistics */ DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly; +DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics) __read_mostly; /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ @@ -214,8 +215,6 @@ int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly; */ struct icmp_control { - int output_entry; /* Field for increment on output */ - int input_entry; /* Field for increment on input */ void (*handler)(struct sk_buff *skb); short error; /* This ICMP is classed as an error message */ }; @@ -316,12 +315,10 @@ out: /* * Maintain the counters used in the SNMP statistics for outgoing ICMP */ -static void icmp_out_count(int type) +void icmp_out_count(unsigned char type) { - if (type <= NR_ICMP_TYPES) { - ICMP_INC_STATS(icmp_pointers[type].output_entry); - ICMP_INC_STATS(ICMP_MIB_OUTMSGS); - } + ICMPMSGOUT_INC_STATS(type); + ICMP_INC_STATS(ICMP_MIB_OUTMSGS); } /* @@ -390,7 +387,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) return; icmp_param->data.icmph.checksum = 0; - icmp_out_count(icmp_param->data.icmph.type); inet->tos = ip_hdr(skb)->tos; daddr = ipc.addr = rt->rt_src; @@ -517,7 +513,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct net_device *dev = NULL; if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index(rt->fl.iif); + dev = dev_get_by_index(&init_net, rt->fl.iif); if (dev) { saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); @@ -952,6 +948,7 @@ int icmp_rcv(struct sk_buff *skb) icmph = icmp_hdr(skb); + ICMPMSGIN_INC_STATS_BH(icmph->type); /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * @@ -986,7 +983,6 @@ int icmp_rcv(struct sk_buff *skb) } } - ICMP_INC_STATS_BH(icmp_pointers[icmph->type].input_entry); icmp_pointers[icmph->type].handler(skb); drop: @@ -1002,109 +998,71 @@ error: */ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { [ICMP_ECHOREPLY] = { - .output_entry = ICMP_MIB_OUTECHOREPS, - .input_entry = ICMP_MIB_INECHOREPS, .handler = icmp_discard, }, [1] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [2] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [ICMP_DEST_UNREACH] = { - .output_entry = ICMP_MIB_OUTDESTUNREACHS, - .input_entry = ICMP_MIB_INDESTUNREACHS, .handler = icmp_unreach, .error = 1, }, [ICMP_SOURCE_QUENCH] = { - .output_entry = ICMP_MIB_OUTSRCQUENCHS, - .input_entry = ICMP_MIB_INSRCQUENCHS, .handler = icmp_unreach, .error = 1, }, [ICMP_REDIRECT] = { - .output_entry = ICMP_MIB_OUTREDIRECTS, - .input_entry = ICMP_MIB_INREDIRECTS, .handler = icmp_redirect, .error = 1, }, [6] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [7] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [ICMP_ECHO] = { - .output_entry = ICMP_MIB_OUTECHOS, - .input_entry = ICMP_MIB_INECHOS, .handler = icmp_echo, }, [9] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [10] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [ICMP_TIME_EXCEEDED] = { - .output_entry = ICMP_MIB_OUTTIMEEXCDS, - .input_entry = ICMP_MIB_INTIMEEXCDS, .handler = icmp_unreach, .error = 1, }, [ICMP_PARAMETERPROB] = { - .output_entry = ICMP_MIB_OUTPARMPROBS, - .input_entry = ICMP_MIB_INPARMPROBS, .handler = icmp_unreach, .error = 1, }, [ICMP_TIMESTAMP] = { - .output_entry = ICMP_MIB_OUTTIMESTAMPS, - .input_entry = ICMP_MIB_INTIMESTAMPS, .handler = icmp_timestamp, }, [ICMP_TIMESTAMPREPLY] = { - .output_entry = ICMP_MIB_OUTTIMESTAMPREPS, - .input_entry = ICMP_MIB_INTIMESTAMPREPS, .handler = icmp_discard, }, [ICMP_INFO_REQUEST] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_DUMMY, .handler = icmp_discard, }, [ICMP_INFO_REPLY] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_DUMMY, .handler = icmp_discard, }, [ICMP_ADDRESS] = { - .output_entry = ICMP_MIB_OUTADDRMASKS, - .input_entry = ICMP_MIB_INADDRMASKS, .handler = icmp_address, }, [ICMP_ADDRESSREPLY] = { - .output_entry = ICMP_MIB_OUTADDRMASKREPS, - .input_entry = ICMP_MIB_INADDRMASKREPS, .handler = icmp_address_reply, }, }; @@ -1146,4 +1104,5 @@ void __init icmp_init(struct net_proto_family *ops) EXPORT_SYMBOL(icmp_err_convert); EXPORT_SYMBOL(icmp_send); EXPORT_SYMBOL(icmp_statistics); +EXPORT_SYMBOL(icmpmsg_statistics); EXPORT_SYMBOL(xrlim_allow); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a646409c2d06..7dbc282d4f9f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -91,6 +91,7 @@ #include <linux/rtnetlink.h> #include <linux/times.h> +#include <net/net_namespace.h> #include <net/arp.h> #include <net/ip.h> #include <net/protocol.h> @@ -1694,8 +1695,8 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST - struct in_device *in_dev = pmc->interface; struct ip_sf_list *psf; + in_dev = pmc->interface; #endif /* filter mode change */ @@ -1798,7 +1799,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, { int err; - if (iml->sflist == 0) { + if (iml->sflist == NULL) { /* any-source empty exclude case */ return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, 0, NULL, 0); @@ -2166,7 +2167,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, return -EFAULT; } for (i=0; i<copycount; i++) { - struct sockaddr_in *psin; struct sockaddr_storage ss; psin = (struct sockaddr_in *)&ss; @@ -2291,7 +2291,7 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); state->in_dev = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct in_device *in_dev; in_dev = in_dev_get(state->dev); if (!in_dev) @@ -2410,23 +2410,8 @@ static const struct seq_operations igmp_mc_seq_ops = { static int igmp_mc_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct igmp_mc_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - rc = seq_open(file, &igmp_mc_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &igmp_mc_seq_ops, + sizeof(struct igmp_mc_iter_state)); } static const struct file_operations igmp_mc_seq_fops = { @@ -2453,7 +2438,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) state->idev = NULL; state->im = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct in_device *idev; idev = in_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2584,23 +2569,8 @@ static const struct seq_operations igmp_mcf_seq_ops = { static int igmp_mcf_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct igmp_mcf_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - rc = seq_open(file, &igmp_mcf_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &igmp_mcf_seq_ops, + sizeof(struct igmp_mcf_iter_state)); } static const struct file_operations igmp_mcf_seq_fops = { @@ -2613,8 +2583,8 @@ static const struct file_operations igmp_mcf_seq_fops = { int __init igmp_mc_proc_init(void) { - proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops); - proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops); + proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); return 0; } #endif diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fbe7714f21d0..3cef12835c4b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -33,6 +33,19 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); * This array holds the first and last local port number. */ int sysctl_local_port_range[2] = { 32768, 61000 }; +DEFINE_SEQLOCK(sysctl_port_range_lock); + +void inet_get_local_port_range(int *low, int *high) +{ + unsigned seq; + do { + seq = read_seqbegin(&sysctl_port_range_lock); + + *low = sysctl_local_port_range[0]; + *high = sysctl_local_port_range[1]; + } while (read_seqretry(&sysctl_port_range_lock, seq)); +} +EXPORT_SYMBOL(inet_get_local_port_range); int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) @@ -77,10 +90,11 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, local_bh_disable(); if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover = net_random() % (high - low) + low; + int remaining, rover, low, high; + + inet_get_local_port_range(&low, &high); + remaining = high - low; + rover = net_random() % remaining + low; do { head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index def007ec1d6f..7eb83ebed2ec 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -11,6 +11,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/fcntl.h> @@ -112,7 +113,7 @@ static int inet_csk_diag_fill(struct sock *sk, } #endif -#define EXPIRES_IN_MS(tmo) ((tmo - jiffies) * 1000 + HZ - 1) / HZ +#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) if (icsk->icsk_pending == ICSK_TIME_RETRANS) { r->idiag_timer = 1; @@ -190,7 +191,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->id.idiag_dst[0] = tw->tw_daddr; r->idiag_state = tw->tw_substate; r->idiag_timer = 3; - r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ; + r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; @@ -838,15 +839,11 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) static DEFINE_MUTEX(inet_diag_mutex); -static void inet_diag_rcv(struct sock *sk, int len) +static void inet_diag_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - mutex_lock(&inet_diag_mutex); - netlink_run_queue(sk, &qlen, &inet_diag_rcv_msg); - mutex_unlock(&inet_diag_mutex); - } while (qlen); + mutex_lock(&inet_diag_mutex); + netlink_rcv_skb(skb, &inet_diag_rcv_msg); + mutex_unlock(&inet_diag_mutex); } static DEFINE_SPINLOCK(inet_diag_register_lock); @@ -896,8 +893,8 @@ static int __init inet_diag_init(void) if (!inet_diag_table) goto out; - idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, - NULL, THIS_MODULE); + idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, + inet_diag_rcv, NULL, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; err = 0; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fb662621c54e..fac6398e4367 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -279,19 +279,18 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, int ret; if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; + int i, remaining, low, high, port; static u32 hint; u32 offset = hint + inet_sk_port_offset(sk); struct hlist_node *node; struct inet_timewait_sock *tw = NULL; + inet_get_local_port_range(&low, &high); + remaining = high - low; + local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; + for (i = 1; i <= remaining; i++) { + port = low + (i + offset) % remaining; head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; spin_lock(&head->lock); diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c new file mode 100644 index 000000000000..4545b64e2815 --- /dev/null +++ b/net/ipv4/inet_lro.c @@ -0,0 +1,600 @@ +/* + * linux/net/ipv4/inet_lro.c + * + * Large Receive Offload (ipv4 / tcp) + * + * (C) Copyright IBM Corp. 2007 + * + * Authors: + * Jan-Bernd Themann <themann@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#include <linux/module.h> +#include <linux/if_vlan.h> +#include <linux/inet_lro.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>"); +MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)"); + +#define TCP_HDR_LEN(tcph) (tcph->doff << 2) +#define IP_HDR_LEN(iph) (iph->ihl << 2) +#define TCP_PAYLOAD_LENGTH(iph, tcph) \ + (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph)) + +#define IPH_LEN_WO_OPTIONS 5 +#define TCPH_LEN_WO_OPTIONS 5 +#define TCPH_LEN_W_TIMESTAMP 8 + +#define LRO_MAX_PG_HLEN 64 + +#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; } + +/* + * Basic tcp checks whether packet is suitable for LRO + */ + +static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, + int len, struct net_lro_desc *lro_desc) +{ + /* check ip header: don't aggregate padded frames */ + if (ntohs(iph->tot_len) != len) + return -1; + + if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0) + return -1; + + if (iph->ihl != IPH_LEN_WO_OPTIONS) + return -1; + + if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack + || tcph->rst || tcph->syn || tcph->fin) + return -1; + + if (INET_ECN_is_ce(ipv4_get_dsfield(iph))) + return -1; + + if (tcph->doff != TCPH_LEN_WO_OPTIONS + && tcph->doff != TCPH_LEN_W_TIMESTAMP) + return -1; + + /* check tcp options (only timestamp allowed) */ + if (tcph->doff == TCPH_LEN_W_TIMESTAMP) { + u32 *topt = (u32 *)(tcph + 1); + + if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) + | TCPOLEN_TIMESTAMP)) + return -1; + + /* timestamp should be in right order */ + topt++; + if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval), + ntohl(*topt))) + return -1; + + /* timestamp reply should not be zero */ + topt++; + if (*topt == 0) + return -1; + } + + return 0; +} + +static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) +{ + struct iphdr *iph = lro_desc->iph; + struct tcphdr *tcph = lro_desc->tcph; + u32 *p; + __wsum tcp_hdr_csum; + + tcph->ack_seq = lro_desc->tcp_ack; + tcph->window = lro_desc->tcp_window; + + if (lro_desc->tcp_saw_tstamp) { + p = (u32 *)(tcph + 1); + *(p+2) = lro_desc->tcp_rcv_tsecr; + } + + iph->tot_len = htons(lro_desc->ip_tot_len); + + iph->check = 0; + iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); + + tcph->check = 0; + tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0); + lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); + tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + lro_desc->ip_tot_len - + IP_HDR_LEN(iph), IPPROTO_TCP, + lro_desc->data_csum); +} + +static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) +{ + __wsum tcp_csum; + __wsum tcp_hdr_csum; + __wsum tcp_ps_hdr_csum; + + tcp_csum = ~csum_unfold(tcph->check); + tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum); + + tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + len + TCP_HDR_LEN(tcph), + IPPROTO_TCP, 0); + + return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum), + tcp_ps_hdr_csum); +} + +static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, + struct iphdr *iph, struct tcphdr *tcph, + u16 vlan_tag, struct vlan_group *vgrp) +{ + int nr_frags; + u32 *ptr; + u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); + + nr_frags = skb_shinfo(skb)->nr_frags; + lro_desc->parent = skb; + lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]); + lro_desc->iph = iph; + lro_desc->tcph = tcph; + lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len; + lro_desc->tcp_ack = ntohl(tcph->ack_seq); + lro_desc->tcp_window = tcph->window; + + lro_desc->pkt_aggr_cnt = 1; + lro_desc->ip_tot_len = ntohs(iph->tot_len); + + if (tcph->doff == 8) { + ptr = (u32 *)(tcph+1); + lro_desc->tcp_saw_tstamp = 1; + lro_desc->tcp_rcv_tsval = *(ptr+1); + lro_desc->tcp_rcv_tsecr = *(ptr+2); + } + + lro_desc->mss = tcp_data_len; + lro_desc->vgrp = vgrp; + lro_desc->vlan_tag = vlan_tag; + lro_desc->active = 1; + + lro_desc->data_csum = lro_tcp_data_csum(iph, tcph, + tcp_data_len); +} + +static inline void lro_clear_desc(struct net_lro_desc *lro_desc) +{ + memset(lro_desc, 0, sizeof(struct net_lro_desc)); +} + +static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph, + struct tcphdr *tcph, int tcp_data_len) +{ + struct sk_buff *parent = lro_desc->parent; + u32 *topt; + + lro_desc->pkt_aggr_cnt++; + lro_desc->ip_tot_len += tcp_data_len; + lro_desc->tcp_next_seq += tcp_data_len; + lro_desc->tcp_window = tcph->window; + lro_desc->tcp_ack = tcph->ack_seq; + + /* don't update tcp_rcv_tsval, would not work with PAWS */ + if (lro_desc->tcp_saw_tstamp) { + topt = (u32 *) (tcph + 1); + lro_desc->tcp_rcv_tsecr = *(topt + 2); + } + + lro_desc->data_csum = csum_block_add(lro_desc->data_csum, + lro_tcp_data_csum(iph, tcph, + tcp_data_len), + parent->len); + + parent->len += tcp_data_len; + parent->data_len += tcp_data_len; + if (tcp_data_len > lro_desc->mss) + lro_desc->mss = tcp_data_len; +} + +static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb, + struct iphdr *iph, struct tcphdr *tcph) +{ + struct sk_buff *parent = lro_desc->parent; + int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); + + lro_add_common(lro_desc, iph, tcph, tcp_data_len); + + skb_pull(skb, (skb->len - tcp_data_len)); + parent->truesize += skb->truesize; + + if (lro_desc->last_skb) + lro_desc->last_skb->next = skb; + else + skb_shinfo(parent)->frag_list = skb; + + lro_desc->last_skb = skb; +} + +static void lro_add_frags(struct net_lro_desc *lro_desc, + int len, int hlen, int truesize, + struct skb_frag_struct *skb_frags, + struct iphdr *iph, struct tcphdr *tcph) +{ + struct sk_buff *skb = lro_desc->parent; + int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); + + lro_add_common(lro_desc, iph, tcph, tcp_data_len); + + skb->truesize += truesize; + + skb_frags[0].page_offset += hlen; + skb_frags[0].size -= hlen; + + while (tcp_data_len > 0) { + *(lro_desc->next_frag) = *skb_frags; + tcp_data_len -= skb_frags->size; + lro_desc->next_frag++; + skb_frags++; + skb_shinfo(skb)->nr_frags++; + } +} + +static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, + struct iphdr *iph, + struct tcphdr *tcph) +{ + if ((lro_desc->iph->saddr != iph->saddr) + || (lro_desc->iph->daddr != iph->daddr) + || (lro_desc->tcph->source != tcph->source) + || (lro_desc->tcph->dest != tcph->dest)) + return -1; + return 0; +} + +static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr, + struct net_lro_desc *lro_arr, + struct iphdr *iph, + struct tcphdr *tcph) +{ + struct net_lro_desc *lro_desc = NULL; + struct net_lro_desc *tmp; + int max_desc = lro_mgr->max_desc; + int i; + + for (i = 0; i < max_desc; i++) { + tmp = &lro_arr[i]; + if (tmp->active) + if (!lro_check_tcp_conn(tmp, iph, tcph)) { + lro_desc = tmp; + goto out; + } + } + + for (i = 0; i < max_desc; i++) { + if (!lro_arr[i].active) { + lro_desc = &lro_arr[i]; + goto out; + } + } + + LRO_INC_STATS(lro_mgr, no_desc); +out: + return lro_desc; +} + +static void lro_flush(struct net_lro_mgr *lro_mgr, + struct net_lro_desc *lro_desc) +{ + if (lro_desc->pkt_aggr_cnt > 1) + lro_update_tcp_ip_header(lro_desc); + + skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; + + if (lro_desc->vgrp) { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + vlan_hwaccel_receive_skb(lro_desc->parent, + lro_desc->vgrp, + lro_desc->vlan_tag); + else + vlan_hwaccel_rx(lro_desc->parent, + lro_desc->vgrp, + lro_desc->vlan_tag); + + } else { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + netif_receive_skb(lro_desc->parent); + else + netif_rx(lro_desc->parent); + } + + LRO_INC_STATS(lro_mgr, flushed); + lro_clear_desc(lro_desc); +} + +static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, + struct vlan_group *vgrp, u16 vlan_tag, void *priv) +{ + struct net_lro_desc *lro_desc; + struct iphdr *iph; + struct tcphdr *tcph; + u64 flags; + int vlan_hdr_len = 0; + + if (!lro_mgr->get_skb_header + || lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, + &flags, priv)) + goto out; + + if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) + goto out; + + lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); + if (!lro_desc) + goto out; + + if ((skb->protocol == htons(ETH_P_8021Q)) + && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features)) + vlan_hdr_len = VLAN_HLEN; + + if (!lro_desc->active) { /* start new lro session */ + if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL)) + goto out; + + skb->ip_summed = lro_mgr->ip_summed_aggr; + lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp); + LRO_INC_STATS(lro_mgr, aggregated); + return 0; + } + + if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) + goto out2; + + if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc)) + goto out2; + + lro_add_packet(lro_desc, skb, iph, tcph); + LRO_INC_STATS(lro_mgr, aggregated); + + if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) || + lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) + lro_flush(lro_mgr, lro_desc); + + return 0; + +out2: /* send aggregated SKBs to stack */ + lro_flush(lro_mgr, lro_desc); + +out: /* Original SKB has to be posted to stack */ + skb->ip_summed = lro_mgr->ip_summed; + return 1; +} + + +static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, + void *mac_hdr, + int hlen, __wsum sum, + u32 ip_summed) +{ + struct sk_buff *skb; + struct skb_frag_struct *skb_frags; + int data_len = len; + int hdr_len = min(len, hlen); + + skb = netdev_alloc_skb(lro_mgr->dev, hlen); + if (!skb) + return NULL; + + skb->len = len; + skb->data_len = len - hdr_len; + skb->truesize += true_size; + skb->tail += hdr_len; + + memcpy(skb->data, mac_hdr, hdr_len); + + skb_frags = skb_shinfo(skb)->frags; + while (data_len > 0) { + *skb_frags = *frags; + data_len -= frags->size; + skb_frags++; + frags++; + skb_shinfo(skb)->nr_frags++; + } + + skb_shinfo(skb)->frags[0].page_offset += hdr_len; + skb_shinfo(skb)->frags[0].size -= hdr_len; + + skb->ip_summed = ip_summed; + skb->csum = sum; + skb->protocol = eth_type_trans(skb, lro_mgr->dev); + return skb; +} + +static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, + struct vlan_group *vgrp, + u16 vlan_tag, void *priv, __wsum sum) +{ + struct net_lro_desc *lro_desc; + struct iphdr *iph; + struct tcphdr *tcph; + struct sk_buff *skb; + u64 flags; + void *mac_hdr; + int mac_hdr_len; + int hdr_len = LRO_MAX_PG_HLEN; + int vlan_hdr_len = 0; + + if (!lro_mgr->get_frag_header + || lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, + (void *)&tcph, &flags, priv)) { + mac_hdr = page_address(frags->page) + frags->page_offset; + goto out1; + } + + if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) + goto out1; + + hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr); + mac_hdr_len = (int)((void *)(iph) - mac_hdr); + + lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); + if (!lro_desc) + goto out1; + + if (!lro_desc->active) { /* start new lro session */ + if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL)) + goto out1; + + skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, + hdr_len, 0, lro_mgr->ip_summed_aggr); + if (!skb) + goto out; + + if ((skb->protocol == htons(ETH_P_8021Q)) + && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features)) + vlan_hdr_len = VLAN_HLEN; + + iph = (void *)(skb->data + vlan_hdr_len); + tcph = (void *)((u8 *)skb->data + vlan_hdr_len + + IP_HDR_LEN(iph)); + + lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL); + LRO_INC_STATS(lro_mgr, aggregated); + return NULL; + } + + if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) + goto out2; + + if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc)) + goto out2; + + lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph); + LRO_INC_STATS(lro_mgr, aggregated); + + if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) || + lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) + lro_flush(lro_mgr, lro_desc); + + return NULL; + +out2: /* send aggregated packets to the stack */ + lro_flush(lro_mgr, lro_desc); + +out1: /* Original packet has to be posted to the stack */ + skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, + hdr_len, sum, lro_mgr->ip_summed); +out: + return skb; +} + +void lro_receive_skb(struct net_lro_mgr *lro_mgr, + struct sk_buff *skb, + void *priv) +{ + if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + netif_receive_skb(skb); + else + netif_rx(skb); + } +} +EXPORT_SYMBOL(lro_receive_skb); + +void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr, + struct sk_buff *skb, + struct vlan_group *vgrp, + u16 vlan_tag, + void *priv) +{ + if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); + else + vlan_hwaccel_rx(skb, vgrp, vlan_tag); + } +} +EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb); + +void lro_receive_frags(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, void *priv, __wsum sum) +{ + struct sk_buff *skb; + + skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0, + priv, sum); + if (!skb) + return; + + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + netif_receive_skb(skb); + else + netif_rx(skb); +} +EXPORT_SYMBOL(lro_receive_frags); + +void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, + struct vlan_group *vgrp, + u16 vlan_tag, void *priv, __wsum sum) +{ + struct sk_buff *skb; + + skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp, + vlan_tag, priv, sum); + if (!skb) + return; + + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); + else + vlan_hwaccel_rx(skb, vgrp, vlan_tag); +} +EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags); + +void lro_flush_all(struct net_lro_mgr *lro_mgr) +{ + int i; + struct net_lro_desc *lro_desc = lro_mgr->lro_arr; + + for (i = 0; i < lro_mgr->max_desc; i++) { + if (lro_desc[i].active) + lro_flush(lro_mgr, &lro_desc[i]); + } +} +EXPORT_SYMBOL(lro_flush_all); + +void lro_flush_pkt(struct net_lro_mgr *lro_mgr, + struct iphdr *iph, struct tcphdr *tcph) +{ + struct net_lro_desc *lro_desc; + + lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); + if (lro_desc->active) + lro_flush(lro_mgr, lro_desc); +} +EXPORT_SYMBOL(lro_flush_pkt); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 2586df09b9b6..4e189e28f306 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -8,7 +8,7 @@ * From code orinally in TCP */ - +#include <linux/kernel.h> #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> #include <net/ip.h> @@ -292,7 +292,7 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, if (timeo >= timewait_len) { slot = INET_TWDR_TWKILL_SLOTS - 1; } else { - slot = (timeo + twdr->period - 1) / twdr->period; + slot = DIV_ROUND_UP(timeo, twdr->period); if (slot >= INET_TWDR_TWKILL_SLOTS) slot = INET_TWDR_TWKILL_SLOTS - 1; } diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 8c95cf09f87a..afbf938836f5 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -105,7 +105,7 @@ int ip_forward(struct sk_buff *skb) * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ - if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr) + if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb->sp) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0231bdcb2ab7..fabb86db763b 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -292,7 +292,7 @@ static void ip_expire(unsigned long arg) if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) { struct sk_buff *head = qp->fragments; /* Send an ICMP "Fragment Reassembly Timeout" message. */ - if ((head->dev = dev_get_by_index(qp->iif)) != NULL) { + if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) { icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); dev_put(head->dev); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5c14ed63e56c..f151900efaf9 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -262,7 +262,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int int i; for (i=1; i<100; i++) { sprintf(name, "gre%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i==100) @@ -684,7 +684,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error; } - if (dev->hard_header) { + if (dev->header_ops) { gre_hlen = 0; tiph = (struct iphdr*)skb->data; } else { @@ -1063,8 +1063,9 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) */ -static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) +static int ipgre_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { struct ip_tunnel *t = netdev_priv(dev); struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); @@ -1091,6 +1092,10 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh return -t->hlen; } +static const struct header_ops ipgre_header_ops = { + .create = ipgre_header, +}; + static int ipgre_open(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); @@ -1132,7 +1137,6 @@ static int ipgre_close(struct net_device *dev) static void ipgre_tunnel_setup(struct net_device *dev) { - SET_MODULE_OWNER(dev); dev->uninit = ipgre_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipgre_tunnel_xmit; @@ -1188,7 +1192,7 @@ static int ipgre_tunnel_init(struct net_device *dev) if (!iph->saddr) return -EINVAL; dev->flags = IFF_BROADCAST; - dev->hard_header = ipgre_header; + dev->header_ops = &ipgre_header_ops; dev->open = ipgre_open; dev->stop = ipgre_close; } @@ -1196,7 +1200,7 @@ static int ipgre_tunnel_init(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { hlen = tdev->hard_header_len; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 97069399d864..41d8964591e7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -382,6 +382,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct iphdr *iph; u32 len; + if (dev->nd_net != &init_net) + goto drop; + /* When the interface is in promisc. mode, drop all the crap * that it receives, do not try to analyse it. */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0f1d7beacf78..699f06781fd8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -169,7 +169,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS); /* Be paranoid, rather than too clever. */ - if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) { + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { struct sk_buff *skb2; skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); @@ -1261,6 +1261,10 @@ int ip_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->dst = dst_clone(&rt->u.dst); + if (iph->protocol == IPPROTO_ICMP) + icmp_out_count(((struct icmphdr *) + skb_transport_header(skb))->type); + /* Netfilter gets whole the not fragmented skb. */ err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6b420aedcdcf..f51f20e487c8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -602,7 +602,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, dev_put(dev); } } else - dev = __dev_get_by_index(mreq.imr_ifindex); + dev = __dev_get_by_index(&init_net, mreq.imr_ifindex); err = -EADDRNOTAVAIL; @@ -659,7 +659,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; } msf = kmalloc(optlen, GFP_KERNEL); - if (msf == 0) { + if (!msf) { err = -ENOBUFS; break; } @@ -816,7 +816,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; } gsf = kmalloc(optlen,GFP_KERNEL); - if (gsf == 0) { + if (!gsf) { err = -ENOBUFS; break; } @@ -836,7 +836,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, } msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); msf = kmalloc(msize,GFP_KERNEL); - if (msf == 0) { + if (!msf) { err = -ENOBUFS; goto mc_msf_out; } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index e787044a8514..0bfeb02a5f87 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -75,7 +75,6 @@ out: static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -ENOMEM; - struct iphdr *iph; struct ip_comp_hdr *ipch; if (skb_linearize_cow(skb)) @@ -84,12 +83,14 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; /* Remove ipcomp header and decompress original payload */ - iph = ip_hdr(skb); ipch = (void *)skb->data; - iph->protocol = ipch->nexthdr; skb->transport_header = skb->network_header + sizeof(*ipch); __skb_pull(skb, sizeof(*ipch)); err = ipcomp_decompress(x, skb); + if (err) + goto out; + + err = ipch->nexthdr; out: return err; @@ -98,10 +99,9 @@ out: static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) { struct ipcomp_data *ipcd = x->data; - const int ihlen = ip_hdrlen(skb); - const int plen = skb->len - ihlen; + const int plen = skb->len; int dlen = IPCOMP_SCRATCH_SIZE; - u8 *start = skb->data + ihlen; + u8 *start = skb->data; const int cpu = get_cpu(); u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); @@ -118,7 +118,7 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); put_cpu(); - pskb_trim(skb, ihlen + dlen + sizeof(struct ip_comp_hdr)); + pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); return 0; out: @@ -131,12 +131,8 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) int err; struct ip_comp_hdr *ipch; struct ipcomp_data *ipcd = x->data; - int hdr_len = 0; - struct iphdr *iph = ip_hdr(skb); - iph->tot_len = htons(skb->len); - hdr_len = iph->ihl * 4; - if ((skb->len - hdr_len) < ipcd->threshold) { + if (skb->len < ipcd->threshold) { /* Don't bother compressing */ goto out_ok; } @@ -145,25 +141,19 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) goto out_ok; err = ipcomp_compress(x, skb); - iph = ip_hdr(skb); if (err) { goto out_ok; } /* Install ipcomp header, convert into ipcomp datagram. */ - iph->tot_len = htons(skb->len); - ipch = (struct ip_comp_hdr *)((char *)iph + iph->ihl * 4); - ipch->nexthdr = iph->protocol; + ipch = ip_comp_hdr(skb); + ipch->nexthdr = *skb_mac_header(skb); ipch->flags = 0; ipch->cpi = htons((u16 )ntohl(x->id.spi)); - iph->protocol = IPPROTO_COMP; - ip_send_check(iph); - return 0; - + *skb_mac_header(skb) = IPPROTO_COMP; out_ok: - if (x->props.mode == XFRM_MODE_TUNNEL) - ip_send_check(iph); + skb_push(skb, -skb_network_offset(skb)); return 0; } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index c5b247077539..c5c107a01823 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -55,6 +55,7 @@ #include <linux/root_dev.h> #include <linux/delay.h> #include <linux/nfs_fs.h> +#include <net/net_namespace.h> #include <net/arp.h> #include <net/ip.h> #include <net/ipconfig.h> @@ -189,11 +190,15 @@ static int __init ic_open_devs(void) rtnl_lock(); /* bring loopback device up first */ - if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0) - printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name); + for_each_netdev(&init_net, dev) { + if (!(dev->flags & IFF_LOOPBACK)) + continue; + if (dev_change_flags(dev, dev->flags | IFF_UP) < 0) + printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name); + } - for_each_netdev(dev) { - if (dev == &loopback_dev) + for_each_netdev(&init_net, dev) { + if (dev->flags & IFF_LOOPBACK) continue; if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : (!(dev->flags & IFF_LOOPBACK) && @@ -425,6 +430,9 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt unsigned char *sha, *tha; /* s for "source", t for "target" */ struct ic_device *d; + if (dev->nd_net != &init_net) + goto drop; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; @@ -749,8 +757,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d /* Chain packet down the line... */ skb->dev = dev; skb->protocol = htons(ETH_P_IP); - if ((dev->hard_header && - dev->hard_header(skb, dev, ntohs(skb->protocol), dev->broadcast, dev->dev_addr, skb->len) < 0) || + if (dev_hard_header(skb, dev, ntohs(skb->protocol), + dev->broadcast, dev->dev_addr, skb->len) < 0 || dev_queue_xmit(skb) < 0) printk("E"); } @@ -834,6 +842,9 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str struct ic_device *d; int len, ext_len; + if (dev->nd_net != &init_net) + goto drop; + /* Perform verifications before taking the lock. */ if (skb->pkt_type == PACKET_OTHERHOST) goto drop; @@ -1253,7 +1264,7 @@ static int __init ip_auto_config(void) __be32 addr; #ifdef CONFIG_PROC_FS - proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops); + proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); #endif /* CONFIG_PROC_FS */ if (!ic_enable) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 396437242a1b..5cd5bbe1379a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -225,7 +225,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c int i; for (i=1; i<100; i++) { sprintf(name, "tunl%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i==100) @@ -237,7 +237,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c return NULL; nt = netdev_priv(dev); - SET_MODULE_OWNER(dev); dev->init = ipip_tunnel_init; nt->parms = *parms; @@ -775,7 +774,6 @@ static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) static void ipip_tunnel_setup(struct net_device *dev) { - SET_MODULE_OWNER(dev); dev->uninit = ipip_tunnel_uninit; dev->hard_start_xmit = ipip_tunnel_xmit; dev->get_stats = ipip_tunnel_get_stats; @@ -822,7 +820,7 @@ static int ipip_tunnel_init(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7003cc1b7fe2..37bb497d92af 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -49,6 +49,7 @@ #include <linux/mroute.h> #include <linux/init.h> #include <linux/if_ether.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> #include <linux/skbuff.h> @@ -124,7 +125,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) { struct net_device *dev; - dev = __dev_get_by_name("tunl0"); + dev = __dev_get_by_name(&init_net, "tunl0"); if (dev) { int err; @@ -148,7 +149,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) dev = NULL; - if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { + if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) { dev->flags |= IFF_MULTICAST; in_dev = __in_dev_get_rtnl(dev); @@ -1082,13 +1083,18 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; struct vif_device *v; int ct; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; v=&vif_table[0]; for (ct=0;ct<maxvif;ct++,v++) { - if (v->dev==ptr) + if (v->dev==dev) vif_delete(ct); } return NOTIFY_DONE; @@ -1708,26 +1714,8 @@ static const struct seq_operations ipmr_vif_seq_ops = { static int ipmr_vif_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &ipmr_vif_seq_ops); - if (rc) - goto out_kfree; - - s->ct = 0; - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; - + return seq_open_private(file, &ipmr_vif_seq_ops, + sizeof(struct ipmr_vif_iter)); } static const struct file_operations ipmr_vif_fops = { @@ -1871,25 +1859,8 @@ static const struct seq_operations ipmr_mfc_seq_ops = { static int ipmr_mfc_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &ipmr_mfc_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; - + return seq_open_private(file, &ipmr_mfc_seq_ops, + sizeof(struct ipmr_mfc_iter)); } static const struct file_operations ipmr_mfc_fops = { @@ -1922,7 +1893,7 @@ void __init ip_mr_init(void) ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); #ifdef CONFIG_PROC_FS - proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops); - proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops); + proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops); + proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops); #endif } diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 8d6901d4e94f..341474eefa55 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -25,6 +25,7 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <net/net_namespace.h> #include <net/protocol.h> #include <net/tcp.h> #include <asm/system.h> @@ -616,12 +617,12 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, int ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ - proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops); + proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } void ip_vs_app_cleanup(void) { - proc_net_remove("ip_vs_app"); + proc_net_remove(&init_net, "ip_vs_app"); } diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index d612a6a5d957..4b702f708d30 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -35,6 +35,7 @@ #include <linux/jhash.h> #include <linux/random.h> +#include <net/net_namespace.h> #include <net/ip_vs.h> @@ -922,7 +923,7 @@ int ip_vs_conn_init(void) rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops); + proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); @@ -938,6 +939,6 @@ void ip_vs_conn_cleanup(void) /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); - proc_net_remove("ip_vs_conn"); + proc_net_remove(&init_net, "ip_vs_conn"); vfree(ip_vs_conn_tab); } diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index f005a2f929f4..fbca2a2ff29f 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -961,7 +961,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, * ... don't know why 1st test DOES NOT include 2nd (?) */ if (unlikely(skb->pkt_type != PACKET_HOST - || skb->dev == &loopback_dev || skb->sk)) { + || skb->dev->flags & IFF_LOOPBACK || skb->sk)) { IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", skb->pkt_type, ip_hdr(skb)->protocol, diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index f656d41d8d41..7345fc252a23 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -35,6 +35,7 @@ #include <linux/netfilter_ipv4.h> #include <linux/mutex.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/route.h> #include <net/sock.h> @@ -1791,24 +1792,8 @@ static const struct seq_operations ip_vs_info_seq_ops = { static int ip_vs_info_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &ip_vs_info_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &ip_vs_info_seq_ops, + sizeof(struct ip_vs_iter)); } static const struct file_operations ip_vs_info_fops = { @@ -2356,8 +2341,8 @@ int ip_vs_control_init(void) return ret; } - proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops); + proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); + proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); sysctl_header = register_sysctl_table(vs_root_table); @@ -2390,8 +2375,8 @@ void ip_vs_control_cleanup(void) cancel_work_sync(&defense_work.work); ip_vs_kill_estimator(&ip_vs_stats); unregister_sysctl_table(sysctl_header); - proc_net_remove("ip_vs_stats"); - proc_net_remove("ip_vs"); + proc_net_remove(&init_net, "ip_vs_stats"); + proc_net_remove(&init_net, "ip_vs"); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); } diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 6225acac7a3b..6a1fec416eaf 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -50,6 +50,7 @@ #include <linux/sysctl.h> /* for proc_net_create/proc_net_remove */ #include <linux/proc_fs.h> +#include <net/net_namespace.h> #include <net/ip_vs.h> @@ -843,7 +844,7 @@ static int __init ip_vs_lblcr_init(void) INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); sysctl_header = register_sysctl_table(lblcr_root_table); #ifdef CONFIG_IP_VS_LBLCR_DEBUG - proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); + proc_net_create(&init_net, "ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); #endif return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); } @@ -852,7 +853,7 @@ static int __init ip_vs_lblcr_init(void) static void __exit ip_vs_lblcr_cleanup(void) { #ifdef CONFIG_IP_VS_LBLCR_DEBUG - proc_net_remove("ip_vs_lblcr"); + proc_net_remove(&init_net, "ip_vs_lblcr"); #endif unregister_sysctl_table(sysctl_header); unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 356f067484e3..1960747f354c 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -387,7 +387,7 @@ static int set_mcast_if(struct sock *sk, char *ifname) struct net_device *dev; struct inet_sock *inet = inet_sk(sk); - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) @@ -412,7 +412,7 @@ static int set_sync_mesg_maxlen(int sync_state) int num; if (sync_state == IP_VS_STATE_MASTER) { - if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) return -ENODEV; num = (dev->mtu - sizeof(struct iphdr) - @@ -423,7 +423,7 @@ static int set_sync_mesg_maxlen(int sync_state) IP_VS_DBG(7, "setting the maximum length of sync sending " "message %d.\n", sync_send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { - if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) return -ENODEV; sync_recv_mesg_maxlen = dev->mtu - @@ -451,7 +451,7 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -472,7 +472,7 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) __be32 addr; struct sockaddr_in sin; - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 702d94db19b9..23cbfc7c80fd 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -24,6 +24,7 @@ #include <linux/proc_fs.h> #include <linux/security.h> #include <linux/mutex.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/route.h> @@ -249,10 +250,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) if (entry->info->indev && entry->skb->dev) { pmsg->hw_type = entry->skb->dev->type; - if (entry->skb->dev->hard_header_parse) - pmsg->hw_addrlen = - entry->skb->dev->hard_header_parse(entry->skb, - pmsg->hw_addr); + pmsg->hw_addrlen = dev_parse_header(entry->skb, + pmsg->hw_addr); } if (data_len) @@ -476,7 +475,7 @@ ipq_dev_drop(int ifindex) #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) static inline void -ipq_rcv_skb(struct sk_buff *skb) +__ipq_rcv_skb(struct sk_buff *skb) { int status, type, pid, flags, nlmsglen, skblen; struct nlmsghdr *nlh; @@ -534,19 +533,10 @@ ipq_rcv_skb(struct sk_buff *skb) } static void -ipq_rcv_sk(struct sock *sk, int len) +ipq_rcv_skb(struct sk_buff *skb) { - struct sk_buff *skb; - unsigned int qlen; - mutex_lock(&ipqnl_mutex); - - for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { - skb = skb_dequeue(&sk->sk_receive_queue); - ipq_rcv_skb(skb); - kfree_skb(skb); - } - + __ipq_rcv_skb(skb); mutex_unlock(&ipqnl_mutex); } @@ -556,6 +546,9 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) ipq_dev_drop(dev->ifindex); @@ -575,7 +568,7 @@ ipq_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL && n->pid) { write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if ((n->net == &init_net) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -667,14 +660,14 @@ static int __init ip_queue_init(void) struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, - NULL, THIS_MODULE); + ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, + ipq_rcv_skb, NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } - proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); if (proc) proc->owner = THIS_MODULE; else { @@ -695,8 +688,7 @@ static int __init ip_queue_init(void) cleanup_sysctl: unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); - + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); cleanup_ipqnl: sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); @@ -715,7 +707,7 @@ static void __exit ip_queue_fini(void) unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 69bd362b5fa2..27f14e1ebd8b 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -25,6 +25,7 @@ #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> #include <net/netfilter/nf_conntrack.h> +#include <net/net_namespace.h> #include <net/checksum.h> #define CLUSTERIP_VERSION "0.8" @@ -400,7 +401,7 @@ checkentry(const char *tablename, return false; } - dev = dev_get_by_name(e->ip.iniface); + dev = dev_get_by_name(&init_net, e->ip.iniface); if (!dev) { printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); return false; @@ -726,7 +727,7 @@ static int __init ipt_clusterip_init(void) goto cleanup_target; #ifdef CONFIG_PROC_FS - clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net); + clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); if (!clusterip_procdir) { printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); ret = -ENOMEM; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 7c4e4be7c8b3..3e0b562b2db7 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -125,6 +125,9 @@ static int masq_device_event(struct notifier_block *this, { const struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for conntracks which were associated with that device, diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 6ca43e4ca7e3..c636d6d63574 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -409,7 +409,8 @@ static int __init ipt_ulog_init(void) for (i = 0; i < ULOG_MAXNLGROUPS; i++) setup_timer(&ulog_buffers[i].timer, ulog_timer, i); - nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, + nflognl = netlink_kernel_create(&init_net, + NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 6d0c0f7364ad..11d39fb5f38b 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -24,6 +24,7 @@ #include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/inet.h> +#include <net/net_namespace.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_recent.h> @@ -380,25 +381,14 @@ static const struct seq_operations recent_seq_ops = { static int recent_seq_open(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); - struct seq_file *seq; struct recent_iter_state *st; - int ret; - st = kzalloc(sizeof(*st), GFP_KERNEL); + st = __seq_open_private(file, &recent_seq_ops, sizeof(*st)); if (st == NULL) return -ENOMEM; - ret = seq_open(file, &recent_seq_ops); - if (ret) { - kfree(st); - goto out; - } - st->table = pde->data; - seq = file->private_data; - seq->private = st; -out: - return ret; + return 0; } static ssize_t recent_proc_write(struct file *file, const char __user *input, @@ -487,7 +477,7 @@ static int __init ipt_recent_init(void) #ifdef CONFIG_PROC_FS if (err) return err; - proc_dir = proc_mkdir("ipt_recent", proc_net); + proc_dir = proc_mkdir("ipt_recent", init_net.proc_net); if (proc_dir == NULL) { xt_unregister_match(&recent_match); err = -ENOMEM; @@ -501,7 +491,7 @@ static void __exit ipt_recent_exit(void) BUG_ON(!list_empty(&tables)); xt_unregister_match(&recent_match); #ifdef CONFIG_PROC_FS - remove_proc_entry("ipt_recent", proc_net); + remove_proc_entry("ipt_recent", init_net.proc_net); #endif } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index f813e02aab30..2fcb9249a8da 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -360,35 +360,32 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int ipv4_tuple_to_nfattr(struct sk_buff *skb, +static int ipv4_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), + NLA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.u3.ip); - NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), + NLA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.u3.ip); return 0; -nfattr_failure: +nla_put_failure: return -1; } -static const size_t cta_min_ip[CTA_IP_MAX] = { - [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), - [CTA_IP_V4_DST-1] = sizeof(u_int32_t), +static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = { + [CTA_IP_V4_SRC] = { .type = NLA_U32 }, + [CTA_IP_V4_DST] = { .type = NLA_U32 }, }; -static int ipv4_nfattr_to_tuple(struct nfattr *tb[], +static int ipv4_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t) { - if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1]) + if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) return -EINVAL; - if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) - return -EINVAL; - - t->src.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); - t->dst.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + t->src.u3.ip = *(__be32 *)nla_data(tb[CTA_IP_V4_SRC]); + t->dst.u3.ip = *(__be32 *)nla_data(tb[CTA_IP_V4_DST]); return 0; } @@ -411,8 +408,9 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .print_conntrack = ipv4_print_conntrack, .get_l4proto = ipv4_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = ipv4_tuple_to_nfattr, - .nfattr_to_tuple = ipv4_nfattr_to_tuple, + .tuple_to_nlattr = ipv4_tuple_to_nlattr, + .nlattr_to_tuple = ipv4_nlattr_to_tuple, + .nla_policy = ipv4_nla_policy, #endif #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index b3dd5de9a258..741f3dfaa5a1 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -11,6 +11,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/percpu.h> +#include <net/net_namespace.h> #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack_core.h> @@ -173,22 +174,8 @@ static const struct seq_operations ct_seq_ops = { static int ct_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct ct_iter_state *st; - int ret; - - st = kzalloc(sizeof(struct ct_iter_state), GFP_KERNEL); - if (st == NULL) - return -ENOMEM; - ret = seq_open(file, &ct_seq_ops); - if (ret) - goto out_free; - seq = file->private_data; - seq->private = st; - return ret; -out_free: - kfree(st); - return ret; + return seq_open_private(file, &ct_seq_ops, + sizeof(struct ct_iter_state)); } static const struct file_operations ct_file_ops = { @@ -290,22 +277,8 @@ static const struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct ct_expect_iter_state *st; - int ret; - - st = kzalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL); - if (!st) - return -ENOMEM; - ret = seq_open(file, &exp_seq_ops); - if (ret) - goto out_free; - seq = file->private_data; - seq->private = st; - return ret; -out_free: - kfree(st); - return ret; + return seq_open_private(file, &exp_seq_ops, + sizeof(struct ct_expect_iter_state)); } static const struct file_operations ip_exp_file_ops = { @@ -408,16 +381,16 @@ int __init nf_conntrack_ipv4_compat_init(void) { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440, &ip_exp_file_ops); if (!proc_exp) goto err2; - proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); + proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, init_net.proc_net_stat); if (!proc_stat) goto err3; @@ -427,16 +400,16 @@ int __init nf_conntrack_ipv4_compat_init(void) return 0; err3: - proc_net_remove("ip_conntrack_expect"); + proc_net_remove(&init_net, "ip_conntrack_expect"); err2: - proc_net_remove("ip_conntrack"); + proc_net_remove(&init_net, "ip_conntrack"); err1: return -ENOMEM; } void __exit nf_conntrack_ipv4_compat_fini(void) { - remove_proc_entry("ip_conntrack", proc_net_stat); - proc_net_remove("ip_conntrack_expect"); - proc_net_remove("ip_conntrack"); + remove_proc_entry("ip_conntrack", init_net.proc_net_stat); + proc_net_remove(&init_net, "ip_conntrack_expect"); + proc_net_remove(&init_net, "ip_conntrack"); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 6593fd2c5b10..11fedc73049c 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -232,45 +232,42 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int icmp_tuple_to_nfattr(struct sk_buff *skb, +static int icmp_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *t) { - NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), + NLA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), &t->src.u.icmp.id); - NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), &t->dst.u.icmp.type); - NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), &t->dst.u.icmp.code); return 0; -nfattr_failure: +nla_put_failure: return -1; } -static const size_t cta_min_proto[CTA_PROTO_MAX] = { - [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t) +static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = { + [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 }, }; -static int icmp_nfattr_to_tuple(struct nfattr *tb[], +static int icmp_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *tuple) { - if (!tb[CTA_PROTO_ICMP_TYPE-1] - || !tb[CTA_PROTO_ICMP_CODE-1] - || !tb[CTA_PROTO_ICMP_ID-1]) - return -EINVAL; - - if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + if (!tb[CTA_PROTO_ICMP_TYPE] + || !tb[CTA_PROTO_ICMP_CODE] + || !tb[CTA_PROTO_ICMP_ID]) return -EINVAL; tuple->dst.u.icmp.type = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); + *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMP_TYPE]); tuple->dst.u.icmp.code = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); + *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMP_CODE]); tuple->src.u.icmp.id = - *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + *(__be16 *)nla_data(tb[CTA_PROTO_ICMP_ID]); if (tuple->dst.u.icmp.type >= sizeof(invmap) || !invmap[tuple->dst.u.icmp.type]) @@ -327,8 +324,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .destroy = NULL, .me = NULL, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = icmp_tuple_to_nfattr, - .nfattr_to_tuple = icmp_nfattr_to_tuple, + .tuple_to_nlattr = icmp_tuple_to_nlattr, + .nlattr_to_tuple = icmp_nlattr_to_tuple, + .nla_policy = icmp_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_header = &icmp_sysctl_header, diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index deab27facbad..7221aa20e6ff 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -544,46 +544,46 @@ EXPORT_SYMBOL(nf_nat_protocol_unregister); #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) int -nf_nat_port_range_to_nfattr(struct sk_buff *skb, +nf_nat_port_range_to_nlattr(struct sk_buff *skb, const struct nf_nat_range *range) { - NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16), + NLA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16), &range->min.tcp.port); - NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16), + NLA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16), &range->max.tcp.port); return 0; -nfattr_failure: +nla_put_failure: return -1; } -EXPORT_SYMBOL_GPL(nf_nat_port_nfattr_to_range); +EXPORT_SYMBOL_GPL(nf_nat_port_nlattr_to_range); int -nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range) +nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range) { int ret = 0; /* we have to return whether we actually parsed something or not */ - if (tb[CTA_PROTONAT_PORT_MIN-1]) { + if (tb[CTA_PROTONAT_PORT_MIN]) { ret = 1; range->min.tcp.port = - *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); + *(__be16 *)nla_data(tb[CTA_PROTONAT_PORT_MIN]); } - if (!tb[CTA_PROTONAT_PORT_MAX-1]) { + if (!tb[CTA_PROTONAT_PORT_MAX]) { if (ret) range->max.tcp.port = range->min.tcp.port; } else { ret = 1; range->max.tcp.port = - *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); + *(__be16 *)nla_data(tb[CTA_PROTONAT_PORT_MAX]); } return ret; } -EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr); +EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nlattr); #endif /* Noone using conntrack by the time this called. */ diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 2e40cc83526a..d562290b1820 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -142,8 +142,8 @@ static struct nf_nat_protocol gre __read_mostly = { .in_range = gre_in_range, .unique_tuple = gre_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nfattr = nf_nat_port_range_to_nfattr, - .nfattr_to_range = nf_nat_port_nfattr_to_range, + .range_to_nlattr = nf_nat_port_range_to_nlattr, + .nlattr_to_range = nf_nat_port_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index f71ef9b5f428..898d73771155 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -79,7 +79,7 @@ struct nf_nat_protocol nf_nat_protocol_icmp = { .in_range = icmp_in_range, .unique_tuple = icmp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nfattr = nf_nat_port_range_to_nfattr, - .nfattr_to_range = nf_nat_port_nfattr_to_range, + .range_to_nlattr = nf_nat_port_range_to_nlattr, + .nlattr_to_range = nf_nat_port_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 123c95913f28..5bbbb2acdc70 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -145,7 +145,7 @@ struct nf_nat_protocol nf_nat_protocol_tcp = { .in_range = tcp_in_range, .unique_tuple = tcp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nfattr = nf_nat_port_range_to_nfattr, - .nfattr_to_range = nf_nat_port_nfattr_to_range, + .range_to_nlattr = nf_nat_port_range_to_nlattr, + .nlattr_to_range = nf_nat_port_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 1c4c70e25cd4..a0af4fd95584 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -135,7 +135,7 @@ struct nf_nat_protocol nf_nat_protocol_udp = { .in_range = udp_in_range, .unique_tuple = udp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nfattr = nf_nat_port_range_to_nfattr, - .nfattr_to_range = nf_nat_port_nfattr_to_range, + .range_to_nlattr = nf_nat_port_range_to_nlattr, + .nlattr_to_range = nf_nat_port_nlattr_to_range, #endif }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3b690cf2a4ee..e5b05b039101 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -34,6 +34,7 @@ * 2 of the License, or (at your option) any later version. */ #include <linux/types.h> +#include <net/net_namespace.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/tcp.h> @@ -123,33 +124,30 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { static const struct snmp_mib snmp4_icmp_list[] = { SNMP_MIB_ITEM("InMsgs", ICMP_MIB_INMSGS), SNMP_MIB_ITEM("InErrors", ICMP_MIB_INERRORS), - SNMP_MIB_ITEM("InDestUnreachs", ICMP_MIB_INDESTUNREACHS), - SNMP_MIB_ITEM("InTimeExcds", ICMP_MIB_INTIMEEXCDS), - SNMP_MIB_ITEM("InParmProbs", ICMP_MIB_INPARMPROBS), - SNMP_MIB_ITEM("InSrcQuenchs", ICMP_MIB_INSRCQUENCHS), - SNMP_MIB_ITEM("InRedirects", ICMP_MIB_INREDIRECTS), - SNMP_MIB_ITEM("InEchos", ICMP_MIB_INECHOS), - SNMP_MIB_ITEM("InEchoReps", ICMP_MIB_INECHOREPS), - SNMP_MIB_ITEM("InTimestamps", ICMP_MIB_INTIMESTAMPS), - SNMP_MIB_ITEM("InTimestampReps", ICMP_MIB_INTIMESTAMPREPS), - SNMP_MIB_ITEM("InAddrMasks", ICMP_MIB_INADDRMASKS), - SNMP_MIB_ITEM("InAddrMaskReps", ICMP_MIB_INADDRMASKREPS), SNMP_MIB_ITEM("OutMsgs", ICMP_MIB_OUTMSGS), SNMP_MIB_ITEM("OutErrors", ICMP_MIB_OUTERRORS), - SNMP_MIB_ITEM("OutDestUnreachs", ICMP_MIB_OUTDESTUNREACHS), - SNMP_MIB_ITEM("OutTimeExcds", ICMP_MIB_OUTTIMEEXCDS), - SNMP_MIB_ITEM("OutParmProbs", ICMP_MIB_OUTPARMPROBS), - SNMP_MIB_ITEM("OutSrcQuenchs", ICMP_MIB_OUTSRCQUENCHS), - SNMP_MIB_ITEM("OutRedirects", ICMP_MIB_OUTREDIRECTS), - SNMP_MIB_ITEM("OutEchos", ICMP_MIB_OUTECHOS), - SNMP_MIB_ITEM("OutEchoReps", ICMP_MIB_OUTECHOREPS), - SNMP_MIB_ITEM("OutTimestamps", ICMP_MIB_OUTTIMESTAMPS), - SNMP_MIB_ITEM("OutTimestampReps", ICMP_MIB_OUTTIMESTAMPREPS), - SNMP_MIB_ITEM("OutAddrMasks", ICMP_MIB_OUTADDRMASKS), - SNMP_MIB_ITEM("OutAddrMaskReps", ICMP_MIB_OUTADDRMASKREPS), SNMP_MIB_SENTINEL }; +static struct { + char *name; + int index; +} icmpmibmap[] = { + { "DestUnreachs", ICMP_DEST_UNREACH }, + { "TimeExcds", ICMP_TIME_EXCEEDED }, + { "ParmProbs", ICMP_PARAMETERPROB }, + { "SrcQuenchs", ICMP_SOURCE_QUENCH }, + { "Redirects", ICMP_REDIRECT }, + { "Echos", ICMP_ECHO }, + { "EchoReps", ICMP_ECHOREPLY }, + { "Timestamps", ICMP_TIMESTAMP }, + { "TimestampReps", ICMP_TIMESTAMPREPLY }, + { "AddrMasks", ICMP_ADDRESS }, + { "AddrMaskReps", ICMP_ADDRESSREPLY }, + { NULL, 0 } +}; + + static const struct snmp_mib snmp4_tcp_list[] = { SNMP_MIB_ITEM("RtoAlgorithm", TCP_MIB_RTOALGORITHM), SNMP_MIB_ITEM("RtoMin", TCP_MIB_RTOMIN), @@ -244,9 +242,79 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPAbortOnLinger", LINUX_MIB_TCPABORTONLINGER), SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED), SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES), + SNMP_MIB_ITEM("TCPSACKDiscard", LINUX_MIB_TCPSACKDISCARD), + SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), + SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), + SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), SNMP_MIB_SENTINEL }; +static void icmpmsg_put(struct seq_file *seq) +{ +#define PERLINE 16 + + int j, i, count; + static int out[PERLINE]; + + count = 0; + for (i = 0; i < ICMPMSG_MIB_MAX; i++) { + + if (snmp_fold_field((void **) icmpmsg_statistics, i)) + out[count++] = i; + if (count < PERLINE) + continue; + + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < PERLINE; ++j) + seq_printf(seq, " %sType%u", i & 0x100 ? "Out" : "In", + i & 0xff); + seq_printf(seq, "\nIcmpMsg: "); + for (j = 0; j < PERLINE; ++j) + seq_printf(seq, " %lu", + snmp_fold_field((void **) icmpmsg_statistics, + out[j])); + seq_putc(seq, '\n'); + } + if (count) { + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %sType%u", out[j] & 0x100 ? "Out" : + "In", out[j] & 0xff); + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %lu", snmp_fold_field((void **) + icmpmsg_statistics, out[j])); + } + +#undef PERLINE +} + +static void icmp_put(struct seq_file *seq) +{ + int i; + + seq_puts(seq, "\nIcmp: InMsgs InErrors"); + for (i=0; icmpmibmap[i].name != NULL; i++) + seq_printf(seq, " In%s", icmpmibmap[i].name); + seq_printf(seq, " OutMsgs OutErrors"); + for (i=0; icmpmibmap[i].name != NULL; i++) + seq_printf(seq, " Out%s", icmpmibmap[i].name); + seq_printf(seq, "\nIcmp: %lu %lu", + snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INMSGS), + snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INERRORS)); + for (i=0; icmpmibmap[i].name != NULL; i++) + seq_printf(seq, " %lu", + snmp_fold_field((void **) icmpmsg_statistics, + icmpmibmap[i].index)); + seq_printf(seq, " %lu %lu", + snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTMSGS), + snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTERRORS)); + for (i=0; icmpmibmap[i].name != NULL; i++) + seq_printf(seq, " %lu", + snmp_fold_field((void **) icmpmsg_statistics, + icmpmibmap[i].index)); +} + /* * Called from the PROCfs module. This outputs /proc/net/snmp. */ @@ -267,15 +335,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v) snmp_fold_field((void **)ip_statistics, snmp4_ipstats_list[i].entry)); - seq_puts(seq, "\nIcmp:"); - for (i = 0; snmp4_icmp_list[i].name != NULL; i++) - seq_printf(seq, " %s", snmp4_icmp_list[i].name); - - seq_puts(seq, "\nIcmp:"); - for (i = 0; snmp4_icmp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field((void **)icmp_statistics, - snmp4_icmp_list[i].entry)); + icmp_put(seq); /* RFC 2011 compatibility */ + icmpmsg_put(seq); seq_puts(seq, "\nTcp:"); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) @@ -332,6 +393,8 @@ static const struct file_operations snmp_seq_fops = { .release = single_release, }; + + /* * Output /proc/net/netstat */ @@ -380,20 +443,20 @@ int __init ip_misc_proc_init(void) { int rc = 0; - if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops)) + if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops)) goto out_netstat; - if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops)) + if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops)) goto out_snmp; - if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops)) + if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops)) goto out_sockstat; out: return rc; out_sockstat: - proc_net_remove("snmp"); + proc_net_remove(&init_net, "snmp"); out_snmp: - proc_net_remove("netstat"); + proc_net_remove(&init_net, "netstat"); out_netstat: rc = -ENOMEM; goto out; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c6d71526f625..3916faca3afe 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -59,6 +59,7 @@ #include <linux/in_route.h> #include <linux/route.h> #include <linux/skbuff.h> +#include <net/net_namespace.h> #include <net/dst.h> #include <net/sock.h> #include <linux/gfp.h> @@ -313,6 +314,9 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } + if (iph->protocol == IPPROTO_ICMP) + icmp_out_count(((struct icmphdr *) + skb_transport_header(skb))->type); err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); @@ -898,24 +902,8 @@ static const struct seq_operations raw_seq_ops = { static int raw_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct raw_iter_state *s; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - rc = seq_open(file, &raw_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &raw_seq_ops, + sizeof(struct raw_iter_state)); } static const struct file_operations raw_seq_fops = { @@ -928,13 +916,13 @@ static const struct file_operations raw_seq_fops = { int __init raw_proc_init(void) { - if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops)) + if (!proc_net_fops_create(&init_net, "raw", S_IRUGO, &raw_seq_fops)) return -ENOMEM; return 0; } void __init raw_proc_exit(void) { - proc_net_remove("raw"); + proc_net_remove(&init_net, "raw"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c7ca94bd152c..21b12de9e653 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -81,6 +81,7 @@ #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/init.h> +#include <linux/workqueue.h> #include <linux/skbuff.h> #include <linux/inetdevice.h> #include <linux/igmp.h> @@ -91,6 +92,7 @@ #include <linux/jhash.h> #include <linux/rcupdate.h> #include <linux/times.h> +#include <net/net_namespace.h> #include <net/protocol.h> #include <net/ip.h> #include <net/route.h> @@ -135,7 +137,8 @@ static unsigned long rt_deadline; #define RTprint(a...) printk(KERN_DEBUG a) static struct timer_list rt_flush_timer; -static struct timer_list rt_periodic_timer; +static void rt_check_expire(struct work_struct *work); +static DECLARE_DELAYED_WORK(expires_work, rt_check_expire); static struct timer_list rt_secret_timer; /* @@ -243,7 +246,7 @@ static spinlock_t *rt_hash_locks; static struct rt_hash_bucket *rt_hash_table; static unsigned rt_hash_mask; -static int rt_hash_log; +static unsigned int rt_hash_log; static unsigned int rt_hash_rnd; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); @@ -372,23 +375,8 @@ static const struct seq_operations rt_cache_seq_ops = { static int rt_cache_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct rt_cache_iter_state *s; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - rc = seq_open(file, &rt_cache_seq_ops); - if (rc) - goto out_kfree; - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &rt_cache_seq_ops, + sizeof(struct rt_cache_iter_state)); } static const struct file_operations rt_cache_seq_fops = { @@ -571,33 +559,32 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) (fl1->iif ^ fl2->iif)) == 0; } -/* This runs via a timer and thus is always in BH context. */ -static void rt_check_expire(unsigned long dummy) +static void rt_check_expire(struct work_struct *work) { static unsigned int rover; unsigned int i = rover, goal; struct rtable *rth, **rthp; - unsigned long now = jiffies; u64 mult; mult = ((u64)ip_rt_gc_interval) << rt_hash_log; if (ip_rt_gc_timeout > 1) do_div(mult, ip_rt_gc_timeout); goal = (unsigned int)mult; - if (goal > rt_hash_mask) goal = rt_hash_mask + 1; + if (goal > rt_hash_mask) + goal = rt_hash_mask + 1; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; i = (i + 1) & rt_hash_mask; rthp = &rt_hash_table[i].chain; - if (*rthp == 0) + if (*rthp == NULL) continue; - spin_lock(rt_hash_lock_addr(i)); + spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { if (rth->u.dst.expires) { /* Entry is expired even if it is in use */ - if (time_before_eq(now, rth->u.dst.expires)) { + if (time_before_eq(jiffies, rth->u.dst.expires)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; continue; @@ -612,14 +599,10 @@ static void rt_check_expire(unsigned long dummy) *rthp = rth->u.dst.rt_next; rt_free(rth); } - spin_unlock(rt_hash_lock_addr(i)); - - /* Fallback loop breaker. */ - if (time_after(jiffies, now)) - break; + spin_unlock_bh(rt_hash_lock_addr(i)); } rover = i; - mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval); + schedule_delayed_work(&expires_work, ip_rt_gc_interval); } /* This can run from both BH and non-BH contexts, the latter @@ -1404,8 +1387,8 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rtable *rt = (struct rtable *) dst; struct in_device *idev = rt->idev; - if (dev != &loopback_dev && idev && idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + if (dev != init_net.loopback_dev && idev && idev->dev == dev) { + struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev); if (loopback_idev) { rt->idev = loopback_idev; in_dev_put(idev); @@ -1557,7 +1540,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = init_net.loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->fl.oif = 0; @@ -1814,7 +1797,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { int result; result = fib_validate_source(saddr, daddr, tos, - loopback_dev.ifindex, + init_net.loopback_dev->ifindex, dev, &spec_dst, &itag); if (result < 0) goto martian_source; @@ -1881,7 +1864,7 @@ local_input: #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = init_net.loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->rt_gateway = daddr; @@ -2151,7 +2134,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) RT_SCOPE_UNIVERSE), } }, .mark = oldflp->mark, - .iif = loopback_dev.ifindex, + .iif = init_net.loopback_dev->ifindex, .oif = oldflp->oif }; struct fib_result res; unsigned flags = 0; @@ -2212,7 +2195,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) if (oldflp->oif) { - dev_out = dev_get_by_index(oldflp->oif); + dev_out = dev_get_by_index(&init_net, oldflp->oif); err = -ENODEV; if (dev_out == NULL) goto out; @@ -2245,9 +2228,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; dev_hold(dev_out); - fl.oif = loopback_dev.ifindex; + fl.oif = init_net.loopback_dev->ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -2292,7 +2275,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) fl.fl4_src = fl.fl4_dst; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; dev_hold(dev_out); fl.oif = dev_out->ifindex; if (res.fi) @@ -2591,7 +2574,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (iif) { struct net_device *dev; - dev = __dev_get_by_index(iif); + dev = __dev_get_by_index(&init_net, iif); if (dev == NULL) { err = -ENODEV; goto errout_free; @@ -2992,17 +2975,14 @@ int __init ip_rt_init(void) init_timer(&rt_flush_timer); rt_flush_timer.function = rt_run_flush; - init_timer(&rt_periodic_timer); - rt_periodic_timer.function = rt_check_expire; init_timer(&rt_secret_timer); rt_secret_timer.function = rt_secret_rebuild; /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ - rt_periodic_timer.expires = jiffies + net_random() % ip_rt_gc_interval + - ip_rt_gc_interval; - add_timer(&rt_periodic_timer); + schedule_delayed_work(&expires_work, + net_random() % ip_rt_gc_interval + ip_rt_gc_interval); rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + ip_rt_secret_interval; @@ -3011,15 +2991,15 @@ int __init ip_rt_init(void) #ifdef CONFIG_PROC_FS { struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */ - if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) || + if (!proc_net_fops_create(&init_net, "rt_cache", S_IRUGO, &rt_cache_seq_fops) || !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, - proc_net_stat))) { + init_net.proc_net_stat))) { return -ENOMEM; } rtstat_pde->proc_fops = &rt_cpu_seq_fops; } #ifdef CONFIG_NET_CLS_ROUTE - create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL); + create_proc_read_entry("rt_acct", 0, init_net.proc_net, ip_rt_acct_read, NULL); #endif #endif #ifdef CONFIG_XFRM diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 53ef0f4bbdaa..eb286abcf5dc 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -12,6 +12,7 @@ #include <linux/sysctl.h> #include <linux/igmp.h> #include <linux/inetdevice.h> +#include <linux/seqlock.h> #include <net/snmp.h> #include <net/icmp.h> #include <net/ip.h> @@ -89,6 +90,74 @@ static int ipv4_sysctl_forward_strategy(ctl_table *table, return 1; } +extern seqlock_t sysctl_port_range_lock; +extern int sysctl_local_port_range[2]; + +/* Update system visible IP port range */ +static void set_local_port_range(int range[2]) +{ + write_seqlock(&sysctl_port_range_lock); + sysctl_local_port_range[0] = range[0]; + sysctl_local_port_range[1] = range[1]; + write_sequnlock(&sysctl_port_range_lock); +} + +/* Validate changes from /proc interface. */ +static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret; + int range[2] = { sysctl_local_port_range[0], + sysctl_local_port_range[1] }; + ctl_table tmp = { + .data = &range, + .maxlen = sizeof(range), + .mode = table->mode, + .extra1 = &ip_local_port_range_min, + .extra2 = &ip_local_port_range_max, + }; + + ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + + if (write && ret == 0) { + if (range[1] <= range[0]) + ret = -EINVAL; + else + set_local_port_range(range); + } + + return ret; +} + +/* Validate changes from sysctl interface. */ +static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + int ret; + int range[2] = { sysctl_local_port_range[0], + sysctl_local_port_range[1] }; + ctl_table tmp = { + .data = &range, + .maxlen = sizeof(range), + .mode = table->mode, + .extra1 = &ip_local_port_range_min, + .extra2 = &ip_local_port_range_max, + }; + + ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); + if (ret == 0 && newval && newlen) { + if (range[1] <= range[0]) + ret = -EINVAL; + else + set_local_port_range(range); + } + return ret; +} + + static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -427,10 +496,8 @@ ctl_table ipv4_table[] = { .data = &sysctl_local_port_range, .maxlen = sizeof(sysctl_local_port_range), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = ip_local_port_range_min, - .extra2 = ip_local_port_range_max + .proc_handler = &ipv4_local_port_range, + .strategy = &ipv4_sysctl_local_port_range, }, { .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7e740112b238..4f322003835d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -247,6 +247,7 @@ * TCP_CLOSE socket is finished */ +#include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/fcntl.h> @@ -2014,7 +2015,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) if (tp->rx_opt.tstamp_ok) info->tcpi_options |= TCPI_OPT_TIMESTAMPS; - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) info->tcpi_options |= TCPI_OPT_SACK; if (tp->rx_opt.wscale_ok) { info->tcpi_options |= TCPI_OPT_WSCALE; @@ -2030,8 +2031,13 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; - info->tcpi_unacked = tp->packets_out; - info->tcpi_sacked = tp->sacked_out; + if (sk->sk_state == TCP_LISTEN) { + info->tcpi_unacked = sk->sk_ack_backlog; + info->tcpi_sacked = sk->sk_max_ack_backlog; + } else { + info->tcpi_unacked = tp->packets_out; + info->tcpi_sacked = tp->sacked_out; + } info->tcpi_lost = tp->lost_out; info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; @@ -2210,7 +2216,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) goto out; mss = skb_shinfo(skb)->gso_size; - skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss; + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); segs = NULL; goto out; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 4586211e3757..5dba0fc8f579 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -210,7 +210,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) { const struct inet_connection_sock *icsk = inet_csk(sk); - if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + if (icsk->icsk_ca_state == TCP_CA_Open) { struct bictcp *ca = inet_csk_ca(sk); cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 485d7ea35f75..80bd084a9f91 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -314,7 +314,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) struct bictcp *ca = inet_csk_ca(sk); u32 delay; - if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + if (icsk->icsk_ca_state == TCP_CA_Open) { cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 57c5f0b10e6c..3904d2158a92 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -25,11 +25,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, const struct tcp_sock *tp = tcp_sk(sk); struct tcp_info *info = _info; - if (sk->sk_state == TCP_LISTEN) + if (sk->sk_state == TCP_LISTEN) { r->idiag_rqueue = sk->sk_ack_backlog; - else + r->idiag_wqueue = sk->sk_max_ack_backlog; + } else { r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; - r->idiag_wqueue = tp->write_seq - tp->snd_una; + r->idiag_wqueue = tp->write_seq - tp->snd_una; + } if (info != NULL) tcp_get_info(sk, info); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f893e90061eb..0a42e9340346 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -85,7 +85,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 2; int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; -int sysctl_tcp_frto __read_mostly; +int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_nometrics_save __read_mostly; @@ -104,6 +104,7 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained DSACK info */ +#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -111,13 +112,10 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) #define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) -#define IsReno(tp) ((tp)->rx_opt.sack_ok == 0) -#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) -#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4) - #define IsSackFrto() (sysctl_tcp_frto == 0x2) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) +#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) /* Adapt the MSS value used to make delayed ack decision to the * real world. @@ -198,6 +196,55 @@ static inline int tcp_in_quickack_mode(const struct sock *sk) return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; } +static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) +{ + if (tp->ecn_flags&TCP_ECN_OK) + tp->ecn_flags |= TCP_ECN_QUEUE_CWR; +} + +static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb) +{ + if (tcp_hdr(skb)->cwr) + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; +} + +static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp) +{ + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; +} + +static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) +{ + if (tp->ecn_flags&TCP_ECN_OK) { + if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + /* Funny extension: if ECT is not set on a segment, + * it is surely retransmit. It is not in ECN RFC, + * but Linux follows this rule. */ + else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) + tcp_enter_quickack_mode((struct sock *)tp); + } +} + +static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) +{ + if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr)) + tp->ecn_flags &= ~TCP_ECN_OK; +} + +static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) +{ + if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr)) + tp->ecn_flags &= ~TCP_ECN_OK; +} + +static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) +{ + if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK)) + return 1; + return 0; +} + /* Buffer size and advertised window tuning. * * 1. Tuning sk->sk_sndbuf, when connection enters established state. @@ -810,6 +857,21 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) } } +/* + * Packet counting of FACK is based on in-order assumptions, therefore TCP + * disables it when reordering is detected + */ +static void tcp_disable_fack(struct tcp_sock *tp) +{ + tp->rx_opt.sack_ok &= ~2; +} + +/* Take a notice that peer is sending DSACKs */ +static void tcp_dsack_seen(struct tcp_sock *tp) +{ + tp->rx_opt.sack_ok |= 4; +} + /* Initialize metrics on socket. */ static void tcp_init_metrics(struct sock *sk) @@ -831,7 +893,7 @@ static void tcp_init_metrics(struct sock *sk) } if (dst_metric(dst, RTAX_REORDERING) && tp->reordering != dst_metric(dst, RTAX_REORDERING)) { - tp->rx_opt.sack_ok &= ~2; + tcp_disable_fack(tp); tp->reordering = dst_metric(dst, RTAX_REORDERING); } @@ -893,9 +955,9 @@ static void tcp_update_reordering(struct sock *sk, const int metric, /* This exciting event is worth to be remembered. 8) */ if (ts) NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER); - else if (IsReno(tp)) + else if (tcp_is_reno(tp)) NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER); - else if (IsFack(tp)) + else if (tcp_is_fack(tp)) NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER); else NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); @@ -907,8 +969,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, tp->sacked_out, tp->undo_marker ? tp->undo_retrans : 0); #endif - /* Disable FACK yet. */ - tp->rx_opt.sack_ok &= ~2; + tcp_disable_fack(tp); } } @@ -959,7 +1020,216 @@ static void tcp_update_reordering(struct sock *sk, const int metric, * for retransmitted and already SACKed segment -> reordering.. * Both of these heuristics are not used in Loss state, when we cannot * account for retransmits accurately. + * + * SACK block validation. + * ---------------------- + * + * SACK block range validation checks that the received SACK block fits to + * the expected sequence limits, i.e., it is between SND.UNA and SND.NXT. + * Note that SND.UNA is not included to the range though being valid because + * it means that the receiver is rather inconsistent with itself reporting + * SACK reneging when it should advance SND.UNA. Such SACK block this is + * perfectly valid, however, in light of RFC2018 which explicitly states + * that "SACK block MUST reflect the newest segment. Even if the newest + * segment is going to be discarded ...", not that it looks very clever + * in case of head skb. Due to potentional receiver driven attacks, we + * choose to avoid immediate execution of a walk in write queue due to + * reneging and defer head skb's loss recovery to standard loss recovery + * procedure that will eventually trigger (nothing forbids us doing this). + * + * Implements also blockage to start_seq wrap-around. Problem lies in the + * fact that though start_seq (s) is before end_seq (i.e., not reversed), + * there's no guarantee that it will be before snd_nxt (n). The problem + * happens when start_seq resides between end_seq wrap (e_w) and snd_nxt + * wrap (s_w): + * + * <- outs wnd -> <- wrapzone -> + * u e n u_w e_w s n_w + * | | | | | | | + * |<------------+------+----- TCP seqno space --------------+---------->| + * ...-- <2^31 ->| |<--------... + * ...---- >2^31 ------>| |<--------... + * + * Current code wouldn't be vulnerable but it's better still to discard such + * crazy SACK blocks. Doing this check for start_seq alone closes somewhat + * similar case (end_seq after snd_nxt wrap) as earlier reversed check in + * snd_nxt wrap -> snd_una region will then become "well defined", i.e., + * equal to the ideal case (infinite seqno space without wrap caused issues). + * + * With D-SACK the lower bound is extended to cover sequence space below + * SND.UNA down to undo_marker, which is the last point of interest. Yet + * again, DSACK block must not to go across snd_una (for the same reason as + * for the normal SACK blocks, explained above). But there all simplicity + * ends, TCP might receive valid D-SACKs below that. As long as they reside + * fully below undo_marker they do not affect behavior in anyway and can + * therefore be safely ignored. In rare cases (which are more or less + * theoretical ones), the D-SACK will nicely cross that boundary due to skb + * fragmentation and packet reordering past skb's retransmission. To consider + * them correctly, the acceptable range must be extended even more though + * the exact amount is rather hard to quantify. However, tp->max_window can + * be used as an exaggerated estimate. + */ +static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, + u32 start_seq, u32 end_seq) +{ + /* Too far in future, or reversed (interpretation is ambiguous) */ + if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) + return 0; + + /* Nasty start_seq wrap-around check (see comments above) */ + if (!before(start_seq, tp->snd_nxt)) + return 0; + + /* In outstanding window? ...This is valid exit for DSACKs too. + * start_seq == snd_una is non-sensical (see comments above) + */ + if (after(start_seq, tp->snd_una)) + return 1; + + if (!is_dsack || !tp->undo_marker) + return 0; + + /* ...Then it's D-SACK, and must reside below snd_una completely */ + if (!after(end_seq, tp->snd_una)) + return 0; + + if (!before(start_seq, tp->undo_marker)) + return 1; + + /* Too old */ + if (!after(end_seq, tp->undo_marker)) + return 0; + + /* Undo_marker boundary crossing (overestimates a lot). Known already: + * start_seq < undo_marker and end_seq >= undo_marker. + */ + return !before(start_seq, end_seq - tp->max_window); +} + +/* Check for lost retransmit. This superb idea is borrowed from "ratehalving". + * Event "C". Later note: FACK people cheated me again 8), we have to account + * for reordering! Ugly, but should help. + * + * Search retransmitted skbs from write_queue that were sent when snd_nxt was + * less than what is now known to be received by the other end (derived from + * SACK blocks by the caller). Also calculate the lowest snd_nxt among the + * remaining retransmitted skbs to avoid some costly processing per ACKs. */ +static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int flag = 0; + int cnt = 0; + u32 new_low_seq = 0; + + tcp_for_write_queue(skb, sk) { + u32 ack_seq = TCP_SKB_CB(skb)->ack_seq; + + if (skb == tcp_send_head(sk)) + break; + if (cnt == tp->retrans_out) + break; + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) + continue; + + if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) + continue; + + if (after(received_upto, ack_seq) && + (tcp_is_fack(tp) || + !before(received_upto, + ack_seq + tp->reordering * tp->mss_cache))) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= tcp_skb_pcount(skb); + + /* clear lost hint */ + tp->retransmit_skb_hint = NULL; + + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { + tp->lost_out += tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + flag |= FLAG_DATA_SACKED; + NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); + } + } else { + if (!new_low_seq || before(ack_seq, new_low_seq)) + new_low_seq = ack_seq; + cnt += tcp_skb_pcount(skb); + } + } + + if (tp->retrans_out) + tp->lost_retrans_low = new_low_seq; + + return flag; +} + +static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, + struct tcp_sack_block_wire *sp, int num_sacks, + u32 prior_snd_una) +{ + u32 start_seq_0 = ntohl(get_unaligned(&sp[0].start_seq)); + u32 end_seq_0 = ntohl(get_unaligned(&sp[0].end_seq)); + int dup_sack = 0; + + if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { + dup_sack = 1; + tcp_dsack_seen(tp); + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); + } else if (num_sacks > 1) { + u32 end_seq_1 = ntohl(get_unaligned(&sp[1].end_seq)); + u32 start_seq_1 = ntohl(get_unaligned(&sp[1].start_seq)); + + if (!after(end_seq_0, end_seq_1) && + !before(start_seq_0, start_seq_1)) { + dup_sack = 1; + tcp_dsack_seen(tp); + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); + } + } + + /* D-SACK for already forgotten data... Do dumb counting. */ + if (dup_sack && + !after(end_seq_0, prior_snd_una) && + after(end_seq_0, tp->undo_marker)) + tp->undo_retrans--; + + return dup_sack; +} + +/* Check if skb is fully within the SACK block. In presence of GSO skbs, + * the incoming SACK may not exactly match but we can find smaller MSS + * aligned portion of it that matches. Therefore we might need to fragment + * which may fail and creates some hassle (caller must handle error case + * returns). + */ +int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, + u32 start_seq, u32 end_seq) +{ + int in_sack, err; + unsigned int pkt_len; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && + !before(end_seq, TCP_SKB_CB(skb)->end_seq); + + if (tcp_skb_pcount(skb) > 1 && !in_sack && + after(TCP_SKB_CB(skb)->end_seq, start_seq)) { + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); + + if (!in_sack) + pkt_len = start_seq - TCP_SKB_CB(skb)->seq; + else + pkt_len = end_seq - TCP_SKB_CB(skb)->seq; + err = tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size); + if (err < 0) + return err; + } + + return in_sack; +} + static int tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) { @@ -972,38 +1242,24 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; int prior_fackets; - u32 lost_retrans = 0; + u32 highest_sack_end_seq = 0; int flag = 0; int found_dup_sack = 0; int cached_fack_count; int i; int first_sack_index; - if (!tp->sacked_out) - tp->fackets_out = 0; + if (!tp->sacked_out) { + if (WARN_ON(tp->fackets_out)) + tp->fackets_out = 0; + tp->highest_sack = tp->snd_una; + } prior_fackets = tp->fackets_out; - /* Check for D-SACK. */ - if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) { + found_dup_sack = tcp_check_dsack(tp, ack_skb, sp, + num_sacks, prior_snd_una); + if (found_dup_sack) flag |= FLAG_DSACKING_ACK; - found_dup_sack = 1; - tp->rx_opt.sack_ok |= 4; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); - } else if (num_sacks > 1 && - !after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) && - !before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) { - flag |= FLAG_DSACKING_ACK; - found_dup_sack = 1; - tp->rx_opt.sack_ok |= 4; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); - } - - /* D-SACK for already forgotten data... - * Do dumb counting. */ - if (found_dup_sack && - !after(ntohl(sp[0].end_seq), prior_snd_una) && - after(ntohl(sp[0].end_seq), tp->undo_marker)) - tp->undo_retrans--; /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -1083,6 +1339,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int fack_count; int dup_sack = (found_dup_sack && (i == first_sack_index)); + if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) { + if (dup_sack) { + if (!tp->undo_marker) + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); + else + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD); + } else { + /* Don't count olds caused by ACK reordering */ + if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && + !after(end_seq, tp->snd_una)) + continue; + NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); + } + continue; + } + skb = cached_skb; fack_count = cached_fack_count; @@ -1091,7 +1363,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ flag |= FLAG_DATA_LOST; tcp_for_write_queue_from(skb, sk) { - int in_sack, pcount; + int in_sack; u8 sacked; if (skb == tcp_send_head(sk)) @@ -1110,30 +1382,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (!before(TCP_SKB_CB(skb)->seq, end_seq)) break; - in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && - !before(end_seq, TCP_SKB_CB(skb)->end_seq); - - pcount = tcp_skb_pcount(skb); - - if (pcount > 1 && !in_sack && - after(TCP_SKB_CB(skb)->end_seq, start_seq)) { - unsigned int pkt_len; - - in_sack = !after(start_seq, - TCP_SKB_CB(skb)->seq); - - if (!in_sack) - pkt_len = (start_seq - - TCP_SKB_CB(skb)->seq); - else - pkt_len = (end_seq - - TCP_SKB_CB(skb)->seq); - if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size)) - break; - pcount = tcp_skb_pcount(skb); - } + in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); + if (in_sack < 0) + break; - fack_count += pcount; + fack_count += tcp_skb_pcount(skb); sacked = TCP_SKB_CB(skb)->sacked; @@ -1160,11 +1413,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ continue; } - if ((sacked&TCPCB_SACKED_RETRANS) && - after(end_seq, TCP_SKB_CB(skb)->ack_seq) && - (!lost_retrans || after(end_seq, lost_retrans))) - lost_retrans = end_seq; - if (!in_sack) continue; @@ -1217,6 +1465,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; + + if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) { + tp->highest_sack = TCP_SKB_CB(skb)->seq; + highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq; + } } else { if (dup_sack && (sacked&TCPCB_RETRANS)) reord = min(fack_count, reord); @@ -1236,45 +1489,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } } - /* Check for lost retransmit. This superb idea is - * borrowed from "ratehalving". Event "C". - * Later note: FACK people cheated me again 8), - * we have to account for reordering! Ugly, - * but should help. - */ - if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) { - struct sk_buff *skb; - - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (after(TCP_SKB_CB(skb)->seq, lost_retrans)) - break; - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) - continue; - if ((TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) && - after(lost_retrans, TCP_SKB_CB(skb)->ack_seq) && - (IsFack(tp) || - !before(lost_retrans, - TCP_SKB_CB(skb)->ack_seq + tp->reordering * - tp->mss_cache))) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; + if (tp->retrans_out && + after(highest_sack_end_seq, tp->lost_retrans_low) && + icsk->icsk_ca_state == TCP_CA_Recovery) + flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq); - if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tp->lost_out += tcp_skb_pcount(skb); - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - flag |= FLAG_DATA_SACKED; - NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); - } - } - } - } - - tp->left_out = tp->sacked_out + tp->lost_out; + tcp_verify_left_out(tp); if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss && (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) @@ -1289,6 +1509,56 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ return flag; } +/* If we receive more dupacks than we expected counting segments + * in assumption of absent reordering, interpret this as reordering. + * The only another reason could be bug in receiver TCP. + */ +static void tcp_check_reno_reordering(struct sock *sk, const int addend) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 holes; + + holes = max(tp->lost_out, 1U); + holes = min(holes, tp->packets_out); + + if ((tp->sacked_out + holes) > tp->packets_out) { + tp->sacked_out = tp->packets_out - holes; + tcp_update_reordering(sk, tp->packets_out + addend, 0); + } +} + +/* Emulate SACKs for SACKless connection: account for a new dupack. */ + +static void tcp_add_reno_sack(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + tp->sacked_out++; + tcp_check_reno_reordering(sk, 0); + tcp_verify_left_out(tp); +} + +/* Account for ACK, ACKing some data in Reno Recovery phase. */ + +static void tcp_remove_reno_sacks(struct sock *sk, int acked) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (acked > 0) { + /* One ACK acked hole. The rest eat duplicate ACKs. */ + if (acked-1 >= tp->sacked_out) + tp->sacked_out = 0; + else + tp->sacked_out -= acked-1; + } + tcp_check_reno_reordering(sk, acked); + tcp_verify_left_out(tp); +} + +static inline void tcp_reset_reno_sack(struct tcp_sock *tp) +{ + tp->sacked_out = 0; +} + /* F-RTO can only be used if TCP has never retransmitted anything other than * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) */ @@ -1376,11 +1646,13 @@ void tcp_enter_frto(struct sock *sk) tp->undo_retrans = 0; skb = tcp_write_queue_head(sk); + if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) + tp->undo_marker = 0; if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); /* Earlier loss recovery underway (see RFC4138; Appendix B). * The last condition is necessary at least in tp->frto_counter case. @@ -1405,17 +1677,15 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int cnt = 0; - tp->sacked_out = 0; tp->lost_out = 0; - tp->fackets_out = 0; tp->retrans_out = 0; + if (tcp_is_reno(tp)) + tcp_reset_reno_sack(tp); tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) break; - cnt += tcp_skb_pcount(skb); /* * Count the retransmission made on RTO correctly (only when * waiting for the first ACK and did not get it)... @@ -1427,30 +1697,25 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) /* ...enter this if branch just for the first segment */ flag |= FLAG_DATA_ACKED; } else { + if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) + tp->undo_marker = 0; TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); } - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { - /* Do not mark those segments lost that were - * forward transmitted after RTO - */ - if (!after(TCP_SKB_CB(skb)->end_seq, - tp->frto_highmark)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - } - } else { - tp->sacked_out += tcp_skb_pcount(skb); - tp->fackets_out = cnt; + /* Don't lost mark skbs that were fwd transmitted after RTO */ + if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) && + !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + tp->lost_out += tcp_skb_pcount(skb); } } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tp->undo_marker = 0; tp->frto_counter = 0; + tp->bytes_acked = 0; tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); @@ -1458,22 +1723,26 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->high_seq = tp->frto_highmark; TCP_ECN_queue_cwr(tp); - clear_all_retrans_hints(tp); + tcp_clear_retrans_hints_partial(tp); } -void tcp_clear_retrans(struct tcp_sock *tp) +static void tcp_clear_retrans_partial(struct tcp_sock *tp) { - tp->left_out = 0; tp->retrans_out = 0; - - tp->fackets_out = 0; - tp->sacked_out = 0; tp->lost_out = 0; tp->undo_marker = 0; tp->undo_retrans = 0; } +void tcp_clear_retrans(struct tcp_sock *tp) +{ + tcp_clear_retrans_partial(tp); + + tp->fackets_out = 0; + tp->sacked_out = 0; +} + /* Enter Loss state. If "how" is not zero, forget all SACK information * and reset tags completely, otherwise preserve SACKs. If receiver * dropped its ofo queue, we will know this due to reneging detection. @@ -1483,7 +1752,6 @@ void tcp_enter_loss(struct sock *sk, int how) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int cnt = 0; /* Reduce ssthresh if it has not yet been made inside this window. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || @@ -1497,17 +1765,26 @@ void tcp_enter_loss(struct sock *sk, int how) tp->snd_cwnd_stamp = tcp_time_stamp; tp->bytes_acked = 0; - tcp_clear_retrans(tp); + tcp_clear_retrans_partial(tp); + + if (tcp_is_reno(tp)) + tcp_reset_reno_sack(tp); - /* Push undo marker, if it was plain RTO and nothing - * was retransmitted. */ - if (!how) + if (!how) { + /* Push undo marker, if it was plain RTO and nothing + * was retransmitted. */ tp->undo_marker = tp->snd_una; + tcp_clear_retrans_hints_partial(tp); + } else { + tp->sacked_out = 0; + tp->fackets_out = 0; + tcp_clear_all_retrans_hints(tp); + } tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) break; - cnt += tcp_skb_pcount(skb); + if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) tp->undo_marker = 0; TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; @@ -1515,12 +1792,9 @@ void tcp_enter_loss(struct sock *sk, int how) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - } else { - tp->sacked_out += tcp_skb_pcount(skb); - tp->fackets_out = cnt; } } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); @@ -1529,8 +1803,6 @@ void tcp_enter_loss(struct sock *sk, int how) TCP_ECN_queue_cwr(tp); /* Abort FRTO algorithm if one is in progress */ tp->frto_counter = 0; - - clear_all_retrans_hints(tp); } static int tcp_check_sack_reneging(struct sock *sk) @@ -1560,7 +1832,7 @@ static int tcp_check_sack_reneging(struct sock *sk) static inline int tcp_fackets_out(struct tcp_sock *tp) { - return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; + return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out; } static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) @@ -1708,55 +1980,18 @@ static int tcp_time_to_recover(struct sock *sk) return 0; } -/* If we receive more dupacks than we expected counting segments - * in assumption of absent reordering, interpret this as reordering. - * The only another reason could be bug in receiver TCP. +/* RFC: This is from the original, I doubt that this is necessary at all: + * clear xmit_retrans hint if seq of this skb is beyond hint. How could we + * retransmitted past LOST markings in the first place? I'm not fully sure + * about undo and end of connection cases, which can cause R without L? */ -static void tcp_check_reno_reordering(struct sock *sk, const int addend) -{ - struct tcp_sock *tp = tcp_sk(sk); - u32 holes; - - holes = max(tp->lost_out, 1U); - holes = min(holes, tp->packets_out); - - if ((tp->sacked_out + holes) > tp->packets_out) { - tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(sk, tp->packets_out + addend, 0); - } -} - -/* Emulate SACKs for SACKless connection: account for a new dupack. */ - -static void tcp_add_reno_sack(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - tp->sacked_out++; - tcp_check_reno_reordering(sk, 0); - tcp_sync_left_out(tp); -} - -/* Account for ACK, ACKing some data in Reno Recovery phase. */ - -static void tcp_remove_reno_sacks(struct sock *sk, int acked) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (acked > 0) { - /* One ACK acked hole. The rest eat duplicate ACKs. */ - if (acked-1 >= tp->sacked_out) - tp->sacked_out = 0; - else - tp->sacked_out -= acked-1; - } - tcp_check_reno_reordering(sk, acked); - tcp_sync_left_out(tp); -} - -static inline void tcp_reset_reno_sack(struct tcp_sock *tp) +static void tcp_verify_retransmit_hint(struct tcp_sock *tp, + struct sk_buff *skb) { - tp->sacked_out = 0; - tp->left_out = tp->lost_out; + if ((tp->retransmit_skb_hint != NULL) && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + tp->retransmit_skb_hint = NULL; } /* Mark head of queue up as lost. */ @@ -1786,20 +2021,13 @@ static void tcp_mark_head_lost(struct sock *sk, cnt += tcp_skb_pcount(skb); if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) break; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - - /* clear xmit_retransmit_queue hints - * if this is beyond hint */ - if (tp->retransmit_skb_hint != NULL && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) - tp->retransmit_skb_hint = NULL; - + tcp_verify_retransmit_hint(tp, skb); } } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); } /* Account newly detected lost packet(s) */ @@ -1808,7 +2036,7 @@ static void tcp_update_scoreboard(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (IsFack(tp)) { + if (tcp_is_fack(tp)) { int lost = tp->fackets_out - tp->reordering; if (lost <= 0) lost = 1; @@ -1822,7 +2050,7 @@ static void tcp_update_scoreboard(struct sock *sk) * Hence, we can detect timed out packets during fast * retransmit without falling to slow start. */ - if (!IsReno(tp) && tcp_head_timedout(sk)) { + if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) { struct sk_buff *skb; skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint @@ -1837,19 +2065,13 @@ static void tcp_update_scoreboard(struct sock *sk) if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - - /* clear xmit_retrans hint */ - if (tp->retransmit_skb_hint && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) - - tp->retransmit_skb_hint = NULL; + tcp_verify_retransmit_hint(tp, skb); } } tp->scoreboard_skb_hint = skb; - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); } } @@ -1880,7 +2102,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag) int decr = tp->snd_cwnd_cnt + 1; if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) || - (IsReno(tp) && !(flag&FLAG_NOT_DUP))) { + (tcp_is_reno(tp) && !(flag&FLAG_NOT_DUP))) { tp->snd_cwnd_cnt = decr&1; decr >>= 1; @@ -1913,7 +2135,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", msg, NIPQUAD(inet->daddr), ntohs(inet->dport), - tp->snd_cwnd, tp->left_out, + tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } @@ -1945,7 +2167,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) /* There is something screwy going on with the retrans hints after an undo */ - clear_all_retrans_hints(tp); + tcp_clear_all_retrans_hints(tp); } static inline int tcp_may_undo(struct tcp_sock *tp) @@ -1971,7 +2193,7 @@ static int tcp_try_undo_recovery(struct sock *sk) NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); tp->undo_marker = 0; } - if (tp->snd_una == tp->high_seq && IsReno(tp)) { + if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq * is ACKed. For Reno it is MUST to prevent false * fast retransmits (RFC2582). SACK TCP is safe. */ @@ -2001,7 +2223,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) { struct tcp_sock *tp = tcp_sk(sk); /* Partial ACK arrived. Force Hoe's retransmit. */ - int failed = IsReno(tp) || tp->fackets_out>tp->reordering; + int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering; if (tcp_may_undo(tp)) { /* Plain luck! Hole if filled with delayed @@ -2038,16 +2260,15 @@ static int tcp_try_undo_loss(struct sock *sk) TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } - clear_all_retrans_hints(tp); + tcp_clear_all_retrans_hints(tp); DBGUNDO(sk, "partial loss"); tp->lost_out = 0; - tp->left_out = tp->sacked_out; tcp_undo_cwr(sk, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; - if (!IsReno(tp)) + if (tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return 1; } @@ -2066,7 +2287,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); if (tp->retrans_out == 0) tp->retrans_stamp = 0; @@ -2077,7 +2298,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { int state = TCP_CA_Open; - if (tp->left_out || tp->retrans_out || tp->undo_marker) + if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) state = TCP_CA_Disorder; if (inet_csk(sk)->icsk_ca_state != state) { @@ -2130,7 +2351,7 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) * tcp_xmit_retransmit_queue(). */ static void -tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) +tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -2142,8 +2363,8 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) * 1. Reno does not count dupacks (sacked_out) automatically. */ if (!tp->packets_out) tp->sacked_out = 0; - /* 2. SACK counts snd_fack in packets inaccurately. */ - if (tp->sacked_out == 0) + + if (WARN_ON(!tp->sacked_out && tp->fackets_out)) tp->fackets_out = 0; /* Now state machine starts. @@ -2164,8 +2385,8 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } - /* D. Synchronize left_out to current state. */ - tcp_sync_left_out(tp); + /* D. Check consistency of the current state. */ + tcp_verify_left_out(tp); /* E. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ @@ -2194,14 +2415,14 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) if (!tp->undo_marker || /* For SACK case do not Open to allow to undo * catching for all duplicate ACKs. */ - IsReno(tp) || tp->snd_una != tp->high_seq) { + tcp_is_reno(tp) || tp->snd_una != tp->high_seq) { tp->undo_marker = 0; tcp_set_ca_state(sk, TCP_CA_Open); } break; case TCP_CA_Recovery: - if (IsReno(tp)) + if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); if (tcp_try_undo_recovery(sk)) return; @@ -2214,14 +2435,10 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: if (!(flag & FLAG_SND_UNA_ADVANCED)) { - if (IsReno(tp) && is_dupack) + if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); - } else { - int acked = prior_packets - tp->packets_out; - if (IsReno(tp)) - tcp_remove_reno_sacks(sk, acked); - do_lost = tcp_try_undo_partial(sk, acked); - } + } else + do_lost = tcp_try_undo_partial(sk, pkts_acked); break; case TCP_CA_Loss: if (flag&FLAG_DATA_ACKED) @@ -2235,7 +2452,7 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) return; /* Loss is undone; fall through to processing in Open state. */ default: - if (IsReno(tp)) { + if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); if (is_dupack) @@ -2263,7 +2480,7 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) /* Otherwise enter Recovery state */ - if (IsReno(tp)) + if (tcp_is_reno(tp)) NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY); else NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY); @@ -2361,8 +2578,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, /* Restart timer after forward progress on connection. * RFC2988 recommends to restart timer to now+rto. */ - -static void tcp_ack_packets_out(struct sock *sk) +static void tcp_rearm_rto(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2373,158 +2589,143 @@ static void tcp_ack_packets_out(struct sock *sk) } } -static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, - __u32 now, __s32 *seq_rtt) +/* If we get here, the whole TSO packet has not been acked. */ +static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - __u32 seq = tp->snd_una; - __u32 packets_acked; - int acked = 0; + u32 packets_acked; - /* If we get here, the whole TSO packet has not been - * acked. - */ - BUG_ON(!after(scb->end_seq, seq)); + BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)); packets_acked = tcp_skb_pcount(skb); - if (tcp_trim_head(sk, skb, seq - scb->seq)) + if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) return 0; packets_acked -= tcp_skb_pcount(skb); if (packets_acked) { - __u8 sacked = scb->sacked; - - acked |= FLAG_DATA_ACKED; - if (sacked) { - if (sacked & TCPCB_RETRANS) { - if (sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out -= packets_acked; - acked |= FLAG_RETRANS_DATA_ACKED; - *seq_rtt = -1; - } else if (*seq_rtt < 0) - *seq_rtt = now - scb->when; - if (sacked & TCPCB_SACKED_ACKED) - tp->sacked_out -= packets_acked; - if (sacked & TCPCB_LOST) - tp->lost_out -= packets_acked; - if (sacked & TCPCB_URG) { - if (tp->urg_mode && - !before(seq, tp->snd_up)) - tp->urg_mode = 0; - } - } else if (*seq_rtt < 0) - *seq_rtt = now - scb->when; - - if (tp->fackets_out) { - __u32 dval = min(tp->fackets_out, packets_acked); - tp->fackets_out -= dval; - } - /* hint's skb might be NULL but we don't need to care */ - tp->fastpath_cnt_hint -= min_t(u32, packets_acked, - tp->fastpath_cnt_hint); - tp->packets_out -= packets_acked; - BUG_ON(tcp_skb_pcount(skb) == 0); - BUG_ON(!before(scb->seq, scb->end_seq)); + BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)); } - return acked; + return packets_acked; } -/* Remove acknowledged frames from the retransmission queue. */ -static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) +/* Remove acknowledged frames from the retransmission queue. If our packet + * is before the ack sequence we can discard it as it's confirmed to have + * arrived at the other end. + */ +static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb; - __u32 now = tcp_time_stamp; - int acked = 0; + u32 now = tcp_time_stamp; + int fully_acked = 1; + int flag = 0; int prior_packets = tp->packets_out; - __s32 seq_rtt = -1; + s32 seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); - while ((skb = tcp_write_queue_head(sk)) && - skb != tcp_send_head(sk)) { + while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - __u8 sacked = scb->sacked; + u32 end_seq; + u32 packets_acked; + u8 sacked = scb->sacked; - /* If our packet is before the ack sequence we can - * discard it as it's confirmed to have arrived at - * the other end. - */ if (after(scb->end_seq, tp->snd_una)) { - if (tcp_skb_pcount(skb) > 1 && - after(tp->snd_una, scb->seq)) - acked |= tcp_tso_acked(sk, skb, - now, &seq_rtt); - break; - } + if (tcp_skb_pcount(skb) == 1 || + !after(tp->snd_una, scb->seq)) + break; - /* Initial outgoing SYN's get put onto the write_queue - * just like anything else we transmit. It is not - * true data, and if we misinform our callers that - * this ACK acks real data, we will erroneously exit - * connection startup slow start one packet too - * quickly. This is severely frowned upon behavior. - */ - if (!(scb->flags & TCPCB_FLAG_SYN)) { - acked |= FLAG_DATA_ACKED; + packets_acked = tcp_tso_acked(sk, skb); + if (!packets_acked) + break; + + fully_acked = 0; + end_seq = tp->snd_una; } else { - acked |= FLAG_SYN_ACKED; - tp->retrans_stamp = 0; + packets_acked = tcp_skb_pcount(skb); + end_seq = scb->end_seq; } /* MTU probing checks */ - if (icsk->icsk_mtup.probe_size) { - if (!after(tp->mtu_probe.probe_seq_end, TCP_SKB_CB(skb)->end_seq)) { - tcp_mtup_probe_success(sk, skb); - } + if (fully_acked && icsk->icsk_mtup.probe_size && + !after(tp->mtu_probe.probe_seq_end, scb->end_seq)) { + tcp_mtup_probe_success(sk, skb); } if (sacked) { if (sacked & TCPCB_RETRANS) { if (sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out -= tcp_skb_pcount(skb); - acked |= FLAG_RETRANS_DATA_ACKED; + tp->retrans_out -= packets_acked; + flag |= FLAG_RETRANS_DATA_ACKED; seq_rtt = -1; + if ((flag & FLAG_DATA_ACKED) || + (packets_acked > 1)) + flag |= FLAG_NONHEAD_RETRANS_ACKED; } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - last_ackt = skb->tstamp; + if (fully_acked) + last_ackt = skb->tstamp; } + if (sacked & TCPCB_SACKED_ACKED) - tp->sacked_out -= tcp_skb_pcount(skb); + tp->sacked_out -= packets_acked; if (sacked & TCPCB_LOST) - tp->lost_out -= tcp_skb_pcount(skb); - if (sacked & TCPCB_URG) { - if (tp->urg_mode && - !before(scb->end_seq, tp->snd_up)) - tp->urg_mode = 0; - } + tp->lost_out -= packets_acked; + + if ((sacked & TCPCB_URG) && tp->urg_mode && + !before(end_seq, tp->snd_up)) + tp->urg_mode = 0; } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - last_ackt = skb->tstamp; + if (fully_acked) + last_ackt = skb->tstamp; + } + tp->packets_out -= packets_acked; + + /* Initial outgoing SYN's get put onto the write_queue + * just like anything else we transmit. It is not + * true data, and if we misinform our callers that + * this ACK acks real data, we will erroneously exit + * connection startup slow start one packet too + * quickly. This is severely frowned upon behavior. + */ + if (!(scb->flags & TCPCB_FLAG_SYN)) { + flag |= FLAG_DATA_ACKED; + } else { + flag |= FLAG_SYN_ACKED; + tp->retrans_stamp = 0; } - tcp_dec_pcount_approx(&tp->fackets_out, skb); - tcp_packets_out_dec(tp, skb); + + if (!fully_acked) + break; + tcp_unlink_write_queue(skb, sk); sk_stream_free_skb(sk, skb); - clear_all_retrans_hints(tp); + tcp_clear_all_retrans_hints(tp); } - if (acked&FLAG_ACKED) { + if (flag & FLAG_ACKED) { u32 pkts_acked = prior_packets - tp->packets_out; const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - tcp_ack_update_rtt(sk, acked, seq_rtt); - tcp_ack_packets_out(sk); + tcp_ack_update_rtt(sk, flag, seq_rtt); + tcp_rearm_rto(sk); + + tp->fackets_out -= min(pkts_acked, tp->fackets_out); + /* hint's skb might be NULL but we don't need to care */ + tp->fastpath_cnt_hint -= min_t(u32, pkts_acked, + tp->fastpath_cnt_hint); + if (tcp_is_reno(tp)) + tcp_remove_reno_sacks(sk, pkts_acked); if (ca_ops->pkts_acked) { s32 rtt_us = -1; /* Is the ACK triggering packet unambiguous? */ - if (!(acked & FLAG_RETRANS_DATA_ACKED)) { + if (!(flag & FLAG_RETRANS_DATA_ACKED)) { /* High resolution needed and available? */ if (ca_ops->flags & TCP_CONG_RTT_STAMP && !ktime_equal(last_ackt, @@ -2543,8 +2744,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) BUG_TRAP((int)tp->sacked_out >= 0); BUG_TRAP((int)tp->lost_out >= 0); BUG_TRAP((int)tp->retrans_out >= 0); - if (!tp->packets_out && tp->rx_opt.sack_ok) { - const struct inet_connection_sock *icsk = inet_csk(sk); + if (!tp->packets_out && tcp_is_sack(tp)) { + icsk = inet_csk(sk); if (tp->lost_out) { printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out, icsk->icsk_ca_state); @@ -2563,7 +2764,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } #endif *seq_rtt_p = seq_rtt; - return acked; + return flag; } static void tcp_ack_probe(struct sock *sk) @@ -2658,6 +2859,7 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) { tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); tp->snd_cwnd_cnt = 0; + tp->bytes_acked = 0; TCP_ECN_queue_cwr(tp); tcp_moderate_cwnd(tp); } @@ -2712,18 +2914,22 @@ static int tcp_process_frto(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); /* Duplicate the behavior from Loss state (fastretrans_alert) */ if (flag&FLAG_DATA_ACKED) inet_csk(sk)->icsk_retransmits = 0; + if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || + ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) + tp->undo_marker = 0; + if (!before(tp->snd_una, tp->frto_highmark)) { tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); return 1; } - if (!IsSackFrto() || IsReno(tp)) { + if (!IsSackFrto() || tcp_is_reno(tp)) { /* RFC4138 shortcoming in step 2; should also have case c): * ACK isn't duplicate nor advances window, e.g., opposite dir * data, winupdate @@ -2782,6 +2988,8 @@ static int tcp_process_frto(struct sock *sk, int flag) break; } tp->frto_counter = 0; + tp->undo_marker = 0; + NET_INC_STATS_BH(LINUX_MIB_TCPSPURIOUSRTOS); } return 0; } @@ -2862,6 +3070,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, &seq_rtt); + /* Guarantee sacktag reordering detection against wrap-arounds */ + if (before(tp->frto_highmark, tp->snd_una)) + tp->frto_highmark = 0; if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); @@ -2870,7 +3081,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight, 0); - tcp_fastretrans_alert(sk, prior_packets, flag); + tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight, 1); @@ -3207,7 +3418,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) * Probably, we should reset in this case. For now drop them. */ __skb_queue_purge(&tp->out_of_order_queue); - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); sk_stream_mem_reclaim(sk); @@ -3237,7 +3448,7 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_se static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) { - if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { if (before(seq, tp->rcv_nxt)) NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT); else @@ -3267,7 +3478,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) @@ -3583,7 +3794,7 @@ drop: if (!skb_peek(&tp->out_of_order_queue)) { /* Initial out of order segment, build 1 SACK. */ - if (tp->rx_opt.sack_ok) { + if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; tp->rx_opt.dsack = 0; tp->rx_opt.eff_sacks = 1; @@ -3648,7 +3859,7 @@ drop: } add_sack: - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) tcp_sack_new_ofo_skb(sk, seq, end_seq); } } @@ -3837,7 +4048,7 @@ static int tcp_prune_queue(struct sock *sk) * is in a sad state like this, we care only about integrity * of the connection not performance. */ - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); sk_stream_mem_reclaim(sk); } @@ -4538,8 +4749,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->tcp_header_len = sizeof(struct tcphdr); } - if (tp->rx_opt.sack_ok && sysctl_tcp_fack) - tp->rx_opt.sack_ok |= 2; + if (tcp_is_sack(tp) && sysctl_tcp_fack) + tcp_enable_fack(tp); tcp_mtup_init(sk); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e089a978e128..38cf73a56731 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -62,6 +62,7 @@ #include <linux/init.h> #include <linux/times.h> +#include <net/net_namespace.h> #include <net/icmp.h> #include <net/inet_hashtables.h> #include <net/tcp.h> @@ -2249,7 +2250,7 @@ int tcp_proc_register(struct tcp_seq_afinfo *afinfo) afinfo->seq_fops->llseek = seq_lseek; afinfo->seq_fops->release = seq_release_private; - p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); + p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; else @@ -2261,7 +2262,7 @@ void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) { if (!afinfo) return; - proc_net_remove(afinfo->name); + proc_net_remove(&init_net, afinfo->name); memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } @@ -2469,6 +2470,5 @@ EXPORT_SYMBOL(tcp_v4_syn_recv_sock); EXPORT_SYMBOL(tcp_proc_register); EXPORT_SYMBOL(tcp_proc_unregister); #endif -EXPORT_SYMBOL(sysctl_local_port_range); EXPORT_SYMBOL(sysctl_tcp_low_latency); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a12b08fca5ad..b61b76847ad9 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -368,6 +368,12 @@ void tcp_twsk_destructor(struct sock *sk) EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, + struct request_sock *req) +{ + tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; +} + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * @@ -399,7 +405,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; - newtp->left_out = 0; newtp->retrans_out = 0; newtp->sacked_out = 0; newtp->fackets_out = 0; @@ -440,7 +445,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { if (sysctl_tcp_fack) - newtp->rx_opt.sack_ok |= 2; + tcp_enable_fack(newtp); } newtp->window_clamp = req->window_clamp; newtp->rcv_ssthresh = req->rcv_wnd; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 666d8a58d14a..324b4207254a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -61,6 +61,18 @@ int sysctl_tcp_base_mss __read_mostly = 512; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +static inline void tcp_packets_out_inc(struct sock *sk, + const struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + int orig = tp->packets_out; + + tp->packets_out += tcp_skb_pcount(skb); + if (!orig) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); +} + static void update_send_head(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -269,6 +281,56 @@ static u16 tcp_select_window(struct sock *sk) return new_win; } +static inline void TCP_ECN_send_synack(struct tcp_sock *tp, + struct sk_buff *skb) +{ + TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; + if (!(tp->ecn_flags&TCP_ECN_OK)) + TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; +} + +static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->ecn_flags = 0; + if (sysctl_tcp_ecn) { + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; + tp->ecn_flags = TCP_ECN_OK; + } +} + +static __inline__ void +TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) +{ + if (inet_rsk(req)->ecn_ok) + th->ece = 1; +} + +static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, + int tcp_header_len) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->ecn_flags & TCP_ECN_OK) { + /* Not-retransmitted data segment: set ECT and inject CWR. */ + if (skb->len != tcp_header_len && + !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { + INET_ECN_xmit(sk); + if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { + tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; + tcp_hdr(skb)->cwr = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + } + } else { + /* ACK or retransmitted segment: clear ECT|CE */ + INET_ECN_dontxmit(sk); + } + if (tp->ecn_flags & TCP_ECN_DEMAND_CWR) + tcp_hdr(skb)->ece = 1; + } +} + static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, __u32 tstamp, __u8 **md5_hash) { @@ -584,16 +646,32 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_type = 0; } else { - unsigned int factor; - - factor = skb->len + (mss_now - 1); - factor /= mss_now; - skb_shinfo(skb)->gso_segs = factor; + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); skb_shinfo(skb)->gso_size = mss_now; skb_shinfo(skb)->gso_type = sk->sk_gso_type; } } +/* When a modification to fackets out becomes necessary, we need to check + * skb is counted to fackets_out or not. Another important thing is to + * tweak SACK fastpath hint too as it would overwrite all changes unless + * hint is also changed. + */ +static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb, + int decr) +{ + if (!tp->sacked_out || tcp_is_reno(tp)) + return; + + if (!before(tp->highest_sack, TCP_SKB_CB(skb)->seq)) + tp->fackets_out -= decr; + + /* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */ + if (tp->fastpath_skb_hint != NULL && + after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) + tp->fastpath_cnt_hint -= decr; +} + /* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the * packet to the list. This won't be called frequently, I hope. @@ -609,7 +687,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss BUG_ON(len > skb->len); - clear_all_retrans_hints(tp); + tcp_clear_retrans_hints_partial(tp); nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; @@ -634,6 +712,10 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; + if (tcp_is_sack(tp) && tp->sacked_out && + (TCP_SKB_CB(skb)->seq == tp->highest_sack)) + tp->highest_sack = TCP_SKB_CB(buff)->seq; + /* PSH and FIN should only be set in the second packet. */ flags = TCP_SKB_CB(skb)->flags; TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); @@ -682,32 +764,15 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) tp->retrans_out -= diff; - if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { + if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) tp->lost_out -= diff; - tp->left_out -= diff; - } - - if (diff > 0) { - /* Adjust Reno SACK estimate. */ - if (!tp->rx_opt.sack_ok) { - tp->sacked_out -= diff; - if ((int)tp->sacked_out < 0) - tp->sacked_out = 0; - tcp_sync_left_out(tp); - } - tp->fackets_out -= diff; - if ((int)tp->fackets_out < 0) - tp->fackets_out = 0; - /* SACK fastpath might overwrite it unless dealt with */ - if (tp->fastpath_skb_hint != NULL && - after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, - TCP_SKB_CB(skb)->seq)) { - tp->fastpath_cnt_hint -= diff; - if ((int)tp->fastpath_cnt_hint < 0) - tp->fastpath_cnt_hint = 0; - } + /* Adjust Reno SACK estimate. */ + if (tcp_is_reno(tp) && diff > 0) { + tcp_dec_pcount_approx_int(&tp->sacked_out, diff); + tcp_verify_left_out(tp); } + tcp_adjust_fackets_out(tp, skb, diff); } /* Link BUFF into the send queue. */ @@ -1654,8 +1719,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); - /* changing transmit queue under us so clear hints */ - clear_all_retrans_hints(tp); + if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out && + (TCP_SKB_CB(next_skb)->seq == tp->highest_sack))) + return; /* Ok. We will be able to collapse the packet. */ tcp_unlink_write_queue(next_skb, sk); @@ -1683,21 +1749,23 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) tp->retrans_out -= tcp_skb_pcount(next_skb); - if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) { + if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) tp->lost_out -= tcp_skb_pcount(next_skb); - tp->left_out -= tcp_skb_pcount(next_skb); - } /* Reno case is special. Sigh... */ - if (!tp->rx_opt.sack_ok && tp->sacked_out) { + if (tcp_is_reno(tp) && tp->sacked_out) tcp_dec_pcount_approx(&tp->sacked_out, next_skb); - tp->left_out -= tcp_skb_pcount(next_skb); + + tcp_adjust_fackets_out(tp, next_skb, tcp_skb_pcount(next_skb)); + tp->packets_out -= tcp_skb_pcount(next_skb); + + /* changed transmit queue under us so clear hints */ + tcp_clear_retrans_hints_partial(tp); + /* manually tune sacktag skb hint */ + if (tp->fastpath_skb_hint == next_skb) { + tp->fastpath_skb_hint = skb; + tp->fastpath_cnt_hint -= tcp_skb_pcount(skb); } - /* Not quite right: it can be > snd.fack, but - * it is better to underestimate fackets. - */ - tcp_dec_pcount_approx(&tp->fackets_out, next_skb); - tcp_packets_out_dec(tp, next_skb); sk_stream_free_skb(sk, next_skb); } } @@ -1731,12 +1799,12 @@ void tcp_simple_retransmit(struct sock *sk) } } - clear_all_retrans_hints(tp); + tcp_clear_all_retrans_hints(tp); if (!lost) return; - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); /* Don't muck with the congestion window here. * Reason is that we do not increase amount of _data_ @@ -1846,6 +1914,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) printk(KERN_DEBUG "retrans_out leaked.\n"); } #endif + if (!tp->retrans_out) + tp->lost_retrans_low = tp->snd_nxt; TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; tp->retrans_out += tcp_skb_pcount(skb); @@ -1938,40 +2008,35 @@ void tcp_xmit_retransmit_queue(struct sock *sk) return; /* No forward retransmissions in Reno are possible. */ - if (!tp->rx_opt.sack_ok) + if (tcp_is_reno(tp)) return; /* Yeah, we have to make difficult choice between forward transmission * and retransmission... Both ways have their merits... * * For now we do not retransmit anything, while we have some new - * segments to send. + * segments to send. In the other cases, follow rule 3 for + * NextSeg() specified in RFC3517. */ if (tcp_may_send_now(sk)) return; - if (tp->forward_skb_hint) { + /* If nothing is SACKed, highest_sack in the loop won't be valid */ + if (!tp->sacked_out) + return; + + if (tp->forward_skb_hint) skb = tp->forward_skb_hint; - packet_cnt = tp->forward_cnt_hint; - } else{ + else skb = tcp_write_queue_head(sk); - packet_cnt = 0; - } tcp_for_write_queue_from(skb, sk) { if (skb == tcp_send_head(sk)) break; - tp->forward_cnt_hint = packet_cnt; tp->forward_skb_hint = skb; - /* Similar to the retransmit loop above we - * can pretend that the retransmitted SKB - * we send out here will be composed of one - * real MSS sized packet because tcp_retransmit_skb() - * will fragment it if necessary. - */ - if (++packet_cnt > tp->fackets_out) + if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) break; if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index b76398d1b454..87dd5bff315f 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -26,6 +26,7 @@ #include <linux/module.h> #include <linux/ktime.h> #include <linux/time.h> +#include <net/net_namespace.h> #include <net/tcp.h> @@ -228,7 +229,7 @@ static __init int tcpprobe_init(void) if (!tcp_probe.log) goto err0; - if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &tcpprobe_fops)) goto err0; ret = register_jprobe(&tcp_jprobe); @@ -238,7 +239,7 @@ static __init int tcpprobe_init(void) pr_info("TCP probe registered (port=%d)\n", port); return 0; err1: - proc_net_remove(procname); + proc_net_remove(&init_net, procname); err0: kfree(tcp_probe.log); return ret; @@ -247,7 +248,7 @@ module_init(tcpprobe_init); static __exit void tcpprobe_exit(void) { - proc_net_remove(procname); + proc_net_remove(&init_net, procname); unregister_jprobe(&tcp_jprobe); kfree(tcp_probe.log); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index e9b151b3a598..d8970ecfcfc8 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -315,7 +315,7 @@ static void tcp_retransmit_timer(struct sock *sk) if (icsk->icsk_retransmits == 0) { if (icsk->icsk_ca_state == TCP_CA_Disorder || icsk->icsk_ca_state == TCP_CA_Recovery) { - if (tp->rx_opt.sack_ok) { + if (tcp_is_sack(tp)) { if (icsk->icsk_ca_state == TCP_CA_Recovery) NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); else diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 69d4bd10f9c6..cb9fc58efb2f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -98,6 +98,7 @@ #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/icmp.h> #include <net/route.h> #include <net/checksum.h> @@ -113,9 +114,8 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -static int udp_port_rover; - -static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) +static inline int __udp_lib_lport_inuse(__u16 num, + const struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; @@ -132,11 +132,10 @@ static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) * @sk: socket struct in question * @snum: port number to look up * @udptable: hash list table, must be of UDP_HTABLE_SIZE - * @port_rover: pointer to record of last unallocated port * @saddr_comp: AF-dependent comparison of bound local IP addresses */ int __udp_lib_get_port(struct sock *sk, unsigned short snum, - struct hlist_head udptable[], int *port_rover, + struct hlist_head udptable[], int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2 ) ) { @@ -146,49 +145,56 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, int error = 1; write_lock_bh(&udp_hash_lock); - if (snum == 0) { - int best_size_so_far, best, result, i; - - if (*port_rover > sysctl_local_port_range[1] || - *port_rover < sysctl_local_port_range[0]) - *port_rover = sysctl_local_port_range[0]; - best_size_so_far = 32767; - best = result = *port_rover; - for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - int size; - - head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(head)) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] + - ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); + + if (!snum) { + int i, low, high; + unsigned rover, best, best_size_so_far; + + inet_get_local_port_range(&low, &high); + + best_size_so_far = UINT_MAX; + best = rover = net_random() % (high - low) + low; + + /* 1st pass: look for empty (or shortest) hash chain */ + for (i = 0; i < UDP_HTABLE_SIZE; i++) { + int size = 0; + + head = &udptable[rover & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(head)) goto gotit; - } - size = 0; + sk_for_each(sk2, node, head) { if (++size >= best_size_so_far) goto next; } best_size_so_far = size; - best = result; + best = rover; next: - ; + /* fold back if end of range */ + if (++rover > high) + rover = low + ((rover - low) + & (UDP_HTABLE_SIZE - 1)); + + } - result = best; - for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; - i++, result += UDP_HTABLE_SIZE) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] - + ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); - if (! __udp_lib_lport_inuse(result, udptable)) - break; + + /* 2nd pass: find hole in shortest hash chain */ + rover = best; + for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { + if (! __udp_lib_lport_inuse(rover, udptable)) + goto gotit; + rover += UDP_HTABLE_SIZE; + if (rover > high) + rover = low + ((rover - low) + & (UDP_HTABLE_SIZE - 1)); } - if (i >= (1 << 16) / UDP_HTABLE_SIZE) - goto fail; + + + /* All ports in use! */ + goto fail; + gotit: - *port_rover = snum = result; + snum = rover; } else { head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; @@ -201,6 +207,7 @@ gotit: (*saddr_comp)(sk, sk2) ) goto fail; } + inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { @@ -217,7 +224,7 @@ fail: int udp_get_port(struct sock *sk, unsigned short snum, int (*scmp)(const struct sock *, const struct sock *)) { - return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); + return __udp_lib_get_port(sk, snum, udp_hash, scmp); } int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) @@ -1560,7 +1567,7 @@ int udp_proc_register(struct udp_seq_afinfo *afinfo) afinfo->seq_fops->llseek = seq_lseek; afinfo->seq_fops->release = seq_release_private; - p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); + p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; else @@ -1572,7 +1579,7 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) { if (!afinfo) return; - proc_net_remove(afinfo->name); + proc_net_remove(&init_net, afinfo->name); memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 820a477cfaa6..6c55828e41ba 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -9,7 +9,7 @@ extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); extern int __udp_lib_get_port(struct sock *sk, unsigned short snum, - struct hlist_head udptable[], int *port_rover, + struct hlist_head udptable[], int (*)(const struct sock*,const struct sock*)); extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index f34fd686a8f1..94977205abb4 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -16,12 +16,11 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly; struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; -static int udplite_port_rover; int udplite_get_port(struct sock *sk, unsigned short p, int (*c)(const struct sock *, const struct sock *)) { - return __udp_lib_get_port(sk, p, udplite_hash, &udplite_port_rover, c); + return __udp_lib_get_port(sk, p, udplite_hash, c); } static int udplite_v4_get_port(struct sock *sk, unsigned short snum) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 2fa108245413..e9bbfde19ac3 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -54,12 +54,14 @@ static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) int xfrm_nr = 0; int decaps = 0; int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq); + unsigned int nhoff = offsetof(struct iphdr, protocol); if (err != 0) goto drop; do { const struct iphdr *iph = ip_hdr(skb); + int nexthdr; if (xfrm_nr == XFRM_MAX_DEPTH) goto drop; @@ -82,9 +84,12 @@ static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) if (xfrm_state_check_expire(x)) goto drop_unlock; - if (x->type->input(x, skb)) + nexthdr = x->type->input(x, skb); + if (nexthdr <= 0) goto drop_unlock; + skb_network_header(skb)[nhoff] = nexthdr; + /* only the first xfrm gets the encap type */ encap_type = 0; diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index a73e710740c2..73d2338bec55 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -20,38 +20,33 @@ /* Add encapsulation header. * * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. - * The following fields in it shall be filled in by x->type->output: - * tot_len - * check - * - * On exit, skb->h will be set to the start of the payload to be processed - * by x->type->output and skb->nh will be set to the top IP header. */ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) { + struct ip_beet_phdr *ph; struct iphdr *iph, *top_iph; int hdrlen, optlen; iph = ip_hdr(skb); - skb->transport_header = skb->network_header; hdrlen = 0; optlen = iph->ihl * 4 - sizeof(*iph); if (unlikely(optlen)) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); - skb_push(skb, x->props.header_len - IPV4_BEET_PHMAXLEN + hdrlen); - skb_reset_network_header(skb); - top_iph = ip_hdr(skb); - skb->transport_header += sizeof(*iph) - hdrlen; + skb_set_network_header(skb, IPV4_BEET_PHMAXLEN - x->props.header_len - + hdrlen); + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + sizeof(*iph); + + ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen); + top_iph = ip_hdr(skb); memmove(top_iph, iph, sizeof(*iph)); if (unlikely(optlen)) { - struct ip_beet_phdr *ph; - BUG_ON(optlen < 0); - ph = (struct ip_beet_phdr *)skb_transport_header(skb); ph->padlen = 4 - (optlen & 4); ph->hdrlen = optlen / 8; ph->nexthdr = top_iph->protocol; diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 601047161ea6..fd840c7d75ea 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -17,18 +17,17 @@ * * The IP header will be moved forward to make space for the encapsulation * header. - * - * On exit, skb->h will be set to the start of the payload to be processed - * by x->type->output and skb->nh will be set to the top IP header. */ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); int ihl = iph->ihl * 4; + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); skb->transport_header = skb->network_header + ihl; - skb_push(skb, x->props.header_len); - skb_reset_network_header(skb); + __skb_pull(skb, ihl); memmove(skb_network_header(skb), iph, ihl); return 0; } diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 9963700e74c1..1ae9d32276f0 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -31,13 +31,7 @@ static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) /* Add encapsulation header. * - * The top IP header will be constructed per RFC 2401. The following fields - * in it shall be filled in by x->type->output: - * tot_len - * check - * - * On exit, skb->h will be set to the start of the payload to be processed - * by x->type->output and skb->nh will be set to the top IP header. + * The top IP header will be constructed per RFC 2401. */ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -47,10 +41,11 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) int flags; iph = ip_hdr(skb); - skb->transport_header = skb->network_header; - skb_push(skb, x->props.header_len); - skb_reset_network_header(skb); + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + sizeof(*iph); top_iph = ip_hdr(skb); top_iph->ihl = 5; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 44ef208a75cb..434ef302ba83 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -12,7 +12,6 @@ #include <linux/if_ether.h> #include <linux/kernel.h> #include <linux/skbuff.h> -#include <linux/spinlock.h> #include <linux/netfilter_ipv4.h> #include <net/ip.h> #include <net/xfrm.h> @@ -41,58 +40,32 @@ out: return ret; } -static int xfrm4_output_one(struct sk_buff *skb) +static inline int xfrm4_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; + struct iphdr *iph; int err; - if (skb->ip_summed == CHECKSUM_PARTIAL) { - err = skb_checksum_help(skb); - if (err) - goto error_nolock; - } - if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; } - do { - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; - - err = x->mode->output(x, skb); - if (err) - goto error; - - err = x->type->output(x, skb); - if (err) - goto error; - - x->curlft.bytes += skb->len; - x->curlft.packets++; + err = xfrm_output(skb); + if (err) + goto error_nolock; - spin_unlock_bh(&x->lock); - - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - dst = skb->dst; - x = dst->xfrm; - } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len); + ip_send_check(iph); IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; err = 0; out_exit: return err; -error: - spin_unlock_bh(&x->lock); error_nolock: kfree_skb(skb); goto out_exit; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 4ff8ed30024f..329825ca68fe 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -306,7 +306,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xdst = (struct xfrm_dst *)dst; if (xdst->u.rt.idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev); BUG_ON(!loopback_idev); do { diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 9275c79119b6..1312417608e2 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -12,17 +12,13 @@ static int ipip_output(struct xfrm_state *x, struct sk_buff *skb) { - struct iphdr *iph = ip_hdr(skb); - - iph->tot_len = htons(skb->len); - ip_send_check(iph); - + skb_push(skb, -skb_network_offset(skb)); return 0; } static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) { - return 0; + return IPPROTO_IP; } static int ipip_init_state(struct xfrm_state *x) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 45b4c82148a0..52d10d213217 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -62,6 +62,7 @@ #include <linux/notifier.h> #include <linux/string.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/snmp.h> @@ -73,6 +74,7 @@ #include <net/tcp.h> #include <net/ip.h> #include <net/netlink.h> +#include <net/pkt_sched.h> #include <linux/if_tunnel.h> #include <linux/rtnetlink.h> @@ -212,6 +214,12 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +/* Check if a valid qdisc is available */ +static inline int addrconf_qdisc_ok(struct net_device *dev) +{ + return (dev->qdisc != &noop_qdisc); +} + static void addrconf_del_timer(struct inet6_ifaddr *ifp) { if (del_timer(&ifp->timer)) @@ -260,9 +268,15 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) sizeof(struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) goto err_icmp; + if (snmp_mib_init((void **)idev->stats.icmpv6msg, + sizeof(struct icmpv6msg_mib), + __alignof__(struct icmpv6msg_mib)) < 0) + goto err_icmpmsg; return 0; +err_icmpmsg: + snmp_mib_free((void **)idev->stats.icmpv6); err_icmp: snmp_mib_free((void **)idev->stats.ipv6); err_ip: @@ -271,6 +285,7 @@ err_ip: static int snmp6_free_dev(struct inet6_dev *idev) { + snmp_mib_free((void **)idev->stats.icmpv6msg); snmp_mib_free((void **)idev->stats.icmpv6); snmp_mib_free((void **)idev->stats.ipv6); return 0; @@ -376,7 +391,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) } #endif - if (netif_running(dev) && netif_carrier_ok(dev)) + if (netif_running(dev) && addrconf_qdisc_ok(dev)) ndev->if_flags |= IF_READY; ipv6_mc_init_dev(ndev); @@ -449,7 +464,7 @@ static void addrconf_forward_change(void) struct inet6_dev *idev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { @@ -911,7 +926,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; @@ -1857,7 +1872,7 @@ int addrconf_set_dstaddr(void __user *arg) if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) goto err_exit; - dev = __dev_get_by_index(ireq.ifr6_ifindex); + dev = __dev_get_by_index(&init_net, ireq.ifr6_ifindex); err = -ENODEV; if (dev == NULL) @@ -1888,7 +1903,7 @@ int addrconf_set_dstaddr(void __user *arg) if (err == 0) { err = -ENOBUFS; - if ((dev = __dev_get_by_name(p.name)) == NULL) + if ((dev = __dev_get_by_name(&init_net, p.name)) == NULL) goto err_exit; err = dev_open(dev); } @@ -1918,7 +1933,7 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; - if ((dev = __dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) return -ENODEV; if ((idev = addrconf_add_dev(dev)) == NULL) @@ -1969,7 +1984,7 @@ static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) struct inet6_dev *idev; struct net_device *dev; - if ((dev = __dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) return -ENODEV; if ((idev = __in6_dev_get(dev)) == NULL) @@ -2064,7 +2079,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) return; } - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; @@ -2220,12 +2235,12 @@ static void ip6_tnl_add_linklocal(struct inet6_dev *idev) /* first try to inherit the link-local address from the link device */ if (idev->dev->iflink && - (link_dev = __dev_get_by_index(idev->dev->iflink))) { + (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } /* then try to inherit it from any device */ - for_each_netdev(link_dev) { + for_each_netdev(&init_net, link_dev) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } @@ -2258,6 +2273,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, int run_pending = 0; int err; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch(event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { @@ -2272,7 +2290,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; if (event == NETDEV_UP) { - if (!netif_carrier_ok(dev)) { + if (!addrconf_qdisc_ok(dev)) { /* device is not ready yet. */ printk(KERN_INFO "ADDRCONF(NETDEV_UP): %s: " @@ -2284,7 +2302,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev) idev->if_flags |= IF_READY; } else { - if (!netif_carrier_ok(dev)) { + if (!addrconf_qdisc_ok(dev)) { /* device is still not ready. */ break; } @@ -2399,7 +2417,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) ASSERT_RTNL(); - if (dev == &loopback_dev && how == 1) + if (dev == init_net.loopback_dev && how == 1) how = 0; rt6_ifdown(dev); @@ -2491,9 +2509,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) else ipv6_mc_down(idev); - /* Step 5: netlink notification of this interface */ idev->tstamp = jiffies; - inet6_ifinfo_notify(RTM_DELLINK, idev); /* Shot the device (if unregistered) */ @@ -2797,24 +2813,8 @@ static const struct seq_operations if6_seq_ops = { static int if6_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct if6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &if6_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &if6_seq_ops, + sizeof(struct if6_iter_state)); } static const struct file_operations if6_fops = { @@ -2827,14 +2827,14 @@ static const struct file_operations if6_fops = { int __init if6_proc_init(void) { - if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops)) + if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops)) return -ENOMEM; return 0; } void if6_proc_exit(void) { - proc_net_remove("if_inet6"); + proc_net_remove(&init_net, "if_inet6"); } #endif /* CONFIG_PROC_FS */ @@ -3080,7 +3080,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) valid_lft = INFINITY_LIFE_TIME; } - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(&init_net, ifm->ifa_index); if (dev == NULL) return -ENODEV; @@ -3264,7 +3264,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, s_ip_idx = ip_idx = cb->args[1]; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -3373,7 +3373,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, ifm = nlmsg_data(nlh); if (ifm->ifa_index) - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(&init_net, ifm->ifa_index); if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { err = -EADDRNOTAVAIL; @@ -3585,7 +3585,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) read_lock(&dev_base_lock); idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if ((idev = in6_dev_get(dev)) == NULL) @@ -4203,16 +4203,19 @@ int __init addrconf_init(void) * device and it being up should be removed. */ rtnl_lock(); - if (!ipv6_add_dev(&loopback_dev)) + if (!ipv6_add_dev(init_net.loopback_dev)) err = -ENOMEM; rtnl_unlock(); if (err) return err; - ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); + ip6_null_entry.u.dst.dev = init_net.loopback_dev; + ip6_null_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev); - ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev); + ip6_prohibit_entry.u.dst.dev = init_net.loopback_dev; + ip6_prohibit_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); + ip6_blk_hole_entry.u.dst.dev = init_net.loopback_dev; + ip6_blk_hole_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif register_netdevice_notifier(&ipv6_dev_notf); @@ -4262,12 +4265,12 @@ void __exit addrconf_cleanup(void) * clean dev list. */ - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (__in6_dev_get(dev) == NULL) continue; addrconf_ifdown(dev, 1); } - addrconf_ifdown(&loopback_dev, 2); + addrconf_ifdown(init_net.loopback_dev, 2); /* * Check hash table. @@ -4293,6 +4296,6 @@ void __exit addrconf_cleanup(void) rtnl_unlock(); #ifdef CONFIG_PROC_FS - proc_net_remove("if_inet6"); + proc_net_remove(&init_net, "if_inet6"); #endif } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b5f96372ad73..bc929381fa46 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -81,7 +81,7 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } -static int inet6_create(struct socket *sock, int protocol) +static int inet6_create(struct net *net, struct socket *sock, int protocol) { struct inet_sock *inet; struct ipv6_pinfo *np; @@ -94,6 +94,9 @@ static int inet6_create(struct socket *sock, int protocol) int try_loading_module = 0; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && !inet_ehash_secret) @@ -159,7 +162,7 @@ lookup_protocol: BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, 1); if (sk == NULL) goto out; @@ -299,7 +302,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EINVAL; goto out; } - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -716,6 +719,9 @@ static int __init init_ipv6_mibs(void) if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) goto err_icmp_mib; + if (snmp_mib_init((void **)icmpv6msg_statistics, + sizeof (struct icmpv6msg_mib), __alignof__(struct icmpv6_mib)) < 0) + goto err_icmpmsg_mib; if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib), __alignof__(struct udp_mib)) < 0) goto err_udp_mib; @@ -727,6 +733,8 @@ static int __init init_ipv6_mibs(void) err_udplite_mib: snmp_mib_free((void **)udp_stats_in6); err_udp_mib: + snmp_mib_free((void **)icmpv6msg_statistics); +err_icmpmsg_mib: snmp_mib_free((void **)icmpv6_statistics); err_icmp_mib: snmp_mib_free((void **)ipv6_statistics); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 53f46ab6af70..f9f689162692 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -29,6 +29,7 @@ #include <net/ah.h> #include <linux/crypto.h> #include <linux/pfkeyv2.h> +#include <linux/spinlock.h> #include <linux/string.h> #include <net/icmp.h> #include <net/ipv6.h> @@ -235,11 +236,12 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) char hdrs[0]; } *tmp_ext; - top_iph = (struct ipv6hdr *)skb->data; + skb_push(skb, -skb_network_offset(skb)); + top_iph = ipv6_hdr(skb); top_iph->payload_len = htons(skb->len - sizeof(*top_iph)); - nexthdr = *skb_network_header(skb); - *skb_network_header(skb) = IPPROTO_AH; + nexthdr = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_AH; /* When there are no extension headers, we only need to save the first * 8 bytes of the base IP header. @@ -268,7 +270,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) goto error_free_iph; } - ah = (struct ip_auth_hdr *)skb_transport_header(skb); + ah = ip_auth_hdr(skb); ah->nexthdr = nexthdr; top_iph->priority = 0; @@ -278,19 +280,19 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->hop_limit = 0; ahp = x->data; - ah->hdrlen = (XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + - ahp->icv_trunc_len) >> 2) - 2; + ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(++x->replay.oseq); - xfrm_aevent_doreplay(x); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq); + + spin_lock_bh(&x->lock); err = ah_mac_digest(ahp, skb, ah->auth_data); - if (err) - goto error_free_iph; memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); + spin_unlock_bh(&x->lock); - err = 0; + if (err) + goto error_free_iph; memcpy(top_iph, tmp_base, sizeof(tmp_base)); if (tmp_ext) { @@ -324,7 +326,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) * There is offset of AH before IPv6 header after the process. */ - struct ipv6_auth_hdr *ah; + struct ip_auth_hdr *ah; struct ipv6hdr *ip6h; struct ah_data *ahp; unsigned char *tmp_hdr = NULL; @@ -343,13 +345,13 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; hdr_len = skb->data - skb_network_header(skb); - ah = (struct ipv6_auth_hdr*)skb->data; + ah = (struct ip_auth_hdr *)skb->data; ahp = x->data; nexthdr = ah->nexthdr; ah_hlen = (ah->hdrlen + 2) << 2; - if (ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_full_len) && - ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len)) + if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && + ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) goto out; if (!pskb_may_pull(skb, ah_hlen)) @@ -429,10 +431,6 @@ static int ah6_init_state(struct xfrm_state *x) if (!x->aalg) goto error; - /* null auth can use a zero length key */ - if (x->aalg->alg_key_len > 512) - goto error; - if (x->encap) goto error; @@ -440,14 +438,13 @@ static int ah6_init_state(struct xfrm_state *x) if (ahp == NULL) return -ENOMEM; - ahp->key = x->aalg->alg_key; - ahp->key_len = (x->aalg->alg_key_len+7)/8; tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto error; ahp->tfm = tfm; - if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len)) + if (crypto_hash_setkey(tfm, x->aalg->alg_key, + (x->aalg->alg_key_len + 7) / 8)) goto error; /* @@ -476,7 +473,8 @@ static int ah6_init_state(struct xfrm_state *x) if (!ahp->work_icv) goto error; - x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len); + x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + + ahp->icv_trunc_len); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = ahp; @@ -511,6 +509,7 @@ static struct xfrm_type ah6_type = .description = "AH6", .owner = THIS_MODULE, .proto = IPPROTO_AH, + .flags = XFRM_TYPE_REPLAY_PROT, .init_state = ah6_init_state, .destructor = ah6_destroy, .input = ah6_input, diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index b8c533fbdb63..f915c4df9820 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -30,6 +30,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/snmp.h> @@ -111,10 +112,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK); + dev = dev_get_by_flags(&init_net, IFF_UP, IFF_UP|IFF_LOOPBACK); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) { err = -ENODEV; @@ -195,7 +196,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) write_unlock_bh(&ipv6_sk_ac_lock); - dev = dev_get_by_index(pac->acl_ifindex); + dev = dev_get_by_index(&init_net, pac->acl_ifindex); if (dev) { ipv6_dev_ac_dec(dev, &pac->acl_addr); dev_put(dev); @@ -223,7 +224,7 @@ void ipv6_sock_ac_close(struct sock *sk) if (pac->acl_ifindex != prev_index) { if (dev) dev_put(dev); - dev = dev_get_by_index(pac->acl_ifindex); + dev = dev_get_by_index(&init_net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -413,7 +414,7 @@ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) break; read_unlock_bh(&idev->lock); in6_dev_put(idev); - return aca != 0; + return aca != NULL; } return 0; } @@ -428,7 +429,7 @@ int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr) if (dev) return ipv6_chk_acast_dev(dev, addr); read_lock(&dev_base_lock); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) if (ipv6_chk_acast_dev(dev, addr)) { found = 1; break; @@ -452,7 +453,7 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) struct ac6_iter_state *state = ac6_seq_private(seq); state->idev = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -548,24 +549,8 @@ static const struct seq_operations ac6_seq_ops = { static int ac6_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct ac6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &ac6_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &ac6_seq_ops, + sizeof(struct ac6_iter_state)); } static const struct file_operations ac6_seq_fops = { @@ -578,7 +563,7 @@ static const struct file_operations ac6_seq_fops = { int __init ac6_proc_init(void) { - if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops)) + if (!proc_net_fops_create(&init_net, "anycast6", S_IRUGO, &ac6_seq_fops)) return -ENOMEM; return 0; @@ -586,7 +571,7 @@ int __init ac6_proc_init(void) void ac6_proc_exit(void) { - proc_net_remove("anycast6"); + proc_net_remove(&init_net, "anycast6"); } #endif diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index fe0f49024a0a..2ed689ac449e 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -544,7 +544,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, if (!src_info->ipi6_ifindex) return -EINVAL; else { - dev = dev_get_by_index(src_info->ipi6_ifindex); + dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex); if (!dev) return -ENODEV; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 2db31ce3c7e6..9eb928598351 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -34,6 +34,7 @@ #include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/random.h> +#include <linux/spinlock.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -42,8 +43,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - struct ipv6hdr *top_iph; - struct ipv6_esp_hdr *esph; + struct ip_esp_hdr *esph; struct crypto_blkcipher *tfm; struct blkcipher_desc desc; struct sk_buff *trailer; @@ -53,13 +53,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int nfrags; u8 *tail; struct esp_data *esp = x->data; - int hdr_len = (skb_transport_offset(skb) + - sizeof(*esph) + esp->conf.ivlen); - /* Strip IP+ESP header. */ - __skb_pull(skb, hdr_len); - - /* Now skb is pure payload to encrypt */ + /* skb is pure payload to encrypt */ err = -ENOMEM; /* Round to block size */ @@ -88,15 +83,15 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) tail[clen-skb->len - 2] = (clen - skb->len) - 2; pskb_put(skb, trailer, clen - skb->len); - top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len); - esph = (struct ipv6_esp_hdr *)skb_transport_header(skb); - top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph)); - *(skb_tail_pointer(trailer) - 1) = *skb_network_header(skb); - *skb_network_header(skb) = IPPROTO_ESP; + skb_push(skb, -skb_network_offset(skb)); + esph = ip_esp_hdr(skb); + *(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_ESP; esph->spi = x->id.spi; - esph->seq_no = htonl(++x->replay.oseq); - xfrm_aevent_doreplay(x); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); + + spin_lock_bh(&x->lock); if (esp->conf.ivlen) { if (unlikely(!esp->conf.ivinitted)) { @@ -112,7 +107,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) if (unlikely(nfrags > ESP_NUM_FAST_SG)) { sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); if (!sg) - goto error; + goto unlock; } skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); @@ -121,7 +116,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) } while (0); if (unlikely(err)) - goto error; + goto unlock; if (esp->conf.ivlen) { memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); @@ -134,6 +129,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); } +unlock: + spin_unlock_bh(&x->lock); + error: return err; } @@ -141,19 +139,19 @@ error: static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *iph; - struct ipv6_esp_hdr *esph; + struct ip_esp_hdr *esph; struct esp_data *esp = x->data; struct crypto_blkcipher *tfm = esp->conf.tfm; struct blkcipher_desc desc = { .tfm = tfm }; struct sk_buff *trailer; int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); int alen = esp->auth.icv_trunc_len; - int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen; + int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen; int hdr_len = skb_network_header_len(skb); int nfrags; int ret = 0; - if (!pskb_may_pull(skb, sizeof(struct ipv6_esp_hdr))) { + if (!pskb_may_pull(skb, sizeof(*esph))) { ret = -EINVAL; goto out; } @@ -188,7 +186,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; - esph = (struct ipv6_esp_hdr*)skb->data; + esph = (struct ip_esp_hdr *)skb->data; iph = ipv6_hdr(skb); /* Get ivec. This can be wrong, check against another impls. */ @@ -207,7 +205,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } } - skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen); + skb_to_sgvec(skb, sg, sizeof(*esph) + esp->conf.ivlen, elen); ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); @@ -259,7 +257,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; - struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset); + struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset); struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && @@ -297,11 +295,6 @@ static int esp6_init_state(struct xfrm_state *x) struct esp_data *esp = NULL; struct crypto_blkcipher *tfm; - /* null auth and encryption can have zero length keys */ - if (x->aalg) { - if (x->aalg->alg_key_len > 512) - goto error; - } if (x->ealg == NULL) goto error; @@ -316,15 +309,14 @@ static int esp6_init_state(struct xfrm_state *x) struct xfrm_algo_desc *aalg_desc; struct crypto_hash *hash; - esp->auth.key = x->aalg->alg_key; - esp->auth.key_len = (x->aalg->alg_key_len+7)/8; hash = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(hash)) goto error; esp->auth.tfm = hash; - if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len)) + if (crypto_hash_setkey(hash, x->aalg->alg_key, + (x->aalg->alg_key_len + 7) / 8)) goto error; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); @@ -346,8 +338,6 @@ static int esp6_init_state(struct xfrm_state *x) if (!esp->auth.work_icv) goto error; } - esp->conf.key = x->ealg->alg_key; - esp->conf.key_len = (x->ealg->alg_key_len+7)/8; tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto error; @@ -360,9 +350,10 @@ static int esp6_init_state(struct xfrm_state *x) goto error; esp->conf.ivinitted = 0; } - if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) + if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key, + (x->ealg->alg_key_len + 7) / 8)) goto error; - x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; + x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = esp; @@ -380,6 +371,7 @@ static struct xfrm_type esp6_type = .description = "ESP6", .owner = THIS_MODULE, .proto = IPPROTO_ESP, + .flags = XFRM_TYPE_REPLAY_PROT, .init_state = esp6_init_state, .destructor = esp6_destroy, .get_mtu = esp6_get_mtu, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 53b3998a486c..706622af206f 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -50,8 +50,6 @@ static struct fib6_rule local_rule = { }, }; -static LIST_HEAD(fib6_rules); - struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, pol_lookup_t lookup) { @@ -268,14 +266,14 @@ static struct fib_rules_ops fib6_rules_ops = { .nlmsg_payload = fib6_rule_nlmsg_payload, .nlgroup = RTNLGRP_IPV6_RULE, .policy = fib6_rule_policy, - .rules_list = &fib6_rules, + .rules_list = LIST_HEAD_INIT(fib6_rules_ops.rules_list), .owner = THIS_MODULE, }; void __init fib6_rules_init(void) { - list_add_tail(&local_rule.common.list, &fib6_rules); - list_add_tail(&main_rule.common.list, &fib6_rules); + list_add_tail(&local_rule.common.list, &fib6_rules_ops.rules_list); + list_add_tail(&main_rule.common.list, &fib6_rules_ops.rules_list); fib_rules_register(&fib6_rules_ops); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6a6714d154ed..47b8ce232e84 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -69,6 +69,8 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly; EXPORT_SYMBOL(icmpv6_statistics); +DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics) __read_mostly; +EXPORT_SYMBOL(icmpv6msg_statistics); /* * The ICMP socket(s). This is the most convenient way to flow control @@ -456,8 +458,6 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, } err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr)); - if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB) - ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH); ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); out_put: @@ -547,9 +547,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) } err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES); - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); - out_put: if (likely(idev != NULL)) in6_dev_put(idev); @@ -656,10 +653,7 @@ static int icmpv6_rcv(struct sk_buff **pskb) type = hdr->icmp6_type; - if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB) - ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH); - else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT) - ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST); + ICMP6MSGIN_INC_STATS_BH(idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 116f94a49071..25b931709749 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -139,6 +139,41 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); +static inline +void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + __ip6_dst_store(sk, dst, daddr, saddr); + +#ifdef CONFIG_XFRM + if (dst) { + struct rt6_info *rt = (struct rt6_info *)dst; + rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); + } +#endif +} + +static inline +struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) +{ + struct dst_entry *dst; + + dst = __sk_dst_check(sk, cookie); + +#ifdef CONFIG_XFRM + if (dst) { + struct rt6_info *rt = (struct rt6_info *)dst; + if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) { + sk->sk_dst_cache = NULL; + dst_release(dst); + dst = NULL; + } + } +#endif + + return dst; +} + int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) { struct sock *sk = skb->sk; @@ -166,7 +201,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) final_p = &final; } - dst = __sk_dst_check(sk, np->dst_cookie); + dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (dst == NULL) { int err = ip6_dst_lookup(sk, &dst, &fl); @@ -186,7 +221,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) return err; } - __ip6_dst_store(sk, dst, NULL, NULL); + __inet6_csk_dst_store(sk, dst, NULL, NULL); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index ae6b0e7eb488..1c2c27655435 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -254,18 +254,18 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, int ret; if (snum == 0) { - const int low = sysctl_local_port_range[0]; - const int high = sysctl_local_port_range[1]; - const int range = high - low; - int i, port; + int i, port, low, high, remaining; static u32 hint; const u32 offset = hint + inet6_sk_port_offset(sk); struct hlist_node *node; struct inet_timewait_sock *tw = NULL; + inet_get_local_port_range(&low, &high); + remaining = high - low; + local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; + for (i = 1; i <= remaining; i++) { + port = low + (i + offset) % remaining; head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; spin_lock(&head->lock); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6a612a701eaa..946cf389ab95 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1313,7 +1313,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) { int res; struct rt6_info *rt; - struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w; + struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { res = c->func(rt, c->arg); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 413a4ebb195c..217d60f9fc80 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -21,6 +21,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/ipv6.h> @@ -657,24 +658,8 @@ static const struct seq_operations ip6fl_seq_ops = { static int ip6fl_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct ip6fl_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &ip6fl_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &ip6fl_seq_ops, + sizeof(struct ip6fl_iter_state)); } static const struct file_operations ip6fl_seq_fops = { @@ -690,7 +675,7 @@ static const struct file_operations ip6fl_seq_fops = { void ip6_flowlabel_init(void) { #ifdef CONFIG_PROC_FS - proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); + proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); #endif } @@ -698,6 +683,6 @@ void ip6_flowlabel_cleanup(void) { del_timer(&ip6_fl_gc_timer); #ifdef CONFIG_PROC_FS - proc_net_remove("ip6_flowlabel"); + proc_net_remove(&init_net, "ip6_flowlabel"); #endif } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 30a5cb1b203e..9149fc239759 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -61,6 +61,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt u32 pkt_len; struct inet6_dev *idev; + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if (skb->pkt_type == PACKET_OTHERHOST) { kfree_skb(skb); return 0; @@ -86,7 +91,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt * * BTW, when we send a packet for our own local address on a * non-loopback interface (e.g. ethX), it is being delivered - * via the loopback interface (lo) here; skb->dev = &loopback_dev. + * via the loopback interface (lo) here; skb->dev = loopback_dev. * It, however, should be considered as if it is being * arrived via the sending interface (ethX), because of the * nature of scoping architecture. --yoshfuji diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 26de3c0ea31e..011082ed921a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -441,8 +441,10 @@ int ip6_forward(struct sk_buff *skb) /* IPv6 specs say nothing about it, but it is clear that we cannot send redirects to source routed frames. + We don't send redirects to frames decapsulated from IPsec. */ - if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) { + if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && + !skb->sp) { struct in6_addr *target = NULL; struct rt6_info *rt; struct neighbour *n = dst->neighbour; @@ -1397,6 +1399,13 @@ int ip6_push_pending_frames(struct sock *sk) skb->dst = dst_clone(&rt->u.dst); IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + if (proto == IPPROTO_ICMPV6) { + struct inet6_dev *idev = ip6_dst_idev(skb->dst); + + ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type); + ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); + } + err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (err) { if (err > 0) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ca774d8e3be3..2320cc27ff9e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -235,7 +235,7 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p) int i; for (i = 1; i < IP6_TNL_MAX; i++) { sprintf(name, "ip6tnl%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i == IP6_TNL_MAX) @@ -650,7 +650,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(p->link); + ldev = dev_get_by_index(&init_net, p->link); if ((ipv6_addr_is_multicast(&p->laddr) || likely(ipv6_chk_addr(&p->laddr, ldev, 0))) && @@ -786,7 +786,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(p->link); + ldev = dev_get_by_index(&init_net, p->link); if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0))) printk(KERN_WARNING @@ -1313,7 +1313,6 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) static void ip6_tnl_dev_setup(struct net_device *dev) { - SET_MODULE_OWNER(dev); dev->uninit = ip6_tnl_dev_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ip6_tnl_xmit; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 473f165310ea..28fc8edfdc3a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -65,8 +65,7 @@ static LIST_HEAD(ipcomp6_tfms_list); static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -ENOMEM; - struct ipv6hdr *iph; - struct ipv6_comp_hdr *ipch; + struct ip_comp_hdr *ipch; int plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; @@ -79,7 +78,6 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; /* Remove ipcomp header and decompress original payload */ - iph = ipv6_hdr(skb); ipch = (void *)skb->data; skb->transport_header = skb->network_header + sizeof(*ipch); __skb_pull(skb, sizeof(*ipch)); @@ -94,12 +92,10 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) tfm = *per_cpu_ptr(ipcd->tfms, cpu); err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); - if (err) { - err = -EINVAL; + if (err) goto out_put_cpu; - } - if (dlen < (plen + sizeof(struct ipv6_comp_hdr))) { + if (dlen < (plen + sizeof(*ipch))) { err = -EINVAL; goto out_put_cpu; } @@ -123,17 +119,15 @@ out: static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - struct ipv6hdr *top_iph; - struct ipv6_comp_hdr *ipch; + struct ip_comp_hdr *ipch; struct ipcomp_data *ipcd = x->data; int plen, dlen; u8 *start, *scratch; struct crypto_comp *tfm; int cpu; - int hdr_len = skb_transport_offset(skb); /* check whether datagram len is larger than threshold */ - if ((skb->len - hdr_len) < ipcd->threshold) { + if (skb->len < ipcd->threshold) { goto out_ok; } @@ -141,35 +135,33 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) goto out_ok; /* compression */ - plen = skb->len - hdr_len; + plen = skb->len; dlen = IPCOMP_SCRATCH_SIZE; - start = skb_transport_header(skb); + start = skb->data; cpu = get_cpu(); scratch = *per_cpu_ptr(ipcomp6_scratches, cpu); tfm = *per_cpu_ptr(ipcd->tfms, cpu); err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - if (err || (dlen + sizeof(struct ipv6_comp_hdr)) >= plen) { + if (err || (dlen + sizeof(*ipch)) >= plen) { put_cpu(); goto out_ok; } memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); put_cpu(); - pskb_trim(skb, hdr_len + dlen + sizeof(struct ip_comp_hdr)); + pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); /* insert ipcomp header and replace datagram */ - top_iph = (struct ipv6hdr *)skb->data; - - top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); - - ipch = (struct ipv6_comp_hdr *)start; - ipch->nexthdr = *skb_network_header(skb); + ipch = ip_comp_hdr(skb); + ipch->nexthdr = *skb_mac_header(skb); ipch->flags = 0; ipch->cpi = htons((u16 )ntohl(x->id.spi)); - *skb_network_header(skb) = IPPROTO_COMP; + *skb_mac_header(skb) = IPPROTO_COMP; out_ok: + skb_push(skb, -skb_network_offset(skb)); + return 0; } @@ -178,7 +170,8 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { __be32 spi; struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; - struct ipv6_comp_hdr *ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset); + struct ip_comp_hdr *ipcomph = + (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 6b038aa72e88..1334fc174bcf 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -249,7 +249,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } if (ipv6_only_sock(sk) || - !(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) { + !ipv6_addr_v4mapped(&np->daddr)) { retv = -EADDRNOTAVAIL; break; } @@ -539,12 +539,15 @@ done: case IPV6_MULTICAST_IF: if (sk->sk_type == SOCK_STREAM) goto e_inval; - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) - goto e_inval; - if (__dev_get_by_index(val) == NULL) { - retv = -ENODEV; - break; + if (val) { + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) + goto e_inval; + + if (__dev_get_by_index(&init_net, val) == NULL) { + retv = -ENODEV; + break; + } } np->mcast_oif = val; retv = 0; @@ -663,7 +666,7 @@ done: break; } gsf = kmalloc(optlen,GFP_KERNEL); - if (gsf == 0) { + if (!gsf) { retv = -ENOBUFS; break; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ae9881832a7e..331d728c2035 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -49,6 +49,7 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/snmp.h> @@ -214,7 +215,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -265,7 +266,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) { + if ((dev = dev_get_by_index(&init_net, mc_lst->ifindex)) != NULL) { struct inet6_dev *idev = in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); @@ -300,7 +301,7 @@ static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex) dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (!dev) return NULL; @@ -331,7 +332,7 @@ void ipv6_sock_mc_close(struct sock *sk) np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(mc_lst->ifindex); + dev = dev_get_by_index(&init_net, mc_lst->ifindex); if (dev) { struct inet6_dev *idev = in6_dev_get(dev); @@ -1406,7 +1407,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) /* we assume size > sizeof(ra) here */ skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err); - if (skb == 0) + if (!skb) return NULL; skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -1437,17 +1438,12 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) static inline int mld_dev_queue_xmit2(struct sk_buff *skb) { struct net_device *dev = skb->dev; + unsigned char ha[MAX_ADDR_LEN]; - if (dev->hard_header) { - unsigned char ha[MAX_ADDR_LEN]; - int err; - - ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1); - err = dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len); - if (err < 0) { - kfree_skb(skb); - return err; - } + ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1); + if (dev_hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len) < 0) { + kfree_skb(skb); + return -EINVAL; } return dev_queue_xmit(skb); } @@ -1478,10 +1474,11 @@ static void mld_sendpack(struct sk_buff *skb) err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, mld_dev_queue_xmit); if (!err) { - ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS); - IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); + ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); + ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTMCASTPKTS); } else - IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTDISCARDS); if (likely(idev != NULL)) in6_dev_put(idev); @@ -1821,10 +1818,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, mld_dev_queue_xmit); if (!err) { - if (type == ICMPV6_MGM_REDUCTION) - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBREDUCTIONS); - else - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBRESPONSES); + ICMP6MSGOUT_INC_STATS(idev, type); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); } else @@ -2150,7 +2144,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, /* callers have the socket lock and a write lock on ipv6_sk_mc_lock, * so no other readers or writers of iml or its sflist */ - if (iml->sflist == 0) { + if (!iml->sflist) { /* any-source empty exclude case */ return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); } @@ -2332,7 +2326,7 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); state->idev = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -2432,24 +2426,8 @@ static const struct seq_operations igmp6_mc_seq_ops = { static int igmp6_mc_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct igmp6_mc_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &igmp6_mc_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &igmp6_mc_seq_ops, + sizeof(struct igmp6_mc_iter_state)); } static const struct file_operations igmp6_mc_seq_fops = { @@ -2476,7 +2454,7 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) state->idev = NULL; state->im = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2606,24 +2584,8 @@ static const struct seq_operations igmp6_mcf_seq_ops = { static int igmp6_mcf_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct igmp6_mcf_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &igmp6_mcf_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &igmp6_mcf_seq_ops, + sizeof(struct igmp6_mcf_iter_state)); } static const struct file_operations igmp6_mcf_seq_fops = { @@ -2658,8 +2620,8 @@ int __init igmp6_init(struct net_proto_family *ops) np->hop_limit = 1; #ifdef CONFIG_PROC_FS - proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops); - proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); + proc_net_fops_create(&init_net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops); + proc_net_fops_create(&init_net, "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); #endif return 0; @@ -2671,7 +2633,7 @@ void igmp6_cleanup(void) igmp6_socket = NULL; /* for safety */ #ifdef CONFIG_PROC_FS - proc_net_remove("mcfilter6"); - proc_net_remove("igmp6"); + proc_net_remove(&init_net, "mcfilter6"); + proc_net_remove(&init_net, "igmp6"); #endif } diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 8a1399ce38ce..7fd841d41019 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -153,11 +153,11 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) u8 nexthdr; int len; - iph = (struct ipv6hdr *)skb->data; - iph->payload_len = htons(skb->len - sizeof(*iph)); + skb_push(skb, -skb_network_offset(skb)); + iph = ipv6_hdr(skb); - nexthdr = *skb_network_header(skb); - *skb_network_header(skb) = IPPROTO_DSTOPTS; + nexthdr = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_DSTOPTS; dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb); dstopt->nexthdr = nexthdr; @@ -172,7 +172,9 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) len = ((char *)hao - (char *)dstopt) + sizeof(*hao); memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr)); + spin_lock_bh(&x->lock); memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr)); + spin_unlock_bh(&x->lock); BUG_TRAP(len == x->props.header_len); dstopt->hdrlen = (x->props.header_len >> 3) - 1; @@ -365,11 +367,11 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) struct rt2_hdr *rt2; u8 nexthdr; - iph = (struct ipv6hdr *)skb->data; - iph->payload_len = htons(skb->len - sizeof(*iph)); + skb_push(skb, -skb_network_offset(skb)); + iph = ipv6_hdr(skb); - nexthdr = *skb_network_header(skb); - *skb_network_header(skb) = IPPROTO_ROUTING; + nexthdr = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_ROUTING; rt2 = (struct rt2_hdr *)skb_transport_header(skb); rt2->rt_hdr.nexthdr = nexthdr; @@ -381,7 +383,9 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) BUG_TRAP(rt2->rt_hdr.hdrlen == 2); memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr)); + spin_lock_bh(&x->lock); memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr)); + spin_unlock_bh(&x->lock); return 0; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 5b596659177c..6cc33dc83d1c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -15,9 +15,10 @@ /* * Changes: * + * Pierre Ynard : export userland ND options + * through netlink (RDNSS support) * Lars Fenneberg : fixed MTU setting on receipt * of an RA. - * * Janos Farkas : kmalloc failure checks * Alexey Kuznetsov : state machine reworked * and moved to net/core. @@ -78,6 +79,9 @@ #include <net/addrconf.h> #include <net/icmp.h> +#include <net/netlink.h> +#include <linux/rtnetlink.h> + #include <net/flow.h> #include <net/ip6_checksum.h> #include <linux/proc_fs.h> @@ -161,6 +165,8 @@ struct ndisc_options { struct nd_opt_hdr *nd_opts_ri; struct nd_opt_hdr *nd_opts_ri_end; #endif + struct nd_opt_hdr *nd_useropts; + struct nd_opt_hdr *nd_useropts_end; }; #define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] @@ -225,6 +231,22 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, return (cur <= end && cur->nd_opt_type == type ? cur : NULL); } +static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) +{ + return (opt->nd_opt_type == ND_OPT_RDNSS); +} + +static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, + struct nd_opt_hdr *end) +{ + if (!cur || !end || cur >= end) + return NULL; + do { + cur = ((void *)cur) + (cur->nd_opt_len << 3); + } while(cur < end && !ndisc_is_useropt(cur)); + return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL); +} + static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, struct ndisc_options *ndopts) { @@ -256,7 +278,7 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, break; case ND_OPT_PREFIX_INFO: ndopts->nd_opts_pi_end = nd_opt; - if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) + if (!ndopts->nd_opt_array[nd_opt->nd_opt_type]) ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; break; #ifdef CONFIG_IPV6_ROUTE_INFO @@ -267,14 +289,21 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, break; #endif default: - /* - * Unknown options must be silently ignored, - * to accommodate future extension to the protocol. - */ - ND_PRINTK2(KERN_NOTICE - "%s(): ignored unsupported option; type=%d, len=%d\n", - __FUNCTION__, - nd_opt->nd_opt_type, nd_opt->nd_opt_len); + if (ndisc_is_useropt(nd_opt)) { + ndopts->nd_useropts_end = nd_opt; + if (!ndopts->nd_useropts) + ndopts->nd_useropts = nd_opt; + } else { + /* + * Unknown options must be silently ignored, + * to accommodate future extension to the + * protocol. + */ + ND_PRINTK2(KERN_NOTICE + "%s(): ignored unsupported option; type=%d, len=%d\n", + __FUNCTION__, + nd_opt->nd_opt_type, nd_opt->nd_opt_len); + } } opt_len -= l; nd_opt = ((void *)nd_opt) + l; @@ -354,7 +383,7 @@ static int ndisc_constructor(struct neighbour *neigh) rcu_read_unlock(); neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; - if (dev->hard_header == NULL) { + if (!dev->header_ops) { neigh->nud_state = NUD_NOARP; neigh->ops = &ndisc_direct_ops; neigh->output = neigh->ops->queue_xmit; @@ -371,7 +400,7 @@ static int ndisc_constructor(struct neighbour *neigh) neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->broadcast, dev->addr_len); } - if (dev->hard_header_cache) + if (dev->header_ops->cache) neigh->ops = &ndisc_hh_ops; else neigh->ops = &ndisc_generic_ops; @@ -431,7 +460,7 @@ static void __ndisc_send(struct net_device *dev, struct neighbour *neigh, struct in6_addr *daddr, struct in6_addr *saddr, struct icmp6hdr *icmp6h, struct in6_addr *target, - int llinfo, int icmp6_mib_outnd) + int llinfo) { struct flowi fl; struct dst_entry *dst; @@ -441,9 +470,11 @@ static void __ndisc_send(struct net_device *dev, struct inet6_dev *idev; int len; int err; - u8 *opt; + u8 *opt, type; + + type = icmp6h->icmp6_type; - ndisc_flow_init(&fl, icmp6h->icmp6_type, saddr, daddr, + ndisc_flow_init(&fl, type, saddr, daddr, dev->ifindex); dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); @@ -504,7 +535,7 @@ static void __ndisc_send(struct net_device *dev, err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { - ICMP6_INC_STATS(idev, icmp6_mib_outnd); + ICMP6MSGOUT_INC_STATS(idev, type); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); } @@ -542,8 +573,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, __ndisc_send(dev, neigh, daddr, src_addr, &icmp6h, solicited_addr, - inc_opt ? ND_OPT_TARGET_LL_ADDR : 0, - ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS); + inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); } void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, @@ -564,8 +594,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, __ndisc_send(dev, neigh, daddr, saddr, &icmp6h, solicit, - !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0, - ICMP6_MIB_OUTNEIGHBORSOLICITS); + !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0); } void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, @@ -599,8 +628,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, #endif __ndisc_send(dev, NULL, daddr, saddr, &icmp6h, NULL, - send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0, - ICMP6_MIB_OUTROUTERSOLICITS); + send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0); } @@ -808,7 +836,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE); - if (neigh || !dev->hard_header) { + if (neigh || !dev->header_ops) { ndisc_send_na(dev, neigh, saddr, &msg->target, is_router, 1, (ifp != NULL && inc), inc); @@ -985,6 +1013,53 @@ out: in6_dev_put(idev); } +static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) +{ + struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra); + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct nduseroptmsg *ndmsg; + int err; + int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg) + + (opt->nd_opt_len << 3)); + size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr)); + + skb = nlmsg_new(msg_size, GFP_ATOMIC); + if (skb == NULL) { + err = -ENOBUFS; + goto errout; + } + + nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0); + if (nlh == NULL) { + goto nla_put_failure; + } + + ndmsg = nlmsg_data(nlh); + ndmsg->nduseropt_family = AF_INET6; + ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type; + ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code; + ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3; + + memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); + + NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr), + &ipv6_hdr(ra)->saddr); + nlmsg_end(skb, nlh); + + err = rtnl_notify(skb, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); + if (err < 0) + goto errout; + + return; + +nla_put_failure: + nlmsg_free(skb); + err = -EMSGSIZE; +errout: + rtnl_set_sk_err(RTNLGRP_ND_USEROPT, err); +} + static void ndisc_router_discovery(struct sk_buff *skb) { struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); @@ -1217,6 +1292,15 @@ skip_defrtr: } } + if (ndopts.nd_useropts) { + struct nd_opt_hdr *opt; + for (opt = ndopts.nd_useropts; + opt; + opt = ndisc_next_useropt(opt, ndopts.nd_useropts_end)) { + ndisc_ra_useropt(skb, opt); + } + } + if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) { ND_PRINTK2(KERN_WARNING "ICMPv6 RA: invalid RA options"); @@ -1455,7 +1539,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output); if (!err) { - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTREDIRECTS); + ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); } @@ -1525,6 +1609,9 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 0004db38af6d..0473145ac534 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -24,6 +24,7 @@ #include <linux/sysctl.h> #include <linux/proc_fs.h> #include <linux/mutex.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/ipv6.h> #include <net/ip6_route.h> @@ -247,10 +248,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) if (entry->info->indev && entry->skb->dev) { pmsg->hw_type = entry->skb->dev->type; - if (entry->skb->dev->hard_header_parse) - pmsg->hw_addrlen = - entry->skb->dev->hard_header_parse(entry->skb, - pmsg->hw_addr); + pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); } if (data_len) @@ -466,7 +464,7 @@ ipq_dev_drop(int ifindex) #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) static inline void -ipq_rcv_skb(struct sk_buff *skb) +__ipq_rcv_skb(struct sk_buff *skb) { int status, type, pid, flags, nlmsglen, skblen; struct nlmsghdr *nlh; @@ -524,19 +522,10 @@ ipq_rcv_skb(struct sk_buff *skb) } static void -ipq_rcv_sk(struct sock *sk, int len) +ipq_rcv_skb(struct sk_buff *skb) { - struct sk_buff *skb; - unsigned int qlen; - mutex_lock(&ipqnl_mutex); - - for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { - skb = skb_dequeue(&sk->sk_receive_queue); - ipq_rcv_skb(skb); - kfree_skb(skb); - } - + __ipq_rcv_skb(skb); mutex_unlock(&ipqnl_mutex); } @@ -546,6 +535,9 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) ipq_dev_drop(dev->ifindex); @@ -565,7 +557,7 @@ ipq_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW && n->pid) { write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if ((n->net == &init_net) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -657,14 +649,14 @@ static int __init ip6_queue_init(void) struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL, - THIS_MODULE); + ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, + ipq_rcv_skb, NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } - proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); if (proc) proc->owner = THIS_MODULE; else { @@ -685,7 +677,7 @@ static int __init ip6_queue_init(void) cleanup_sysctl: unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); cleanup_ipqnl: sock_release(ipqnl->sk_socket); @@ -705,7 +697,7 @@ static void __exit ip6_queue_fini(void) unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 2f487cda3b6b..3fd08d5567a6 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -167,7 +167,7 @@ static inline void send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) { if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = &loopback_dev; + skb_in->dev = init_net.loopback_dev; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 3153e15e0f7c..37a3db926953 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -337,36 +337,33 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int ipv6_tuple_to_nfattr(struct sk_buff *skb, +static int ipv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - NFA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, + NLA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, &tuple->src.u3.ip6); - NFA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, + NLA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, &tuple->dst.u3.ip6); return 0; -nfattr_failure: +nla_put_failure: return -1; } -static const size_t cta_min_ip[CTA_IP_MAX] = { - [CTA_IP_V6_SRC-1] = sizeof(u_int32_t)*4, - [CTA_IP_V6_DST-1] = sizeof(u_int32_t)*4, +static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = { + [CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 }, + [CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 }, }; -static int ipv6_nfattr_to_tuple(struct nfattr *tb[], +static int ipv6_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t) { - if (!tb[CTA_IP_V6_SRC-1] || !tb[CTA_IP_V6_DST-1]) + if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) return -EINVAL; - if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) - return -EINVAL; - - memcpy(&t->src.u3.ip6, NFA_DATA(tb[CTA_IP_V6_SRC-1]), + memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]), sizeof(u_int32_t) * 4); - memcpy(&t->dst.u3.ip6, NFA_DATA(tb[CTA_IP_V6_DST-1]), + memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]), sizeof(u_int32_t) * 4); return 0; @@ -382,8 +379,9 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .print_conntrack = ipv6_print_conntrack, .get_l4proto = ipv6_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = ipv6_tuple_to_nfattr, - .nfattr_to_tuple = ipv6_nfattr_to_tuple, + .tuple_to_nlattr = ipv6_tuple_to_nlattr, + .nlattr_to_tuple = ipv6_nlattr_to_tuple, + .nla_policy = ipv6_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_path = nf_net_netfilter_sysctl_path, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index ab154fb90018..fbdc66920de4 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -210,45 +210,42 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff, #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int icmpv6_tuple_to_nfattr(struct sk_buff *skb, +static int icmpv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *t) { - NFA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t), + NLA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t), &t->src.u.icmp.id); - NFA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t), &t->dst.u.icmp.type); - NFA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t), &t->dst.u.icmp.code); return 0; -nfattr_failure: +nla_put_failure: return -1; } -static const size_t cta_min_proto[CTA_PROTO_MAX] = { - [CTA_PROTO_ICMPV6_TYPE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMPV6_CODE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMPV6_ID-1] = sizeof(u_int16_t) +static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = { + [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMPV6_ID] = { .type = NLA_U16 }, }; -static int icmpv6_nfattr_to_tuple(struct nfattr *tb[], +static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *tuple) { - if (!tb[CTA_PROTO_ICMPV6_TYPE-1] - || !tb[CTA_PROTO_ICMPV6_CODE-1] - || !tb[CTA_PROTO_ICMPV6_ID-1]) - return -EINVAL; - - if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + if (!tb[CTA_PROTO_ICMPV6_TYPE] + || !tb[CTA_PROTO_ICMPV6_CODE] + || !tb[CTA_PROTO_ICMPV6_ID]) return -EINVAL; tuple->dst.u.icmp.type = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_TYPE-1]); + *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_TYPE]); tuple->dst.u.icmp.code = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]); + *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_CODE]); tuple->src.u.icmp.id = - *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]); + *(__be16 *)nla_data(tb[CTA_PROTO_ICMPV6_ID]); if (tuple->dst.u.icmp.type < 128 || tuple->dst.u.icmp.type - 128 >= sizeof(invmap) @@ -289,8 +286,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .new = icmpv6_new, .error = icmpv6_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = icmpv6_tuple_to_nfattr, - .nfattr_to_tuple = icmpv6_nfattr_to_tuple, + .tuple_to_nlattr = icmpv6_tuple_to_nlattr, + .nlattr_to_tuple = icmpv6_nlattr_to_tuple, + .nla_policy = icmpv6_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_header = &icmpv6_sysctl_header, diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 920dc9cf6a84..db945018579e 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -23,6 +23,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stddef.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/sock.h> #include <net/tcp.h> @@ -85,47 +86,33 @@ static struct snmp_mib snmp6_ipstats_list[] = { }; static struct snmp_mib snmp6_icmp6_list[] = { -/* icmpv6 mib according to RFC 2466 - - Exceptions: {In|Out}AdminProhibs are removed, because I see - no good reasons to account them separately - of another dest.unreachs. - OutErrs is zero identically. - OutEchos too. - OutRouterAdvertisements too. - OutGroupMembQueries too. - */ +/* icmpv6 mib according to RFC 2466 */ SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS), SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), - SNMP_MIB_ITEM("Icmp6InDestUnreachs", ICMP6_MIB_INDESTUNREACHS), - SNMP_MIB_ITEM("Icmp6InPktTooBigs", ICMP6_MIB_INPKTTOOBIGS), - SNMP_MIB_ITEM("Icmp6InTimeExcds", ICMP6_MIB_INTIMEEXCDS), - SNMP_MIB_ITEM("Icmp6InParmProblems", ICMP6_MIB_INPARMPROBLEMS), - SNMP_MIB_ITEM("Icmp6InEchos", ICMP6_MIB_INECHOS), - SNMP_MIB_ITEM("Icmp6InEchoReplies", ICMP6_MIB_INECHOREPLIES), - SNMP_MIB_ITEM("Icmp6InGroupMembQueries", ICMP6_MIB_INGROUPMEMBQUERIES), - SNMP_MIB_ITEM("Icmp6InGroupMembResponses", ICMP6_MIB_INGROUPMEMBRESPONSES), - SNMP_MIB_ITEM("Icmp6InGroupMembReductions", ICMP6_MIB_INGROUPMEMBREDUCTIONS), - SNMP_MIB_ITEM("Icmp6InRouterSolicits", ICMP6_MIB_INROUTERSOLICITS), - SNMP_MIB_ITEM("Icmp6InRouterAdvertisements", ICMP6_MIB_INROUTERADVERTISEMENTS), - SNMP_MIB_ITEM("Icmp6InNeighborSolicits", ICMP6_MIB_INNEIGHBORSOLICITS), - SNMP_MIB_ITEM("Icmp6InNeighborAdvertisements", ICMP6_MIB_INNEIGHBORADVERTISEMENTS), - SNMP_MIB_ITEM("Icmp6InRedirects", ICMP6_MIB_INREDIRECTS), SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS), - SNMP_MIB_ITEM("Icmp6OutDestUnreachs", ICMP6_MIB_OUTDESTUNREACHS), - SNMP_MIB_ITEM("Icmp6OutPktTooBigs", ICMP6_MIB_OUTPKTTOOBIGS), - SNMP_MIB_ITEM("Icmp6OutTimeExcds", ICMP6_MIB_OUTTIMEEXCDS), - SNMP_MIB_ITEM("Icmp6OutParmProblems", ICMP6_MIB_OUTPARMPROBLEMS), - SNMP_MIB_ITEM("Icmp6OutEchoReplies", ICMP6_MIB_OUTECHOREPLIES), - SNMP_MIB_ITEM("Icmp6OutRouterSolicits", ICMP6_MIB_OUTROUTERSOLICITS), - SNMP_MIB_ITEM("Icmp6OutNeighborSolicits", ICMP6_MIB_OUTNEIGHBORSOLICITS), - SNMP_MIB_ITEM("Icmp6OutNeighborAdvertisements", ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS), - SNMP_MIB_ITEM("Icmp6OutRedirects", ICMP6_MIB_OUTREDIRECTS), - SNMP_MIB_ITEM("Icmp6OutGroupMembResponses", ICMP6_MIB_OUTGROUPMEMBRESPONSES), - SNMP_MIB_ITEM("Icmp6OutGroupMembReductions", ICMP6_MIB_OUTGROUPMEMBREDUCTIONS), SNMP_MIB_SENTINEL }; +/* RFC 4293 v6 ICMPMsgStatsTable; named items for RFC 2466 compatibility */ +static char *icmp6type2name[256] = { + [ICMPV6_DEST_UNREACH] = "DestUnreachs", + [ICMPV6_PKT_TOOBIG] = "PktTooBigs", + [ICMPV6_TIME_EXCEED] = "TimeExcds", + [ICMPV6_PARAMPROB] = "ParmProblems", + [ICMPV6_ECHO_REQUEST] = "EchoRequest", + [ICMPV6_ECHO_REPLY] = "EchoReplies", + [ICMPV6_MGM_QUERY] = "GroupMembQueries", + [ICMPV6_MGM_REPORT] = "GroupMembResponses", + [ICMPV6_MGM_REDUCTION] = "GroupMembReductions", + [ICMPV6_MLD2_REPORT] = "MLDv2Reports", + [NDISC_ROUTER_ADVERTISEMENT] = "RouterAdvertisements", + [NDISC_ROUTER_SOLICITATION] = "RouterSolicits", + [NDISC_NEIGHBOUR_ADVERTISEMENT] = "NeighborAdvertisements", + [NDISC_NEIGHBOUR_SOLICITATION] = "NeighborSolicits", + [NDISC_REDIRECT] = "NeighborRedirects", +}; + + static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS), @@ -142,6 +129,40 @@ static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_SENTINEL }; +static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) +{ + static char name[32]; + int i; + + /* print by name -- deprecated items */ + for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { + int icmptype; + char *p; + + icmptype = i & 0xff; + p = icmp6type2name[icmptype]; + if (!p) /* don't print un-named types here */ + continue; + (void) snprintf(name, sizeof(name)-1, "Icmp6%s%s", + i & 0x100 ? "Out" : "In", p); + seq_printf(seq, "%-32s\t%lu\n", name, + snmp_fold_field(mib, i)); + } + + /* print by number (nonzero only) - ICMPMsgStat format */ + for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { + unsigned long val; + + val = snmp_fold_field(mib, i); + if (!val) + continue; + (void) snprintf(name, sizeof(name)-1, "Icmp6%sType%u", + i & 0x100 ? "Out" : "In", i & 0xff); + seq_printf(seq, "%-32s\t%lu\n", name, val); + } + return; +} + static inline void snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist) { @@ -159,9 +180,11 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list); snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list); + snmp6_seq_show_icmpv6msg(seq, (void **)idev->stats.icmpv6msg); } else { snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list); snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); + snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); } @@ -231,22 +254,22 @@ int __init ipv6_misc_proc_init(void) { int rc = 0; - if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops)) + if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops)) goto proc_snmp6_fail; - proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net); + proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net); if (!proc_net_devsnmp6) goto proc_dev_snmp6_fail; - if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops)) + if (!proc_net_fops_create(&init_net, "sockstat6", S_IRUGO, &sockstat6_seq_fops)) goto proc_sockstat6_fail; out: return rc; proc_sockstat6_fail: - proc_net_remove("dev_snmp6"); + proc_net_remove(&init_net, "dev_snmp6"); proc_dev_snmp6_fail: - proc_net_remove("snmp6"); + proc_net_remove(&init_net, "snmp6"); proc_snmp6_fail: rc = -ENOMEM; goto out; @@ -254,8 +277,8 @@ proc_snmp6_fail: void ipv6_misc_proc_exit(void) { - proc_net_remove("sockstat6"); - proc_net_remove("dev_snmp6"); - proc_net_remove("snmp6"); + proc_net_remove(&init_net, "sockstat6"); + proc_net_remove(&init_net, "dev_snmp6"); + proc_net_remove(&init_net, "snmp6"); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 77167afa3455..ca24ef19cd8f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -35,6 +35,7 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/sock.h> #include <net/snmp.h> @@ -282,7 +283,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!sk->sk_bound_dev_if) goto out; - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -1288,21 +1289,8 @@ static const struct seq_operations raw6_seq_ops = { static int raw6_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct raw6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - rc = seq_open(file, &raw6_seq_ops); - if (rc) - goto out_kfree; - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &raw6_seq_ops, + sizeof(struct raw6_iter_state)); } static const struct file_operations raw6_seq_fops = { @@ -1315,13 +1303,13 @@ static const struct file_operations raw6_seq_fops = { int __init raw6_proc_init(void) { - if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops)) + if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops)) return -ENOMEM; return 0; } void raw6_proc_exit(void) { - proc_net_remove("raw6"); + proc_net_remove(&init_net, "raw6"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index de795c04e34c..31601c993541 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -301,7 +301,7 @@ static void ip6_frag_expire(unsigned long data) fq_kill(fq); - dev = dev_get_by_index(fq->iif); + dev = dev_get_by_index(&init_net, fq->iif); if (!dev) goto out; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 55ea80fac601..6ff19f9eb9ee 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -44,6 +44,7 @@ #include <linux/seq_file.h> #endif +#include <net/net_namespace.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/ip6_fib.h> @@ -137,7 +138,6 @@ struct rt6_info ip6_null_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, .obsolete = -1, .error = -ENETUNREACH, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -163,7 +163,6 @@ struct rt6_info ip6_prohibit_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, .obsolete = -1, .error = -EACCES, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -183,7 +182,6 @@ struct rt6_info ip6_blk_hole_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, .obsolete = -1, .error = -EINVAL, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -223,8 +221,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - if (dev != &loopback_dev && idev != NULL && idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) { + struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); if (loopback_idev != NULL) { rt->rt6i_idev = loopback_idev; in6_dev_put(idev); @@ -1129,7 +1127,7 @@ int ip6_route_add(struct fib6_config *cfg) #endif if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(cfg->fc_ifindex); + dev = dev_get_by_index(&init_net, cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -1187,12 +1185,12 @@ int ip6_route_add(struct fib6_config *cfg) if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. */ - if (dev != &loopback_dev) { + if (dev != init_net.loopback_dev) { if (dev) { dev_put(dev); in6_dev_put(idev); } - dev = &loopback_dev; + dev = init_net.loopback_dev; dev_hold(dev); idev = in6_dev_get(dev); if (!idev) { @@ -1278,7 +1276,7 @@ install_route: int remaining; nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla->nla_type; + int type = nla_type(nla); if (type) { if (type > RTAX_MAX) { @@ -1896,13 +1894,13 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, if (rt == NULL) return ERR_PTR(-ENOMEM); - dev_hold(&loopback_dev); + dev_hold(init_net.loopback_dev); in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = &loopback_dev; + rt->rt6i_dev = init_net.loopback_dev; rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); @@ -2264,7 +2262,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (iif) { struct net_device *dev; - dev = __dev_get_by_index(iif); + dev = __dev_get_by_index(&init_net, iif); if (!dev) { err = -ENODEV; goto errout; @@ -2561,11 +2559,11 @@ void __init ip6_route_init(void) fib6_init(); #ifdef CONFIG_PROC_FS - p = proc_net_create("ipv6_route", 0, rt6_proc_info); + p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info); if (p) p->owner = THIS_MODULE; - proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); #endif #ifdef CONFIG_XFRM xfrm6_init(); @@ -2585,8 +2583,8 @@ void ip6_route_cleanup(void) fib6_rules_cleanup(); #endif #ifdef CONFIG_PROC_FS - proc_net_remove("ipv6_route"); - proc_net_remove("rt6_stats"); + proc_net_remove(&init_net, "ipv6_route"); + proc_net_remove(&init_net, "rt6_stats"); #endif #ifdef CONFIG_XFRM xfrm6_fini(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index eb20bb690abd..466657a9a8bd 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -167,7 +167,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int int i; for (i=1; i<100; i++) { sprintf(name, "sit%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i==100) @@ -714,7 +714,6 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) static void ipip6_tunnel_setup(struct net_device *dev) { - SET_MODULE_OWNER(dev); dev->uninit = ipip6_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipip6_tunnel_xmit; @@ -761,7 +760,7 @@ static int ipip6_tunnel_init(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3e06799b37a6..a07b59c528f3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -697,7 +697,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, if (!cmd.tcpm_keylen) { if (!tcp_sk(sk)->md5sig_info) return -ENOENT; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]); return tcp_v6_md5_do_del(sk, &sin6->sin6_addr); } @@ -720,7 +720,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); if (!newkey) return -ENOMEM; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) { + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3], newkey, cmd.tcpm_keylen); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c347f3e30e2e..82ff26dd4470 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -612,7 +612,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, daddr = NULL; if (daddr) { - if (ipv6_addr_type(daddr) == IPV6_ADDR_MAPPED) { + if (ipv6_addr_v4mapped(daddr)) { struct sockaddr_in sin; sin.sin_family = AF_INET; sin.sin_port = sin6 ? sin6->sin6_port : inet->dport; diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 2e61d6ddece3..13bb1e856764 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -22,14 +22,6 @@ /* Add encapsulation header. * * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. - * The following fields in it shall be filled in by x->type->output: - * payload_len - * - * On exit, skb->h will be set to the start of the encapsulation header to be - * filled in by x->type->output and skb->nh will be set to the nextheader field - * of the extension header directly preceding the encapsulation header, or in - * its absence, that of the top IP header. The value of skb->data will always - * point to the top IP header. */ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -37,19 +29,17 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) u8 *prevhdr; int hdr_len; - skb_push(skb, x->props.header_len); iph = ipv6_hdr(skb); hdr_len = ip6_find_1stfragopt(skb, &prevhdr); - skb_set_network_header(skb, - (prevhdr - x->props.header_len) - skb->data); - skb_set_transport_header(skb, hdr_len); - memmove(skb->data, iph, hdr_len); - skb_reset_network_header(skb); + skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; + __skb_pull(skb, hdr_len); + top_iph = ipv6_hdr(skb); - skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - skb->network_header += offsetof(struct ipv6hdr, nexthdr); + memmove(top_iph, iph, hdr_len); ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index 6ad6d7ac6bd7..957ae36b6695 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -29,6 +29,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/stringify.h> +#include <linux/time.h> #include <net/ipv6.h> #include <net/xfrm.h> @@ -36,12 +37,6 @@ * * The IP header and mutable extension headers will be moved forward to make * space for the route optimization header. - * - * On exit, skb->h will be set to the start of the encapsulation header to be - * filled in by x->type->output and skb->nh will be set to the nextheader field - * of the extension header directly preceding the encapsulation header, or in - * its absence, that of the top IP header. The value of skb->data will always - * point to the top IP header. */ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -49,14 +44,17 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) u8 *prevhdr; int hdr_len; - skb_push(skb, x->props.header_len); iph = ipv6_hdr(skb); hdr_len = x->type->hdr_offset(x, skb, &prevhdr); - skb_set_network_header(skb, - (prevhdr - x->props.header_len) - skb->data); - skb_set_transport_header(skb, hdr_len); - memmove(skb->data, iph, hdr_len); + skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; + __skb_pull(skb, hdr_len); + memmove(ipv6_hdr(skb), iph, hdr_len); + + x->lastused = get_seconds(); + return 0; } diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index c026bfea820a..4e344105b3fd 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -18,12 +18,6 @@ * * The IP header and mutable extension headers will be moved forward to make * space for the encapsulation header. - * - * On exit, skb->h will be set to the start of the encapsulation header to be - * filled in by x->type->output and skb->nh will be set to the nextheader field - * of the extension header directly preceding the encapsulation header, or in - * its absence, that of the top IP header. The value of skb->data will always - * point to the top IP header. */ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -31,14 +25,14 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) u8 *prevhdr; int hdr_len; - skb_push(skb, x->props.header_len); iph = ipv6_hdr(skb); hdr_len = x->type->hdr_offset(x, skb, &prevhdr); - skb_set_network_header(skb, - (prevhdr - x->props.header_len) - skb->data); - skb_set_transport_header(skb, hdr_len); - memmove(skb->data, iph, hdr_len); + skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; + __skb_pull(skb, hdr_len); + memmove(ipv6_hdr(skb), iph, hdr_len); return 0; } diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9fc95bc6509f..ea2283879112 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -33,15 +33,7 @@ static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb) /* Add encapsulation header. * - * The top IP header will be constructed per RFC 2401. The following fields - * in it shall be filled in by x->type->output: - * payload_len - * - * On exit, skb->h will be set to the start of the encapsulation header to be - * filled in by x->type->output and skb->nh will be set to the nextheader field - * of the extension header directly preceding the encapsulation header, or in - * its absence, that of the top IP header. The value of skb->data will always - * point to the top IP header. + * The top IP header will be constructed per RFC 2401. */ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -50,13 +42,13 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *iph, *top_iph; int dsfield; - skb_push(skb, x->props.header_len); iph = ipv6_hdr(skb); - skb_reset_network_header(skb); + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct ipv6hdr, nexthdr); + skb->transport_header = skb->network_header + sizeof(*iph); top_iph = ipv6_hdr(skb); - skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - skb->network_header += offsetof(struct ipv6hdr, nexthdr); top_iph->version = 6; if (xdst->route->ops->family == AF_INET6) { diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 56364a5f676a..4618c18e611d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -9,9 +9,9 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/if_ether.h> #include <linux/compiler.h> #include <linux/skbuff.h> -#include <linux/spinlock.h> #include <linux/icmpv6.h> #include <linux/netfilter_ipv6.h> #include <net/ipv6.h> @@ -43,62 +43,31 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) return ret; } -static int xfrm6_output_one(struct sk_buff *skb) +static inline int xfrm6_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; + struct ipv6hdr *iph; int err; - if (skb->ip_summed == CHECKSUM_PARTIAL) { - err = skb_checksum_help(skb); - if (err) - goto error_nolock; - } - if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; } - do { - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; - - err = x->mode->output(x, skb); - if (err) - goto error; + err = xfrm_output(skb); + if (err) + goto error_nolock; - err = x->type->output(x, skb); - if (err) - goto error; - - x->curlft.bytes += skb->len; - x->curlft.packets++; - if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) - x->lastused = get_seconds(); - - spin_unlock_bh(&x->lock); - - skb_reset_network_header(skb); - - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - dst = skb->dst; - x = dst->xfrm; - } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); + iph = ipv6_hdr(skb); + iph->payload_len = htons(skb->len - sizeof(*iph)); IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; err = 0; out_exit: return err; -error: - spin_unlock_bh(&x->lock); error_nolock: kfree_skb(skb); goto out_exit; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 3ec0c4770ee3..15aa4c58c315 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -375,7 +375,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); BUG_ON(!loopback_idev); do { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 30f3236c402a..3f8a3abde67e 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -242,11 +242,7 @@ EXPORT_SYMBOL(xfrm6_tunnel_free_spi); static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { - struct ipv6hdr *top_iph; - - top_iph = (struct ipv6hdr *)skb->data; - top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); - + skb_push(skb, -skb_network_offset(skb)); return 0; } diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 8400525177ab..29b063d43120 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -347,6 +347,9 @@ static int ipxitf_device_event(struct notifier_block *notifier, struct net_device *dev = ptr; struct ipx_interface *i, *tmp; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN && event != NETDEV_UP) goto out; @@ -986,7 +989,7 @@ static int ipxitf_create(struct ipx_interface_definition *idef) if (intrfc) ipxitf_put(intrfc); - dev = dev_get_by_name(idef->ipx_device); + dev = dev_get_by_name(&init_net, idef->ipx_device); rc = -ENODEV; if (!dev) goto out; @@ -1094,7 +1097,7 @@ static int ipxitf_delete(struct ipx_interface_definition *idef) if (!dlink_type) goto out; - dev = __dev_get_by_name(idef->ipx_device); + dev = __dev_get_by_name(&init_net, idef->ipx_device); rc = -ENODEV; if (!dev) goto out; @@ -1189,7 +1192,7 @@ static int ipxitf_ioctl(unsigned int cmd, void __user *arg) if (copy_from_user(&ifr, arg, sizeof(ifr))) break; sipx = (struct sockaddr_ipx *)&ifr.ifr_addr; - dev = __dev_get_by_name(ifr.ifr_name); + dev = __dev_get_by_name(&init_net, ifr.ifr_name); rc = -ENODEV; if (!dev) break; @@ -1360,11 +1363,14 @@ static struct proto ipx_proto = { .obj_size = sizeof(struct ipx_sock), }; -static int ipx_create(struct socket *sock, int protocol) +static int ipx_create(struct net *net, struct socket *sock, int protocol) { int rc = -ESOCKTNOSUPPORT; struct sock *sk; + if (net != &init_net) + return -EAFNOSUPPORT; + /* * SPX support is not anymore in the kernel sources. If you want to * ressurrect it, completing it and making it understand shared skbs, @@ -1375,7 +1381,7 @@ static int ipx_create(struct socket *sock, int protocol) goto out; rc = -ENOMEM; - sk = sk_alloc(PF_IPX, GFP_KERNEL, &ipx_proto, 1); + sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, 1); if (!sk) goto out; #ifdef IPX_REFCNT_DEBUG @@ -1644,6 +1650,9 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty u16 ipx_pktsize; int rc = 0; + if (dev->nd_net != &init_net) + goto drop; + /* Not ours */ if (skb->pkt_type == PACKET_OTHERHOST) goto drop; diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 4226e71ae1e3..d483a00dc427 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -9,6 +9,7 @@ #include <linux/proc_fs.h> #include <linux/spinlock.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/tcp_states.h> #include <net/ipx.h> @@ -353,7 +354,7 @@ int __init ipx_proc_init(void) struct proc_dir_entry *p; int rc = -ENOMEM; - ipx_proc_dir = proc_mkdir("ipx", proc_net); + ipx_proc_dir = proc_mkdir("ipx", init_net.proc_net); if (!ipx_proc_dir) goto out; @@ -381,7 +382,7 @@ out_socket: out_route: remove_proc_entry("interface", ipx_proc_dir); out_interface: - remove_proc_entry("ipx", proc_net); + remove_proc_entry("ipx", init_net.proc_net); goto out; } @@ -390,7 +391,7 @@ void __exit ipx_proc_exit(void) remove_proc_entry("interface", ipx_proc_dir); remove_proc_entry("route", ipx_proc_dir); remove_proc_entry("socket", ipx_proc_dir); - remove_proc_entry("ipx", proc_net); + remove_proc_entry("ipx", init_net.proc_net); } #else /* CONFIG_PROC_FS */ diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 4c670cf6aefa..0328ae2654f4 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -60,7 +60,7 @@ #include <net/irda/af_irda.h> -static int irda_create(struct socket *sock, int protocol); +static int irda_create(struct net *net, struct socket *sock, int protocol); static const struct proto_ops irda_stream_ops; static const struct proto_ops irda_seqpacket_ops; @@ -831,7 +831,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) IRDA_DEBUG(2, "%s()\n", __FUNCTION__); - err = irda_create(newsock, sk->sk_protocol); + err = irda_create(sk->sk_net, newsock, sk->sk_protocol); if (err) return err; @@ -1057,13 +1057,16 @@ static struct proto irda_proto = { * Create IrDA socket * */ -static int irda_create(struct socket *sock, int protocol) +static int irda_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct irda_sock *self; IRDA_DEBUG(2, "%s()\n", __FUNCTION__); + if (net != &init_net) + return -EAFNOSUPPORT; + /* Check for valid socket type */ switch (sock->type) { case SOCK_STREAM: /* For TTP connections with SAR disabled */ @@ -1075,7 +1078,7 @@ static int irda_create(struct socket *sock, int protocol) } /* Allocate networking socket */ - sk = sk_alloc(PF_IRDA, GFP_ATOMIC, &irda_proto, 1); + sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto, 1); if (sk == NULL) return -ENOMEM; @@ -1245,18 +1248,17 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct irda_sock *self; struct sk_buff *skb; - int err; + int err = -EPIPE; IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len); /* Note : socket.c set MSG_EOR on SEQPACKET sockets */ - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) + if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT | + MSG_NOSIGNAL)) return -EINVAL; - if (sk->sk_shutdown & SEND_SHUTDOWN) { - send_sig(SIGPIPE, current, 0); - return -EPIPE; - } + if (sk->sk_shutdown & SEND_SHUTDOWN) + goto out_err; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; @@ -1283,7 +1285,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, skb = sock_alloc_send_skb(sk, len + self->max_header_size + 16, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) - return -ENOBUFS; + goto out_err; skb_reserve(skb, self->max_header_size + 16); skb_reset_transport_header(skb); @@ -1291,7 +1293,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len); if (err) { kfree_skb(skb); - return err; + goto out_err; } /* @@ -1301,10 +1303,14 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, err = irttp_data_request(self->tsap, skb); if (err) { IRDA_DEBUG(0, "%s(), err=%d\n", __FUNCTION__, err); - return err; + goto out_err; } /* Tell client how much data we actually sent */ return len; + + out_err: + return sk_stream_error(sk, msg->msg_flags, err); + } /* diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c index a4c1c9545827..87039c2fb6a2 100644 --- a/net/irda/irlan/irlan_client.c +++ b/net/irda/irlan/irlan_client.c @@ -436,6 +436,7 @@ static void irlan_check_response_param(struct irlan_cb *self, char *param, __u16 tmp_cpu; /* Temporary value in host order */ __u8 *bytes; int i; + DECLARE_MAC_BUF(mac); IRDA_DEBUG(4, "%s(), parm=%s\n", __FUNCTION__ , param); @@ -520,9 +521,8 @@ static void irlan_check_response_param(struct irlan_cb *self, char *param, /* FILTER_ENTRY, have we got an ethernet address? */ if (strcmp(param, "FILTER_ENTRY") == 0) { bytes = value; - IRDA_DEBUG(4, "Ethernet address = %02x:%02x:%02x:%02x:%02x:%02x\n", - bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], - bytes[5]); + IRDA_DEBUG(4, "Ethernet address = %s\n", + print_mac(mac, bytes)); for (i = 0; i < 6; i++) self->dev->dev_addr[i] = bytes[i]; } diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index c421521c0a99..340f04a36b02 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -60,8 +60,6 @@ static void irlan_eth_setup(struct net_device *dev) dev->set_multicast_list = irlan_eth_set_multicast_list; dev->destructor = free_netdev; - SET_MODULE_OWNER(dev); - ether_setup(dev); /* diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 3d76aafdb2e5..f3236acc8d22 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -1219,29 +1219,11 @@ static const struct seq_operations irlap_seq_ops = { static int irlap_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct irlap_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); + if (irlap == NULL) + return -EINVAL; - if (!s) - goto out; - - if (irlap == NULL) { - rc = -EINVAL; - goto out_kfree; - } - - rc = seq_open(file, &irlap_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &irlap_seq_ops, + sizeof(struct irlap_iter_state)); } const struct file_operations irlap_seq_fops = { diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 25a3444a9234..77ac27e81161 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -1326,6 +1326,9 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, int command; __u8 control; + if (dev->nd_net != &init_net) + goto out; + /* FIXME: should we get our own field? */ self = (struct irlap_cb *) dev->atalk_ptr; diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 7efa930ed684..7db92ced2c02 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -2003,27 +2003,10 @@ static const struct seq_operations irlmp_seq_ops = { static int irlmp_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct irlmp_iter_state *s; - IRDA_ASSERT(irlmp != NULL, return -EINVAL;); - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - - rc = seq_open(file, &irlmp_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &irlmp_seq_ops, + sizeof(struct irlmp_iter_state)); } const struct file_operations irlmp_seq_fops = { diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 1e429c929739..cd9ff176ecde 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -15,6 +15,7 @@ #include <linux/socket.h> #include <linux/irda.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/irda/irda.h> #include <net/irda/irlap.h> @@ -30,7 +31,7 @@ static struct genl_family irda_nl_family = { .maxattr = IRDA_NL_CMD_MAX, }; -static struct net_device * ifname_to_netdev(struct genl_info *info) +static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info) { char * ifname; @@ -41,7 +42,7 @@ static struct net_device * ifname_to_netdev(struct genl_info *info) IRDA_DEBUG(5, "%s(): Looking for %s\n", __FUNCTION__, ifname); - return dev_get_by_name(ifname); + return dev_get_by_name(net, ifname); } static int irda_nl_set_mode(struct sk_buff *skb, struct genl_info *info) @@ -57,7 +58,7 @@ static int irda_nl_set_mode(struct sk_buff *skb, struct genl_info *info) IRDA_DEBUG(5, "%s(): Switching to mode: %d\n", __FUNCTION__, mode); - dev = ifname_to_netdev(info); + dev = ifname_to_netdev(&init_net, info); if (!dev) return -ENODEV; @@ -82,7 +83,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) void *hdr; int ret = -ENOBUFS; - dev = ifname_to_netdev(info); + dev = ifname_to_netdev(&init_net, info); if (!dev) return -ENODEV; diff --git a/net/irda/irproc.c b/net/irda/irproc.c index 181cb51b48a8..cae24fbda966 100644 --- a/net/irda/irproc.c +++ b/net/irda/irproc.c @@ -28,6 +28,7 @@ #include <linux/seq_file.h> #include <linux/module.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/irda/irda.h> #include <net/irda/irlap.h> @@ -66,7 +67,7 @@ void __init irda_proc_register(void) int i; struct proc_dir_entry *d; - proc_irda = proc_mkdir("irda", proc_net); + proc_irda = proc_mkdir("irda", init_net.proc_net); if (proc_irda == NULL) return; proc_irda->owner = THIS_MODULE; @@ -92,7 +93,7 @@ void irda_proc_unregister(void) for (i=0; i<ARRAY_SIZE(irda_dirs); i++) remove_proc_entry(irda_dirs[i].name, proc_irda); - remove_proc_entry("irda", proc_net); + remove_proc_entry("irda", init_net.proc_net); proc_irda = NULL; } } diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 3d7ab03fb131..1311976c9dfe 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -1884,25 +1884,8 @@ static const struct seq_operations irttp_seq_ops = { static int irttp_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct irttp_iter_state *s; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - - rc = seq_open(file, &irttp_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &irttp_seq_ops, + sizeof(struct irttp_iter_state)); } const struct file_operations irttp_seq_fops = { diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 53ae14c35f70..43e01c8d382b 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -41,6 +41,9 @@ static struct proto iucv_proto = { .obj_size = sizeof(struct iucv_sock), }; +static void iucv_sock_kill(struct sock *sk); +static void iucv_sock_close(struct sock *sk); + /* Call Back functions */ static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); @@ -213,7 +216,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_IUCV, prio, &iucv_proto, 1); + sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto, 1); if (!sk) return NULL; @@ -221,6 +224,8 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); spin_lock_init(&iucv_sk(sk)->accept_q_lock); skb_queue_head_init(&iucv_sk(sk)->send_skb_q); + INIT_LIST_HEAD(&iucv_sk(sk)->message_q.list); + spin_lock_init(&iucv_sk(sk)->message_q.lock); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; @@ -240,7 +245,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) } /* Create an IUCV socket */ -static int iucv_sock_create(struct socket *sock, int protocol) +static int iucv_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -670,6 +675,90 @@ out: return err; } +static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len) +{ + int dataleft, size, copied = 0; + struct sk_buff *nskb; + + dataleft = len; + while (dataleft) { + if (dataleft >= sk->sk_rcvbuf / 4) + size = sk->sk_rcvbuf / 4; + else + size = dataleft; + + nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA); + if (!nskb) + return -ENOMEM; + + memcpy(nskb->data, skb->data + copied, size); + copied += size; + dataleft -= size; + + skb_reset_transport_header(nskb); + skb_reset_network_header(nskb); + nskb->len = size; + + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, nskb); + } + + return 0; +} + +static void iucv_process_message(struct sock *sk, struct sk_buff *skb, + struct iucv_path *path, + struct iucv_message *msg) +{ + int rc; + + if (msg->flags & IPRMDATA) { + skb->data = NULL; + skb->len = 0; + } else { + rc = iucv_message_receive(path, msg, 0, skb->data, + msg->length, NULL); + if (rc) { + kfree_skb(skb); + return; + } + if (skb->truesize >= sk->sk_rcvbuf / 4) { + rc = iucv_fragment_skb(sk, skb, msg->length); + kfree_skb(skb); + skb = NULL; + if (rc) { + iucv_path_sever(path, NULL); + return; + } + skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); + } else { + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + skb->len = msg->length; + } + } + + if (sock_queue_rcv_skb(sk, skb)) + skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb); +} + +static void iucv_process_message_q(struct sock *sk) +{ + struct iucv_sock *iucv = iucv_sk(sk); + struct sk_buff *skb; + struct sock_msg_q *p, *n; + + list_for_each_entry_safe(p, n, &iucv->message_q.list, list) { + skb = alloc_skb(p->msg.length, GFP_ATOMIC | GFP_DMA); + if (!skb) + break; + iucv_process_message(sk, skb, p->path, &p->msg); + list_del(&p->list); + kfree(p); + if (!skb_queue_empty(&iucv->backlog_skb_q)) + break; + } +} + static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { @@ -681,8 +770,9 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, int err = 0; if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) && - skb_queue_empty(&iucv->backlog_skb_q) && - skb_queue_empty(&sk->sk_receive_queue)) + skb_queue_empty(&iucv->backlog_skb_q) && + skb_queue_empty(&sk->sk_receive_queue) && + list_empty(&iucv->message_q.list)) return 0; if (flags & (MSG_OOB)) @@ -721,16 +811,23 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, kfree_skb(skb); /* Queue backlog skbs */ - rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); + rskb = skb_dequeue(&iucv->backlog_skb_q); while (rskb) { if (sock_queue_rcv_skb(sk, rskb)) { - skb_queue_head(&iucv_sk(sk)->backlog_skb_q, + skb_queue_head(&iucv->backlog_skb_q, rskb); break; } else { - rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); + rskb = skb_dequeue(&iucv->backlog_skb_q); } } + if (skb_queue_empty(&iucv->backlog_skb_q)) { + spin_lock_bh(&iucv->message_q.lock); + if (!list_empty(&iucv->message_q.list)) + iucv_process_message_q(sk); + spin_unlock_bh(&iucv->message_q.lock); + } + } else skb_queue_head(&sk->sk_receive_queue, skb); @@ -972,99 +1069,44 @@ static void iucv_callback_connack(struct iucv_path *path, u8 ipuser[16]) sk->sk_state_change(sk); } -static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len, - struct sk_buff_head *fragmented_skb_q) -{ - int dataleft, size, copied = 0; - struct sk_buff *nskb; - - dataleft = len; - while (dataleft) { - if (dataleft >= sk->sk_rcvbuf / 4) - size = sk->sk_rcvbuf / 4; - else - size = dataleft; - - nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA); - if (!nskb) - return -ENOMEM; - - memcpy(nskb->data, skb->data + copied, size); - copied += size; - dataleft -= size; - - skb_reset_transport_header(nskb); - skb_reset_network_header(nskb); - nskb->len = size; - - skb_queue_tail(fragmented_skb_q, nskb); - } - - return 0; -} - static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) { struct sock *sk = path->private; struct iucv_sock *iucv = iucv_sk(sk); - struct sk_buff *skb, *fskb; - struct sk_buff_head fragmented_skb_q; - int rc; - - skb_queue_head_init(&fragmented_skb_q); + struct sk_buff *skb; + struct sock_msg_q *save_msg; + int len; if (sk->sk_shutdown & RCV_SHUTDOWN) return; + if (!list_empty(&iucv->message_q.list) || + !skb_queue_empty(&iucv->backlog_skb_q)) + goto save_message; + + len = atomic_read(&sk->sk_rmem_alloc); + len += msg->length + sizeof(struct sk_buff); + if (len > sk->sk_rcvbuf) + goto save_message; + skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA); - if (!skb) { - iucv_path_sever(path, NULL); - return; - } + if (!skb) + goto save_message; - if (msg->flags & IPRMDATA) { - skb->data = NULL; - skb->len = 0; - } else { - rc = iucv_message_receive(path, msg, 0, skb->data, - msg->length, NULL); - if (rc) { - kfree_skb(skb); - return; - } - if (skb->truesize >= sk->sk_rcvbuf / 4) { - rc = iucv_fragment_skb(sk, skb, msg->length, - &fragmented_skb_q); - kfree_skb(skb); - skb = NULL; - if (rc) { - iucv_path_sever(path, NULL); - return; - } - } else { - skb_reset_transport_header(skb); - skb_reset_network_header(skb); - skb->len = msg->length; - } - } - /* Queue the fragmented skb */ - fskb = skb_dequeue(&fragmented_skb_q); - while (fskb) { - if (!skb_queue_empty(&iucv->backlog_skb_q)) - skb_queue_tail(&iucv->backlog_skb_q, fskb); - else if (sock_queue_rcv_skb(sk, fskb)) - skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, fskb); - fskb = skb_dequeue(&fragmented_skb_q); - } + spin_lock(&iucv->message_q.lock); + iucv_process_message(sk, skb, path, msg); + spin_unlock(&iucv->message_q.lock); - /* Queue the original skb if it exists (was not fragmented) */ - if (skb) { - if (!skb_queue_empty(&iucv->backlog_skb_q)) - skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); - else if (sock_queue_rcv_skb(sk, skb)) - skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); - } + return; + +save_message: + save_msg = kzalloc(sizeof(struct sock_msg_q), GFP_ATOMIC | GFP_DMA); + save_msg->path = path; + save_msg->msg = *msg; + spin_lock(&iucv->message_q.lock); + list_add_tail(&save_msg->list, &iucv->message_q.list); + spin_unlock(&iucv->message_q.lock); } static void iucv_callback_txdone(struct iucv_path *path, diff --git a/net/key/af_key.c b/net/key/af_key.c index 5502df115a63..7969f8a716df 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -26,8 +26,8 @@ #include <linux/in6.h> #include <linux/proc_fs.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/xfrm.h> -#include <linux/audit.h> #include <net/sock.h> @@ -136,11 +136,14 @@ static struct proto key_proto = { .obj_size = sizeof(struct pfkey_sock), }; -static int pfkey_create(struct socket *sock, int protocol) +static int pfkey_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; if (sock->type != SOCK_RAW) @@ -149,7 +152,7 @@ static int pfkey_create(struct socket *sock, int protocol) return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1); + sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, 1); if (sk == NULL) goto out; @@ -352,16 +355,14 @@ static int verify_address_len(void *p) switch (addr->sa_family) { case AF_INET: - len = sizeof(*sp) + sizeof(*sin) + (sizeof(uint64_t) - 1); - len /= sizeof(uint64_t); + len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t)); if (sp->sadb_address_len != len || sp->sadb_address_prefixlen > 32) return -EINVAL; break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: - len = sizeof(*sp) + sizeof(*sin6) + (sizeof(uint64_t) - 1); - len /= sizeof(uint64_t); + len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t)); if (sp->sadb_address_len != len || sp->sadb_address_prefixlen > 128) return -EINVAL; @@ -386,14 +387,9 @@ static int verify_address_len(void *p) static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx) { - int len = 0; - - len += sizeof(struct sadb_x_sec_ctx); - len += sec_ctx->sadb_x_ctx_len; - len += sizeof(uint64_t) - 1; - len /= sizeof(uint64_t); - - return len; + return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) + + sec_ctx->sadb_x_ctx_len, + sizeof(uint64_t)); } static inline int verify_sec_ctx_len(void *p) @@ -659,7 +655,8 @@ static inline int pfkey_mode_to_xfrm(int mode) } } -static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, int hsc) +static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, + int add_keys, int hsc) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -1013,6 +1010,24 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, return skb; } + +static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x) +{ + struct sk_buff *skb; + + spin_lock_bh(&x->lock); + skb = __pfkey_xfrm_state2msg(x, 1, 3); + spin_unlock_bh(&x->lock); + + return skb; +} + +static inline struct sk_buff *pfkey_xfrm_state2msg_expire(struct xfrm_state *x, + int hsc) +{ + return __pfkey_xfrm_state2msg(x, 0, hsc); +} + static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, void **ext_hdrs) { @@ -1257,8 +1272,11 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h struct sadb_x_sa2 *sa2; struct sadb_address *saddr, *daddr; struct sadb_msg *out_hdr; + struct sadb_spirange *range; struct xfrm_state *x = NULL; int mode; + int err; + u32 min_spi, max_spi; u32 reqid; u8 proto; unsigned short family; @@ -1313,25 +1331,17 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (x == NULL) return -ENOENT; - resp_skb = ERR_PTR(-ENOENT); - - spin_lock_bh(&x->lock); - if (x->km.state != XFRM_STATE_DEAD) { - struct sadb_spirange *range = ext_hdrs[SADB_EXT_SPIRANGE-1]; - u32 min_spi, max_spi; + min_spi = 0x100; + max_spi = 0x0fffffff; - if (range != NULL) { - min_spi = range->sadb_spirange_min; - max_spi = range->sadb_spirange_max; - } else { - min_spi = 0x100; - max_spi = 0x0fffffff; - } - xfrm_alloc_spi(x, htonl(min_spi), htonl(max_spi)); - if (x->id.spi) - resp_skb = pfkey_xfrm_state2msg(x, 0, 3); + range = ext_hdrs[SADB_EXT_SPIRANGE-1]; + if (range) { + min_spi = range->sadb_spirange_min; + max_spi = range->sadb_spirange_max; } - spin_unlock_bh(&x->lock); + + err = xfrm_alloc_spi(x, min_spi, max_spi); + resp_skb = err ? ERR_PTR(err) : pfkey_xfrm_state2msg(x); if (IS_ERR(resp_skb)) { xfrm_state_put(x); @@ -1421,12 +1431,8 @@ static int key_notify_sa(struct xfrm_state *x, struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; - int hsc = 3; - - if (c->event == XFRM_MSG_DELSA) - hsc = 0; - skb = pfkey_xfrm_state2msg(x, 0, hsc); + skb = pfkey_xfrm_state2msg(x); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -1461,8 +1467,8 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, else err = xfrm_state_update(x); - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x); + xfrm_audit_state_add(x, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -1515,8 +1521,8 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + xfrm_audit_state_delete(x, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); xfrm_state_put(x); return err; @@ -1538,7 +1544,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, if (x == NULL) return -ESRCH; - out_skb = pfkey_xfrm_state2msg(x, 1, 3); + out_skb = pfkey_xfrm_state2msg(x); proto = x->id.proto; xfrm_state_put(x); if (IS_ERR(out_skb)) @@ -1718,7 +1724,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) struct sk_buff *out_skb; struct sadb_msg *out_hdr; - out_skb = pfkey_xfrm_state2msg(x, 1, 3); + out_skb = pfkey_xfrm_state2msg(x); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); @@ -2268,8 +2274,8 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_ADDSPD, err ? 0 : 1, xp, NULL); + xfrm_audit_policy_add(xp, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); if (err) goto out; @@ -2352,8 +2358,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg if (xp == NULL) return -ENOENT; - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL); + xfrm_audit_policy_delete(xp, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); if (err) goto out; @@ -2613,8 +2619,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return -ENOENT; if (delete) { - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL); + xfrm_audit_policy_delete(xp, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); if (err) goto out; @@ -2919,7 +2925,7 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) else hsc = 1; - out_skb = pfkey_xfrm_state2msg(x, 0, hsc); + out_skb = pfkey_xfrm_state2msg_expire(x, hsc); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); @@ -3784,7 +3790,7 @@ static struct xfrm_mgr pfkeyv2_mgr = static void __exit ipsec_pfkey_exit(void) { xfrm_unregister_km(&pfkeyv2_mgr); - remove_proc_entry("net/pfkey", NULL); + remove_proc_entry("pfkey", init_net.proc_net); sock_unregister(PF_KEY); proto_unregister(&key_proto); } @@ -3801,7 +3807,7 @@ static int __init ipsec_pfkey_init(void) goto out_unregister_key_proto; #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL) + if (create_proc_read_entry("pfkey", 0, init_net.proc_net, pfkey_read_proc, NULL) == NULL) goto out_sock_unregister; #endif err = xfrm_register_km(&pfkeyv2_mgr); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6b8a103cf9e6..49eacba824df 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -150,14 +150,17 @@ static struct proto llc_proto = { * socket type we have available. * Returns 0 upon success, negative upon failure. */ -static int llc_ui_create(struct socket *sock, int protocol) +static int llc_ui_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; - sk = llc_sk_alloc(PF_LLC, GFP_KERNEL, &llc_proto); + sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto); if (sk) { rc = 0; llc_ui_sk_init(sock, sk); @@ -249,7 +252,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) if (!sock_flag(sk, SOCK_ZAPPED)) goto out; rc = -ENODEV; - llc->dev = dev_getfirstbyhwtype(addr->sllc_arphrd); + llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); if (!llc->dev) goto out; rc = -EUSERS; @@ -300,7 +303,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) goto out; rc = -ENODEV; rtnl_lock(); - llc->dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_mac); + llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, addr->sllc_mac); rtnl_unlock(); if (!llc->dev) goto out; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 3b8cfbe029a7..8ebc2769dfda 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -700,7 +700,7 @@ static struct sock *llc_create_incoming_sock(struct sock *sk, struct llc_addr *saddr, struct llc_addr *daddr) { - struct sock *newsk = llc_sk_alloc(sk->sk_family, GFP_ATOMIC, + struct sock *newsk = llc_sk_alloc(sk->sk_net, sk->sk_family, GFP_ATOMIC, sk->sk_prot); struct llc_sock *newllc, *llc = llc_sk(sk); @@ -867,9 +867,9 @@ static void llc_sk_init(struct sock* sk) * Allocates a LLC sock and initializes it. Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot) +struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) { - struct sock *sk = sk_alloc(family, priority, prot, 1); + struct sock *sk = sk_alloc(net, family, priority, prot, 1); if (!sk) goto out; diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index d4b13a031fd5..248b5903bb13 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/llc.h> LIST_HEAD(llc_sap_list); @@ -162,7 +163,7 @@ static int __init llc_init(void) { struct net_device *dev; - dev = first_net_device(); + dev = first_net_device(&init_net); if (dev != NULL) dev = next_net_device(dev); diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 099ed8fec145..c40c9b2a345a 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -12,6 +12,7 @@ * See the GNU General Public License for more details. */ #include <linux/netdevice.h> +#include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> #include <net/llc_sap.h> @@ -145,6 +146,9 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, int (*rcv)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); + if (dev->nd_net != &init_net) + goto drop; + /* * When the interface is in promisc. mode, drop all the crap that it * receives, do not try to analyse it. diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 49be6c902c83..cb34bc0518e8 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -17,6 +17,7 @@ #include <linux/proc_fs.h> #include <linux/errno.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/llc.h> #include <net/llc_c_ac.h> @@ -24,10 +25,10 @@ #include <net/llc_c_st.h> #include <net/llc_conn.h> -static void llc_ui_format_mac(struct seq_file *seq, unsigned char *mac) +static void llc_ui_format_mac(struct seq_file *seq, u8 *addr) { - seq_printf(seq, "%02X:%02X:%02X:%02X:%02X:%02X", - mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + DECLARE_MAC_BUF(mac); + seq_printf(seq, "%s", print_mac(mac, addr)); } static struct sock *llc_get_sk_idx(loff_t pos) @@ -127,8 +128,10 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) if (llc->dev) llc_ui_format_mac(seq, llc->dev->dev_addr); - else - seq_printf(seq, "00:00:00:00:00:00"); + else { + u8 addr[6] = {0,0,0,0,0,0}; + llc_ui_format_mac(seq, addr); + } seq_printf(seq, "@%02X ", llc->sap->laddr.lsap); llc_ui_format_mac(seq, llc->daddr.mac); seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap, @@ -231,7 +234,7 @@ int __init llc_proc_init(void) int rc = -ENOMEM; struct proc_dir_entry *p; - llc_proc_dir = proc_mkdir("llc", proc_net); + llc_proc_dir = proc_mkdir("llc", init_net.proc_net); if (!llc_proc_dir) goto out; llc_proc_dir->owner = THIS_MODULE; @@ -254,7 +257,7 @@ out: out_core: remove_proc_entry("socket", llc_proc_dir); out_socket: - remove_proc_entry("llc", proc_net); + remove_proc_entry("llc", init_net.proc_net); goto out; } @@ -262,5 +265,5 @@ void llc_proc_exit(void) { remove_proc_entry("socket", llc_proc_dir); remove_proc_entry("core", llc_proc_dir); - remove_proc_entry("llc", proc_net); + remove_proc_entry("llc", init_net.proc_net); } diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index a9c2d0787d4a..219cd9f9341f 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_MAC80211) += mac80211.o rc80211_simple.o mac80211-objs-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += debugfs.o debugfs_sta.o debugfs_netdev.o debugfs_key.o +mac80211-objs-$(CONFIG_NET_SCHED) += wme.o mac80211-objs := \ ieee80211.o \ @@ -16,6 +17,10 @@ mac80211-objs := \ regdomain.o \ tkip.o \ aes_ccm.o \ - wme.o \ - ieee80211_cfg.o \ + cfg.o \ + rx.o \ + tx.o \ + key.o \ + util.o \ + event.o \ $(mac80211-objs-y) diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index e55569bee7d0..bf7ba128b963 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -7,6 +7,7 @@ * published by the Free Software Foundation. */ +#include <linux/kernel.h> #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> @@ -63,7 +64,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, s_0 = scratch + AES_BLOCK_LEN; e = scratch + 2 * AES_BLOCK_LEN; - num_blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last_len = data_len % AES_BLOCK_LEN; aes_ccm_prepare(tfm, b_0, aad, b, s_0, b); @@ -102,7 +103,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, s_0 = scratch + AES_BLOCK_LEN; a = scratch + 2 * AES_BLOCK_LEN; - num_blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last_len = data_len % AES_BLOCK_LEN; aes_ccm_prepare(tfm, b_0, aad, b, s_0, a); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c new file mode 100644 index 000000000000..9e2bc1fd0237 --- /dev/null +++ b/net/mac80211/cfg.c @@ -0,0 +1,106 @@ +/* + * mac80211 configuration hooks for cfg80211 + * + * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> + * + * This file is GPLv2 as found in COPYING. + */ + +#include <linux/nl80211.h> +#include <linux/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/cfg80211.h> +#include "ieee80211_i.h" +#include "cfg.h" + +static enum ieee80211_if_types +nl80211_type_to_mac80211_type(enum nl80211_iftype type) +{ + switch (type) { + case NL80211_IFTYPE_UNSPECIFIED: + return IEEE80211_IF_TYPE_STA; + case NL80211_IFTYPE_ADHOC: + return IEEE80211_IF_TYPE_IBSS; + case NL80211_IFTYPE_STATION: + return IEEE80211_IF_TYPE_STA; + case NL80211_IFTYPE_MONITOR: + return IEEE80211_IF_TYPE_MNTR; + default: + return IEEE80211_IF_TYPE_INVALID; + } +} + +static int ieee80211_add_iface(struct wiphy *wiphy, char *name, + enum nl80211_iftype type) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + enum ieee80211_if_types itype; + + if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) + return -ENODEV; + + itype = nl80211_type_to_mac80211_type(type); + if (itype == IEEE80211_IF_TYPE_INVALID) + return -EINVAL; + + return ieee80211_if_add(local->mdev, name, NULL, itype); +} + +static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + struct net_device *dev; + char *name; + + if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) + return -ENODEV; + + /* we're under RTNL */ + dev = __dev_get_by_index(&init_net, ifindex); + if (!dev) + return 0; + + name = dev->name; + + return ieee80211_if_remove(local->mdev, name, -1); +} + +static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, + enum nl80211_iftype type) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + struct net_device *dev; + enum ieee80211_if_types itype; + struct ieee80211_sub_if_data *sdata; + + if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) + return -ENODEV; + + /* we're under RTNL */ + dev = __dev_get_by_index(&init_net, ifindex); + if (!dev) + return -ENODEV; + + if (netif_running(dev)) + return -EBUSY; + + itype = nl80211_type_to_mac80211_type(type); + if (itype == IEEE80211_IF_TYPE_INVALID) + return -EINVAL; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->type == IEEE80211_IF_TYPE_VLAN) + return -EOPNOTSUPP; + + ieee80211_if_reinit(dev); + ieee80211_if_set_type(dev, itype); + + return 0; +} + +struct cfg80211_ops mac80211_config_ops = { + .add_virtual_intf = ieee80211_add_iface, + .del_virtual_intf = ieee80211_del_iface, + .change_virtual_intf = ieee80211_change_iface, +}; diff --git a/net/mac80211/ieee80211_cfg.h b/net/mac80211/cfg.h index 85ed2c924878..7d7879f5b00b 100644 --- a/net/mac80211/ieee80211_cfg.h +++ b/net/mac80211/cfg.h @@ -1,9 +1,9 @@ /* * mac80211 configuration hooks for cfg80211 */ -#ifndef __IEEE80211_CFG_H -#define __IEEE80211_CFG_H +#ifndef __CFG_H +#define __CFG_H extern struct cfg80211_ops mac80211_config_ops; -#endif /* __IEEE80211_CFG_H */ +#endif /* __CFG_H */ diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 476c8486f789..60514b2c97b9 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -28,8 +28,6 @@ static const char *ieee80211_mode_str(int mode) return "IEEE 802.11b"; case MODE_IEEE80211G: return "IEEE 802.11g"; - case MODE_ATHEROS_TURBO: - return "Atheros Turbo (5 GHz)"; default: return "UNKNOWN"; } @@ -86,16 +84,12 @@ DEBUGFS_READONLY_FILE(channel, 20, "%d", local->hw.conf.channel); DEBUGFS_READONLY_FILE(frequency, 20, "%d", local->hw.conf.freq); -DEBUGFS_READONLY_FILE(radar_detect, 20, "%d", - local->hw.conf.radar_detect); DEBUGFS_READONLY_FILE(antenna_sel_tx, 20, "%d", local->hw.conf.antenna_sel_tx); DEBUGFS_READONLY_FILE(antenna_sel_rx, 20, "%d", local->hw.conf.antenna_sel_rx); DEBUGFS_READONLY_FILE(bridge_packets, 20, "%d", local->bridge_packets); -DEBUGFS_READONLY_FILE(key_tx_rx_threshold, 20, "%d", - local->key_tx_rx_threshold); DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d", local->rts_threshold); DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d", @@ -110,9 +104,6 @@ DEBUGFS_READONLY_FILE(mode, 20, "%s", ieee80211_mode_str(local->hw.conf.phymode)); DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x", local->wep_iv & 0xffffff); -DEBUGFS_READONLY_FILE(tx_power_reduction, 20, "%d.%d dBm", - local->hw.conf.tx_power_reduction / 10, - local->hw.conf.tx_power_reduction % 10); DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "<unset>"); @@ -305,11 +296,9 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(channel); DEBUGFS_ADD(frequency); - DEBUGFS_ADD(radar_detect); DEBUGFS_ADD(antenna_sel_tx); DEBUGFS_ADD(antenna_sel_rx); DEBUGFS_ADD(bridge_packets); - DEBUGFS_ADD(key_tx_rx_threshold); DEBUGFS_ADD(rts_threshold); DEBUGFS_ADD(fragmentation_threshold); DEBUGFS_ADD(short_retry_limit); @@ -317,7 +306,6 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(mode); DEBUGFS_ADD(wep_iv); - DEBUGFS_ADD(tx_power_reduction); DEBUGFS_ADD(modes); statsd = debugfs_create_dir("statistics", phyd); @@ -370,11 +358,9 @@ void debugfs_hw_del(struct ieee80211_local *local) { DEBUGFS_DEL(channel); DEBUGFS_DEL(frequency); - DEBUGFS_DEL(radar_detect); DEBUGFS_DEL(antenna_sel_tx); DEBUGFS_DEL(antenna_sel_rx); DEBUGFS_DEL(bridge_packets); - DEBUGFS_DEL(key_tx_rx_threshold); DEBUGFS_DEL(rts_threshold); DEBUGFS_DEL(fragmentation_threshold); DEBUGFS_DEL(short_retry_limit); @@ -382,7 +368,6 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_DEL(total_ps_buffered); DEBUGFS_DEL(mode); DEBUGFS_DEL(wep_iv); - DEBUGFS_DEL(tx_power_reduction); DEBUGFS_DEL(modes); DEBUGFS_STATS_DEL(transmitted_fragment_count); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 7d56dc9e7326..c881524c8725 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -14,17 +14,18 @@ #include "debugfs.h" #include "debugfs_key.h" -#define KEY_READ(name, buflen, format_string) \ +#define KEY_READ(name, prop, buflen, format_string) \ static ssize_t key_##name##_read(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ char buf[buflen]; \ struct ieee80211_key *key = file->private_data; \ - int res = scnprintf(buf, buflen, format_string, key->name); \ + int res = scnprintf(buf, buflen, format_string, key->prop); \ return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ } -#define KEY_READ_D(name) KEY_READ(name, 20, "%d\n") +#define KEY_READ_D(name) KEY_READ(name, name, 20, "%d\n") +#define KEY_READ_X(name) KEY_READ(name, name, 20, "0x%x\n") #define KEY_OPS(name) \ static const struct file_operations key_ ##name## _ops = { \ @@ -36,11 +37,27 @@ static const struct file_operations key_ ##name## _ops = { \ KEY_READ_##format(name) \ KEY_OPS(name) -KEY_FILE(keylen, D); -KEY_FILE(force_sw_encrypt, D); -KEY_FILE(keyidx, D); -KEY_FILE(hw_key_idx, D); +#define KEY_CONF_READ(name, buflen, format_string) \ + KEY_READ(conf_##name, conf.name, buflen, format_string) +#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, 20, "%d\n") + +#define KEY_CONF_OPS(name) \ +static const struct file_operations key_ ##name## _ops = { \ + .read = key_conf_##name##_read, \ + .open = mac80211_open_file_generic, \ +} + +#define KEY_CONF_FILE(name, format) \ + KEY_CONF_READ_##format(name) \ + KEY_CONF_OPS(name) + +KEY_CONF_FILE(keylen, D); +KEY_CONF_FILE(keyidx, D); +KEY_CONF_FILE(hw_key_idx, D); +KEY_FILE(flags, X); KEY_FILE(tx_rx_count, D); +KEY_READ(ifindex, sdata->dev->ifindex, 20, "%d\n"); +KEY_OPS(ifindex); static ssize_t key_algorithm_read(struct file *file, char __user *userbuf, @@ -49,7 +66,7 @@ static ssize_t key_algorithm_read(struct file *file, char *alg; struct ieee80211_key *key = file->private_data; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: alg = "WEP\n"; break; @@ -74,17 +91,20 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, int len; struct ieee80211_key *key = file->private_data; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: len = scnprintf(buf, sizeof(buf), "\n"); + break; case ALG_TKIP: len = scnprintf(buf, sizeof(buf), "%08x %04x\n", key->u.tkip.iv32, key->u.tkip.iv16); + break; case ALG_CCMP: tpn = key->u.ccmp.tx_pn; len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); + break; default: return 0; } @@ -100,9 +120,10 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, int i, len; const u8 *rpn; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: len = scnprintf(buf, sizeof(buf), "\n"); + break; case ALG_TKIP: for (i = 0; i < NUM_RX_DATA_QUEUES; i++) p += scnprintf(p, sizeof(buf)+buf-p, @@ -110,6 +131,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, key->u.tkip.iv32_rx[i], key->u.tkip.iv16_rx[i]); len = p - buf; + break; case ALG_CCMP: for (i = 0; i < NUM_RX_DATA_QUEUES; i++) { rpn = key->u.ccmp.rx_pn[i]; @@ -119,6 +141,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, rpn[3], rpn[4], rpn[5]); } len = p - buf; + break; default: return 0; } @@ -133,7 +156,7 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, char buf[20]; int len; - if (key->alg != ALG_CCMP) + if (key->conf.alg != ALG_CCMP) return 0; len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); return simple_read_from_buffer(userbuf, count, ppos, buf, len); @@ -144,12 +167,12 @@ static ssize_t key_key_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { struct ieee80211_key *key = file->private_data; - int i, res, bufsize = 2*key->keylen+2; + int i, res, bufsize = 2 * key->conf.keylen + 2; char *buf = kmalloc(bufsize, GFP_KERNEL); char *p = buf; - for (i = 0; i < key->keylen; i++) - p += scnprintf(p, bufsize+buf-p, "%02x", key->key[i]); + for (i = 0; i < key->conf.keylen; i++) + p += scnprintf(p, bufsize + buf - p, "%02x", key->conf.key[i]); p += scnprintf(p, bufsize+buf-p, "\n"); res = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); kfree(buf); @@ -164,12 +187,14 @@ KEY_OPS(key); void ieee80211_debugfs_key_add(struct ieee80211_local *local, struct ieee80211_key *key) { + static int keycount; char buf[20]; if (!local->debugfs.keys) return; - sprintf(buf, "%d", key->keyidx); + sprintf(buf, "%d", keycount); + keycount++; key->debugfs.dir = debugfs_create_dir(buf, local->debugfs.keys); @@ -177,7 +202,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_local *local, return; DEBUGFS_ADD(keylen); - DEBUGFS_ADD(force_sw_encrypt); + DEBUGFS_ADD(flags); DEBUGFS_ADD(keyidx); DEBUGFS_ADD(hw_key_idx); DEBUGFS_ADD(tx_rx_count); @@ -186,6 +211,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_local *local, DEBUGFS_ADD(rx_spec); DEBUGFS_ADD(replays); DEBUGFS_ADD(key); + DEBUGFS_ADD(ifindex); }; #define DEBUGFS_DEL(name) \ @@ -197,7 +223,7 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key) return; DEBUGFS_DEL(keylen); - DEBUGFS_DEL(force_sw_encrypt); + DEBUGFS_DEL(flags); DEBUGFS_DEL(keyidx); DEBUGFS_DEL(hw_key_idx); DEBUGFS_DEL(tx_rx_count); @@ -206,6 +232,7 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key) DEBUGFS_DEL(rx_spec); DEBUGFS_DEL(replays); DEBUGFS_DEL(key); + DEBUGFS_DEL(ifindex); debugfs_remove(key->debugfs.stalink); key->debugfs.stalink = NULL; @@ -219,7 +246,7 @@ void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata) if (!sdata->debugfsdir) return; - sprintf(buf, "../keys/%d", sdata->default_key->keyidx); + sprintf(buf, "../keys/%d", sdata->default_key->conf.keyidx); sdata->debugfs.default_key = debugfs_create_symlink("default_key", sdata->debugfsdir, buf); } @@ -235,11 +262,12 @@ void ieee80211_debugfs_key_sta_link(struct ieee80211_key *key, struct sta_info *sta) { char buf[50]; + DECLARE_MAC_BUF(mac); if (!key->debugfs.dir) return; - sprintf(buf, "../sta/" MAC_FMT, MAC_ARG(sta->addr)); + sprintf(buf, "../../stations/%s", print_mac(mac, sta->addr)); key->debugfs.stalink = debugfs_create_symlink("station", key->debugfs.dir, buf); } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 095be91829ca..f0e6ab7eb624 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -66,7 +66,8 @@ static ssize_t ieee80211_if_fmt_##name( \ const struct ieee80211_sub_if_data *sdata, char *buf, \ int buflen) \ { \ - return scnprintf(buf, buflen, MAC_FMT "\n", MAC_ARG(sdata->field));\ + DECLARE_MAC_BUF(mac); \ + return scnprintf(buf, buflen, "%s\n", print_mac(mac, sdata->field));\ } #define __IEEE80211_IF_FILE(name) \ @@ -112,13 +113,13 @@ static ssize_t ieee80211_if_fmt_flags( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) { return scnprintf(buf, buflen, "%s%s%s%s%s%s%s\n", - sdata->u.sta.ssid_set ? "SSID\n" : "", - sdata->u.sta.bssid_set ? "BSSID\n" : "", - sdata->u.sta.prev_bssid_set ? "prev BSSID\n" : "", - sdata->u.sta.authenticated ? "AUTH\n" : "", - sdata->u.sta.associated ? "ASSOC\n" : "", - sdata->u.sta.probereq_poll ? "PROBEREQ POLL\n" : "", - sdata->use_protection ? "CTS prot\n" : ""); + sdata->u.sta.flags & IEEE80211_STA_SSID_SET ? "SSID\n" : "", + sdata->u.sta.flags & IEEE80211_STA_BSSID_SET ? "BSSID\n" : "", + sdata->u.sta.flags & IEEE80211_STA_PREV_BSSID_SET ? "prev BSSID\n" : "", + sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", + sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", + sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", + sdata->flags & IEEE80211_SDATA_USE_PROTECTION ? "CTS prot\n" : ""); } __IEEE80211_IF_FILE(flags); @@ -161,23 +162,6 @@ __IEEE80211_IF_FILE(beacon_tail_len); /* WDS attributes */ IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); -/* VLAN attributes */ -IEEE80211_IF_FILE(vlan_id, u.vlan.id, DEC); - -/* MONITOR attributes */ -static ssize_t ieee80211_if_fmt_mode( - const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) -{ - struct ieee80211_local *local = sdata->local; - - return scnprintf(buf, buflen, "%s\n", - ((local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER) || - local->open_count == local->monitors) ? - "hard" : "soft"); -} -__IEEE80211_IF_FILE(mode); - - #define DEBUGFS_ADD(name, type)\ sdata->debugfs.type.name = debugfs_create_file(#name, 0444,\ sdata->debugfsdir, sdata, &name##_ops); @@ -236,12 +220,10 @@ static void add_vlan_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(drop_unencrypted, vlan); DEBUGFS_ADD(eapol, vlan); DEBUGFS_ADD(ieee8021_x, vlan); - DEBUGFS_ADD(vlan_id, vlan); } static void add_monitor_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(mode, monitor); } static void add_files(struct ieee80211_sub_if_data *sdata) @@ -331,12 +313,10 @@ static void del_vlan_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_DEL(drop_unencrypted, vlan); DEBUGFS_DEL(eapol, vlan); DEBUGFS_DEL(ieee8021_x, vlan); - DEBUGFS_DEL(vlan_id, vlan); } static void del_monitor_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_DEL(mode, monitor); } static void del_files(struct ieee80211_sub_if_data *sdata, int type) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index da34ea70276f..8f5944c53d4e 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -60,9 +60,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_OPS(name) STA_FILE(aid, aid, D); -STA_FILE(key_idx_compression, key_idx_compression, D); STA_FILE(dev, dev->name, S); -STA_FILE(vlan_id, vlan_id, D); STA_FILE(rx_packets, rx_packets, LU); STA_FILE(tx_packets, tx_packets, LU); STA_FILE(rx_bytes, rx_bytes, LU); @@ -204,15 +202,15 @@ STA_OPS(wme_tx_queue); void ieee80211_sta_debugfs_add(struct sta_info *sta) { - char buf[3*6]; struct dentry *stations_dir = sta->local->debugfs.stations; + DECLARE_MAC_BUF(mac); if (!stations_dir) return; - sprintf(buf, MAC_FMT, MAC_ARG(sta->addr)); + print_mac(mac, sta->addr); - sta->debugfs.dir = debugfs_create_dir(buf, stations_dir); + sta->debugfs.dir = debugfs_create_dir(mac, stations_dir); if (!sta->debugfs.dir) return; diff --git a/net/mac80211/event.c b/net/mac80211/event.c new file mode 100644 index 000000000000..2280f40b4560 --- /dev/null +++ b/net/mac80211/event.c @@ -0,0 +1,43 @@ +/* + * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * mac80211 - events + */ + +#include <linux/netdevice.h> +#include <net/iw_handler.h> +#include "ieee80211_i.h" + +/* + * indicate a failed Michael MIC to userspace; the passed packet + * (in the variable hdr) must be long enough to extract the TKIP + * fields like TSC + */ +void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, + struct ieee80211_hdr *hdr) +{ + union iwreq_data wrqu; + char *buf = kmalloc(128, GFP_ATOMIC); + DECLARE_MAC_BUF(mac); + + if (buf) { + /* TODO: needed parameters: count, key type, TSC */ + sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" + "keyid=%d %scast addr=%s)", + keyidx, hdr->addr1[0] & 0x01 ? "broad" : "uni", + print_mac(mac, hdr->addr2)); + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = strlen(buf); + wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf); + kfree(buf); + } + + /* + * TODO: re-add support for sending MIC failure indication + * with all info via nl80211 + */ +} diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h deleted file mode 100644 index 52da513f060a..000000000000 --- a/net/mac80211/hostapd_ioctl.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Host AP (software wireless LAN access point) user space daemon for - * Host AP kernel driver - * Copyright 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi> - * Copyright 2002-2004, Instant802 Networks, Inc. - * Copyright 2005, Devicescape Software, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef HOSTAPD_IOCTL_H -#define HOSTAPD_IOCTL_H - -#ifdef __KERNEL__ -#include <linux/types.h> -#endif /* __KERNEL__ */ - -#define PRISM2_IOCTL_PRISM2_PARAM (SIOCIWFIRSTPRIV + 0) -#define PRISM2_IOCTL_GET_PRISM2_PARAM (SIOCIWFIRSTPRIV + 1) -#define PRISM2_IOCTL_HOSTAPD (SIOCIWFIRSTPRIV + 3) - -/* PRISM2_IOCTL_PRISM2_PARAM ioctl() subtypes: - * This table is no longer added to, the whole sub-ioctl - * mess shall be deleted completely. */ -enum { - PRISM2_PARAM_IEEE_802_1X = 23, - - /* Instant802 additions */ - PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES = 1001, - PRISM2_PARAM_PREAMBLE = 1003, - PRISM2_PARAM_SHORT_SLOT_TIME = 1006, - PRISM2_PARAM_NEXT_MODE = 1008, - PRISM2_PARAM_RADIO_ENABLED = 1010, - PRISM2_PARAM_ANTENNA_MODE = 1013, - PRISM2_PARAM_STAT_TIME = 1016, - PRISM2_PARAM_STA_ANTENNA_SEL = 1017, - PRISM2_PARAM_TX_POWER_REDUCTION = 1022, - PRISM2_PARAM_KEY_TX_RX_THRESHOLD = 1024, - PRISM2_PARAM_DEFAULT_WEP_ONLY = 1026, - PRISM2_PARAM_WIFI_WME_NOACK_TEST = 1033, - PRISM2_PARAM_SCAN_FLAGS = 1035, - PRISM2_PARAM_HW_MODES = 1036, - PRISM2_PARAM_CREATE_IBSS = 1037, - PRISM2_PARAM_WMM_ENABLED = 1038, - PRISM2_PARAM_MIXED_CELL = 1039, - PRISM2_PARAM_RADAR_DETECT = 1043, - PRISM2_PARAM_SPECTRUM_MGMT = 1044, -}; - -enum { - IEEE80211_KEY_MGMT_NONE = 0, - IEEE80211_KEY_MGMT_IEEE8021X = 1, - IEEE80211_KEY_MGMT_WPA_PSK = 2, - IEEE80211_KEY_MGMT_WPA_EAP = 3, -}; - - -/* Data structures used for get_hw_features ioctl */ -struct hostapd_ioctl_hw_modes_hdr { - int mode; - int num_channels; - int num_rates; -}; - -struct ieee80211_channel_data { - short chan; /* channel number (IEEE 802.11) */ - short freq; /* frequency in MHz */ - int flag; /* flag for hostapd use (IEEE80211_CHAN_*) */ -}; - -struct ieee80211_rate_data { - int rate; /* rate in 100 kbps */ - int flags; /* IEEE80211_RATE_ flags */ -}; - - -/* ADD_IF, REMOVE_IF, and UPDATE_IF 'type' argument */ -enum { - HOSTAP_IF_WDS = 1, HOSTAP_IF_VLAN = 2, HOSTAP_IF_BSS = 3, - HOSTAP_IF_STA = 4 -}; - -struct hostapd_if_wds { - u8 remote_addr[ETH_ALEN]; -}; - -struct hostapd_if_vlan { - u8 id; -}; - -struct hostapd_if_bss { - u8 bssid[ETH_ALEN]; -}; - -struct hostapd_if_sta { -}; - -#endif /* HOSTAPD_IOCTL_H */ diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index ff2172ffd861..f484ca7ade9c 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -20,42 +20,19 @@ #include <linux/if_arp.h> #include <linux/wireless.h> #include <linux/rtnetlink.h> -#include <net/iw_handler.h> -#include <linux/compiler.h> #include <linux/bitmap.h> +#include <net/net_namespace.h> #include <net/cfg80211.h> -#include <asm/unaligned.h> -#include "ieee80211_common.h" #include "ieee80211_i.h" #include "ieee80211_rate.h" #include "wep.h" -#include "wpa.h" -#include "tkip.h" #include "wme.h" #include "aes_ccm.h" #include "ieee80211_led.h" -#include "ieee80211_cfg.h" +#include "cfg.h" #include "debugfs.h" #include "debugfs_netdev.h" -#include "debugfs_key.h" - -/* privid for wiphys to determine whether they belong to us or not */ -void *mac80211_wiphy_privid = &mac80211_wiphy_privid; - -/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ -/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -static const unsigned char rfc1042_header[] = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; - -/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ -static const unsigned char bridge_tunnel_header[] = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; - -/* No encapsulation header if EtherType < 0x600 (=length) */ -static const unsigned char eapol_header[] = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e }; - /* * For seeing transmitted packets on monitor interfaces @@ -67,2217 +44,408 @@ struct ieee80211_tx_status_rtap_hdr { u8 data_retries; } __attribute__ ((packed)); +/* common interface routines */ -static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata, - struct ieee80211_hdr *hdr) -{ - /* Set the sequence number for this frame. */ - hdr->seq_ctrl = cpu_to_le16(sdata->sequence); - - /* Increase the sequence number. */ - sdata->sequence = (sdata->sequence + 0x10) & IEEE80211_SCTL_SEQ; -} - -struct ieee80211_key_conf * -ieee80211_key_data2conf(struct ieee80211_local *local, - const struct ieee80211_key *data) +static int header_parse_80211(const struct sk_buff *skb, unsigned char *haddr) { - struct ieee80211_key_conf *conf; - - conf = kmalloc(sizeof(*conf) + data->keylen, GFP_ATOMIC); - if (!conf) - return NULL; - - conf->hw_key_idx = data->hw_key_idx; - conf->alg = data->alg; - conf->keylen = data->keylen; - conf->flags = 0; - if (data->force_sw_encrypt) - conf->flags |= IEEE80211_KEY_FORCE_SW_ENCRYPT; - conf->keyidx = data->keyidx; - if (data->default_tx_key) - conf->flags |= IEEE80211_KEY_DEFAULT_TX_KEY; - if (local->default_wep_only) - conf->flags |= IEEE80211_KEY_DEFAULT_WEP_ONLY; - memcpy(conf->key, data->key, data->keylen); - - return conf; + memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */ + return ETH_ALEN; } -struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, - int idx, size_t key_len, gfp_t flags) +/* must be called under mdev tx lock */ +static void ieee80211_configure_filter(struct ieee80211_local *local) { - struct ieee80211_key *key; + unsigned int changed_flags; + unsigned int new_flags = 0; - key = kzalloc(sizeof(struct ieee80211_key) + key_len, flags); - if (!key) - return NULL; - kref_init(&key->kref); - return key; -} + if (atomic_read(&local->iff_promiscs)) + new_flags |= FIF_PROMISC_IN_BSS; -static void ieee80211_key_release(struct kref *kref) -{ - struct ieee80211_key *key; + if (atomic_read(&local->iff_allmultis)) + new_flags |= FIF_ALLMULTI; - key = container_of(kref, struct ieee80211_key, kref); - if (key->alg == ALG_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - ieee80211_debugfs_key_remove(key); - kfree(key); -} + if (local->monitors) + new_flags |= FIF_CONTROL | + FIF_OTHER_BSS | + FIF_BCN_PRBRESP_PROMISC; -void ieee80211_key_free(struct ieee80211_key *key) -{ - if (key) - kref_put(&key->kref, ieee80211_key_release); -} + changed_flags = local->filter_flags ^ new_flags; -static int rate_list_match(const int *rate_list, int rate) -{ - int i; + /* be a bit nasty */ + new_flags |= (1<<31); - if (!rate_list) - return 0; + local->ops->configure_filter(local_to_hw(local), + changed_flags, &new_flags, + local->mdev->mc_count, + local->mdev->mc_list); - for (i = 0; rate_list[i] >= 0; i++) - if (rate_list[i] == rate) - return 1; + WARN_ON(new_flags & (1<<31)); - return 0; + local->filter_flags = new_flags & ~(1<<31); } +/* master interface */ -void ieee80211_prepare_rates(struct ieee80211_local *local, - struct ieee80211_hw_mode *mode) +static int ieee80211_master_open(struct net_device *dev) { - int i; - - for (i = 0; i < mode->num_rates; i++) { - struct ieee80211_rate *rate = &mode->rates[i]; - - rate->flags &= ~(IEEE80211_RATE_SUPPORTED | - IEEE80211_RATE_BASIC); - - if (local->supp_rates[mode->mode]) { - if (!rate_list_match(local->supp_rates[mode->mode], - rate->rate)) - continue; - } - - rate->flags |= IEEE80211_RATE_SUPPORTED; - - /* Use configured basic rate set if it is available. If not, - * use defaults that are sane for most cases. */ - if (local->basic_rates[mode->mode]) { - if (rate_list_match(local->basic_rates[mode->mode], - rate->rate)) - rate->flags |= IEEE80211_RATE_BASIC; - } else switch (mode->mode) { - case MODE_IEEE80211A: - if (rate->rate == 60 || rate->rate == 120 || - rate->rate == 240) - rate->flags |= IEEE80211_RATE_BASIC; - break; - case MODE_IEEE80211B: - if (rate->rate == 10 || rate->rate == 20) - rate->flags |= IEEE80211_RATE_BASIC; - break; - case MODE_ATHEROS_TURBO: - if (rate->rate == 120 || rate->rate == 240 || - rate->rate == 480) - rate->flags |= IEEE80211_RATE_BASIC; - break; - case MODE_IEEE80211G: - if (rate->rate == 10 || rate->rate == 20 || - rate->rate == 55 || rate->rate == 110) - rate->flags |= IEEE80211_RATE_BASIC; - break; - } + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata; + int res = -EOPNOTSUPP; - /* Set ERP and MANDATORY flags based on phymode */ - switch (mode->mode) { - case MODE_IEEE80211A: - if (rate->rate == 60 || rate->rate == 120 || - rate->rate == 240) - rate->flags |= IEEE80211_RATE_MANDATORY; - break; - case MODE_IEEE80211B: - if (rate->rate == 10) - rate->flags |= IEEE80211_RATE_MANDATORY; - break; - case MODE_ATHEROS_TURBO: - break; - case MODE_IEEE80211G: - if (rate->rate == 10 || rate->rate == 20 || - rate->rate == 55 || rate->rate == 110 || - rate->rate == 60 || rate->rate == 120 || - rate->rate == 240) - rate->flags |= IEEE80211_RATE_MANDATORY; + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->dev != dev && netif_running(sdata->dev)) { + res = 0; break; } - if (ieee80211_is_erp_rate(mode->mode, rate->rate)) - rate->flags |= IEEE80211_RATE_ERP; } + return res; } - -static void ieee80211_key_threshold_notify(struct net_device *dev, - struct ieee80211_key *key, - struct sta_info *sta) +static int ieee80211_master_stop(struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - struct ieee80211_msg_key_notification *msg; - - /* if no one will get it anyway, don't even allocate it. - * unlikely because this is only relevant for APs - * where the device must be open... */ - if (unlikely(!local->apdev)) - return; - - skb = dev_alloc_skb(sizeof(struct ieee80211_frame_info) + - sizeof(struct ieee80211_msg_key_notification)); - if (!skb) - return; - - skb_reserve(skb, sizeof(struct ieee80211_frame_info)); - msg = (struct ieee80211_msg_key_notification *) - skb_put(skb, sizeof(struct ieee80211_msg_key_notification)); - msg->tx_rx_count = key->tx_rx_count; - memcpy(msg->ifname, dev->name, IFNAMSIZ); - if (sta) - memcpy(msg->addr, sta->addr, ETH_ALEN); - else - memset(msg->addr, 0xff, ETH_ALEN); - - key->tx_rx_count = 0; - - ieee80211_rx_mgmt(local, skb, NULL, - ieee80211_msg_key_threshold_notification); -} - - -static u8 * ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len) -{ - u16 fc; - - if (len < 24) - return NULL; - - fc = le16_to_cpu(hdr->frame_control); - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_DATA: - switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case IEEE80211_FCTL_TODS: - return hdr->addr1; - case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): - return NULL; - case IEEE80211_FCTL_FROMDS: - return hdr->addr2; - case 0: - return hdr->addr3; - } - break; - case IEEE80211_FTYPE_MGMT: - return hdr->addr3; - case IEEE80211_FTYPE_CTL: - if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL) - return hdr->addr1; - else - return NULL; - } - - return NULL; -} - -int ieee80211_get_hdrlen(u16 fc) -{ - int hdrlen = 24; - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_DATA: - if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) - hdrlen = 30; /* Addr4 */ - /* - * The QoS Control field is two bytes and its presence is - * indicated by the IEEE80211_STYPE_QOS_DATA bit. Add 2 to - * hdrlen if that bit is set. - * This works by masking out the bit and shifting it to - * bit position 1 so the result has the value 0 or 2. - */ - hdrlen += (fc & IEEE80211_STYPE_QOS_DATA) - >> (ilog2(IEEE80211_STYPE_QOS_DATA)-1); - break; - case IEEE80211_FTYPE_CTL: - /* - * ACK and CTS are 10 bytes, all others 16. To see how - * to get this condition consider - * subtype mask: 0b0000000011110000 (0x00F0) - * ACK subtype: 0b0000000011010000 (0x00D0) - * CTS subtype: 0b0000000011000000 (0x00C0) - * bits that matter: ^^^ (0x00E0) - * value of those: 0b0000000011000000 (0x00C0) - */ - if ((fc & 0xE0) == 0xC0) - hdrlen = 10; - else - hdrlen = 16; - break; - } - - return hdrlen; -} -EXPORT_SYMBOL(ieee80211_get_hdrlen); - -int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) -{ - const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *) skb->data; - int hdrlen; - - if (unlikely(skb->len < 10)) - return 0; - hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); - if (unlikely(hdrlen > skb->len)) - return 0; - return hdrlen; -} -EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); - -static int ieee80211_get_radiotap_len(struct sk_buff *skb) -{ - struct ieee80211_radiotap_header *hdr = - (struct ieee80211_radiotap_header *) skb->data; - - return le16_to_cpu(hdr->it_len); -} - -#ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP -static void ieee80211_dump_frame(const char *ifname, const char *title, - const struct sk_buff *skb) -{ - const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; - - printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len); - if (skb->len < 4) { - printk("\n"); - return; - } - - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); - if (hdrlen > skb->len) - hdrlen = skb->len; - if (hdrlen >= 4) - printk(" FC=0x%04x DUR=0x%04x", - fc, le16_to_cpu(hdr->duration_id)); - if (hdrlen >= 10) - printk(" A1=" MAC_FMT, MAC_ARG(hdr->addr1)); - if (hdrlen >= 16) - printk(" A2=" MAC_FMT, MAC_ARG(hdr->addr2)); - if (hdrlen >= 24) - printk(" A3=" MAC_FMT, MAC_ARG(hdr->addr3)); - if (hdrlen >= 30) - printk(" A4=" MAC_FMT, MAC_ARG(hdr->addr4)); - printk("\n"); -} -#else /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ -static inline void ieee80211_dump_frame(const char *ifname, const char *title, - struct sk_buff *skb) -{ -} -#endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ - - -static int ieee80211_is_eapol(const struct sk_buff *skb) -{ - const struct ieee80211_hdr *hdr; - u16 fc; - int hdrlen; - - if (unlikely(skb->len < 10)) - return 0; - - hdr = (const struct ieee80211_hdr *) skb->data; - fc = le16_to_cpu(hdr->frame_control); - - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) - return 0; - - hdrlen = ieee80211_get_hdrlen(fc); + struct ieee80211_sub_if_data *sdata; - if (unlikely(skb->len >= hdrlen + sizeof(eapol_header) && - memcmp(skb->data + hdrlen, eapol_header, - sizeof(eapol_header)) == 0)) - return 1; + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(sdata, &local->interfaces, list) + if (sdata->dev != dev && netif_running(sdata->dev)) + dev_close(sdata->dev); return 0; } - -static ieee80211_txrx_result -ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx) -{ - struct rate_control_extra extra; - - memset(&extra, 0, sizeof(extra)); - extra.mode = tx->u.tx.mode; - extra.mgmt_data = tx->sdata && - tx->sdata->type == IEEE80211_IF_TYPE_MGMT; - extra.ethertype = tx->ethertype; - - tx->u.tx.rate = rate_control_get_rate(tx->local, tx->dev, tx->skb, - &extra); - if (unlikely(extra.probe != NULL)) { - tx->u.tx.control->flags |= IEEE80211_TXCTL_RATE_CTRL_PROBE; - tx->u.tx.probe_last_frag = 1; - tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val; - tx->u.tx.rate = extra.probe; - } else { - tx->u.tx.control->alt_retry_rate = -1; - } - if (!tx->u.tx.rate) - return TXRX_DROP; - if (tx->u.tx.mode->mode == MODE_IEEE80211G && - tx->sdata->use_protection && tx->fragmented && - extra.nonerp) { - tx->u.tx.last_frag_rate = tx->u.tx.rate; - tx->u.tx.probe_last_frag = extra.probe ? 1 : 0; - - tx->u.tx.rate = extra.nonerp; - tx->u.tx.control->rate = extra.nonerp; - tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE; - } else { - tx->u.tx.last_frag_rate = tx->u.tx.rate; - tx->u.tx.control->rate = tx->u.tx.rate; - } - tx->u.tx.control->tx_rate = tx->u.tx.rate->val; - if ((tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) && - tx->local->short_preamble && - (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) { - tx->u.tx.short_preamble = 1; - tx->u.tx.control->tx_rate = tx->u.tx.rate->val2; - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx) +static void ieee80211_master_set_multicast_list(struct net_device *dev) { - if (tx->sta) - tx->u.tx.control->key_idx = tx->sta->key_idx_compression; - else - tx->u.tx.control->key_idx = HW_KEY_IDX_INVALID; - - if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) - tx->key = NULL; - else if (tx->sta && tx->sta->key) - tx->key = tx->sta->key; - else if (tx->sdata->default_key) - tx->key = tx->sdata->default_key; - else if (tx->sdata->drop_unencrypted && - !(tx->sdata->eapol && ieee80211_is_eapol(tx->skb))) { - I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); - return TXRX_DROP; - } else - tx->key = NULL; - - if (tx->key) { - tx->key->tx_rx_count++; - if (unlikely(tx->local->key_tx_rx_threshold && - tx->key->tx_rx_count > - tx->local->key_tx_rx_threshold)) { - ieee80211_key_threshold_notify(tx->dev, tx->key, - tx->sta); - } - } + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - return TXRX_CONTINUE; + ieee80211_configure_filter(local); } +/* regular interfaces */ -static ieee80211_txrx_result -ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) +static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - size_t hdrlen, per_fragm, num_fragm, payload_len, left; - struct sk_buff **frags, *first, *frag; - int i; - u16 seq; - u8 *pos; - int frag_threshold = tx->local->fragmentation_threshold; - - if (!tx->fragmented) - return TXRX_CONTINUE; - - first = tx->skb; - - hdrlen = ieee80211_get_hdrlen(tx->fc); - payload_len = first->len - hdrlen; - per_fragm = frag_threshold - hdrlen - FCS_LEN; - num_fragm = (payload_len + per_fragm - 1) / per_fragm; - - frags = kzalloc(num_fragm * sizeof(struct sk_buff *), GFP_ATOMIC); - if (!frags) - goto fail; - - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREFRAGS); - seq = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ; - pos = first->data + hdrlen + per_fragm; - left = payload_len - per_fragm; - for (i = 0; i < num_fragm - 1; i++) { - struct ieee80211_hdr *fhdr; - size_t copylen; - - if (left <= 0) - goto fail; - - /* reserve enough extra head and tail room for possible - * encryption */ - frag = frags[i] = - dev_alloc_skb(tx->local->tx_headroom + - frag_threshold + - IEEE80211_ENCRYPT_HEADROOM + - IEEE80211_ENCRYPT_TAILROOM); - if (!frag) - goto fail; - /* Make sure that all fragments use the same priority so - * that they end up using the same TX queue */ - frag->priority = first->priority; - skb_reserve(frag, tx->local->tx_headroom + - IEEE80211_ENCRYPT_HEADROOM); - fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); - memcpy(fhdr, first->data, hdrlen); - if (i == num_fragm - 2) - fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); - fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG)); - copylen = left > per_fragm ? per_fragm : left; - memcpy(skb_put(frag, copylen), pos, copylen); - - pos += copylen; - left -= copylen; - } - skb_trim(first, hdrlen + per_fragm); - - tx->u.tx.num_extra_frag = num_fragm - 1; - tx->u.tx.extra_frag = frags; - - return TXRX_CONTINUE; - - fail: - printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name); - if (frags) { - for (i = 0; i < num_fragm - 1; i++) - if (frags[i]) - dev_kfree_skb(frags[i]); - kfree(frags); + /* FIX: what would be proper limits for MTU? + * This interface uses 802.3 frames. */ + if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6) { + printk(KERN_WARNING "%s: invalid MTU %d\n", + dev->name, new_mtu); + return -EINVAL; } - I802_DEBUG_INC(tx->local->tx_handlers_drop_fragment); - return TXRX_DROP; -} - -static int wep_encrypt_skb(struct ieee80211_txrx_data *tx, struct sk_buff *skb) -{ - if (tx->key->force_sw_encrypt) { - if (ieee80211_wep_encrypt(tx->local, skb, tx->key)) - return -1; - } else { - tx->u.tx.control->key_idx = tx->key->hw_key_idx; - if (tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) { - if (ieee80211_wep_add_iv(tx->local, skb, tx->key) == - NULL) - return -1; - } - } +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + dev->mtu = new_mtu; return 0; } - -void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if (tx->u.tx.extra_frag) { - struct ieee80211_hdr *fhdr; - int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - fhdr = (struct ieee80211_hdr *) - tx->u.tx.extra_frag[i]->data; - fhdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - } - } -} - - -static ieee80211_txrx_result -ieee80211_tx_h_wep_encrypt(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - u16 fc; - - fc = le16_to_cpu(hdr->frame_control); - - if (!tx->key || tx->key->alg != ALG_WEP || - ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))) - return TXRX_CONTINUE; - - tx->u.tx.control->iv_len = WEP_IV_LEN; - tx->u.tx.control->icv_len = WEP_ICV_LEN; - ieee80211_tx_set_iswep(tx); - - if (wep_encrypt_skb(tx, tx->skb) < 0) { - I802_DEBUG_INC(tx->local->tx_handlers_drop_wep); - return TXRX_DROP; - } - - if (tx->u.tx.extra_frag) { - int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - if (wep_encrypt_skb(tx, tx->u.tx.extra_frag[i]) < 0) { - I802_DEBUG_INC(tx->local-> - tx_handlers_drop_wep); - return TXRX_DROP; - } - } - } - - return TXRX_CONTINUE; -} - - -static int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, - int rate, int erp, int short_preamble) -{ - int dur; - - /* calculate duration (in microseconds, rounded up to next higher - * integer if it includes a fractional microsecond) to send frame of - * len bytes (does not include FCS) at the given rate. Duration will - * also include SIFS. - * - * rate is in 100 kbps, so divident is multiplied by 10 in the - * DIV_ROUND_UP() operations. - */ - - if (local->hw.conf.phymode == MODE_IEEE80211A || erp || - local->hw.conf.phymode == MODE_ATHEROS_TURBO) { - /* - * OFDM: - * - * N_DBPS = DATARATE x 4 - * N_SYM = Ceiling((16+8xLENGTH+6) / N_DBPS) - * (16 = SIGNAL time, 6 = tail bits) - * TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext - * - * T_SYM = 4 usec - * 802.11a - 17.5.2: aSIFSTime = 16 usec - * 802.11g - 19.8.4: aSIFSTime = 10 usec + - * signal ext = 6 usec - */ - /* FIX: Atheros Turbo may have different (shorter) duration? */ - dur = 16; /* SIFS + signal ext */ - dur += 16; /* 17.3.2.3: T_PREAMBLE = 16 usec */ - dur += 4; /* 17.3.2.3: T_SIGNAL = 4 usec */ - dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10, - 4 * rate); /* T_SYM x N_SYM */ - } else { - /* - * 802.11b or 802.11g with 802.11b compatibility: - * 18.3.4: TXTIME = PreambleLength + PLCPHeaderTime + - * Ceiling(((LENGTH+PBCC)x8)/DATARATE). PBCC=0. - * - * 802.11 (DS): 15.3.3, 802.11b: 18.3.4 - * aSIFSTime = 10 usec - * aPreambleLength = 144 usec or 72 usec with short preamble - * aPLCPHeaderLength = 48 usec or 24 usec with short preamble - */ - dur = 10; /* aSIFSTime = 10 usec */ - dur += short_preamble ? (72 + 24) : (144 + 48); - - dur += DIV_ROUND_UP(8 * (len + 4) * 10, rate); - } - - return dur; -} - - -/* Exported duration function for driver use */ -__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, - size_t frame_len, int rate) -{ - struct ieee80211_local *local = hw_to_local(hw); - u16 dur; - int erp; - - erp = ieee80211_is_erp_rate(hw->conf.phymode, rate); - dur = ieee80211_frame_duration(local, frame_len, rate, - erp, local->short_preamble); - - return cpu_to_le16(dur); -} -EXPORT_SYMBOL(ieee80211_generic_frame_duration); - - -static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr, - int next_frag_len) -{ - int rate, mrate, erp, dur, i; - struct ieee80211_rate *txrate = tx->u.tx.rate; - struct ieee80211_local *local = tx->local; - struct ieee80211_hw_mode *mode = tx->u.tx.mode; - - erp = txrate->flags & IEEE80211_RATE_ERP; - - /* - * data and mgmt (except PS Poll): - * - during CFP: 32768 - * - during contention period: - * if addr1 is group address: 0 - * if more fragments = 0 and addr1 is individual address: time to - * transmit one ACK plus SIFS - * if more fragments = 1 and addr1 is individual address: time to - * transmit next fragment plus 2 x ACK plus 3 x SIFS - * - * IEEE 802.11, 9.6: - * - control response frame (CTS or ACK) shall be transmitted using the - * same rate as the immediately previous frame in the frame exchange - * sequence, if this rate belongs to the PHY mandatory rates, or else - * at the highest possible rate belonging to the PHY rates in the - * BSSBasicRateSet - */ - - if ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) { - /* TODO: These control frames are not currently sent by - * 80211.o, but should they be implemented, this function - * needs to be updated to support duration field calculation. - * - * RTS: time needed to transmit pending data/mgmt frame plus - * one CTS frame plus one ACK frame plus 3 x SIFS - * CTS: duration of immediately previous RTS minus time - * required to transmit CTS and its SIFS - * ACK: 0 if immediately previous directed data/mgmt had - * more=0, with more=1 duration in ACK frame is duration - * from previous frame minus time needed to transmit ACK - * and its SIFS - * PS Poll: BIT(15) | BIT(14) | aid - */ - return 0; - } - - /* data/mgmt */ - if (0 /* FIX: data/mgmt during CFP */) - return 32768; - - if (group_addr) /* Group address as the destination - no ACK */ - return 0; - - /* Individual destination address: - * IEEE 802.11, Ch. 9.6 (after IEEE 802.11g changes) - * CTS and ACK frames shall be transmitted using the highest rate in - * basic rate set that is less than or equal to the rate of the - * immediately previous frame and that is using the same modulation - * (CCK or OFDM). If no basic rate set matches with these requirements, - * the highest mandatory rate of the PHY that is less than or equal to - * the rate of the previous frame is used. - * Mandatory rates for IEEE 802.11g PHY: 1, 2, 5.5, 11, 6, 12, 24 Mbps - */ - rate = -1; - mrate = 10; /* use 1 Mbps if everything fails */ - for (i = 0; i < mode->num_rates; i++) { - struct ieee80211_rate *r = &mode->rates[i]; - if (r->rate > txrate->rate) - break; - - if (IEEE80211_RATE_MODULATION(txrate->flags) != - IEEE80211_RATE_MODULATION(r->flags)) - continue; - - if (r->flags & IEEE80211_RATE_BASIC) - rate = r->rate; - else if (r->flags & IEEE80211_RATE_MANDATORY) - mrate = r->rate; - } - if (rate == -1) { - /* No matching basic rate found; use highest suitable mandatory - * PHY rate */ - rate = mrate; - } - - /* Time needed to transmit ACK - * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up - * to closest integer */ - - dur = ieee80211_frame_duration(local, 10, rate, erp, - local->short_preamble); - - if (next_frag_len) { - /* Frame is fragmented: duration increases with time needed to - * transmit next fragment plus ACK and 2 x SIFS. */ - dur *= 2; /* ACK + SIFS */ - /* next fragment */ - dur += ieee80211_frame_duration(local, next_frag_len, - txrate->rate, erp, - local->short_preamble); - } - - return dur; -} - - -static ieee80211_txrx_result -ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx) +static inline int identical_mac_addr_allowed(int type1, int type2) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - u16 dur; - struct ieee80211_tx_control *control = tx->u.tx.control; - struct ieee80211_hw_mode *mode = tx->u.tx.mode; - - if (!is_multicast_ether_addr(hdr->addr1)) { - if (tx->skb->len + FCS_LEN > tx->local->rts_threshold && - tx->local->rts_threshold < IEEE80211_MAX_RTS_THRESHOLD) { - control->flags |= IEEE80211_TXCTL_USE_RTS_CTS; - control->retry_limit = - tx->local->long_retry_limit; - } else { - control->retry_limit = - tx->local->short_retry_limit; - } - } else { - control->retry_limit = 1; - } - - if (tx->fragmented) { - /* Do not use multiple retry rates when sending fragmented - * frames. - * TODO: The last fragment could still use multiple retry - * rates. */ - control->alt_retry_rate = -1; - } - - /* Use CTS protection for unicast frames sent using extended rates if - * there are associated non-ERP stations and RTS/CTS is not configured - * for the frame. */ - if (mode->mode == MODE_IEEE80211G && - (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) && - tx->u.tx.unicast && tx->sdata->use_protection && - !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) - control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; - - /* Setup duration field for the first fragment of the frame. Duration - * for remaining fragments will be updated when they are being sent - * to low-level driver in ieee80211_tx(). */ - dur = ieee80211_duration(tx, is_multicast_ether_addr(hdr->addr1), - tx->fragmented ? tx->u.tx.extra_frag[0]->len : - 0); - hdr->duration_id = cpu_to_le16(dur); - - if ((control->flags & IEEE80211_TXCTL_USE_RTS_CTS) || - (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) { - struct ieee80211_rate *rate; - - /* Do not use multiple retry rates when using RTS/CTS */ - control->alt_retry_rate = -1; - - /* Use min(data rate, max base rate) as CTS/RTS rate */ - rate = tx->u.tx.rate; - while (rate > mode->rates && - !(rate->flags & IEEE80211_RATE_BASIC)) - rate--; - - control->rts_cts_rate = rate->val; - control->rts_rate = rate; - } - - if (tx->sta) { - tx->sta->tx_packets++; - tx->sta->tx_fragments++; - tx->sta->tx_bytes += tx->skb->len; - if (tx->u.tx.extra_frag) { - int i; - tx->sta->tx_fragments += tx->u.tx.num_extra_frag; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - tx->sta->tx_bytes += - tx->u.tx.extra_frag[i]->len; - } - } - } - - return TXRX_CONTINUE; + return (type1 == IEEE80211_IF_TYPE_MNTR || + type2 == IEEE80211_IF_TYPE_MNTR || + (type1 == IEEE80211_IF_TYPE_AP && + type2 == IEEE80211_IF_TYPE_WDS) || + (type1 == IEEE80211_IF_TYPE_WDS && + (type2 == IEEE80211_IF_TYPE_WDS || + type2 == IEEE80211_IF_TYPE_AP)) || + (type1 == IEEE80211_IF_TYPE_AP && + type2 == IEEE80211_IF_TYPE_VLAN) || + (type1 == IEEE80211_IF_TYPE_VLAN && + (type2 == IEEE80211_IF_TYPE_AP || + type2 == IEEE80211_IF_TYPE_VLAN))); } - -static ieee80211_txrx_result -ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx) +static int ieee80211_open(struct net_device *dev) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - struct sk_buff *skb = tx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - u32 sta_flags; - - if (unlikely(tx->local->sta_scanning != 0) && - ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ)) - return TXRX_DROP; + struct ieee80211_sub_if_data *sdata, *nsdata; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_if_init_conf conf; + int res; - if (tx->u.tx.ps_buffered) - return TXRX_CONTINUE; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sta_flags = tx->sta ? tx->sta->flags : 0; + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(nsdata, &local->interfaces, list) { + struct net_device *ndev = nsdata->dev; - if (likely(tx->u.tx.unicast)) { - if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && - tx->sdata->type != IEEE80211_IF_TYPE_IBSS && - (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: dropped data frame to not " - "associated station " MAC_FMT "\n", - tx->dev->name, MAC_ARG(hdr->addr1)); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); - return TXRX_DROP; - } - } else { - if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - tx->local->num_sta == 0 && - !tx->local->allow_broadcast_always && - tx->sdata->type != IEEE80211_IF_TYPE_IBSS)) { + if (ndev != dev && ndev != local->mdev && netif_running(ndev) && + compare_ether_addr(dev->dev_addr, ndev->dev_addr) == 0) { /* - * No associated STAs - no need to send multicast - * frames. + * check whether it may have the same address */ - return TXRX_DROP; - } - return TXRX_CONTINUE; - } - - if (unlikely(!tx->u.tx.mgmt_interface && tx->sdata->ieee802_1x && - !(sta_flags & WLAN_STA_AUTHORIZED))) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: dropped frame to " MAC_FMT - " (unauthorized port)\n", tx->dev->name, - MAC_ARG(hdr->addr1)); -#endif - I802_DEBUG_INC(tx->local->tx_handlers_drop_unauth_port); - return TXRX_DROP; - } - - return TXRX_CONTINUE; -} - -static ieee80211_txrx_result -ieee80211_tx_h_sequence(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + if (!identical_mac_addr_allowed(sdata->type, + nsdata->type)) + return -ENOTUNIQ; - if (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)) >= 24) - ieee80211_include_sequence(tx->sdata, hdr); - - return TXRX_CONTINUE; -} - -/* This function is called whenever the AP is about to exceed the maximum limit - * of buffered frames for power saving STAs. This situation should not really - * happen often during normal operation, so dropping the oldest buffered packet - * from each queue should be OK to make some room for new frames. */ -static void purge_old_ps_buffers(struct ieee80211_local *local) -{ - int total = 0, purged = 0; - struct sk_buff *skb; - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - struct ieee80211_if_ap *ap; - if (sdata->dev == local->mdev || - sdata->type != IEEE80211_IF_TYPE_AP) - continue; - ap = &sdata->u.ap; - skb = skb_dequeue(&ap->ps_bc_buf); - if (skb) { - purged++; - dev_kfree_skb(skb); + /* + * can only add VLANs to enabled APs + */ + if (sdata->type == IEEE80211_IF_TYPE_VLAN && + nsdata->type == IEEE80211_IF_TYPE_AP && + netif_running(nsdata->dev)) + sdata->u.vlan.ap = nsdata; } - total += skb_queue_len(&ap->ps_bc_buf); } - read_unlock(&local->sub_if_lock); - spin_lock_bh(&local->sta_lock); - list_for_each_entry(sta, &local->sta_list, list) { - skb = skb_dequeue(&sta->ps_tx_buf); - if (skb) { - purged++; - dev_kfree_skb(skb); - } - total += skb_queue_len(&sta->ps_tx_buf); + switch (sdata->type) { + case IEEE80211_IF_TYPE_WDS: + if (is_zero_ether_addr(sdata->u.wds.remote_addr)) + return -ENOLINK; + break; + case IEEE80211_IF_TYPE_VLAN: + if (!sdata->u.vlan.ap) + return -ENOLINK; + break; + case IEEE80211_IF_TYPE_AP: + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_MNTR: + case IEEE80211_IF_TYPE_IBSS: + /* no special treatment */ + break; + case IEEE80211_IF_TYPE_INVALID: + /* cannot happen */ + WARN_ON(1); + break; } - spin_unlock_bh(&local->sta_lock); - - local->total_ps_buffered = total; - printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", - local->mdev->name, purged); -} - -static inline ieee80211_txrx_result -ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx) -{ - /* broadcast/multicast frame */ - /* If any of the associated stations is in power save mode, - * the frame is buffered to be sent after DTIM beacon frame */ - if ((tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) && - tx->sdata->type != IEEE80211_IF_TYPE_WDS && - tx->sdata->bss && atomic_read(&tx->sdata->bss->num_sta_ps) && - !(tx->fc & IEEE80211_FCTL_ORDER)) { - if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) - purge_old_ps_buffers(tx->local); - if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= - AP_MAX_BC_BUFFER) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: BC TX buffer full - " - "dropping the oldest frame\n", - tx->dev->name); - } - dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf)); - } else - tx->local->total_ps_buffered++; - skb_queue_tail(&tx->sdata->bss->ps_bc_buf, tx->skb); - return TXRX_QUEUED; + if (local->open_count == 0) { + res = 0; + if (local->ops->start) + res = local->ops->start(local_to_hw(local)); + if (res) + return res; } - return TXRX_CONTINUE; -} - + switch (sdata->type) { + case IEEE80211_IF_TYPE_VLAN: + list_add(&sdata->u.vlan.list, &sdata->u.vlan.ap->u.ap.vlans); + /* no need to tell driver */ + break; + case IEEE80211_IF_TYPE_MNTR: + /* must be before the call to ieee80211_configure_filter */ + local->monitors++; + if (local->monitors == 1) { + netif_tx_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_tx_unlock_bh(local->mdev); -static inline ieee80211_txrx_result -ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx) -{ - struct sta_info *sta = tx->sta; - - if (unlikely(!sta || - ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT && - (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP))) - return TXRX_CONTINUE; - - if (unlikely((sta->flags & WLAN_STA_PS) && !sta->pspoll)) { - struct ieee80211_tx_packet_data *pkt_data; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA " MAC_FMT " aid %d: PS buffer (entries " - "before %d)\n", - MAC_ARG(sta->addr), sta->aid, - skb_queue_len(&sta->ps_tx_buf)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - sta->flags |= WLAN_STA_TIM; - if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) - purge_old_ps_buffers(tx->local); - if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) { - struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: STA " MAC_FMT " TX " - "buffer full - dropping oldest frame\n", - tx->dev->name, MAC_ARG(sta->addr)); - } - dev_kfree_skb(old); - } else - tx->local->total_ps_buffered++; - /* Queue frame to be sent after STA sends an PS Poll frame */ - if (skb_queue_empty(&sta->ps_tx_buf)) { - if (tx->local->ops->set_tim) - tx->local->ops->set_tim(local_to_hw(tx->local), - sta->aid, 1); - if (tx->sdata->bss) - bss_tim_set(tx->local, tx->sdata->bss, sta->aid); + local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; + ieee80211_hw_config(local); } - pkt_data = (struct ieee80211_tx_packet_data *)tx->skb->cb; - pkt_data->jiffies = jiffies; - skb_queue_tail(&sta->ps_tx_buf, tx->skb); - return TXRX_QUEUED; - } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - else if (unlikely(sta->flags & WLAN_STA_PS)) { - printk(KERN_DEBUG "%s: STA " MAC_FMT " in PS mode, but pspoll " - "set -> send frame\n", tx->dev->name, - MAC_ARG(sta->addr)); - } -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - sta->pspoll = 0; - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx) -{ - if (unlikely(tx->u.tx.ps_buffered)) - return TXRX_CONTINUE; - - if (tx->u.tx.unicast) - return ieee80211_tx_h_unicast_ps_buf(tx); - else - return ieee80211_tx_h_multicast_ps_buf(tx); -} - - -/* - * deal with packet injection down monitor interface - * with Radiotap Header -- only called for monitor mode interface - */ - -static ieee80211_txrx_result -__ieee80211_parse_tx_radiotap( - struct ieee80211_txrx_data *tx, - struct sk_buff *skb, struct ieee80211_tx_control *control) -{ - /* - * this is the moment to interpret and discard the radiotap header that - * must be at the start of the packet injected in Monitor mode - * - * Need to take some care with endian-ness since radiotap - * args are little-endian - */ - - struct ieee80211_radiotap_iterator iterator; - struct ieee80211_radiotap_header *rthdr = - (struct ieee80211_radiotap_header *) skb->data; - struct ieee80211_hw_mode *mode = tx->local->hw.conf.mode; - int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); - - /* - * default control situation for all injected packets - * FIXME: this does not suit all usage cases, expand to allow control - */ - - control->retry_limit = 1; /* no retry */ - control->key_idx = -1; /* no encryption key */ - control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | - IEEE80211_TXCTL_USE_CTS_PROTECT); - control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT | - IEEE80211_TXCTL_NO_ACK; - control->antenna_sel_tx = 0; /* default to default antenna */ - - /* - * for every radiotap entry that is present - * (ieee80211_radiotap_iterator_next returns -ENOENT when no more - * entries present, or -EINVAL on error) - */ - - while (!ret) { - int i, target_rate; - - ret = ieee80211_radiotap_iterator_next(&iterator); - - if (ret) - continue; - - /* see if this argument is something we can use */ - switch (iterator.this_arg_index) { - /* - * You must take care when dereferencing iterator.this_arg - * for multibyte types... the pointer is not aligned. Use - * get_unaligned((type *)iterator.this_arg) to dereference - * iterator.this_arg for type "type" safely on all arches. - */ - case IEEE80211_RADIOTAP_RATE: - /* - * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps - * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps - */ - target_rate = (*iterator.this_arg) * 5; - for (i = 0; i < mode->num_rates; i++) { - struct ieee80211_rate *r = &mode->rates[i]; - - if (r->rate > target_rate) - continue; - - control->rate = r; - - if (r->flags & IEEE80211_RATE_PREAMBLE2) - control->tx_rate = r->val2; - else - control->tx_rate = r->val; - - /* end on exact match */ - if (r->rate == target_rate) - i = mode->num_rates; - } - break; - - case IEEE80211_RADIOTAP_ANTENNA: - /* - * radiotap uses 0 for 1st ant, mac80211 is 1 for - * 1st ant - */ - control->antenna_sel_tx = (*iterator.this_arg) + 1; - break; - - case IEEE80211_RADIOTAP_DBM_TX_POWER: - control->power_level = *iterator.this_arg; - break; + break; + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_IBSS: + sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; + /* fall through */ + default: + conf.if_id = dev->ifindex; + conf.type = sdata->type; + conf.mac_addr = dev->dev_addr; + res = local->ops->add_interface(local_to_hw(local), &conf); + if (res && !local->open_count && local->ops->stop) + local->ops->stop(local_to_hw(local)); + if (res) + return res; - case IEEE80211_RADIOTAP_FLAGS: - if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { - /* - * this indicates that the skb we have been - * handed has the 32-bit FCS CRC at the end... - * we should react to that by snipping it off - * because it will be recomputed and added - * on transmission - */ - if (skb->len < (iterator.max_length + FCS_LEN)) - return TXRX_DROP; + ieee80211_if_config(dev); + ieee80211_reset_erp_info(dev); + ieee80211_enable_keys(sdata); - skb_trim(skb, skb->len - FCS_LEN); - } - break; + if (sdata->type == IEEE80211_IF_TYPE_STA && + !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) + netif_carrier_off(dev); + else + netif_carrier_on(dev); + } - default: - break; - } + if (local->open_count == 0) { + res = dev_open(local->mdev); + WARN_ON(res); + tasklet_enable(&local->tx_pending_tasklet); + tasklet_enable(&local->tasklet); } - if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ - return TXRX_DROP; + local->open_count++; - /* - * remove the radiotap header - * iterator->max_length was sanity-checked against - * skb->len by iterator init - */ - skb_pull(skb, iterator.max_length); + netif_start_queue(dev); - return TXRX_CONTINUE; + return 0; } - -static ieee80211_txrx_result inline -__ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, - struct sk_buff *skb, - struct net_device *dev, - struct ieee80211_tx_control *control) +static int ieee80211_stop(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_sub_if_data *sdata; - ieee80211_txrx_result res = TXRX_CONTINUE; - - int hdrlen; - - memset(tx, 0, sizeof(*tx)); - tx->skb = skb; - tx->dev = dev; /* use original interface */ - tx->local = local; - tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); - tx->sta = sta_info_get(local, hdr->addr1); - tx->fc = le16_to_cpu(hdr->frame_control); - - /* - * set defaults for things that can be set by - * injected radiotap headers - */ - control->power_level = local->hw.conf.power_level; - control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - if (local->sta_antenna_sel != STA_ANTENNA_SEL_AUTO && tx->sta) - control->antenna_sel_tx = tx->sta->antenna_sel_tx; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_if_init_conf conf; - /* process and remove the injection radiotap header */ sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { - if (__ieee80211_parse_tx_radiotap(tx, skb, control) == - TXRX_DROP) { - return TXRX_DROP; - } - /* - * we removed the radiotap header after this point, - * we filled control with what we could use - * set to the actual ieee header now - */ - hdr = (struct ieee80211_hdr *) skb->data; - res = TXRX_QUEUED; /* indication it was monitor packet */ - } - - tx->u.tx.control = control; - tx->u.tx.unicast = !is_multicast_ether_addr(hdr->addr1); - if (is_multicast_ether_addr(hdr->addr1)) - control->flags |= IEEE80211_TXCTL_NO_ACK; - else - control->flags &= ~IEEE80211_TXCTL_NO_ACK; - tx->fragmented = local->fragmentation_threshold < - IEEE80211_MAX_FRAG_THRESHOLD && tx->u.tx.unicast && - skb->len + FCS_LEN > local->fragmentation_threshold && - (!local->ops->set_frag_threshold); - if (!tx->sta) - control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; - else if (tx->sta->clear_dst_mask) { - control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; - tx->sta->clear_dst_mask = 0; - } - hdrlen = ieee80211_get_hdrlen(tx->fc); - if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { - u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; - tx->ethertype = (pos[0] << 8) | pos[1]; - } - control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; - return res; -} + netif_stop_queue(dev); -static int inline is_ieee80211_device(struct net_device *dev, - struct net_device *master) -{ - return (wdev_priv(dev->ieee80211_ptr) == - wdev_priv(master->ieee80211_ptr)); -} + dev_mc_unsync(local->mdev, dev); -/* Device in tx->dev has a reference added; use dev_put(tx->dev) when - * finished with it. */ -static int inline ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, - struct sk_buff *skb, - struct net_device *mdev, - struct ieee80211_tx_control *control) -{ - struct ieee80211_tx_packet_data *pkt_data; - struct net_device *dev; + /* down all dependent devices, that is VLANs */ + if (sdata->type == IEEE80211_IF_TYPE_AP) { + struct ieee80211_sub_if_data *vlan, *tmp; - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - dev = dev_get_by_index(pkt_data->ifindex); - if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { - dev_put(dev); - dev = NULL; + list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans, + u.vlan.list) + dev_close(vlan->dev); + WARN_ON(!list_empty(&sdata->u.ap.vlans)); } - if (unlikely(!dev)) - return -ENODEV; - __ieee80211_tx_prepare(tx, skb, dev, control); - return 0; -} - -static inline int __ieee80211_queue_stopped(const struct ieee80211_local *local, - int queue) -{ - return test_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); -} - -static inline int __ieee80211_queue_pending(const struct ieee80211_local *local, - int queue) -{ - return test_bit(IEEE80211_LINK_STATE_PENDING, &local->state[queue]); -} - -#define IEEE80211_TX_OK 0 -#define IEEE80211_TX_AGAIN 1 -#define IEEE80211_TX_FRAG_AGAIN 2 -static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, - struct ieee80211_txrx_data *tx) -{ - struct ieee80211_tx_control *control = tx->u.tx.control; - int ret, i; + local->open_count--; - if (!ieee80211_qdisc_installed(local->mdev) && - __ieee80211_queue_stopped(local, 0)) { - netif_stop_queue(local->mdev); - return IEEE80211_TX_AGAIN; - } - if (skb) { - ieee80211_dump_frame(local->mdev->name, "TX to low-level driver", skb); - ret = local->ops->tx(local_to_hw(local), skb, control); - if (ret) - return IEEE80211_TX_AGAIN; - local->mdev->trans_start = jiffies; - ieee80211_led_tx(local, 1); - } - if (tx->u.tx.extra_frag) { - control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | - IEEE80211_TXCTL_USE_CTS_PROTECT | - IEEE80211_TXCTL_CLEAR_DST_MASK | - IEEE80211_TXCTL_FIRST_FRAGMENT); - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - if (!tx->u.tx.extra_frag[i]) - continue; - if (__ieee80211_queue_stopped(local, control->queue)) - return IEEE80211_TX_FRAG_AGAIN; - if (i == tx->u.tx.num_extra_frag) { - control->tx_rate = tx->u.tx.last_frag_hwrate; - control->rate = tx->u.tx.last_frag_rate; - if (tx->u.tx.probe_last_frag) - control->flags |= - IEEE80211_TXCTL_RATE_CTRL_PROBE; - else - control->flags &= - ~IEEE80211_TXCTL_RATE_CTRL_PROBE; - } + switch (sdata->type) { + case IEEE80211_IF_TYPE_VLAN: + list_del(&sdata->u.vlan.list); + sdata->u.vlan.ap = NULL; + /* no need to tell driver */ + break; + case IEEE80211_IF_TYPE_MNTR: + local->monitors--; + if (local->monitors == 0) { + netif_tx_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_tx_unlock_bh(local->mdev); - ieee80211_dump_frame(local->mdev->name, - "TX to low-level driver", - tx->u.tx.extra_frag[i]); - ret = local->ops->tx(local_to_hw(local), - tx->u.tx.extra_frag[i], - control); - if (ret) - return IEEE80211_TX_FRAG_AGAIN; - local->mdev->trans_start = jiffies; - ieee80211_led_tx(local, 1); - tx->u.tx.extra_frag[i] = NULL; + local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; + ieee80211_hw_config(local); } - kfree(tx->u.tx.extra_frag); - tx->u.tx.extra_frag = NULL; - } - return IEEE80211_TX_OK; -} - -static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, - struct ieee80211_tx_control *control, int mgmt) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta; - ieee80211_tx_handler *handler; - struct ieee80211_txrx_data tx; - ieee80211_txrx_result res = TXRX_DROP, res_prepare; - int ret, i; - - WARN_ON(__ieee80211_queue_pending(local, control->queue)); - - if (unlikely(skb->len < 10)) { - dev_kfree_skb(skb); - return 0; - } - - res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control); - - if (res_prepare == TXRX_DROP) { - dev_kfree_skb(skb); - return 0; - } - - sta = tx.sta; - tx.u.tx.mgmt_interface = mgmt; - tx.u.tx.mode = local->hw.conf.mode; + break; + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_IBSS: + sdata->u.sta.state = IEEE80211_DISABLED; + del_timer_sync(&sdata->u.sta.timer); + /* + * When we get here, the interface is marked down. + * Call synchronize_rcu() to wait for the RX path + * should it be using the interface and enqueuing + * frames at this very time on another CPU. + */ + synchronize_rcu(); + skb_queue_purge(&sdata->u.sta.skb_queue); - if (res_prepare == TXRX_QUEUED) { /* if it was an injected packet */ - res = TXRX_CONTINUE; - } else { - for (handler = local->tx_handlers; *handler != NULL; - handler++) { - res = (*handler)(&tx); - if (res != TXRX_CONTINUE) - break; + if (!local->ops->hw_scan && + local->scan_dev == sdata->dev) { + local->sta_scanning = 0; + cancel_delayed_work(&local->scan_work); } + flush_workqueue(local->hw.workqueue); + /* fall through */ + default: + conf.if_id = dev->ifindex; + conf.type = sdata->type; + conf.mac_addr = dev->dev_addr; + /* disable all keys for as long as this netdev is down */ + ieee80211_disable_keys(sdata); + local->ops->remove_interface(local_to_hw(local), &conf); } - skb = tx.skb; /* handlers are allowed to change skb */ - - if (sta) - sta_info_put(sta); - - if (unlikely(res == TXRX_DROP)) { - I802_DEBUG_INC(local->tx_handlers_drop); - goto drop; - } - - if (unlikely(res == TXRX_QUEUED)) { - I802_DEBUG_INC(local->tx_handlers_queued); - return 0; - } - - if (tx.u.tx.extra_frag) { - for (i = 0; i < tx.u.tx.num_extra_frag; i++) { - int next_len, dur; - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) - tx.u.tx.extra_frag[i]->data; + if (local->open_count == 0) { + if (netif_running(local->mdev)) + dev_close(local->mdev); - if (i + 1 < tx.u.tx.num_extra_frag) { - next_len = tx.u.tx.extra_frag[i + 1]->len; - } else { - next_len = 0; - tx.u.tx.rate = tx.u.tx.last_frag_rate; - tx.u.tx.last_frag_hwrate = tx.u.tx.rate->val; - } - dur = ieee80211_duration(&tx, 0, next_len); - hdr->duration_id = cpu_to_le16(dur); - } - } + if (local->ops->stop) + local->ops->stop(local_to_hw(local)); -retry: - ret = __ieee80211_tx(local, skb, &tx); - if (ret) { - struct ieee80211_tx_stored_packet *store = - &local->pending_packet[control->queue]; - - if (ret == IEEE80211_TX_FRAG_AGAIN) - skb = NULL; - set_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[control->queue]); - smp_mb(); - /* When the driver gets out of buffers during sending of - * fragments and calls ieee80211_stop_queue, there is - * a small window between IEEE80211_LINK_STATE_XOFF and - * IEEE80211_LINK_STATE_PENDING flags are set. If a buffer - * gets available in that window (i.e. driver calls - * ieee80211_wake_queue), we would end up with ieee80211_tx - * called with IEEE80211_LINK_STATE_PENDING. Prevent this by - * continuing transmitting here when that situation is - * possible to have happened. */ - if (!__ieee80211_queue_stopped(local, control->queue)) { - clear_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[control->queue]); - goto retry; - } - memcpy(&store->control, control, - sizeof(struct ieee80211_tx_control)); - store->skb = skb; - store->extra_frag = tx.u.tx.extra_frag; - store->num_extra_frag = tx.u.tx.num_extra_frag; - store->last_frag_hwrate = tx.u.tx.last_frag_hwrate; - store->last_frag_rate = tx.u.tx.last_frag_rate; - store->last_frag_rate_ctrl_probe = tx.u.tx.probe_last_frag; + tasklet_disable(&local->tx_pending_tasklet); + tasklet_disable(&local->tasklet); } - return 0; - drop: - if (skb) - dev_kfree_skb(skb); - for (i = 0; i < tx.u.tx.num_extra_frag; i++) - if (tx.u.tx.extra_frag[i]) - dev_kfree_skb(tx.u.tx.extra_frag[i]); - kfree(tx.u.tx.extra_frag); return 0; } -static void ieee80211_tx_pending(unsigned long data) -{ - struct ieee80211_local *local = (struct ieee80211_local *)data; - struct net_device *dev = local->mdev; - struct ieee80211_tx_stored_packet *store; - struct ieee80211_txrx_data tx; - int i, ret, reschedule = 0; - - netif_tx_lock_bh(dev); - for (i = 0; i < local->hw.queues; i++) { - if (__ieee80211_queue_stopped(local, i)) - continue; - if (!__ieee80211_queue_pending(local, i)) { - reschedule = 1; - continue; - } - store = &local->pending_packet[i]; - tx.u.tx.control = &store->control; - tx.u.tx.extra_frag = store->extra_frag; - tx.u.tx.num_extra_frag = store->num_extra_frag; - tx.u.tx.last_frag_hwrate = store->last_frag_hwrate; - tx.u.tx.last_frag_rate = store->last_frag_rate; - tx.u.tx.probe_last_frag = store->last_frag_rate_ctrl_probe; - ret = __ieee80211_tx(local, store->skb, &tx); - if (ret) { - if (ret == IEEE80211_TX_FRAG_AGAIN) - store->skb = NULL; - } else { - clear_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[i]); - reschedule = 1; - } - } - netif_tx_unlock_bh(dev); - if (reschedule) { - if (!ieee80211_qdisc_installed(dev)) { - if (!__ieee80211_queue_stopped(local, 0)) - netif_wake_queue(dev); - } else - netif_schedule(dev); - } -} - -static void ieee80211_clear_tx_pending(struct ieee80211_local *local) -{ - int i, j; - struct ieee80211_tx_stored_packet *store; - - for (i = 0; i < local->hw.queues; i++) { - if (!__ieee80211_queue_pending(local, i)) - continue; - store = &local->pending_packet[i]; - kfree_skb(store->skb); - for (j = 0; j < store->num_extra_frag; j++) - kfree_skb(store->extra_frag[j]); - kfree(store->extra_frag); - clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[i]); - } -} - -static int ieee80211_master_start_xmit(struct sk_buff *skb, - struct net_device *dev) +static void ieee80211_set_multicast_list(struct net_device *dev) { - struct ieee80211_tx_control control; - struct ieee80211_tx_packet_data *pkt_data; - struct net_device *odev = NULL; - struct ieee80211_sub_if_data *osdata; - int headroom; - int ret; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int allmulti, promisc, sdata_allmulti, sdata_promisc; - /* - * copy control out of the skb so other people can use skb->cb - */ - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(&control, 0, sizeof(struct ieee80211_tx_control)); + allmulti = !!(dev->flags & IFF_ALLMULTI); + promisc = !!(dev->flags & IFF_PROMISC); + sdata_allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; + sdata_promisc = sdata->flags & IEEE80211_SDATA_PROMISC; - if (pkt_data->ifindex) - odev = dev_get_by_index(pkt_data->ifindex); - if (unlikely(odev && !is_ieee80211_device(odev, dev))) { - dev_put(odev); - odev = NULL; - } - if (unlikely(!odev)) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Discarded packet with nonexistent " - "originating device\n", dev->name); -#endif - dev_kfree_skb(skb); - return 0; + if (allmulti != sdata_allmulti) { + if (dev->flags & IFF_ALLMULTI) + atomic_inc(&local->iff_allmultis); + else + atomic_dec(&local->iff_allmultis); + sdata->flags ^= IEEE80211_SDATA_ALLMULTI; } - osdata = IEEE80211_DEV_TO_SUB_IF(odev); - headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM; - if (skb_headroom(skb) < headroom) { - if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - dev_put(odev); - return 0; - } + if (promisc != sdata_promisc) { + if (dev->flags & IFF_PROMISC) + atomic_inc(&local->iff_promiscs); + else + atomic_dec(&local->iff_promiscs); + sdata->flags ^= IEEE80211_SDATA_PROMISC; } - control.ifindex = odev->ifindex; - control.type = osdata->type; - if (pkt_data->req_tx_status) - control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS; - if (pkt_data->do_not_encrypt) - control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; - if (pkt_data->requeue) - control.flags |= IEEE80211_TXCTL_REQUEUE; - control.queue = pkt_data->queue; - - ret = ieee80211_tx(odev, skb, &control, - control.type == IEEE80211_IF_TYPE_MGMT); - dev_put(odev); - - return ret; + dev_mc_sync(local->mdev, dev); } +static const struct header_ops ieee80211_header_ops = { + .create = eth_header, + .parse = header_parse_80211, + .rebuild = eth_rebuild_header, + .cache = eth_header_cache, + .cache_update = eth_header_cache_update, +}; -int ieee80211_monitor_start_xmit(struct sk_buff *skb, - struct net_device *dev) +/* Must not be called for mdev */ +void ieee80211_if_setup(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data; - struct ieee80211_radiotap_header *prthdr = - (struct ieee80211_radiotap_header *)skb->data; - u16 len; - - /* - * there must be a radiotap header at the - * start in this case - */ - if (unlikely(prthdr->it_version)) { - /* only version 0 is supported */ - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - - skb->dev = local->mdev; - - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(*pkt_data)); - pkt_data->ifindex = dev->ifindex; - pkt_data->mgmt_iface = 0; - pkt_data->do_not_encrypt = 1; - - /* above needed because we set skb device to master */ - - /* - * fix up the pointers accounting for the radiotap - * header still being in there. We are being given - * a precooked IEEE80211 header so no need for - * normal processing - */ - len = le16_to_cpu(get_unaligned(&prthdr->it_len)); - skb_set_mac_header(skb, len); - skb_set_network_header(skb, len + sizeof(struct ieee80211_hdr)); - skb_set_transport_header(skb, len + sizeof(struct ieee80211_hdr)); - - /* - * pass the radiotap header up to - * the next stage intact - */ - dev_queue_xmit(skb); - - return NETDEV_TX_OK; + ether_setup(dev); + dev->header_ops = &ieee80211_header_ops; + dev->hard_start_xmit = ieee80211_subif_start_xmit; + dev->wireless_handlers = &ieee80211_iw_handler_def; + dev->set_multicast_list = ieee80211_set_multicast_list; + dev->change_mtu = ieee80211_change_mtu; + dev->open = ieee80211_open; + dev->stop = ieee80211_stop; + dev->destructor = ieee80211_if_free; } +/* WDS specialties */ -/** - * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type - * subinterfaces (wlan#, WDS, and VLAN interfaces) - * @skb: packet to be sent - * @dev: incoming interface - * - * Returns: 0 on success (and frees skb in this case) or 1 on failure (skb will - * not be freed, and caller is responsible for either retrying later or freeing - * skb). - * - * This function takes in an Ethernet header and encapsulates it with suitable - * IEEE 802.11 header based on which interface the packet is coming in. The - * encapsulated packet will then be passed to master interface, wlan#.11, for - * transmission (through low-level driver). - */ -int ieee80211_subif_start_xmit(struct sk_buff *skb, - struct net_device *dev) +int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data; - struct ieee80211_sub_if_data *sdata; - int ret = 1, head_need; - u16 ethertype, hdrlen, fc; - struct ieee80211_hdr hdr; - const u8 *encaps_data; - int encaps_len, skip_header_bytes; - int nh_pos, h_pos, no_encrypt = 0; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; + DECLARE_MAC_BUF(mac); - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (unlikely(skb->len < ETH_HLEN)) { - printk(KERN_DEBUG "%s: short skb (len=%d)\n", - dev->name, skb->len); - ret = 0; - goto fail; - } + if (compare_ether_addr(remote_addr, sdata->u.wds.remote_addr) == 0) + return 0; - nh_pos = skb_network_header(skb) - skb->data; - h_pos = skb_transport_header(skb) - skb->data; - - /* convert Ethernet header to proper 802.11 header (based on - * operation mode) */ - ethertype = (skb->data[12] << 8) | skb->data[13]; - /* TODO: handling for 802.1x authorized/unauthorized port */ - fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; - - if (likely(sdata->type == IEEE80211_IF_TYPE_AP || - sdata->type == IEEE80211_IF_TYPE_VLAN)) { - fc |= IEEE80211_FCTL_FROMDS; - /* DA BSSID SA */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); - hdrlen = 24; - } else if (sdata->type == IEEE80211_IF_TYPE_WDS) { - fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; - /* RA TA DA SA */ - memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); - hdrlen = 30; - } else if (sdata->type == IEEE80211_IF_TYPE_STA) { - fc |= IEEE80211_FCTL_TODS; - /* BSSID SA DA */ - memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - hdrlen = 24; - } else if (sdata->type == IEEE80211_IF_TYPE_IBSS) { - /* DA SA BSSID */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, sdata->u.sta.bssid, ETH_ALEN); - hdrlen = 24; - } else { - ret = 0; - goto fail; - } + /* Create STA entry for the new peer */ + sta = sta_info_add(local, dev, remote_addr, GFP_KERNEL); + if (!sta) + return -ENOMEM; + sta_info_put(sta); - /* receiver is QoS enabled, use a QoS type frame */ - sta = sta_info_get(local, hdr.addr1); + /* Remove STA entry for the old peer */ + sta = sta_info_get(local, sdata->u.wds.remote_addr); if (sta) { - if (sta->flags & WLAN_STA_WME) { - fc |= IEEE80211_STYPE_QOS_DATA; - hdrlen += 2; - } + sta_info_free(sta); sta_info_put(sta); - } - - hdr.frame_control = cpu_to_le16(fc); - hdr.duration_id = 0; - hdr.seq_ctrl = 0; - - skip_header_bytes = ETH_HLEN; - if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) { - encaps_data = bridge_tunnel_header; - encaps_len = sizeof(bridge_tunnel_header); - skip_header_bytes -= 2; - } else if (ethertype >= 0x600) { - encaps_data = rfc1042_header; - encaps_len = sizeof(rfc1042_header); - skip_header_bytes -= 2; } else { - encaps_data = NULL; - encaps_len = 0; - } - - skb_pull(skb, skip_header_bytes); - nh_pos -= skip_header_bytes; - h_pos -= skip_header_bytes; - - /* TODO: implement support for fragments so that there is no need to - * reallocate and copy payload; it might be enough to support one - * extra fragment that would be copied in the beginning of the frame - * data.. anyway, it would be nice to include this into skb structure - * somehow - * - * There are few options for this: - * use skb->cb as an extra space for 802.11 header - * allocate new buffer if not enough headroom - * make sure that there is enough headroom in every skb by increasing - * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and - * alloc_skb() (net/core/skbuff.c) - */ - head_need = hdrlen + encaps_len + local->tx_headroom; - head_need -= skb_headroom(skb); - - /* We are going to modify skb data, so make a copy of it if happens to - * be cloned. This could happen, e.g., with Linux bridge code passing - * us broadcast frames. */ - - if (head_need > 0 || skb_cloned(skb)) { -#if 0 - printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes " - "of headroom\n", dev->name, head_need); -#endif - - if (skb_cloned(skb)) - I802_DEBUG_INC(local->tx_expand_skb_head_cloned); - else - I802_DEBUG_INC(local->tx_expand_skb_head); - /* Since we have to reallocate the buffer, make sure that there - * is enough room for possible WEP IV/ICV and TKIP (8 bytes - * before payload and 12 after). */ - if (pskb_expand_head(skb, (head_need > 0 ? head_need + 8 : 8), - 12, GFP_ATOMIC)) { - printk(KERN_DEBUG "%s: failed to reallocate TX buffer" - "\n", dev->name); - goto fail; - } - } - - if (encaps_data) { - memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); - nh_pos += encaps_len; - h_pos += encaps_len; - } - memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); - nh_pos += hdrlen; - h_pos += hdrlen; - - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = dev->ifindex; - pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT); - pkt_data->do_not_encrypt = no_encrypt; - - skb->dev = local->mdev; - sdata->stats.tx_packets++; - sdata->stats.tx_bytes += skb->len; - - /* Update skb pointers to various headers since this modified frame - * is going to go through Linux networking code that may potentially - * need things like pointer to IP header. */ - skb_set_mac_header(skb, 0); - skb_set_network_header(skb, nh_pos); - skb_set_transport_header(skb, h_pos); - - dev->trans_start = jiffies; - dev_queue_xmit(skb); - - return 0; - - fail: - if (!ret) - dev_kfree_skb(skb); - - return ret; -} - - -/* - * This is the transmit routine for the 802.11 type interfaces - * called by upper layers of the linux networking - * stack when it has a frame to transmit - */ -static int -ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_packet_data *pkt_data; - struct ieee80211_hdr *hdr; - u16 fc; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (skb->len < 10) { - dev_kfree_skb(skb); - return 0; - } - - if (skb_headroom(skb) < sdata->local->tx_headroom) { - if (pskb_expand_head(skb, sdata->local->tx_headroom, - 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - return 0; - } - } - - hdr = (struct ieee80211_hdr *) skb->data; - fc = le16_to_cpu(hdr->frame_control); - - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; - memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = sdata->dev->ifindex; - pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT); - - skb->priority = 20; /* use hardcoded priority for mgmt TX queue */ - skb->dev = sdata->local->mdev; - - /* - * We're using the protocol field of the the frame control header - * to request TX callback for hostapd. BIT(1) is checked. - */ - if ((fc & BIT(1)) == BIT(1)) { - pkt_data->req_tx_status = 1; - fc &= ~BIT(1); - hdr->frame_control = cpu_to_le16(fc); + printk(KERN_DEBUG "%s: could not find STA entry for WDS link " + "peer %s\n", + dev->name, print_mac(mac, sdata->u.wds.remote_addr)); } - pkt_data->do_not_encrypt = !(fc & IEEE80211_FCTL_PROTECTED); - - sdata->stats.tx_packets++; - sdata->stats.tx_bytes += skb->len; - - dev_queue_xmit(skb); + /* Update WDS link data */ + memcpy(&sdata->u.wds.remote_addr, remote_addr, ETH_ALEN); return 0; } - -static void ieee80211_beacon_add_tim(struct ieee80211_local *local, - struct ieee80211_if_ap *bss, - struct sk_buff *skb) -{ - u8 *pos, *tim; - int aid0 = 0; - int i, have_bits = 0, n1, n2; - - /* Generate bitmap for TIM only if there are any STAs in power save - * mode. */ - spin_lock_bh(&local->sta_lock); - if (atomic_read(&bss->num_sta_ps) > 0) - /* in the hope that this is faster than - * checking byte-for-byte */ - have_bits = !bitmap_empty((unsigned long*)bss->tim, - IEEE80211_MAX_AID+1); - - if (bss->dtim_count == 0) - bss->dtim_count = bss->dtim_period - 1; - else - bss->dtim_count--; - - tim = pos = (u8 *) skb_put(skb, 6); - *pos++ = WLAN_EID_TIM; - *pos++ = 4; - *pos++ = bss->dtim_count; - *pos++ = bss->dtim_period; - - if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) - aid0 = 1; - - if (have_bits) { - /* Find largest even number N1 so that bits numbered 1 through - * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits - * (N2 + 1) x 8 through 2007 are 0. */ - n1 = 0; - for (i = 0; i < IEEE80211_MAX_TIM_LEN; i++) { - if (bss->tim[i]) { - n1 = i & 0xfe; - break; - } - } - n2 = n1; - for (i = IEEE80211_MAX_TIM_LEN - 1; i >= n1; i--) { - if (bss->tim[i]) { - n2 = i; - break; - } - } - - /* Bitmap control */ - *pos++ = n1 | aid0; - /* Part Virt Bitmap */ - memcpy(pos, bss->tim + n1, n2 - n1 + 1); - - tim[1] = n2 - n1 + 4; - skb_put(skb, n2 - n1); - } else { - *pos++ = aid0; /* Bitmap control */ - *pos++ = 0; /* Part Virt Bitmap */ - } - spin_unlock_bh(&local->sta_lock); -} - - -struct sk_buff * ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id, - struct ieee80211_tx_control *control) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sk_buff *skb; - struct net_device *bdev; - struct ieee80211_sub_if_data *sdata = NULL; - struct ieee80211_if_ap *ap = NULL; - struct ieee80211_rate *rate; - struct rate_control_extra extra; - u8 *b_head, *b_tail; - int bh_len, bt_len; - - bdev = dev_get_by_index(if_id); - if (bdev) { - sdata = IEEE80211_DEV_TO_SUB_IF(bdev); - ap = &sdata->u.ap; - dev_put(bdev); - } - - if (!ap || sdata->type != IEEE80211_IF_TYPE_AP || - !ap->beacon_head) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "no beacon data avail for idx=%d " - "(%s)\n", if_id, bdev ? bdev->name : "N/A"); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - return NULL; - } - - /* Assume we are generating the normal beacon locally */ - b_head = ap->beacon_head; - b_tail = ap->beacon_tail; - bh_len = ap->beacon_head_len; - bt_len = ap->beacon_tail_len; - - skb = dev_alloc_skb(local->tx_headroom + - bh_len + bt_len + 256 /* maximum TIM len */); - if (!skb) - return NULL; - - skb_reserve(skb, local->tx_headroom); - memcpy(skb_put(skb, bh_len), b_head, bh_len); - - ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data); - - ieee80211_beacon_add_tim(local, ap, skb); - - if (b_tail) { - memcpy(skb_put(skb, bt_len), b_tail, bt_len); - } - - if (control) { - memset(&extra, 0, sizeof(extra)); - extra.mode = local->oper_hw_mode; - - rate = rate_control_get_rate(local, local->mdev, skb, &extra); - if (!rate) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: ieee80211_beacon_get: no rate " - "found\n", local->mdev->name); - } - dev_kfree_skb(skb); - return NULL; - } - - control->tx_rate = (local->short_preamble && - (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? - rate->val2 : rate->val; - control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - control->power_level = local->hw.conf.power_level; - control->flags |= IEEE80211_TXCTL_NO_ACK; - control->retry_limit = 1; - control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; - } - - ap->num_beacons++; - return skb; -} -EXPORT_SYMBOL(ieee80211_beacon_get); - -__le16 ieee80211_rts_duration(struct ieee80211_hw *hw, - size_t frame_len, - const struct ieee80211_tx_control *frame_txctl) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_rate *rate; - int short_preamble = local->short_preamble; - int erp; - u16 dur; - - rate = frame_txctl->rts_rate; - erp = !!(rate->flags & IEEE80211_RATE_ERP); - - /* CTS duration */ - dur = ieee80211_frame_duration(local, 10, rate->rate, - erp, short_preamble); - /* Data frame duration */ - dur += ieee80211_frame_duration(local, frame_len, rate->rate, - erp, short_preamble); - /* ACK duration */ - dur += ieee80211_frame_duration(local, 10, rate->rate, - erp, short_preamble); - - return cpu_to_le16(dur); -} -EXPORT_SYMBOL(ieee80211_rts_duration); - - -__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, - size_t frame_len, - const struct ieee80211_tx_control *frame_txctl) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_rate *rate; - int short_preamble = local->short_preamble; - int erp; - u16 dur; - - rate = frame_txctl->rts_rate; - erp = !!(rate->flags & IEEE80211_RATE_ERP); - - /* Data frame duration */ - dur = ieee80211_frame_duration(local, frame_len, rate->rate, - erp, short_preamble); - if (!(frame_txctl->flags & IEEE80211_TXCTL_NO_ACK)) { - /* ACK duration */ - dur += ieee80211_frame_duration(local, 10, rate->rate, - erp, short_preamble); - } - - return cpu_to_le16(dur); -} -EXPORT_SYMBOL(ieee80211_ctstoself_duration); - -void ieee80211_rts_get(struct ieee80211_hw *hw, - const void *frame, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl, - struct ieee80211_rts *rts) -{ - const struct ieee80211_hdr *hdr = frame; - u16 fctl; - - fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS; - rts->frame_control = cpu_to_le16(fctl); - rts->duration = ieee80211_rts_duration(hw, frame_len, frame_txctl); - memcpy(rts->ra, hdr->addr1, sizeof(rts->ra)); - memcpy(rts->ta, hdr->addr2, sizeof(rts->ta)); -} -EXPORT_SYMBOL(ieee80211_rts_get); - -void ieee80211_ctstoself_get(struct ieee80211_hw *hw, - const void *frame, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl, - struct ieee80211_cts *cts) -{ - const struct ieee80211_hdr *hdr = frame; - u16 fctl; - - fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS; - cts->frame_control = cpu_to_le16(fctl); - cts->duration = ieee80211_ctstoself_duration(hw, frame_len, frame_txctl); - memcpy(cts->ra, hdr->addr1, sizeof(cts->ra)); -} -EXPORT_SYMBOL(ieee80211_ctstoself_get); - -struct sk_buff * -ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id, - struct ieee80211_tx_control *control) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sk_buff *skb; - struct sta_info *sta; - ieee80211_tx_handler *handler; - struct ieee80211_txrx_data tx; - ieee80211_txrx_result res = TXRX_DROP; - struct net_device *bdev; - struct ieee80211_sub_if_data *sdata; - struct ieee80211_if_ap *bss = NULL; - - bdev = dev_get_by_index(if_id); - if (bdev) { - sdata = IEEE80211_DEV_TO_SUB_IF(bdev); - bss = &sdata->u.ap; - dev_put(bdev); - } - if (!bss || sdata->type != IEEE80211_IF_TYPE_AP || !bss->beacon_head) - return NULL; - - if (bss->dtim_count != 0) - return NULL; /* send buffered bc/mc only after DTIM beacon */ - memset(control, 0, sizeof(*control)); - while (1) { - skb = skb_dequeue(&bss->ps_bc_buf); - if (!skb) - return NULL; - local->total_ps_buffered--; - - if (!skb_queue_empty(&bss->ps_bc_buf) && skb->len >= 2) { - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) skb->data; - /* more buffered multicast/broadcast frames ==> set - * MoreData flag in IEEE 802.11 header to inform PS - * STAs */ - hdr->frame_control |= - cpu_to_le16(IEEE80211_FCTL_MOREDATA); - } - - if (ieee80211_tx_prepare(&tx, skb, local->mdev, control) == 0) - break; - dev_kfree_skb_any(skb); - } - sta = tx.sta; - tx.u.tx.ps_buffered = 1; - - for (handler = local->tx_handlers; *handler != NULL; handler++) { - res = (*handler)(&tx); - if (res == TXRX_DROP || res == TXRX_QUEUED) - break; - } - dev_put(tx.dev); - skb = tx.skb; /* handlers are allowed to change skb */ - - if (res == TXRX_DROP) { - I802_DEBUG_INC(local->tx_handlers_drop); - dev_kfree_skb(skb); - skb = NULL; - } else if (res == TXRX_QUEUED) { - I802_DEBUG_INC(local->tx_handlers_queued); - skb = NULL; - } - - if (sta) - sta_info_put(sta); - - return skb; -} -EXPORT_SYMBOL(ieee80211_get_buffered_bc); +/* everything else */ static int __ieee80211_if_config(struct net_device *dev, struct sk_buff *beacon, @@ -2286,7 +454,6 @@ static int __ieee80211_if_config(struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_if_conf conf; - static u8 scan_bssid[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; if (!local->ops->config_interface || !netif_running(dev)) return 0; @@ -2295,20 +462,12 @@ static int __ieee80211_if_config(struct net_device *dev, conf.type = sdata->type; if (sdata->type == IEEE80211_IF_TYPE_STA || sdata->type == IEEE80211_IF_TYPE_IBSS) { - if (local->sta_scanning && - local->scan_dev == dev) - conf.bssid = scan_bssid; - else - conf.bssid = sdata->u.sta.bssid; + conf.bssid = sdata->u.sta.bssid; conf.ssid = sdata->u.sta.ssid; conf.ssid_len = sdata->u.sta.ssid_len; - conf.generic_elem = sdata->u.sta.extra_ie; - conf.generic_elem_len = sdata->u.sta.extra_ie_len; } else if (sdata->type == IEEE80211_IF_TYPE_AP) { conf.ssid = sdata->u.ap.ssid; conf.ssid_len = sdata->u.ap.ssid_len; - conf.generic_elem = sdata->u.ap.generic_elem; - conf.generic_elem_len = sdata->u.ap.generic_elem_len; conf.beacon = beacon; conf.beacon_control = control; } @@ -2351,7 +510,12 @@ int ieee80211_hw_config(struct ieee80211_local *local) local->hw.conf.channel = chan->chan; local->hw.conf.channel_val = chan->val; - local->hw.conf.power_level = chan->power_level; + if (!local->hw.conf.power_level) { + local->hw.conf.power_level = chan->power_level; + } else { + local->hw.conf.power_level = min(chan->power_level, + local->hw.conf.power_level); + } local->hw.conf.freq = chan->freq; local->hw.conf.phymode = mode->mode; local->hw.conf.antenna_max = chan->antenna_max; @@ -2364,1971 +528,33 @@ int ieee80211_hw_config(struct ieee80211_local *local) local->hw.conf.phymode); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - if (local->ops->config) + if (local->open_count) ret = local->ops->config(local_to_hw(local), &local->hw.conf); return ret; } - -static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) -{ - /* FIX: what would be proper limits for MTU? - * This interface uses 802.3 frames. */ - if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6) { - printk(KERN_WARNING "%s: invalid MTU %d\n", - dev->name, new_mtu); - return -EINVAL; - } - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - dev->mtu = new_mtu; - return 0; -} - - -static int ieee80211_change_mtu_apdev(struct net_device *dev, int new_mtu) -{ - /* FIX: what would be proper limits for MTU? - * This interface uses 802.11 frames. */ - if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN) { - printk(KERN_WARNING "%s: invalid MTU %d\n", - dev->name, new_mtu); - return -EINVAL; - } - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - dev->mtu = new_mtu; - return 0; -} - -enum netif_tx_lock_class { - TX_LOCK_NORMAL, - TX_LOCK_MASTER, -}; - -static inline void netif_tx_lock_nested(struct net_device *dev, int subclass) -{ - spin_lock_nested(&dev->_xmit_lock, subclass); - dev->xmit_lock_owner = smp_processor_id(); -} - -static void ieee80211_set_multicast_list(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - unsigned short flags; - - netif_tx_lock_nested(local->mdev, TX_LOCK_MASTER); - if (((dev->flags & IFF_ALLMULTI) != 0) ^ (sdata->allmulti != 0)) { - if (sdata->allmulti) { - sdata->allmulti = 0; - local->iff_allmultis--; - } else { - sdata->allmulti = 1; - local->iff_allmultis++; - } - } - if (((dev->flags & IFF_PROMISC) != 0) ^ (sdata->promisc != 0)) { - if (sdata->promisc) { - sdata->promisc = 0; - local->iff_promiscs--; - } else { - sdata->promisc = 1; - local->iff_promiscs++; - } - } - if (dev->mc_count != sdata->mc_count) { - local->mc_count = local->mc_count - sdata->mc_count + - dev->mc_count; - sdata->mc_count = dev->mc_count; - } - if (local->ops->set_multicast_list) { - flags = local->mdev->flags; - if (local->iff_allmultis) - flags |= IFF_ALLMULTI; - if (local->iff_promiscs) - flags |= IFF_PROMISC; - read_lock(&local->sub_if_lock); - local->ops->set_multicast_list(local_to_hw(local), flags, - local->mc_count); - read_unlock(&local->sub_if_lock); - } - netif_tx_unlock(local->mdev); -} - -struct dev_mc_list *ieee80211_get_mc_list_item(struct ieee80211_hw *hw, - struct dev_mc_list *prev, - void **ptr) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata = *ptr; - struct dev_mc_list *mc; - - if (!prev) { - WARN_ON(sdata); - sdata = NULL; - } - if (!prev || !prev->next) { - if (sdata) - sdata = list_entry(sdata->list.next, - struct ieee80211_sub_if_data, list); - else - sdata = list_entry(local->sub_if_list.next, - struct ieee80211_sub_if_data, list); - if (&sdata->list != &local->sub_if_list) - mc = sdata->dev->mc_list; - else - mc = NULL; - } else - mc = prev->next; - - *ptr = sdata; - return mc; -} -EXPORT_SYMBOL(ieee80211_get_mc_list_item); - -static struct net_device_stats *ieee80211_get_stats(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - return &(sdata->stats); -} - -static void ieee80211_if_shutdown(struct net_device *dev) +void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - ASSERT_RTNL(); - switch (sdata->type) { - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - sdata->u.sta.state = IEEE80211_DISABLED; - del_timer_sync(&sdata->u.sta.timer); - skb_queue_purge(&sdata->u.sta.skb_queue); - if (!local->ops->hw_scan && - local->scan_dev == sdata->dev) { - local->sta_scanning = 0; - cancel_delayed_work(&local->scan_work); - } - flush_workqueue(local->hw.workqueue); - break; - } -} - -static inline int identical_mac_addr_allowed(int type1, int type2) -{ - return (type1 == IEEE80211_IF_TYPE_MNTR || - type2 == IEEE80211_IF_TYPE_MNTR || - (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_WDS) || - (type1 == IEEE80211_IF_TYPE_WDS && - (type2 == IEEE80211_IF_TYPE_WDS || - type2 == IEEE80211_IF_TYPE_AP)) || - (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_VLAN) || - (type1 == IEEE80211_IF_TYPE_VLAN && - (type2 == IEEE80211_IF_TYPE_AP || - type2 == IEEE80211_IF_TYPE_VLAN))); -} - -static int ieee80211_master_open(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - int res = -EOPNOTSUPP; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - if (sdata->dev != dev && netif_running(sdata->dev)) { - res = 0; - break; - } - } - read_unlock(&local->sub_if_lock); - return res; -} - -static int ieee80211_master_stop(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) - if (sdata->dev != dev && netif_running(sdata->dev)) - dev_close(sdata->dev); - read_unlock(&local->sub_if_lock); - - return 0; -} - -static int ieee80211_mgmt_open(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - if (!netif_running(local->mdev)) - return -EOPNOTSUPP; - return 0; -} - -static int ieee80211_mgmt_stop(struct net_device *dev) -{ - return 0; -} - -/* Check if running monitor interfaces should go to a "soft monitor" mode - * and switch them if necessary. */ -static inline void ieee80211_start_soft_monitor(struct ieee80211_local *local) -{ - struct ieee80211_if_init_conf conf; - - if (local->open_count && local->open_count == local->monitors && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER) && - local->ops->remove_interface) { - conf.if_id = -1; - conf.type = IEEE80211_IF_TYPE_MNTR; - conf.mac_addr = NULL; - local->ops->remove_interface(local_to_hw(local), &conf); - } -} - -/* Check if running monitor interfaces should go to a "hard monitor" mode - * and switch them if necessary. */ -static void ieee80211_start_hard_monitor(struct ieee80211_local *local) -{ - struct ieee80211_if_init_conf conf; - - if (local->open_count && local->open_count == local->monitors && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { - conf.if_id = -1; - conf.type = IEEE80211_IF_TYPE_MNTR; - conf.mac_addr = NULL; - local->ops->add_interface(local_to_hw(local), &conf); - } -} - -static int ieee80211_open(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata, *nsdata; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_if_init_conf conf; - int res; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - read_lock(&local->sub_if_lock); - list_for_each_entry(nsdata, &local->sub_if_list, list) { - struct net_device *ndev = nsdata->dev; - - if (ndev != dev && ndev != local->mdev && netif_running(ndev) && - compare_ether_addr(dev->dev_addr, ndev->dev_addr) == 0 && - !identical_mac_addr_allowed(sdata->type, nsdata->type)) { - read_unlock(&local->sub_if_lock); - return -ENOTUNIQ; - } - } - read_unlock(&local->sub_if_lock); - - if (sdata->type == IEEE80211_IF_TYPE_WDS && - is_zero_ether_addr(sdata->u.wds.remote_addr)) - return -ENOLINK; - - if (sdata->type == IEEE80211_IF_TYPE_MNTR && local->open_count && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { - /* run the interface in a "soft monitor" mode */ - local->monitors++; - local->open_count++; - local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; - return 0; - } - ieee80211_start_soft_monitor(local); - - conf.if_id = dev->ifindex; - conf.type = sdata->type; - conf.mac_addr = dev->dev_addr; - res = local->ops->add_interface(local_to_hw(local), &conf); - if (res) { - if (sdata->type == IEEE80211_IF_TYPE_MNTR) - ieee80211_start_hard_monitor(local); - return res; - } - - if (local->open_count == 0) { - res = 0; - tasklet_enable(&local->tx_pending_tasklet); - tasklet_enable(&local->tasklet); - if (local->ops->open) - res = local->ops->open(local_to_hw(local)); - if (res == 0) { - res = dev_open(local->mdev); - if (res) { - if (local->ops->stop) - local->ops->stop(local_to_hw(local)); - } else { - res = ieee80211_hw_config(local); - if (res && local->ops->stop) - local->ops->stop(local_to_hw(local)); - else if (!res && local->apdev) - dev_open(local->apdev); - } - } - if (res) { - if (local->ops->remove_interface) - local->ops->remove_interface(local_to_hw(local), - &conf); - return res; - } - } - local->open_count++; - - if (sdata->type == IEEE80211_IF_TYPE_MNTR) { - local->monitors++; - local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; - } else - ieee80211_if_config(dev); - - if (sdata->type == IEEE80211_IF_TYPE_STA && - !local->user_space_mlme) - netif_carrier_off(dev); - else - netif_carrier_on(dev); - - netif_start_queue(dev); - return 0; -} - - -static int ieee80211_stop(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->type == IEEE80211_IF_TYPE_MNTR && - local->open_count > 1 && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { - /* remove "soft monitor" interface */ - local->open_count--; - local->monitors--; - if (!local->monitors) - local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; - return 0; - } - - netif_stop_queue(dev); - ieee80211_if_shutdown(dev); - - if (sdata->type == IEEE80211_IF_TYPE_MNTR) { - local->monitors--; - if (!local->monitors) - local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; - } - - local->open_count--; - if (local->open_count == 0) { - if (netif_running(local->mdev)) - dev_close(local->mdev); - if (local->apdev) - dev_close(local->apdev); - if (local->ops->stop) - local->ops->stop(local_to_hw(local)); - tasklet_disable(&local->tx_pending_tasklet); - tasklet_disable(&local->tasklet); - } - if (local->ops->remove_interface) { - struct ieee80211_if_init_conf conf; - - conf.if_id = dev->ifindex; - conf.type = sdata->type; - conf.mac_addr = dev->dev_addr; - local->ops->remove_interface(local_to_hw(local), &conf); - } - - ieee80211_start_hard_monitor(local); - - return 0; -} - - -static int header_parse_80211(struct sk_buff *skb, unsigned char *haddr) -{ - memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */ - return ETH_ALEN; -} - -static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) -{ - return compare_ether_addr(raddr, addr) == 0 || - is_broadcast_ether_addr(raddr); + if (local->ops->erp_ie_changed) + local->ops->erp_ie_changed(local_to_hw(local), changes, + !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION), + !(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE)); } - -static ieee80211_txrx_result -ieee80211_rx_h_data(struct ieee80211_txrx_data *rx) +void ieee80211_reset_erp_info(struct net_device *dev) { - struct net_device *dev = rx->dev; - struct ieee80211_local *local = rx->local; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - u16 fc, hdrlen, ethertype; - u8 *payload; - u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN]; - struct sk_buff *skb = rx->skb, *skb2; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - fc = rx->fc; - if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) - return TXRX_CONTINUE; - - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) - return TXRX_DROP; - - hdrlen = ieee80211_get_hdrlen(fc); - - /* convert IEEE 802.11 header + possible LLC headers into Ethernet - * header - * IEEE 802.11 address fields: - * ToDS FromDS Addr1 Addr2 Addr3 Addr4 - * 0 0 DA SA BSSID n/a - * 0 1 DA BSSID SA n/a - * 1 0 BSSID SA DA n/a - * 1 1 RA TA DA SA - */ - - switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case IEEE80211_FCTL_TODS: - /* BSSID SA DA */ - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - - if (unlikely(sdata->type != IEEE80211_IF_TYPE_AP && - sdata->type != IEEE80211_IF_TYPE_VLAN)) { - printk(KERN_DEBUG "%s: dropped ToDS frame (BSSID=" - MAC_FMT " SA=" MAC_FMT " DA=" MAC_FMT ")\n", - dev->name, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr3)); - return TXRX_DROP; - } - break; - case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): - /* RA TA DA SA */ - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr4, ETH_ALEN); - - if (unlikely(sdata->type != IEEE80211_IF_TYPE_WDS)) { - printk(KERN_DEBUG "%s: dropped FromDS&ToDS frame (RA=" - MAC_FMT " TA=" MAC_FMT " DA=" MAC_FMT " SA=" - MAC_FMT ")\n", - rx->dev->name, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr3), - MAC_ARG(hdr->addr4)); - return TXRX_DROP; - } - break; - case IEEE80211_FCTL_FROMDS: - /* DA BSSID SA */ - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr3, ETH_ALEN); - - if (sdata->type != IEEE80211_IF_TYPE_STA) { - return TXRX_DROP; - } - break; - case 0: - /* DA SA BSSID */ - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - - if (sdata->type != IEEE80211_IF_TYPE_IBSS) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: dropped IBSS frame (DA=" - MAC_FMT " SA=" MAC_FMT " BSSID=" MAC_FMT - ")\n", - dev->name, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), - MAC_ARG(hdr->addr3)); - } - return TXRX_DROP; - } - break; - } - - payload = skb->data + hdrlen; - - if (unlikely(skb->len - hdrlen < 8)) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: RX too short data frame " - "payload\n", dev->name); - } - return TXRX_DROP; - } - - ethertype = (payload[6] << 8) | payload[7]; - - if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && - ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || - compare_ether_addr(payload, bridge_tunnel_header) == 0)) { - /* remove RFC1042 or Bridge-Tunnel encapsulation and - * replace EtherType */ - skb_pull(skb, hdrlen + 6); - memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); - } else { - struct ethhdr *ehdr; - __be16 len; - skb_pull(skb, hdrlen); - len = htons(skb->len); - ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); - memcpy(ehdr->h_dest, dst, ETH_ALEN); - memcpy(ehdr->h_source, src, ETH_ALEN); - ehdr->h_proto = len; - } - skb->dev = dev; - - skb2 = NULL; - - sdata->stats.rx_packets++; - sdata->stats.rx_bytes += skb->len; - - if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP - || sdata->type == IEEE80211_IF_TYPE_VLAN) && rx->u.rx.ra_match) { - if (is_multicast_ether_addr(skb->data)) { - /* send multicast frames both to higher layers in - * local net stack and back to the wireless media */ - skb2 = skb_copy(skb, GFP_ATOMIC); - if (!skb2) - printk(KERN_DEBUG "%s: failed to clone " - "multicast frame\n", dev->name); - } else { - struct sta_info *dsta; - dsta = sta_info_get(local, skb->data); - if (dsta && !dsta->dev) { - printk(KERN_DEBUG "Station with null dev " - "structure!\n"); - } else if (dsta && dsta->dev == dev) { - /* Destination station is associated to this - * AP, so send the frame directly to it and - * do not pass the frame to local net stack. - */ - skb2 = skb; - skb = NULL; - } - if (dsta) - sta_info_put(dsta); - } - } - - if (skb) { - /* deliver to local stack */ - skb->protocol = eth_type_trans(skb, dev); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); - } - - if (skb2) { - /* send to wireless media */ - skb2->protocol = __constant_htons(ETH_P_802_3); - skb_set_network_header(skb2, 0); - skb_set_mac_header(skb2, 0); - dev_queue_xmit(skb2); - } - - return TXRX_QUEUED; -} - - -static struct ieee80211_rate * -ieee80211_get_rate(struct ieee80211_local *local, int phymode, int hw_rate) -{ - struct ieee80211_hw_mode *mode; - int r; - - list_for_each_entry(mode, &local->modes_list, list) { - if (mode->mode != phymode) - continue; - for (r = 0; r < mode->num_rates; r++) { - struct ieee80211_rate *rate = &mode->rates[r]; - if (rate->val == hw_rate || - (rate->flags & IEEE80211_RATE_PREAMBLE2 && - rate->val2 == hw_rate)) - return rate; - } - } - - return NULL; -} - -static void -ieee80211_fill_frame_info(struct ieee80211_local *local, - struct ieee80211_frame_info *fi, - struct ieee80211_rx_status *status) -{ - if (status) { - struct timespec ts; - struct ieee80211_rate *rate; - - jiffies_to_timespec(jiffies, &ts); - fi->hosttime = cpu_to_be64((u64) ts.tv_sec * 1000000 + - ts.tv_nsec / 1000); - fi->mactime = cpu_to_be64(status->mactime); - switch (status->phymode) { - case MODE_IEEE80211A: - fi->phytype = htonl(ieee80211_phytype_ofdm_dot11_a); - break; - case MODE_IEEE80211B: - fi->phytype = htonl(ieee80211_phytype_dsss_dot11_b); - break; - case MODE_IEEE80211G: - fi->phytype = htonl(ieee80211_phytype_pbcc_dot11_g); - break; - case MODE_ATHEROS_TURBO: - fi->phytype = - htonl(ieee80211_phytype_dsss_dot11_turbo); - break; - default: - fi->phytype = htonl(0xAAAAAAAA); - break; - } - fi->channel = htonl(status->channel); - rate = ieee80211_get_rate(local, status->phymode, - status->rate); - if (rate) { - fi->datarate = htonl(rate->rate); - if (rate->flags & IEEE80211_RATE_PREAMBLE2) { - if (status->rate == rate->val) - fi->preamble = htonl(2); /* long */ - else if (status->rate == rate->val2) - fi->preamble = htonl(1); /* short */ - } else - fi->preamble = htonl(0); - } else { - fi->datarate = htonl(0); - fi->preamble = htonl(0); - } - - fi->antenna = htonl(status->antenna); - fi->priority = htonl(0xffffffff); /* no clue */ - fi->ssi_type = htonl(ieee80211_ssi_raw); - fi->ssi_signal = htonl(status->ssi); - fi->ssi_noise = 0x00000000; - fi->encoding = 0; - } else { - /* clear everything because we really don't know. - * the msg_type field isn't present on monitor frames - * so we don't know whether it will be present or not, - * but it's ok to not clear it since it'll be assigned - * anyway */ - memset(fi, 0, sizeof(*fi) - sizeof(fi->msg_type)); - - fi->ssi_type = htonl(ieee80211_ssi_none); - } - fi->version = htonl(IEEE80211_FI_VERSION); - fi->length = cpu_to_be32(sizeof(*fi) - sizeof(fi->msg_type)); -} - -/* this routine is actually not just for this, but also - * for pushing fake 'management' frames into userspace. - * it shall be replaced by a netlink-based system. */ -void -ieee80211_rx_mgmt(struct ieee80211_local *local, struct sk_buff *skb, - struct ieee80211_rx_status *status, u32 msg_type) -{ - struct ieee80211_frame_info *fi; - const size_t hlen = sizeof(struct ieee80211_frame_info); - struct ieee80211_sub_if_data *sdata; - - skb->dev = local->apdev; - - sdata = IEEE80211_DEV_TO_SUB_IF(local->apdev); - - if (skb_headroom(skb) < hlen) { - I802_DEBUG_INC(local->rx_expand_skb_head); - if (pskb_expand_head(skb, hlen, 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - return; - } - } - - fi = (struct ieee80211_frame_info *) skb_push(skb, hlen); - - ieee80211_fill_frame_info(local, fi, status); - fi->msg_type = htonl(msg_type); - - sdata->stats.rx_packets++; - sdata->stats.rx_bytes += skb->len; - - skb_set_mac_header(skb, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = htons(ETH_P_802_2); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); -} - -static void -ieee80211_rx_monitor(struct net_device *dev, struct sk_buff *skb, - struct ieee80211_rx_status *status) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - struct ieee80211_rate *rate; - struct ieee80211_rtap_hdr { - struct ieee80211_radiotap_header hdr; - u8 flags; - u8 rate; - __le16 chan_freq; - __le16 chan_flags; - u8 antsignal; - } __attribute__ ((packed)) *rthdr; - - skb->dev = dev; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (status->flag & RX_FLAG_RADIOTAP) - goto out; - - if (skb_headroom(skb) < sizeof(*rthdr)) { - I802_DEBUG_INC(local->rx_expand_skb_head); - if (pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - return; - } - } - - rthdr = (struct ieee80211_rtap_hdr *) skb_push(skb, sizeof(*rthdr)); - memset(rthdr, 0, sizeof(*rthdr)); - rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); - rthdr->hdr.it_present = - cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | - (1 << IEEE80211_RADIOTAP_RATE) | - (1 << IEEE80211_RADIOTAP_CHANNEL) | - (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL)); - rthdr->flags = local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS ? - IEEE80211_RADIOTAP_F_FCS : 0; - rate = ieee80211_get_rate(local, status->phymode, status->rate); - if (rate) - rthdr->rate = rate->rate / 5; - rthdr->chan_freq = cpu_to_le16(status->freq); - rthdr->chan_flags = - status->phymode == MODE_IEEE80211A ? - cpu_to_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ) : - cpu_to_le16(IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ); - rthdr->antsignal = status->ssi; - - out: - sdata->stats.rx_packets++; - sdata->stats.rx_bytes += skb->len; - - skb_set_mac_header(skb, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = htons(ETH_P_802_2); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); -} - -int ieee80211_radar_status(struct ieee80211_hw *hw, int channel, - int radar, int radar_type) -{ - struct sk_buff *skb; - struct ieee80211_radar_info *msg; - struct ieee80211_local *local = hw_to_local(hw); - - if (!local->apdev) - return 0; - - skb = dev_alloc_skb(sizeof(struct ieee80211_frame_info) + - sizeof(struct ieee80211_radar_info)); - - if (!skb) - return -ENOMEM; - skb_reserve(skb, sizeof(struct ieee80211_frame_info)); - - msg = (struct ieee80211_radar_info *) - skb_put(skb, sizeof(struct ieee80211_radar_info)); - msg->channel = channel; - msg->radar = radar; - msg->radar_type = radar_type; - - ieee80211_rx_mgmt(local, skb, NULL, ieee80211_msg_radar); - return 0; -} -EXPORT_SYMBOL(ieee80211_radar_status); - - -static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) -{ - struct ieee80211_sub_if_data *sdata; - sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); - - if (sdata->bss) - atomic_inc(&sdata->bss->num_sta_ps); - sta->flags |= WLAN_STA_PS; - sta->pspoll = 0; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d enters power " - "save mode\n", dev->name, MAC_ARG(sta->addr), sta->aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ -} - - -static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - int sent = 0; - struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_packet_data *pkt_data; - - sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); - if (sdata->bss) - atomic_dec(&sdata->bss->num_sta_ps); - sta->flags &= ~(WLAN_STA_PS | WLAN_STA_TIM); - sta->pspoll = 0; - if (!skb_queue_empty(&sta->ps_tx_buf)) { - if (local->ops->set_tim) - local->ops->set_tim(local_to_hw(local), sta->aid, 0); - if (sdata->bss) - bss_tim_clear(local, sdata->bss, sta->aid); - } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d exits power " - "save mode\n", dev->name, MAC_ARG(sta->addr), sta->aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - /* Send all buffered frames to the station */ - while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; - sent++; - pkt_data->requeue = 1; - dev_queue_xmit(skb); - } - while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; - local->total_ps_buffered--; - sent++; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d send PS frame " - "since STA not sleeping anymore\n", dev->name, - MAC_ARG(sta->addr), sta->aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - pkt_data->requeue = 1; - dev_queue_xmit(skb); - } - - return sent; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx) -{ - struct sk_buff *skb; - int no_pending_pkts; - - if (likely(!rx->sta || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PSPOLL || - !rx->u.rx.ra_match)) - return TXRX_CONTINUE; - - skb = skb_dequeue(&rx->sta->tx_filtered); - if (!skb) { - skb = skb_dequeue(&rx->sta->ps_tx_buf); - if (skb) - rx->local->total_ps_buffered--; - } - no_pending_pkts = skb_queue_empty(&rx->sta->tx_filtered) && - skb_queue_empty(&rx->sta->ps_tx_buf); - - if (skb) { - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) skb->data; - - /* tell TX path to send one frame even though the STA may - * still remain is PS mode after this frame exchange */ - rx->sta->pspoll = 1; - -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA " MAC_FMT " aid %d: PS Poll (entries " - "after %d)\n", - MAC_ARG(rx->sta->addr), rx->sta->aid, - skb_queue_len(&rx->sta->ps_tx_buf)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - - /* Use MoreData flag to indicate whether there are more - * buffered frames for this STA */ - if (no_pending_pkts) { - hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); - rx->sta->flags &= ~WLAN_STA_TIM; - } else - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); - - dev_queue_xmit(skb); - - if (no_pending_pkts) { - if (rx->local->ops->set_tim) - rx->local->ops->set_tim(local_to_hw(rx->local), - rx->sta->aid, 0); - if (rx->sdata->bss) - bss_tim_clear(rx->local, rx->sdata->bss, rx->sta->aid); - } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - } else if (!rx->u.rx.sent_ps_buffered) { - printk(KERN_DEBUG "%s: STA " MAC_FMT " sent PS Poll even " - "though there is no buffered frames for it\n", - rx->dev->name, MAC_ARG(rx->sta->addr)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - - } - - /* Free PS Poll skb here instead of returning TXRX_DROP that would - * count as an dropped frame. */ - dev_kfree_skb(rx->skb); - - return TXRX_QUEUED; -} - - -static inline struct ieee80211_fragment_entry * -ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, - unsigned int frag, unsigned int seq, int rx_queue, - struct sk_buff **skb) -{ - struct ieee80211_fragment_entry *entry; - int idx; - - idx = sdata->fragment_next; - entry = &sdata->fragments[sdata->fragment_next++]; - if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX) - sdata->fragment_next = 0; - - if (!skb_queue_empty(&entry->skb_list)) { -#ifdef CONFIG_MAC80211_DEBUG - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) entry->skb_list.next->data; - printk(KERN_DEBUG "%s: RX reassembly removed oldest " - "fragment entry (idx=%d age=%lu seq=%d last_frag=%d " - "addr1=" MAC_FMT " addr2=" MAC_FMT "\n", - sdata->dev->name, idx, - jiffies - entry->first_frag_time, entry->seq, - entry->last_frag, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2)); -#endif /* CONFIG_MAC80211_DEBUG */ - __skb_queue_purge(&entry->skb_list); - } - - __skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */ - *skb = NULL; - entry->first_frag_time = jiffies; - entry->seq = seq; - entry->rx_queue = rx_queue; - entry->last_frag = frag; - entry->ccmp = 0; - entry->extra_len = 0; - - return entry; -} - - -static inline struct ieee80211_fragment_entry * -ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, - u16 fc, unsigned int frag, unsigned int seq, - int rx_queue, struct ieee80211_hdr *hdr) -{ - struct ieee80211_fragment_entry *entry; - int i, idx; - - idx = sdata->fragment_next; - for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { - struct ieee80211_hdr *f_hdr; - u16 f_fc; - - idx--; - if (idx < 0) - idx = IEEE80211_FRAGMENT_MAX - 1; - - entry = &sdata->fragments[idx]; - if (skb_queue_empty(&entry->skb_list) || entry->seq != seq || - entry->rx_queue != rx_queue || - entry->last_frag + 1 != frag) - continue; - - f_hdr = (struct ieee80211_hdr *) entry->skb_list.next->data; - f_fc = le16_to_cpu(f_hdr->frame_control); - - if ((fc & IEEE80211_FCTL_FTYPE) != (f_fc & IEEE80211_FCTL_FTYPE) || - compare_ether_addr(hdr->addr1, f_hdr->addr1) != 0 || - compare_ether_addr(hdr->addr2, f_hdr->addr2) != 0) - continue; - - if (entry->first_frag_time + 2 * HZ < jiffies) { - __skb_queue_purge(&entry->skb_list); - continue; - } - return entry; - } - - return NULL; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_defragment(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_hdr *hdr; - u16 sc; - unsigned int frag, seq; - struct ieee80211_fragment_entry *entry; - struct sk_buff *skb; - - hdr = (struct ieee80211_hdr *) rx->skb->data; - sc = le16_to_cpu(hdr->seq_ctrl); - frag = sc & IEEE80211_SCTL_FRAG; - - if (likely((!(rx->fc & IEEE80211_FCTL_MOREFRAGS) && frag == 0) || - (rx->skb)->len < 24 || - is_multicast_ether_addr(hdr->addr1))) { - /* not fragmented */ - goto out; - } - I802_DEBUG_INC(rx->local->rx_handlers_fragments); - - seq = (sc & IEEE80211_SCTL_SEQ) >> 4; - - if (frag == 0) { - /* This is the first fragment of a new frame. */ - entry = ieee80211_reassemble_add(rx->sdata, frag, seq, - rx->u.rx.queue, &(rx->skb)); - if (rx->key && rx->key->alg == ALG_CCMP && - (rx->fc & IEEE80211_FCTL_PROTECTED)) { - /* Store CCMP PN so that we can verify that the next - * fragment has a sequential PN value. */ - entry->ccmp = 1; - memcpy(entry->last_pn, - rx->key->u.ccmp.rx_pn[rx->u.rx.queue], - CCMP_PN_LEN); - } - return TXRX_QUEUED; - } - - /* This is a fragment for a frame that should already be pending in - * fragment cache. Add this fragment to the end of the pending entry. - */ - entry = ieee80211_reassemble_find(rx->sdata, rx->fc, frag, seq, - rx->u.rx.queue, hdr); - if (!entry) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); - return TXRX_DROP; - } - - /* Verify that MPDUs within one MSDU have sequential PN values. - * (IEEE 802.11i, 8.3.3.4.5) */ - if (entry->ccmp) { - int i; - u8 pn[CCMP_PN_LEN], *rpn; - if (!rx->key || rx->key->alg != ALG_CCMP) - return TXRX_DROP; - memcpy(pn, entry->last_pn, CCMP_PN_LEN); - for (i = CCMP_PN_LEN - 1; i >= 0; i--) { - pn[i]++; - if (pn[i]) - break; - } - rpn = rx->key->u.ccmp.rx_pn[rx->u.rx.queue]; - if (memcmp(pn, rpn, CCMP_PN_LEN) != 0) { - printk(KERN_DEBUG "%s: defrag: CCMP PN not sequential" - " A2=" MAC_FMT " PN=%02x%02x%02x%02x%02x%02x " - "(expected %02x%02x%02x%02x%02x%02x)\n", - rx->dev->name, MAC_ARG(hdr->addr2), - rpn[0], rpn[1], rpn[2], rpn[3], rpn[4], rpn[5], - pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); - return TXRX_DROP; - } - memcpy(entry->last_pn, pn, CCMP_PN_LEN); - } - - skb_pull(rx->skb, ieee80211_get_hdrlen(rx->fc)); - __skb_queue_tail(&entry->skb_list, rx->skb); - entry->last_frag = frag; - entry->extra_len += rx->skb->len; - if (rx->fc & IEEE80211_FCTL_MOREFRAGS) { - rx->skb = NULL; - return TXRX_QUEUED; - } - - rx->skb = __skb_dequeue(&entry->skb_list); - if (skb_tailroom(rx->skb) < entry->extra_len) { - I802_DEBUG_INC(rx->local->rx_expand_skb_head2); - if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len, - GFP_ATOMIC))) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); - __skb_queue_purge(&entry->skb_list); - return TXRX_DROP; - } - } - while ((skb = __skb_dequeue(&entry->skb_list))) { - memcpy(skb_put(rx->skb, skb->len), skb->data, skb->len); - dev_kfree_skb(skb); - } - - /* Complete frame has been reassembled - process it now */ - rx->fragmented = 1; - - out: - if (rx->sta) - rx->sta->rx_packets++; - if (is_multicast_ether_addr(hdr->addr1)) - rx->local->dot11MulticastReceivedFrameCount++; - else - ieee80211_led_rx(rx->local); - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_monitor(struct ieee80211_txrx_data *rx) -{ - if (rx->sdata->type == IEEE80211_IF_TYPE_MNTR) { - ieee80211_rx_monitor(rx->dev, rx->skb, rx->u.rx.status); - return TXRX_QUEUED; - } - - if (rx->u.rx.status->flag & RX_FLAG_RADIOTAP) - skb_pull(rx->skb, ieee80211_get_radiotap_len(rx->skb)); - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_check(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_hdr *hdr; - int always_sta_key; - hdr = (struct ieee80211_hdr *) rx->skb->data; - - /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ - if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { - if (unlikely(rx->fc & IEEE80211_FCTL_RETRY && - rx->sta->last_seq_ctrl[rx->u.rx.queue] == - hdr->seq_ctrl)) { - if (rx->u.rx.ra_match) { - rx->local->dot11FrameDuplicateCount++; - rx->sta->num_duplicates++; - } - return TXRX_DROP; - } else - rx->sta->last_seq_ctrl[rx->u.rx.queue] = hdr->seq_ctrl; - } - - if ((rx->local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) && - rx->skb->len > FCS_LEN) - skb_trim(rx->skb, rx->skb->len - FCS_LEN); - - if (unlikely(rx->skb->len < 16)) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_short); - return TXRX_DROP; - } - - if (!rx->u.rx.ra_match) - rx->skb->pkt_type = PACKET_OTHERHOST; - else if (compare_ether_addr(rx->dev->dev_addr, hdr->addr1) == 0) - rx->skb->pkt_type = PACKET_HOST; - else if (is_multicast_ether_addr(hdr->addr1)) { - if (is_broadcast_ether_addr(hdr->addr1)) - rx->skb->pkt_type = PACKET_BROADCAST; - else - rx->skb->pkt_type = PACKET_MULTICAST; - } else - rx->skb->pkt_type = PACKET_OTHERHOST; - - /* Drop disallowed frame classes based on STA auth/assoc state; - * IEEE 802.11, Chap 5.5. - * - * 80211.o does filtering only based on association state, i.e., it - * drops Class 3 frames from not associated stations. hostapd sends - * deauth/disassoc frames when needed. In addition, hostapd is - * responsible for filtering on both auth and assoc states. - */ - if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA || - ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) && - rx->sdata->type != IEEE80211_IF_TYPE_IBSS && - (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) { - if ((!(rx->fc & IEEE80211_FCTL_FROMDS) && - !(rx->fc & IEEE80211_FCTL_TODS) && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) - || !rx->u.rx.ra_match) { - /* Drop IBSS frames and frames for other hosts - * silently. */ - return TXRX_DROP; - } - - if (!rx->local->apdev) - return TXRX_DROP; - - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_sta_not_assoc); - return TXRX_QUEUED; - } - - if (rx->sdata->type == IEEE80211_IF_TYPE_STA) - always_sta_key = 0; - else - always_sta_key = 1; - - if (rx->sta && rx->sta->key && always_sta_key) { - rx->key = rx->sta->key; - } else { - if (rx->sta && rx->sta->key) - rx->key = rx->sta->key; - else - rx->key = rx->sdata->default_key; - - if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) && - rx->fc & IEEE80211_FCTL_PROTECTED) { - int keyidx = ieee80211_wep_get_keyidx(rx->skb); - - if (keyidx >= 0 && keyidx < NUM_DEFAULT_KEYS && - (!rx->sta || !rx->sta->key || keyidx > 0)) - rx->key = rx->sdata->keys[keyidx]; - - if (!rx->key) { - if (!rx->u.rx.ra_match) - return TXRX_DROP; - printk(KERN_DEBUG "%s: RX WEP frame with " - "unknown keyidx %d (A1=" MAC_FMT " A2=" - MAC_FMT " A3=" MAC_FMT ")\n", - rx->dev->name, keyidx, - MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), - MAC_ARG(hdr->addr3)); - if (!rx->local->apdev) - return TXRX_DROP; - ieee80211_rx_mgmt( - rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_wep_frame_unknown_key); - return TXRX_QUEUED; - } - } - } - - if (rx->fc & IEEE80211_FCTL_PROTECTED && rx->key && rx->u.rx.ra_match) { - rx->key->tx_rx_count++; - if (unlikely(rx->local->key_tx_rx_threshold && - rx->key->tx_rx_count > - rx->local->key_tx_rx_threshold)) { - ieee80211_key_threshold_notify(rx->dev, rx->key, - rx->sta); - } - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_sta_process(struct ieee80211_txrx_data *rx) -{ - struct sta_info *sta = rx->sta; - struct net_device *dev = rx->dev; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - - if (!sta) - return TXRX_CONTINUE; - - /* Update last_rx only for IBSS packets which are for the current - * BSSID to avoid keeping the current IBSS network alive in cases where - * other STAs are using different BSSID. */ - if (rx->sdata->type == IEEE80211_IF_TYPE_IBSS) { - u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len); - if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) - sta->last_rx = jiffies; - } else - if (!is_multicast_ether_addr(hdr->addr1) || - rx->sdata->type == IEEE80211_IF_TYPE_STA) { - /* Update last_rx only for unicast frames in order to prevent - * the Probe Request frames (the only broadcast frames from a - * STA in infrastructure mode) from keeping a connection alive. - */ - sta->last_rx = jiffies; - } - - if (!rx->u.rx.ra_match) - return TXRX_CONTINUE; - - sta->rx_fragments++; - sta->rx_bytes += rx->skb->len; - sta->last_rssi = (sta->last_rssi * 15 + - rx->u.rx.status->ssi) / 16; - sta->last_signal = (sta->last_signal * 15 + - rx->u.rx.status->signal) / 16; - sta->last_noise = (sta->last_noise * 15 + - rx->u.rx.status->noise) / 16; - - if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) { - /* Change STA power saving mode only in the end of a frame - * exchange sequence */ - if ((sta->flags & WLAN_STA_PS) && !(rx->fc & IEEE80211_FCTL_PM)) - rx->u.rx.sent_ps_buffered += ap_sta_ps_end(dev, sta); - else if (!(sta->flags & WLAN_STA_PS) && - (rx->fc & IEEE80211_FCTL_PM)) - ap_sta_ps_start(dev, sta); - } - - /* Drop data::nullfunc frames silently, since they are used only to - * control station power saving mode. */ - if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_NULLFUNC) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); - /* Update counter and free packet here to avoid counting this - * as a dropped packed. */ - sta->rx_packets++; - dev_kfree_skb(rx->skb); - return TXRX_QUEUED; - } - - return TXRX_CONTINUE; -} /* ieee80211_rx_h_sta_process */ - - -static ieee80211_txrx_result -ieee80211_rx_h_wep_weak_iv_detection(struct ieee80211_txrx_data *rx) -{ - if (!rx->sta || !(rx->fc & IEEE80211_FCTL_PROTECTED) || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || - !rx->key || rx->key->alg != ALG_WEP || !rx->u.rx.ra_match) - return TXRX_CONTINUE; - - /* Check for weak IVs, if hwaccel did not remove IV from the frame */ - if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) || - rx->key->force_sw_encrypt) { - u8 *iv = ieee80211_wep_is_weak_iv(rx->skb, rx->key); - if (iv) { - rx->sta->wep_weak_iv_count++; - } - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_wep_decrypt(struct ieee80211_txrx_data *rx) -{ - /* If the device handles decryption totally, skip this test */ - if (rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) - return TXRX_CONTINUE; - - if ((rx->key && rx->key->alg != ALG_WEP) || - !(rx->fc & IEEE80211_FCTL_PROTECTED) || - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))) - return TXRX_CONTINUE; - - if (!rx->key) { - printk(KERN_DEBUG "%s: RX WEP frame, but no key set\n", - rx->dev->name); - return TXRX_DROP; - } - - if (!(rx->u.rx.status->flag & RX_FLAG_DECRYPTED) || - rx->key->force_sw_encrypt) { - if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) { - printk(KERN_DEBUG "%s: RX WEP frame, decrypt " - "failed\n", rx->dev->name); - return TXRX_DROP; - } - } else if (rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) { - ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); - /* remove ICV */ - skb_trim(rx->skb, rx->skb->len - 4); - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_802_1x_pae(struct ieee80211_txrx_data *rx) -{ - if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb) && - rx->sdata->type != IEEE80211_IF_TYPE_STA && rx->u.rx.ra_match) { - /* Pass both encrypted and unencrypted EAPOL frames to user - * space for processing. */ - if (!rx->local->apdev) - return TXRX_DROP; - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_normal); - return TXRX_QUEUED; - } - - if (unlikely(rx->sdata->ieee802_1x && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && - (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)) && - !ieee80211_is_eapol(rx->skb))) { -#ifdef CONFIG_MAC80211_DEBUG - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) rx->skb->data; - printk(KERN_DEBUG "%s: dropped frame from " MAC_FMT - " (unauthorized port)\n", rx->dev->name, - MAC_ARG(hdr->addr2)); -#endif /* CONFIG_MAC80211_DEBUG */ - return TXRX_DROP; - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_drop_unencrypted(struct ieee80211_txrx_data *rx) -{ - /* If the device handles decryption totally, skip this test */ - if (rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) - return TXRX_CONTINUE; - - /* Drop unencrypted frames if key is set. */ - if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && - (rx->key || rx->sdata->drop_unencrypted) && - (rx->sdata->eapol == 0 || - !ieee80211_is_eapol(rx->skb)))) { - printk(KERN_DEBUG "%s: RX non-WEP frame, but expected " - "encryption\n", rx->dev->name); - return TXRX_DROP; - } - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_sub_if_data *sdata; - - if (!rx->u.rx.ra_match) - return TXRX_DROP; - - sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); - if ((sdata->type == IEEE80211_IF_TYPE_STA || - sdata->type == IEEE80211_IF_TYPE_IBSS) && - !rx->local->user_space_mlme) { - ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status); - } else { - /* Management frames are sent to hostapd for processing */ - if (!rx->local->apdev) - return TXRX_DROP; - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_normal); - } - return TXRX_QUEUED; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_passive_scan(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_local *local = rx->local; - struct sk_buff *skb = rx->skb; - - if (unlikely(local->sta_scanning != 0)) { - ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status); - return TXRX_QUEUED; - } - - if (unlikely(rx->u.rx.in_scan)) { - /* scanning finished during invoking of handlers */ - I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); - return TXRX_DROP; - } - - return TXRX_CONTINUE; -} - - -static void ieee80211_rx_michael_mic_report(struct net_device *dev, - struct ieee80211_hdr *hdr, - struct sta_info *sta, - struct ieee80211_txrx_data *rx) -{ - int keyidx, hdrlen; - - hdrlen = ieee80211_get_hdrlen_from_skb(rx->skb); - if (rx->skb->len >= hdrlen + 4) - keyidx = rx->skb->data[hdrlen + 3] >> 6; - else - keyidx = -1; - - /* TODO: verify that this is not triggered by fragmented - * frames (hw does not verify MIC for them). */ - printk(KERN_DEBUG "%s: TKIP hwaccel reported Michael MIC " - "failure from " MAC_FMT " to " MAC_FMT " keyidx=%d\n", - dev->name, MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr1), keyidx); - - if (!sta) { - /* Some hardware versions seem to generate incorrect - * Michael MIC reports; ignore them to avoid triggering - * countermeasures. */ - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for unknown address " MAC_FMT "\n", - dev->name, MAC_ARG(hdr->addr2)); - goto ignore; - } - - if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) { - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for a frame with no ISWEP flag (src " - MAC_FMT ")\n", dev->name, MAC_ARG(hdr->addr2)); - goto ignore; - } - - if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) && - rx->sdata->type == IEEE80211_IF_TYPE_AP) { - keyidx = ieee80211_wep_get_keyidx(rx->skb); - /* AP with Pairwise keys support should never receive Michael - * MIC errors for non-zero keyidx because these are reserved - * for group keys and only the AP is sending real multicast - * frames in BSS. */ - if (keyidx) { - printk(KERN_DEBUG "%s: ignored Michael MIC error for " - "a frame with non-zero keyidx (%d) (src " MAC_FMT - ")\n", dev->name, keyidx, MAC_ARG(hdr->addr2)); - goto ignore; - } - } - - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) { - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for a frame that cannot be encrypted " - "(fc=0x%04x) (src " MAC_FMT ")\n", - dev->name, rx->fc, MAC_ARG(hdr->addr2)); - goto ignore; - } - - do { - union iwreq_data wrqu; - char *buf = kmalloc(128, GFP_ATOMIC); - if (!buf) - break; - - /* TODO: needed parameters: count, key type, TSC */ - sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" - "keyid=%d %scast addr=" MAC_FMT ")", - keyidx, hdr->addr1[0] & 0x01 ? "broad" : "uni", - MAC_ARG(hdr->addr2)); - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = strlen(buf); - wireless_send_event(rx->dev, IWEVCUSTOM, &wrqu, buf); - kfree(buf); - } while (0); - - /* TODO: consider verifying the MIC error report with software - * implementation if we get too many spurious reports from the - * hardware. */ - if (!rx->local->apdev) - goto ignore; - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_michael_mic_failure); - return; - - ignore: - dev_kfree_skb(rx->skb); - rx->skb = NULL; -} - -static inline ieee80211_txrx_result __ieee80211_invoke_rx_handlers( - struct ieee80211_local *local, - ieee80211_rx_handler *handlers, - struct ieee80211_txrx_data *rx, - struct sta_info *sta) -{ - ieee80211_rx_handler *handler; - ieee80211_txrx_result res = TXRX_DROP; - - for (handler = handlers; *handler != NULL; handler++) { - res = (*handler)(rx); - if (res != TXRX_CONTINUE) { - if (res == TXRX_DROP) { - I802_DEBUG_INC(local->rx_handlers_drop); - if (sta) - sta->rx_dropped++; - } - if (res == TXRX_QUEUED) - I802_DEBUG_INC(local->rx_handlers_queued); - break; - } - } - - if (res == TXRX_DROP) { - dev_kfree_skb(rx->skb); - } - return res; -} - -static inline void ieee80211_invoke_rx_handlers(struct ieee80211_local *local, - ieee80211_rx_handler *handlers, - struct ieee80211_txrx_data *rx, - struct sta_info *sta) -{ - if (__ieee80211_invoke_rx_handlers(local, handlers, rx, sta) == - TXRX_CONTINUE) - dev_kfree_skb(rx->skb); -} - -/* - * This is the receive path handler. It is called by a low level driver when an - * 802.11 MPDU is received from the hardware. - */ -void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_rx_status *status) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; - struct ieee80211_hdr *hdr; - struct ieee80211_txrx_data rx; - u16 type; - int multicast; - int radiotap_len = 0; - - if (status->flag & RX_FLAG_RADIOTAP) { - radiotap_len = ieee80211_get_radiotap_len(skb); - skb_pull(skb, radiotap_len); - } - - hdr = (struct ieee80211_hdr *) skb->data; - memset(&rx, 0, sizeof(rx)); - rx.skb = skb; - rx.local = local; - - rx.u.rx.status = status; - rx.fc = skb->len >= 2 ? le16_to_cpu(hdr->frame_control) : 0; - type = rx.fc & IEEE80211_FCTL_FTYPE; - if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT) - local->dot11ReceivedFragmentCount++; - multicast = is_multicast_ether_addr(hdr->addr1); - - if (skb->len >= 16) - sta = rx.sta = sta_info_get(local, hdr->addr2); - else - sta = rx.sta = NULL; - - if (sta) { - rx.dev = sta->dev; - rx.sdata = IEEE80211_DEV_TO_SUB_IF(rx.dev); - } - - if ((status->flag & RX_FLAG_MMIC_ERROR)) { - ieee80211_rx_michael_mic_report(local->mdev, hdr, sta, &rx); - goto end; - } - - if (unlikely(local->sta_scanning)) - rx.u.rx.in_scan = 1; - - if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx, - sta) != TXRX_CONTINUE) - goto end; - skb = rx.skb; - - skb_push(skb, radiotap_len); - if (sta && !sta->assoc_ap && !(sta->flags & WLAN_STA_WDS) && - !local->iff_promiscs && !multicast) { - rx.u.rx.ra_match = 1; - ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx, - sta); - } else { - struct ieee80211_sub_if_data *prev = NULL; - struct sk_buff *skb_new; - u8 *bssid = ieee80211_get_bssid(hdr, skb->len - radiotap_len); - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - rx.u.rx.ra_match = 1; - switch (sdata->type) { - case IEEE80211_IF_TYPE_STA: - if (!bssid) - continue; - if (!ieee80211_bssid_match(bssid, - sdata->u.sta.bssid)) { - if (!rx.u.rx.in_scan) - continue; - rx.u.rx.ra_match = 0; - } else if (!multicast && - compare_ether_addr(sdata->dev->dev_addr, - hdr->addr1) != 0) { - if (!sdata->promisc) - continue; - rx.u.rx.ra_match = 0; - } - break; - case IEEE80211_IF_TYPE_IBSS: - if (!bssid) - continue; - if (!ieee80211_bssid_match(bssid, - sdata->u.sta.bssid)) { - if (!rx.u.rx.in_scan) - continue; - rx.u.rx.ra_match = 0; - } else if (!multicast && - compare_ether_addr(sdata->dev->dev_addr, - hdr->addr1) != 0) { - if (!sdata->promisc) - continue; - rx.u.rx.ra_match = 0; - } else if (!sta) - sta = rx.sta = - ieee80211_ibss_add_sta(sdata->dev, - skb, bssid, - hdr->addr2); - break; - case IEEE80211_IF_TYPE_AP: - if (!bssid) { - if (compare_ether_addr(sdata->dev->dev_addr, - hdr->addr1) != 0) - continue; - } else if (!ieee80211_bssid_match(bssid, - sdata->dev->dev_addr)) { - if (!rx.u.rx.in_scan) - continue; - rx.u.rx.ra_match = 0; - } - if (sdata->dev == local->mdev && - !rx.u.rx.in_scan) - /* do not receive anything via - * master device when not scanning */ - continue; - break; - case IEEE80211_IF_TYPE_WDS: - if (bssid || - (rx.fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) - continue; - if (compare_ether_addr(sdata->u.wds.remote_addr, - hdr->addr2) != 0) - continue; - break; - } - - if (prev) { - skb_new = skb_copy(skb, GFP_ATOMIC); - if (!skb_new) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: failed to copy " - "multicast frame for %s", - local->mdev->name, prev->dev->name); - continue; - } - rx.skb = skb_new; - rx.dev = prev->dev; - rx.sdata = prev; - ieee80211_invoke_rx_handlers(local, - local->rx_handlers, - &rx, sta); - } - prev = sdata; - } - if (prev) { - rx.skb = skb; - rx.dev = prev->dev; - rx.sdata = prev; - ieee80211_invoke_rx_handlers(local, local->rx_handlers, - &rx, sta); - } else - dev_kfree_skb(skb); - read_unlock(&local->sub_if_lock); - } - - end: - if (sta) - sta_info_put(sta); -} -EXPORT_SYMBOL(__ieee80211_rx); - -static ieee80211_txrx_result -ieee80211_tx_h_load_stats(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_local *local = tx->local; - struct ieee80211_hw_mode *mode = tx->u.tx.mode; - struct sk_buff *skb = tx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u32 load = 0, hdrtime; - - /* TODO: this could be part of tx_status handling, so that the number - * of retries would be known; TX rate should in that case be stored - * somewhere with the packet */ - - /* Estimate total channel use caused by this frame */ - - /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, - * 1 usec = 1/8 * (1080 / 10) = 13.5 */ - - if (mode->mode == MODE_IEEE80211A || - mode->mode == MODE_ATHEROS_TURBO || - mode->mode == MODE_ATHEROS_TURBOG || - (mode->mode == MODE_IEEE80211G && - tx->u.tx.rate->flags & IEEE80211_RATE_ERP)) - hdrtime = CHAN_UTIL_HDR_SHORT; - else - hdrtime = CHAN_UTIL_HDR_LONG; - - load = hdrtime; - if (!is_multicast_ether_addr(hdr->addr1)) - load += hdrtime; - - if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_RTS_CTS) - load += 2 * hdrtime; - else if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) - load += hdrtime; - - load += skb->len * tx->u.tx.rate->rate_inv; - - if (tx->u.tx.extra_frag) { - int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - load += 2 * hdrtime; - load += tx->u.tx.extra_frag[i]->len * - tx->u.tx.rate->rate; - } - } - - /* Divide channel_use by 8 to avoid wrapping around the counter */ - load >>= CHAN_UTIL_SHIFT; - local->channel_use_raw += load; - if (tx->sta) - tx->sta->channel_use_raw += load; - tx->sdata->channel_use_raw += load; - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_local *local = rx->local; - struct sk_buff *skb = rx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u32 load = 0, hdrtime; - struct ieee80211_rate *rate; - struct ieee80211_hw_mode *mode = local->hw.conf.mode; - int i; - - /* Estimate total channel use caused by this frame */ - - if (unlikely(mode->num_rates < 0)) - return TXRX_CONTINUE; - - rate = &mode->rates[0]; - for (i = 0; i < mode->num_rates; i++) { - if (mode->rates[i].val == rx->u.rx.status->rate) { - rate = &mode->rates[i]; - break; - } - } - - /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, - * 1 usec = 1/8 * (1080 / 10) = 13.5 */ - - if (mode->mode == MODE_IEEE80211A || - mode->mode == MODE_ATHEROS_TURBO || - mode->mode == MODE_ATHEROS_TURBOG || - (mode->mode == MODE_IEEE80211G && - rate->flags & IEEE80211_RATE_ERP)) - hdrtime = CHAN_UTIL_HDR_SHORT; - else - hdrtime = CHAN_UTIL_HDR_LONG; - - load = hdrtime; - if (!is_multicast_ether_addr(hdr->addr1)) - load += hdrtime; - - load += skb->len * rate->rate_inv; - - /* Divide channel_use by 8 to avoid wrapping around the counter */ - load >>= CHAN_UTIL_SHIFT; - local->channel_use_raw += load; - if (rx->sta) - rx->sta->channel_use_raw += load; - rx->u.rx.load = load; - - return TXRX_CONTINUE; -} - -static ieee80211_txrx_result -ieee80211_rx_h_if_stats(struct ieee80211_txrx_data *rx) -{ - rx->sdata->channel_use_raw += rx->u.rx.load; - return TXRX_CONTINUE; -} - -static void ieee80211_stat_refresh(unsigned long data) -{ - struct ieee80211_local *local = (struct ieee80211_local *) data; - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata; - - if (!local->stat_time) - return; - - /* go through all stations */ - spin_lock_bh(&local->sta_lock); - list_for_each_entry(sta, &local->sta_list, list) { - sta->channel_use = (sta->channel_use_raw / local->stat_time) / - CHAN_UTIL_PER_10MS; - sta->channel_use_raw = 0; - } - spin_unlock_bh(&local->sta_lock); - - /* go through all subinterfaces */ - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - sdata->channel_use = (sdata->channel_use_raw / - local->stat_time) / CHAN_UTIL_PER_10MS; - sdata->channel_use_raw = 0; - } - read_unlock(&local->sub_if_lock); - - /* hardware interface */ - local->channel_use = (local->channel_use_raw / - local->stat_time) / CHAN_UTIL_PER_10MS; - local->channel_use_raw = 0; - - local->stat_timer.expires = jiffies + HZ * local->stat_time / 100; - add_timer(&local->stat_timer); + sdata->flags &= ~(IEEE80211_SDATA_USE_PROTECTION | + IEEE80211_SDATA_SHORT_PREAMBLE); + ieee80211_erp_info_change_notify(dev, + IEEE80211_ERP_CHANGE_PROTECTION | + IEEE80211_ERP_CHANGE_PREAMBLE); } - -/* This is a version of the rx handler that can be called from hard irq - * context. Post the skb on the queue and schedule the tasklet */ -void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_rx_status *status) -{ - struct ieee80211_local *local = hw_to_local(hw); - - BUILD_BUG_ON(sizeof(struct ieee80211_rx_status) > sizeof(skb->cb)); - - skb->dev = local->mdev; - /* copy status into skb->cb for use by tasklet */ - memcpy(skb->cb, status, sizeof(*status)); - skb->pkt_type = IEEE80211_RX_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); -} -EXPORT_SYMBOL(ieee80211_rx_irqsafe); - void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_tx_status *status) @@ -4398,14 +624,13 @@ static void ieee80211_tasklet_handler(unsigned long data) break; default: /* should never get here! */ printk(KERN_ERR "%s: Unknown message type (%d)\n", - local->mdev->name, skb->pkt_type); + wiphy_name(local->hw.wiphy), skb->pkt_type); dev_kfree_skb(skb); break; } } } - /* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to * make a prepared TX frame (one that has been given to hw) to look like brand * new IEEE 802.11 frame that is ready to go through TX processing again. @@ -4420,10 +645,13 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; pkt_data->ifindex = control->ifindex; - pkt_data->mgmt_iface = (control->type == IEEE80211_IF_TYPE_MGMT); - pkt_data->req_tx_status = !!(control->flags & IEEE80211_TXCTL_REQ_TX_STATUS); - pkt_data->do_not_encrypt = !!(control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT); - pkt_data->requeue = !!(control->flags & IEEE80211_TXCTL_REQUEUE); + pkt_data->flags = 0; + if (control->flags & IEEE80211_TXCTL_REQ_TX_STATUS) + pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS; + if (control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT) + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + if (control->flags & IEEE80211_TXCTL_REQUEUE) + pkt_data->flags |= IEEE80211_TXPD_REQUEUE; pkt_data->queue = control->queue; hdrlen = ieee80211_get_hdrlen_from_skb(skb); @@ -4431,7 +659,7 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, if (!key) goto no_key; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: iv_len = WEP_IV_LEN; mic_len = WEP_ICV_LEN; @@ -4448,7 +676,8 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, goto no_key; } - if (skb->len >= mic_len && key->force_sw_encrypt) + if (skb->len >= mic_len && + !(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) skb_trim(skb, skb->len - mic_len); if (skb->len >= iv_len && skb->len > hdrlen) { memmove(skb->data + iv_len, skb->data, hdrlen); @@ -4468,7 +697,6 @@ no_key: } } - void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_tx_status *status) { @@ -4476,7 +704,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_local *local = hw_to_local(hw); u16 frag, type; - u32 msg_type; struct ieee80211_tx_status_rtap_hdr *rthdr; struct ieee80211_sub_if_data *sdata; int monitors; @@ -4484,7 +711,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, if (!status) { printk(KERN_ERR "%s: ieee80211_tx_status called with NULL status\n", - local->mdev->name); + wiphy_name(local->hw.wiphy)); dev_kfree_skb(skb); return; } @@ -4541,7 +768,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, printk(KERN_DEBUG "%s: dropped TX " "filtered frame queue_len=%d " "PS=%d @%lu\n", - local->mdev->name, + wiphy_name(local->hw.wiphy), skb_queue_len( &sta->tx_filtered), !!(sta->flags & WLAN_STA_PS), @@ -4591,29 +818,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, local->dot11FailedCount++; } - msg_type = (status->flags & IEEE80211_TX_STATUS_ACK) ? - ieee80211_msg_tx_callback_ack : ieee80211_msg_tx_callback_fail; - /* this was a transmitted frame, but now we want to reuse it */ skb_orphan(skb); - if ((status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS) && - local->apdev) { - if (local->monitors) { - skb2 = skb_clone(skb, GFP_ATOMIC); - } else { - skb2 = skb; - skb = NULL; - } - - if (skb2) - /* Send frame to hostapd */ - ieee80211_rx_mgmt(local, skb2, NULL, msg_type); - - if (!skb) - return; - } - if (!local->monitors) { dev_kfree_skb(skb); return; @@ -4648,9 +855,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, rthdr->data_retries = status->retry_count; - read_lock(&local->sub_if_lock); + rcu_read_lock(); monitors = local->monitors; - list_for_each_entry(sdata, &local->sub_if_list, list) { + list_for_each_entry_rcu(sdata, &local->interfaces, list) { /* * Using the monitors counter is possibly racy, but * if the value is wrong we simply either clone the skb @@ -4666,7 +873,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, continue; monitors--; if (monitors) - skb2 = skb_clone(skb, GFP_KERNEL); + skb2 = skb_clone(skb, GFP_ATOMIC); else skb2 = NULL; skb->dev = sdata->dev; @@ -4681,170 +888,12 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, } } out: - read_unlock(&local->sub_if_lock); + rcu_read_unlock(); if (skb) dev_kfree_skb(skb); } EXPORT_SYMBOL(ieee80211_tx_status); -/* TODO: implement register/unregister functions for adding TX/RX handlers - * into ordered list */ - -/* rx_pre handlers don't have dev and sdata fields available in - * ieee80211_txrx_data */ -static ieee80211_rx_handler ieee80211_rx_pre_handlers[] = -{ - ieee80211_rx_h_parse_qos, - ieee80211_rx_h_load_stats, - NULL -}; - -static ieee80211_rx_handler ieee80211_rx_handlers[] = -{ - ieee80211_rx_h_if_stats, - ieee80211_rx_h_monitor, - ieee80211_rx_h_passive_scan, - ieee80211_rx_h_check, - ieee80211_rx_h_sta_process, - ieee80211_rx_h_ccmp_decrypt, - ieee80211_rx_h_tkip_decrypt, - ieee80211_rx_h_wep_weak_iv_detection, - ieee80211_rx_h_wep_decrypt, - ieee80211_rx_h_defragment, - ieee80211_rx_h_ps_poll, - ieee80211_rx_h_michael_mic_verify, - /* this must be after decryption - so header is counted in MPDU mic - * must be before pae and data, so QOS_DATA format frames - * are not passed to user space by these functions - */ - ieee80211_rx_h_remove_qos_control, - ieee80211_rx_h_802_1x_pae, - ieee80211_rx_h_drop_unencrypted, - ieee80211_rx_h_data, - ieee80211_rx_h_mgmt, - NULL -}; - -static ieee80211_tx_handler ieee80211_tx_handlers[] = -{ - ieee80211_tx_h_check_assoc, - ieee80211_tx_h_sequence, - ieee80211_tx_h_ps_buf, - ieee80211_tx_h_select_key, - ieee80211_tx_h_michael_mic_add, - ieee80211_tx_h_fragment, - ieee80211_tx_h_tkip_encrypt, - ieee80211_tx_h_ccmp_encrypt, - ieee80211_tx_h_wep_encrypt, - ieee80211_tx_h_rate_ctrl, - ieee80211_tx_h_misc, - ieee80211_tx_h_load_stats, - NULL -}; - - -int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct sta_info *sta; - - if (compare_ether_addr(remote_addr, sdata->u.wds.remote_addr) == 0) - return 0; - - /* Create STA entry for the new peer */ - sta = sta_info_add(local, dev, remote_addr, GFP_KERNEL); - if (!sta) - return -ENOMEM; - sta_info_put(sta); - - /* Remove STA entry for the old peer */ - sta = sta_info_get(local, sdata->u.wds.remote_addr); - if (sta) { - sta_info_put(sta); - sta_info_free(sta, 0); - } else { - printk(KERN_DEBUG "%s: could not find STA entry for WDS link " - "peer " MAC_FMT "\n", - dev->name, MAC_ARG(sdata->u.wds.remote_addr)); - } - - /* Update WDS link data */ - memcpy(&sdata->u.wds.remote_addr, remote_addr, ETH_ALEN); - - return 0; -} - -/* Must not be called for mdev and apdev */ -void ieee80211_if_setup(struct net_device *dev) -{ - ether_setup(dev); - dev->hard_start_xmit = ieee80211_subif_start_xmit; - dev->wireless_handlers = &ieee80211_iw_handler_def; - dev->set_multicast_list = ieee80211_set_multicast_list; - dev->change_mtu = ieee80211_change_mtu; - dev->get_stats = ieee80211_get_stats; - dev->open = ieee80211_open; - dev->stop = ieee80211_stop; - dev->uninit = ieee80211_if_reinit; - dev->destructor = ieee80211_if_free; -} - -void ieee80211_if_mgmt_setup(struct net_device *dev) -{ - ether_setup(dev); - dev->hard_start_xmit = ieee80211_mgmt_start_xmit; - dev->change_mtu = ieee80211_change_mtu_apdev; - dev->get_stats = ieee80211_get_stats; - dev->open = ieee80211_mgmt_open; - dev->stop = ieee80211_mgmt_stop; - dev->type = ARPHRD_IEEE80211_PRISM; - dev->hard_header_parse = header_parse_80211; - dev->uninit = ieee80211_if_reinit; - dev->destructor = ieee80211_if_free; -} - -int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, - const char *name) -{ - struct rate_control_ref *ref, *old; - - ASSERT_RTNL(); - if (local->open_count || netif_running(local->mdev) || - (local->apdev && netif_running(local->apdev))) - return -EBUSY; - - ref = rate_control_alloc(name, local); - if (!ref) { - printk(KERN_WARNING "%s: Failed to select rate control " - "algorithm\n", local->mdev->name); - return -ENOENT; - } - - old = local->rate_ctrl; - local->rate_ctrl = ref; - if (old) { - rate_control_put(old); - sta_info_flush(local, NULL); - } - - printk(KERN_DEBUG "%s: Selected rate control " - "algorithm '%s'\n", local->mdev->name, - ref->ops->name); - - - return 0; -} - -static void rate_control_deinitialize(struct ieee80211_local *local) -{ - struct rate_control_ref *ref; - - ref = local->rate_ctrl; - local->rate_ctrl = NULL; - rate_control_put(ref); -} - struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -4888,8 +937,12 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); BUG_ON(!ops->tx); + BUG_ON(!ops->start); + BUG_ON(!ops->stop); BUG_ON(!ops->config); BUG_ON(!ops->add_interface); + BUG_ON(!ops->remove_interface); + BUG_ON(!ops->configure_filter); local->ops = ops; /* for now, mdev needs sub_if_data :/ */ @@ -4919,17 +972,13 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->long_retry_limit = 4; local->hw.conf.radio_enabled = 1; - local->enabled_modes = (unsigned int) -1; + local->enabled_modes = ~0; INIT_LIST_HEAD(&local->modes_list); - rwlock_init(&local->sub_if_lock); - INIT_LIST_HEAD(&local->sub_if_list); + INIT_LIST_HEAD(&local->interfaces); INIT_DELAYED_WORK(&local->scan_work, ieee80211_sta_scan_work); - init_timer(&local->stat_timer); - local->stat_timer.function = ieee80211_stat_refresh; - local->stat_timer.data = (unsigned long) local; ieee80211_rx_bss_list_init(mdev); sta_info_init(local); @@ -4938,7 +987,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, mdev->open = ieee80211_master_open; mdev->stop = ieee80211_master_stop; mdev->type = ARPHRD_IEEE80211; - mdev->hard_header_parse = header_parse_80211; + mdev->header_ops = &ieee80211_header_ops; + mdev->set_multicast_list = ieee80211_master_set_multicast_list; sdata->type = IEEE80211_IF_TYPE_AP; sdata->dev = mdev; @@ -4946,7 +996,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, sdata->u.ap.force_unicast_rateidx = -1; sdata->u.ap.max_ratectrl_rateidx = -1; ieee80211_if_sdata_init(sdata); - list_add_tail(&sdata->list, &local->sub_if_list); + /* no RCU needed since we're still during init phase */ + list_add_tail(&sdata->list, &local->interfaces); tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, (unsigned long)local); @@ -5019,11 +1070,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_dev; ieee80211_debugfs_add_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev)); + ieee80211_if_set_type(local->mdev, IEEE80211_IF_TYPE_AP); result = ieee80211_init_rate_ctrl_alg(local, NULL); if (result < 0) { printk(KERN_DEBUG "%s: Failed to initialize rate control " - "algorithm\n", local->mdev->name); + "algorithm\n", wiphy_name(local->hw.wiphy)); goto fail_rate; } @@ -5031,7 +1083,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (result < 0) { printk(KERN_DEBUG "%s: Failed to initialize wep\n", - local->mdev->name); + wiphy_name(local->hw.wiphy)); goto fail_wep; } @@ -5042,7 +1094,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) IEEE80211_IF_TYPE_STA); if (result) printk(KERN_WARNING "%s: Failed to add default virtual iface\n", - local->mdev->name); + wiphy_name(local->hw.wiphy)); local->reg_state = IEEE80211_DEV_REGISTERED; rtnl_unlock(); @@ -5105,7 +1157,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata, *tmp; - struct list_head tmp_list; int i; tasklet_kill(&local->tx_pending_tasklet); @@ -5116,20 +1167,30 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) BUG_ON(local->reg_state != IEEE80211_DEV_REGISTERED); local->reg_state = IEEE80211_DEV_UNREGISTERED; - if (local->apdev) - ieee80211_if_del_mgmt(local); - write_lock_bh(&local->sub_if_lock); - list_replace_init(&local->sub_if_list, &tmp_list); - write_unlock_bh(&local->sub_if_lock); + /* + * At this point, interface list manipulations are fine + * because the driver cannot be handing us frames any + * more and the tasklet is killed. + */ - list_for_each_entry_safe(sdata, tmp, &tmp_list, list) + /* + * First, we remove all non-master interfaces. Do this because they + * may have bss pointer dependency on the master, and when we free + * the master these would be freed as well, breaking our list + * iteration completely. + */ + list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { + if (sdata->dev == local->mdev) + continue; + list_del(&sdata->list); __ieee80211_if_del(local, sdata); + } - rtnl_unlock(); + /* then, finally, remove the master interface */ + __ieee80211_if_del(local, IEEE80211_DEV_TO_SUB_IF(local->mdev)); - if (local->stat_time) - del_timer_sync(&local->stat_timer); + rtnl_unlock(); ieee80211_rx_bss_list_deinit(local->mdev); ieee80211_clear_tx_pending(local); @@ -5145,7 +1206,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) if (skb_queue_len(&local->skb_queue) || skb_queue_len(&local->skb_queue_unreliable)) printk(KERN_WARNING "%s: skb_queue not empty\n", - local->mdev->name); + wiphy_name(local->hw.wiphy)); skb_queue_purge(&local->skb_queue); skb_queue_purge(&local->skb_queue_unreliable); @@ -5165,72 +1226,6 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_free_hw); -void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue) -{ - struct ieee80211_local *local = hw_to_local(hw); - - if (test_and_clear_bit(IEEE80211_LINK_STATE_XOFF, - &local->state[queue])) { - if (test_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[queue])) - tasklet_schedule(&local->tx_pending_tasklet); - else - if (!ieee80211_qdisc_installed(local->mdev)) { - if (queue == 0) - netif_wake_queue(local->mdev); - } else - __netif_schedule(local->mdev); - } -} -EXPORT_SYMBOL(ieee80211_wake_queue); - -void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue) -{ - struct ieee80211_local *local = hw_to_local(hw); - - if (!ieee80211_qdisc_installed(local->mdev) && queue == 0) - netif_stop_queue(local->mdev); - set_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); -} -EXPORT_SYMBOL(ieee80211_stop_queue); - -void ieee80211_start_queues(struct ieee80211_hw *hw) -{ - struct ieee80211_local *local = hw_to_local(hw); - int i; - - for (i = 0; i < local->hw.queues; i++) - clear_bit(IEEE80211_LINK_STATE_XOFF, &local->state[i]); - if (!ieee80211_qdisc_installed(local->mdev)) - netif_start_queue(local->mdev); -} -EXPORT_SYMBOL(ieee80211_start_queues); - -void ieee80211_stop_queues(struct ieee80211_hw *hw) -{ - int i; - - for (i = 0; i < hw->queues; i++) - ieee80211_stop_queue(hw, i); -} -EXPORT_SYMBOL(ieee80211_stop_queues); - -void ieee80211_wake_queues(struct ieee80211_hw *hw) -{ - int i; - - for (i = 0; i < hw->queues; i++) - ieee80211_wake_queue(hw, i); -} -EXPORT_SYMBOL(ieee80211_wake_queues); - -struct net_device_stats *ieee80211_dev_stats(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - return &sdata->stats; -} - static int __init ieee80211_init(void) { struct sk_buff *skb; @@ -5251,7 +1246,6 @@ static int __init ieee80211_init(void) return 0; } - static void __exit ieee80211_exit(void) { ieee80211_wme_unregister(); diff --git a/net/mac80211/ieee80211_cfg.c b/net/mac80211/ieee80211_cfg.c deleted file mode 100644 index 509096edb324..000000000000 --- a/net/mac80211/ieee80211_cfg.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * mac80211 configuration hooks for cfg80211 - * - * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> - * - * This file is GPLv2 as found in COPYING. - */ - -#include <linux/nl80211.h> -#include <linux/rtnetlink.h> -#include <net/cfg80211.h> -#include "ieee80211_i.h" -#include "ieee80211_cfg.h" - -static int ieee80211_add_iface(struct wiphy *wiphy, char *name, - unsigned int type) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - int itype; - - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) - return -ENODEV; - - switch (type) { - case NL80211_IFTYPE_UNSPECIFIED: - itype = IEEE80211_IF_TYPE_STA; - break; - case NL80211_IFTYPE_ADHOC: - itype = IEEE80211_IF_TYPE_IBSS; - break; - case NL80211_IFTYPE_STATION: - itype = IEEE80211_IF_TYPE_STA; - break; - case NL80211_IFTYPE_MONITOR: - itype = IEEE80211_IF_TYPE_MNTR; - break; - default: - return -EINVAL; - } - - return ieee80211_if_add(local->mdev, name, NULL, itype); -} - -static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - struct net_device *dev; - char *name; - - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) - return -ENODEV; - - dev = dev_get_by_index(ifindex); - if (!dev) - return 0; - - name = dev->name; - dev_put(dev); - - return ieee80211_if_remove(local->mdev, name, -1); -} - -struct cfg80211_ops mac80211_config_ops = { - .add_virtual_intf = ieee80211_add_iface, - .del_virtual_intf = ieee80211_del_iface, -}; diff --git a/net/mac80211/ieee80211_common.h b/net/mac80211/ieee80211_common.h index 77c6afb7f6a8..c15295d43d87 100644 --- a/net/mac80211/ieee80211_common.h +++ b/net/mac80211/ieee80211_common.h @@ -48,13 +48,13 @@ enum ieee80211_msg_type { ieee80211_msg_tx_callback_ack = 1, ieee80211_msg_tx_callback_fail = 2, /* hole at 3, was ieee80211_msg_passive_scan but unused */ - ieee80211_msg_wep_frame_unknown_key = 4, + /* hole at 4, was ieee80211_msg_wep_frame_unknown_key but now unused */ ieee80211_msg_michael_mic_failure = 5, /* hole at 6, was monitor but never sent to userspace */ ieee80211_msg_sta_not_assoc = 7, /* 8 was ieee80211_msg_set_aid_for_sta */ - ieee80211_msg_key_threshold_notification = 9, - ieee80211_msg_radar = 11, + /* 9 was ieee80211_msg_key_threshold_notification */ + /* 11 was ieee80211_msg_radar */ }; struct ieee80211_msg_key_notification { @@ -73,8 +73,6 @@ enum ieee80211_phytype { ieee80211_phytype_ofdm_dot11_g = 6, ieee80211_phytype_pbcc_dot11_g = 7, ieee80211_phytype_ofdm_dot11_a = 8, - ieee80211_phytype_dsss_dot11_turbog = 255, - ieee80211_phytype_dsss_dot11_turbo = 256, }; enum ieee80211_ssi_type { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6f7bae7ef9c0..d34a9deca67a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -21,6 +21,7 @@ #include <linux/workqueue.h> #include <linux/types.h> #include <linux/spinlock.h> +#include <linux/etherdevice.h> #include <net/wireless.h> #include "ieee80211_key.h" #include "sta_info.h" @@ -112,6 +113,16 @@ typedef enum { TXRX_CONTINUE, TXRX_DROP, TXRX_QUEUED } ieee80211_txrx_result; +/* flags used in struct ieee80211_txrx_data.flags */ +/* whether the MSDU was fragmented */ +#define IEEE80211_TXRXD_FRAGMENTED BIT(0) +#define IEEE80211_TXRXD_TXUNICAST BIT(1) +#define IEEE80211_TXRXD_TXPS_BUFFERED BIT(2) +#define IEEE80211_TXRXD_TXPROBE_LAST_FRAG BIT(3) +#define IEEE80211_TXRXD_RXIN_SCAN BIT(4) +/* frame is destined to interface currently processed (incl. multicast frames) */ +#define IEEE80211_TXRXD_RXRA_MATCH BIT(5) +#define IEEE80211_TXRXD_TX_INJECTED BIT(6) struct ieee80211_txrx_data { struct sk_buff *skb; struct net_device *dev; @@ -120,14 +131,10 @@ struct ieee80211_txrx_data { struct sta_info *sta; u16 fc, ethertype; struct ieee80211_key *key; - unsigned int fragmented:1; /* whether the MSDU was fragmented */ + unsigned int flags; union { struct { struct ieee80211_tx_control *control; - unsigned int unicast:1; - unsigned int ps_buffered:1; - unsigned int short_preamble:1; - unsigned int probe_last_frag:1; struct ieee80211_hw_mode *mode; struct ieee80211_rate *rate; /* use this rate (if set) for last fragment; rate can @@ -135,7 +142,6 @@ struct ieee80211_txrx_data { * when using CTS protection with IEEE 802.11g. */ struct ieee80211_rate *last_frag_rate; int last_frag_hwrate; - int mgmt_interface; /* Extra fragments (in addition to the first fragment * in skb) */ @@ -147,23 +153,22 @@ struct ieee80211_txrx_data { int sent_ps_buffered; int queue; int load; - unsigned int in_scan:1; - /* frame is destined to interface currently processed - * (including multicast frames) */ - unsigned int ra_match:1; + u32 tkip_iv32; + u16 tkip_iv16; } rx; } u; }; +/* flags used in struct ieee80211_tx_packet_data.flags */ +#define IEEE80211_TXPD_REQ_TX_STATUS BIT(0) +#define IEEE80211_TXPD_DO_NOT_ENCRYPT BIT(1) +#define IEEE80211_TXPD_REQUEUE BIT(2) /* Stored in sk_buff->cb */ struct ieee80211_tx_packet_data { int ifindex; unsigned long jiffies; - unsigned int req_tx_status:1; - unsigned int do_not_encrypt:1; - unsigned int requeue:1; - unsigned int mgmt_iface:1; - unsigned int queue:4; + unsigned int flags; + u8 queue; }; struct ieee80211_tx_stored_packet { @@ -174,7 +179,7 @@ struct ieee80211_tx_stored_packet { int last_frag_rateidx; int last_frag_hwrate; struct ieee80211_rate *last_frag_rate; - unsigned int last_frag_rate_ctrl_probe:1; + unsigned int last_frag_rate_ctrl_probe; }; typedef ieee80211_txrx_result (*ieee80211_tx_handler) @@ -187,10 +192,10 @@ struct ieee80211_if_ap { u8 *beacon_head, *beacon_tail; int beacon_head_len, beacon_tail_len; + struct list_head vlans; + u8 ssid[IEEE80211_MAX_SSID_LEN]; size_t ssid_len; - u8 *generic_elem; - size_t generic_elem_len; /* yes, this looks ugly, but guarantees that we can later use * bitmap_empty :) @@ -210,9 +215,23 @@ struct ieee80211_if_wds { }; struct ieee80211_if_vlan { - u8 id; + struct ieee80211_sub_if_data *ap; + struct list_head list; }; +/* flags used in struct ieee80211_if_sta.flags */ +#define IEEE80211_STA_SSID_SET BIT(0) +#define IEEE80211_STA_BSSID_SET BIT(1) +#define IEEE80211_STA_PREV_BSSID_SET BIT(2) +#define IEEE80211_STA_AUTHENTICATED BIT(3) +#define IEEE80211_STA_ASSOCIATED BIT(4) +#define IEEE80211_STA_PROBEREQ_POLL BIT(5) +#define IEEE80211_STA_CREATE_IBSS BIT(6) +#define IEEE80211_STA_MIXED_CELL BIT(7) +#define IEEE80211_STA_WMM_ENABLED BIT(8) +#define IEEE80211_STA_AUTO_SSID_SEL BIT(10) +#define IEEE80211_STA_AUTO_BSSID_SEL BIT(11) +#define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) struct ieee80211_if_sta { enum { IEEE80211_DISABLED, IEEE80211_AUTHENTICATE, @@ -235,25 +254,14 @@ struct ieee80211_if_sta { int auth_tries, assoc_tries; - unsigned int ssid_set:1; - unsigned int bssid_set:1; - unsigned int prev_bssid_set:1; - unsigned int authenticated:1; - unsigned int associated:1; - unsigned int probereq_poll:1; - unsigned int create_ibss:1; - unsigned int mixed_cell:1; - unsigned int wmm_enabled:1; - unsigned int auto_ssid_sel:1; - unsigned int auto_bssid_sel:1; - unsigned int auto_channel_sel:1; + unsigned int flags; #define IEEE80211_STA_REQ_SCAN 0 #define IEEE80211_STA_REQ_AUTH 1 #define IEEE80211_STA_REQ_RUN 2 unsigned long request; struct sk_buff_head skb_queue; - int key_mgmt; + int key_management_enabled; unsigned long last_probe; #define IEEE80211_AUTH_ALG_OPEN BIT(0) @@ -271,21 +279,29 @@ struct ieee80211_if_sta { }; +/* flags used in struct ieee80211_sub_if_data.flags */ +#define IEEE80211_SDATA_ALLMULTI BIT(0) +#define IEEE80211_SDATA_PROMISC BIT(1) +#define IEEE80211_SDATA_USE_PROTECTION BIT(2) /* CTS protect ERP frames */ +/* use short preamble with IEEE 802.11b: this flag is set when the AP or beacon + * generator reports that there are no present stations that cannot support short + * preambles */ +#define IEEE80211_SDATA_SHORT_PREAMBLE BIT(3) +#define IEEE80211_SDATA_USERSPACE_MLME BIT(4) struct ieee80211_sub_if_data { struct list_head list; - unsigned int type; + enum ieee80211_if_types type; struct wireless_dev wdev; + /* keys */ + struct list_head key_list; + struct net_device *dev; struct ieee80211_local *local; - int mc_count; - unsigned int allmulti:1; - unsigned int promisc:1; - unsigned int use_protection:1; /* CTS protect ERP frames */ + unsigned int flags; - struct net_device_stats stats; int drop_unencrypted; int eapol; /* 0 = process EAPOL frames as normal data frames, * 1 = send EAPOL frames through wlan#ap to hostapd @@ -364,7 +380,6 @@ struct ieee80211_sub_if_data { struct dentry *drop_unencrypted; struct dentry *eapol; struct dentry *ieee8021_x; - struct dentry *vlan_id; } vlan; struct { struct dentry *mode; @@ -393,9 +408,9 @@ struct ieee80211_local { struct list_head modes_list; struct net_device *mdev; /* wmaster# - "master" 802.11 device */ - struct net_device *apdev; /* wlan#ap - management frames (hostapd) */ int open_count; int monitors; + unsigned int filter_flags; /* FIF_* */ struct iw_statistics wstats; u8 wstats_flags; int tx_headroom; /* required headroom for hardware/radiotap */ @@ -416,10 +431,9 @@ struct ieee80211_local { struct sk_buff_head skb_queue_unreliable; /* Station data structures */ - spinlock_t sta_lock; /* mutex for STA data structures */ + rwlock_t sta_lock; /* protects STA data structures */ int num_sta; /* number of stations in sta_list */ struct list_head sta_list; - struct list_head deleted_sta_list; struct sta_info *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; @@ -427,17 +441,11 @@ struct ieee80211_local { struct ieee80211_tx_stored_packet pending_packet[NUM_TX_DATA_QUEUES]; struct tasklet_struct tx_pending_tasklet; - int mc_count; /* total count of multicast entries in all interfaces */ - int iff_allmultis, iff_promiscs; - /* number of interfaces with corresponding IFF_ flags */ + /* number of interfaces with corresponding IFF_ flags */ + atomic_t iff_allmultis, iff_promiscs; struct rate_control_ref *rate_ctrl; - int next_mode; /* MODE_IEEE80211* - * The mode preference for next channel change. This is - * used to select .11g vs. .11b channels (or 4.9 GHz vs. - * .11a) when the channel number is not unique. */ - /* Supported and basic rate filters for different modes. These are * pointers to -1 terminated lists and rates in 100 kbps units. */ int *supp_rates[NUM_IEEE80211_MODES]; @@ -447,14 +455,10 @@ struct ieee80211_local { int fragmentation_threshold; int short_retry_limit; /* dot11ShortRetryLimit */ int long_retry_limit; /* dot11LongRetryLimit */ - int short_preamble; /* use short preamble with IEEE 802.11b */ struct crypto_blkcipher *wep_tx_tfm; struct crypto_blkcipher *wep_rx_tfm; u32 wep_iv; - int key_tx_rx_threshold; /* number of times any key can be used in TX - * or RX before generating a rekey - * notification; 0 = notification disabled. */ int bridge_packets; /* bridge packets between associated stations and * deliver multicast frames both back to wireless @@ -464,9 +468,8 @@ struct ieee80211_local { ieee80211_rx_handler *rx_handlers; ieee80211_tx_handler *tx_handlers; - rwlock_t sub_if_lock; /* Protects sub_if_list. Cannot be taken under - * sta_bss_lock or sta_lock. */ - struct list_head sub_if_list; + struct list_head interfaces; + int sta_scanning; int scan_channel_idx; enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state; @@ -500,25 +503,17 @@ struct ieee80211_local { #ifdef CONFIG_MAC80211_LEDS int tx_led_counter, rx_led_counter; - struct led_trigger *tx_led, *rx_led; - char tx_led_name[32], rx_led_name[32]; + struct led_trigger *tx_led, *rx_led, *assoc_led; + char tx_led_name[32], rx_led_name[32], assoc_led_name[32]; #endif u32 channel_use; u32 channel_use_raw; - u32 stat_time; - struct timer_list stat_timer; #ifdef CONFIG_MAC80211_DEBUGFS struct work_struct sta_debugfs_add; #endif - enum { - STA_ANTENNA_SEL_AUTO = 0, - STA_ANTENNA_SEL_SW_CTRL = 1, - STA_ANTENNA_SEL_SW_CTRL_DEBUG = 2 - } sta_antenna_sel; - #ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* TX/RX handler statistics */ unsigned int tx_handlers_drop; @@ -548,16 +543,9 @@ struct ieee80211_local { #endif /* CONFIG_MAC80211_DEBUG_COUNTERS */ - int default_wep_only; /* only default WEP keys are used with this - * interface; this is used to decide when hwaccel - * can be used with default keys */ int total_ps_buffered; /* total number of all buffered unicast and * multicast packets for power saving stations */ - int allow_broadcast_always; /* whether to allow TX of broadcast frames - * even when there are no associated STAs - */ - int wifi_wme_noack_test; unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ @@ -566,17 +554,13 @@ struct ieee80211_local { unsigned int hw_modes; /* bitfield of supported hardware modes; * (1 << MODE_*) */ - int user_space_mlme; - #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { struct dentry *channel; struct dentry *frequency; - struct dentry *radar_detect; struct dentry *antenna_sel_tx; struct dentry *antenna_sel_rx; struct dentry *bridge_packets; - struct dentry *key_tx_rx_threshold; struct dentry *rts_threshold; struct dentry *fragmentation_threshold; struct dentry *short_retry_limit; @@ -584,7 +568,6 @@ struct ieee80211_local { struct dentry *total_ps_buffered; struct dentry *mode; struct dentry *wep_iv; - struct dentry *tx_power_reduction; struct dentry *modes; struct dentry *statistics; struct local_debugfsdentries_statsdentries { @@ -656,38 +639,38 @@ struct sta_attribute { ssize_t (*store)(struct sta_info *, const char *buf, size_t count); }; -static inline void __bss_tim_set(struct ieee80211_if_ap *bss, int aid) +static inline void __bss_tim_set(struct ieee80211_if_ap *bss, u16 aid) { /* - * This format has ben mandated by the IEEE specifications, + * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the __set_bit() format. */ - bss->tim[(aid)/8] |= 1<<((aid) % 8); + bss->tim[aid / 8] |= (1 << (aid % 8)); } static inline void bss_tim_set(struct ieee80211_local *local, - struct ieee80211_if_ap *bss, int aid) + struct ieee80211_if_ap *bss, u16 aid) { - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); __bss_tim_set(bss, aid); - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); } -static inline void __bss_tim_clear(struct ieee80211_if_ap *bss, int aid) +static inline void __bss_tim_clear(struct ieee80211_if_ap *bss, u16 aid) { /* - * This format has ben mandated by the IEEE specifications, + * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the __clear_bit() format. */ - bss->tim[(aid)/8] &= !(1<<((aid) % 8)); + bss->tim[aid / 8] &= ~(1 << (aid % 8)); } static inline void bss_tim_clear(struct ieee80211_local *local, - struct ieee80211_if_ap *bss, int aid) + struct ieee80211_if_ap *bss, u16 aid) { - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); __bss_tim_clear(bss, aid); - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); } /** @@ -707,35 +690,28 @@ static inline int ieee80211_is_erp_rate(int phymode, int rate) return 0; } +static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) +{ + return compare_ether_addr(raddr, addr) == 0 || + is_broadcast_ether_addr(raddr); +} + + /* ieee80211.c */ int ieee80211_hw_config(struct ieee80211_local *local); int ieee80211_if_config(struct net_device *dev); int ieee80211_if_config_beacon(struct net_device *dev); -struct ieee80211_key_conf * -ieee80211_key_data2conf(struct ieee80211_local *local, - const struct ieee80211_key *data); -struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, - int idx, size_t key_len, gfp_t flags); -void ieee80211_key_free(struct ieee80211_key *key); -void ieee80211_rx_mgmt(struct ieee80211_local *local, struct sk_buff *skb, - struct ieee80211_rx_status *status, u32 msg_type); void ieee80211_prepare_rates(struct ieee80211_local *local, struct ieee80211_hw_mode *mode); void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx); int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr); -int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); -int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); void ieee80211_if_setup(struct net_device *dev); -void ieee80211_if_mgmt_setup(struct net_device *dev); -int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, - const char *name); -struct net_device_stats *ieee80211_dev_stats(struct net_device *dev); +struct ieee80211_rate *ieee80211_get_rate(struct ieee80211_local *local, + int phymode, int hwrate); /* ieee80211_ioctl.c */ extern const struct iw_handler_def ieee80211_iw_handler_def; -void ieee80211_update_default_wep_only(struct ieee80211_local *local); - /* Least common multiple of the used rates (in 100 kbps). This is used to * calculate rate_inv values for each rate so that only integers are needed. */ @@ -783,6 +759,8 @@ struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev, u8 *addr); int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason); int ieee80211_sta_disassociate(struct net_device *dev, u16 reason); +void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes); +void ieee80211_reset_erp_info(struct net_device *dev); /* ieee80211_iface.c */ int ieee80211_if_add(struct net_device *dev, const char *name, @@ -794,14 +772,32 @@ void __ieee80211_if_del(struct ieee80211_local *local, int ieee80211_if_remove(struct net_device *dev, const char *name, int id); void ieee80211_if_free(struct net_device *dev); void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata); -int ieee80211_if_add_mgmt(struct ieee80211_local *local); -void ieee80211_if_del_mgmt(struct ieee80211_local *local); /* regdomain.c */ void ieee80211_regdomain_init(void); void ieee80211_set_default_regdomain(struct ieee80211_hw_mode *mode); -/* for wiphy privid */ -extern void *mac80211_wiphy_privid; +/* rx handling */ +extern ieee80211_rx_handler ieee80211_rx_pre_handlers[]; +extern ieee80211_rx_handler ieee80211_rx_handlers[]; + +/* tx handling */ +extern ieee80211_tx_handler ieee80211_tx_handlers[]; +void ieee80211_clear_tx_pending(struct ieee80211_local *local); +void ieee80211_tx_pending(unsigned long data); +int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev); +int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); +int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); + +/* utility functions/constants */ +extern void *mac80211_wiphy_privid; /* for wiphy privid */ +extern const unsigned char rfc1042_header[6]; +extern const unsigned char bridge_tunnel_header[6]; +u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len); +int ieee80211_is_eapol(const struct sk_buff *skb); +int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, + int rate, int erp, int short_preamble); +void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, + struct ieee80211_hdr *hdr); #endif /* IEEE80211_I_H */ diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c index 8532a5ccdd1e..43e505d29452 100644 --- a/net/mac80211/ieee80211_iface.c +++ b/net/mac80211/ieee80211_iface.c @@ -25,6 +25,8 @@ void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata) sdata->eapol = 1; for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) skb_queue_head_init(&sdata->fragments[i].skb_list); + + INIT_LIST_HEAD(&sdata->key_list); } static void ieee80211_if_sdata_deinit(struct ieee80211_sub_if_data *sdata) @@ -77,18 +79,15 @@ int ieee80211_if_add(struct net_device *dev, const char *name, ieee80211_debugfs_add_netdev(sdata); ieee80211_if_set_type(ndev, type); - write_lock_bh(&local->sub_if_lock); + /* we're under RTNL so all this is fine */ if (unlikely(local->reg_state == IEEE80211_DEV_UNREGISTERED)) { - write_unlock_bh(&local->sub_if_lock); __ieee80211_if_del(local, sdata); return -ENODEV; } - list_add(&sdata->list, &local->sub_if_list); + list_add_tail_rcu(&sdata->list, &local->interfaces); + if (new_dev) *new_dev = ndev; - write_unlock_bh(&local->sub_if_lock); - - ieee80211_update_default_wep_only(local); return 0; @@ -97,74 +96,35 @@ fail: return ret; } -int ieee80211_if_add_mgmt(struct ieee80211_local *local) -{ - struct net_device *ndev; - struct ieee80211_sub_if_data *nsdata; - int ret; - - ASSERT_RTNL(); - - ndev = alloc_netdev(sizeof(struct ieee80211_sub_if_data), "wmgmt%d", - ieee80211_if_mgmt_setup); - if (!ndev) - return -ENOMEM; - ret = dev_alloc_name(ndev, ndev->name); - if (ret < 0) - goto fail; - - memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); - SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); - - nsdata = IEEE80211_DEV_TO_SUB_IF(ndev); - ndev->ieee80211_ptr = &nsdata->wdev; - nsdata->wdev.wiphy = local->hw.wiphy; - nsdata->type = IEEE80211_IF_TYPE_MGMT; - nsdata->dev = ndev; - nsdata->local = local; - ieee80211_if_sdata_init(nsdata); - - ret = register_netdevice(ndev); - if (ret) - goto fail; - - ieee80211_debugfs_add_netdev(nsdata); - - if (local->open_count > 0) - dev_open(ndev); - local->apdev = ndev; - return 0; - -fail: - free_netdev(ndev); - return ret; -} - -void ieee80211_if_del_mgmt(struct ieee80211_local *local) -{ - struct net_device *apdev; - - ASSERT_RTNL(); - apdev = local->apdev; - ieee80211_debugfs_remove_netdev(IEEE80211_DEV_TO_SUB_IF(apdev)); - local->apdev = NULL; - unregister_netdevice(apdev); -} - void ieee80211_if_set_type(struct net_device *dev, int type) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int oldtype = sdata->type; - dev->hard_start_xmit = ieee80211_subif_start_xmit; - + /* + * We need to call this function on the master interface + * which already has a hard_start_xmit routine assigned + * which must not be changed. + */ + if (dev != sdata->local->mdev) + dev->hard_start_xmit = ieee80211_subif_start_xmit; + + /* + * Called even when register_netdevice fails, it would + * oops if assigned before initialising the rest. + */ + dev->uninit = ieee80211_if_reinit; + + /* most have no BSS pointer */ + sdata->bss = NULL; sdata->type = type; + switch (type) { case IEEE80211_IF_TYPE_WDS: - sdata->bss = NULL; + /* nothing special */ break; case IEEE80211_IF_TYPE_VLAN: + sdata->u.vlan.ap = NULL; break; case IEEE80211_IF_TYPE_AP: sdata->u.ap.dtim_period = 2; @@ -172,6 +132,7 @@ void ieee80211_if_set_type(struct net_device *dev, int type) sdata->u.ap.max_ratectrl_rateidx = -1; skb_queue_head_init(&sdata->u.ap.ps_bc_buf); sdata->bss = &sdata->u.ap; + INIT_LIST_HEAD(&sdata->u.ap.vlans); break; case IEEE80211_IF_TYPE_STA: case IEEE80211_IF_TYPE_IBSS: { @@ -187,10 +148,10 @@ void ieee80211_if_set_type(struct net_device *dev, int type) ifsta->capab = WLAN_CAPABILITY_ESS; ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | IEEE80211_AUTH_ALG_SHARED_KEY; - ifsta->create_ibss = 1; - ifsta->wmm_enabled = 1; - ifsta->auto_channel_sel = 1; - ifsta->auto_bssid_sel = 1; + ifsta->flags |= IEEE80211_STA_CREATE_IBSS | + IEEE80211_STA_WMM_ENABLED | + IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL; msdata = IEEE80211_DEV_TO_SUB_IF(sdata->local->mdev); sdata->bss = &msdata->u.ap; @@ -205,7 +166,6 @@ void ieee80211_if_set_type(struct net_device *dev, int type) dev->name, __FUNCTION__, type); } ieee80211_debugfs_change_if_type(sdata, oldtype); - ieee80211_update_default_wep_only(local); } /* Must be called with rtnl lock held. */ @@ -214,57 +174,46 @@ void ieee80211_if_reinit(struct net_device *dev) struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; - int i; + struct sk_buff *skb; ASSERT_RTNL(); + + ieee80211_free_keys(sdata); + ieee80211_if_sdata_deinit(sdata); - for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - if (!sdata->keys[i]) - continue; -#if 0 - /* The interface is down at the moment, so there is not - * really much point in disabling the keys at this point. */ - memset(addr, 0xff, ETH_ALEN); - if (local->ops->set_key) - local->ops->set_key(local_to_hw(local), DISABLE_KEY, addr, - local->keys[i], 0); -#endif - ieee80211_key_free(sdata->keys[i]); - sdata->keys[i] = NULL; - } switch (sdata->type) { + case IEEE80211_IF_TYPE_INVALID: + /* cannot happen */ + WARN_ON(1); + break; case IEEE80211_IF_TYPE_AP: { /* Remove all virtual interfaces that use this BSS * as their sdata->bss */ struct ieee80211_sub_if_data *tsdata, *n; - LIST_HEAD(tmp_list); - write_lock_bh(&local->sub_if_lock); - list_for_each_entry_safe(tsdata, n, &local->sub_if_list, list) { + list_for_each_entry_safe(tsdata, n, &local->interfaces, list) { if (tsdata != sdata && tsdata->bss == &sdata->u.ap) { printk(KERN_DEBUG "%s: removing virtual " "interface %s because its BSS interface" " is being removed\n", sdata->dev->name, tsdata->dev->name); - list_move_tail(&tsdata->list, &tmp_list); + list_del_rcu(&tsdata->list); + /* + * We have lots of time and can afford + * to sync for each interface + */ + synchronize_rcu(); + __ieee80211_if_del(local, tsdata); } } - write_unlock_bh(&local->sub_if_lock); - - list_for_each_entry_safe(tsdata, n, &tmp_list, list) - __ieee80211_if_del(local, tsdata); kfree(sdata->u.ap.beacon_head); kfree(sdata->u.ap.beacon_tail); - kfree(sdata->u.ap.generic_elem); - if (dev != local->mdev) { - struct sk_buff *skb; - while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) { - local->total_ps_buffered--; - dev_kfree_skb(skb); - } + while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) { + local->total_ps_buffered--; + dev_kfree_skb(skb); } break; @@ -272,8 +221,8 @@ void ieee80211_if_reinit(struct net_device *dev) case IEEE80211_IF_TYPE_WDS: sta = sta_info_get(local, sdata->u.wds.remote_addr); if (sta) { + sta_info_free(sta); sta_info_put(sta); - sta_info_free(sta, 0); } else { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Someone had deleted my STA " @@ -298,6 +247,9 @@ void ieee80211_if_reinit(struct net_device *dev) case IEEE80211_IF_TYPE_MNTR: dev->type = ARPHRD_ETHER; break; + case IEEE80211_IF_TYPE_VLAN: + sdata->u.vlan.ap = NULL; + break; } /* remove all STAs that are bound to this virtual interface */ @@ -327,29 +279,23 @@ int ieee80211_if_remove(struct net_device *dev, const char *name, int id) ASSERT_RTNL(); - write_lock_bh(&local->sub_if_lock); - list_for_each_entry_safe(sdata, n, &local->sub_if_list, list) { + list_for_each_entry_safe(sdata, n, &local->interfaces, list) { if ((sdata->type == id || id == -1) && strcmp(name, sdata->dev->name) == 0 && sdata->dev != local->mdev) { - list_del(&sdata->list); - write_unlock_bh(&local->sub_if_lock); + list_del_rcu(&sdata->list); + synchronize_rcu(); __ieee80211_if_del(local, sdata); - ieee80211_update_default_wep_only(local); return 0; } } - write_unlock_bh(&local->sub_if_lock); return -ENODEV; } void ieee80211_if_free(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* local->apdev must be NULL when freeing management interface */ - BUG_ON(dev == local->apdev); ieee80211_if_sdata_deinit(sdata); free_netdev(dev); } diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index e7904db55325..f0224c2311d2 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -21,77 +21,41 @@ #include <net/mac80211.h> #include "ieee80211_i.h" -#include "hostapd_ioctl.h" #include "ieee80211_rate.h" #include "wpa.h" #include "aes_ccm.h" -#include "debugfs_key.h" - -static void ieee80211_set_hw_encryption(struct net_device *dev, - struct sta_info *sta, u8 addr[ETH_ALEN], - struct ieee80211_key *key) -{ - struct ieee80211_key_conf *keyconf = NULL; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - /* default to sw encryption; this will be cleared by low-level - * driver if the hw supports requested encryption */ - if (key) - key->force_sw_encrypt = 1; - - if (key && local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, key))) { - if (local->ops->set_key(local_to_hw(local), SET_KEY, addr, - keyconf, sta ? sta->aid : 0)) { - key->force_sw_encrypt = 1; - key->hw_key_idx = HW_KEY_IDX_INVALID; - } else { - key->force_sw_encrypt = - !!(keyconf->flags & IEEE80211_KEY_FORCE_SW_ENCRYPT); - key->hw_key_idx = - keyconf->hw_key_idx; - - } - } - kfree(keyconf); -} static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, - int idx, int alg, int set_tx_key, - const u8 *_key, size_t key_len) + int idx, int alg, int remove, + int set_tx_key, const u8 *_key, + size_t key_len) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int ret = 0; struct sta_info *sta; - struct ieee80211_key *key, *old_key; - int try_hwaccel = 1; - struct ieee80211_key_conf *keyconf; + struct ieee80211_key *key; struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (idx < 0 || idx >= NUM_DEFAULT_KEYS) { + printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n", + dev->name, idx); + return -EINVAL; + } + if (is_broadcast_ether_addr(sta_addr)) { sta = NULL; - if (idx >= NUM_DEFAULT_KEYS) { - printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n", - dev->name, idx); - return -EINVAL; - } key = sdata->keys[idx]; - - /* TODO: consider adding hwaccel support for these; at least - * Atheros key cache should be able to handle this since AP is - * only transmitting frames with default keys. */ - /* FIX: hw key cache can be used when only one virtual - * STA is associated with each AP. If more than one STA - * is associated to the same AP, software encryption - * must be used. This should be done automatically - * based on configured station devices. For the time - * being, this can be only set at compile time. */ } else { set_tx_key = 0; - if (idx != 0) { + /* + * According to the standard, the key index of a pairwise + * key must be zero. However, some AP are broken when it + * comes to WEP key indices, so we work around this. + */ + if (idx != 0 && alg != ALG_WEP) { printk(KERN_DEBUG "%s: set_encrypt - non-zero idx for " "individual key\n", dev->name); return -EINVAL; @@ -100,9 +64,10 @@ static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, sta = sta_info_get(local, sta_addr); if (!sta) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG + DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "%s: set_encrypt - unknown addr " - MAC_FMT "\n", - dev->name, MAC_ARG(sta_addr)); + "%s\n", + dev->name, print_mac(mac, sta_addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ return -ENOENT; @@ -111,144 +76,25 @@ static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, key = sta->key; } - /* FIX: - * Cannot configure default hwaccel keys with WEP algorithm, if - * any of the virtual interfaces is using static WEP - * configuration because hwaccel would otherwise try to decrypt - * these frames. - * - * For now, just disable WEP hwaccel for broadcast when there is - * possibility of conflict with default keys. This can maybe later be - * optimized by using non-default keys (at least with Atheros ar521x). - */ - if (!sta && alg == ALG_WEP && !local->default_wep_only && - sdata->type != IEEE80211_IF_TYPE_IBSS && - sdata->type != IEEE80211_IF_TYPE_AP) { - try_hwaccel = 0; - } - - if (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) { - /* Software encryption cannot be used with devices that hide - * encryption from the host system, so always try to use - * hardware acceleration with such devices. */ - try_hwaccel = 1; - } - - if ((local->hw.flags & IEEE80211_HW_NO_TKIP_WMM_HWACCEL) && - alg == ALG_TKIP) { - if (sta && (sta->flags & WLAN_STA_WME)) { - /* Hardware does not support hwaccel with TKIP when using WMM. - */ - try_hwaccel = 0; - } - else if (sdata->type == IEEE80211_IF_TYPE_STA) { - sta = sta_info_get(local, sdata->u.sta.bssid); - if (sta) { - if (sta->flags & WLAN_STA_WME) { - try_hwaccel = 0; - } - sta_info_put(sta); - sta = NULL; - } - } - } - - if (alg == ALG_NONE) { - keyconf = NULL; - if (try_hwaccel && key && - key->hw_key_idx != HW_KEY_IDX_INVALID && - local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, key)) != NULL && - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - sta_addr, keyconf, sta ? sta->aid : 0)) { - printk(KERN_DEBUG "%s: set_encrypt - low-level disable" - " failed\n", dev->name); - ret = -EINVAL; - } - kfree(keyconf); - - if (set_tx_key || sdata->default_key == key) { - ieee80211_debugfs_key_remove_default(sdata); - sdata->default_key = NULL; - } - ieee80211_debugfs_key_remove(key); - if (sta) - sta->key = NULL; - else - sdata->keys[idx] = NULL; + if (remove) { ieee80211_key_free(key); key = NULL; } else { - old_key = key; - key = ieee80211_key_alloc(sta ? NULL : sdata, idx, key_len, - GFP_KERNEL); + /* + * Automatically frees any old key if present. + */ + key = ieee80211_key_alloc(sdata, sta, alg, idx, key_len, _key); if (!key) { ret = -ENOMEM; goto err_out; } - - /* default to sw encryption; low-level driver sets these if the - * requested encryption is supported */ - key->hw_key_idx = HW_KEY_IDX_INVALID; - key->force_sw_encrypt = 1; - - key->alg = alg; - key->keyidx = idx; - key->keylen = key_len; - memcpy(key->key, _key, key_len); - if (set_tx_key) - key->default_tx_key = 1; - - if (alg == ALG_CCMP) { - /* Initialize AES key state here as an optimization - * so that it does not need to be initialized for every - * packet. */ - key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt( - key->key); - if (!key->u.ccmp.tfm) { - ret = -ENOMEM; - goto err_free; - } - } - - if (set_tx_key || sdata->default_key == old_key) { - ieee80211_debugfs_key_remove_default(sdata); - sdata->default_key = NULL; - } - ieee80211_debugfs_key_remove(old_key); - if (sta) - sta->key = key; - else - sdata->keys[idx] = key; - ieee80211_key_free(old_key); - ieee80211_debugfs_key_add(local, key); - if (sta) - ieee80211_debugfs_key_sta_link(key, sta); - - if (try_hwaccel && - (alg == ALG_WEP || alg == ALG_TKIP || alg == ALG_CCMP)) - ieee80211_set_hw_encryption(dev, sta, sta_addr, key); } - if (set_tx_key || (!sta && !sdata->default_key && key)) { - sdata->default_key = key; - if (key) - ieee80211_debugfs_key_add_default(sdata); - - if (local->ops->set_key_idx && - local->ops->set_key_idx(local_to_hw(local), idx)) - printk(KERN_DEBUG "%s: failed to set TX key idx for " - "low-level driver\n", dev->name); - } + if (set_tx_key || (!sta && !sdata->default_key && key)) + ieee80211_set_default_key(sdata, idx); - if (sta) - sta_info_put(sta); - - return 0; - -err_free: - ieee80211_key_free(key); -err_out: + ret = 0; + err_out: if (sta) sta_info_put(sta); return ret; @@ -259,44 +105,25 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, struct iw_point *data, char *extra) { struct ieee80211_sub_if_data *sdata; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (local->user_space_mlme) + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type == IEEE80211_IF_TYPE_STA || sdata->type == IEEE80211_IF_TYPE_IBSS) { int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length); if (ret) return ret; - sdata->u.sta.auto_bssid_sel = 0; + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; ieee80211_sta_req_auth(dev, &sdata->u.sta); return 0; } - if (sdata->type == IEEE80211_IF_TYPE_AP) { - kfree(sdata->u.ap.generic_elem); - sdata->u.ap.generic_elem = kmalloc(data->length, GFP_KERNEL); - if (!sdata->u.ap.generic_elem) - return -ENOMEM; - memcpy(sdata->u.ap.generic_elem, extra, data->length); - sdata->u.ap.generic_elem_len = data->length; - return ieee80211_if_config(dev); - } return -EOPNOTSUPP; } -static int ieee80211_ioctl_set_radio_enabled(struct net_device *dev, - int val) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_conf *conf = &local->hw.conf; - - conf->radio_enabled = val; - return ieee80211_hw_config(wdev_priv(dev->ieee80211_ptr)); -} - static int ieee80211_ioctl_giwname(struct net_device *dev, struct iw_request_info *info, char *name, char *extra) @@ -313,9 +140,6 @@ static int ieee80211_ioctl_giwname(struct net_device *dev, case MODE_IEEE80211G: strcpy(name, "IEEE 802.11g"); break; - case MODE_ATHEROS_TURBO: - strcpy(name, "5GHz Turbo"); - break; default: strcpy(name, "IEEE 802.11"); break; @@ -480,11 +304,6 @@ int ieee80211_set_channel(struct ieee80211_local *local, int channel, int freq) struct ieee80211_channel *chan = &mode->channels[c]; if (chan->flag & IEEE80211_CHAN_W_SCAN && ((chan->chan == channel) || (chan->freq == freq))) { - /* Use next_mode as the mode preference to - * resolve non-unique channel numbers. */ - if (set && mode->mode != local->next_mode) - continue; - local->oper_channel = chan; local->oper_hw_mode = mode; set++; @@ -512,13 +331,14 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type == IEEE80211_IF_TYPE_STA) - sdata->u.sta.auto_channel_sel = 0; + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ if (freq->e == 0) { if (freq->m < 0) { if (sdata->type == IEEE80211_IF_TYPE_STA) - sdata->u.sta.auto_channel_sel = 1; + sdata->u.sta.flags |= + IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else return ieee80211_set_channel(local, freq->m, -1); @@ -554,7 +374,6 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata; size_t len = data->length; @@ -566,14 +385,17 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, if (sdata->type == IEEE80211_IF_TYPE_STA || sdata->type == IEEE80211_IF_TYPE_IBSS) { int ret; - if (local->user_space_mlme) { + if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { if (len > IEEE80211_MAX_SSID_LEN) return -EINVAL; memcpy(sdata->u.sta.ssid, ssid, len); sdata->u.sta.ssid_len = len; return 0; } - sdata->u.sta.auto_ssid_sel = !data->flags; + if (data->flags) + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; + else + sdata->u.sta.flags |= IEEE80211_STA_AUTO_SSID_SEL; ret = ieee80211_sta_set_ssid(dev, ssid, len); if (ret) return ret; @@ -628,25 +450,24 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type == IEEE80211_IF_TYPE_STA || sdata->type == IEEE80211_IF_TYPE_IBSS) { int ret; - if (local->user_space_mlme) { + if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data, ETH_ALEN); return 0; } - if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) { - sdata->u.sta.auto_bssid_sel = 1; - sdata->u.sta.auto_channel_sel = 1; - } else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) - sdata->u.sta.auto_bssid_sel = 1; + if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) + sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL; + else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) + sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL; else - sdata->u.sta.auto_bssid_sel = 0; + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; ret = ieee80211_sta_set_bssid(dev, (u8 *) &ap_addr->sa_data); if (ret) return ret; @@ -762,9 +583,6 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev, struct ieee80211_rate *rates = &mode->rates[i]; int this_rate = rates->rate; - if (mode->mode == MODE_ATHEROS_TURBO || - mode->mode == MODE_ATHEROS_TURBOG) - this_rate *= 2; if (target_rate == this_rate) { sdata->bss->max_ratectrl_rateidx = i; if (rate->fixed) @@ -798,6 +616,52 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, return 0; } +static int ieee80211_ioctl_siwtxpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *data, char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + bool need_reconfig = 0; + + if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) + return -EINVAL; + if (data->txpower.flags & IW_TXPOW_RANGE) + return -EINVAL; + if (!data->txpower.fixed) + return -EINVAL; + + if (local->hw.conf.power_level != data->txpower.value) { + local->hw.conf.power_level = data->txpower.value; + need_reconfig = 1; + } + if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { + local->hw.conf.radio_enabled = !(data->txpower.disabled); + need_reconfig = 1; + } + if (need_reconfig) { + ieee80211_hw_config(local); + /* The return value of hw_config is not of big interest here, + * as it doesn't say that it failed because of _this_ config + * change or something else. Ignore it. */ + } + + return 0; +} + +static int ieee80211_ioctl_giwtxpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *data, char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + + data->txpower.fixed = 1; + data->txpower.disabled = !(local->hw.conf.radio_enabled); + data->txpower.value = local->hw.conf.power_level; + data->txpower.flags = IW_TXPOW_DBM; + + return 0; +} + static int ieee80211_ioctl_siwrts(struct net_device *dev, struct iw_request_info *info, struct iw_param *rts, char *extra) @@ -930,335 +794,6 @@ static int ieee80211_ioctl_giwretry(struct net_device *dev, return 0; } -static void ieee80211_key_enable_hwaccel(struct ieee80211_local *local, - struct ieee80211_key *key) -{ - struct ieee80211_key_conf *keyconf; - u8 addr[ETH_ALEN]; - - if (!key || key->alg != ALG_WEP || !key->force_sw_encrypt || - (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP)) - return; - - memset(addr, 0xff, ETH_ALEN); - keyconf = ieee80211_key_data2conf(local, key); - if (keyconf && local->ops->set_key && - local->ops->set_key(local_to_hw(local), - SET_KEY, addr, keyconf, 0) == 0) { - key->force_sw_encrypt = - !!(keyconf->flags & IEEE80211_KEY_FORCE_SW_ENCRYPT); - key->hw_key_idx = keyconf->hw_key_idx; - } - kfree(keyconf); -} - - -static void ieee80211_key_disable_hwaccel(struct ieee80211_local *local, - struct ieee80211_key *key) -{ - struct ieee80211_key_conf *keyconf; - u8 addr[ETH_ALEN]; - - if (!key || key->alg != ALG_WEP || key->force_sw_encrypt || - (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP)) - return; - - memset(addr, 0xff, ETH_ALEN); - keyconf = ieee80211_key_data2conf(local, key); - if (keyconf && local->ops->set_key) - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - addr, keyconf, 0); - kfree(keyconf); - key->force_sw_encrypt = 1; -} - - -static int ieee80211_ioctl_default_wep_only(struct ieee80211_local *local, - int value) -{ - int i; - struct ieee80211_sub_if_data *sdata; - - local->default_wep_only = value; - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) - for (i = 0; i < NUM_DEFAULT_KEYS; i++) - if (value) - ieee80211_key_enable_hwaccel(local, - sdata->keys[i]); - else - ieee80211_key_disable_hwaccel(local, - sdata->keys[i]); - read_unlock(&local->sub_if_lock); - - return 0; -} - - -void ieee80211_update_default_wep_only(struct ieee80211_local *local) -{ - int i = 0; - struct ieee80211_sub_if_data *sdata; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - - if (sdata->dev == local->mdev) - continue; - - /* If there is an AP interface then depend on userspace to - set default_wep_only correctly. */ - if (sdata->type == IEEE80211_IF_TYPE_AP) { - read_unlock(&local->sub_if_lock); - return; - } - - i++; - } - - read_unlock(&local->sub_if_lock); - - if (i <= 1) - ieee80211_ioctl_default_wep_only(local, 1); - else - ieee80211_ioctl_default_wep_only(local, 0); -} - - -static int ieee80211_ioctl_prism2_param(struct net_device *dev, - struct iw_request_info *info, - void *wrqu, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - int *i = (int *) extra; - int param = *i; - int value = *(i + 1); - int ret = 0; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - switch (param) { - case PRISM2_PARAM_IEEE_802_1X: - if (local->ops->set_ieee8021x) - ret = local->ops->set_ieee8021x(local_to_hw(local), - value); - if (ret) - printk(KERN_DEBUG "%s: failed to set IEEE 802.1X (%d) " - "for low-level driver\n", dev->name, value); - else - sdata->ieee802_1x = value; - break; - - case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - if (sdata->type != IEEE80211_IF_TYPE_AP) - ret = -ENOENT; - else - sdata->use_protection = value; - break; - - case PRISM2_PARAM_PREAMBLE: - local->short_preamble = value; - break; - - case PRISM2_PARAM_STAT_TIME: - if (!local->stat_time && value) { - local->stat_timer.expires = jiffies + HZ * value / 100; - add_timer(&local->stat_timer); - } else if (local->stat_time && !value) { - del_timer_sync(&local->stat_timer); - } - local->stat_time = value; - break; - case PRISM2_PARAM_SHORT_SLOT_TIME: - if (value) - local->hw.conf.flags |= IEEE80211_CONF_SHORT_SLOT_TIME; - else - local->hw.conf.flags &= ~IEEE80211_CONF_SHORT_SLOT_TIME; - if (ieee80211_hw_config(local)) - ret = -EINVAL; - break; - - case PRISM2_PARAM_NEXT_MODE: - local->next_mode = value; - break; - - case PRISM2_PARAM_RADIO_ENABLED: - ret = ieee80211_ioctl_set_radio_enabled(dev, value); - break; - - case PRISM2_PARAM_ANTENNA_MODE: - local->hw.conf.antenna_mode = value; - if (ieee80211_hw_config(local)) - ret = -EINVAL; - break; - - case PRISM2_PARAM_STA_ANTENNA_SEL: - local->sta_antenna_sel = value; - break; - - case PRISM2_PARAM_TX_POWER_REDUCTION: - if (value < 0) - ret = -EINVAL; - else - local->hw.conf.tx_power_reduction = value; - break; - - case PRISM2_PARAM_KEY_TX_RX_THRESHOLD: - local->key_tx_rx_threshold = value; - break; - - case PRISM2_PARAM_DEFAULT_WEP_ONLY: - ret = ieee80211_ioctl_default_wep_only(local, value); - break; - - case PRISM2_PARAM_WIFI_WME_NOACK_TEST: - local->wifi_wme_noack_test = value; - break; - - case PRISM2_PARAM_SCAN_FLAGS: - local->scan_flags = value; - break; - - case PRISM2_PARAM_MIXED_CELL: - if (sdata->type != IEEE80211_IF_TYPE_STA && - sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - sdata->u.sta.mixed_cell = !!value; - break; - - case PRISM2_PARAM_HW_MODES: - local->enabled_modes = value; - break; - - case PRISM2_PARAM_CREATE_IBSS: - if (sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - sdata->u.sta.create_ibss = !!value; - break; - case PRISM2_PARAM_WMM_ENABLED: - if (sdata->type != IEEE80211_IF_TYPE_STA && - sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - sdata->u.sta.wmm_enabled = !!value; - break; - case PRISM2_PARAM_RADAR_DETECT: - local->hw.conf.radar_detect = value; - break; - case PRISM2_PARAM_SPECTRUM_MGMT: - local->hw.conf.spect_mgmt = value; - break; - default: - ret = -EOPNOTSUPP; - break; - } - - return ret; -} - - -static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, - struct iw_request_info *info, - void *wrqu, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - int *param = (int *) extra; - int ret = 0; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - switch (*param) { - case PRISM2_PARAM_IEEE_802_1X: - *param = sdata->ieee802_1x; - break; - - case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - *param = sdata->use_protection; - break; - - case PRISM2_PARAM_PREAMBLE: - *param = local->short_preamble; - break; - - case PRISM2_PARAM_STAT_TIME: - *param = local->stat_time; - break; - case PRISM2_PARAM_SHORT_SLOT_TIME: - *param = !!(local->hw.conf.flags & IEEE80211_CONF_SHORT_SLOT_TIME); - break; - - case PRISM2_PARAM_NEXT_MODE: - *param = local->next_mode; - break; - - case PRISM2_PARAM_ANTENNA_MODE: - *param = local->hw.conf.antenna_mode; - break; - - case PRISM2_PARAM_STA_ANTENNA_SEL: - *param = local->sta_antenna_sel; - break; - - case PRISM2_PARAM_TX_POWER_REDUCTION: - *param = local->hw.conf.tx_power_reduction; - break; - - case PRISM2_PARAM_KEY_TX_RX_THRESHOLD: - *param = local->key_tx_rx_threshold; - break; - - case PRISM2_PARAM_DEFAULT_WEP_ONLY: - *param = local->default_wep_only; - break; - - case PRISM2_PARAM_WIFI_WME_NOACK_TEST: - *param = local->wifi_wme_noack_test; - break; - - case PRISM2_PARAM_SCAN_FLAGS: - *param = local->scan_flags; - break; - - case PRISM2_PARAM_HW_MODES: - *param = local->enabled_modes; - break; - - case PRISM2_PARAM_CREATE_IBSS: - if (sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - *param = !!sdata->u.sta.create_ibss; - break; - - case PRISM2_PARAM_MIXED_CELL: - if (sdata->type != IEEE80211_IF_TYPE_STA && - sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - *param = !!sdata->u.sta.mixed_cell; - break; - case PRISM2_PARAM_WMM_ENABLED: - if (sdata->type != IEEE80211_IF_TYPE_STA && - sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - *param = !!sdata->u.sta.wmm_enabled; - break; - default: - ret = -EOPNOTSUPP; - break; - } - - return ret; -} - static int ieee80211_ioctl_siwmlme(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra) @@ -1291,6 +826,7 @@ static int ieee80211_ioctl_siwencode(struct net_device *dev, struct ieee80211_sub_if_data *sdata; int idx, i, alg = ALG_WEP; u8 bcaddr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + int remove = 0; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1309,21 +845,16 @@ static int ieee80211_ioctl_siwencode(struct net_device *dev, idx--; if (erq->flags & IW_ENCODE_DISABLED) - alg = ALG_NONE; + remove = 1; else if (erq->length == 0) { /* No key data - just set the default TX key index */ - if (sdata->default_key != sdata->keys[idx]) { - ieee80211_debugfs_key_remove_default(sdata); - sdata->default_key = sdata->keys[idx]; - if (sdata->default_key) - ieee80211_debugfs_key_add_default(sdata); - } + ieee80211_set_default_key(sdata, idx); return 0; } return ieee80211_set_encryption( dev, bcaddr, - idx, alg, + idx, alg, remove, !sdata->default_key, keybuf, erq->length); } @@ -1362,9 +893,9 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev, return 0; } - memcpy(key, sdata->keys[idx]->key, - min((int)erq->length, sdata->keys[idx]->keylen)); - erq->length = sdata->keys[idx]->keylen; + memcpy(key, sdata->keys[idx]->conf.key, + min_t(int, erq->length, sdata->keys[idx]->conf.keylen)); + erq->length = sdata->keys[idx]->conf.keylen; erq->flags |= IW_ENCODE_ENABLED; return 0; @@ -1390,22 +921,12 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, ret = -EINVAL; else { /* - * TODO: sdata->u.sta.key_mgmt does not match with WE18 - * value completely; could consider modifying this to - * be closer to WE18. For now, this value is not really - * used for anything else than Privacy matching, so the - * current code here should be more or less OK. + * Key management was set by wpa_supplicant, + * we only need this to associate to a network + * that has privacy enabled regardless of not + * having a key. */ - if (data->value & IW_AUTH_KEY_MGMT_802_1X) { - sdata->u.sta.key_mgmt = - IEEE80211_KEY_MGMT_WPA_EAP; - } else if (data->value & IW_AUTH_KEY_MGMT_PSK) { - sdata->u.sta.key_mgmt = - IEEE80211_KEY_MGMT_WPA_PSK; - } else { - sdata->u.sta.key_mgmt = - IEEE80211_KEY_MGMT_NONE; - } + sdata->u.sta.key_management_enabled = !!data->value; } break; case IW_AUTH_80211_AUTH_ALG: @@ -1484,11 +1005,11 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct iw_encode_ext *ext = (struct iw_encode_ext *) extra; - int alg, idx, i; + int uninitialized_var(alg), idx, i, remove = 0; switch (ext->alg) { case IW_ENCODE_ALG_NONE: - alg = ALG_NONE; + remove = 1; break; case IW_ENCODE_ALG_WEP: alg = ALG_WEP; @@ -1504,7 +1025,7 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, } if (erq->flags & IW_ENCODE_DISABLED) - alg = ALG_NONE; + remove = 1; idx = erq->flags & IW_ENCODE_INDEX; if (idx < 1 || idx > 4) { @@ -1523,20 +1044,13 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, idx--; return ieee80211_set_encryption(dev, ext->addr.sa_data, idx, alg, + remove, ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, ext->key, ext->key_len); } -static const struct iw_priv_args ieee80211_ioctl_priv[] = { - { PRISM2_IOCTL_PRISM2_PARAM, - IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 2, 0, "param" }, - { PRISM2_IOCTL_GET_PRISM2_PARAM, - IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, - IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, "get_param" }, -}; - /* Structures to export the Wireless Handlers */ static const iw_handler ieee80211_handler[] = @@ -1557,10 +1071,10 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL /* kernel code */, /* SIOCGIWPRIV */ (iw_handler) NULL /* not used */, /* SIOCSIWSTATS */ (iw_handler) NULL /* kernel code */, /* SIOCGIWSTATS */ - iw_handler_set_spy, /* SIOCSIWSPY */ - iw_handler_get_spy, /* SIOCGIWSPY */ - iw_handler_set_thrspy, /* SIOCSIWTHRSPY */ - iw_handler_get_thrspy, /* SIOCGIWTHRSPY */ + (iw_handler) NULL, /* SIOCSIWSPY */ + (iw_handler) NULL, /* SIOCGIWSPY */ + (iw_handler) NULL, /* SIOCSIWTHRSPY */ + (iw_handler) NULL, /* SIOCGIWTHRSPY */ (iw_handler) ieee80211_ioctl_siwap, /* SIOCSIWAP */ (iw_handler) ieee80211_ioctl_giwap, /* SIOCGIWAP */ (iw_handler) ieee80211_ioctl_siwmlme, /* SIOCSIWMLME */ @@ -1579,8 +1093,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) ieee80211_ioctl_giwrts, /* SIOCGIWRTS */ (iw_handler) ieee80211_ioctl_siwfrag, /* SIOCSIWFRAG */ (iw_handler) ieee80211_ioctl_giwfrag, /* SIOCGIWFRAG */ - (iw_handler) NULL, /* SIOCSIWTXPOW */ - (iw_handler) NULL, /* SIOCGIWTXPOW */ + (iw_handler) ieee80211_ioctl_siwtxpower, /* SIOCSIWTXPOW */ + (iw_handler) ieee80211_ioctl_giwtxpower, /* SIOCGIWTXPOW */ (iw_handler) ieee80211_ioctl_siwretry, /* SIOCSIWRETRY */ (iw_handler) ieee80211_ioctl_giwretry, /* SIOCGIWRETRY */ (iw_handler) ieee80211_ioctl_siwencode, /* SIOCSIWENCODE */ @@ -1599,19 +1113,9 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* -- hole -- */ }; -static const iw_handler ieee80211_private_handler[] = -{ /* SIOCIWFIRSTPRIV + */ - (iw_handler) ieee80211_ioctl_prism2_param, /* 0 */ - (iw_handler) ieee80211_ioctl_get_prism2_param, /* 1 */ -}; - const struct iw_handler_def ieee80211_iw_handler_def = { .num_standard = ARRAY_SIZE(ieee80211_handler), - .num_private = ARRAY_SIZE(ieee80211_private_handler), - .num_private_args = ARRAY_SIZE(ieee80211_ioctl_priv), .standard = (iw_handler *) ieee80211_handler, - .private = (iw_handler *) ieee80211_private_handler, - .private_args = (struct iw_priv_args *) ieee80211_ioctl_priv, .get_wireless_stats = ieee80211_get_wireless_stats, }; diff --git a/net/mac80211/ieee80211_key.h b/net/mac80211/ieee80211_key.h index c33384912782..fc770e98d47b 100644 --- a/net/mac80211/ieee80211_key.h +++ b/net/mac80211/ieee80211_key.h @@ -11,7 +11,7 @@ #define IEEE80211_KEY_H #include <linux/types.h> -#include <linux/kref.h> +#include <linux/list.h> #include <linux/crypto.h> #include <net/mac80211.h> @@ -41,11 +41,21 @@ #define NUM_RX_DATA_QUEUES 17 +struct ieee80211_local; +struct ieee80211_sub_if_data; +struct sta_info; + +#define KEY_FLAG_UPLOADED_TO_HARDWARE (1<<0) + struct ieee80211_key { - struct kref kref; + struct ieee80211_local *local; + struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; + + struct list_head list; + + unsigned int flags; - int hw_key_idx; /* filled and used by low-level driver */ - ieee80211_key_alg alg; union { struct { /* last used TSC */ @@ -73,22 +83,16 @@ struct ieee80211_key { u8 rx_crypto_buf[6 * AES_BLOCK_LEN]; } ccmp; } u; - int tx_rx_count; /* number of times this key has been used */ - int keylen; - /* if the low level driver can provide hardware acceleration it should - * clear this flag */ - unsigned int force_sw_encrypt:1; - unsigned int default_tx_key:1; /* This key is the new default TX key - * (used only for broadcast keys). */ - s8 keyidx; /* WEP key index */ + /* number of times this key has been used */ + int tx_rx_count; #ifdef CONFIG_MAC80211_DEBUGFS struct { struct dentry *stalink; struct dentry *dir; struct dentry *keylen; - struct dentry *force_sw_encrypt; + struct dentry *flags; struct dentry *keyidx; struct dentry *hw_key_idx; struct dentry *tx_rx_count; @@ -97,10 +101,27 @@ struct ieee80211_key { struct dentry *rx_spec; struct dentry *replays; struct dentry *key; + struct dentry *ifindex; } debugfs; #endif - u8 key[0]; + /* + * key config, must be last because it contains key + * material as variable length member + */ + struct ieee80211_key_conf conf; }; +struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + enum ieee80211_key_alg alg, + int idx, + size_t key_len, + const u8 *key_data); +void ieee80211_key_free(struct ieee80211_key *key); +void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); + #endif /* IEEE80211_KEY_H */ diff --git a/net/mac80211/ieee80211_led.c b/net/mac80211/ieee80211_led.c index 719d75b20707..4cf89af9d100 100644 --- a/net/mac80211/ieee80211_led.c +++ b/net/mac80211/ieee80211_led.c @@ -33,33 +33,58 @@ void ieee80211_led_tx(struct ieee80211_local *local, int q) led_trigger_event(local->tx_led, LED_FULL); } +void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) +{ + if (unlikely(!local->assoc_led)) + return; + if (associated) + led_trigger_event(local->assoc_led, LED_FULL); + else + led_trigger_event(local->assoc_led, LED_OFF); +} + void ieee80211_led_init(struct ieee80211_local *local) { local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (!local->rx_led) - return; - snprintf(local->rx_led_name, sizeof(local->rx_led_name), - "%srx", wiphy_name(local->hw.wiphy)); - local->rx_led->name = local->rx_led_name; - if (led_trigger_register(local->rx_led)) { - kfree(local->rx_led); - local->rx_led = NULL; + if (local->rx_led) { + snprintf(local->rx_led_name, sizeof(local->rx_led_name), + "%srx", wiphy_name(local->hw.wiphy)); + local->rx_led->name = local->rx_led_name; + if (led_trigger_register(local->rx_led)) { + kfree(local->rx_led); + local->rx_led = NULL; + } } local->tx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (!local->tx_led) - return; - snprintf(local->tx_led_name, sizeof(local->tx_led_name), - "%stx", wiphy_name(local->hw.wiphy)); - local->tx_led->name = local->tx_led_name; - if (led_trigger_register(local->tx_led)) { - kfree(local->tx_led); - local->tx_led = NULL; + if (local->tx_led) { + snprintf(local->tx_led_name, sizeof(local->tx_led_name), + "%stx", wiphy_name(local->hw.wiphy)); + local->tx_led->name = local->tx_led_name; + if (led_trigger_register(local->tx_led)) { + kfree(local->tx_led); + local->tx_led = NULL; + } + } + + local->assoc_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); + if (local->assoc_led) { + snprintf(local->assoc_led_name, sizeof(local->assoc_led_name), + "%sassoc", wiphy_name(local->hw.wiphy)); + local->assoc_led->name = local->assoc_led_name; + if (led_trigger_register(local->assoc_led)) { + kfree(local->assoc_led); + local->assoc_led = NULL; + } } } void ieee80211_led_exit(struct ieee80211_local *local) { + if (local->assoc_led) { + led_trigger_unregister(local->assoc_led); + kfree(local->assoc_led); + } if (local->tx_led) { led_trigger_unregister(local->tx_led); kfree(local->tx_led); @@ -70,6 +95,16 @@ void ieee80211_led_exit(struct ieee80211_local *local) } } +char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + if (local->assoc_led) + return local->assoc_led_name; + return NULL; +} +EXPORT_SYMBOL(__ieee80211_get_assoc_led_name); + char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); diff --git a/net/mac80211/ieee80211_led.h b/net/mac80211/ieee80211_led.h index 5c8ab8263878..0feb22619835 100644 --- a/net/mac80211/ieee80211_led.h +++ b/net/mac80211/ieee80211_led.h @@ -14,6 +14,8 @@ #ifdef CONFIG_MAC80211_LEDS extern void ieee80211_led_rx(struct ieee80211_local *local); extern void ieee80211_led_tx(struct ieee80211_local *local, int q); +extern void ieee80211_led_assoc(struct ieee80211_local *local, + bool associated); extern void ieee80211_led_init(struct ieee80211_local *local); extern void ieee80211_led_exit(struct ieee80211_local *local); #else @@ -23,6 +25,10 @@ static inline void ieee80211_led_rx(struct ieee80211_local *local) static inline void ieee80211_led_tx(struct ieee80211_local *local, int q) { } +static inline void ieee80211_led_assoc(struct ieee80211_local *local, + bool associated) +{ +} static inline void ieee80211_led_init(struct ieee80211_local *local) { } diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c index 2118de04fc35..93abb8fff141 100644 --- a/net/mac80211/ieee80211_rate.c +++ b/net/mac80211/ieee80211_rate.c @@ -9,6 +9,7 @@ */ #include <linux/kernel.h> +#include <linux/rtnetlink.h> #include "ieee80211_rate.h" #include "ieee80211_i.h" @@ -137,3 +138,43 @@ void rate_control_put(struct rate_control_ref *ref) { kref_put(&ref->kref, rate_control_release); } + +int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, + const char *name) +{ + struct rate_control_ref *ref, *old; + + ASSERT_RTNL(); + if (local->open_count || netif_running(local->mdev)) + return -EBUSY; + + ref = rate_control_alloc(name, local); + if (!ref) { + printk(KERN_WARNING "%s: Failed to select rate control " + "algorithm\n", wiphy_name(local->hw.wiphy)); + return -ENOENT; + } + + old = local->rate_ctrl; + local->rate_ctrl = ref; + if (old) { + rate_control_put(old); + sta_info_flush(local, NULL); + } + + printk(KERN_DEBUG "%s: Selected rate control " + "algorithm '%s'\n", wiphy_name(local->hw.wiphy), + ref->ops->name); + + + return 0; +} + +void rate_control_deinitialize(struct ieee80211_local *local) +{ + struct rate_control_ref *ref; + + ref = local->rate_ctrl; + local->rate_ctrl = NULL; + rate_control_put(ref); +} diff --git a/net/mac80211/ieee80211_rate.h b/net/mac80211/ieee80211_rate.h index f021a028d9d0..7cd1ebab4f83 100644 --- a/net/mac80211/ieee80211_rate.h +++ b/net/mac80211/ieee80211_rate.h @@ -30,8 +30,6 @@ struct rate_control_extra { /* parameters from the caller to rate_control_get_rate(): */ struct ieee80211_hw_mode *mode; - int mgmt_data; /* this is data frame that is used for management - * (e.g., IEEE 802.1X EAPOL) */ u16 ethertype; }; @@ -141,4 +139,10 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) #endif } + +/* functions for rate control related to a device */ +int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, + const char *name); +void rate_control_deinitialize(struct ieee80211_local *local); + #endif /* IEEE80211_RATE_H */ diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index 0d99b685df5f..1641e8fe44b7 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -31,7 +31,7 @@ #include <net/mac80211.h> #include "ieee80211_i.h" #include "ieee80211_rate.h" -#include "hostapd_ioctl.h" +#include "ieee80211_led.h" #define IEEE80211_AUTH_TIMEOUT (HZ / 5) #define IEEE80211_AUTH_MAX_TRIES 3 @@ -108,11 +108,10 @@ struct ieee802_11_elems { u8 wmm_param_len; }; -typedef enum { ParseOK = 0, ParseUnknown = 1, ParseFailed = -1 } ParseRes; +enum ParseRes { ParseOK = 0, ParseUnknown = 1, ParseFailed = -1 }; - -static ParseRes ieee802_11_parse_elems(u8 *start, size_t len, - struct ieee802_11_elems *elems) +static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len, + struct ieee802_11_elems *elems) { size_t left = len; u8 *pos = start; @@ -234,7 +233,6 @@ static int ecw2cw(int ecw) return cw - 1; } - static void ieee80211_sta_wmm_params(struct net_device *dev, struct ieee80211_if_sta *ifsta, u8 *wmm_param, size_t wmm_param_len) @@ -318,17 +316,43 @@ static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value) struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; int use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; + int preamble_mode = (erp_value & WLAN_ERP_BARKER_PREAMBLE) != 0; + u8 changes = 0; + DECLARE_MAC_BUF(mac); - if (use_protection != sdata->use_protection) { + if (use_protection != !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION)) { if (net_ratelimit()) { printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" - MAC_FMT ")\n", + "%s)\n", dev->name, use_protection ? "enabled" : "disabled", - MAC_ARG(ifsta->bssid)); + print_mac(mac, ifsta->bssid)); + } + if (use_protection) + sdata->flags |= IEEE80211_SDATA_USE_PROTECTION; + else + sdata->flags &= ~IEEE80211_SDATA_USE_PROTECTION; + changes |= IEEE80211_ERP_CHANGE_PROTECTION; + } + + if (preamble_mode != !(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE)) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: switched to %s barker preamble" + " (BSSID=%s)\n", + dev->name, + (preamble_mode == WLAN_ERP_PREAMBLE_SHORT) ? + "short" : "long", + print_mac(mac, ifsta->bssid)); } - sdata->use_protection = use_protection; + if (preamble_mode) + sdata->flags &= ~IEEE80211_SDATA_SHORT_PREAMBLE; + else + sdata->flags |= IEEE80211_SDATA_SHORT_PREAMBLE; + changes |= IEEE80211_ERP_CHANGE_PREAMBLE; } + + if (changes) + ieee80211_erp_info_change_notify(dev, changes); } @@ -344,7 +368,7 @@ static void ieee80211_sta_send_associnfo(struct net_device *dev, return; buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len + - ifsta->assocresp_ies_len), GFP_ATOMIC); + ifsta->assocresp_ies_len), GFP_KERNEL); if (!buf) return; @@ -384,19 +408,21 @@ static void ieee80211_sta_send_associnfo(struct net_device *dev, static void ieee80211_set_associated(struct net_device *dev, - struct ieee80211_if_sta *ifsta, int assoc) + struct ieee80211_if_sta *ifsta, + bool assoc) { + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); union iwreq_data wrqu; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (ifsta->associated == assoc) + if (!!(ifsta->flags & IEEE80211_STA_ASSOCIATED) == assoc) return; - ifsta->associated = assoc; - if (assoc) { struct ieee80211_sub_if_data *sdata; struct ieee80211_sta_bss *bss; + + ifsta->flags |= IEEE80211_STA_ASSOCIATED; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type != IEEE80211_IF_TYPE_STA) return; @@ -409,18 +435,21 @@ static void ieee80211_set_associated(struct net_device *dev, } netif_carrier_on(dev); - ifsta->prev_bssid_set = 1; + ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); ieee80211_sta_send_associnfo(dev, ifsta); } else { + ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; + netif_carrier_off(dev); - sdata->use_protection = 0; + ieee80211_reset_erp_info(dev); memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN); } wrqu.ap_addr.sa_family = ARPHRD_ETHER; wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); ifsta->last_probe = jiffies; + ieee80211_led_assoc(local, assoc); } static void ieee80211_set_disassoc(struct net_device *dev, @@ -447,8 +476,8 @@ static void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); pkt_data->ifindex = sdata->dev->ifindex; - pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT); - pkt_data->do_not_encrypt = !encrypt; + if (!encrypt) + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; dev_queue_xmit(skb); } @@ -495,18 +524,20 @@ static void ieee80211_send_auth(struct net_device *dev, static void ieee80211_authenticate(struct net_device *dev, struct ieee80211_if_sta *ifsta) { + DECLARE_MAC_BUF(mac); + ifsta->auth_tries++; if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) { - printk(KERN_DEBUG "%s: authentication with AP " MAC_FMT + printk(KERN_DEBUG "%s: authentication with AP %s" " timed out\n", - dev->name, MAC_ARG(ifsta->bssid)); + dev->name, print_mac(mac, ifsta->bssid)); ifsta->state = IEEE80211_DISABLED; return; } ifsta->state = IEEE80211_AUTHENTICATE; - printk(KERN_DEBUG "%s: authenticate with AP " MAC_FMT "\n", - dev->name, MAC_ARG(ifsta->bssid)); + printk(KERN_DEBUG "%s: authenticate with AP %s\n", + dev->name, print_mac(mac, ifsta->bssid)); ieee80211_send_auth(dev, ifsta, 1, NULL, 0, 0); @@ -559,7 +590,7 @@ static void ieee80211_send_assoc(struct net_device *dev, memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - if (ifsta->prev_bssid_set) { + if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { skb_put(skb, 10); mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, IEEE80211_STYPE_REASSOC_REQ); @@ -589,8 +620,6 @@ static void ieee80211_send_assoc(struct net_device *dev, *pos++ = len; for (i = 0; i < len; i++) { int rate = mode->rates[i].rate; - if (mode->mode == MODE_ATHEROS_TURBO) - rate /= 2; *pos++ = (u8) (rate / 5); } @@ -600,8 +629,6 @@ static void ieee80211_send_assoc(struct net_device *dev, *pos++ = mode->num_rates - len; for (i = len; i < mode->num_rates; i++) { int rate = mode->rates[i].rate; - if (mode->mode == MODE_ATHEROS_TURBO) - rate /= 2; *pos++ = (u8) (rate / 5); } } @@ -611,7 +638,7 @@ static void ieee80211_send_assoc(struct net_device *dev, memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len); } - if (wmm && ifsta->wmm_enabled) { + if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; *pos++ = 7; /* len */ @@ -626,7 +653,7 @@ static void ieee80211_send_assoc(struct net_device *dev, kfree(ifsta->assocreq_ies); ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; - ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_ATOMIC); + ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL); if (ifsta->assocreq_ies) memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len); @@ -698,8 +725,8 @@ static int ieee80211_privacy_mismatch(struct net_device *dev, struct ieee80211_sta_bss *bss; int res = 0; - if (!ifsta || ifsta->mixed_cell || - ifsta->key_mgmt != IEEE80211_KEY_MGMT_NONE) + if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL) || + ifsta->key_management_enabled) return 0; bss = ieee80211_rx_bss_get(dev, ifsta->bssid); @@ -719,18 +746,20 @@ static int ieee80211_privacy_mismatch(struct net_device *dev, static void ieee80211_associate(struct net_device *dev, struct ieee80211_if_sta *ifsta) { + DECLARE_MAC_BUF(mac); + ifsta->assoc_tries++; if (ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { - printk(KERN_DEBUG "%s: association with AP " MAC_FMT + printk(KERN_DEBUG "%s: association with AP %s" " timed out\n", - dev->name, MAC_ARG(ifsta->bssid)); + dev->name, print_mac(mac, ifsta->bssid)); ifsta->state = IEEE80211_DISABLED; return; } ifsta->state = IEEE80211_ASSOCIATE; - printk(KERN_DEBUG "%s: associate with AP " MAC_FMT "\n", - dev->name, MAC_ARG(ifsta->bssid)); + printk(KERN_DEBUG "%s: associate with AP %s\n", + dev->name, print_mac(mac, ifsta->bssid)); if (ieee80211_privacy_mismatch(dev, ifsta)) { printk(KERN_DEBUG "%s: mismatch in privacy configuration and " "mixed-cell disabled - abort association\n", dev->name); @@ -750,6 +779,7 @@ static void ieee80211_associated(struct net_device *dev, struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta; int disassoc; + DECLARE_MAC_BUF(mac); /* TODO: start monitoring current AP signal quality and number of * missed beacons. Scan other channels every now and then and search @@ -760,29 +790,27 @@ static void ieee80211_associated(struct net_device *dev, sta = sta_info_get(local, ifsta->bssid); if (!sta) { - printk(KERN_DEBUG "%s: No STA entry for own AP " MAC_FMT "\n", - dev->name, MAC_ARG(ifsta->bssid)); + printk(KERN_DEBUG "%s: No STA entry for own AP %s\n", + dev->name, print_mac(mac, ifsta->bssid)); disassoc = 1; } else { disassoc = 0; if (time_after(jiffies, sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { - if (ifsta->probereq_poll) { + if (ifsta->flags & IEEE80211_STA_PROBEREQ_POLL) { printk(KERN_DEBUG "%s: No ProbeResp from " - "current AP " MAC_FMT " - assume out of " + "current AP %s - assume out of " "range\n", - dev->name, MAC_ARG(ifsta->bssid)); + dev->name, print_mac(mac, ifsta->bssid)); disassoc = 1; - sta_info_free(sta, 0); - ifsta->probereq_poll = 0; - } else { + sta_info_free(sta); + } else ieee80211_send_probe_req(dev, ifsta->bssid, local->scan_ssid, local->scan_ssid_len); - ifsta->probereq_poll = 1; - } + ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL; } else { - ifsta->probereq_poll = 0; + ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL; if (time_after(jiffies, ifsta->last_probe + IEEE80211_PROBE_INTERVAL)) { ifsta->last_probe = jiffies; @@ -862,10 +890,7 @@ static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst, pos = skb_put(skb, 1); supp_rates[1]++; } - if (mode->mode == MODE_ATHEROS_TURBO) - *pos = rate->rate / 10; - else - *pos = rate->rate / 5; + *pos = rate->rate / 5; } ieee80211_sta_tx(dev, skb, 0); @@ -876,7 +901,7 @@ static int ieee80211_sta_wep_configured(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!sdata || !sdata->default_key || - sdata->default_key->alg != ALG_WEP) + sdata->default_key->conf.alg != ALG_WEP) return 0; return 1; } @@ -886,7 +911,7 @@ static void ieee80211_auth_completed(struct net_device *dev, struct ieee80211_if_sta *ifsta) { printk(KERN_DEBUG "%s: authenticated\n", dev->name); - ifsta->authenticated = 1; + ifsta->flags |= IEEE80211_STA_AUTHENTICATED; ieee80211_associate(dev, ifsta); } @@ -924,37 +949,38 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); u16 auth_alg, auth_transaction, status_code; + DECLARE_MAC_BUF(mac); if (ifsta->state != IEEE80211_AUTHENTICATE && sdata->type != IEEE80211_IF_TYPE_IBSS) { printk(KERN_DEBUG "%s: authentication frame received from " - MAC_FMT ", but not in authenticate state - ignored\n", - dev->name, MAC_ARG(mgmt->sa)); + "%s, but not in authenticate state - ignored\n", + dev->name, print_mac(mac, mgmt->sa)); return; } if (len < 24 + 6) { printk(KERN_DEBUG "%s: too short (%zd) authentication frame " - "received from " MAC_FMT " - ignored\n", - dev->name, len, MAC_ARG(mgmt->sa)); + "received from %s - ignored\n", + dev->name, len, print_mac(mac, mgmt->sa)); return; } if (sdata->type != IEEE80211_IF_TYPE_IBSS && memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { printk(KERN_DEBUG "%s: authentication frame received from " - "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - " - "ignored\n", dev->name, MAC_ARG(mgmt->sa), - MAC_ARG(mgmt->bssid)); + "unknown AP (SA=%s BSSID=%s) - " + "ignored\n", dev->name, print_mac(mac, mgmt->sa), + print_mac(mac, mgmt->bssid)); return; } if (sdata->type != IEEE80211_IF_TYPE_IBSS && memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) { printk(KERN_DEBUG "%s: authentication frame received from " - "unknown BSSID (SA=" MAC_FMT " BSSID=" MAC_FMT ") - " - "ignored\n", dev->name, MAC_ARG(mgmt->sa), - MAC_ARG(mgmt->bssid)); + "unknown BSSID (SA=%s BSSID=%s) - " + "ignored\n", dev->name, print_mac(mac, mgmt->sa), + print_mac(mac, mgmt->bssid)); return; } @@ -962,9 +988,9 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); - printk(KERN_DEBUG "%s: RX authentication from " MAC_FMT " (alg=%d " + printk(KERN_DEBUG "%s: RX authentication from %s (alg=%d " "transaction=%d status=%d)\n", - dev->name, MAC_ARG(mgmt->sa), auth_alg, + dev->name, print_mac(mac, mgmt->sa), auth_alg, auth_transaction, status_code); if (sdata->type == IEEE80211_IF_TYPE_IBSS) { @@ -1051,29 +1077,30 @@ static void ieee80211_rx_mgmt_deauth(struct net_device *dev, size_t len) { u16 reason_code; + DECLARE_MAC_BUF(mac); if (len < 24 + 2) { printk(KERN_DEBUG "%s: too short (%zd) deauthentication frame " - "received from " MAC_FMT " - ignored\n", - dev->name, len, MAC_ARG(mgmt->sa)); + "received from %s - ignored\n", + dev->name, len, print_mac(mac, mgmt->sa)); return; } if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { printk(KERN_DEBUG "%s: deauthentication frame received from " - "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - " - "ignored\n", dev->name, MAC_ARG(mgmt->sa), - MAC_ARG(mgmt->bssid)); + "unknown AP (SA=%s BSSID=%s) - " + "ignored\n", dev->name, print_mac(mac, mgmt->sa), + print_mac(mac, mgmt->bssid)); return; } reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); - printk(KERN_DEBUG "%s: RX deauthentication from " MAC_FMT + printk(KERN_DEBUG "%s: RX deauthentication from %s" " (reason=%d)\n", - dev->name, MAC_ARG(mgmt->sa), reason_code); + dev->name, print_mac(mac, mgmt->sa), reason_code); - if (ifsta->authenticated) { + if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) { printk(KERN_DEBUG "%s: deauthenticated\n", dev->name); } @@ -1086,7 +1113,7 @@ static void ieee80211_rx_mgmt_deauth(struct net_device *dev, } ieee80211_set_disassoc(dev, ifsta, 1); - ifsta->authenticated = 0; + ifsta->flags &= ~IEEE80211_STA_AUTHENTICATED; } @@ -1096,29 +1123,30 @@ static void ieee80211_rx_mgmt_disassoc(struct net_device *dev, size_t len) { u16 reason_code; + DECLARE_MAC_BUF(mac); if (len < 24 + 2) { printk(KERN_DEBUG "%s: too short (%zd) disassociation frame " - "received from " MAC_FMT " - ignored\n", - dev->name, len, MAC_ARG(mgmt->sa)); + "received from %s - ignored\n", + dev->name, len, print_mac(mac, mgmt->sa)); return; } if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { printk(KERN_DEBUG "%s: disassociation frame received from " - "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - " - "ignored\n", dev->name, MAC_ARG(mgmt->sa), - MAC_ARG(mgmt->bssid)); + "unknown AP (SA=%s BSSID=%s) - " + "ignored\n", dev->name, print_mac(mac, mgmt->sa), + print_mac(mac, mgmt->bssid)); return; } reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); - printk(KERN_DEBUG "%s: RX disassociation from " MAC_FMT + printk(KERN_DEBUG "%s: RX disassociation from %s" " (reason=%d)\n", - dev->name, MAC_ARG(mgmt->sa), reason_code); + dev->name, print_mac(mac, mgmt->sa), reason_code); - if (ifsta->associated) + if (ifsta->flags & IEEE80211_STA_ASSOCIATED) printk(KERN_DEBUG "%s: disassociated\n", dev->name); if (ifsta->state == IEEE80211_ASSOCIATED) { @@ -1145,29 +1173,30 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, struct ieee802_11_elems elems; u8 *pos; int i, j; + DECLARE_MAC_BUF(mac); /* AssocResp and ReassocResp have identical structure, so process both * of them in this function. */ if (ifsta->state != IEEE80211_ASSOCIATE) { printk(KERN_DEBUG "%s: association frame received from " - MAC_FMT ", but not in associate state - ignored\n", - dev->name, MAC_ARG(mgmt->sa)); + "%s, but not in associate state - ignored\n", + dev->name, print_mac(mac, mgmt->sa)); return; } if (len < 24 + 6) { printk(KERN_DEBUG "%s: too short (%zd) association frame " - "received from " MAC_FMT " - ignored\n", - dev->name, len, MAC_ARG(mgmt->sa)); + "received from %s - ignored\n", + dev->name, len, print_mac(mac, mgmt->sa)); return; } if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { printk(KERN_DEBUG "%s: association frame received from " - "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - " - "ignored\n", dev->name, MAC_ARG(mgmt->sa), - MAC_ARG(mgmt->bssid)); + "unknown AP (SA=%s BSSID=%s) - " + "ignored\n", dev->name, print_mac(mac, mgmt->sa), + print_mac(mac, mgmt->bssid)); return; } @@ -1179,16 +1208,18 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, "set\n", dev->name, aid); aid &= ~(BIT(15) | BIT(14)); - printk(KERN_DEBUG "%s: RX %sssocResp from " MAC_FMT " (capab=0x%x " + printk(KERN_DEBUG "%s: RX %sssocResp from %s (capab=0x%x " "status=%d aid=%d)\n", - dev->name, reassoc ? "Rea" : "A", MAC_ARG(mgmt->sa), + dev->name, reassoc ? "Rea" : "A", print_mac(mac, mgmt->sa), capab_info, status_code, aid); if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", dev->name, status_code); - if (status_code == WLAN_STATUS_REASSOC_NO_ASSOC) - ifsta->prev_bssid_set = 0; + /* if this was a reassociation, ensure we try a "full" + * association next time. This works around some broken APs + * which do not correctly reject reassociation requests. */ + ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; return; } @@ -1224,7 +1255,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, kfree(ifsta->assocresp_ies); ifsta->assocresp_ies_len = len - (pos - (u8 *) mgmt); - ifsta->assocresp_ies = kmalloc(ifsta->assocresp_ies_len, GFP_ATOMIC); + ifsta->assocresp_ies = kmalloc(ifsta->assocresp_ies_len, GFP_KERNEL); if (ifsta->assocresp_ies) memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len); @@ -1234,7 +1265,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, sta = sta_info_get(local, ifsta->bssid); if (!sta) { struct ieee80211_sta_bss *bss; - sta = sta_info_add(local, dev, ifsta->bssid, GFP_ATOMIC); + sta = sta_info_add(local, dev, ifsta->bssid, GFP_KERNEL); if (!sta) { printk(KERN_DEBUG "%s: failed to add STA entry for the" " AP\n", dev->name); @@ -1250,23 +1281,18 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, } sta->dev = dev; - sta->flags |= WLAN_STA_AUTH | WLAN_STA_ASSOC; - sta->assoc_ap = 1; + sta->flags |= WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP; rates = 0; mode = local->oper_hw_mode; for (i = 0; i < elems.supp_rates_len; i++) { int rate = (elems.supp_rates[i] & 0x7f) * 5; - if (mode->mode == MODE_ATHEROS_TURBO) - rate *= 2; for (j = 0; j < mode->num_rates; j++) if (mode->rates[j].rate == rate) rates |= BIT(j); } for (i = 0; i < elems.ext_supp_rates_len; i++) { int rate = (elems.ext_supp_rates[i] & 0x7f) * 5; - if (mode->mode == MODE_ATHEROS_TURBO) - rate *= 2; for (j = 0; j < mode->num_rates; j++) if (mode->rates[j].rate == rate) rates |= BIT(j); @@ -1275,7 +1301,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, rate_control_rate_init(sta, local); - if (elems.wmm_param && ifsta->wmm_enabled) { + if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { sta->flags |= WLAN_STA_WME; ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, elems.wmm_param_len); @@ -1418,14 +1444,16 @@ static void ieee80211_rx_bss_info(struct net_device *dev, struct sta_info *sta; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); u64 timestamp; + DECLARE_MAC_BUF(mac); + DECLARE_MAC_BUF(mac2); if (!beacon && memcmp(mgmt->da, dev->dev_addr, ETH_ALEN)) return; /* ignore ProbeResp to foreign address */ #if 0 - printk(KERN_DEBUG "%s: RX %s from " MAC_FMT " to " MAC_FMT "\n", + printk(KERN_DEBUG "%s: RX %s from %s to %s\n", dev->name, beacon ? "Beacon" : "Probe Response", - MAC_ARG(mgmt->sa), MAC_ARG(mgmt->da)); + print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da)); #endif baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; @@ -1444,10 +1472,10 @@ static void ieee80211_rx_bss_info(struct net_device *dev, else tsf = -1LLU; if (time_after(jiffies, last_tsf_debug + 5 * HZ)) { - printk(KERN_DEBUG "RX beacon SA=" MAC_FMT " BSSID=" - MAC_FMT " TSF=0x%llx BCN=0x%llx diff=%lld " + printk(KERN_DEBUG "RX beacon SA=%s BSSID=" + "%s TSF=0x%llx BCN=0x%llx diff=%lld " "@%lu\n", - MAC_ARG(mgmt->sa), MAC_ARG(mgmt->bssid), + print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->bssid), (unsigned long long)tsf, (unsigned long long)timestamp, (unsigned long long)(tsf - timestamp), @@ -1486,8 +1514,6 @@ static void ieee80211_rx_bss_info(struct net_device *dev, rate = elems.ext_supp_rates [i - elems.supp_rates_len]; own_rate = 5 * (rate & 0x7f); - if (mode->mode == MODE_ATHEROS_TURBO) - own_rate *= 2; for (j = 0; j < num_rates; j++) if (rates[j].rate == own_rate) supp_rates |= BIT(j); @@ -1503,9 +1529,9 @@ static void ieee80211_rx_bss_info(struct net_device *dev, } if (sta->supp_rates != prev_rates) { printk(KERN_DEBUG "%s: updated supp_rates set for " - MAC_FMT " based on beacon info (0x%x & 0x%x -> " + "%s based on beacon info (0x%x & 0x%x -> " "0x%x)\n", - dev->name, MAC_ARG(sta->addr), prev_rates, + dev->name, print_mac(mac, sta->addr), prev_rates, supp_rates, sta->supp_rates); } sta_info_put(sta); @@ -1672,7 +1698,7 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, return; ifsta = &sdata->u.sta; - if (!ifsta->associated || + if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED) || memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; @@ -1688,7 +1714,7 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, if (elems.erp_info && elems.erp_info_len >= 1) ieee80211_handle_erp_ie(dev, elems.erp_info[0]); - if (elems.wmm_param && ifsta->wmm_enabled) { + if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, elems.wmm_param_len); } @@ -1707,6 +1733,11 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, struct sk_buff *skb; struct ieee80211_mgmt *resp; u8 *pos, *end; + DECLARE_MAC_BUF(mac); +#ifdef CONFIG_MAC80211_IBSS_DEBUG + DECLARE_MAC_BUF(mac2); + DECLARE_MAC_BUF(mac3); +#endif if (sdata->type != IEEE80211_IF_TYPE_IBSS || ifsta->state != IEEE80211_IBSS_JOINED || @@ -1719,10 +1750,10 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, tx_last_beacon = 1; #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: RX ProbeReq SA=" MAC_FMT " DA=" MAC_FMT " BSSID=" - MAC_FMT " (tx_last_beacon=%d)\n", - dev->name, MAC_ARG(mgmt->sa), MAC_ARG(mgmt->da), - MAC_ARG(mgmt->bssid), tx_last_beacon); + printk(KERN_DEBUG "%s: RX ProbeReq SA=%s DA=%s BSSID=" + "%s (tx_last_beacon=%d)\n", + dev->name, print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da), + print_mac(mac3, mgmt->bssid), tx_last_beacon); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (!tx_last_beacon) @@ -1738,8 +1769,8 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, pos + 2 + pos[1] > end) { if (net_ratelimit()) { printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " - "from " MAC_FMT "\n", - dev->name, MAC_ARG(mgmt->sa)); + "from %s\n", + dev->name, print_mac(mac, mgmt->sa)); } return; } @@ -1751,15 +1782,15 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, } /* Reply with ProbeResp */ - skb = skb_copy(ifsta->probe_resp, GFP_ATOMIC); + skb = skb_copy(ifsta->probe_resp, GFP_KERNEL); if (!skb) return; resp = (struct ieee80211_mgmt *) skb->data; memcpy(resp->da, mgmt->sa, ETH_ALEN); #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: Sending ProbeResp to " MAC_FMT "\n", - dev->name, MAC_ARG(resp->da)); + printk(KERN_DEBUG "%s: Sending ProbeResp to %s\n", + dev->name, print_mac(mac, resp->da)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ ieee80211_sta_tx(dev, skb, 0); } @@ -1890,7 +1921,7 @@ static int ieee80211_sta_active_ibss(struct net_device *dev) int active = 0; struct sta_info *sta; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); list_for_each_entry(sta, &local->sta_list, list) { if (sta->dev == dev && time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL, @@ -1899,7 +1930,7 @@ static int ieee80211_sta_active_ibss(struct net_device *dev) break; } } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); return active; } @@ -1909,16 +1940,25 @@ static void ieee80211_sta_expire(struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta, *tmp; + LIST_HEAD(tmp_list); + DECLARE_MAC_BUF(mac); - spin_lock_bh(&local->sta_lock); + write_lock_bh(&local->sta_lock); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) if (time_after(jiffies, sta->last_rx + IEEE80211_IBSS_INACTIVITY_LIMIT)) { - printk(KERN_DEBUG "%s: expiring inactive STA " MAC_FMT - "\n", dev->name, MAC_ARG(sta->addr)); - sta_info_free(sta, 1); + printk(KERN_DEBUG "%s: expiring inactive STA %s\n", + dev->name, print_mac(mac, sta->addr)); + __sta_info_get(sta); + sta_info_remove(sta); + list_add(&sta->list, &tmp_list); } - spin_unlock_bh(&local->sta_lock); + write_unlock_bh(&local->sta_lock); + + list_for_each_entry_safe(sta, tmp, &tmp_list, list) { + sta_info_free(sta); + sta_info_put(sta); + } } @@ -2047,7 +2087,8 @@ static void ieee80211_sta_reset_auth(struct net_device *dev, printk(KERN_DEBUG "%s: Initial auth_alg=%d\n", dev->name, ifsta->auth_alg); ifsta->auth_transaction = -1; - ifsta->associated = ifsta->auth_tries = ifsta->assoc_tries = 0; + ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; + ifsta->auth_tries = ifsta->assoc_tries = 0; netif_carrier_off(dev); } @@ -2061,8 +2102,10 @@ void ieee80211_sta_req_auth(struct net_device *dev, if (sdata->type != IEEE80211_IF_TYPE_STA) return; - if ((ifsta->bssid_set || ifsta->auto_bssid_sel) && - (ifsta->ssid_set || ifsta->auto_ssid_sel)) { + if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | + IEEE80211_STA_AUTO_BSSID_SEL)) && + (ifsta->flags & (IEEE80211_STA_SSID_SET | + IEEE80211_STA_AUTO_SSID_SEL))) { set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); queue_work(local->hw.workqueue, &ifsta->work); } @@ -2076,7 +2119,7 @@ static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta, if (!memcmp(ifsta->ssid, ssid, ssid_len)) return 1; - if (ifsta->auto_bssid_sel) + if (ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) return 0; hidden_ssid = 1; @@ -2105,8 +2148,8 @@ static int ieee80211_sta_config_auth(struct net_device *dev, struct ieee80211_sta_bss *bss, *selected = NULL; int top_rssi = 0, freq; - if (!ifsta->auto_channel_sel && !ifsta->auto_bssid_sel && - !ifsta->auto_ssid_sel) { + if (!(ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | + IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL))) { ifsta->state = IEEE80211_AUTHENTICATE; ieee80211_sta_reset_auth(dev, ifsta); return 0; @@ -2122,14 +2165,15 @@ static int ieee80211_sta_config_auth(struct net_device *dev, !!sdata->default_key) continue; - if (!ifsta->auto_channel_sel && bss->freq != freq) + if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && + bss->freq != freq) continue; - if (!ifsta->auto_bssid_sel && + if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) && memcmp(bss->bssid, ifsta->bssid, ETH_ALEN)) continue; - if (!ifsta->auto_ssid_sel && + if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) && !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len)) continue; @@ -2144,7 +2188,7 @@ static int ieee80211_sta_config_auth(struct net_device *dev, if (selected) { ieee80211_set_channel(local, -1, selected->freq); - if (!ifsta->ssid_set) + if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) ieee80211_sta_set_ssid(dev, selected->ssid, selected->ssid_len); ieee80211_sta_set_bssid(dev, selected->bssid); @@ -2154,7 +2198,7 @@ static int ieee80211_sta_config_auth(struct net_device *dev, return 0; } else { if (ifsta->state != IEEE80211_AUTHENTICATE) { - if (ifsta->auto_ssid_sel) + if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) ieee80211_sta_start_scan(dev, NULL, 0); else ieee80211_sta_start_scan(dev, ifsta->ssid, @@ -2271,8 +2315,9 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, "for IBSS beacon\n", dev->name); break; } - control.tx_rate = (local->short_preamble && - (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? + control.tx_rate = + ((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && + (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? rate->val2 : rate->val; control.antenna_sel_tx = local->hw.conf.antenna_sel_tx; control.power_level = local->hw.conf.power_level; @@ -2303,8 +2348,6 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, mode = local->oper_hw_mode; for (i = 0; i < bss->supp_rates_len; i++) { int bitrate = (bss->supp_rates[i] & 0x7f) * 5; - if (mode->mode == MODE_ATHEROS_TURBO) - bitrate *= 2; for (j = 0; j < mode->num_rates; j++) if (mode->rates[j].rate == bitrate) rates |= BIT(j); @@ -2336,6 +2379,7 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, struct ieee80211_hw_mode *mode; u8 bssid[ETH_ALEN], *pos; int i; + DECLARE_MAC_BUF(mac); #if 0 /* Easier testing, use fixed BSSID. */ @@ -2351,8 +2395,8 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, bssid[0] |= 0x02; #endif - printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID " MAC_FMT "\n", - dev->name, MAC_ARG(bssid)); + printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %s\n", + dev->name, print_mac(mac, bssid)); bss = ieee80211_rx_bss_add(dev, bssid); if (!bss) @@ -2377,8 +2421,6 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, pos = bss->supp_rates; for (i = 0; i < mode->num_rates; i++) { int rate = mode->rates[i].rate; - if (mode->mode == MODE_ATHEROS_TURBO) - rate /= 2; *pos++ = (u8) (rate / 5); } @@ -2394,6 +2436,8 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, int found = 0; u8 bssid[ETH_ALEN]; int active_ibss; + DECLARE_MAC_BUF(mac); + DECLARE_MAC_BUF(mac2); if (ifsta->ssid_len == 0) return -EINVAL; @@ -2410,8 +2454,8 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, || !(bss->capability & WLAN_CAPABILITY_IBSS)) continue; #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG " bssid=" MAC_FMT " found\n", - MAC_ARG(bss->bssid)); + printk(KERN_DEBUG " bssid=%s found\n", + print_mac(mac, bss->bssid)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ memcpy(bssid, bss->bssid, ETH_ALEN); found = 1; @@ -2421,14 +2465,14 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, spin_unlock_bh(&local->sta_bss_lock); #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG " sta_find_ibss: selected " MAC_FMT " current " - MAC_FMT "\n", MAC_ARG(bssid), MAC_ARG(ifsta->bssid)); + printk(KERN_DEBUG " sta_find_ibss: selected %s current " + "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 && (bss = ieee80211_rx_bss_get(dev, bssid))) { - printk(KERN_DEBUG "%s: Selected IBSS BSSID " MAC_FMT + printk(KERN_DEBUG "%s: Selected IBSS BSSID %s" " based on configured SSID\n", - dev->name, MAC_ARG(bssid)); + dev->name, print_mac(mac, bssid)); return ieee80211_sta_join_ibss(dev, ifsta, bss); } #ifdef CONFIG_MAC80211_IBSS_DEBUG @@ -2451,10 +2495,10 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, if (time_after(jiffies, ifsta->ibss_join_req + IEEE80211_IBSS_JOIN_TIMEOUT)) { - if (ifsta->create_ibss && + if ((ifsta->flags & IEEE80211_STA_CREATE_IBSS) && local->oper_channel->flag & IEEE80211_CHAN_W_IBSS) return ieee80211_sta_create_ibss(dev, ifsta); - if (ifsta->create_ibss) { + if (ifsta->flags & IEEE80211_STA_CREATE_IBSS) { printk(KERN_DEBUG "%s: IBSS not allowed on the" " configured channel %d (%d MHz)\n", dev->name, local->hw.conf.channel, @@ -2515,13 +2559,17 @@ int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len) ifsta = &sdata->u.sta; if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) - ifsta->prev_bssid_set = 0; + ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; memcpy(ifsta->ssid, ssid, len); memset(ifsta->ssid + len, 0, IEEE80211_MAX_SSID_LEN - len); ifsta->ssid_len = len; - ifsta->ssid_set = len ? 1 : 0; - if (sdata->type == IEEE80211_IF_TYPE_IBSS && !ifsta->bssid_set) { + if (len) + ifsta->flags |= IEEE80211_STA_SSID_SET; + else + ifsta->flags &= ~IEEE80211_STA_SSID_SET; + if (sdata->type == IEEE80211_IF_TYPE_IBSS && + !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { ifsta->ibss_join_req = jiffies; ifsta->state = IEEE80211_IBSS_SEARCH; return ieee80211_sta_find_ibss(dev, ifsta); @@ -2559,10 +2607,11 @@ int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid) } } - if (!is_valid_ether_addr(bssid)) - ifsta->bssid_set = 0; + if (is_valid_ether_addr(bssid)) + ifsta->flags |= IEEE80211_STA_BSSID_SET; else - ifsta->bssid_set = 1; + ifsta->flags &= ~IEEE80211_STA_BSSID_SET; + return 0; } @@ -2613,35 +2662,41 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) printk(KERN_DEBUG "%s: failed to restore operational" "channel after scan\n", dev->name); - if (!(local->hw.flags & IEEE80211_HW_NO_PROBE_FILTERING) && - ieee80211_if_config(dev)) - printk(KERN_DEBUG "%s: failed to restore operational" - "BSSID after scan\n", dev->name); + + netif_tx_lock_bh(local->mdev); + local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC; + local->ops->configure_filter(local_to_hw(local), + FIF_BCN_PRBRESP_PROMISC, + &local->filter_flags, + local->mdev->mc_count, + local->mdev->mc_list); + + netif_tx_unlock_bh(local->mdev); memset(&wrqu, 0, sizeof(wrqu)); wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL); - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { /* No need to wake the master device. */ if (sdata->dev == local->mdev) continue; if (sdata->type == IEEE80211_IF_TYPE_STA) { - if (sdata->u.sta.associated) + if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) ieee80211_send_nullfunc(local, sdata, 0); ieee80211_sta_timer((unsigned long)sdata); } netif_wake_queue(sdata->dev); } - read_unlock(&local->sub_if_lock); + rcu_read_unlock(); sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type == IEEE80211_IF_TYPE_IBSS) { struct ieee80211_if_sta *ifsta = &sdata->u.sta; - if (!ifsta->bssid_set || + if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || (!ifsta->state == IEEE80211_IBSS_JOINED && !ieee80211_sta_active_ibss(dev))) ieee80211_sta_find_ibss(dev, ifsta); @@ -2773,8 +2828,8 @@ static int ieee80211_sta_start_scan(struct net_device *dev, local->sta_scanning = 1; - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { /* Don't stop the master interface, otherwise we can't transmit * probes! */ @@ -2783,10 +2838,10 @@ static int ieee80211_sta_start_scan(struct net_device *dev, netif_stop_queue(sdata->dev); if (sdata->type == IEEE80211_IF_TYPE_STA && - sdata->u.sta.associated) + (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) ieee80211_send_nullfunc(local, sdata, 1); } - read_unlock(&local->sub_if_lock); + rcu_read_unlock(); if (ssid) { local->scan_ssid_len = ssid_len; @@ -2800,10 +2855,14 @@ static int ieee80211_sta_start_scan(struct net_device *dev, local->scan_channel_idx = 0; local->scan_dev = dev; - if (!(local->hw.flags & IEEE80211_HW_NO_PROBE_FILTERING) && - ieee80211_if_config(dev)) - printk(KERN_DEBUG "%s: failed to set BSSID for scan\n", - dev->name); + netif_tx_lock_bh(local->mdev); + local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; + local->ops->configure_filter(local_to_hw(local), + FIF_BCN_PRBRESP_PROMISC, + &local->filter_flags, + local->mdev->mc_count, + local->mdev->mc_list); + netif_tx_unlock_bh(local->mdev); /* TODO: start scan as soon as all nullfunc frames are ACKed */ queue_delayed_work(local->hw.workqueue, &local->scan_work, @@ -3041,19 +3100,20 @@ struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev, struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + DECLARE_MAC_BUF(mac); /* TODO: Could consider removing the least recently used entry and * allow new one to be added. */ if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { if (net_ratelimit()) { printk(KERN_DEBUG "%s: No room for a new IBSS STA " - "entry " MAC_FMT "\n", dev->name, MAC_ARG(addr)); + "entry %s\n", dev->name, print_mac(mac, addr)); } return NULL; } - printk(KERN_DEBUG "%s: Adding new IBSS station " MAC_FMT " (dev=%s)\n", - local->mdev->name, MAC_ARG(addr), dev->name); + printk(KERN_DEBUG "%s: Adding new IBSS station %s (dev=%s)\n", + wiphy_name(local->hw.wiphy), print_mac(mac, addr), dev->name); sta = sta_info_add(local, dev, addr, GFP_ATOMIC); if (!sta) @@ -3096,7 +3156,7 @@ int ieee80211_sta_disassociate(struct net_device *dev, u16 reason) if (sdata->type != IEEE80211_IF_TYPE_STA) return -EINVAL; - if (!ifsta->associated) + if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) return -1; ieee80211_send_disassoc(dev, ifsta, reason); diff --git a/net/mac80211/key.c b/net/mac80211/key.c new file mode 100644 index 000000000000..0b2328f7d67c --- /dev/null +++ b/net/mac80211/key.c @@ -0,0 +1,279 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/if_ether.h> +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/rcupdate.h> +#include <net/mac80211.h> +#include "ieee80211_i.h" +#include "debugfs_key.h" +#include "aes_ccm.h" + + +/* + * Key handling basics + * + * Key handling in mac80211 is done based on per-interface (sub_if_data) + * keys and per-station keys. Since each station belongs to an interface, + * each station key also belongs to that interface. + * + * Hardware acceleration is done on a best-effort basis, for each key + * that is eligible the hardware is asked to enable that key but if + * it cannot do that they key is simply kept for software encryption. + * There is currently no way of knowing this except by looking into + * debugfs. + * + * All operations here are called under RTNL so no extra locking is + * required. + */ + +static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; +static const u8 zero_addr[ETH_ALEN]; + +static const u8 *get_mac_for_key(struct ieee80211_key *key) +{ + const u8 *addr = bcast_addr; + + /* + * If we're an AP we won't ever receive frames with a non-WEP + * group key so we tell the driver that by using the zero MAC + * address to indicate a transmit-only key. + */ + if (key->conf.alg != ALG_WEP && + (key->sdata->type == IEEE80211_IF_TYPE_AP || + key->sdata->type == IEEE80211_IF_TYPE_VLAN)) + addr = zero_addr; + + if (key->sta) + addr = key->sta->addr; + + return addr; +} + +static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) +{ + const u8 *addr; + int ret; + DECLARE_MAC_BUF(mac); + + if (!key->local->ops->set_key) + return; + + addr = get_mac_for_key(key); + + ret = key->local->ops->set_key(local_to_hw(key->local), SET_KEY, + key->sdata->dev->dev_addr, addr, + &key->conf); + + if (!ret) + key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; + + if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) + printk(KERN_ERR "mac80211-%s: failed to set key " + "(%d, %s) to hardware (%d)\n", + wiphy_name(key->local->hw.wiphy), + key->conf.keyidx, print_mac(mac, addr), ret); +} + +static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) +{ + const u8 *addr; + int ret; + DECLARE_MAC_BUF(mac); + + if (!key->local->ops->set_key) + return; + + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + return; + + addr = get_mac_for_key(key); + + ret = key->local->ops->set_key(local_to_hw(key->local), DISABLE_KEY, + key->sdata->dev->dev_addr, addr, + &key->conf); + + if (ret) + printk(KERN_ERR "mac80211-%s: failed to remove key " + "(%d, %s) from hardware (%d)\n", + wiphy_name(key->local->hw.wiphy), + key->conf.keyidx, print_mac(mac, addr), ret); + + key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; +} + +struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + enum ieee80211_key_alg alg, + int idx, + size_t key_len, + const u8 *key_data) +{ + struct ieee80211_key *key; + + BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS); + + key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); + if (!key) + return NULL; + + /* + * Default to software encryption; we'll later upload the + * key to the hardware if possible. + */ + key->conf.flags = 0; + key->flags = 0; + + key->conf.alg = alg; + key->conf.keyidx = idx; + key->conf.keylen = key_len; + memcpy(key->conf.key, key_data, key_len); + + key->local = sdata->local; + key->sdata = sdata; + key->sta = sta; + + if (alg == ALG_CCMP) { + /* + * Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. + */ + key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data); + if (!key->u.ccmp.tfm) { + ieee80211_key_free(key); + return NULL; + } + } + + ieee80211_debugfs_key_add(key->local, key); + + /* remove key first */ + if (sta) + ieee80211_key_free(sta->key); + else + ieee80211_key_free(sdata->keys[idx]); + + if (sta) { + ieee80211_debugfs_key_sta_link(key, sta); + + /* + * some hardware cannot handle TKIP with QoS, so + * we indicate whether QoS could be in use. + */ + if (sta->flags & WLAN_STA_WME) + key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; + } else { + if (sdata->type == IEEE80211_IF_TYPE_STA) { + struct sta_info *ap; + + /* same here, the AP could be using QoS */ + ap = sta_info_get(key->local, key->sdata->u.sta.bssid); + if (ap) { + if (ap->flags & WLAN_STA_WME) + key->conf.flags |= + IEEE80211_KEY_FLAG_WMM_STA; + sta_info_put(ap); + } + } + } + + /* enable hwaccel if appropriate */ + if (netif_running(key->sdata->dev)) + ieee80211_key_enable_hw_accel(key); + + if (sta) + rcu_assign_pointer(sta->key, key); + else + rcu_assign_pointer(sdata->keys[idx], key); + + list_add(&key->list, &sdata->key_list); + + return key; +} + +void ieee80211_key_free(struct ieee80211_key *key) +{ + if (!key) + return; + + if (key->sta) { + rcu_assign_pointer(key->sta->key, NULL); + } else { + if (key->sdata->default_key == key) + ieee80211_set_default_key(key->sdata, -1); + if (key->conf.keyidx >= 0 && + key->conf.keyidx < NUM_DEFAULT_KEYS) + rcu_assign_pointer(key->sdata->keys[key->conf.keyidx], + NULL); + else + WARN_ON(1); + } + + /* wait for all key users to complete */ + synchronize_rcu(); + + /* remove from hwaccel if appropriate */ + ieee80211_key_disable_hw_accel(key); + + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + ieee80211_debugfs_key_remove(key); + + list_del(&key->list); + + kfree(key); +} + +void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx) +{ + struct ieee80211_key *key = NULL; + + if (idx >= 0 && idx < NUM_DEFAULT_KEYS) + key = sdata->keys[idx]; + + if (sdata->default_key != key) { + ieee80211_debugfs_key_remove_default(sdata); + + rcu_assign_pointer(sdata->default_key, key); + + if (sdata->default_key) + ieee80211_debugfs_key_add_default(sdata); + } +} + +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_key *key, *tmp; + + list_for_each_entry_safe(key, tmp, &sdata->key_list, list) + ieee80211_key_free(key); +} + +void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_key *key; + + WARN_ON(!netif_running(sdata->dev)); + if (!netif_running(sdata->dev)) + return; + + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_enable_hw_accel(key); +} + +void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_key *key; + + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_disable_hw_accel(key); +} diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c index 17b9f46bbf2b..314b8de88862 100644 --- a/net/mac80211/rc80211_simple.c +++ b/net/mac80211/rc80211_simple.c @@ -147,14 +147,6 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev, srctrl = sta->rate_ctrl_priv; srctrl->tx_num_xmit++; if (status->excessive_retries) { - sta->antenna_sel_tx = sta->antenna_sel_tx == 1 ? 2 : 1; - sta->antenna_sel_rx = sta->antenna_sel_rx == 1 ? 2 : 1; - if (local->sta_antenna_sel == STA_ANTENNA_SEL_SW_CTRL_DEBUG) { - printk(KERN_DEBUG "%s: " MAC_FMT " TX antenna --> %d " - "RX antenna --> %d (@%lu)\n", - dev->name, MAC_ARG(hdr->addr1), - sta->antenna_sel_tx, sta->antenna_sel_rx, jiffies); - } srctrl->tx_num_failures++; sta->tx_retry_failed++; sta->tx_num_consecutive_failures++; @@ -209,9 +201,10 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev, srctrl->avg_rate_update = jiffies; if (srctrl->tx_avg_rate_num > 0) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: STA " MAC_FMT " Average rate: " + DECLARE_MAC_BUF(mac); + printk(KERN_DEBUG "%s: STA %s Average rate: " "%d (%d/%d)\n", - dev->name, MAC_ARG(sta->addr), + dev->name, print_mac(mac, sta->addr), srctrl->tx_avg_rate_sum / srctrl->tx_avg_rate_num, srctrl->tx_avg_rate_sum, diff --git a/net/mac80211/regdomain.c b/net/mac80211/regdomain.c index b697a2afbb4b..f42678fa62d1 100644 --- a/net/mac80211/regdomain.c +++ b/net/mac80211/regdomain.c @@ -82,12 +82,6 @@ static void ieee80211_unmask_channel(int mode, struct ieee80211_channel *chan) chan->flag = 0; - if (ieee80211_regdom == 64 && - (mode == MODE_ATHEROS_TURBO || mode == MODE_ATHEROS_TURBOG)) { - /* Do not allow Turbo modes in Japan. */ - return; - } - for (i = 0; channel_range[i].start_freq; i++) { const struct ieee80211_channel_range *r = &channel_range[i]; if (r->start_freq <= chan->freq && r->end_freq >= chan->freq) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c new file mode 100644 index 000000000000..ece77766ea2b --- /dev/null +++ b/net/mac80211/rx.c @@ -0,0 +1,1588 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rcupdate.h> +#include <net/mac80211.h> +#include <net/ieee80211_radiotap.h> + +#include "ieee80211_i.h" +#include "ieee80211_led.h" +#include "wep.h" +#include "wpa.h" +#include "tkip.h" +#include "wme.h" + +/* + * monitor mode reception + * + * This function cleans up the SKB, i.e. it removes all the stuff + * only useful for monitoring. + */ +static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, + struct sk_buff *skb, + int rtap_len) +{ + skb_pull(skb, rtap_len); + + if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) { + if (likely(skb->len > FCS_LEN)) + skb_trim(skb, skb->len - FCS_LEN); + else { + /* driver bug */ + WARN_ON(1); + dev_kfree_skb(skb); + skb = NULL; + } + } + + return skb; +} + +static inline int should_drop_frame(struct ieee80211_rx_status *status, + struct sk_buff *skb, + int present_fcs_len, + int radiotap_len) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + + if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) + return 1; + if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len)) + return 1; + if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == + cpu_to_le16(IEEE80211_FTYPE_CTL)) + return 1; + return 0; +} + +/* + * This function copies a received frame to all monitor interfaces and + * returns a cleaned-up SKB that no longer includes the FCS nor the + * radiotap header the driver might have added. + */ +static struct sk_buff * +ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, + struct ieee80211_rx_status *status) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_rate *rate; + int needed_headroom = 0; + struct ieee80211_rtap_hdr { + struct ieee80211_radiotap_header hdr; + u8 flags; + u8 rate; + __le16 chan_freq; + __le16 chan_flags; + u8 antsignal; + u8 padding_for_rxflags; + __le16 rx_flags; + } __attribute__ ((packed)) *rthdr; + struct sk_buff *skb, *skb2; + struct net_device *prev_dev = NULL; + int present_fcs_len = 0; + int rtap_len = 0; + + /* + * First, we may need to make a copy of the skb because + * (1) we need to modify it for radiotap (if not present), and + * (2) the other RX handlers will modify the skb we got. + * + * We don't need to, of course, if we aren't going to return + * the SKB because it has a bad FCS/PLCP checksum. + */ + if (status->flag & RX_FLAG_RADIOTAP) + rtap_len = ieee80211_get_radiotap_len(origskb->data); + else + needed_headroom = sizeof(*rthdr); + + if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) + present_fcs_len = FCS_LEN; + + if (!local->monitors) { + if (should_drop_frame(status, origskb, present_fcs_len, + rtap_len)) { + dev_kfree_skb(origskb); + return NULL; + } + + return remove_monitor_info(local, origskb, rtap_len); + } + + if (should_drop_frame(status, origskb, present_fcs_len, rtap_len)) { + /* only need to expand headroom if necessary */ + skb = origskb; + origskb = NULL; + + /* + * This shouldn't trigger often because most devices have an + * RX header they pull before we get here, and that should + * be big enough for our radiotap information. We should + * probably export the length to drivers so that we can have + * them allocate enough headroom to start with. + */ + if (skb_headroom(skb) < needed_headroom && + pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) { + dev_kfree_skb(skb); + return NULL; + } + } else { + /* + * Need to make a copy and possibly remove radiotap header + * and FCS from the original. + */ + skb = skb_copy_expand(origskb, needed_headroom, 0, GFP_ATOMIC); + + origskb = remove_monitor_info(local, origskb, rtap_len); + + if (!skb) + return origskb; + } + + /* if necessary, prepend radiotap information */ + if (!(status->flag & RX_FLAG_RADIOTAP)) { + rthdr = (void *) skb_push(skb, sizeof(*rthdr)); + memset(rthdr, 0, sizeof(*rthdr)); + rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); + rthdr->hdr.it_present = + cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | + (1 << IEEE80211_RADIOTAP_RATE) | + (1 << IEEE80211_RADIOTAP_CHANNEL) | + (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL) | + (1 << IEEE80211_RADIOTAP_RX_FLAGS)); + rthdr->flags = local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS ? + IEEE80211_RADIOTAP_F_FCS : 0; + + /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */ + rthdr->rx_flags = 0; + if (status->flag & + (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) + rthdr->rx_flags |= + cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS); + + rate = ieee80211_get_rate(local, status->phymode, + status->rate); + if (rate) + rthdr->rate = rate->rate / 5; + + rthdr->chan_freq = cpu_to_le16(status->freq); + + if (status->phymode == MODE_IEEE80211A) + rthdr->chan_flags = + cpu_to_le16(IEEE80211_CHAN_OFDM | + IEEE80211_CHAN_5GHZ); + else + rthdr->chan_flags = + cpu_to_le16(IEEE80211_CHAN_DYN | + IEEE80211_CHAN_2GHZ); + + rthdr->antsignal = status->ssi; + } + + skb_set_mac_header(skb, 0); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_OTHERHOST; + skb->protocol = htons(ETH_P_802_2); + + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!netif_running(sdata->dev)) + continue; + + if (sdata->type != IEEE80211_IF_TYPE_MNTR) + continue; + + if (prev_dev) { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) { + skb2->dev = prev_dev; + netif_rx(skb2); + } + } + + prev_dev = sdata->dev; + sdata->dev->stats.rx_packets++; + sdata->dev->stats.rx_bytes += skb->len; + } + + if (prev_dev) { + skb->dev = prev_dev; + netif_rx(skb); + } else + dev_kfree_skb(skb); + + return origskb; +} + + +/* pre-rx handlers + * + * these don't have dev/sdata fields in the rx data + * The sta value should also not be used because it may + * be NULL even though a STA (in IBSS mode) will be added. + */ + +static ieee80211_txrx_result +ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx) +{ + u8 *data = rx->skb->data; + int tid; + + /* does the frame have a qos control field? */ + if (WLAN_FC_IS_QOS_DATA(rx->fc)) { + u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN; + /* frame has qos control */ + tid = qc[0] & QOS_CONTROL_TID_MASK; + } else { + if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) { + /* Separate TID for management frames */ + tid = NUM_RX_DATA_QUEUES - 1; + } else { + /* no qos control present */ + tid = 0; /* 802.1d - Best Effort */ + } + } + + I802_DEBUG_INC(rx->local->wme_rx_queue[tid]); + /* only a debug counter, sta might not be assigned properly yet */ + if (rx->sta) + I802_DEBUG_INC(rx->sta->wme_rx_queue[tid]); + + rx->u.rx.queue = tid; + /* Set skb->priority to 1d tag if highest order bit of TID is not set. + * For now, set skb->priority to 0 for other cases. */ + rx->skb->priority = (tid > 7) ? 0 : tid; + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct sk_buff *skb = rx->skb; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u32 load = 0, hdrtime; + struct ieee80211_rate *rate; + struct ieee80211_hw_mode *mode = local->hw.conf.mode; + int i; + + /* Estimate total channel use caused by this frame */ + + if (unlikely(mode->num_rates < 0)) + return TXRX_CONTINUE; + + rate = &mode->rates[0]; + for (i = 0; i < mode->num_rates; i++) { + if (mode->rates[i].val == rx->u.rx.status->rate) { + rate = &mode->rates[i]; + break; + } + } + + /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, + * 1 usec = 1/8 * (1080 / 10) = 13.5 */ + + if (mode->mode == MODE_IEEE80211A || + (mode->mode == MODE_IEEE80211G && + rate->flags & IEEE80211_RATE_ERP)) + hdrtime = CHAN_UTIL_HDR_SHORT; + else + hdrtime = CHAN_UTIL_HDR_LONG; + + load = hdrtime; + if (!is_multicast_ether_addr(hdr->addr1)) + load += hdrtime; + + load += skb->len * rate->rate_inv; + + /* Divide channel_use by 8 to avoid wrapping around the counter */ + load >>= CHAN_UTIL_SHIFT; + local->channel_use_raw += load; + rx->u.rx.load = load; + + return TXRX_CONTINUE; +} + +ieee80211_rx_handler ieee80211_rx_pre_handlers[] = +{ + ieee80211_rx_h_parse_qos, + ieee80211_rx_h_load_stats, + NULL +}; + +/* rx handlers */ + +static ieee80211_txrx_result +ieee80211_rx_h_if_stats(struct ieee80211_txrx_data *rx) +{ + if (rx->sta) + rx->sta->channel_use_raw += rx->u.rx.load; + rx->sdata->channel_use_raw += rx->u.rx.load; + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_passive_scan(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct sk_buff *skb = rx->skb; + + if (unlikely(local->sta_scanning != 0)) { + ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status); + return TXRX_QUEUED; + } + + if (unlikely(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) { + /* scanning finished during invoking of handlers */ + I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); + return TXRX_DROP; + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_check(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_hdr *hdr; + hdr = (struct ieee80211_hdr *) rx->skb->data; + + /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ + if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { + if (unlikely(rx->fc & IEEE80211_FCTL_RETRY && + rx->sta->last_seq_ctrl[rx->u.rx.queue] == + hdr->seq_ctrl)) { + if (rx->flags & IEEE80211_TXRXD_RXRA_MATCH) { + rx->local->dot11FrameDuplicateCount++; + rx->sta->num_duplicates++; + } + return TXRX_DROP; + } else + rx->sta->last_seq_ctrl[rx->u.rx.queue] = hdr->seq_ctrl; + } + + if (unlikely(rx->skb->len < 16)) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_short); + return TXRX_DROP; + } + + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + rx->skb->pkt_type = PACKET_OTHERHOST; + else if (compare_ether_addr(rx->dev->dev_addr, hdr->addr1) == 0) + rx->skb->pkt_type = PACKET_HOST; + else if (is_multicast_ether_addr(hdr->addr1)) { + if (is_broadcast_ether_addr(hdr->addr1)) + rx->skb->pkt_type = PACKET_BROADCAST; + else + rx->skb->pkt_type = PACKET_MULTICAST; + } else + rx->skb->pkt_type = PACKET_OTHERHOST; + + /* Drop disallowed frame classes based on STA auth/assoc state; + * IEEE 802.11, Chap 5.5. + * + * 80211.o does filtering only based on association state, i.e., it + * drops Class 3 frames from not associated stations. hostapd sends + * deauth/disassoc frames when needed. In addition, hostapd is + * responsible for filtering on both auth and assoc states. + */ + if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA || + ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL && + (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) && + rx->sdata->type != IEEE80211_IF_TYPE_IBSS && + (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) { + if ((!(rx->fc & IEEE80211_FCTL_FROMDS) && + !(rx->fc & IEEE80211_FCTL_TODS) && + (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) + || !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) { + /* Drop IBSS frames and frames for other hosts + * silently. */ + return TXRX_DROP; + } + + return TXRX_DROP; + } + + return TXRX_CONTINUE; +} + + +static ieee80211_txrx_result +ieee80211_rx_h_decrypt(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + int keyidx; + int hdrlen; + ieee80211_txrx_result result = TXRX_DROP; + struct ieee80211_key *stakey = NULL; + + /* + * Key selection 101 + * + * There are three types of keys: + * - GTK (group keys) + * - PTK (pairwise keys) + * - STK (station-to-station pairwise keys) + * + * When selecting a key, we have to distinguish between multicast + * (including broadcast) and unicast frames, the latter can only + * use PTKs and STKs while the former always use GTKs. Unless, of + * course, actual WEP keys ("pre-RSNA") are used, then unicast + * frames can also use key indizes like GTKs. Hence, if we don't + * have a PTK/STK we check the key index for a WEP key. + * + * Note that in a regular BSS, multicast frames are sent by the + * AP only, associated stations unicast the frame to the AP first + * which then multicasts it on their behalf. + * + * There is also a slight problem in IBSS mode: GTKs are negotiated + * with each station, that is something we don't currently handle. + * The spec seems to expect that one negotiates the same key with + * every station but there's no such requirement; VLANs could be + * possible. + */ + + if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) + return TXRX_CONTINUE; + + /* + * No point in finding a key and decrypting if the frame is neither + * addressed to us nor a multicast frame. + */ + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_CONTINUE; + + if (rx->sta) + stakey = rcu_dereference(rx->sta->key); + + if (!is_multicast_ether_addr(hdr->addr1) && stakey) { + rx->key = stakey; + } else { + /* + * The device doesn't give us the IV so we won't be + * able to look up the key. That's ok though, we + * don't need to decrypt the frame, we just won't + * be able to keep statistics accurate. + * Except for key threshold notifications, should + * we somehow allow the driver to tell us which key + * the hardware used if this flag is set? + */ + if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && + (rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED)) + return TXRX_CONTINUE; + + hdrlen = ieee80211_get_hdrlen(rx->fc); + + if (rx->skb->len < 8 + hdrlen) + return TXRX_DROP; /* TODO: count this? */ + + /* + * no need to call ieee80211_wep_get_keyidx, + * it verifies a bunch of things we've done already + */ + keyidx = rx->skb->data[hdrlen + 3] >> 6; + + rx->key = rcu_dereference(rx->sdata->keys[keyidx]); + + /* + * RSNA-protected unicast frames should always be sent with + * pairwise or station-to-station keys, but for WEP we allow + * using a key index as well. + */ + if (rx->key && rx->key->conf.alg != ALG_WEP && + !is_multicast_ether_addr(hdr->addr1)) + rx->key = NULL; + } + + if (rx->key) { + rx->key->tx_rx_count++; + /* TODO: add threshold stuff again */ + } else { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: RX protected frame," + " but have no key\n", rx->dev->name); + return TXRX_DROP; + } + + /* Check for weak IVs if possible */ + if (rx->sta && rx->key->conf.alg == ALG_WEP && + ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) && + (!(rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED) || + !(rx->u.rx.status->flag & RX_FLAG_DECRYPTED)) && + ieee80211_wep_is_weak_iv(rx->skb, rx->key)) + rx->sta->wep_weak_iv_count++; + + switch (rx->key->conf.alg) { + case ALG_WEP: + result = ieee80211_crypto_wep_decrypt(rx); + break; + case ALG_TKIP: + result = ieee80211_crypto_tkip_decrypt(rx); + break; + case ALG_CCMP: + result = ieee80211_crypto_ccmp_decrypt(rx); + break; + } + + /* either the frame has been decrypted or will be dropped */ + rx->u.rx.status->flag |= RX_FLAG_DECRYPTED; + + return result; +} + +static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata; + DECLARE_MAC_BUF(mac); + + sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); + + if (sdata->bss) + atomic_inc(&sdata->bss->num_sta_ps); + sta->flags |= WLAN_STA_PS; + sta->pspoll = 0; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n", + dev->name, print_mac(mac, sta->addr), sta->aid); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ +} + +static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sk_buff *skb; + int sent = 0; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_tx_packet_data *pkt_data; + DECLARE_MAC_BUF(mac); + + sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); + if (sdata->bss) + atomic_dec(&sdata->bss->num_sta_ps); + sta->flags &= ~(WLAN_STA_PS | WLAN_STA_TIM); + sta->pspoll = 0; + if (!skb_queue_empty(&sta->ps_tx_buf)) { + if (local->ops->set_tim) + local->ops->set_tim(local_to_hw(local), sta->aid, 0); + if (sdata->bss) + bss_tim_clear(local, sdata->bss, sta->aid); + } +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "%s: STA %s aid %d exits power save mode\n", + dev->name, print_mac(mac, sta->addr), sta->aid); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + /* Send all buffered frames to the station */ + while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { + pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + sent++; + pkt_data->flags |= IEEE80211_TXPD_REQUEUE; + dev_queue_xmit(skb); + } + while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { + pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + local->total_ps_buffered--; + sent++; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "%s: STA %s aid %d send PS frame " + "since STA not sleeping anymore\n", dev->name, + print_mac(mac, sta->addr), sta->aid); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + pkt_data->flags |= IEEE80211_TXPD_REQUEUE; + dev_queue_xmit(skb); + } + + return sent; +} + +static ieee80211_txrx_result +ieee80211_rx_h_sta_process(struct ieee80211_txrx_data *rx) +{ + struct sta_info *sta = rx->sta; + struct net_device *dev = rx->dev; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + + if (!sta) + return TXRX_CONTINUE; + + /* Update last_rx only for IBSS packets which are for the current + * BSSID to avoid keeping the current IBSS network alive in cases where + * other STAs are using different BSSID. */ + if (rx->sdata->type == IEEE80211_IF_TYPE_IBSS) { + u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len); + if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) + sta->last_rx = jiffies; + } else + if (!is_multicast_ether_addr(hdr->addr1) || + rx->sdata->type == IEEE80211_IF_TYPE_STA) { + /* Update last_rx only for unicast frames in order to prevent + * the Probe Request frames (the only broadcast frames from a + * STA in infrastructure mode) from keeping a connection alive. + */ + sta->last_rx = jiffies; + } + + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_CONTINUE; + + sta->rx_fragments++; + sta->rx_bytes += rx->skb->len; + sta->last_rssi = rx->u.rx.status->ssi; + sta->last_signal = rx->u.rx.status->signal; + sta->last_noise = rx->u.rx.status->noise; + + if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) { + /* Change STA power saving mode only in the end of a frame + * exchange sequence */ + if ((sta->flags & WLAN_STA_PS) && !(rx->fc & IEEE80211_FCTL_PM)) + rx->u.rx.sent_ps_buffered += ap_sta_ps_end(dev, sta); + else if (!(sta->flags & WLAN_STA_PS) && + (rx->fc & IEEE80211_FCTL_PM)) + ap_sta_ps_start(dev, sta); + } + + /* Drop data::nullfunc frames silently, since they are used only to + * control station power saving mode. */ + if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_NULLFUNC) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); + /* Update counter and free packet here to avoid counting this + * as a dropped packed. */ + sta->rx_packets++; + dev_kfree_skb(rx->skb); + return TXRX_QUEUED; + } + + return TXRX_CONTINUE; +} /* ieee80211_rx_h_sta_process */ + +static inline struct ieee80211_fragment_entry * +ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, + unsigned int frag, unsigned int seq, int rx_queue, + struct sk_buff **skb) +{ + struct ieee80211_fragment_entry *entry; + int idx; + + idx = sdata->fragment_next; + entry = &sdata->fragments[sdata->fragment_next++]; + if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX) + sdata->fragment_next = 0; + + if (!skb_queue_empty(&entry->skb_list)) { +#ifdef CONFIG_MAC80211_DEBUG + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) entry->skb_list.next->data; + DECLARE_MAC_BUF(mac); + DECLARE_MAC_BUF(mac2); + printk(KERN_DEBUG "%s: RX reassembly removed oldest " + "fragment entry (idx=%d age=%lu seq=%d last_frag=%d " + "addr1=%s addr2=%s\n", + sdata->dev->name, idx, + jiffies - entry->first_frag_time, entry->seq, + entry->last_frag, print_mac(mac, hdr->addr1), + print_mac(mac2, hdr->addr2)); +#endif /* CONFIG_MAC80211_DEBUG */ + __skb_queue_purge(&entry->skb_list); + } + + __skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */ + *skb = NULL; + entry->first_frag_time = jiffies; + entry->seq = seq; + entry->rx_queue = rx_queue; + entry->last_frag = frag; + entry->ccmp = 0; + entry->extra_len = 0; + + return entry; +} + +static inline struct ieee80211_fragment_entry * +ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, + u16 fc, unsigned int frag, unsigned int seq, + int rx_queue, struct ieee80211_hdr *hdr) +{ + struct ieee80211_fragment_entry *entry; + int i, idx; + + idx = sdata->fragment_next; + for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { + struct ieee80211_hdr *f_hdr; + u16 f_fc; + + idx--; + if (idx < 0) + idx = IEEE80211_FRAGMENT_MAX - 1; + + entry = &sdata->fragments[idx]; + if (skb_queue_empty(&entry->skb_list) || entry->seq != seq || + entry->rx_queue != rx_queue || + entry->last_frag + 1 != frag) + continue; + + f_hdr = (struct ieee80211_hdr *) entry->skb_list.next->data; + f_fc = le16_to_cpu(f_hdr->frame_control); + + if ((fc & IEEE80211_FCTL_FTYPE) != (f_fc & IEEE80211_FCTL_FTYPE) || + compare_ether_addr(hdr->addr1, f_hdr->addr1) != 0 || + compare_ether_addr(hdr->addr2, f_hdr->addr2) != 0) + continue; + + if (entry->first_frag_time + 2 * HZ < jiffies) { + __skb_queue_purge(&entry->skb_list); + continue; + } + return entry; + } + + return NULL; +} + +static ieee80211_txrx_result +ieee80211_rx_h_defragment(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_hdr *hdr; + u16 sc; + unsigned int frag, seq; + struct ieee80211_fragment_entry *entry; + struct sk_buff *skb; + DECLARE_MAC_BUF(mac); + + hdr = (struct ieee80211_hdr *) rx->skb->data; + sc = le16_to_cpu(hdr->seq_ctrl); + frag = sc & IEEE80211_SCTL_FRAG; + + if (likely((!(rx->fc & IEEE80211_FCTL_MOREFRAGS) && frag == 0) || + (rx->skb)->len < 24 || + is_multicast_ether_addr(hdr->addr1))) { + /* not fragmented */ + goto out; + } + I802_DEBUG_INC(rx->local->rx_handlers_fragments); + + seq = (sc & IEEE80211_SCTL_SEQ) >> 4; + + if (frag == 0) { + /* This is the first fragment of a new frame. */ + entry = ieee80211_reassemble_add(rx->sdata, frag, seq, + rx->u.rx.queue, &(rx->skb)); + if (rx->key && rx->key->conf.alg == ALG_CCMP && + (rx->fc & IEEE80211_FCTL_PROTECTED)) { + /* Store CCMP PN so that we can verify that the next + * fragment has a sequential PN value. */ + entry->ccmp = 1; + memcpy(entry->last_pn, + rx->key->u.ccmp.rx_pn[rx->u.rx.queue], + CCMP_PN_LEN); + } + return TXRX_QUEUED; + } + + /* This is a fragment for a frame that should already be pending in + * fragment cache. Add this fragment to the end of the pending entry. + */ + entry = ieee80211_reassemble_find(rx->sdata, rx->fc, frag, seq, + rx->u.rx.queue, hdr); + if (!entry) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); + return TXRX_DROP; + } + + /* Verify that MPDUs within one MSDU have sequential PN values. + * (IEEE 802.11i, 8.3.3.4.5) */ + if (entry->ccmp) { + int i; + u8 pn[CCMP_PN_LEN], *rpn; + if (!rx->key || rx->key->conf.alg != ALG_CCMP) + return TXRX_DROP; + memcpy(pn, entry->last_pn, CCMP_PN_LEN); + for (i = CCMP_PN_LEN - 1; i >= 0; i--) { + pn[i]++; + if (pn[i]) + break; + } + rpn = rx->key->u.ccmp.rx_pn[rx->u.rx.queue]; + if (memcmp(pn, rpn, CCMP_PN_LEN) != 0) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: defrag: CCMP PN not " + "sequential A2=%s" + " PN=%02x%02x%02x%02x%02x%02x " + "(expected %02x%02x%02x%02x%02x%02x)\n", + rx->dev->name, print_mac(mac, hdr->addr2), + rpn[0], rpn[1], rpn[2], rpn[3], rpn[4], + rpn[5], pn[0], pn[1], pn[2], pn[3], + pn[4], pn[5]); + return TXRX_DROP; + } + memcpy(entry->last_pn, pn, CCMP_PN_LEN); + } + + skb_pull(rx->skb, ieee80211_get_hdrlen(rx->fc)); + __skb_queue_tail(&entry->skb_list, rx->skb); + entry->last_frag = frag; + entry->extra_len += rx->skb->len; + if (rx->fc & IEEE80211_FCTL_MOREFRAGS) { + rx->skb = NULL; + return TXRX_QUEUED; + } + + rx->skb = __skb_dequeue(&entry->skb_list); + if (skb_tailroom(rx->skb) < entry->extra_len) { + I802_DEBUG_INC(rx->local->rx_expand_skb_head2); + if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len, + GFP_ATOMIC))) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); + __skb_queue_purge(&entry->skb_list); + return TXRX_DROP; + } + } + while ((skb = __skb_dequeue(&entry->skb_list))) { + memcpy(skb_put(rx->skb, skb->len), skb->data, skb->len); + dev_kfree_skb(skb); + } + + /* Complete frame has been reassembled - process it now */ + rx->flags |= IEEE80211_TXRXD_FRAGMENTED; + + out: + if (rx->sta) + rx->sta->rx_packets++; + if (is_multicast_ether_addr(hdr->addr1)) + rx->local->dot11MulticastReceivedFrameCount++; + else + ieee80211_led_rx(rx->local); + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx) +{ + struct sk_buff *skb; + int no_pending_pkts; + DECLARE_MAC_BUF(mac); + + if (likely(!rx->sta || + (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL || + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PSPOLL || + !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH))) + return TXRX_CONTINUE; + + skb = skb_dequeue(&rx->sta->tx_filtered); + if (!skb) { + skb = skb_dequeue(&rx->sta->ps_tx_buf); + if (skb) + rx->local->total_ps_buffered--; + } + no_pending_pkts = skb_queue_empty(&rx->sta->tx_filtered) && + skb_queue_empty(&rx->sta->ps_tx_buf); + + if (skb) { + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) skb->data; + + /* tell TX path to send one frame even though the STA may + * still remain is PS mode after this frame exchange */ + rx->sta->pspoll = 1; + +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "STA %s aid %d: PS Poll (entries after %d)\n", + print_mac(mac, rx->sta->addr), rx->sta->aid, + skb_queue_len(&rx->sta->ps_tx_buf)); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + + /* Use MoreData flag to indicate whether there are more + * buffered frames for this STA */ + if (no_pending_pkts) { + hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); + rx->sta->flags &= ~WLAN_STA_TIM; + } else + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); + + dev_queue_xmit(skb); + + if (no_pending_pkts) { + if (rx->local->ops->set_tim) + rx->local->ops->set_tim(local_to_hw(rx->local), + rx->sta->aid, 0); + if (rx->sdata->bss) + bss_tim_clear(rx->local, rx->sdata->bss, rx->sta->aid); + } +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + } else if (!rx->u.rx.sent_ps_buffered) { + printk(KERN_DEBUG "%s: STA %s sent PS Poll even " + "though there is no buffered frames for it\n", + rx->dev->name, print_mac(mac, rx->sta->addr)); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + + } + + /* Free PS Poll skb here instead of returning TXRX_DROP that would + * count as an dropped frame. */ + dev_kfree_skb(rx->skb); + + return TXRX_QUEUED; +} + +static ieee80211_txrx_result +ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx) +{ + u16 fc = rx->fc; + u8 *data = rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) data; + + if (!WLAN_FC_IS_QOS_DATA(fc)) + return TXRX_CONTINUE; + + /* remove the qos control field, update frame type and meta-data */ + memmove(data + 2, data, ieee80211_get_hdrlen(fc) - 2); + hdr = (struct ieee80211_hdr *) skb_pull(rx->skb, 2); + /* change frame type to non QOS */ + rx->fc = fc &= ~IEEE80211_STYPE_QOS_DATA; + hdr->frame_control = cpu_to_le16(fc); + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_802_1x_pae(struct ieee80211_txrx_data *rx) +{ + if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb) && + rx->sdata->type != IEEE80211_IF_TYPE_STA && + (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_CONTINUE; + + if (unlikely(rx->sdata->ieee802_1x && + (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && + (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)) && + !ieee80211_is_eapol(rx->skb))) { +#ifdef CONFIG_MAC80211_DEBUG + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) rx->skb->data; + DECLARE_MAC_BUF(mac); + printk(KERN_DEBUG "%s: dropped frame from %s" + " (unauthorized port)\n", rx->dev->name, + print_mac(mac, hdr->addr2)); +#endif /* CONFIG_MAC80211_DEBUG */ + return TXRX_DROP; + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_drop_unencrypted(struct ieee80211_txrx_data *rx) +{ + /* + * Pass through unencrypted frames if the hardware has + * decrypted them already. + */ + if (rx->u.rx.status->flag & RX_FLAG_DECRYPTED) + return TXRX_CONTINUE; + + /* Drop unencrypted frames if key is set. */ + if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) && + (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && + rx->sdata->drop_unencrypted && + (rx->sdata->eapol == 0 || !ieee80211_is_eapol(rx->skb)))) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: RX non-WEP frame, but expected " + "encryption\n", rx->dev->name); + return TXRX_DROP; + } + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_data(struct ieee80211_txrx_data *rx) +{ + struct net_device *dev = rx->dev; + struct ieee80211_local *local = rx->local; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + u16 fc, hdrlen, ethertype; + u8 *payload; + u8 dst[ETH_ALEN]; + u8 src[ETH_ALEN]; + struct sk_buff *skb = rx->skb, *skb2; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + DECLARE_MAC_BUF(mac); + DECLARE_MAC_BUF(mac2); + DECLARE_MAC_BUF(mac3); + DECLARE_MAC_BUF(mac4); + + fc = rx->fc; + if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) + return TXRX_CONTINUE; + + if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + return TXRX_DROP; + + hdrlen = ieee80211_get_hdrlen(fc); + + /* convert IEEE 802.11 header + possible LLC headers into Ethernet + * header + * IEEE 802.11 address fields: + * ToDS FromDS Addr1 Addr2 Addr3 Addr4 + * 0 0 DA SA BSSID n/a + * 0 1 DA BSSID SA n/a + * 1 0 BSSID SA DA n/a + * 1 1 RA TA DA SA + */ + + switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + case IEEE80211_FCTL_TODS: + /* BSSID SA DA */ + memcpy(dst, hdr->addr3, ETH_ALEN); + memcpy(src, hdr->addr2, ETH_ALEN); + + if (unlikely(sdata->type != IEEE80211_IF_TYPE_AP && + sdata->type != IEEE80211_IF_TYPE_VLAN)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: dropped ToDS frame " + "(BSSID=%s SA=%s DA=%s)\n", + dev->name, + print_mac(mac, hdr->addr1), + print_mac(mac2, hdr->addr2), + print_mac(mac3, hdr->addr3)); + return TXRX_DROP; + } + break; + case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + /* RA TA DA SA */ + memcpy(dst, hdr->addr3, ETH_ALEN); + memcpy(src, hdr->addr4, ETH_ALEN); + + if (unlikely(sdata->type != IEEE80211_IF_TYPE_WDS)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: dropped FromDS&ToDS " + "frame (RA=%s TA=%s DA=%s SA=%s)\n", + rx->dev->name, + print_mac(mac, hdr->addr1), + print_mac(mac2, hdr->addr2), + print_mac(mac3, hdr->addr3), + print_mac(mac4, hdr->addr4)); + return TXRX_DROP; + } + break; + case IEEE80211_FCTL_FROMDS: + /* DA BSSID SA */ + memcpy(dst, hdr->addr1, ETH_ALEN); + memcpy(src, hdr->addr3, ETH_ALEN); + + if (sdata->type != IEEE80211_IF_TYPE_STA || + (is_multicast_ether_addr(dst) && + !compare_ether_addr(src, dev->dev_addr))) + return TXRX_DROP; + break; + case 0: + /* DA SA BSSID */ + memcpy(dst, hdr->addr1, ETH_ALEN); + memcpy(src, hdr->addr2, ETH_ALEN); + + if (sdata->type != IEEE80211_IF_TYPE_IBSS) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: dropped IBSS frame " + "(DA=%s SA=%s BSSID=%s)\n", + dev->name, + print_mac(mac, hdr->addr1), + print_mac(mac2, hdr->addr2), + print_mac(mac3, hdr->addr3)); + } + return TXRX_DROP; + } + break; + } + + payload = skb->data + hdrlen; + + if (unlikely(skb->len - hdrlen < 8)) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: RX too short data frame " + "payload\n", dev->name); + } + return TXRX_DROP; + } + + ethertype = (payload[6] << 8) | payload[7]; + + if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && + ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || + compare_ether_addr(payload, bridge_tunnel_header) == 0)) { + /* remove RFC1042 or Bridge-Tunnel encapsulation and + * replace EtherType */ + skb_pull(skb, hdrlen + 6); + memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); + memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); + } else { + struct ethhdr *ehdr; + __be16 len; + skb_pull(skb, hdrlen); + len = htons(skb->len); + ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); + memcpy(ehdr->h_dest, dst, ETH_ALEN); + memcpy(ehdr->h_source, src, ETH_ALEN); + ehdr->h_proto = len; + } + skb->dev = dev; + + skb2 = NULL; + + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; + + if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP + || sdata->type == IEEE80211_IF_TYPE_VLAN) && + (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) { + if (is_multicast_ether_addr(skb->data)) { + /* send multicast frames both to higher layers in + * local net stack and back to the wireless media */ + skb2 = skb_copy(skb, GFP_ATOMIC); + if (!skb2 && net_ratelimit()) + printk(KERN_DEBUG "%s: failed to clone " + "multicast frame\n", dev->name); + } else { + struct sta_info *dsta; + dsta = sta_info_get(local, skb->data); + if (dsta && !dsta->dev) { + if (net_ratelimit()) + printk(KERN_DEBUG "Station with null " + "dev structure!\n"); + } else if (dsta && dsta->dev == dev) { + /* Destination station is associated to this + * AP, so send the frame directly to it and + * do not pass the frame to local net stack. + */ + skb2 = skb; + skb = NULL; + } + if (dsta) + sta_info_put(dsta); + } + } + + if (skb) { + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, dev); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_rx(skb); + } + + if (skb2) { + /* send to wireless media */ + skb2->protocol = __constant_htons(ETH_P_802_3); + skb_set_network_header(skb2, 0); + skb_set_mac_header(skb2, 0); + dev_queue_xmit(skb2); + } + + return TXRX_QUEUED; +} + +static ieee80211_txrx_result +ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_sub_if_data *sdata; + + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_DROP; + + sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + if ((sdata->type == IEEE80211_IF_TYPE_STA || + sdata->type == IEEE80211_IF_TYPE_IBSS) && + !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) + ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status); + else + return TXRX_DROP; + + return TXRX_QUEUED; +} + +static inline ieee80211_txrx_result __ieee80211_invoke_rx_handlers( + struct ieee80211_local *local, + ieee80211_rx_handler *handlers, + struct ieee80211_txrx_data *rx, + struct sta_info *sta) +{ + ieee80211_rx_handler *handler; + ieee80211_txrx_result res = TXRX_DROP; + + for (handler = handlers; *handler != NULL; handler++) { + res = (*handler)(rx); + + switch (res) { + case TXRX_CONTINUE: + continue; + case TXRX_DROP: + I802_DEBUG_INC(local->rx_handlers_drop); + if (sta) + sta->rx_dropped++; + break; + case TXRX_QUEUED: + I802_DEBUG_INC(local->rx_handlers_queued); + break; + } + break; + } + + if (res == TXRX_DROP) + dev_kfree_skb(rx->skb); + return res; +} + +static inline void ieee80211_invoke_rx_handlers(struct ieee80211_local *local, + ieee80211_rx_handler *handlers, + struct ieee80211_txrx_data *rx, + struct sta_info *sta) +{ + if (__ieee80211_invoke_rx_handlers(local, handlers, rx, sta) == + TXRX_CONTINUE) + dev_kfree_skb(rx->skb); +} + +static void ieee80211_rx_michael_mic_report(struct net_device *dev, + struct ieee80211_hdr *hdr, + struct sta_info *sta, + struct ieee80211_txrx_data *rx) +{ + int keyidx, hdrlen; + DECLARE_MAC_BUF(mac); + DECLARE_MAC_BUF(mac2); + + hdrlen = ieee80211_get_hdrlen_from_skb(rx->skb); + if (rx->skb->len >= hdrlen + 4) + keyidx = rx->skb->data[hdrlen + 3] >> 6; + else + keyidx = -1; + + if (net_ratelimit()) + printk(KERN_DEBUG "%s: TKIP hwaccel reported Michael MIC " + "failure from %s to %s keyidx=%d\n", + dev->name, print_mac(mac, hdr->addr2), + print_mac(mac2, hdr->addr1), keyidx); + + if (!sta) { + /* + * Some hardware seem to generate incorrect Michael MIC + * reports; ignore them to avoid triggering countermeasures. + */ + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored spurious Michael MIC " + "error for unknown address %s\n", + dev->name, print_mac(mac, hdr->addr2)); + goto ignore; + } + + if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored spurious Michael MIC " + "error for a frame with no PROTECTED flag (src " + "%s)\n", dev->name, print_mac(mac, hdr->addr2)); + goto ignore; + } + + if (rx->sdata->type == IEEE80211_IF_TYPE_AP && keyidx) { + /* + * APs with pairwise keys should never receive Michael MIC + * errors for non-zero keyidx because these are reserved for + * group keys and only the AP is sending real multicast + * frames in the BSS. + */ + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored Michael MIC error for " + "a frame with non-zero keyidx (%d)" + " (src %s)\n", dev->name, keyidx, + print_mac(mac, hdr->addr2)); + goto ignore; + } + + if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && + ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored spurious Michael MIC " + "error for a frame that cannot be encrypted " + "(fc=0x%04x) (src %s)\n", + dev->name, rx->fc, print_mac(mac, hdr->addr2)); + goto ignore; + } + + mac80211_ev_michael_mic_failure(rx->dev, keyidx, hdr); + ignore: + dev_kfree_skb(rx->skb); + rx->skb = NULL; +} + +ieee80211_rx_handler ieee80211_rx_handlers[] = +{ + ieee80211_rx_h_if_stats, + ieee80211_rx_h_passive_scan, + ieee80211_rx_h_check, + ieee80211_rx_h_decrypt, + ieee80211_rx_h_sta_process, + ieee80211_rx_h_defragment, + ieee80211_rx_h_ps_poll, + ieee80211_rx_h_michael_mic_verify, + /* this must be after decryption - so header is counted in MPDU mic + * must be before pae and data, so QOS_DATA format frames + * are not passed to user space by these functions + */ + ieee80211_rx_h_remove_qos_control, + ieee80211_rx_h_802_1x_pae, + ieee80211_rx_h_drop_unencrypted, + ieee80211_rx_h_data, + ieee80211_rx_h_mgmt, + NULL +}; + +/* main receive path */ + +static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, + u8 *bssid, struct ieee80211_txrx_data *rx, + struct ieee80211_hdr *hdr) +{ + int multicast = is_multicast_ether_addr(hdr->addr1); + + switch (sdata->type) { + case IEEE80211_IF_TYPE_STA: + if (!bssid) + return 0; + if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { + if (!(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } else if (!multicast && + compare_ether_addr(sdata->dev->dev_addr, + hdr->addr1) != 0) { + if (!(sdata->dev->flags & IFF_PROMISC)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } + break; + case IEEE80211_IF_TYPE_IBSS: + if (!bssid) + return 0; + if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { + if (!(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } else if (!multicast && + compare_ether_addr(sdata->dev->dev_addr, + hdr->addr1) != 0) { + if (!(sdata->dev->flags & IFF_PROMISC)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } else if (!rx->sta) + rx->sta = ieee80211_ibss_add_sta(sdata->dev, rx->skb, + bssid, hdr->addr2); + break; + case IEEE80211_IF_TYPE_VLAN: + case IEEE80211_IF_TYPE_AP: + if (!bssid) { + if (compare_ether_addr(sdata->dev->dev_addr, + hdr->addr1)) + return 0; + } else if (!ieee80211_bssid_match(bssid, + sdata->dev->dev_addr)) { + if (!(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } + if (sdata->dev == sdata->local->mdev && + !(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + /* do not receive anything via + * master device when not scanning */ + return 0; + break; + case IEEE80211_IF_TYPE_WDS: + if (bssid || + (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + return 0; + if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2)) + return 0; + break; + case IEEE80211_IF_TYPE_MNTR: + /* take everything */ + break; + case IEEE80211_IF_TYPE_INVALID: + /* should never get here */ + WARN_ON(1); + break; + } + + return 1; +} + +/* + * This is the receive path handler. It is called by a low level driver when an + * 802.11 MPDU is received from the hardware. + */ +void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, + struct ieee80211_rx_status *status) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; + struct ieee80211_hdr *hdr; + struct ieee80211_txrx_data rx; + u16 type; + int prepres; + struct ieee80211_sub_if_data *prev = NULL; + struct sk_buff *skb_new; + u8 *bssid; + + /* + * key references and virtual interfaces are protected using RCU + * and this requires that we are in a read-side RCU section during + * receive processing + */ + rcu_read_lock(); + + /* + * Frames with failed FCS/PLCP checksum are not returned, + * all other frames are returned without radiotap header + * if it was previously present. + * Also, frames with less than 16 bytes are dropped. + */ + skb = ieee80211_rx_monitor(local, skb, status); + if (!skb) { + rcu_read_unlock(); + return; + } + + hdr = (struct ieee80211_hdr *) skb->data; + memset(&rx, 0, sizeof(rx)); + rx.skb = skb; + rx.local = local; + + rx.u.rx.status = status; + rx.fc = le16_to_cpu(hdr->frame_control); + type = rx.fc & IEEE80211_FCTL_FTYPE; + + if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT) + local->dot11ReceivedFragmentCount++; + + sta = rx.sta = sta_info_get(local, hdr->addr2); + if (sta) { + rx.dev = rx.sta->dev; + rx.sdata = IEEE80211_DEV_TO_SUB_IF(rx.dev); + } + + if ((status->flag & RX_FLAG_MMIC_ERROR)) { + ieee80211_rx_michael_mic_report(local->mdev, hdr, sta, &rx); + goto end; + } + + if (unlikely(local->sta_scanning)) + rx.flags |= IEEE80211_TXRXD_RXIN_SCAN; + + if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx, + sta) != TXRX_CONTINUE) + goto end; + skb = rx.skb; + + if (sta && !(sta->flags & (WLAN_STA_WDS | WLAN_STA_ASSOC_AP)) && + !atomic_read(&local->iff_promiscs) && + !is_multicast_ether_addr(hdr->addr1)) { + rx.flags |= IEEE80211_TXRXD_RXRA_MATCH; + ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx, + rx.sta); + sta_info_put(sta); + rcu_read_unlock(); + return; + } + + bssid = ieee80211_get_bssid(hdr, skb->len); + + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!netif_running(sdata->dev)) + continue; + + if (sdata->type == IEEE80211_IF_TYPE_MNTR) + continue; + + rx.flags |= IEEE80211_TXRXD_RXRA_MATCH; + prepres = prepare_for_handlers(sdata, bssid, &rx, hdr); + /* prepare_for_handlers can change sta */ + sta = rx.sta; + + if (!prepres) + continue; + + /* + * frame is destined for this interface, but if it's not + * also for the previous one we handle that after the + * loop to avoid copying the SKB once too much + */ + + if (!prev) { + prev = sdata; + continue; + } + + /* + * frame was destined for the previous interface + * so invoke RX handlers for it + */ + + skb_new = skb_copy(skb, GFP_ATOMIC); + if (!skb_new) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: failed to copy " + "multicast frame for %s", + wiphy_name(local->hw.wiphy), + prev->dev->name); + continue; + } + rx.skb = skb_new; + rx.dev = prev->dev; + rx.sdata = prev; + ieee80211_invoke_rx_handlers(local, local->rx_handlers, + &rx, sta); + prev = sdata; + } + if (prev) { + rx.skb = skb; + rx.dev = prev->dev; + rx.sdata = prev; + ieee80211_invoke_rx_handlers(local, local->rx_handlers, + &rx, sta); + } else + dev_kfree_skb(skb); + + end: + rcu_read_unlock(); + + if (sta) + sta_info_put(sta); +} +EXPORT_SYMBOL(__ieee80211_rx); + +/* This is a version of the rx handler that can be called from hard irq + * context. Post the skb on the queue and schedule the tasklet */ +void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb, + struct ieee80211_rx_status *status) +{ + struct ieee80211_local *local = hw_to_local(hw); + + BUILD_BUG_ON(sizeof(struct ieee80211_rx_status) > sizeof(skb->cb)); + + skb->dev = local->mdev; + /* copy status into skb->cb for use by tasklet */ + memcpy(skb->cb, status, sizeof(*status)); + skb->pkt_type = IEEE80211_RX_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_rx_irqsafe); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ab7b1f067c6e..e8491554a5dc 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -19,7 +19,6 @@ #include "ieee80211_i.h" #include "ieee80211_rate.h" #include "sta_info.h" -#include "debugfs_key.h" #include "debugfs_sta.h" /* Caller must hold local->sta_lock */ @@ -32,38 +31,34 @@ static void sta_info_hash_add(struct ieee80211_local *local, /* Caller must hold local->sta_lock */ -static void sta_info_hash_del(struct ieee80211_local *local, - struct sta_info *sta) +static int sta_info_hash_del(struct ieee80211_local *local, + struct sta_info *sta) { struct sta_info *s; s = local->sta_hash[STA_HASH(sta->addr)]; if (!s) - return; - if (memcmp(s->addr, sta->addr, ETH_ALEN) == 0) { + return -ENOENT; + if (s == sta) { local->sta_hash[STA_HASH(sta->addr)] = s->hnext; - return; + return 0; } - while (s->hnext && memcmp(s->hnext->addr, sta->addr, ETH_ALEN) != 0) + while (s->hnext && s->hnext != sta) s = s->hnext; - if (s->hnext) - s->hnext = s->hnext->hnext; - else - printk(KERN_ERR "%s: could not remove STA " MAC_FMT " from " - "hash table\n", local->mdev->name, MAC_ARG(sta->addr)); -} + if (s->hnext) { + s->hnext = sta->hnext; + return 0; + } -static inline void __sta_info_get(struct sta_info *sta) -{ - kref_get(&sta->kref); + return -ENOENT; } struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr) { struct sta_info *sta; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); sta = local->sta_hash[STA_HASH(addr)]; while (sta) { if (memcmp(sta->addr, addr, ETH_ALEN) == 0) { @@ -72,7 +67,7 @@ struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr) } sta = sta->hnext; } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); return sta; } @@ -85,7 +80,7 @@ int sta_info_min_txrate_get(struct ieee80211_local *local) int min_txrate = 9999999; int i; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); mode = local->oper_hw_mode; for (i = 0; i < STA_HASH_SIZE; i++) { sta = local->sta_hash[i]; @@ -95,7 +90,7 @@ int sta_info_min_txrate_get(struct ieee80211_local *local) sta = sta->hnext; } } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); if (min_txrate == 9999999) min_txrate = 0; @@ -122,8 +117,6 @@ static void sta_info_release(struct kref *kref) } rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv); rate_control_put(sta->rate_ctrl); - if (sta->key) - ieee80211_debugfs_key_sta_del(sta->key, sta); kfree(sta); } @@ -139,6 +132,7 @@ struct sta_info * sta_info_add(struct ieee80211_local *local, struct net_device *dev, u8 *addr, gfp_t gfp) { struct sta_info *sta; + DECLARE_MAC_BUF(mac); sta = kzalloc(sizeof(*sta), gfp); if (!sta) @@ -150,7 +144,6 @@ struct sta_info * sta_info_add(struct ieee80211_local *local, sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, gfp); if (!sta->rate_ctrl_priv) { rate_control_put(sta->rate_ctrl); - kref_put(&sta->kref, sta_info_release); kfree(sta); return NULL; } @@ -162,63 +155,40 @@ struct sta_info * sta_info_add(struct ieee80211_local *local, skb_queue_head_init(&sta->tx_filtered); __sta_info_get(sta); /* sta used by caller, decremented by * sta_info_put() */ - spin_lock_bh(&local->sta_lock); + write_lock_bh(&local->sta_lock); list_add(&sta->list, &local->sta_list); local->num_sta++; sta_info_hash_add(local, sta); - spin_unlock_bh(&local->sta_lock); - if (local->ops->sta_table_notification) - local->ops->sta_table_notification(local_to_hw(local), - local->num_sta); - sta->key_idx_compression = HW_KEY_IDX_INVALID; + if (local->ops->sta_notify) + local->ops->sta_notify(local_to_hw(local), dev->ifindex, + STA_NOTIFY_ADD, addr); + write_unlock_bh(&local->sta_lock); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Added STA " MAC_FMT "\n", - local->mdev->name, MAC_ARG(addr)); + printk(KERN_DEBUG "%s: Added STA %s\n", + wiphy_name(local->hw.wiphy), print_mac(mac, addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ #ifdef CONFIG_MAC80211_DEBUGFS - if (!in_interrupt()) { - sta->debugfs_registered = 1; - ieee80211_sta_debugfs_add(sta); - rate_control_add_sta_debugfs(sta); - } else { - /* debugfs entry adding might sleep, so schedule process - * context task for adding entry for STAs that do not yet - * have one. */ - queue_work(local->hw.workqueue, &local->sta_debugfs_add); - } + /* debugfs entry adding might sleep, so schedule process + * context task for adding entry for STAs that do not yet + * have one. */ + queue_work(local->hw.workqueue, &local->sta_debugfs_add); #endif return sta; } -static void finish_sta_info_free(struct ieee80211_local *local, - struct sta_info *sta) -{ -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Removed STA " MAC_FMT "\n", - local->mdev->name, MAC_ARG(sta->addr)); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - - if (sta->key) { - ieee80211_debugfs_key_remove(sta->key); - ieee80211_key_free(sta->key); - sta->key = NULL; - } - - rate_control_remove_sta_debugfs(sta); - ieee80211_sta_debugfs_remove(sta); - - sta_info_put(sta); -} - -static void sta_info_remove(struct sta_info *sta) +/* Caller must hold local->sta_lock */ +void sta_info_remove(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata; - sta_info_hash_del(local, sta); + /* don't do anything if we've been removed already */ + if (sta_info_hash_del(local, sta)) + return; + list_del(&sta->list); sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); if (sta->flags & WLAN_STA_PS) { @@ -228,61 +198,45 @@ static void sta_info_remove(struct sta_info *sta) } local->num_sta--; sta_info_remove_aid_ptr(sta); + } -void sta_info_free(struct sta_info *sta, int locked) +void sta_info_free(struct sta_info *sta) { struct sk_buff *skb; struct ieee80211_local *local = sta->local; + DECLARE_MAC_BUF(mac); - if (!locked) { - spin_lock_bh(&local->sta_lock); - sta_info_remove(sta); - spin_unlock_bh(&local->sta_lock); - } else { - sta_info_remove(sta); - } - if (local->ops->sta_table_notification) - local->ops->sta_table_notification(local_to_hw(local), - local->num_sta); + might_sleep(); + + write_lock_bh(&local->sta_lock); + sta_info_remove(sta); + write_unlock_bh(&local->sta_lock); while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { local->total_ps_buffered--; - dev_kfree_skb_any(skb); + dev_kfree_skb(skb); } while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - dev_kfree_skb_any(skb); + dev_kfree_skb(skb); } - if (sta->key) { - if (local->ops->set_key) { - struct ieee80211_key_conf *key; - key = ieee80211_key_data2conf(local, sta->key); - if (key) { - local->ops->set_key(local_to_hw(local), - DISABLE_KEY, - sta->addr, key, sta->aid); - kfree(key); - } - } - } else if (sta->key_idx_compression != HW_KEY_IDX_INVALID) { - struct ieee80211_key_conf conf; - memset(&conf, 0, sizeof(conf)); - conf.hw_key_idx = sta->key_idx_compression; - conf.alg = ALG_NULL; - conf.flags |= IEEE80211_KEY_FORCE_SW_ENCRYPT; - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - sta->addr, &conf, sta->aid); - sta->key_idx_compression = HW_KEY_IDX_INVALID; - } +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Removed STA %s\n", + wiphy_name(local->hw.wiphy), print_mac(mac, sta->addr)); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ -#ifdef CONFIG_MAC80211_DEBUGFS - if (in_atomic()) { - list_add(&sta->list, &local->deleted_sta_list); - queue_work(local->hw.workqueue, &local->sta_debugfs_add); - } else -#endif - finish_sta_info_free(local, sta); + ieee80211_key_free(sta->key); + sta->key = NULL; + + if (local->ops->sta_notify) + local->ops->sta_notify(local_to_hw(local), sta->dev->ifindex, + STA_NOTIFY_REMOVE, sta->addr); + + rate_control_remove_sta_debugfs(sta); + ieee80211_sta_debugfs_remove(sta); + + sta_info_put(sta); } @@ -312,6 +266,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, { unsigned long flags; struct sk_buff *skb; + DECLARE_MAC_BUF(mac); if (skb_queue_empty(&sta->ps_tx_buf)) return; @@ -330,7 +285,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, if (skb) { local->total_ps_buffered--; printk(KERN_DEBUG "Buffered frame expired (STA " - MAC_FMT ")\n", MAC_ARG(sta->addr)); + "%s)\n", print_mac(mac, sta->addr)); dev_kfree_skb(skb); } else break; @@ -343,13 +298,13 @@ static void sta_info_cleanup(unsigned long data) struct ieee80211_local *local = (struct ieee80211_local *) data; struct sta_info *sta; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); list_for_each_entry(sta, &local->sta_list, list) { __sta_info_get(sta); sta_info_cleanup_expire_buffered(local, sta); sta_info_put(sta); } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); local->sta_cleanup.expires = jiffies + STA_INFO_CLEANUP_INTERVAL; add_timer(&local->sta_cleanup); @@ -363,35 +318,20 @@ static void sta_info_debugfs_add_task(struct work_struct *work) struct sta_info *sta, *tmp; while (1) { - spin_lock_bh(&local->sta_lock); - if (!list_empty(&local->deleted_sta_list)) { - sta = list_entry(local->deleted_sta_list.next, - struct sta_info, list); - list_del(local->deleted_sta_list.next); - } else - sta = NULL; - spin_unlock_bh(&local->sta_lock); - if (!sta) - break; - finish_sta_info_free(local, sta); - } - - while (1) { sta = NULL; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); list_for_each_entry(tmp, &local->sta_list, list) { - if (!tmp->debugfs_registered) { + if (!tmp->debugfs.dir) { sta = tmp; __sta_info_get(sta); break; } } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); if (!sta) break; - sta->debugfs_registered = 1; ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); sta_info_put(sta); @@ -401,9 +341,8 @@ static void sta_info_debugfs_add_task(struct work_struct *work) void sta_info_init(struct ieee80211_local *local) { - spin_lock_init(&local->sta_lock); + rwlock_init(&local->sta_lock); INIT_LIST_HEAD(&local->sta_list); - INIT_LIST_HEAD(&local->deleted_sta_list); init_timer(&local->sta_cleanup); local->sta_cleanup.expires = jiffies + STA_INFO_CLEANUP_INTERVAL; @@ -423,17 +362,8 @@ int sta_info_start(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { - struct sta_info *sta, *tmp; - del_timer(&local->sta_cleanup); - - list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - /* sta_info_free must be called with 0 as the last - * parameter to ensure all debugfs sta entries are - * unregistered. We don't need locking at this - * point. */ - sta_info_free(sta, 0); - } + sta_info_flush(local, NULL); } void sta_info_remove_aid_ptr(struct sta_info *sta) @@ -461,10 +391,19 @@ void sta_info_remove_aid_ptr(struct sta_info *sta) void sta_info_flush(struct ieee80211_local *local, struct net_device *dev) { struct sta_info *sta, *tmp; + LIST_HEAD(tmp_list); - spin_lock_bh(&local->sta_lock); + write_lock_bh(&local->sta_lock); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) - if (!dev || dev == sta->dev) - sta_info_free(sta, 1); - spin_unlock_bh(&local->sta_lock); + if (!dev || dev == sta->dev) { + __sta_info_get(sta); + sta_info_remove(sta); + list_add_tail(&sta->list, &tmp_list); + } + write_unlock_bh(&local->sta_lock); + + list_for_each_entry_safe(sta, tmp, &tmp_list, list) { + sta_info_free(sta); + sta_info_put(sta); + } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index b5591d2f60a4..8f7ebe41c024 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -26,6 +26,8 @@ * send and receive non-IEEE 802.1X frames */ #define WLAN_STA_SHORT_PREAMBLE BIT(7) +/* whether this is an AP that we are associated with as a client */ +#define WLAN_STA_ASSOC_AP BIT(8) #define WLAN_STA_WME BIT(9) #define WLAN_STA_WDS BIT(27) @@ -90,27 +92,11 @@ struct sta_info { int channel_use; int channel_use_raw; - u8 antenna_sel_tx; - u8 antenna_sel_rx; - - - int key_idx_compression; /* key table index for compression and TX - * filtering; used only if sta->key is not - * set */ - -#ifdef CONFIG_MAC80211_DEBUGFS - int debugfs_registered; -#endif - int assoc_ap; /* whether this is an AP that we are - * associated with as a client */ - #ifdef CONFIG_MAC80211_DEBUG_COUNTERS unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES]; unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES]; #endif /* CONFIG_MAC80211_DEBUG_COUNTERS */ - int vlan_id; - u16 listen_interval; #ifdef CONFIG_MAC80211_DEBUGFS @@ -149,12 +135,18 @@ struct sta_info { */ #define STA_INFO_CLEANUP_INTERVAL (10 * HZ) +static inline void __sta_info_get(struct sta_info *sta) +{ + kref_get(&sta->kref); +} + struct sta_info * sta_info_get(struct ieee80211_local *local, u8 *addr); int sta_info_min_txrate_get(struct ieee80211_local *local); void sta_info_put(struct sta_info *sta); struct sta_info * sta_info_add(struct ieee80211_local *local, struct net_device *dev, u8 *addr, gfp_t gfp); -void sta_info_free(struct sta_info *sta, int locked); +void sta_info_remove(struct sta_info *sta); +void sta_info_free(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); int sta_info_start(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 41621720e560..3abe194e4d55 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -182,7 +182,7 @@ u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, *pos++ = iv0; *pos++ = iv1; *pos++ = iv2; - *pos++ = (key->keyidx << 6) | (1 << 5) /* Ext IV */; + *pos++ = (key->conf.keyidx << 6) | (1 << 5) /* Ext IV */; *pos++ = key->u.tkip.iv32 & 0xff; *pos++ = (key->u.tkip.iv32 >> 8) & 0xff; *pos++ = (key->u.tkip.iv32 >> 16) & 0xff; @@ -194,7 +194,7 @@ u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta, u16 *phase1key) { - tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], key->u.tkip.iv32, phase1key); } @@ -204,12 +204,13 @@ void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta, /* Calculate per-packet key */ if (key->u.tkip.iv16 == 0 || !key->u.tkip.tx_initialized) { /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], key->u.tkip.iv32, key->u.tkip.p1k); key->u.tkip.tx_initialized = 1; } - tkip_mixing_phase2(key->u.tkip.p1k, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase2(key->u.tkip.p1k, + &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], key->u.tkip.iv16, rc4key); } @@ -237,7 +238,8 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, struct ieee80211_key *key, u8 *payload, size_t payload_len, u8 *ta, - int only_iv, int queue) + int only_iv, int queue, + u32 *out_iv32, u16 *out_iv16) { u32 iv32; u32 iv16; @@ -266,7 +268,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, if (!(keyid & (1 << 5))) return TKIP_DECRYPT_NO_EXT_IV; - if ((keyid >> 6) != key->keyidx) + if ((keyid >> 6) != key->conf.keyidx) return TKIP_DECRYPT_INVALID_KEYIDX; if (key->u.tkip.rx_initialized[queue] && @@ -274,9 +276,10 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, (iv32 == key->u.tkip.iv32_rx[queue] && iv16 <= key->u.tkip.iv16_rx[queue]))) { #ifdef CONFIG_TKIP_DEBUG + DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "TKIP replay detected for RX frame from " - MAC_FMT " (RX IV (%04x,%02x) <= prev. IV (%04x,%02x)\n", - MAC_ARG(ta), + "%s (RX IV (%04x,%02x) <= prev. IV (%04x,%02x)\n", + print_mac(mac, ta), iv32, iv16, key->u.tkip.iv32_rx[queue], key->u.tkip.iv16_rx[queue]); #endif /* CONFIG_TKIP_DEBUG */ @@ -293,16 +296,18 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, key->u.tkip.iv32_rx[queue] != iv32) { key->u.tkip.rx_initialized[queue] = 1; /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], iv32, key->u.tkip.p1k_rx[queue]); #ifdef CONFIG_TKIP_DEBUG { int i; - printk(KERN_DEBUG "TKIP decrypt: Phase1 TA=" MAC_FMT - " TK=", MAC_ARG(ta)); + DECLARE_MAC_BUF(mac); + printk(KERN_DEBUG "TKIP decrypt: Phase1 TA=%s" + " TK=", print_mac(mac, ta)); for (i = 0; i < 16; i++) printk("%02x ", - key->key[ALG_TKIP_TEMP_ENCR_KEY + i]); + key->conf.key[ + ALG_TKIP_TEMP_ENCR_KEY + i]); printk("\n"); printk(KERN_DEBUG "TKIP decrypt: P1K="); for (i = 0; i < 5; i++) @@ -313,7 +318,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, } tkip_mixing_phase2(key->u.tkip.p1k_rx[queue], - &key->key[ALG_TKIP_TEMP_ENCR_KEY], + &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], iv16, rc4key); #ifdef CONFIG_TKIP_DEBUG { @@ -328,11 +333,14 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12); done: if (res == TKIP_DECRYPT_OK) { - /* FIX: these should be updated only after Michael MIC has been - * verified */ - /* Record previously received IV */ - key->u.tkip.iv32_rx[queue] = iv32; - key->u.tkip.iv16_rx[queue] = iv16; + /* + * Record previously received IV, will be copied into the + * key information after MIC verification. It is possible + * that we don't catch replays of fragments but that's ok + * because the Michael MIC verication will then fail. + */ + *out_iv32 = iv32; + *out_iv16 = iv16; } return res; diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h index a0d181a18049..73d8ef2a93b0 100644 --- a/net/mac80211/tkip.h +++ b/net/mac80211/tkip.h @@ -31,6 +31,7 @@ enum { int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, struct ieee80211_key *key, u8 *payload, size_t payload_len, u8 *ta, - int only_iv, int queue); + int only_iv, int queue, + u32 *out_iv32, u16 *out_iv16); #endif /* TKIP_H */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c new file mode 100644 index 000000000000..1a531543bccb --- /dev/null +++ b/net/mac80211/tx.c @@ -0,0 +1,1903 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * + * Transmit and frame generation functions. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/bitmap.h> +#include <linux/rcupdate.h> +#include <net/net_namespace.h> +#include <net/ieee80211_radiotap.h> +#include <net/cfg80211.h> +#include <net/mac80211.h> +#include <asm/unaligned.h> + +#include "ieee80211_i.h" +#include "ieee80211_led.h" +#include "wep.h" +#include "wpa.h" +#include "wme.h" +#include "ieee80211_rate.h" + +#define IEEE80211_TX_OK 0 +#define IEEE80211_TX_AGAIN 1 +#define IEEE80211_TX_FRAG_AGAIN 2 + +/* misc utils */ + +static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata, + struct ieee80211_hdr *hdr) +{ + /* Set the sequence number for this frame. */ + hdr->seq_ctrl = cpu_to_le16(sdata->sequence); + + /* Increase the sequence number. */ + sdata->sequence = (sdata->sequence + 0x10) & IEEE80211_SCTL_SEQ; +} + +#ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP +static void ieee80211_dump_frame(const char *ifname, const char *title, + const struct sk_buff *skb) +{ + const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u16 fc; + int hdrlen; + DECLARE_MAC_BUF(mac); + + printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len); + if (skb->len < 4) { + printk("\n"); + return; + } + + fc = le16_to_cpu(hdr->frame_control); + hdrlen = ieee80211_get_hdrlen(fc); + if (hdrlen > skb->len) + hdrlen = skb->len; + if (hdrlen >= 4) + printk(" FC=0x%04x DUR=0x%04x", + fc, le16_to_cpu(hdr->duration_id)); + if (hdrlen >= 10) + printk(" A1=%s", print_mac(mac, hdr->addr1)); + if (hdrlen >= 16) + printk(" A2=%s", print_mac(mac, hdr->addr2)); + if (hdrlen >= 24) + printk(" A3=%s", print_mac(mac, hdr->addr3)); + if (hdrlen >= 30) + printk(" A4=%s", print_mac(mac, hdr->addr4)); + printk("\n"); +} +#else /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ +static inline void ieee80211_dump_frame(const char *ifname, const char *title, + struct sk_buff *skb) +{ +} +#endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ + +static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr, + int next_frag_len) +{ + int rate, mrate, erp, dur, i; + struct ieee80211_rate *txrate = tx->u.tx.rate; + struct ieee80211_local *local = tx->local; + struct ieee80211_hw_mode *mode = tx->u.tx.mode; + + erp = txrate->flags & IEEE80211_RATE_ERP; + + /* + * data and mgmt (except PS Poll): + * - during CFP: 32768 + * - during contention period: + * if addr1 is group address: 0 + * if more fragments = 0 and addr1 is individual address: time to + * transmit one ACK plus SIFS + * if more fragments = 1 and addr1 is individual address: time to + * transmit next fragment plus 2 x ACK plus 3 x SIFS + * + * IEEE 802.11, 9.6: + * - control response frame (CTS or ACK) shall be transmitted using the + * same rate as the immediately previous frame in the frame exchange + * sequence, if this rate belongs to the PHY mandatory rates, or else + * at the highest possible rate belonging to the PHY rates in the + * BSSBasicRateSet + */ + + if ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) { + /* TODO: These control frames are not currently sent by + * 80211.o, but should they be implemented, this function + * needs to be updated to support duration field calculation. + * + * RTS: time needed to transmit pending data/mgmt frame plus + * one CTS frame plus one ACK frame plus 3 x SIFS + * CTS: duration of immediately previous RTS minus time + * required to transmit CTS and its SIFS + * ACK: 0 if immediately previous directed data/mgmt had + * more=0, with more=1 duration in ACK frame is duration + * from previous frame minus time needed to transmit ACK + * and its SIFS + * PS Poll: BIT(15) | BIT(14) | aid + */ + return 0; + } + + /* data/mgmt */ + if (0 /* FIX: data/mgmt during CFP */) + return 32768; + + if (group_addr) /* Group address as the destination - no ACK */ + return 0; + + /* Individual destination address: + * IEEE 802.11, Ch. 9.6 (after IEEE 802.11g changes) + * CTS and ACK frames shall be transmitted using the highest rate in + * basic rate set that is less than or equal to the rate of the + * immediately previous frame and that is using the same modulation + * (CCK or OFDM). If no basic rate set matches with these requirements, + * the highest mandatory rate of the PHY that is less than or equal to + * the rate of the previous frame is used. + * Mandatory rates for IEEE 802.11g PHY: 1, 2, 5.5, 11, 6, 12, 24 Mbps + */ + rate = -1; + mrate = 10; /* use 1 Mbps if everything fails */ + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *r = &mode->rates[i]; + if (r->rate > txrate->rate) + break; + + if (IEEE80211_RATE_MODULATION(txrate->flags) != + IEEE80211_RATE_MODULATION(r->flags)) + continue; + + if (r->flags & IEEE80211_RATE_BASIC) + rate = r->rate; + else if (r->flags & IEEE80211_RATE_MANDATORY) + mrate = r->rate; + } + if (rate == -1) { + /* No matching basic rate found; use highest suitable mandatory + * PHY rate */ + rate = mrate; + } + + /* Time needed to transmit ACK + * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up + * to closest integer */ + + dur = ieee80211_frame_duration(local, 10, rate, erp, + tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE); + + if (next_frag_len) { + /* Frame is fragmented: duration increases with time needed to + * transmit next fragment plus ACK and 2 x SIFS. */ + dur *= 2; /* ACK + SIFS */ + /* next fragment */ + dur += ieee80211_frame_duration(local, next_frag_len, + txrate->rate, erp, + tx->sdata->flags & + IEEE80211_SDATA_SHORT_PREAMBLE); + } + + return dur; +} + +static inline int __ieee80211_queue_stopped(const struct ieee80211_local *local, + int queue) +{ + return test_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); +} + +static inline int __ieee80211_queue_pending(const struct ieee80211_local *local, + int queue) +{ + return test_bit(IEEE80211_LINK_STATE_PENDING, &local->state[queue]); +} + +static int inline is_ieee80211_device(struct net_device *dev, + struct net_device *master) +{ + return (wdev_priv(dev->ieee80211_ptr) == + wdev_priv(master->ieee80211_ptr)); +} + +/* tx handlers */ + +static ieee80211_txrx_result +ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx) +{ +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + struct sk_buff *skb = tx->skb; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + u32 sta_flags; + + if (unlikely(tx->flags & IEEE80211_TXRXD_TX_INJECTED)) + return TXRX_CONTINUE; + + if (unlikely(tx->local->sta_scanning != 0) && + ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ)) + return TXRX_DROP; + + if (tx->flags & IEEE80211_TXRXD_TXPS_BUFFERED) + return TXRX_CONTINUE; + + sta_flags = tx->sta ? tx->sta->flags : 0; + + if (likely(tx->flags & IEEE80211_TXRXD_TXUNICAST)) { + if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && + tx->sdata->type != IEEE80211_IF_TYPE_IBSS && + (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + DECLARE_MAC_BUF(mac); + printk(KERN_DEBUG "%s: dropped data frame to not " + "associated station %s\n", + tx->dev->name, print_mac(mac, hdr->addr1)); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); + return TXRX_DROP; + } + } else { + if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + tx->local->num_sta == 0 && + tx->sdata->type != IEEE80211_IF_TYPE_IBSS)) { + /* + * No associated STAs - no need to send multicast + * frames. + */ + return TXRX_DROP; + } + return TXRX_CONTINUE; + } + + if (unlikely(/* !injected && */ tx->sdata->ieee802_1x && + !(sta_flags & WLAN_STA_AUTHORIZED))) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + DECLARE_MAC_BUF(mac); + printk(KERN_DEBUG "%s: dropped frame to %s" + " (unauthorized port)\n", tx->dev->name, + print_mac(mac, hdr->addr1)); +#endif + I802_DEBUG_INC(tx->local->tx_handlers_drop_unauth_port); + return TXRX_DROP; + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_sequence(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + + if (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)) >= 24) + ieee80211_include_sequence(tx->sdata, hdr); + + return TXRX_CONTINUE; +} + +/* This function is called whenever the AP is about to exceed the maximum limit + * of buffered frames for power saving STAs. This situation should not really + * happen often during normal operation, so dropping the oldest buffered packet + * from each queue should be OK to make some room for new frames. */ +static void purge_old_ps_buffers(struct ieee80211_local *local) +{ + int total = 0, purged = 0; + struct sk_buff *skb; + struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; + + /* + * virtual interfaces are protected by RCU + */ + rcu_read_lock(); + + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + struct ieee80211_if_ap *ap; + if (sdata->dev == local->mdev || + sdata->type != IEEE80211_IF_TYPE_AP) + continue; + ap = &sdata->u.ap; + skb = skb_dequeue(&ap->ps_bc_buf); + if (skb) { + purged++; + dev_kfree_skb(skb); + } + total += skb_queue_len(&ap->ps_bc_buf); + } + rcu_read_unlock(); + + read_lock_bh(&local->sta_lock); + list_for_each_entry(sta, &local->sta_list, list) { + skb = skb_dequeue(&sta->ps_tx_buf); + if (skb) { + purged++; + dev_kfree_skb(skb); + } + total += skb_queue_len(&sta->ps_tx_buf); + } + read_unlock_bh(&local->sta_lock); + + local->total_ps_buffered = total; + printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", + wiphy_name(local->hw.wiphy), purged); +} + +static inline ieee80211_txrx_result +ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx) +{ + /* broadcast/multicast frame */ + /* If any of the associated stations is in power save mode, + * the frame is buffered to be sent after DTIM beacon frame */ + if ((tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) && + tx->sdata->type != IEEE80211_IF_TYPE_WDS && + tx->sdata->bss && atomic_read(&tx->sdata->bss->num_sta_ps) && + !(tx->fc & IEEE80211_FCTL_ORDER)) { + if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) + purge_old_ps_buffers(tx->local); + if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= + AP_MAX_BC_BUFFER) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: BC TX buffer full - " + "dropping the oldest frame\n", + tx->dev->name); + } + dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf)); + } else + tx->local->total_ps_buffered++; + skb_queue_tail(&tx->sdata->bss->ps_bc_buf, tx->skb); + return TXRX_QUEUED; + } + + return TXRX_CONTINUE; +} + +static inline ieee80211_txrx_result +ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx) +{ + struct sta_info *sta = tx->sta; + DECLARE_MAC_BUF(mac); + + if (unlikely(!sta || + ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT && + (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP))) + return TXRX_CONTINUE; + + if (unlikely((sta->flags & WLAN_STA_PS) && !sta->pspoll)) { + struct ieee80211_tx_packet_data *pkt_data; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "STA %s aid %d: PS buffer (entries " + "before %d)\n", + print_mac(mac, sta->addr), sta->aid, + skb_queue_len(&sta->ps_tx_buf)); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + sta->flags |= WLAN_STA_TIM; + if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) + purge_old_ps_buffers(tx->local); + if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) { + struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: STA %s TX " + "buffer full - dropping oldest frame\n", + tx->dev->name, print_mac(mac, sta->addr)); + } + dev_kfree_skb(old); + } else + tx->local->total_ps_buffered++; + /* Queue frame to be sent after STA sends an PS Poll frame */ + if (skb_queue_empty(&sta->ps_tx_buf)) { + if (tx->local->ops->set_tim) + tx->local->ops->set_tim(local_to_hw(tx->local), + sta->aid, 1); + if (tx->sdata->bss) + bss_tim_set(tx->local, tx->sdata->bss, sta->aid); + } + pkt_data = (struct ieee80211_tx_packet_data *)tx->skb->cb; + pkt_data->jiffies = jiffies; + skb_queue_tail(&sta->ps_tx_buf, tx->skb); + return TXRX_QUEUED; + } +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + else if (unlikely(sta->flags & WLAN_STA_PS)) { + printk(KERN_DEBUG "%s: STA %s in PS mode, but pspoll " + "set -> send frame\n", tx->dev->name, + print_mac(mac, sta->addr)); + } +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + sta->pspoll = 0; + + return TXRX_CONTINUE; +} + + +static ieee80211_txrx_result +ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx) +{ + if (unlikely(tx->flags & IEEE80211_TXRXD_TXPS_BUFFERED)) + return TXRX_CONTINUE; + + if (tx->flags & IEEE80211_TXRXD_TXUNICAST) + return ieee80211_tx_h_unicast_ps_buf(tx); + else + return ieee80211_tx_h_multicast_ps_buf(tx); +} + + + + +static ieee80211_txrx_result +ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_key *key; + + if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) + tx->key = NULL; + else if (tx->sta && (key = rcu_dereference(tx->sta->key))) + tx->key = key; + else if ((key = rcu_dereference(tx->sdata->default_key))) + tx->key = key; + else if (tx->sdata->drop_unencrypted && + !(tx->sdata->eapol && ieee80211_is_eapol(tx->skb))) { + I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); + return TXRX_DROP; + } else { + tx->key = NULL; + tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; + } + + if (tx->key) { + tx->key->tx_rx_count++; + /* TODO: add threshold stuff again */ + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + size_t hdrlen, per_fragm, num_fragm, payload_len, left; + struct sk_buff **frags, *first, *frag; + int i; + u16 seq; + u8 *pos; + int frag_threshold = tx->local->fragmentation_threshold; + + if (!(tx->flags & IEEE80211_TXRXD_FRAGMENTED)) + return TXRX_CONTINUE; + + first = tx->skb; + + hdrlen = ieee80211_get_hdrlen(tx->fc); + payload_len = first->len - hdrlen; + per_fragm = frag_threshold - hdrlen - FCS_LEN; + num_fragm = DIV_ROUND_UP(payload_len, per_fragm); + + frags = kzalloc(num_fragm * sizeof(struct sk_buff *), GFP_ATOMIC); + if (!frags) + goto fail; + + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREFRAGS); + seq = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ; + pos = first->data + hdrlen + per_fragm; + left = payload_len - per_fragm; + for (i = 0; i < num_fragm - 1; i++) { + struct ieee80211_hdr *fhdr; + size_t copylen; + + if (left <= 0) + goto fail; + + /* reserve enough extra head and tail room for possible + * encryption */ + frag = frags[i] = + dev_alloc_skb(tx->local->tx_headroom + + frag_threshold + + IEEE80211_ENCRYPT_HEADROOM + + IEEE80211_ENCRYPT_TAILROOM); + if (!frag) + goto fail; + /* Make sure that all fragments use the same priority so + * that they end up using the same TX queue */ + frag->priority = first->priority; + skb_reserve(frag, tx->local->tx_headroom + + IEEE80211_ENCRYPT_HEADROOM); + fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); + memcpy(fhdr, first->data, hdrlen); + if (i == num_fragm - 2) + fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); + fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG)); + copylen = left > per_fragm ? per_fragm : left; + memcpy(skb_put(frag, copylen), pos, copylen); + + pos += copylen; + left -= copylen; + } + skb_trim(first, hdrlen + per_fragm); + + tx->u.tx.num_extra_frag = num_fragm - 1; + tx->u.tx.extra_frag = frags; + + return TXRX_CONTINUE; + + fail: + printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name); + if (frags) { + for (i = 0; i < num_fragm - 1; i++) + if (frags[i]) + dev_kfree_skb(frags[i]); + kfree(frags); + } + I802_DEBUG_INC(tx->local->tx_handlers_drop_fragment); + return TXRX_DROP; +} + +static ieee80211_txrx_result +ieee80211_tx_h_encrypt(struct ieee80211_txrx_data *tx) +{ + if (!tx->key) + return TXRX_CONTINUE; + + switch (tx->key->conf.alg) { + case ALG_WEP: + return ieee80211_crypto_wep_encrypt(tx); + case ALG_TKIP: + return ieee80211_crypto_tkip_encrypt(tx); + case ALG_CCMP: + return ieee80211_crypto_ccmp_encrypt(tx); + } + + /* not reached */ + WARN_ON(1); + return TXRX_DROP; +} + +static ieee80211_txrx_result +ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx) +{ + struct rate_control_extra extra; + + if (likely(!tx->u.tx.rate)) { + memset(&extra, 0, sizeof(extra)); + extra.mode = tx->u.tx.mode; + extra.ethertype = tx->ethertype; + + tx->u.tx.rate = rate_control_get_rate(tx->local, tx->dev, + tx->skb, &extra); + if (unlikely(extra.probe != NULL)) { + tx->u.tx.control->flags |= + IEEE80211_TXCTL_RATE_CTRL_PROBE; + tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val; + tx->u.tx.rate = extra.probe; + } else + tx->u.tx.control->alt_retry_rate = -1; + + if (!tx->u.tx.rate) + return TXRX_DROP; + } else + tx->u.tx.control->alt_retry_rate = -1; + + if (tx->u.tx.mode->mode == MODE_IEEE80211G && + (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) && + (tx->flags & IEEE80211_TXRXD_FRAGMENTED) && extra.nonerp) { + tx->u.tx.last_frag_rate = tx->u.tx.rate; + if (extra.probe) + tx->flags &= ~IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + else + tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + tx->u.tx.rate = extra.nonerp; + tx->u.tx.control->rate = extra.nonerp; + tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE; + } else { + tx->u.tx.last_frag_rate = tx->u.tx.rate; + tx->u.tx.control->rate = tx->u.tx.rate; + } + tx->u.tx.control->tx_rate = tx->u.tx.rate->val; + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + u16 fc = le16_to_cpu(hdr->frame_control); + u16 dur; + struct ieee80211_tx_control *control = tx->u.tx.control; + struct ieee80211_hw_mode *mode = tx->u.tx.mode; + + if (!control->retry_limit) { + if (!is_multicast_ether_addr(hdr->addr1)) { + if (tx->skb->len + FCS_LEN > tx->local->rts_threshold + && tx->local->rts_threshold < + IEEE80211_MAX_RTS_THRESHOLD) { + control->flags |= + IEEE80211_TXCTL_USE_RTS_CTS; + control->flags |= + IEEE80211_TXCTL_LONG_RETRY_LIMIT; + control->retry_limit = + tx->local->long_retry_limit; + } else { + control->retry_limit = + tx->local->short_retry_limit; + } + } else { + control->retry_limit = 1; + } + } + + if (tx->flags & IEEE80211_TXRXD_FRAGMENTED) { + /* Do not use multiple retry rates when sending fragmented + * frames. + * TODO: The last fragment could still use multiple retry + * rates. */ + control->alt_retry_rate = -1; + } + + /* Use CTS protection for unicast frames sent using extended rates if + * there are associated non-ERP stations and RTS/CTS is not configured + * for the frame. */ + if (mode->mode == MODE_IEEE80211G && + (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) && + (tx->flags & IEEE80211_TXRXD_TXUNICAST) && + (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) && + !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) + control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; + + /* Transmit data frames using short preambles if the driver supports + * short preambles at the selected rate and short preambles are + * available on the network at the current point in time. */ + if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) && + (tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) && + (tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && + (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) { + tx->u.tx.control->tx_rate = tx->u.tx.rate->val2; + } + + /* Setup duration field for the first fragment of the frame. Duration + * for remaining fragments will be updated when they are being sent + * to low-level driver in ieee80211_tx(). */ + dur = ieee80211_duration(tx, is_multicast_ether_addr(hdr->addr1), + (tx->flags & IEEE80211_TXRXD_FRAGMENTED) ? + tx->u.tx.extra_frag[0]->len : 0); + hdr->duration_id = cpu_to_le16(dur); + + if ((control->flags & IEEE80211_TXCTL_USE_RTS_CTS) || + (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) { + struct ieee80211_rate *rate; + + /* Do not use multiple retry rates when using RTS/CTS */ + control->alt_retry_rate = -1; + + /* Use min(data rate, max base rate) as CTS/RTS rate */ + rate = tx->u.tx.rate; + while (rate > mode->rates && + !(rate->flags & IEEE80211_RATE_BASIC)) + rate--; + + control->rts_cts_rate = rate->val; + control->rts_rate = rate; + } + + if (tx->sta) { + tx->sta->tx_packets++; + tx->sta->tx_fragments++; + tx->sta->tx_bytes += tx->skb->len; + if (tx->u.tx.extra_frag) { + int i; + tx->sta->tx_fragments += tx->u.tx.num_extra_frag; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + tx->sta->tx_bytes += + tx->u.tx.extra_frag[i]->len; + } + } + } + + /* + * Tell hardware to not encrypt when we had sw crypto. + * Because we use the same flag to internally indicate that + * no (software) encryption should be done, we have to set it + * after all crypto handlers. + */ + if (tx->key && !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_load_stats(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_local *local = tx->local; + struct ieee80211_hw_mode *mode = tx->u.tx.mode; + struct sk_buff *skb = tx->skb; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u32 load = 0, hdrtime; + + /* TODO: this could be part of tx_status handling, so that the number + * of retries would be known; TX rate should in that case be stored + * somewhere with the packet */ + + /* Estimate total channel use caused by this frame */ + + /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, + * 1 usec = 1/8 * (1080 / 10) = 13.5 */ + + if (mode->mode == MODE_IEEE80211A || + (mode->mode == MODE_IEEE80211G && + tx->u.tx.rate->flags & IEEE80211_RATE_ERP)) + hdrtime = CHAN_UTIL_HDR_SHORT; + else + hdrtime = CHAN_UTIL_HDR_LONG; + + load = hdrtime; + if (!is_multicast_ether_addr(hdr->addr1)) + load += hdrtime; + + if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_RTS_CTS) + load += 2 * hdrtime; + else if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) + load += hdrtime; + + load += skb->len * tx->u.tx.rate->rate_inv; + + if (tx->u.tx.extra_frag) { + int i; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + load += 2 * hdrtime; + load += tx->u.tx.extra_frag[i]->len * + tx->u.tx.rate->rate; + } + } + + /* Divide channel_use by 8 to avoid wrapping around the counter */ + load >>= CHAN_UTIL_SHIFT; + local->channel_use_raw += load; + if (tx->sta) + tx->sta->channel_use_raw += load; + tx->sdata->channel_use_raw += load; + + return TXRX_CONTINUE; +} + +/* TODO: implement register/unregister functions for adding TX/RX handlers + * into ordered list */ + +ieee80211_tx_handler ieee80211_tx_handlers[] = +{ + ieee80211_tx_h_check_assoc, + ieee80211_tx_h_sequence, + ieee80211_tx_h_ps_buf, + ieee80211_tx_h_select_key, + ieee80211_tx_h_michael_mic_add, + ieee80211_tx_h_fragment, + ieee80211_tx_h_encrypt, + ieee80211_tx_h_rate_ctrl, + ieee80211_tx_h_misc, + ieee80211_tx_h_load_stats, + NULL +}; + +/* actual transmit path */ + +/* + * deal with packet injection down monitor interface + * with Radiotap Header -- only called for monitor mode interface + */ +static ieee80211_txrx_result +__ieee80211_parse_tx_radiotap(struct ieee80211_txrx_data *tx, + struct sk_buff *skb) +{ + /* + * this is the moment to interpret and discard the radiotap header that + * must be at the start of the packet injected in Monitor mode + * + * Need to take some care with endian-ness since radiotap + * args are little-endian + */ + + struct ieee80211_radiotap_iterator iterator; + struct ieee80211_radiotap_header *rthdr = + (struct ieee80211_radiotap_header *) skb->data; + struct ieee80211_hw_mode *mode = tx->local->hw.conf.mode; + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); + struct ieee80211_tx_control *control = tx->u.tx.control; + + control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; + tx->flags |= IEEE80211_TXRXD_TX_INJECTED; + tx->flags &= ~IEEE80211_TXRXD_FRAGMENTED; + + /* + * for every radiotap entry that is present + * (ieee80211_radiotap_iterator_next returns -ENOENT when no more + * entries present, or -EINVAL on error) + */ + + while (!ret) { + int i, target_rate; + + ret = ieee80211_radiotap_iterator_next(&iterator); + + if (ret) + continue; + + /* see if this argument is something we can use */ + switch (iterator.this_arg_index) { + /* + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + */ + case IEEE80211_RADIOTAP_RATE: + /* + * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps + * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps + */ + target_rate = (*iterator.this_arg) * 5; + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *r = &mode->rates[i]; + + if (r->rate == target_rate) { + tx->u.tx.rate = r; + break; + } + } + break; + + case IEEE80211_RADIOTAP_ANTENNA: + /* + * radiotap uses 0 for 1st ant, mac80211 is 1 for + * 1st ant + */ + control->antenna_sel_tx = (*iterator.this_arg) + 1; + break; + + case IEEE80211_RADIOTAP_DBM_TX_POWER: + control->power_level = *iterator.this_arg; + break; + + case IEEE80211_RADIOTAP_FLAGS: + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { + /* + * this indicates that the skb we have been + * handed has the 32-bit FCS CRC at the end... + * we should react to that by snipping it off + * because it will be recomputed and added + * on transmission + */ + if (skb->len < (iterator.max_length + FCS_LEN)) + return TXRX_DROP; + + skb_trim(skb, skb->len - FCS_LEN); + } + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) + control->flags &= + ~IEEE80211_TXCTL_DO_NOT_ENCRYPT; + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) + tx->flags |= IEEE80211_TXRXD_FRAGMENTED; + break; + + /* + * Please update the file + * Documentation/networking/mac80211-injection.txt + * when parsing new fields here. + */ + + default: + break; + } + } + + if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ + return TXRX_DROP; + + /* + * remove the radiotap header + * iterator->max_length was sanity-checked against + * skb->len by iterator init + */ + skb_pull(skb, iterator.max_length); + + return TXRX_CONTINUE; +} + +/* + * initialises @tx + */ +static ieee80211_txrx_result +__ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, + struct sk_buff *skb, + struct net_device *dev, + struct ieee80211_tx_control *control) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_hdr *hdr; + struct ieee80211_sub_if_data *sdata; + ieee80211_txrx_result res = TXRX_CONTINUE; + + int hdrlen; + + memset(tx, 0, sizeof(*tx)); + tx->skb = skb; + tx->dev = dev; /* use original interface */ + tx->local = local; + tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); + tx->u.tx.control = control; + /* + * Set this flag (used below to indicate "automatic fragmentation"), + * it will be cleared/left by radiotap as desired. + */ + tx->flags |= IEEE80211_TXRXD_FRAGMENTED; + + /* process and remove the injection radiotap header */ + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { + if (__ieee80211_parse_tx_radiotap(tx, skb) == TXRX_DROP) + return TXRX_DROP; + + /* + * __ieee80211_parse_tx_radiotap has now removed + * the radiotap header that was present and pre-filled + * 'tx' with tx control information. + */ + } + + hdr = (struct ieee80211_hdr *) skb->data; + + tx->sta = sta_info_get(local, hdr->addr1); + tx->fc = le16_to_cpu(hdr->frame_control); + + if (is_multicast_ether_addr(hdr->addr1)) { + tx->flags &= ~IEEE80211_TXRXD_TXUNICAST; + control->flags |= IEEE80211_TXCTL_NO_ACK; + } else { + tx->flags |= IEEE80211_TXRXD_TXUNICAST; + control->flags &= ~IEEE80211_TXCTL_NO_ACK; + } + + if (tx->flags & IEEE80211_TXRXD_FRAGMENTED) { + if ((tx->flags & IEEE80211_TXRXD_TXUNICAST) && + skb->len + FCS_LEN > local->fragmentation_threshold && + !local->ops->set_frag_threshold) + tx->flags |= IEEE80211_TXRXD_FRAGMENTED; + else + tx->flags &= ~IEEE80211_TXRXD_FRAGMENTED; + } + + if (!tx->sta) + control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; + else if (tx->sta->clear_dst_mask) { + control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; + tx->sta->clear_dst_mask = 0; + } + + hdrlen = ieee80211_get_hdrlen(tx->fc); + if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { + u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; + tx->ethertype = (pos[0] << 8) | pos[1]; + } + control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; + + return res; +} + +/* Device in tx->dev has a reference added; use dev_put(tx->dev) when + * finished with it. + * + * NB: @tx is uninitialised when passed in here + */ +static int ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, + struct sk_buff *skb, + struct net_device *mdev, + struct ieee80211_tx_control *control) +{ + struct ieee80211_tx_packet_data *pkt_data; + struct net_device *dev; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + dev = dev_get_by_index(&init_net, pkt_data->ifindex); + if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { + dev_put(dev); + dev = NULL; + } + if (unlikely(!dev)) + return -ENODEV; + /* initialises tx with control */ + __ieee80211_tx_prepare(tx, skb, dev, control); + return 0; +} + +static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, + struct ieee80211_txrx_data *tx) +{ + struct ieee80211_tx_control *control = tx->u.tx.control; + int ret, i; + + if (!ieee80211_qdisc_installed(local->mdev) && + __ieee80211_queue_stopped(local, 0)) { + netif_stop_queue(local->mdev); + return IEEE80211_TX_AGAIN; + } + if (skb) { + ieee80211_dump_frame(wiphy_name(local->hw.wiphy), + "TX to low-level driver", skb); + ret = local->ops->tx(local_to_hw(local), skb, control); + if (ret) + return IEEE80211_TX_AGAIN; + local->mdev->trans_start = jiffies; + ieee80211_led_tx(local, 1); + } + if (tx->u.tx.extra_frag) { + control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | + IEEE80211_TXCTL_USE_CTS_PROTECT | + IEEE80211_TXCTL_CLEAR_DST_MASK | + IEEE80211_TXCTL_FIRST_FRAGMENT); + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + if (!tx->u.tx.extra_frag[i]) + continue; + if (__ieee80211_queue_stopped(local, control->queue)) + return IEEE80211_TX_FRAG_AGAIN; + if (i == tx->u.tx.num_extra_frag) { + control->tx_rate = tx->u.tx.last_frag_hwrate; + control->rate = tx->u.tx.last_frag_rate; + if (tx->flags & IEEE80211_TXRXD_TXPROBE_LAST_FRAG) + control->flags |= + IEEE80211_TXCTL_RATE_CTRL_PROBE; + else + control->flags &= + ~IEEE80211_TXCTL_RATE_CTRL_PROBE; + } + + ieee80211_dump_frame(wiphy_name(local->hw.wiphy), + "TX to low-level driver", + tx->u.tx.extra_frag[i]); + ret = local->ops->tx(local_to_hw(local), + tx->u.tx.extra_frag[i], + control); + if (ret) + return IEEE80211_TX_FRAG_AGAIN; + local->mdev->trans_start = jiffies; + ieee80211_led_tx(local, 1); + tx->u.tx.extra_frag[i] = NULL; + } + kfree(tx->u.tx.extra_frag); + tx->u.tx.extra_frag = NULL; + } + return IEEE80211_TX_OK; +} + +static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, + struct ieee80211_tx_control *control) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sta_info *sta; + ieee80211_tx_handler *handler; + struct ieee80211_txrx_data tx; + ieee80211_txrx_result res = TXRX_DROP, res_prepare; + int ret, i; + + WARN_ON(__ieee80211_queue_pending(local, control->queue)); + + if (unlikely(skb->len < 10)) { + dev_kfree_skb(skb); + return 0; + } + + /* initialises tx */ + res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control); + + if (res_prepare == TXRX_DROP) { + dev_kfree_skb(skb); + return 0; + } + + /* + * key references are protected using RCU and this requires that + * we are in a read-site RCU section during receive processing + */ + rcu_read_lock(); + + sta = tx.sta; + tx.u.tx.mode = local->hw.conf.mode; + + for (handler = local->tx_handlers; *handler != NULL; + handler++) { + res = (*handler)(&tx); + if (res != TXRX_CONTINUE) + break; + } + + skb = tx.skb; /* handlers are allowed to change skb */ + + if (sta) + sta_info_put(sta); + + if (unlikely(res == TXRX_DROP)) { + I802_DEBUG_INC(local->tx_handlers_drop); + goto drop; + } + + if (unlikely(res == TXRX_QUEUED)) { + I802_DEBUG_INC(local->tx_handlers_queued); + rcu_read_unlock(); + return 0; + } + + if (tx.u.tx.extra_frag) { + for (i = 0; i < tx.u.tx.num_extra_frag; i++) { + int next_len, dur; + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) + tx.u.tx.extra_frag[i]->data; + + if (i + 1 < tx.u.tx.num_extra_frag) { + next_len = tx.u.tx.extra_frag[i + 1]->len; + } else { + next_len = 0; + tx.u.tx.rate = tx.u.tx.last_frag_rate; + tx.u.tx.last_frag_hwrate = tx.u.tx.rate->val; + } + dur = ieee80211_duration(&tx, 0, next_len); + hdr->duration_id = cpu_to_le16(dur); + } + } + +retry: + ret = __ieee80211_tx(local, skb, &tx); + if (ret) { + struct ieee80211_tx_stored_packet *store = + &local->pending_packet[control->queue]; + + if (ret == IEEE80211_TX_FRAG_AGAIN) + skb = NULL; + set_bit(IEEE80211_LINK_STATE_PENDING, + &local->state[control->queue]); + smp_mb(); + /* When the driver gets out of buffers during sending of + * fragments and calls ieee80211_stop_queue, there is + * a small window between IEEE80211_LINK_STATE_XOFF and + * IEEE80211_LINK_STATE_PENDING flags are set. If a buffer + * gets available in that window (i.e. driver calls + * ieee80211_wake_queue), we would end up with ieee80211_tx + * called with IEEE80211_LINK_STATE_PENDING. Prevent this by + * continuing transmitting here when that situation is + * possible to have happened. */ + if (!__ieee80211_queue_stopped(local, control->queue)) { + clear_bit(IEEE80211_LINK_STATE_PENDING, + &local->state[control->queue]); + goto retry; + } + memcpy(&store->control, control, + sizeof(struct ieee80211_tx_control)); + store->skb = skb; + store->extra_frag = tx.u.tx.extra_frag; + store->num_extra_frag = tx.u.tx.num_extra_frag; + store->last_frag_hwrate = tx.u.tx.last_frag_hwrate; + store->last_frag_rate = tx.u.tx.last_frag_rate; + store->last_frag_rate_ctrl_probe = + !!(tx.flags & IEEE80211_TXRXD_TXPROBE_LAST_FRAG); + } + rcu_read_unlock(); + return 0; + + drop: + if (skb) + dev_kfree_skb(skb); + for (i = 0; i < tx.u.tx.num_extra_frag; i++) + if (tx.u.tx.extra_frag[i]) + dev_kfree_skb(tx.u.tx.extra_frag[i]); + kfree(tx.u.tx.extra_frag); + rcu_read_unlock(); + return 0; +} + +/* device xmit handlers */ + +int ieee80211_master_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_tx_control control; + struct ieee80211_tx_packet_data *pkt_data; + struct net_device *odev = NULL; + struct ieee80211_sub_if_data *osdata; + int headroom; + int ret; + + /* + * copy control out of the skb so other people can use skb->cb + */ + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(&control, 0, sizeof(struct ieee80211_tx_control)); + + if (pkt_data->ifindex) + odev = dev_get_by_index(&init_net, pkt_data->ifindex); + if (unlikely(odev && !is_ieee80211_device(odev, dev))) { + dev_put(odev); + odev = NULL; + } + if (unlikely(!odev)) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Discarded packet with nonexistent " + "originating device\n", dev->name); +#endif + dev_kfree_skb(skb); + return 0; + } + osdata = IEEE80211_DEV_TO_SUB_IF(odev); + + headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM; + if (skb_headroom(skb) < headroom) { + if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { + dev_kfree_skb(skb); + dev_put(odev); + return 0; + } + } + + control.ifindex = odev->ifindex; + control.type = osdata->type; + if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS) + control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS; + if (pkt_data->flags & IEEE80211_TXPD_DO_NOT_ENCRYPT) + control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; + if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) + control.flags |= IEEE80211_TXCTL_REQUEUE; + control.queue = pkt_data->queue; + + ret = ieee80211_tx(odev, skb, &control); + dev_put(odev); + + return ret; +} + +int ieee80211_monitor_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_radiotap_header *prthdr = + (struct ieee80211_radiotap_header *)skb->data; + u16 len_rthdr; + + /* check for not even having the fixed radiotap header part */ + if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) + goto fail; /* too short to be possibly valid */ + + /* is it a header version we can trust to find length from? */ + if (unlikely(prthdr->it_version)) + goto fail; /* only version 0 is supported */ + + /* then there must be a radiotap header with a length we can use */ + len_rthdr = ieee80211_get_radiotap_len(skb->data); + + /* does the skb contain enough to deliver on the alleged length? */ + if (unlikely(skb->len < len_rthdr)) + goto fail; /* skb too short for claimed rt header extent */ + + skb->dev = local->mdev; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(pkt_data, 0, sizeof(*pkt_data)); + /* needed because we set skb device to master */ + pkt_data->ifindex = dev->ifindex; + + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + + /* + * fix up the pointers accounting for the radiotap + * header still being in there. We are being given + * a precooked IEEE80211 header so no need for + * normal processing + */ + skb_set_mac_header(skb, len_rthdr); + /* + * these are just fixed to the end of the rt area since we + * don't have any better information and at this point, nobody cares + */ + skb_set_network_header(skb, len_rthdr); + skb_set_transport_header(skb, len_rthdr); + + /* pass the radiotap header up to the next stage intact */ + dev_queue_xmit(skb); + return NETDEV_TX_OK; + +fail: + dev_kfree_skb(skb); + return NETDEV_TX_OK; /* meaning, we dealt with the skb */ +} + +/** + * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type + * subinterfaces (wlan#, WDS, and VLAN interfaces) + * @skb: packet to be sent + * @dev: incoming interface + * + * Returns: 0 on success (and frees skb in this case) or 1 on failure (skb will + * not be freed, and caller is responsible for either retrying later or freeing + * skb). + * + * This function takes in an Ethernet header and encapsulates it with suitable + * IEEE 802.11 header based on which interface the packet is coming in. The + * encapsulated packet will then be passed to master interface, wlan#.11, for + * transmission (through low-level driver). + */ +int ieee80211_subif_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_sub_if_data *sdata; + int ret = 1, head_need; + u16 ethertype, hdrlen, fc; + struct ieee80211_hdr hdr; + const u8 *encaps_data; + int encaps_len, skip_header_bytes; + int nh_pos, h_pos; + struct sta_info *sta; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (unlikely(skb->len < ETH_HLEN)) { + printk(KERN_DEBUG "%s: short skb (len=%d)\n", + dev->name, skb->len); + ret = 0; + goto fail; + } + + nh_pos = skb_network_header(skb) - skb->data; + h_pos = skb_transport_header(skb) - skb->data; + + /* convert Ethernet header to proper 802.11 header (based on + * operation mode) */ + ethertype = (skb->data[12] << 8) | skb->data[13]; + /* TODO: handling for 802.1x authorized/unauthorized port */ + fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; + + switch (sdata->type) { + case IEEE80211_IF_TYPE_AP: + case IEEE80211_IF_TYPE_VLAN: + fc |= IEEE80211_FCTL_FROMDS; + /* DA BSSID SA */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); + hdrlen = 24; + break; + case IEEE80211_IF_TYPE_WDS: + fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; + /* RA TA DA SA */ + memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); + hdrlen = 30; + break; + case IEEE80211_IF_TYPE_STA: + fc |= IEEE80211_FCTL_TODS; + /* BSSID SA DA */ + memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + hdrlen = 24; + break; + case IEEE80211_IF_TYPE_IBSS: + /* DA SA BSSID */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + memcpy(hdr.addr3, sdata->u.sta.bssid, ETH_ALEN); + hdrlen = 24; + break; + default: + ret = 0; + goto fail; + } + + /* receiver is QoS enabled, use a QoS type frame */ + sta = sta_info_get(local, hdr.addr1); + if (sta) { + if (sta->flags & WLAN_STA_WME) { + fc |= IEEE80211_STYPE_QOS_DATA; + hdrlen += 2; + } + sta_info_put(sta); + } + + hdr.frame_control = cpu_to_le16(fc); + hdr.duration_id = 0; + hdr.seq_ctrl = 0; + + skip_header_bytes = ETH_HLEN; + if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) { + encaps_data = bridge_tunnel_header; + encaps_len = sizeof(bridge_tunnel_header); + skip_header_bytes -= 2; + } else if (ethertype >= 0x600) { + encaps_data = rfc1042_header; + encaps_len = sizeof(rfc1042_header); + skip_header_bytes -= 2; + } else { + encaps_data = NULL; + encaps_len = 0; + } + + skb_pull(skb, skip_header_bytes); + nh_pos -= skip_header_bytes; + h_pos -= skip_header_bytes; + + /* TODO: implement support for fragments so that there is no need to + * reallocate and copy payload; it might be enough to support one + * extra fragment that would be copied in the beginning of the frame + * data.. anyway, it would be nice to include this into skb structure + * somehow + * + * There are few options for this: + * use skb->cb as an extra space for 802.11 header + * allocate new buffer if not enough headroom + * make sure that there is enough headroom in every skb by increasing + * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and + * alloc_skb() (net/core/skbuff.c) + */ + head_need = hdrlen + encaps_len + local->tx_headroom; + head_need -= skb_headroom(skb); + + /* We are going to modify skb data, so make a copy of it if happens to + * be cloned. This could happen, e.g., with Linux bridge code passing + * us broadcast frames. */ + + if (head_need > 0 || skb_cloned(skb)) { +#if 0 + printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes " + "of headroom\n", dev->name, head_need); +#endif + + if (skb_cloned(skb)) + I802_DEBUG_INC(local->tx_expand_skb_head_cloned); + else + I802_DEBUG_INC(local->tx_expand_skb_head); + /* Since we have to reallocate the buffer, make sure that there + * is enough room for possible WEP IV/ICV and TKIP (8 bytes + * before payload and 12 after). */ + if (pskb_expand_head(skb, (head_need > 0 ? head_need + 8 : 8), + 12, GFP_ATOMIC)) { + printk(KERN_DEBUG "%s: failed to reallocate TX buffer" + "\n", dev->name); + goto fail; + } + } + + if (encaps_data) { + memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); + nh_pos += encaps_len; + h_pos += encaps_len; + } + + if (fc & IEEE80211_STYPE_QOS_DATA) { + __le16 *qos_control; + + qos_control = (__le16*) skb_push(skb, 2); + memcpy(skb_push(skb, hdrlen - 2), &hdr, hdrlen - 2); + /* + * Maybe we could actually set some fields here, for now just + * initialise to zero to indicate no special operation. + */ + *qos_control = 0; + } else + memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); + + nh_pos += hdrlen; + h_pos += hdrlen; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); + pkt_data->ifindex = dev->ifindex; + + skb->dev = local->mdev; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* Update skb pointers to various headers since this modified frame + * is going to go through Linux networking code that may potentially + * need things like pointer to IP header. */ + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, nh_pos); + skb_set_transport_header(skb, h_pos); + + dev->trans_start = jiffies; + dev_queue_xmit(skb); + + return 0; + + fail: + if (!ret) + dev_kfree_skb(skb); + + return ret; +} + +/* + * This is the transmit routine for the 802.11 type interfaces + * called by upper layers of the linux networking + * stack when it has a frame to transmit + */ +int ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_hdr *hdr; + u16 fc; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (skb->len < 10) { + dev_kfree_skb(skb); + return 0; + } + + if (skb_headroom(skb) < sdata->local->tx_headroom) { + if (pskb_expand_head(skb, sdata->local->tx_headroom, + 0, GFP_ATOMIC)) { + dev_kfree_skb(skb); + return 0; + } + } + + hdr = (struct ieee80211_hdr *) skb->data; + fc = le16_to_cpu(hdr->frame_control); + + pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); + pkt_data->ifindex = sdata->dev->ifindex; + + skb->priority = 20; /* use hardcoded priority for mgmt TX queue */ + skb->dev = sdata->local->mdev; + + /* + * We're using the protocol field of the the frame control header + * to request TX callback for hostapd. BIT(1) is checked. + */ + if ((fc & BIT(1)) == BIT(1)) { + pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS; + fc &= ~BIT(1); + hdr->frame_control = cpu_to_le16(fc); + } + + if (!(fc & IEEE80211_FCTL_PROTECTED)) + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + dev_queue_xmit(skb); + + return 0; +} + +/* helper functions for pending packets for when queues are stopped */ + +void ieee80211_clear_tx_pending(struct ieee80211_local *local) +{ + int i, j; + struct ieee80211_tx_stored_packet *store; + + for (i = 0; i < local->hw.queues; i++) { + if (!__ieee80211_queue_pending(local, i)) + continue; + store = &local->pending_packet[i]; + kfree_skb(store->skb); + for (j = 0; j < store->num_extra_frag; j++) + kfree_skb(store->extra_frag[j]); + kfree(store->extra_frag); + clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[i]); + } +} + +void ieee80211_tx_pending(unsigned long data) +{ + struct ieee80211_local *local = (struct ieee80211_local *)data; + struct net_device *dev = local->mdev; + struct ieee80211_tx_stored_packet *store; + struct ieee80211_txrx_data tx; + int i, ret, reschedule = 0; + + netif_tx_lock_bh(dev); + for (i = 0; i < local->hw.queues; i++) { + if (__ieee80211_queue_stopped(local, i)) + continue; + if (!__ieee80211_queue_pending(local, i)) { + reschedule = 1; + continue; + } + store = &local->pending_packet[i]; + tx.u.tx.control = &store->control; + tx.u.tx.extra_frag = store->extra_frag; + tx.u.tx.num_extra_frag = store->num_extra_frag; + tx.u.tx.last_frag_hwrate = store->last_frag_hwrate; + tx.u.tx.last_frag_rate = store->last_frag_rate; + tx.flags = 0; + if (store->last_frag_rate_ctrl_probe) + tx.flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + ret = __ieee80211_tx(local, store->skb, &tx); + if (ret) { + if (ret == IEEE80211_TX_FRAG_AGAIN) + store->skb = NULL; + } else { + clear_bit(IEEE80211_LINK_STATE_PENDING, + &local->state[i]); + reschedule = 1; + } + } + netif_tx_unlock_bh(dev); + if (reschedule) { + if (!ieee80211_qdisc_installed(dev)) { + if (!__ieee80211_queue_stopped(local, 0)) + netif_wake_queue(dev); + } else + netif_schedule(dev); + } +} + +/* functions for drivers to get certain frames */ + +static void ieee80211_beacon_add_tim(struct ieee80211_local *local, + struct ieee80211_if_ap *bss, + struct sk_buff *skb) +{ + u8 *pos, *tim; + int aid0 = 0; + int i, have_bits = 0, n1, n2; + + /* Generate bitmap for TIM only if there are any STAs in power save + * mode. */ + read_lock_bh(&local->sta_lock); + if (atomic_read(&bss->num_sta_ps) > 0) + /* in the hope that this is faster than + * checking byte-for-byte */ + have_bits = !bitmap_empty((unsigned long*)bss->tim, + IEEE80211_MAX_AID+1); + + if (bss->dtim_count == 0) + bss->dtim_count = bss->dtim_period - 1; + else + bss->dtim_count--; + + tim = pos = (u8 *) skb_put(skb, 6); + *pos++ = WLAN_EID_TIM; + *pos++ = 4; + *pos++ = bss->dtim_count; + *pos++ = bss->dtim_period; + + if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) + aid0 = 1; + + if (have_bits) { + /* Find largest even number N1 so that bits numbered 1 through + * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits + * (N2 + 1) x 8 through 2007 are 0. */ + n1 = 0; + for (i = 0; i < IEEE80211_MAX_TIM_LEN; i++) { + if (bss->tim[i]) { + n1 = i & 0xfe; + break; + } + } + n2 = n1; + for (i = IEEE80211_MAX_TIM_LEN - 1; i >= n1; i--) { + if (bss->tim[i]) { + n2 = i; + break; + } + } + + /* Bitmap control */ + *pos++ = n1 | aid0; + /* Part Virt Bitmap */ + memcpy(pos, bss->tim + n1, n2 - n1 + 1); + + tim[1] = n2 - n1 + 4; + skb_put(skb, n2 - n1); + } else { + *pos++ = aid0; /* Bitmap control */ + *pos++ = 0; /* Part Virt Bitmap */ + } + read_unlock_bh(&local->sta_lock); +} + +struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id, + struct ieee80211_tx_control *control) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sk_buff *skb; + struct net_device *bdev; + struct ieee80211_sub_if_data *sdata = NULL; + struct ieee80211_if_ap *ap = NULL; + struct ieee80211_rate *rate; + struct rate_control_extra extra; + u8 *b_head, *b_tail; + int bh_len, bt_len; + + bdev = dev_get_by_index(&init_net, if_id); + if (bdev) { + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + ap = &sdata->u.ap; + dev_put(bdev); + } + + if (!ap || sdata->type != IEEE80211_IF_TYPE_AP || + !ap->beacon_head) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "no beacon data avail for idx=%d " + "(%s)\n", if_id, bdev ? bdev->name : "N/A"); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + return NULL; + } + + /* Assume we are generating the normal beacon locally */ + b_head = ap->beacon_head; + b_tail = ap->beacon_tail; + bh_len = ap->beacon_head_len; + bt_len = ap->beacon_tail_len; + + skb = dev_alloc_skb(local->tx_headroom + + bh_len + bt_len + 256 /* maximum TIM len */); + if (!skb) + return NULL; + + skb_reserve(skb, local->tx_headroom); + memcpy(skb_put(skb, bh_len), b_head, bh_len); + + ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data); + + ieee80211_beacon_add_tim(local, ap, skb); + + if (b_tail) { + memcpy(skb_put(skb, bt_len), b_tail, bt_len); + } + + if (control) { + memset(&extra, 0, sizeof(extra)); + extra.mode = local->oper_hw_mode; + + rate = rate_control_get_rate(local, local->mdev, skb, &extra); + if (!rate) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: ieee80211_beacon_get: no rate " + "found\n", wiphy_name(local->hw.wiphy)); + } + dev_kfree_skb(skb); + return NULL; + } + + control->tx_rate = + ((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && + (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? + rate->val2 : rate->val; + control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; + control->power_level = local->hw.conf.power_level; + control->flags |= IEEE80211_TXCTL_NO_ACK; + control->retry_limit = 1; + control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; + } + + ap->num_beacons++; + return skb; +} +EXPORT_SYMBOL(ieee80211_beacon_get); + +void ieee80211_rts_get(struct ieee80211_hw *hw, int if_id, + const void *frame, size_t frame_len, + const struct ieee80211_tx_control *frame_txctl, + struct ieee80211_rts *rts) +{ + const struct ieee80211_hdr *hdr = frame; + u16 fctl; + + fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS; + rts->frame_control = cpu_to_le16(fctl); + rts->duration = ieee80211_rts_duration(hw, if_id, frame_len, frame_txctl); + memcpy(rts->ra, hdr->addr1, sizeof(rts->ra)); + memcpy(rts->ta, hdr->addr2, sizeof(rts->ta)); +} +EXPORT_SYMBOL(ieee80211_rts_get); + +void ieee80211_ctstoself_get(struct ieee80211_hw *hw, int if_id, + const void *frame, size_t frame_len, + const struct ieee80211_tx_control *frame_txctl, + struct ieee80211_cts *cts) +{ + const struct ieee80211_hdr *hdr = frame; + u16 fctl; + + fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS; + cts->frame_control = cpu_to_le16(fctl); + cts->duration = ieee80211_ctstoself_duration(hw, if_id, frame_len, frame_txctl); + memcpy(cts->ra, hdr->addr1, sizeof(cts->ra)); +} +EXPORT_SYMBOL(ieee80211_ctstoself_get); + +struct sk_buff * +ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id, + struct ieee80211_tx_control *control) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sk_buff *skb; + struct sta_info *sta; + ieee80211_tx_handler *handler; + struct ieee80211_txrx_data tx; + ieee80211_txrx_result res = TXRX_DROP; + struct net_device *bdev; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_ap *bss = NULL; + + bdev = dev_get_by_index(&init_net, if_id); + if (bdev) { + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + bss = &sdata->u.ap; + dev_put(bdev); + } + if (!bss || sdata->type != IEEE80211_IF_TYPE_AP || !bss->beacon_head) + return NULL; + + if (bss->dtim_count != 0) + return NULL; /* send buffered bc/mc only after DTIM beacon */ + memset(control, 0, sizeof(*control)); + while (1) { + skb = skb_dequeue(&bss->ps_bc_buf); + if (!skb) + return NULL; + local->total_ps_buffered--; + + if (!skb_queue_empty(&bss->ps_bc_buf) && skb->len >= 2) { + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) skb->data; + /* more buffered multicast/broadcast frames ==> set + * MoreData flag in IEEE 802.11 header to inform PS + * STAs */ + hdr->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + } + + if (!ieee80211_tx_prepare(&tx, skb, local->mdev, control)) + break; + dev_kfree_skb_any(skb); + } + sta = tx.sta; + tx.flags |= IEEE80211_TXRXD_TXPS_BUFFERED; + tx.u.tx.mode = local->hw.conf.mode; + + for (handler = local->tx_handlers; *handler != NULL; handler++) { + res = (*handler)(&tx); + if (res == TXRX_DROP || res == TXRX_QUEUED) + break; + } + dev_put(tx.dev); + skb = tx.skb; /* handlers are allowed to change skb */ + + if (res == TXRX_DROP) { + I802_DEBUG_INC(local->tx_handlers_drop); + dev_kfree_skb(skb); + skb = NULL; + } else if (res == TXRX_QUEUED) { + I802_DEBUG_INC(local->tx_handlers_queued); + skb = NULL; + } + + if (sta) + sta_info_put(sta); + + return skb; +} +EXPORT_SYMBOL(ieee80211_get_buffered_bc); diff --git a/net/mac80211/util.c b/net/mac80211/util.c new file mode 100644 index 000000000000..5a0564e1dbd6 --- /dev/null +++ b/net/mac80211/util.c @@ -0,0 +1,486 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * utilities for mac80211 + */ + +#include <net/mac80211.h> +#include <linux/netdevice.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/if_arp.h> +#include <linux/wireless.h> +#include <linux/bitmap.h> +#include <net/net_namespace.h> +#include <net/cfg80211.h> + +#include "ieee80211_i.h" +#include "ieee80211_rate.h" +#include "wme.h" + +/* privid for wiphys to determine whether they belong to us or not */ +void *mac80211_wiphy_privid = &mac80211_wiphy_privid; + +/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ +/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ +const unsigned char rfc1042_header[] = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; + +/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ +const unsigned char bridge_tunnel_header[] = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; + +/* No encapsulation header if EtherType < 0x600 (=length) */ +static const unsigned char eapol_header[] = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e }; + + +static int rate_list_match(const int *rate_list, int rate) +{ + int i; + + if (!rate_list) + return 0; + + for (i = 0; rate_list[i] >= 0; i++) + if (rate_list[i] == rate) + return 1; + + return 0; +} + +void ieee80211_prepare_rates(struct ieee80211_local *local, + struct ieee80211_hw_mode *mode) +{ + int i; + + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *rate = &mode->rates[i]; + + rate->flags &= ~(IEEE80211_RATE_SUPPORTED | + IEEE80211_RATE_BASIC); + + if (local->supp_rates[mode->mode]) { + if (!rate_list_match(local->supp_rates[mode->mode], + rate->rate)) + continue; + } + + rate->flags |= IEEE80211_RATE_SUPPORTED; + + /* Use configured basic rate set if it is available. If not, + * use defaults that are sane for most cases. */ + if (local->basic_rates[mode->mode]) { + if (rate_list_match(local->basic_rates[mode->mode], + rate->rate)) + rate->flags |= IEEE80211_RATE_BASIC; + } else switch (mode->mode) { + case MODE_IEEE80211A: + if (rate->rate == 60 || rate->rate == 120 || + rate->rate == 240) + rate->flags |= IEEE80211_RATE_BASIC; + break; + case MODE_IEEE80211B: + if (rate->rate == 10 || rate->rate == 20) + rate->flags |= IEEE80211_RATE_BASIC; + break; + case MODE_IEEE80211G: + if (rate->rate == 10 || rate->rate == 20 || + rate->rate == 55 || rate->rate == 110) + rate->flags |= IEEE80211_RATE_BASIC; + break; + case NUM_IEEE80211_MODES: + /* not useful */ + break; + } + + /* Set ERP and MANDATORY flags based on phymode */ + switch (mode->mode) { + case MODE_IEEE80211A: + if (rate->rate == 60 || rate->rate == 120 || + rate->rate == 240) + rate->flags |= IEEE80211_RATE_MANDATORY; + break; + case MODE_IEEE80211B: + if (rate->rate == 10) + rate->flags |= IEEE80211_RATE_MANDATORY; + break; + case MODE_IEEE80211G: + if (rate->rate == 10 || rate->rate == 20 || + rate->rate == 55 || rate->rate == 110 || + rate->rate == 60 || rate->rate == 120 || + rate->rate == 240) + rate->flags |= IEEE80211_RATE_MANDATORY; + break; + case NUM_IEEE80211_MODES: + /* not useful */ + break; + } + if (ieee80211_is_erp_rate(mode->mode, rate->rate)) + rate->flags |= IEEE80211_RATE_ERP; + } +} + +u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len) +{ + u16 fc; + + if (len < 24) + return NULL; + + fc = le16_to_cpu(hdr->frame_control); + + switch (fc & IEEE80211_FCTL_FTYPE) { + case IEEE80211_FTYPE_DATA: + switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + case IEEE80211_FCTL_TODS: + return hdr->addr1; + case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + return NULL; + case IEEE80211_FCTL_FROMDS: + return hdr->addr2; + case 0: + return hdr->addr3; + } + break; + case IEEE80211_FTYPE_MGMT: + return hdr->addr3; + case IEEE80211_FTYPE_CTL: + if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL) + return hdr->addr1; + else + return NULL; + } + + return NULL; +} + +int ieee80211_get_hdrlen(u16 fc) +{ + int hdrlen = 24; + + switch (fc & IEEE80211_FCTL_FTYPE) { + case IEEE80211_FTYPE_DATA: + if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) + hdrlen = 30; /* Addr4 */ + /* + * The QoS Control field is two bytes and its presence is + * indicated by the IEEE80211_STYPE_QOS_DATA bit. Add 2 to + * hdrlen if that bit is set. + * This works by masking out the bit and shifting it to + * bit position 1 so the result has the value 0 or 2. + */ + hdrlen += (fc & IEEE80211_STYPE_QOS_DATA) + >> (ilog2(IEEE80211_STYPE_QOS_DATA)-1); + break; + case IEEE80211_FTYPE_CTL: + /* + * ACK and CTS are 10 bytes, all others 16. To see how + * to get this condition consider + * subtype mask: 0b0000000011110000 (0x00F0) + * ACK subtype: 0b0000000011010000 (0x00D0) + * CTS subtype: 0b0000000011000000 (0x00C0) + * bits that matter: ^^^ (0x00E0) + * value of those: 0b0000000011000000 (0x00C0) + */ + if ((fc & 0xE0) == 0xC0) + hdrlen = 10; + else + hdrlen = 16; + break; + } + + return hdrlen; +} +EXPORT_SYMBOL(ieee80211_get_hdrlen); + +int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) +{ + const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *) skb->data; + int hdrlen; + + if (unlikely(skb->len < 10)) + return 0; + hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); + if (unlikely(hdrlen > skb->len)) + return 0; + return hdrlen; +} +EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); + +int ieee80211_is_eapol(const struct sk_buff *skb) +{ + const struct ieee80211_hdr *hdr; + u16 fc; + int hdrlen; + + if (unlikely(skb->len < 10)) + return 0; + + hdr = (const struct ieee80211_hdr *) skb->data; + fc = le16_to_cpu(hdr->frame_control); + + if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + return 0; + + hdrlen = ieee80211_get_hdrlen(fc); + + if (unlikely(skb->len >= hdrlen + sizeof(eapol_header) && + memcmp(skb->data + hdrlen, eapol_header, + sizeof(eapol_header)) == 0)) + return 1; + + return 0; +} + +void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + if (tx->u.tx.extra_frag) { + struct ieee80211_hdr *fhdr; + int i; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + fhdr = (struct ieee80211_hdr *) + tx->u.tx.extra_frag[i]->data; + fhdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + } + } +} + +int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, + int rate, int erp, int short_preamble) +{ + int dur; + + /* calculate duration (in microseconds, rounded up to next higher + * integer if it includes a fractional microsecond) to send frame of + * len bytes (does not include FCS) at the given rate. Duration will + * also include SIFS. + * + * rate is in 100 kbps, so divident is multiplied by 10 in the + * DIV_ROUND_UP() operations. + */ + + if (local->hw.conf.phymode == MODE_IEEE80211A || erp) { + /* + * OFDM: + * + * N_DBPS = DATARATE x 4 + * N_SYM = Ceiling((16+8xLENGTH+6) / N_DBPS) + * (16 = SIGNAL time, 6 = tail bits) + * TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext + * + * T_SYM = 4 usec + * 802.11a - 17.5.2: aSIFSTime = 16 usec + * 802.11g - 19.8.4: aSIFSTime = 10 usec + + * signal ext = 6 usec + */ + dur = 16; /* SIFS + signal ext */ + dur += 16; /* 17.3.2.3: T_PREAMBLE = 16 usec */ + dur += 4; /* 17.3.2.3: T_SIGNAL = 4 usec */ + dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10, + 4 * rate); /* T_SYM x N_SYM */ + } else { + /* + * 802.11b or 802.11g with 802.11b compatibility: + * 18.3.4: TXTIME = PreambleLength + PLCPHeaderTime + + * Ceiling(((LENGTH+PBCC)x8)/DATARATE). PBCC=0. + * + * 802.11 (DS): 15.3.3, 802.11b: 18.3.4 + * aSIFSTime = 10 usec + * aPreambleLength = 144 usec or 72 usec with short preamble + * aPLCPHeaderLength = 48 usec or 24 usec with short preamble + */ + dur = 10; /* aSIFSTime = 10 usec */ + dur += short_preamble ? (72 + 24) : (144 + 48); + + dur += DIV_ROUND_UP(8 * (len + 4) * 10, rate); + } + + return dur; +} + +/* Exported duration function for driver use */ +__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, int if_id, + size_t frame_len, int rate) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct net_device *bdev = dev_get_by_index(&init_net, if_id); + struct ieee80211_sub_if_data *sdata; + u16 dur; + int erp; + + if (unlikely(!bdev)) + return 0; + + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + erp = ieee80211_is_erp_rate(hw->conf.phymode, rate); + dur = ieee80211_frame_duration(local, frame_len, rate, + erp, sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE); + + dev_put(bdev); + return cpu_to_le16(dur); +} +EXPORT_SYMBOL(ieee80211_generic_frame_duration); + +__le16 ieee80211_rts_duration(struct ieee80211_hw *hw, int if_id, + size_t frame_len, + const struct ieee80211_tx_control *frame_txctl) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_rate *rate; + struct net_device *bdev = dev_get_by_index(&init_net, if_id); + struct ieee80211_sub_if_data *sdata; + int short_preamble; + int erp; + u16 dur; + + if (unlikely(!bdev)) + return 0; + + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE; + + rate = frame_txctl->rts_rate; + erp = !!(rate->flags & IEEE80211_RATE_ERP); + + /* CTS duration */ + dur = ieee80211_frame_duration(local, 10, rate->rate, + erp, short_preamble); + /* Data frame duration */ + dur += ieee80211_frame_duration(local, frame_len, rate->rate, + erp, short_preamble); + /* ACK duration */ + dur += ieee80211_frame_duration(local, 10, rate->rate, + erp, short_preamble); + + dev_put(bdev); + return cpu_to_le16(dur); +} +EXPORT_SYMBOL(ieee80211_rts_duration); + +__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, int if_id, + size_t frame_len, + const struct ieee80211_tx_control *frame_txctl) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_rate *rate; + struct net_device *bdev = dev_get_by_index(&init_net, if_id); + struct ieee80211_sub_if_data *sdata; + int short_preamble; + int erp; + u16 dur; + + if (unlikely(!bdev)) + return 0; + + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE; + + rate = frame_txctl->rts_rate; + erp = !!(rate->flags & IEEE80211_RATE_ERP); + + /* Data frame duration */ + dur = ieee80211_frame_duration(local, frame_len, rate->rate, + erp, short_preamble); + if (!(frame_txctl->flags & IEEE80211_TXCTL_NO_ACK)) { + /* ACK duration */ + dur += ieee80211_frame_duration(local, 10, rate->rate, + erp, short_preamble); + } + + dev_put(bdev); + return cpu_to_le16(dur); +} +EXPORT_SYMBOL(ieee80211_ctstoself_duration); + +struct ieee80211_rate * +ieee80211_get_rate(struct ieee80211_local *local, int phymode, int hw_rate) +{ + struct ieee80211_hw_mode *mode; + int r; + + list_for_each_entry(mode, &local->modes_list, list) { + if (mode->mode != phymode) + continue; + for (r = 0; r < mode->num_rates; r++) { + struct ieee80211_rate *rate = &mode->rates[r]; + if (rate->val == hw_rate || + (rate->flags & IEEE80211_RATE_PREAMBLE2 && + rate->val2 == hw_rate)) + return rate; + } + } + + return NULL; +} + +void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue) +{ + struct ieee80211_local *local = hw_to_local(hw); + + if (test_and_clear_bit(IEEE80211_LINK_STATE_XOFF, + &local->state[queue])) { + if (test_bit(IEEE80211_LINK_STATE_PENDING, + &local->state[queue])) + tasklet_schedule(&local->tx_pending_tasklet); + else + if (!ieee80211_qdisc_installed(local->mdev)) { + if (queue == 0) + netif_wake_queue(local->mdev); + } else + __netif_schedule(local->mdev); + } +} +EXPORT_SYMBOL(ieee80211_wake_queue); + +void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue) +{ + struct ieee80211_local *local = hw_to_local(hw); + + if (!ieee80211_qdisc_installed(local->mdev) && queue == 0) + netif_stop_queue(local->mdev); + set_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); +} +EXPORT_SYMBOL(ieee80211_stop_queue); + +void ieee80211_start_queues(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + int i; + + for (i = 0; i < local->hw.queues; i++) + clear_bit(IEEE80211_LINK_STATE_XOFF, &local->state[i]); + if (!ieee80211_qdisc_installed(local->mdev)) + netif_start_queue(local->mdev); +} +EXPORT_SYMBOL(ieee80211_start_queues); + +void ieee80211_stop_queues(struct ieee80211_hw *hw) +{ + int i; + + for (i = 0; i < hw->queues; i++) + ieee80211_stop_queue(hw, i); +} +EXPORT_SYMBOL(ieee80211_stop_queues); + +void ieee80211_wake_queues(struct ieee80211_hw *hw) +{ + int i; + + for (i = 0; i < hw->queues; i++) + ieee80211_wake_queue(hw, i); +} +EXPORT_SYMBOL(ieee80211_wake_queues); diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 1ad3d75281cc..6675261e958f 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -63,11 +63,11 @@ static inline int ieee80211_wep_weak_iv(u32 iv, int keylen) } -void ieee80211_wep_get_iv(struct ieee80211_local *local, - struct ieee80211_key *key, u8 *iv) +static void ieee80211_wep_get_iv(struct ieee80211_local *local, + struct ieee80211_key *key, u8 *iv) { local->wep_iv++; - if (ieee80211_wep_weak_iv(local->wep_iv, key->keylen)) + if (ieee80211_wep_weak_iv(local->wep_iv, key->conf.keylen)) local->wep_iv += 0x0100; if (!iv) @@ -76,13 +76,13 @@ void ieee80211_wep_get_iv(struct ieee80211_local *local, *iv++ = (local->wep_iv >> 16) & 0xff; *iv++ = (local->wep_iv >> 8) & 0xff; *iv++ = local->wep_iv & 0xff; - *iv++ = key->keyidx << 6; + *iv++ = key->conf.keyidx << 6; } -u8 * ieee80211_wep_add_iv(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_key *key) +static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local, + struct sk_buff *skb, + struct ieee80211_key *key) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u16 fc; @@ -109,9 +109,9 @@ u8 * ieee80211_wep_add_iv(struct ieee80211_local *local, } -void ieee80211_wep_remove_iv(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_key *key) +static void ieee80211_wep_remove_iv(struct ieee80211_local *local, + struct sk_buff *skb, + struct ieee80211_key *key) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u16 fc; @@ -159,10 +159,10 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb, u8 *rc4key, *iv; size_t len; - if (!key || key->alg != ALG_WEP) + if (!key || key->conf.alg != ALG_WEP) return -1; - klen = 3 + key->keylen; + klen = 3 + key->conf.keylen; rc4key = kmalloc(klen, GFP_ATOMIC); if (!rc4key) return -1; @@ -179,7 +179,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb, memcpy(rc4key, iv, 3); /* Copy rest of the WEP key (the secret part) */ - memcpy(rc4key + 3, key->key, key->keylen); + memcpy(rc4key + 3, key->conf.key, key->conf.keylen); /* Add room for ICV */ skb_put(skb, WEP_ICV_LEN); @@ -251,10 +251,10 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, keyidx = skb->data[hdrlen + 3] >> 6; - if (!key || keyidx != key->keyidx || key->alg != ALG_WEP) + if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) return -1; - klen = 3 + key->keylen; + klen = 3 + key->conf.keylen; rc4key = kmalloc(klen, GFP_ATOMIC); if (!rc4key) @@ -264,7 +264,7 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, memcpy(rc4key, skb->data + hdrlen, 3); /* Copy rest of the WEP key (the secret part) */ - memcpy(rc4key + 3, key->key, key->keylen); + memcpy(rc4key + 3, key->conf.key, key->conf.keylen); if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, skb->data + hdrlen + WEP_IV_LEN, @@ -286,43 +286,99 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, } -int ieee80211_wep_get_keyidx(struct sk_buff *skb) +u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u16 fc; int hdrlen; + u8 *ivpos; + u32 iv; fc = le16_to_cpu(hdr->frame_control); if (!(fc & IEEE80211_FCTL_PROTECTED)) - return -1; + return NULL; hdrlen = ieee80211_get_hdrlen(fc); + ivpos = skb->data + hdrlen; + iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2]; - if (skb->len < 8 + hdrlen) - return -1; + if (ieee80211_wep_weak_iv(iv, key->conf.keylen)) + return ivpos; - return skb->data[hdrlen + 3] >> 6; + return NULL; } +ieee80211_txrx_result +ieee80211_crypto_wep_decrypt(struct ieee80211_txrx_data *rx) +{ + if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && + ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) + return TXRX_CONTINUE; + + if (!(rx->u.rx.status->flag & RX_FLAG_DECRYPTED)) { + if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: RX WEP frame, decrypt " + "failed\n", rx->dev->name); + return TXRX_DROP; + } + } else if (!(rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED)) { + ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); + /* remove ICV */ + skb_trim(rx->skb, rx->skb->len - 4); + } -u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) + return TXRX_CONTINUE; +} + +static int wep_encrypt_skb(struct ieee80211_txrx_data *tx, struct sk_buff *skb) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { + if (ieee80211_wep_encrypt(tx->local, skb, tx->key)) + return -1; + } else { + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; + if (tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) { + if (!ieee80211_wep_add_iv(tx->local, skb, tx->key)) + return -1; + } + } + return 0; +} + +ieee80211_txrx_result +ieee80211_crypto_wep_encrypt(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; u16 fc; - int hdrlen; - u8 *ivpos; - u32 iv; fc = le16_to_cpu(hdr->frame_control); - if (!(fc & IEEE80211_FCTL_PROTECTED)) - return NULL; - hdrlen = ieee80211_get_hdrlen(fc); - ivpos = skb->data + hdrlen; - iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2]; + if (((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && + ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))) + return TXRX_CONTINUE; - if (ieee80211_wep_weak_iv(iv, key->keylen)) - return ivpos; + tx->u.tx.control->iv_len = WEP_IV_LEN; + tx->u.tx.control->icv_len = WEP_ICV_LEN; + ieee80211_tx_set_iswep(tx); - return NULL; + if (wep_encrypt_skb(tx, tx->skb) < 0) { + I802_DEBUG_INC(tx->local->tx_handlers_drop_wep); + return TXRX_DROP; + } + + if (tx->u.tx.extra_frag) { + int i; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + if (wep_encrypt_skb(tx, tx->u.tx.extra_frag[i]) < 0) { + I802_DEBUG_INC(tx->local-> + tx_handlers_drop_wep); + return TXRX_DROP; + } + } + } + + return TXRX_CONTINUE; } diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h index bfe29e8e10aa..785fbb4e0dd7 100644 --- a/net/mac80211/wep.h +++ b/net/mac80211/wep.h @@ -18,14 +18,6 @@ int ieee80211_wep_init(struct ieee80211_local *local); void ieee80211_wep_free(struct ieee80211_local *local); -void ieee80211_wep_get_iv(struct ieee80211_local *local, - struct ieee80211_key *key, u8 *iv); -u8 * ieee80211_wep_add_iv(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_key *key); -void ieee80211_wep_remove_iv(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_key *key); void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, size_t klen, u8 *data, size_t data_len); int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, @@ -34,7 +26,11 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key); int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key); -int ieee80211_wep_get_keyidx(struct sk_buff *skb); u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key); +ieee80211_txrx_result +ieee80211_crypto_wep_decrypt(struct ieee80211_txrx_data *rx); +ieee80211_txrx_result +ieee80211_crypto_wep_encrypt(struct ieee80211_txrx_data *tx); + #endif /* WEP_H */ diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 7ab82b376e1b..5b8a157975a3 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -18,70 +18,6 @@ #include "ieee80211_i.h" #include "wme.h" -static inline int WLAN_FC_IS_QOS_DATA(u16 fc) -{ - return (fc & 0x8C) == 0x88; -} - - -ieee80211_txrx_result -ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx) -{ - u8 *data = rx->skb->data; - int tid; - - /* does the frame have a qos control field? */ - if (WLAN_FC_IS_QOS_DATA(rx->fc)) { - u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN; - /* frame has qos control */ - tid = qc[0] & QOS_CONTROL_TID_MASK; - } else { - if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) { - /* Separate TID for management frames */ - tid = NUM_RX_DATA_QUEUES - 1; - } else { - /* no qos control present */ - tid = 0; /* 802.1d - Best Effort */ - } - } -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - I802_DEBUG_INC(rx->local->wme_rx_queue[tid]); - if (rx->sta) { - I802_DEBUG_INC(rx->sta->wme_rx_queue[tid]); - } -#endif /* CONFIG_MAC80211_DEBUG_COUNTERS */ - - rx->u.rx.queue = tid; - /* Set skb->priority to 1d tag if highest order bit of TID is not set. - * For now, set skb->priority to 0 for other cases. */ - rx->skb->priority = (tid > 7) ? 0 : tid; - - return TXRX_CONTINUE; -} - - -ieee80211_txrx_result -ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx) -{ - u16 fc = rx->fc; - u8 *data = rx->skb->data; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) data; - - if (!WLAN_FC_IS_QOS_DATA(fc)) - return TXRX_CONTINUE; - - /* remove the qos control field, update frame type and meta-data */ - memmove(data + 2, data, ieee80211_get_hdrlen(fc) - 2); - hdr = (struct ieee80211_hdr *) skb_pull(rx->skb, 2); - /* change frame type to non QOS */ - rx->fc = fc &= ~IEEE80211_STYPE_QOS_DATA; - hdr->frame_control = cpu_to_le16(fc); - - return TXRX_CONTINUE; -} - - -#ifdef CONFIG_NET_SCHED /* maximum number of hardware queues we support. */ #define TC_80211_MAX_QUEUES 8 @@ -158,8 +94,6 @@ static inline int wme_downgrade_ac(struct sk_buff *skb) static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd) { struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data = - (struct ieee80211_tx_packet_data *) skb->cb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; unsigned short fc = le16_to_cpu(hdr->frame_control); int qos; @@ -172,12 +106,8 @@ static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd) return IEEE80211_TX_QUEUE_DATA0; } - if (unlikely(pkt_data->mgmt_iface)) { - /* Data frames from hostapd (mainly, EAPOL) use AC_VO - * and they will include QoS control fields if - * the target STA is using WME. */ - skb->priority = 7; - return ieee802_1d_to_ac[skb->priority]; + if (0 /* injected */) { + /* use AC from radiotap */ } /* is this a QoS frame? */ @@ -189,14 +119,13 @@ static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd) } /* use the data classifier to determine what 802.1d tag the - * data frame has */ + * data frame has */ skb->priority = classify_1d(skb, qd); - /* incase we are a client verify acm is not set for this ac */ + /* in case we are a client verify acm is not set for this ac */ while (unlikely(local->wmm_acm & BIT(skb->priority))) { if (wme_downgrade_ac(skb)) { - /* No AC with lower priority has acm=0, - * drop packet. */ + /* No AC with lower priority has acm=0, drop packet. */ return -1; } } @@ -217,7 +146,7 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd) struct Qdisc *qdisc; int err, queue; - if (pkt_data->requeue) { + if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) { skb_queue_tail(&q->requeued[pkt_data->queue], skb); qd->q.qlen++; return 0; @@ -675,4 +604,3 @@ void ieee80211_wme_unregister(void) { unregister_qdisc(&wme_qdisc_ops); } -#endif /* CONFIG_NET_SCHED */ diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index f0bff10f0e08..76c713a6450c 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -24,11 +24,10 @@ #define QOS_CONTROL_TAG1D_MASK 0x07 -ieee80211_txrx_result -ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx); - -ieee80211_txrx_result -ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx); +static inline int WLAN_FC_IS_QOS_DATA(u16 fc) +{ + return (fc & 0x8C) == 0x88; +} #ifdef CONFIG_NET_SCHED void ieee80211_install_qdisc(struct net_device *dev); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 783af32c6911..6695efba57ec 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -11,10 +11,8 @@ #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/compiler.h> -#include <net/iw_handler.h> - #include <net/mac80211.h> -#include "ieee80211_common.h" + #include "ieee80211_i.h" #include "michael.h" #include "tkip.h" @@ -84,16 +82,16 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_txrx_data *tx) fc = tx->fc; - if (!tx->key || tx->key->alg != ALG_TKIP || skb->len < 24 || + if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || !WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len)) return TXRX_DROP; - if (!tx->key->force_sw_encrypt && - !tx->fragmented && - !(tx->local->hw.flags & IEEE80211_HW_TKIP_INCLUDE_MMIC) && + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + !(tx->flags & IEEE80211_TXRXD_FRAGMENTED) && + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) && !wpa_test) { /* hwaccel - with no need for preallocated room for Michael MIC */ @@ -116,8 +114,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_txrx_data *tx) #else authenticator = 1; #endif - key = &tx->key->key[authenticator ? ALG_TKIP_TEMP_AUTH_TX_MIC_KEY : - ALG_TKIP_TEMP_AUTH_RX_MIC_KEY]; + key = &tx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_TX_MIC_KEY : + ALG_TKIP_TEMP_AUTH_RX_MIC_KEY]; mic = skb_put(skb, MICHAEL_MIC_LEN); michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic); @@ -134,31 +132,20 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_txrx_data *rx) u8 mic[MICHAEL_MIC_LEN]; struct sk_buff *skb = rx->skb; int authenticator = 1, wpa_test = 0; + DECLARE_MAC_BUF(mac); fc = rx->fc; - /* If device handles decryption totally, skip this check */ - if ((rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) || - (rx->local->hw.flags & IEEE80211_HW_DEVICE_STRIPS_MIC)) + /* + * No way to verify the MIC if the hardware stripped it + */ + if (rx->u.rx.status->flag & RX_FLAG_MMIC_STRIPPED) return TXRX_CONTINUE; - if (!rx->key || rx->key->alg != ALG_TKIP || + if (!rx->key || rx->key->conf.alg != ALG_TKIP || !(rx->fc & IEEE80211_FCTL_PROTECTED) || !WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; - if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !rx->key->force_sw_encrypt) { - if (rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) { - if (skb->len < MICHAEL_MIC_LEN) - return TXRX_DROP; - } - /* Need to verify Michael MIC sometimes in software even when - * hwaccel is used. Atheros ar5212: fragmented frames and QoS - * frames. */ - if (!rx->fragmented && !wpa_test) - goto remove_mic; - } - if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len) || data_len < MICHAEL_MIC_LEN) return TXRX_DROP; @@ -170,49 +157,28 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_txrx_data *rx) #else authenticator = 1; #endif - key = &rx->key->key[authenticator ? ALG_TKIP_TEMP_AUTH_RX_MIC_KEY : - ALG_TKIP_TEMP_AUTH_TX_MIC_KEY]; + key = &rx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_RX_MIC_KEY : + ALG_TKIP_TEMP_AUTH_TX_MIC_KEY]; michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic); if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { - if (!rx->u.rx.ra_match) + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) return TXRX_DROP; printk(KERN_DEBUG "%s: invalid Michael MIC in data frame from " - MAC_FMT "\n", rx->dev->name, MAC_ARG(sa)); - - do { - struct ieee80211_hdr *hdr; - union iwreq_data wrqu; - char *buf = kmalloc(128, GFP_ATOMIC); - if (!buf) - break; - - /* TODO: needed parameters: count, key type, TSC */ - hdr = (struct ieee80211_hdr *) skb->data; - sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" - "keyid=%d %scast addr=" MAC_FMT ")", - rx->key->keyidx, - hdr->addr1[0] & 0x01 ? "broad" : "uni", - MAC_ARG(hdr->addr2)); - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = strlen(buf); - wireless_send_event(rx->dev, IWEVCUSTOM, &wrqu, buf); - kfree(buf); - } while (0); - - if (!rx->local->apdev) - return TXRX_DROP; - - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_michael_mic_failure); + "%s\n", rx->dev->name, print_mac(mac, sa)); - return TXRX_QUEUED; + mac80211_ev_michael_mic_failure(rx->dev, rx->key->conf.keyidx, + (void *) skb->data); + return TXRX_DROP; } - remove_mic: /* remove Michael MIC from payload */ skb_trim(skb, skb->len - MICHAEL_MIC_LEN); + /* update IV in key information to be able to detect replays */ + rx->key->u.tkip.iv32_rx[rx->u.rx.queue] = rx->u.rx.tkip_iv32; + rx->key->u.tkip.iv16_rx[rx->u.rx.queue] = rx->u.rx.tkip_iv16; + return TXRX_CONTINUE; } @@ -230,7 +196,11 @@ static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx, hdrlen = ieee80211_get_hdrlen(fc); len = skb->len - hdrlen; - tailneed = !tx->key->force_sw_encrypt ? 0 : TKIP_ICV_LEN; + if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + tailneed = 0; + else + tailneed = TKIP_ICV_LEN; + if ((skb_headroom(skb) < TKIP_IV_LEN || skb_tailroom(skb) < tailneed)) { I802_DEBUG_INC(tx->local->tx_expand_skb_head); @@ -248,8 +218,7 @@ static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx, if (key->u.tkip.iv16 == 0) key->u.tkip.iv32++; - if (!tx->key->force_sw_encrypt) { - u32 flags = tx->local->hw.flags; + if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { hdr = (struct ieee80211_hdr *)skb->data; /* hwaccel - with preallocated room for IV */ @@ -259,23 +228,7 @@ static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx, 0x7f), (u8) key->u.tkip.iv16); - if (flags & IEEE80211_HW_TKIP_REQ_PHASE2_KEY) - ieee80211_tkip_gen_rc4key(key, hdr->addr2, - tx->u.tx.control->tkip_key); - else if (flags & IEEE80211_HW_TKIP_REQ_PHASE1_KEY) { - if (key->u.tkip.iv16 == 0 || - !key->u.tkip.tx_initialized) { - ieee80211_tkip_gen_phase1key(key, hdr->addr2, - (u16 *)tx->u.tx.control->tkip_key); - key->u.tkip.tx_initialized = 1; - tx->u.tx.control->flags |= - IEEE80211_TXCTL_TKIP_NEW_PHASE1_KEY; - } else - tx->u.tx.control->flags &= - ~IEEE80211_TXCTL_TKIP_NEW_PHASE1_KEY; - } - - tx->u.tx.control->key_idx = tx->key->hw_key_idx; + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; return 0; } @@ -290,28 +243,27 @@ static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx, ieee80211_txrx_result -ieee80211_tx_h_tkip_encrypt(struct ieee80211_txrx_data *tx) +ieee80211_crypto_tkip_encrypt(struct ieee80211_txrx_data *tx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; u16 fc; - struct ieee80211_key *key = tx->key; struct sk_buff *skb = tx->skb; int wpa_test = 0, test = 0; fc = le16_to_cpu(hdr->frame_control); - if (!key || key->alg != ALG_TKIP || !WLAN_FC_DATA_PRESENT(fc)) + if (!WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; tx->u.tx.control->icv_len = TKIP_ICV_LEN; tx->u.tx.control->iv_len = TKIP_IV_LEN; ieee80211_tx_set_iswep(tx); - if (!tx->key->force_sw_encrypt && - !(tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) && + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) && !wpa_test) { /* hwaccel - with no need for preallocated room for IV/ICV */ - tx->u.tx.control->key_idx = tx->key->hw_key_idx; + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; return TXRX_CONTINUE; } @@ -332,30 +284,31 @@ ieee80211_tx_h_tkip_encrypt(struct ieee80211_txrx_data *tx) ieee80211_txrx_result -ieee80211_rx_h_tkip_decrypt(struct ieee80211_txrx_data *rx) +ieee80211_crypto_tkip_decrypt(struct ieee80211_txrx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; u16 fc; int hdrlen, res, hwaccel = 0, wpa_test = 0; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; + DECLARE_MAC_BUF(mac); fc = le16_to_cpu(hdr->frame_control); hdrlen = ieee80211_get_hdrlen(fc); - if (!rx->key || rx->key->alg != ALG_TKIP || - !(rx->fc & IEEE80211_FCTL_PROTECTED) || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) return TXRX_CONTINUE; if (!rx->sta || skb->len - hdrlen < 12) return TXRX_DROP; - if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !rx->key->force_sw_encrypt) { - if (!(rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) { - /* Hardware takes care of all processing, including - * replay protection, so no need to continue here. */ + if (rx->u.rx.status->flag & RX_FLAG_DECRYPTED) { + if (rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED) { + /* + * Hardware took care of all processing, including + * replay protection, and stripped the ICV/IV so + * we cannot do any checks here. + */ return TXRX_CONTINUE; } @@ -366,11 +319,13 @@ ieee80211_rx_h_tkip_decrypt(struct ieee80211_txrx_data *rx) res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm, key, skb->data + hdrlen, skb->len - hdrlen, rx->sta->addr, - hwaccel, rx->u.rx.queue); + hwaccel, rx->u.rx.queue, + &rx->u.rx.tkip_iv32, + &rx->u.rx.tkip_iv16); if (res != TKIP_DECRYPT_OK || wpa_test) { printk(KERN_DEBUG "%s: TKIP decrypt failed for RX frame from " - MAC_FMT " (res=%d)\n", - rx->dev->name, MAC_ARG(rx->sta->addr), res); + "%s (res=%d)\n", + rx->dev->name, print_mac(mac, rx->sta->addr), res); return TXRX_DROP; } @@ -496,7 +451,10 @@ static int ccmp_encrypt_skb(struct ieee80211_txrx_data *tx, hdrlen = ieee80211_get_hdrlen(fc); len = skb->len - hdrlen; - tailneed = !key->force_sw_encrypt ? 0 : CCMP_MIC_LEN; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + tailneed = 0; + else + tailneed = CCMP_MIC_LEN; if ((skb_headroom(skb) < CCMP_HDR_LEN || skb_tailroom(skb) < tailneed)) { @@ -520,11 +478,11 @@ static int ccmp_encrypt_skb(struct ieee80211_txrx_data *tx, break; } - ccmp_pn2hdr(pos, pn, key->keyidx); + ccmp_pn2hdr(pos, pn, key->conf.keyidx); - if (!key->force_sw_encrypt) { + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { /* hwaccel - with preallocated room for CCMP header */ - tx->u.tx.control->key_idx = key->hw_key_idx; + tx->u.tx.control->key_idx = key->conf.hw_key_idx; return 0; } @@ -538,28 +496,27 @@ static int ccmp_encrypt_skb(struct ieee80211_txrx_data *tx, ieee80211_txrx_result -ieee80211_tx_h_ccmp_encrypt(struct ieee80211_txrx_data *tx) +ieee80211_crypto_ccmp_encrypt(struct ieee80211_txrx_data *tx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - struct ieee80211_key *key = tx->key; u16 fc; struct sk_buff *skb = tx->skb; int test = 0; fc = le16_to_cpu(hdr->frame_control); - if (!key || key->alg != ALG_CCMP || !WLAN_FC_DATA_PRESENT(fc)) + if (!WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; tx->u.tx.control->icv_len = CCMP_MIC_LEN; tx->u.tx.control->iv_len = CCMP_HDR_LEN; ieee80211_tx_set_iswep(tx); - if (!tx->key->force_sw_encrypt && - !(tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) { + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { /* hwaccel - with no need for preallocated room for CCMP " * header or MIC fields */ - tx->u.tx.control->key_idx = tx->key->hw_key_idx; + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; return TXRX_CONTINUE; } @@ -568,7 +525,6 @@ ieee80211_tx_h_ccmp_encrypt(struct ieee80211_txrx_data *tx) if (tx->u.tx.extra_frag) { int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { if (ccmp_encrypt_skb(tx, tx->u.tx.extra_frag[i], test) < 0) @@ -581,7 +537,7 @@ ieee80211_tx_h_ccmp_encrypt(struct ieee80211_txrx_data *tx) ieee80211_txrx_result -ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) +ieee80211_crypto_ccmp_decrypt(struct ieee80211_txrx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; u16 fc; @@ -590,13 +546,12 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) struct sk_buff *skb = rx->skb; u8 pn[CCMP_PN_LEN]; int data_len; + DECLARE_MAC_BUF(mac); fc = le16_to_cpu(hdr->frame_control); hdrlen = ieee80211_get_hdrlen(fc); - if (!key || key->alg != ALG_CCMP || - !(rx->fc & IEEE80211_FCTL_PROTECTED) || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) return TXRX_CONTINUE; data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; @@ -604,8 +559,7 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) return TXRX_DROP; if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !key->force_sw_encrypt && - !(rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) + (rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED)) return TXRX_CONTINUE; (void) ccmp_hdr2pn(pn, skb->data + hdrlen); @@ -613,10 +567,11 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) if (memcmp(pn, key->u.ccmp.rx_pn[rx->u.rx.queue], CCMP_PN_LEN) <= 0) { #ifdef CONFIG_MAC80211_DEBUG u8 *ppn = key->u.ccmp.rx_pn[rx->u.rx.queue]; + printk(KERN_DEBUG "%s: CCMP replay detected for RX frame from " - MAC_FMT " (RX PN %02x%02x%02x%02x%02x%02x <= prev. PN " + "%s (RX PN %02x%02x%02x%02x%02x%02x <= prev. PN " "%02x%02x%02x%02x%02x%02x)\n", rx->dev->name, - MAC_ARG(rx->sta->addr), + print_mac(mac, rx->sta->addr), pn[0], pn[1], pn[2], pn[3], pn[4], pn[5], ppn[0], ppn[1], ppn[2], ppn[3], ppn[4], ppn[5]); #endif /* CONFIG_MAC80211_DEBUG */ @@ -624,10 +579,8 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) return TXRX_DROP; } - if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !key->force_sw_encrypt) { - /* hwaccel has already decrypted frame and verified MIC */ - } else { + if (!(rx->u.rx.status->flag & RX_FLAG_DECRYPTED)) { + /* hardware didn't decrypt/verify MIC */ u8 *scratch, *b_0, *aad; scratch = key->u.ccmp.rx_crypto_buf; @@ -642,8 +595,8 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) skb->data + skb->len - CCMP_MIC_LEN, skb->data + hdrlen + CCMP_HDR_LEN)) { printk(KERN_DEBUG "%s: CCMP decrypt failed for RX " - "frame from " MAC_FMT "\n", rx->dev->name, - MAC_ARG(rx->sta->addr)); + "frame from %s\n", rx->dev->name, + print_mac(mac, rx->sta->addr)); return TXRX_DROP; } } @@ -657,4 +610,3 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) return TXRX_CONTINUE; } - diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index da3b9594f9c3..49d80cf0cd75 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -19,13 +19,13 @@ ieee80211_txrx_result ieee80211_rx_h_michael_mic_verify(struct ieee80211_txrx_data *rx); ieee80211_txrx_result -ieee80211_tx_h_tkip_encrypt(struct ieee80211_txrx_data *tx); +ieee80211_crypto_tkip_encrypt(struct ieee80211_txrx_data *tx); ieee80211_txrx_result -ieee80211_rx_h_tkip_decrypt(struct ieee80211_txrx_data *rx); +ieee80211_crypto_tkip_decrypt(struct ieee80211_txrx_data *rx); ieee80211_txrx_result -ieee80211_tx_h_ccmp_encrypt(struct ieee80211_txrx_data *tx); +ieee80211_crypto_ccmp_encrypt(struct ieee80211_txrx_data *tx); ieee80211_txrx_result -ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx); +ieee80211_crypto_ccmp_decrypt(struct ieee80211_txrx_data *rx); #endif /* WPA_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3599770a2473..d7a600a5720a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -665,6 +665,20 @@ config NETFILTER_XT_MATCH_TCPMSS To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_TIME + tristate '"time" match support' + depends on NETFILTER_XTABLES + ---help--- + This option adds a "time" match, which allows you to match based on + the packet arrival time (at the machine which netfilter is running) + on) or departure time/date (for locally generated packets). + + If you say Y here, try `iptables -m time --help` for + more information. + + If you want to compile it as a module, say M here. + If unsure, say N. + config NETFILTER_XT_MATCH_U32 tristate '"u32" match support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0c054bf27973..93c58f973831 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -73,6 +73,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o +obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 381a77cf0c9e..a523fa4136ed 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -19,6 +19,7 @@ #include <linux/inetdevice.h> #include <linux/proc_fs.h> #include <linux/mutex.h> +#include <net/net_namespace.h> #include <net/sock.h> #include "nf_internals.h" @@ -293,7 +294,7 @@ void __init netfilter_init(void) } #ifdef CONFIG_PROC_FS - proc_net_netfilter = proc_mkdir("netfilter", proc_net); + proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net); if (!proc_net_netfilter) panic("cannot create netfilter proc entry"); #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0fe11889ce14..83c30b45d170 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -63,7 +63,6 @@ unsigned int nf_ct_log_invalid __read_mostly; HLIST_HEAD(unconfirmed); static int nf_conntrack_vmalloc __read_mostly; static struct kmem_cache *nf_conntrack_cachep __read_mostly; -static unsigned int nf_conntrack_next_id; DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); @@ -287,7 +286,6 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, unsigned int repl_hash) { - ct->id = ++nf_conntrack_next_id; hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &nf_conntrack_hash[hash]); hlist_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, @@ -827,44 +825,41 @@ EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); #include <linux/netfilter/nfnetlink_conntrack.h> #include <linux/mutex.h> - /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be * in ip_conntrack_core, since we don't want the protocols to autoload * or depend on ctnetlink */ -int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb, +int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), + NLA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), &tuple->src.u.tcp.port); - NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), + NLA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), &tuple->dst.u.tcp.port); return 0; -nfattr_failure: +nla_put_failure: return -1; } -EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nfattr); +EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr); -static const size_t cta_min_proto[CTA_PROTO_MAX] = { - [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), - [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t) +const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = { + [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 }, + [CTA_PROTO_DST_PORT] = { .type = NLA_U16 }, }; +EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy); -int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[], +int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t) { - if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1]) - return -EINVAL; - - if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT]) return -EINVAL; - t->src.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); - t->dst.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + t->src.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_SRC_PORT]); + t->dst.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_DST_PORT]); return 0; } -EXPORT_SYMBOL_GPL(nf_ct_port_nfattr_to_tuple); +EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); #endif /* Used by ipt_REJECT and ip6t_REJECT. */ diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 3ac64e25f10c..175c8d1a1992 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -20,6 +20,7 @@ #include <linux/percpu.h> #include <linux/kernel.h> #include <linux/jhash.h> +#include <net/net_namespace.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> @@ -40,7 +41,6 @@ static int nf_ct_expect_hash_rnd_initted __read_mostly; static int nf_ct_expect_vmalloc; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; -static unsigned int nf_ct_expect_next_id; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) @@ -301,7 +301,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) exp->timeout.expires = jiffies + master_help->helper->timeout * HZ; add_timer(&exp->timeout); - exp->id = ++nf_ct_expect_next_id; atomic_inc(&exp->use); NF_CT_STAT_INC(expect_create); } @@ -473,22 +472,8 @@ static const struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct ct_expect_iter_state *st; - int ret; - - st = kzalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL); - if (!st) - return -ENOMEM; - ret = seq_open(file, &exp_seq_ops); - if (ret) - goto out_free; - seq = file->private_data; - seq->private = st; - return ret; -out_free: - kfree(st); - return ret; + return seq_open_private(file, &exp_seq_ops, + sizeof(struct ct_expect_iter_state)); } static const struct file_operations exp_file_ops = { @@ -505,7 +490,7 @@ static int __init exp_proc_init(void) #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc; - proc = proc_net_fops_create("nf_conntrack_expect", 0440, &exp_file_ops); + proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops); if (!proc) return -ENOMEM; #endif /* CONFIG_PROC_FS */ @@ -515,7 +500,7 @@ static int __init exp_proc_init(void) static void exp_proc_remove(void) { #ifdef CONFIG_PROC_FS - proc_net_remove("nf_conntrack_expect"); + proc_net_remove(&init_net, "nf_conntrack_expect"); #endif /* CONFIG_PROC_FS */ } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2863e72b4091..9be1826e6cdd 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -4,7 +4,7 @@ * (C) 2001 by Jay Schulist <jschlst@samba.org> * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org> * (C) 2003 by Patrick Mchardy <kaber@trash.net> - * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net> + * (C) 2005-2007 by Pablo Neira Ayuso <pablo@netfilter.org> * * Initial connection tracking via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -54,18 +54,21 @@ ctnetlink_dump_tuples_proto(struct sk_buff *skb, struct nf_conntrack_l4proto *l4proto) { int ret = 0; - struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); + struct nlattr *nest_parms; - NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); + nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + NLA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); - if (likely(l4proto->tuple_to_nfattr)) - ret = l4proto->tuple_to_nfattr(skb, tuple); + if (likely(l4proto->tuple_to_nlattr)) + ret = l4proto->tuple_to_nlattr(skb, tuple); - NFA_NEST_END(skb, nest_parms); + nla_nest_end(skb, nest_parms); return ret; -nfattr_failure: +nla_put_failure: return -1; } @@ -75,16 +78,20 @@ ctnetlink_dump_tuples_ip(struct sk_buff *skb, struct nf_conntrack_l3proto *l3proto) { int ret = 0; - struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); + struct nlattr *nest_parms; - if (likely(l3proto->tuple_to_nfattr)) - ret = l3proto->tuple_to_nfattr(skb, tuple); + nest_parms = nla_nest_start(skb, CTA_TUPLE_IP | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; - NFA_NEST_END(skb, nest_parms); + if (likely(l3proto->tuple_to_nlattr)) + ret = l3proto->tuple_to_nlattr(skb, tuple); + + nla_nest_end(skb, nest_parms); return ret; -nfattr_failure: +nla_put_failure: return -1; } @@ -114,10 +121,10 @@ static inline int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) { __be32 status = htonl((u_int32_t) ct->status); - NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); + NLA_PUT(skb, CTA_STATUS, sizeof(status), &status); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -132,10 +139,10 @@ ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) else timeout = htonl(timeout_l / HZ); - NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); + NLA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -143,25 +150,27 @@ static inline int ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct) { struct nf_conntrack_l4proto *l4proto = nf_ct_l4proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); - struct nfattr *nest_proto; + struct nlattr *nest_proto; int ret; - if (!l4proto->to_nfattr) { + if (!l4proto->to_nlattr) { nf_ct_l4proto_put(l4proto); return 0; } - nest_proto = NFA_NEST(skb, CTA_PROTOINFO); + nest_proto = nla_nest_start(skb, CTA_PROTOINFO | NLA_F_NESTED); + if (!nest_proto) + goto nla_put_failure; - ret = l4proto->to_nfattr(skb, nest_proto, ct); + ret = l4proto->to_nlattr(skb, nest_proto, ct); nf_ct_l4proto_put(l4proto); - NFA_NEST_END(skb, nest_proto); + nla_nest_end(skb, nest_proto); return ret; -nfattr_failure: +nla_put_failure: nf_ct_l4proto_put(l4proto); return -1; } @@ -169,7 +178,7 @@ nfattr_failure: static inline int ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) { - struct nfattr *nest_helper; + struct nlattr *nest_helper; const struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; @@ -181,18 +190,20 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) if (!helper) goto out; - nest_helper = NFA_NEST(skb, CTA_HELP); - NFA_PUT(skb, CTA_HELP_NAME, strlen(helper->name), helper->name); + nest_helper = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED); + if (!nest_helper) + goto nla_put_failure; + NLA_PUT(skb, CTA_HELP_NAME, strlen(helper->name), helper->name); - if (helper->to_nfattr) - helper->to_nfattr(skb, ct); + if (helper->to_nlattr) + helper->to_nlattr(skb, ct); - NFA_NEST_END(skb, nest_helper); + nla_nest_end(skb, nest_helper); out: rcu_read_unlock(); return 0; -nfattr_failure: +nla_put_failure: rcu_read_unlock(); return -1; } @@ -203,20 +214,24 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, enum ip_conntrack_dir dir) { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; - struct nfattr *nest_count = NFA_NEST(skb, type); + struct nlattr *nest_count; __be32 tmp; + nest_count = nla_nest_start(skb, type | NLA_F_NESTED); + if (!nest_count) + goto nla_put_failure; + tmp = htonl(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); + NLA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); tmp = htonl(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); + NLA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); - NFA_NEST_END(skb, nest_count); + nla_nest_end(skb, nest_count); return 0; -nfattr_failure: +nla_put_failure: return -1; } #else @@ -229,10 +244,10 @@ ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) { __be32 mark = htonl(ct->mark); - NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); + NLA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); return 0; -nfattr_failure: +nla_put_failure: return -1; } #else @@ -242,11 +257,11 @@ nfattr_failure: static inline int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - __be32 id = htonl(ct->id); - NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); + __be32 id = htonl((unsigned long)ct); + NLA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -255,10 +270,10 @@ ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) { __be32 use = htonl(atomic_read(&ct->ct_general.use)); - NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); + NLA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -271,7 +286,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nfattr *nest_parms; + struct nlattr *nest_parms; unsigned char *b = skb_tail_pointer(skb); event |= NFNL_SUBSYS_CTNETLINK << 8; @@ -284,15 +299,19 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); + goto nla_put_failure; + nla_nest_end(skb, nest_parms); - nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); + goto nla_put_failure; + nla_nest_end(skb, nest_parms); if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || @@ -303,13 +322,13 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: -nfattr_failure: +nla_put_failure: nlmsg_trim(skb, b); return -1; } @@ -320,7 +339,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nfattr *nest_parms; + struct nlattr *nest_parms; struct nf_conn *ct = (struct nf_conn *)ptr; struct sk_buff *skb; unsigned int type; @@ -362,45 +381,49 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); + goto nla_put_failure; + nla_nest_end(skb, nest_parms); - nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); + goto nla_put_failure; + nla_nest_end(skb, nest_parms); if (events & IPCT_DESTROY) { if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) - goto nfattr_failure; + goto nla_put_failure; } else { if (ctnetlink_dump_status(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; if (ctnetlink_dump_timeout(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; if (events & IPCT_PROTOINFO && ctnetlink_dump_protoinfo(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; if ((events & IPCT_HELPER || nfct_help(ct)) && ctnetlink_dump_helpinfo(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK if ((events & IPCT_MARK || ct->mark) && ctnetlink_dump_mark(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; #endif if (events & IPCT_COUNTER_FILLING && (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)) - goto nfattr_failure; + goto nla_put_failure; } nlh->nlmsg_len = skb->tail - b; @@ -408,7 +431,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, return NOTIFY_DONE; nlmsg_failure: -nfattr_failure: +nla_put_failure: kfree_skb(skb); return NOTIFY_DONE; } @@ -479,49 +502,56 @@ out: } static inline int -ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple) +ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) { - struct nfattr *tb[CTA_IP_MAX]; + struct nlattr *tb[CTA_IP_MAX+1]; struct nf_conntrack_l3proto *l3proto; int ret = 0; - nfattr_parse_nested(tb, CTA_IP_MAX, attr); + nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); - if (likely(l3proto->nfattr_to_tuple)) - ret = l3proto->nfattr_to_tuple(tb, tuple); + if (likely(l3proto->nlattr_to_tuple)) { + ret = nla_validate_nested(attr, CTA_IP_MAX, + l3proto->nla_policy); + if (ret == 0) + ret = l3proto->nlattr_to_tuple(tb, tuple); + } nf_ct_l3proto_put(l3proto); return ret; } -static const size_t cta_min_proto[CTA_PROTO_MAX] = { - [CTA_PROTO_NUM-1] = sizeof(u_int8_t), +static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = { + [CTA_PROTO_NUM] = { .type = NLA_U8 }, }; static inline int -ctnetlink_parse_tuple_proto(struct nfattr *attr, +ctnetlink_parse_tuple_proto(struct nlattr *attr, struct nf_conntrack_tuple *tuple) { - struct nfattr *tb[CTA_PROTO_MAX]; + struct nlattr *tb[CTA_PROTO_MAX+1]; struct nf_conntrack_l4proto *l4proto; int ret = 0; - nfattr_parse_nested(tb, CTA_PROTO_MAX, attr); - - if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) - return -EINVAL; + ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy); + if (ret < 0) + return ret; - if (!tb[CTA_PROTO_NUM-1]) + if (!tb[CTA_PROTO_NUM]) return -EINVAL; - tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); + tuple->dst.protonum = *(u_int8_t *)nla_data(tb[CTA_PROTO_NUM]); l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); - if (likely(l4proto->nfattr_to_tuple)) - ret = l4proto->nfattr_to_tuple(tb, tuple); + if (likely(l4proto->nlattr_to_tuple)) { + ret = nla_validate_nested(attr, CTA_PROTO_MAX, + l4proto->nla_policy); + if (ret == 0) + ret = l4proto->nlattr_to_tuple(tb, tuple); + } nf_ct_l4proto_put(l4proto); @@ -529,29 +559,29 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr, } static inline int -ctnetlink_parse_tuple(struct nfattr *cda[], struct nf_conntrack_tuple *tuple, +ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, enum ctattr_tuple type, u_int8_t l3num) { - struct nfattr *tb[CTA_TUPLE_MAX]; + struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; memset(tuple, 0, sizeof(*tuple)); - nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]); + nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], NULL); - if (!tb[CTA_TUPLE_IP-1]) + if (!tb[CTA_TUPLE_IP]) return -EINVAL; tuple->src.l3num = l3num; - err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple); + err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple); if (err < 0) return err; - if (!tb[CTA_TUPLE_PROTO-1]) + if (!tb[CTA_TUPLE_PROTO]) return -EINVAL; - err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple); + err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO], tuple); if (err < 0) return err; @@ -565,32 +595,32 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct nf_conntrack_tuple *tuple, } #ifdef CONFIG_NF_NAT_NEEDED -static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = { - [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t), - [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t), +static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { + [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, + [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, }; -static int nfnetlink_parse_nat_proto(struct nfattr *attr, +static int nfnetlink_parse_nat_proto(struct nlattr *attr, const struct nf_conn *ct, struct nf_nat_range *range) { - struct nfattr *tb[CTA_PROTONAT_MAX]; + struct nlattr *tb[CTA_PROTONAT_MAX+1]; struct nf_nat_protocol *npt; + int err; - nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr); - - if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat)) - return -EINVAL; + err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); + if (err < 0) + return err; npt = nf_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); - if (!npt->nfattr_to_range) { + if (!npt->nlattr_to_range) { nf_nat_proto_put(npt); return 0; } - /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */ - if (npt->nfattr_to_range(tb, range) > 0) + /* nlattr_to_range returns 1 if it parsed, 0 if not, neg. on error */ + if (npt->nlattr_to_range(tb, range) > 0) range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; nf_nat_proto_put(npt); @@ -598,40 +628,39 @@ static int nfnetlink_parse_nat_proto(struct nfattr *attr, return 0; } -static const size_t cta_min_nat[CTA_NAT_MAX] = { - [CTA_NAT_MINIP-1] = sizeof(u_int32_t), - [CTA_NAT_MAXIP-1] = sizeof(u_int32_t), +static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { + [CTA_NAT_MINIP] = { .type = NLA_U32 }, + [CTA_NAT_MAXIP] = { .type = NLA_U32 }, }; static inline int -nfnetlink_parse_nat(struct nfattr *nat, +nfnetlink_parse_nat(struct nlattr *nat, const struct nf_conn *ct, struct nf_nat_range *range) { - struct nfattr *tb[CTA_NAT_MAX]; + struct nlattr *tb[CTA_NAT_MAX+1]; int err; memset(range, 0, sizeof(*range)); - nfattr_parse_nested(tb, CTA_NAT_MAX, nat); - - if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat)) - return -EINVAL; + err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); + if (err < 0) + return err; - if (tb[CTA_NAT_MINIP-1]) - range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]); + if (tb[CTA_NAT_MINIP]) + range->min_ip = *(__be32 *)nla_data(tb[CTA_NAT_MINIP]); - if (!tb[CTA_NAT_MAXIP-1]) + if (!tb[CTA_NAT_MAXIP]) range->max_ip = range->min_ip; else - range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); + range->max_ip = *(__be32 *)nla_data(tb[CTA_NAT_MAXIP]); if (range->min_ip) range->flags |= IP_NAT_RANGE_MAP_IPS; - if (!tb[CTA_NAT_PROTO-1]) + if (!tb[CTA_NAT_PROTO]) return 0; - err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range); + err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); if (err < 0) return err; @@ -640,31 +669,31 @@ nfnetlink_parse_nat(struct nfattr *nat, #endif static inline int -ctnetlink_parse_help(struct nfattr *attr, char **helper_name) +ctnetlink_parse_help(struct nlattr *attr, char **helper_name) { - struct nfattr *tb[CTA_HELP_MAX]; + struct nlattr *tb[CTA_HELP_MAX+1]; - nfattr_parse_nested(tb, CTA_HELP_MAX, attr); + nla_parse_nested(tb, CTA_HELP_MAX, attr, NULL); - if (!tb[CTA_HELP_NAME-1]) + if (!tb[CTA_HELP_NAME]) return -EINVAL; - *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); + *helper_name = nla_data(tb[CTA_HELP_NAME]); return 0; } -static const size_t cta_min[CTA_MAX] = { - [CTA_STATUS-1] = sizeof(u_int32_t), - [CTA_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_MARK-1] = sizeof(u_int32_t), - [CTA_USE-1] = sizeof(u_int32_t), - [CTA_ID-1] = sizeof(u_int32_t) +static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { + [CTA_STATUS] = { .type = NLA_U32 }, + [CTA_TIMEOUT] = { .type = NLA_U32 }, + [CTA_MARK] = { .type = NLA_U32 }, + [CTA_USE] = { .type = NLA_U32 }, + [CTA_ID] = { .type = NLA_U32 }, }; static int ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -673,12 +702,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - - if (cda[CTA_TUPLE_ORIG-1]) + if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); - else if (cda[CTA_TUPLE_REPLY-1]) + else if (cda[CTA_TUPLE_REPLY]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ @@ -695,9 +721,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, ct = nf_ct_tuplehash_to_ctrack(h); - if (cda[CTA_ID-1]) { - u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1])); - if (ct->id != id) { + if (cda[CTA_ID]) { + u_int32_t id = ntohl(*(__be32 *)nla_data(cda[CTA_ID])); + if (id != (u32)(unsigned long)ct) { nf_ct_put(ct); return -ENOENT; } @@ -712,7 +738,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, static int ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -731,12 +757,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, ctnetlink_done); } - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - - if (cda[CTA_TUPLE_ORIG-1]) + if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); - else if (cda[CTA_TUPLE_REPLY-1]) + else if (cda[CTA_TUPLE_REPLY]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else return -EINVAL; @@ -776,10 +799,10 @@ out: } static inline int -ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) +ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) { unsigned long d; - unsigned int status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1])); + unsigned int status = ntohl(*(__be32 *)nla_data(cda[CTA_STATUS])); d = ct->status ^ status; if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) @@ -795,14 +818,14 @@ ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) /* ASSURED bit can only be set */ return -EINVAL; - if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) { + if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { #ifndef CONFIG_NF_NAT_NEEDED return -EINVAL; #else struct nf_nat_range range; - if (cda[CTA_NAT_DST-1]) { - if (nfnetlink_parse_nat(cda[CTA_NAT_DST-1], ct, + if (cda[CTA_NAT_DST]) { + if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct, &range) < 0) return -EINVAL; if (nf_nat_initialized(ct, @@ -810,8 +833,8 @@ ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) return -EEXIST; nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); } - if (cda[CTA_NAT_SRC-1]) { - if (nfnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct, + if (cda[CTA_NAT_SRC]) { + if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct, &range) < 0) return -EINVAL; if (nf_nat_initialized(ct, @@ -831,7 +854,7 @@ ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) static inline int -ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) +ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); @@ -842,7 +865,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) if (ct->master) return -EINVAL; - err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname); + err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); if (err < 0) return err; @@ -879,9 +902,9 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) } static inline int -ctnetlink_change_timeout(struct nf_conn *ct, struct nfattr *cda[]) +ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[]) { - u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); + u_int32_t timeout = ntohl(*(__be32 *)nla_data(cda[CTA_TIMEOUT])); if (!del_timer(&ct->timeout)) return -ETIME; @@ -893,66 +916,67 @@ ctnetlink_change_timeout(struct nf_conn *ct, struct nfattr *cda[]) } static inline int -ctnetlink_change_protoinfo(struct nf_conn *ct, struct nfattr *cda[]) +ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[]) { - struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1]; + struct nlattr *tb[CTA_PROTOINFO_MAX+1], *attr = cda[CTA_PROTOINFO]; struct nf_conntrack_l4proto *l4proto; u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; u_int16_t l3num = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; int err = 0; - nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr); + nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, NULL); l4proto = nf_ct_l4proto_find_get(l3num, npt); - if (l4proto->from_nfattr) - err = l4proto->from_nfattr(tb, ct); + if (l4proto->from_nlattr) + err = l4proto->from_nlattr(tb, ct); nf_ct_l4proto_put(l4proto); return err; } static int -ctnetlink_change_conntrack(struct nf_conn *ct, struct nfattr *cda[]) +ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) { int err; - if (cda[CTA_HELP-1]) { + if (cda[CTA_HELP]) { err = ctnetlink_change_helper(ct, cda); if (err < 0) return err; } - if (cda[CTA_TIMEOUT-1]) { + if (cda[CTA_TIMEOUT]) { err = ctnetlink_change_timeout(ct, cda); if (err < 0) return err; } - if (cda[CTA_STATUS-1]) { + if (cda[CTA_STATUS]) { err = ctnetlink_change_status(ct, cda); if (err < 0) return err; } - if (cda[CTA_PROTOINFO-1]) { + if (cda[CTA_PROTOINFO]) { err = ctnetlink_change_protoinfo(ct, cda); if (err < 0) return err; } #if defined(CONFIG_NF_CONNTRACK_MARK) - if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); + if (cda[CTA_MARK]) + ct->mark = ntohl(*(__be32 *)nla_data(cda[CTA_MARK])); #endif return 0; } static int -ctnetlink_create_conntrack(struct nfattr *cda[], +ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_tuple *otuple, - struct nf_conntrack_tuple *rtuple) + struct nf_conntrack_tuple *rtuple, + struct nf_conn *master_ct) { struct nf_conn *ct; int err = -EINVAL; @@ -963,28 +987,28 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (ct == NULL || IS_ERR(ct)) return -ENOMEM; - if (!cda[CTA_TIMEOUT-1]) + if (!cda[CTA_TIMEOUT]) goto err; - ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); + ct->timeout.expires = ntohl(*(__be32 *)nla_data(cda[CTA_TIMEOUT])); ct->timeout.expires = jiffies + ct->timeout.expires * HZ; ct->status |= IPS_CONFIRMED; - if (cda[CTA_STATUS-1]) { + if (cda[CTA_STATUS]) { err = ctnetlink_change_status(ct, cda); if (err < 0) goto err; } - if (cda[CTA_PROTOINFO-1]) { + if (cda[CTA_PROTOINFO]) { err = ctnetlink_change_protoinfo(ct, cda); if (err < 0) goto err; } #if defined(CONFIG_NF_CONNTRACK_MARK) - if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); + if (cda[CTA_MARK]) + ct->mark = ntohl(*(__be32 *)nla_data(cda[CTA_MARK])); #endif helper = nf_ct_helper_find_get(rtuple); @@ -999,6 +1023,10 @@ ctnetlink_create_conntrack(struct nfattr *cda[], rcu_assign_pointer(help->helper, helper); } + /* setup master conntrack: this is a confirmed expectation */ + if (master_ct) + ct->master = master_ct; + add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); @@ -1014,7 +1042,7 @@ err: static int ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1022,32 +1050,56 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - - if (cda[CTA_TUPLE_ORIG-1]) { + if (cda[CTA_TUPLE_ORIG]) { err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3); if (err < 0) return err; } - if (cda[CTA_TUPLE_REPLY-1]) { + if (cda[CTA_TUPLE_REPLY]) { err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3); if (err < 0) return err; } write_lock_bh(&nf_conntrack_lock); - if (cda[CTA_TUPLE_ORIG-1]) + if (cda[CTA_TUPLE_ORIG]) h = __nf_conntrack_find(&otuple, NULL); - else if (cda[CTA_TUPLE_REPLY-1]) + else if (cda[CTA_TUPLE_REPLY]) h = __nf_conntrack_find(&rtuple, NULL); if (h == NULL) { + struct nf_conntrack_tuple master; + struct nf_conntrack_tuple_hash *master_h = NULL; + struct nf_conn *master_ct = NULL; + + if (cda[CTA_TUPLE_MASTER]) { + err = ctnetlink_parse_tuple(cda, + &master, + CTA_TUPLE_MASTER, + u3); + if (err < 0) + return err; + + master_h = __nf_conntrack_find(&master, NULL); + if (master_h == NULL) { + err = -ENOENT; + goto out_unlock; + } + master_ct = nf_ct_tuplehash_to_ctrack(master_h); + atomic_inc(&master_ct->ct_general.use); + } + write_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) - err = ctnetlink_create_conntrack(cda, &otuple, &rtuple); + err = ctnetlink_create_conntrack(cda, + &otuple, + &rtuple, + master_ct); + if (err < 0 && master_ct) + nf_ct_put(master_ct); + return err; } /* implicit 'else' */ @@ -1057,7 +1109,12 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = -EEXIST; if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { /* we only allow nat config for new conntracks */ - if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) { + if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { + err = -EINVAL; + goto out_unlock; + } + /* can't link an existing conntrack to a master */ + if (cda[CTA_TUPLE_MASTER]) { err = -EINVAL; goto out_unlock; } @@ -1079,16 +1136,18 @@ ctnetlink_exp_dump_tuple(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, enum ctattr_expect type) { - struct nfattr *nest_parms = NFA_NEST(skb, type); + struct nlattr *nest_parms; + nest_parms = nla_nest_start(skb, type | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple) < 0) - goto nfattr_failure; - - NFA_NEST_END(skb, nest_parms); + goto nla_put_failure; + nla_nest_end(skb, nest_parms); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -1101,32 +1160,34 @@ ctnetlink_exp_dump_mask(struct sk_buff *skb, struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple m; - struct nfattr *nest_parms; + struct nlattr *nest_parms; memset(&m, 0xFF, sizeof(m)); m.src.u.all = mask->src.u.all; memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); - nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK); + nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto); nf_ct_l3proto_put(l3proto); if (unlikely(ret < 0)) - goto nfattr_failure; + goto nla_put_failure; l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); nf_ct_l4proto_put(l4proto); if (unlikely(ret < 0)) - goto nfattr_failure; + goto nla_put_failure; - NFA_NEST_END(skb, nest_parms); + nla_nest_end(skb, nest_parms); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -1136,23 +1197,23 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, { struct nf_conn *master = exp->master; __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ); - __be32 id = htonl(exp->id); + __be32 id = htonl((unsigned long)exp); if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) - goto nfattr_failure; + goto nla_put_failure; if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0) - goto nfattr_failure; + goto nla_put_failure; if (ctnetlink_exp_dump_tuple(skb, &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple, CTA_EXPECT_MASTER) < 0) - goto nfattr_failure; + goto nla_put_failure; - NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); - NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); + NLA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); + NLA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -1176,13 +1237,13 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, nfmsg->res_id = 0; if (ctnetlink_exp_dump_expect(skb, exp) < 0) - goto nfattr_failure; + goto nla_put_failure; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: -nfattr_failure: +nla_put_failure: nlmsg_trim(skb, b); return -1; } @@ -1224,14 +1285,14 @@ static int ctnetlink_expect_event(struct notifier_block *this, nfmsg->res_id = 0; if (ctnetlink_exp_dump_expect(skb, exp) < 0) - goto nfattr_failure; + goto nla_put_failure; nlh->nlmsg_len = skb->tail - b; nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); return NOTIFY_DONE; nlmsg_failure: -nfattr_failure: +nla_put_failure: kfree_skb(skb); return NOTIFY_DONE; } @@ -1286,14 +1347,14 @@ out: return skb->len; } -static const size_t cta_min_exp[CTA_EXPECT_MAX] = { - [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_EXPECT_ID-1] = sizeof(u_int32_t) +static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { + [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, + [CTA_EXPECT_ID] = { .type = NLA_U32 }, }; static int ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; @@ -1302,16 +1363,13 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, ctnetlink_exp_dump_table, ctnetlink_exp_done); } - if (cda[CTA_EXPECT_MASTER-1]) + if (cda[CTA_EXPECT_MASTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); else return -EINVAL; @@ -1323,9 +1381,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (!exp) return -ENOENT; - if (cda[CTA_EXPECT_ID-1]) { - __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); - if (exp->id != ntohl(id)) { + if (cda[CTA_EXPECT_ID]) { + __be32 id = *(__be32 *)nla_data(cda[CTA_EXPECT_ID]); + if (ntohl(id) != (u32)(unsigned long)exp) { nf_ct_expect_put(exp); return -ENOENT; } @@ -1355,7 +1413,7 @@ out: static int ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; @@ -1366,10 +1424,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, unsigned int i; int err; - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - - if (cda[CTA_EXPECT_TUPLE-1]) { + if (cda[CTA_EXPECT_TUPLE]) { /* delete a single expect by tuple */ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); if (err < 0) @@ -1380,9 +1435,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, if (!exp) return -ENOENT; - if (cda[CTA_EXPECT_ID-1]) { - __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); - if (exp->id != ntohl(id)) { + if (cda[CTA_EXPECT_ID]) { + __be32 id = *(__be32 *)nla_data(cda[CTA_EXPECT_ID]); + if (ntohl(id) != (u32)(unsigned long)exp) { nf_ct_expect_put(exp); return -ENOENT; } @@ -1393,8 +1448,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* have to put what we 'get' above. * after this line usage count == 0 */ nf_ct_expect_put(exp); - } else if (cda[CTA_EXPECT_HELP_NAME-1]) { - char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]); + } else if (cda[CTA_EXPECT_HELP_NAME]) { + char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]); struct nf_conn_help *m_help; /* delete all expectations for this helper */ @@ -1436,13 +1491,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return 0; } static int -ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nfattr *cda[]) +ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nlattr *cda[]) { return -EOPNOTSUPP; } static int -ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) +ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1499,7 +1554,7 @@ out: static int ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; @@ -1507,12 +1562,9 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - - if (!cda[CTA_EXPECT_TUPLE-1] - || !cda[CTA_EXPECT_MASK-1] - || !cda[CTA_EXPECT_MASTER-1]) + if (!cda[CTA_EXPECT_TUPLE] + || !cda[CTA_EXPECT_MASK] + || !cda[CTA_EXPECT_MASTER]) return -EINVAL; err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); @@ -1548,34 +1600,41 @@ static struct notifier_block ctnl_notifier_exp = { }; #endif -static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { +static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .policy = ct_nla_policy }, [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .policy = ct_nla_policy }, [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .policy = ct_nla_policy }, [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .policy = ct_nla_policy }, }; -static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { +static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, - .attr_count = CTA_EXPECT_MAX, }, + .attr_count = CTA_EXPECT_MAX, + .policy = exp_nla_policy }, [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, - .attr_count = CTA_EXPECT_MAX, }, + .attr_count = CTA_EXPECT_MAX, + .policy = exp_nla_policy }, [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, - .attr_count = CTA_EXPECT_MAX, }, + .attr_count = CTA_EXPECT_MAX, + .policy = exp_nla_policy }, }; -static struct nfnetlink_subsystem ctnl_subsys = { +static const struct nfnetlink_subsystem ctnl_subsys = { .name = "conntrack", .subsys_id = NFNL_SUBSYS_CTNETLINK, .cb_count = IPCTNL_MSG_MAX, .cb = ctnl_cb, }; -static struct nfnetlink_subsystem ctnl_exp_subsys = { +static const struct nfnetlink_subsystem ctnl_exp_subsys = { .name = "conntrack_expect", .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, .cb_count = IPCTNL_MSG_EXP_MAX, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index bdbead8a7a83..4a185f6aa65a 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -274,8 +274,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .destroy = gre_destroy, .me = THIS_MODULE, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index eb3fe7401466..c7075345971b 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -831,6 +831,20 @@ static int tcp_packet(struct nf_conn *conntrack, tuple = &conntrack->tuplehash[dir].tuple; switch (new_state) { + case TCP_CONNTRACK_SYN_SENT: + if (old_state < TCP_CONNTRACK_TIME_WAIT) + break; + if (conntrack->proto.tcp.seen[!dir].flags & + IP_CT_TCP_FLAG_CLOSE_INIT) { + /* Attempt to reopen a closed connection. + * Delete this connection and look up again. */ + write_unlock_bh(&tcp_lock); + if (del_timer(&conntrack->timeout)) + conntrack->timeout.function((unsigned long) + conntrack); + return -NF_REPEAT; + } + /* Fall through */ case TCP_CONNTRACK_IGNORE: /* Ignored packets: * @@ -879,27 +893,6 @@ static int tcp_packet(struct nf_conn *conntrack, nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; - case TCP_CONNTRACK_SYN_SENT: - if (old_state < TCP_CONNTRACK_TIME_WAIT) - break; - if ((conntrack->proto.tcp.seen[dir].flags & - IP_CT_TCP_FLAG_CLOSE_INIT) - || after(ntohl(th->seq), - conntrack->proto.tcp.seen[dir].td_end)) { - /* Attempt to reopen a closed connection. - * Delete this connection and look up again. */ - write_unlock_bh(&tcp_lock); - if (del_timer(&conntrack->timeout)) - conntrack->timeout.function((unsigned long) - conntrack); - return -NF_REPEAT; - } else { - write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, - NULL, "nf_ct_tcp: invalid SYN"); - return -NF_ACCEPT; - } case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) @@ -1067,93 +1060,96 @@ static int tcp_new(struct nf_conn *conntrack, #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, +static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, const struct nf_conn *ct) { - struct nfattr *nest_parms; + struct nlattr *nest_parms; struct nf_ct_tcp_flags tmp = {}; read_lock_bh(&tcp_lock); - nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); - NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + NLA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), &ct->proto.tcp.state); - NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t), &ct->proto.tcp.seen[0].td_scale); - NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t), &ct->proto.tcp.seen[1].td_scale); tmp.flags = ct->proto.tcp.seen[0].flags; - NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, + NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, sizeof(struct nf_ct_tcp_flags), &tmp); tmp.flags = ct->proto.tcp.seen[1].flags; - NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, + NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, sizeof(struct nf_ct_tcp_flags), &tmp); read_unlock_bh(&tcp_lock); - NFA_NEST_END(skb, nest_parms); + nla_nest_end(skb, nest_parms); return 0; -nfattr_failure: +nla_put_failure: read_unlock_bh(&tcp_lock); return -1; } -static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = { - [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t), - [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t), - [CTA_PROTOINFO_TCP_WSCALE_REPLY-1] = sizeof(u_int8_t), - [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1] = sizeof(struct nf_ct_tcp_flags), - [CTA_PROTOINFO_TCP_FLAGS_REPLY-1] = sizeof(struct nf_ct_tcp_flags) +static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { + [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, + [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; -static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct) +static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) { - struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; - struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + struct nlattr *attr = cda[CTA_PROTOINFO_TCP]; + struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1]; + int err; /* updates could not contain anything about the private * protocol info, in that case skip the parsing */ if (!attr) return 0; - nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); - - if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)) - return -EINVAL; + err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr, tcp_nla_policy); + if (err < 0) + return err; - if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + if (!tb[CTA_PROTOINFO_TCP_STATE]) return -EINVAL; write_lock_bh(&tcp_lock); ct->proto.tcp.state = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + *(u_int8_t *)nla_data(tb[CTA_PROTOINFO_TCP_STATE]); - if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]) { + if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) { struct nf_ct_tcp_flags *attr = - NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]); + nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]); ct->proto.tcp.seen[0].flags &= ~attr->mask; ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask; } - if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]) { + if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) { struct nf_ct_tcp_flags *attr = - NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]); + nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]); ct->proto.tcp.seen[1].flags &= ~attr->mask; ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask; } - if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] && - tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1] && + if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] && + tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] && ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE && ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { ct->proto.tcp.seen[0].td_scale = *(u_int8_t *) - NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]); + nla_data(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]); ct->proto.tcp.seen[1].td_scale = *(u_int8_t *) - NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]); + nla_data(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]); } write_unlock_bh(&tcp_lock); @@ -1384,10 +1380,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .new = tcp_new, .error = tcp_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .to_nfattr = tcp_to_nfattr, - .from_nfattr = nfattr_to_tcp, - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .to_nlattr = tcp_to_nlattr, + .from_nlattr = nlattr_to_tcp, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &tcp_sysctl_table_users, @@ -1413,10 +1410,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .new = tcp_new, .error = tcp_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .to_nfattr = tcp_to_nfattr, - .from_nfattr = nfattr_to_tcp, - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .to_nlattr = tcp_to_nlattr, + .from_nlattr = nlattr_to_tcp, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &tcp_sysctl_table_users, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 2a2fd1a764ea..ba80e1a1ea17 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -203,8 +203,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .new = udp_new, .error = udp_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &udp_sysctl_table_users, @@ -230,8 +231,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .new = udp_new, .error = udp_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &udp_sysctl_table_users, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index b906b413997c..b8981dd922be 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -203,8 +203,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .new = udplite_new, .error = udplite_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &udplite_sysctl_table_users, @@ -226,8 +227,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .new = udplite_new, .error = udplite_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &udplite_sysctl_table_users, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index a4ce5e887997..9efdd37fc195 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -14,6 +14,7 @@ #include <linux/seq_file.h> #include <linux/percpu.h> #include <linux/netdevice.h> +#include <net/net_namespace.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif @@ -194,22 +195,8 @@ static const struct seq_operations ct_seq_ops = { static int ct_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct ct_iter_state *st; - int ret; - - st = kzalloc(sizeof(struct ct_iter_state), GFP_KERNEL); - if (st == NULL) - return -ENOMEM; - ret = seq_open(file, &ct_seq_ops); - if (ret) - goto out_free; - seq = file->private_data; - seq->private = st; - return ret; -out_free: - kfree(st); - return ret; + return seq_open_private(file, &ct_seq_ops, + sizeof(struct ct_iter_state)); } static const struct file_operations ct_file_ops = { @@ -420,10 +407,10 @@ static int __init nf_conntrack_standalone_init(void) return ret; #ifdef CONFIG_PROC_FS - proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat); + proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, init_net.proc_net_stat); if (!proc_stat) goto cleanup_proc; @@ -444,9 +431,9 @@ static int __init nf_conntrack_standalone_init(void) cleanup_proc_stat: #endif #ifdef CONFIG_PROC_FS - remove_proc_entry("nf_conntrack", proc_net_stat); + remove_proc_entry("nf_conntrack", init_net. proc_net_stat); cleanup_proc: - proc_net_remove("nf_conntrack"); + proc_net_remove(&init_net, "nf_conntrack"); cleanup_init: #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); @@ -459,8 +446,8 @@ static void __exit nf_conntrack_standalone_fini(void) unregister_sysctl_table(nf_ct_sysctl_header); #endif #ifdef CONFIG_PROC_FS - remove_proc_entry("nf_conntrack", proc_net_stat); - proc_net_remove("nf_conntrack"); + remove_proc_entry("nf_conntrack", init_net.proc_net_stat); + proc_net_remove(&init_net, "nf_conntrack"); #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); } diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index e32761ce260c..aa2831587b82 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -69,6 +69,9 @@ static int nf_sockopt(struct sock *sk, int pf, int val, struct nf_sockopt_ops *ops; int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; @@ -125,6 +128,10 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, struct nf_sockopt_ops *ops; int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + + if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8797e6953ef2..2128542995f7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -41,32 +41,20 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; static struct sock *nfnl = NULL; -static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; +static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; static DEFINE_MUTEX(nfnl_mutex); -static void nfnl_lock(void) +static inline void nfnl_lock(void) { mutex_lock(&nfnl_mutex); } -static int nfnl_trylock(void) -{ - return !mutex_trylock(&nfnl_mutex); -} - -static void __nfnl_unlock(void) -{ - mutex_unlock(&nfnl_mutex); -} - -static void nfnl_unlock(void) +static inline void nfnl_unlock(void) { mutex_unlock(&nfnl_mutex); - if (nfnl->sk_receive_queue.qlen) - nfnl->sk_data_ready(nfnl, 0); } -int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) +int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { nfnl_lock(); if (subsys_table[n->subsys_id]) { @@ -80,7 +68,7 @@ int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) } EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); -int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n) +int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n) { nfnl_lock(); subsys_table[n->subsys_id] = NULL; @@ -90,7 +78,7 @@ int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n) } EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); -static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) +static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) { u_int8_t subsys_id = NFNL_SUBSYS_ID(type); @@ -100,8 +88,8 @@ static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) return subsys_table[subsys_id]; } -static inline struct nfnl_callback * -nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss) +static inline const struct nfnl_callback * +nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss) { u_int8_t cb_id = NFNL_MSG_TYPE(type); @@ -111,62 +99,6 @@ nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss) return &ss->cb[cb_id]; } -void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen, - const void *data) -{ - struct nfattr *nfa; - int size = NFA_LENGTH(attrlen); - - nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); - nfa->nfa_type = attrtype; - nfa->nfa_len = size; - memcpy(NFA_DATA(nfa), data, attrlen); - memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size); -} -EXPORT_SYMBOL_GPL(__nfa_fill); - -void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) -{ - memset(tb, 0, sizeof(struct nfattr *) * maxattr); - - while (NFA_OK(nfa, len)) { - unsigned flavor = NFA_TYPE(nfa); - if (flavor && flavor <= maxattr) - tb[flavor-1] = nfa; - nfa = NFA_NEXT(nfa, len); - } -} -EXPORT_SYMBOL_GPL(nfattr_parse); - -/** - * nfnetlink_check_attributes - check and parse nfnetlink attributes - * - * subsys: nfnl subsystem for which this message is to be parsed - * nlmsghdr: netlink message to be checked/parsed - * cda: array of pointers, needs to be at least subsys->attr_count big - * - */ -static int -nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, - struct nlmsghdr *nlh, struct nfattr *cda[]) -{ - int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); - u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); - u_int16_t attr_count = subsys->cb[cb_id].attr_count; - - /* check attribute lengths. */ - if (likely(nlh->nlmsg_len > min_len)) { - struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh)); - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - nfattr_parse(cda, attr_count, attr, attrlen); - } - - /* implicit: if nlmsg_len == min_len, we return 0, and an empty - * (zeroed) cda[] array. The message is valid, but empty. */ - - return 0; -} - int nfnetlink_has_listeners(unsigned int group) { return netlink_has_listeners(nfnl, group); @@ -175,16 +107,7 @@ EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) { - int err = 0; - - NETLINK_CB(skb).dst_group = group; - if (echo) - atomic_inc(&skb->users); - netlink_broadcast(nfnl, skb, pid, group, gfp_any()); - if (echo) - err = netlink_unicast(nfnl, skb, pid, MSG_DONTWAIT); - - return err; + return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any()); } EXPORT_SYMBOL_GPL(nfnetlink_send); @@ -197,8 +120,8 @@ EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. */ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct nfnl_callback *nc; - struct nfnetlink_subsystem *ss; + const struct nfnl_callback *nc; + const struct nfnetlink_subsystem *ss; int type, err; if (security_netlink_recv(skb, CAP_NET_ADMIN)) @@ -212,9 +135,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) ss = nfnetlink_get_subsys(type); if (!ss) { #ifdef CONFIG_KMOD - /* don't call nfnl_unlock, since it would reenter - * with further packet processing */ - __nfnl_unlock(); + nfnl_unlock(); request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); nfnl_lock(); ss = nfnetlink_get_subsys(type); @@ -228,29 +149,31 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; { - u_int16_t attr_count = - ss->cb[NFNL_MSG_TYPE(nlh->nlmsg_type)].attr_count; - struct nfattr *cda[attr_count]; - - memset(cda, 0, sizeof(struct nfattr *) * attr_count); + int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + u_int16_t attr_count = ss->cb[cb_id].attr_count; + struct nlattr *cda[attr_count+1]; + + if (likely(nlh->nlmsg_len >= min_len)) { + struct nlattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); + int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); + + err = nla_parse(cda, attr_count, attr, attrlen, + ss->cb[cb_id].policy); + if (err < 0) + return err; + } else + return -EINVAL; - err = nfnetlink_check_attributes(ss, nlh, cda); - if (err < 0) - return err; return nc->call(nfnl, skb, nlh, cda); } } -static void nfnetlink_rcv(struct sock *sk, int len) +static void nfnetlink_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - if (nfnl_trylock()) - return; - netlink_run_queue(sk, &qlen, nfnetlink_rcv_msg); - __nfnl_unlock(); - } while (qlen); + nfnl_lock(); + netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + nfnl_unlock(); } static void __exit nfnetlink_exit(void) @@ -264,7 +187,7 @@ static int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, + nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, nfnetlink_rcv, NULL, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 2351533a8507..2c7bd2eb0294 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -37,8 +37,9 @@ #endif #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE -#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ +#define NFULNL_TIMEOUT_DEFAULT HZ /* every second */ #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ +#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */ #define PRINTR(x, args...) do { if (net_ratelimit()) \ printk(x, ## args); } while (0); @@ -152,6 +153,11 @@ instance_create(u_int16_t group_num, int pid) if (!inst) goto out_unlock; + if (!try_module_get(THIS_MODULE)) { + kfree(inst); + goto out_unlock; + } + INIT_HLIST_NODE(&inst->hlist); spin_lock_init(&inst->lock); /* needs to be two, since we _put() after creation */ @@ -166,10 +172,7 @@ instance_create(u_int16_t group_num, int pid) inst->flushtimeout = NFULNL_TIMEOUT_DEFAULT; inst->nlbufsiz = NFULNL_NLBUFSIZ_DEFAULT; inst->copy_mode = NFULNL_COPY_PACKET; - inst->copy_range = 0xffff; - - if (!try_module_get(THIS_MODULE)) - goto out_free; + inst->copy_range = NFULNL_COPY_RANGE_MAX; hlist_add_head(&inst->hlist, &instance_table[instance_hashfn(group_num)]); @@ -181,14 +184,12 @@ instance_create(u_int16_t group_num, int pid) return inst; -out_free: - instance_put(inst); out_unlock: write_unlock_bh(&instances_lock); return NULL; } -static int __nfulnl_send(struct nfulnl_instance *inst); +static void __nfulnl_flush(struct nfulnl_instance *inst); static void __instance_destroy(struct nfulnl_instance *inst) @@ -202,17 +203,8 @@ __instance_destroy(struct nfulnl_instance *inst) /* then flush all pending packets from skb */ spin_lock_bh(&inst->lock); - if (inst->skb) { - /* timer "holds" one reference (we have one more) */ - if (del_timer(&inst->timer)) - instance_put(inst); - if (inst->qlen) - __nfulnl_send(inst); - if (inst->skb) { - kfree_skb(inst->skb); - inst->skb = NULL; - } - } + if (inst->skb) + __nfulnl_flush(inst); spin_unlock_bh(&inst->lock); /* and finally put the refcount */ @@ -244,11 +236,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, case NFULNL_COPY_PACKET: inst->copy_mode = mode; - /* we're using struct nfattr which has 16bit nfa_len */ - if (range > 0xffff) - inst->copy_range = 0xffff; - else - inst->copy_range = range; + inst->copy_range = min_t(unsigned int, + range, NFULNL_COPY_RANGE_MAX); break; default: @@ -310,8 +299,8 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags) return 0; } -static struct sk_buff *nfulnl_alloc_skb(unsigned int inst_size, - unsigned int pkt_size) +static struct sk_buff * +nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) { struct sk_buff *skb; unsigned int n; @@ -364,7 +353,18 @@ nlmsg_failure: return status; } -static void nfulnl_timer(unsigned long data) +static void +__nfulnl_flush(struct nfulnl_instance *inst) +{ + /* timer holds a reference */ + if (del_timer(&inst->timer)) + instance_put(inst); + if (inst->skb) + __nfulnl_send(inst); +} + +static void +nfulnl_timer(unsigned long data) { struct nfulnl_instance *inst = (struct nfulnl_instance *)data; @@ -409,36 +409,36 @@ __build_packet_message(struct nfulnl_instance *inst, pmsg.hw_protocol = skb->protocol; pmsg.hook = hooknum; - NFA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg); + NLA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg); if (prefix) - NFA_PUT(inst->skb, NFULA_PREFIX, plen, prefix); + NLA_PUT(inst->skb, NFULA_PREFIX, plen, prefix); if (indev) { tmp_uint = htonl(indev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), + NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); #else if (pf == PF_BRIDGE) { /* Case 1: outdev is physical input device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), &tmp_uint); /* this is the bridge group "brX" */ tmp_uint = htonl(indev->br_port->br->dev->ifindex); - NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ - NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); if (skb->nf_bridge && skb->nf_bridge->physindev) { tmp_uint = htonl(skb->nf_bridge->physindev->ifindex); - NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), &tmp_uint); } } @@ -448,28 +448,28 @@ __build_packet_message(struct nfulnl_instance *inst, if (outdev) { tmp_uint = htonl(outdev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), + NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); #else if (pf == PF_BRIDGE) { /* Case 1: outdev is physical output device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), &tmp_uint); /* this is the bridge group "brX" */ tmp_uint = htonl(outdev->br_port->br->dev->ifindex); - NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); } else { /* Case 2: indev is a bridge group, we need to look * for physical device (when called from ipv4) */ - NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); if (skb->nf_bridge && skb->nf_bridge->physoutdev) { tmp_uint = htonl(skb->nf_bridge->physoutdev->ifindex); - NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), &tmp_uint); } } @@ -478,15 +478,16 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->mark) { tmp_uint = htonl(skb->mark); - NFA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint); + NLA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint); } - if (indev && skb->dev && skb->dev->hard_header_parse) { + if (indev && skb->dev) { struct nfulnl_msg_packet_hw phw; - int len = skb->dev->hard_header_parse((struct sk_buff *)skb, - phw.hw_addr); - phw.hw_addrlen = htons(len); - NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); + int len = dev_parse_header(skb, phw.hw_addr); + if (len > 0) { + phw.hw_addrlen = htons(len); + NLA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); + } } if (skb->tstamp.tv64) { @@ -495,7 +496,7 @@ __build_packet_message(struct nfulnl_instance *inst, ts.sec = cpu_to_be64(tv.tv_sec); ts.usec = cpu_to_be64(tv.tv_usec); - NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); + NLA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); } /* UID */ @@ -503,9 +504,9 @@ __build_packet_message(struct nfulnl_instance *inst, read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) { __be32 uid = htonl(skb->sk->sk_socket->file->f_uid); - /* need to unlock here since NFA_PUT may goto */ + /* need to unlock here since NLA_PUT may goto */ read_unlock_bh(&skb->sk->sk_callback_lock); - NFA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid); + NLA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid); } else read_unlock_bh(&skb->sk->sk_callback_lock); } @@ -513,28 +514,28 @@ __build_packet_message(struct nfulnl_instance *inst, /* local sequence number */ if (inst->flags & NFULNL_CFG_F_SEQ) { tmp_uint = htonl(inst->seq++); - NFA_PUT(inst->skb, NFULA_SEQ, sizeof(tmp_uint), &tmp_uint); + NLA_PUT(inst->skb, NFULA_SEQ, sizeof(tmp_uint), &tmp_uint); } /* global sequence number */ if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) { tmp_uint = htonl(atomic_inc_return(&global_seq)); - NFA_PUT(inst->skb, NFULA_SEQ_GLOBAL, sizeof(tmp_uint), &tmp_uint); + NLA_PUT(inst->skb, NFULA_SEQ_GLOBAL, sizeof(tmp_uint), &tmp_uint); } if (data_len) { - struct nfattr *nfa; - int size = NFA_LENGTH(data_len); + struct nlattr *nla; + int size = nla_attr_size(data_len); - if (skb_tailroom(inst->skb) < (int)NFA_SPACE(data_len)) { + if (skb_tailroom(inst->skb) < nla_total_size(data_len)) { printk(KERN_WARNING "nfnetlink_log: no tailroom!\n"); goto nlmsg_failure; } - nfa = (struct nfattr *)skb_put(inst->skb, NFA_ALIGN(size)); - nfa->nfa_type = NFULA_PAYLOAD; - nfa->nfa_len = size; + nla = (struct nlattr *)skb_put(inst->skb, nla_total_size(data_len)); + nla->nla_type = NFULA_PAYLOAD; + nla->nla_len = size; - if (skb_copy_bits(skb, 0, NFA_DATA(nfa), data_len)) + if (skb_copy_bits(skb, 0, nla_data(nla), data_len)) BUG(); } @@ -543,7 +544,7 @@ __build_packet_message(struct nfulnl_instance *inst, nlmsg_failure: UDEBUG("nlmsg_failure\n"); -nfattr_failure: +nla_put_failure: PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n"); return -1; } @@ -590,32 +591,31 @@ nfulnl_log_packet(unsigned int pf, if (prefix) plen = strlen(prefix) + 1; - /* all macros expand to constant values at compile time */ /* FIXME: do we want to make the size calculation conditional based on * what is actually present? way more branches and checks, but more * memory efficient... */ - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) - + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hdr)) - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + size = NLMSG_ALIGN(sizeof(struct nfgenmsg)) + + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #ifdef CONFIG_BRIDGE_NETFILTER - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif - + NFA_SPACE(sizeof(u_int32_t)) /* mark */ - + NFA_SPACE(sizeof(u_int32_t)) /* uid */ - + NFA_SPACE(plen) /* prefix */ - + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hw)) - + NFA_SPACE(sizeof(struct nfulnl_msg_packet_timestamp)); + + nla_total_size(sizeof(u_int32_t)) /* mark */ + + nla_total_size(sizeof(u_int32_t)) /* uid */ + + nla_total_size(plen) /* prefix */ + + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) + + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); UDEBUG("initial size=%u\n", size); spin_lock_bh(&inst->lock); if (inst->flags & NFULNL_CFG_F_SEQ) - size += NFA_SPACE(sizeof(u_int32_t)); + size += nla_total_size(sizeof(u_int32_t)); if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) - size += NFA_SPACE(sizeof(u_int32_t)); + size += nla_total_size(sizeof(u_int32_t)); qthreshold = inst->qthreshold; /* per-rule qthreshold overrides per-instance */ @@ -635,7 +635,7 @@ nfulnl_log_packet(unsigned int pf, else data_len = inst->copy_range; - size += NFA_SPACE(data_len); + size += nla_total_size(data_len); UDEBUG("copy_packet, therefore size now %u\n", size); break; @@ -643,17 +643,13 @@ nfulnl_log_packet(unsigned int pf, goto unlock_and_release; } - if (inst->qlen >= qthreshold || - (inst->skb && size > - skb_tailroom(inst->skb) - sizeof(struct nfgenmsg))) { + if (inst->skb && + size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) { /* either the queue len is too high or we don't have * enough room in the skb left. flush to userspace. */ UDEBUG("flushing old skb\n"); - /* timer "holds" one reference (we have another one) */ - if (del_timer(&inst->timer)) - instance_put(inst); - __nfulnl_send(inst); + __nfulnl_flush(inst); } if (!inst->skb) { @@ -668,9 +664,11 @@ nfulnl_log_packet(unsigned int pf, __build_packet_message(inst, skb, data_len, pf, hooknum, in, out, li, prefix, plen); + if (inst->qlen >= qthreshold) + __nfulnl_flush(inst); /* timer_pending always called within inst->lock, so there * is no chance of a race here */ - if (!timer_pending(&inst->timer)) { + else if (!timer_pending(&inst->timer)) { instance_get(inst); inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100); add_timer(&inst->timer); @@ -706,7 +704,8 @@ nfulnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { UDEBUG("node = %p\n", inst); - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } @@ -721,7 +720,7 @@ static struct notifier_block nfulnl_rtnl_notifier = { static int nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[]) + struct nlmsghdr *nlh, struct nlattr *nfqa[]) { return -ENOTSUPP; } @@ -732,34 +731,18 @@ static struct nf_logger nfulnl_logger = { .me = THIS_MODULE, }; -static const int nfula_min[NFULA_MAX] = { - [NFULA_PACKET_HDR-1] = sizeof(struct nfulnl_msg_packet_hdr), - [NFULA_MARK-1] = sizeof(u_int32_t), - [NFULA_TIMESTAMP-1] = sizeof(struct nfulnl_msg_packet_timestamp), - [NFULA_IFINDEX_INDEV-1] = sizeof(u_int32_t), - [NFULA_IFINDEX_OUTDEV-1]= sizeof(u_int32_t), - [NFULA_IFINDEX_PHYSINDEV-1] = sizeof(u_int32_t), - [NFULA_IFINDEX_PHYSOUTDEV-1] = sizeof(u_int32_t), - [NFULA_HWADDR-1] = sizeof(struct nfulnl_msg_packet_hw), - [NFULA_PAYLOAD-1] = 0, - [NFULA_PREFIX-1] = 0, - [NFULA_UID-1] = sizeof(u_int32_t), - [NFULA_SEQ-1] = sizeof(u_int32_t), - [NFULA_SEQ_GLOBAL-1] = sizeof(u_int32_t), -}; - -static const int nfula_cfg_min[NFULA_CFG_MAX] = { - [NFULA_CFG_CMD-1] = sizeof(struct nfulnl_msg_config_cmd), - [NFULA_CFG_MODE-1] = sizeof(struct nfulnl_msg_config_mode), - [NFULA_CFG_TIMEOUT-1] = sizeof(u_int32_t), - [NFULA_CFG_QTHRESH-1] = sizeof(u_int32_t), - [NFULA_CFG_NLBUFSIZ-1] = sizeof(u_int32_t), - [NFULA_CFG_FLAGS-1] = sizeof(u_int16_t), +static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { + [NFULA_CFG_CMD] = { .len = sizeof(struct nfulnl_msg_config_cmd) }, + [NFULA_CFG_MODE] = { .len = sizeof(struct nfulnl_msg_config_mode) }, + [NFULA_CFG_TIMEOUT] = { .type = NLA_U32 }, + [NFULA_CFG_QTHRESH] = { .type = NLA_U32 }, + [NFULA_CFG_NLBUFSIZ] = { .type = NLA_U32 }, + [NFULA_CFG_FLAGS] = { .type = NLA_U16 }, }; static int nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfula[]) + struct nlmsghdr *nlh, struct nlattr *nfula[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t group_num = ntohs(nfmsg->res_id); @@ -768,16 +751,11 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, UDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); - if (nfattr_bad_size(nfula, NFULA_CFG_MAX, nfula_cfg_min)) { - UDEBUG("bad attribute size\n"); - return -EINVAL; - } - inst = instance_lookup_get(group_num); - if (nfula[NFULA_CFG_CMD-1]) { + if (nfula[NFULA_CFG_CMD]) { u_int8_t pf = nfmsg->nfgen_family; struct nfulnl_msg_config_cmd *cmd; - cmd = NFA_DATA(nfula[NFULA_CFG_CMD-1]); + cmd = nla_data(nfula[NFULA_CFG_CMD]); UDEBUG("found CFG_CMD for\n"); switch (cmd->command) { @@ -840,38 +818,38 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } } - if (nfula[NFULA_CFG_MODE-1]) { + if (nfula[NFULA_CFG_MODE]) { struct nfulnl_msg_config_mode *params; - params = NFA_DATA(nfula[NFULA_CFG_MODE-1]); + params = nla_data(nfula[NFULA_CFG_MODE]); nfulnl_set_mode(inst, params->copy_mode, ntohl(params->copy_range)); } - if (nfula[NFULA_CFG_TIMEOUT-1]) { + if (nfula[NFULA_CFG_TIMEOUT]) { __be32 timeout = - *(__be32 *)NFA_DATA(nfula[NFULA_CFG_TIMEOUT-1]); + *(__be32 *)nla_data(nfula[NFULA_CFG_TIMEOUT]); nfulnl_set_timeout(inst, ntohl(timeout)); } - if (nfula[NFULA_CFG_NLBUFSIZ-1]) { + if (nfula[NFULA_CFG_NLBUFSIZ]) { __be32 nlbufsiz = - *(__be32 *)NFA_DATA(nfula[NFULA_CFG_NLBUFSIZ-1]); + *(__be32 *)nla_data(nfula[NFULA_CFG_NLBUFSIZ]); nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz)); } - if (nfula[NFULA_CFG_QTHRESH-1]) { + if (nfula[NFULA_CFG_QTHRESH]) { __be32 qthresh = - *(__be32 *)NFA_DATA(nfula[NFULA_CFG_QTHRESH-1]); + *(__be32 *)nla_data(nfula[NFULA_CFG_QTHRESH]); nfulnl_set_qthresh(inst, ntohl(qthresh)); } - if (nfula[NFULA_CFG_FLAGS-1]) { + if (nfula[NFULA_CFG_FLAGS]) { __be16 flags = - *(__be16 *)NFA_DATA(nfula[NFULA_CFG_FLAGS-1]); + *(__be16 *)nla_data(nfula[NFULA_CFG_FLAGS]); nfulnl_set_flags(inst, ntohs(flags)); } @@ -881,14 +859,15 @@ out: return ret; } -static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = { +static const struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = { [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp, .attr_count = NFULA_MAX, }, [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config, - .attr_count = NFULA_CFG_MAX, }, + .attr_count = NFULA_CFG_MAX, + .policy = nfula_cfg_policy }, }; -static struct nfnetlink_subsystem nfulnl_subsys = { +static const struct nfnetlink_subsystem nfulnl_subsys = { .name = "log", .subsys_id = NFNL_SUBSYS_ULOG, .cb_count = NFULNL_MSG_MAX, @@ -972,22 +951,8 @@ static const struct seq_operations nful_seq_ops = { static int nful_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct iter_state *is; - int ret; - - is = kzalloc(sizeof(*is), GFP_KERNEL); - if (!is) - return -ENOMEM; - ret = seq_open(file, &nful_seq_ops); - if (ret < 0) - goto out_free; - seq = file->private_data; - seq->private = is; - return ret; -out_free: - kfree(is); - return ret; + return seq_open_private(file, &nful_seq_ops, + sizeof(struct iter_state)); } static const struct file_operations nful_file_ops = { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index bb65a38c816c..49f0480afe09 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -299,7 +299,7 @@ __nfqnl_set_mode(struct nfqnl_instance *queue, case NFQNL_COPY_PACKET: queue->copy_mode = mode; - /* we're using struct nfattr which has 16bit nfa_len */ + /* we're using struct nlattr which has 16bit nla_len */ if (range > 0xffff) queue->copy_range = 0xffff; else @@ -353,18 +353,17 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, QDEBUG("entered\n"); - /* all macros expand to constant values at compile time */ - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + - + NFA_SPACE(sizeof(struct nfqnl_msg_packet_hdr)) - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + size = NLMSG_ALIGN(sizeof(struct nfgenmsg)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #ifdef CONFIG_BRIDGE_NETFILTER - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif - + NFA_SPACE(sizeof(u_int32_t)) /* mark */ - + NFA_SPACE(sizeof(struct nfqnl_msg_packet_hw)) - + NFA_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); + + nla_total_size(sizeof(u_int32_t)) /* mark */ + + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); outdev = entinf->outdev; @@ -389,7 +388,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, else data_len = queue->copy_range; - size += NFA_SPACE(data_len); + size += nla_total_size(data_len); break; default: @@ -417,33 +416,33 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, pmsg.hw_protocol = entskb->protocol; pmsg.hook = entinf->hook; - NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); + NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); indev = entinf->indev; if (indev) { tmp_uint = htonl(indev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); + NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); #else if (entinf->pf == PF_BRIDGE) { /* Case 1: indev is physical input device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), &tmp_uint); /* this is the bridge group "brX" */ tmp_uint = htonl(indev->br_port->br->dev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); if (entskb->nf_bridge && entskb->nf_bridge->physindev) { tmp_uint = htonl(entskb->nf_bridge->physindev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, + NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), &tmp_uint); } } @@ -453,27 +452,27 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (outdev) { tmp_uint = htonl(outdev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); + NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); #else if (entinf->pf == PF_BRIDGE) { /* Case 1: outdev is physical output device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), &tmp_uint); /* this is the bridge group "brX" */ tmp_uint = htonl(outdev->br_port->br->dev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); } else { /* Case 2: outdev is bridge group, we need to look for * physical output device (when called from ipv4) */ - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); if (entskb->nf_bridge && entskb->nf_bridge->physoutdev) { tmp_uint = htonl(entskb->nf_bridge->physoutdev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, + NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), &tmp_uint); } } @@ -482,17 +481,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->mark) { tmp_uint = htonl(entskb->mark); - NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); + NLA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); } - if (indev && entskb->dev - && entskb->dev->hard_header_parse) { + if (indev && entskb->dev) { struct nfqnl_msg_packet_hw phw; - - int len = entskb->dev->hard_header_parse(entskb, - phw.hw_addr); - phw.hw_addrlen = htons(len); - NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + int len = dev_parse_header(entskb, phw.hw_addr); + if (len) { + phw.hw_addrlen = htons(len); + NLA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + } } if (entskb->tstamp.tv64) { @@ -501,23 +499,23 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, ts.sec = cpu_to_be64(tv.tv_sec); ts.usec = cpu_to_be64(tv.tv_usec); - NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); + NLA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } if (data_len) { - struct nfattr *nfa; - int size = NFA_LENGTH(data_len); + struct nlattr *nla; + int size = nla_attr_size(data_len); - if (skb_tailroom(skb) < (int)NFA_SPACE(data_len)) { + if (skb_tailroom(skb) < nla_total_size(data_len)) { printk(KERN_WARNING "nf_queue: no tailroom!\n"); goto nlmsg_failure; } - nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); - nfa->nfa_type = NFQA_PAYLOAD; - nfa->nfa_len = size; + nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); + nla->nla_type = NFQA_PAYLOAD; + nla->nla_len = size; - if (skb_copy_bits(entskb, 0, NFA_DATA(nfa), data_len)) + if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) BUG(); } @@ -525,7 +523,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, return skb; nlmsg_failure: -nfattr_failure: +nla_put_failure: if (skb) kfree_skb(skb); *errp = -EINVAL; @@ -734,6 +732,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) nfqnl_dev_drop(dev->ifindex); @@ -762,7 +763,8 @@ nfqnl_rcv_nl_event(struct notifier_block *this, struct hlist_head *head = &instance_table[i]; hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } @@ -775,15 +777,15 @@ static struct notifier_block nfqnl_rtnl_notifier = { .notifier_call = nfqnl_rcv_nl_event, }; -static const int nfqa_verdict_min[NFQA_MAX] = { - [NFQA_VERDICT_HDR-1] = sizeof(struct nfqnl_msg_verdict_hdr), - [NFQA_MARK-1] = sizeof(u_int32_t), - [NFQA_PAYLOAD-1] = 0, +static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { + [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, + [NFQA_MARK] = { .type = NLA_U32 }, + [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, }; static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[]) + struct nlmsghdr *nlh, struct nlattr *nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); @@ -794,11 +796,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, struct nfqnl_queue_entry *entry; int err; - if (nfattr_bad_size(nfqa, NFQA_MAX, nfqa_verdict_min)) { - QDEBUG("bad attribute size\n"); - return -EINVAL; - } - queue = instance_lookup_get(queue_num); if (!queue) return -ENODEV; @@ -808,12 +805,12 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, goto err_out_put; } - if (!nfqa[NFQA_VERDICT_HDR-1]) { + if (!nfqa[NFQA_VERDICT_HDR]) { err = -EINVAL; goto err_out_put; } - vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]); + vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); verdict = ntohl(vhdr->verdict); if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) { @@ -827,15 +824,15 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, goto err_out_put; } - if (nfqa[NFQA_PAYLOAD-1]) { - if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]), - NFA_PAYLOAD(nfqa[NFQA_PAYLOAD-1]), entry) < 0) + if (nfqa[NFQA_PAYLOAD]) { + if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), + nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0) verdict = NF_DROP; } - if (nfqa[NFQA_MARK-1]) + if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(*(__be32 *) - NFA_DATA(nfqa[NFQA_MARK-1])); + nla_data(nfqa[NFQA_MARK])); issue_verdict(entry, verdict); instance_put(queue); @@ -848,14 +845,14 @@ err_out_put: static int nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[]) + struct nlmsghdr *nlh, struct nlattr *nfqa[]) { return -ENOTSUPP; } -static const int nfqa_cfg_min[NFQA_CFG_MAX] = { - [NFQA_CFG_CMD-1] = sizeof(struct nfqnl_msg_config_cmd), - [NFQA_CFG_PARAMS-1] = sizeof(struct nfqnl_msg_config_params), +static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { + [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, + [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, }; static struct nf_queue_handler nfqh = { @@ -865,7 +862,7 @@ static struct nf_queue_handler nfqh = { static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[]) + struct nlmsghdr *nlh, struct nlattr *nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); @@ -874,15 +871,10 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); - if (nfattr_bad_size(nfqa, NFQA_CFG_MAX, nfqa_cfg_min)) { - QDEBUG("bad attribute size\n"); - return -EINVAL; - } - queue = instance_lookup_get(queue_num); - if (nfqa[NFQA_CFG_CMD-1]) { + if (nfqa[NFQA_CFG_CMD]) { struct nfqnl_msg_config_cmd *cmd; - cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]); + cmd = nla_data(nfqa[NFQA_CFG_CMD]); QDEBUG("found CFG_CMD\n"); switch (cmd->command) { @@ -933,21 +925,21 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } } - if (nfqa[NFQA_CFG_PARAMS-1]) { + if (nfqa[NFQA_CFG_PARAMS]) { struct nfqnl_msg_config_params *params; if (!queue) { ret = -ENOENT; goto out_put; } - params = NFA_DATA(nfqa[NFQA_CFG_PARAMS-1]); + params = nla_data(nfqa[NFQA_CFG_PARAMS]); nfqnl_set_mode(queue, params->copy_mode, ntohl(params->copy_range)); } - if (nfqa[NFQA_CFG_QUEUE_MAXLEN-1]) { + if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { __be32 *queue_maxlen; - queue_maxlen = NFA_DATA(nfqa[NFQA_CFG_QUEUE_MAXLEN-1]); + queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); spin_lock_bh(&queue->lock); queue->queue_maxlen = ntohl(*queue_maxlen); spin_unlock_bh(&queue->lock); @@ -958,16 +950,18 @@ out_put: return ret; } -static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { +static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, .attr_count = NFQA_MAX, }, [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, - .attr_count = NFQA_MAX, }, + .attr_count = NFQA_MAX, + .policy = nfqa_verdict_policy }, [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, - .attr_count = NFQA_CFG_MAX, }, + .attr_count = NFQA_CFG_MAX, + .policy = nfqa_cfg_policy }, }; -static struct nfnetlink_subsystem nfqnl_subsys = { +static const struct nfnetlink_subsystem nfqnl_subsys = { .name = "nf_queue", .subsys_id = NFNL_SUBSYS_QUEUE, .cb_count = NFQNL_MSG_MAX, @@ -1057,22 +1051,8 @@ static const struct seq_operations nfqnl_seq_ops = { static int nfqnl_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct iter_state *is; - int ret; - - is = kzalloc(sizeof(*is), GFP_KERNEL); - if (!is) - return -ENOMEM; - ret = seq_open(file, &nfqnl_seq_ops); - if (ret < 0) - goto out_free; - seq = file->private_data; - seq->private = is; - return ret; -out_free: - kfree(is); - return ret; + return seq_open_private(file, &nfqnl_seq_ops, + sizeof(struct iter_state)); } static const struct file_operations nfqnl_file_ops = { diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index cc2baa6d5a7a..d9a3bded0d00 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -22,6 +22,7 @@ #include <linux/vmalloc.h> #include <linux/mutex.h> #include <linux/mm.h> +#include <net/net_namespace.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp.h> @@ -795,7 +796,7 @@ int xt_proto_init(int af) #ifdef CONFIG_PROC_FS strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out; proc->data = (void *) ((unsigned long) af | (TABLE << 16)); @@ -803,14 +804,14 @@ int xt_proto_init(int af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out_remove_tables; proc->data = (void *) ((unsigned long) af | (MATCH << 16)); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out_remove_matches; proc->data = (void *) ((unsigned long) af | (TARGET << 16)); @@ -822,12 +823,12 @@ int xt_proto_init(int af) out_remove_matches: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); out_remove_tables: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); out: return -1; #endif @@ -841,15 +842,15 @@ void xt_proto_fini(int af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); #endif /*CONFIG_PROC_FS*/ } EXPORT_SYMBOL_GPL(xt_proto_fini); diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 519428566829..07a1b9665005 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -24,6 +24,7 @@ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("iptables qdisc classification target module"); MODULE_ALIAS("ipt_CLASSIFY"); +MODULE_ALIAS("ip6t_CLASSIFY"); static unsigned int target(struct sk_buff **pskb, diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 5a00c5444334..7043c2757e09 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -27,6 +27,7 @@ MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>"); MODULE_DESCRIPTION("IP tables CONNMARK matching module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_CONNMARK"); +MODULE_ALIAS("ip6t_CONNMARK"); #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_CONNMARK.h> diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index b7d6312fccc7..fec1aefb1c32 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -9,6 +9,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_NOTRACK"); +MODULE_ALIAS("ip6t_NOTRACK"); static unsigned int target(struct sk_buff **pskb, diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index dd4d79b8fc9d..af79423bc8e8 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -14,6 +14,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection"); MODULE_ALIAS("ipt_connbytes"); +MODULE_ALIAS("ip6t_connbytes"); static bool match(const struct sk_buff *skb, diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index e73fa9b46cf7..1071fc54d6d3 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -29,6 +29,7 @@ MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>"); MODULE_DESCRIPTION("IP tables connmark match module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_connmark"); +MODULE_ALIAS("ip6t_connmark"); static bool match(const struct sk_buff *skb, diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 83224ec89cc0..c2b1b24ee335 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -24,6 +24,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Match for DCCP protocol packets"); MODULE_ALIAS("ipt_dccp"); +MODULE_ALIAS("ip6t_dccp"); #define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ || (!!((invflag) & (option)) ^ (cond))) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index bd45f9d3f7d0..19103678bf20 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -21,6 +21,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <net/net_namespace.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> @@ -743,13 +744,13 @@ static int __init xt_hashlimit_init(void) printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); goto err2; } - hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", proc_net); + hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net); if (!hashlimit_procdir4) { printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); goto err3; } - hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net); + hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net); if (!hashlimit_procdir6) { printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); @@ -757,7 +758,7 @@ static int __init xt_hashlimit_init(void) } return 0; err4: - remove_proc_entry("ipt_hashlimit", proc_net); + remove_proc_entry("ipt_hashlimit", init_net.proc_net); err3: kmem_cache_destroy(hashlimit_cachep); err2: @@ -769,8 +770,8 @@ err1: static void __exit xt_hashlimit_fini(void) { - remove_proc_entry("ipt_hashlimit", proc_net); - remove_proc_entry("ip6t_hashlimit", proc_net); + remove_proc_entry("ipt_hashlimit", init_net.proc_net); + remove_proc_entry("ip6t_hashlimit", init_net.proc_net); kmem_cache_destroy(hashlimit_cachep); xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); } diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index c002153b80ab..f907770fd4e9 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -13,6 +13,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kiran Kumar Immidi"); MODULE_DESCRIPTION("Match for SCTP protocol packets"); MODULE_ALIAS("ipt_sctp"); +MODULE_ALIAS("ip6t_sctp"); #ifdef DEBUG_SCTP #define duprintf(format, args...) printk(format , ## args) diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index cd5f6d758c68..84d401bfafad 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -22,6 +22,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); MODULE_DESCRIPTION("iptables TCP MSS match module"); MODULE_ALIAS("ipt_tcpmss"); +MODULE_ALIAS("ip6t_tcpmss"); static bool match(const struct sk_buff *skb, diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c new file mode 100644 index 000000000000..ef48bbd93573 --- /dev/null +++ b/net/netfilter/xt_time.c @@ -0,0 +1,269 @@ +/* + * xt_time + * Copyright © Jan Engelhardt <jengelh@computergmbh.de>, 2007 + * + * based on ipt_time by Fabrice MARIE <fabrice@netfilter.org> + * This is a module which is used for time matching + * It is using some modified code from dietlibc (localtime() function) + * that you can find at http://www.fefe.de/dietlibc/ + * This file is distributed under the terms of the GNU General Public + * License (GPL). Copies of the GPL can be obtained from gnu.org/gpl. + */ +#include <linux/ktime.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_time.h> + +struct xtm { + u_int8_t month; /* (1-12) */ + u_int8_t monthday; /* (1-31) */ + u_int8_t weekday; /* (1-7) */ + u_int8_t hour; /* (0-23) */ + u_int8_t minute; /* (0-59) */ + u_int8_t second; /* (0-59) */ + unsigned int dse; +}; + +extern struct timezone sys_tz; /* ouch */ + +static const u_int16_t days_since_year[] = { + 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, +}; + +static const u_int16_t days_since_leapyear[] = { + 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, +}; + +/* + * Since time progresses forward, it is best to organize this array in reverse, + * to minimize lookup time. + */ +enum { + DSE_FIRST = 2039, +}; +static const u_int16_t days_since_epoch[] = { + /* 2039 - 2030 */ + 25202, 24837, 24472, 24106, 23741, 23376, 23011, 22645, 22280, 21915, + /* 2029 - 2020 */ + 21550, 21184, 20819, 20454, 20089, 19723, 19358, 18993, 18628, 18262, + /* 2019 - 2010 */ + 17897, 17532, 17167, 16801, 16436, 16071, 15706, 15340, 14975, 14610, + /* 2009 - 2000 */ + 14245, 13879, 13514, 13149, 12784, 12418, 12053, 11688, 11323, 10957, + /* 1999 - 1990 */ + 10592, 10227, 9862, 9496, 9131, 8766, 8401, 8035, 7670, 7305, + /* 1989 - 1980 */ + 6940, 6574, 6209, 5844, 5479, 5113, 4748, 4383, 4018, 3652, + /* 1979 - 1970 */ + 3287, 2922, 2557, 2191, 1826, 1461, 1096, 730, 365, 0, +}; + +static inline bool is_leap(unsigned int y) +{ + return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0); +} + +/* + * Each network packet has a (nano)seconds-since-the-epoch (SSTE) timestamp. + * Since we match against days and daytime, the SSTE value needs to be + * computed back into human-readable dates. + * + * This is done in three separate functions so that the most expensive + * calculations are done last, in case a "simple match" can be found earlier. + */ +static inline unsigned int localtime_1(struct xtm *r, time_t time) +{ + unsigned int v, w; + + /* Each day has 86400s, so finding the hour/minute is actually easy. */ + v = time % 86400; + r->second = v % 60; + w = v / 60; + r->minute = w % 60; + r->hour = w / 60; + return v; +} + +static inline void localtime_2(struct xtm *r, time_t time) +{ + /* + * Here comes the rest (weekday, monthday). First, divide the SSTE + * by seconds-per-day to get the number of _days_ since the epoch. + */ + r->dse = time / 86400; + + /* 1970-01-01 (w=0) was a Thursday (4). */ + r->weekday = (4 + r->dse) % 7; +} + +static void localtime_3(struct xtm *r, time_t time) +{ + unsigned int year, i, w = r->dse; + + /* + * In each year, a certain number of days-since-the-epoch have passed. + * Find the year that is closest to said days. + * + * Consider, for example, w=21612 (2029-03-04). Loop will abort on + * dse[i] <= w, which happens when dse[i] == 21550. This implies + * year == 2009. w will then be 62. + */ + for (i = 0, year = DSE_FIRST; days_since_epoch[i] > w; + ++i, --year) + /* just loop */; + + w -= days_since_epoch[i]; + + /* + * By now we have the current year, and the day of the year. + * r->yearday = w; + * + * On to finding the month (like above). In each month, a certain + * number of days-since-New Year have passed, and find the closest + * one. + * + * Consider w=62 (in a non-leap year). Loop will abort on + * dsy[i] < w, which happens when dsy[i] == 31+28 (i == 2). + * Concludes i == 2, i.e. 3rd month => March. + * + * (A different approach to use would be to subtract a monthlength + * from w repeatedly while counting.) + */ + if (is_leap(year)) { + for (i = ARRAY_SIZE(days_since_leapyear) - 1; + i > 0 && days_since_year[i] > w; --i) + /* just loop */; + } else { + for (i = ARRAY_SIZE(days_since_year) - 1; + i > 0 && days_since_year[i] > w; --i) + /* just loop */; + } + + r->month = i + 1; + r->monthday = w - days_since_year[i] + 1; + return; +} + +static bool xt_time_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, bool *hotdrop) +{ + const struct xt_time_info *info = matchinfo; + unsigned int packet_time; + struct xtm current_time; + s64 stamp; + + /* + * We cannot use get_seconds() instead of __net_timestamp() here. + * Suppose you have two rules: + * 1. match before 13:00 + * 2. match after 13:00 + * If you match against processing time (get_seconds) it + * may happen that the same packet matches both rules if + * it arrived at the right moment before 13:00. + */ + if (skb->tstamp.tv64 == 0) + __net_timestamp((struct sk_buff *)skb); + + stamp = skb->tstamp.tv64; + do_div(stamp, NSEC_PER_SEC); + + if (info->flags & XT_TIME_LOCAL_TZ) + /* Adjust for local timezone */ + stamp -= 60 * sys_tz.tz_minuteswest; + + /* + * xt_time will match when _all_ of the following hold: + * - 'now' is in the global time range date_start..date_end + * - 'now' is in the monthday mask + * - 'now' is in the weekday mask + * - 'now' is in the daytime range time_start..time_end + * (and by default, libxt_time will set these so as to match) + */ + + if (stamp < info->date_start || stamp > info->date_stop) + return false; + + packet_time = localtime_1(¤t_time, stamp); + + if (info->daytime_start < info->daytime_stop) { + if (packet_time < info->daytime_start || + packet_time > info->daytime_stop) + return false; + } else { + if (packet_time < info->daytime_start && + packet_time > info->daytime_stop) + return false; + } + + localtime_2(¤t_time, stamp); + + if (!(info->weekdays_match & (1 << current_time.weekday))) + return false; + + /* Do not spend time computing monthday if all days match anyway */ + if (info->monthdays_match != XT_TIME_ALL_MONTHDAYS) { + localtime_3(¤t_time, stamp); + if (!(info->monthdays_match & (1 << current_time.monthday))) + return false; + } + + return true; +} + +static bool xt_time_check(const char *tablename, const void *ip, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) +{ + struct xt_time_info *info = matchinfo; + + if (info->daytime_start > XT_TIME_MAX_DAYTIME || + info->daytime_stop > XT_TIME_MAX_DAYTIME) { + printk(KERN_WARNING "xt_time: invalid argument - start or " + "stop time greater than 23:59:59\n"); + return false; + } + + return true; +} + +static struct xt_match xt_time_reg[] __read_mostly = { + { + .name = "time", + .family = AF_INET, + .match = xt_time_match, + .matchsize = sizeof(struct xt_time_info), + .checkentry = xt_time_check, + .me = THIS_MODULE, + }, + { + .name = "time", + .family = AF_INET6, + .match = xt_time_match, + .matchsize = sizeof(struct xt_time_info), + .checkentry = xt_time_check, + .me = THIS_MODULE, + }, +}; + +static int __init xt_time_init(void) +{ + return xt_register_matches(xt_time_reg, ARRAY_SIZE(xt_time_reg)); +} + +static void __exit xt_time_exit(void) +{ + xt_unregister_matches(xt_time_reg, ARRAY_SIZE(xt_time_reg)); +} + +module_init(xt_time_init); +module_exit(xt_time_exit); +MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); +MODULE_DESCRIPTION("netfilter time match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_time"); +MODULE_ALIAS("ip6t_time"); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c060e3f991f1..ba0ca8d3f77d 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -130,7 +130,7 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, return -EINVAL; nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) - if (nla->nla_type == NLBL_CIPSOV4_A_TAG) { + if (nla_type(nla) == NLBL_CIPSOV4_A_TAG) { if (iter >= CIPSO_V4_TAG_MAXCNT) return -EINVAL; doi_def->tags[iter++] = nla_get_u8(nla); @@ -192,13 +192,13 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX, netlbl_cipsov4_genl_policy) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) - switch (nla_b->nla_type) { + switch (nla_type(nla_b)) { case NLBL_CIPSOV4_A_MLSLVLLOC: if (nla_get_u32(nla_b) > CIPSO_V4_MAX_LOC_LVLS) @@ -240,7 +240,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { struct nlattr *lvl_loc; struct nlattr *lvl_rem; @@ -265,13 +265,13 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSCATLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX, netlbl_cipsov4_genl_policy) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) - switch (nla_b->nla_type) { + switch (nla_type(nla_b)) { case NLBL_CIPSOV4_A_MLSCATLOC: if (nla_get_u32(nla_b) > CIPSO_V4_MAX_LOC_CATS) @@ -315,7 +315,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSCATLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { struct nlattr *cat_loc; struct nlattr *cat_rem; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5681ce3aebca..c776bcd9f825 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -57,6 +57,7 @@ #include <linux/selinux.h> #include <linux/mutex.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/scm.h> #include <net/netlink.h> @@ -79,7 +80,7 @@ struct netlink_sock { struct netlink_callback *cb; struct mutex *cb_mutex; struct mutex cb_def_mutex; - void (*data_ready)(struct sock *sk, int bytes); + void (*netlink_rcv)(struct sk_buff *skb); struct module *module; }; @@ -88,7 +89,12 @@ struct netlink_sock { static inline struct netlink_sock *nlk_sk(struct sock *sk) { - return (struct netlink_sock *)sk; + return container_of(sk, struct netlink_sock, sk); +} + +static inline int netlink_is_kernel(struct sock *sk) +{ + return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; } struct nl_pid_hash { @@ -121,7 +127,6 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static int netlink_dump(struct sock *sk); static void netlink_destroy_callback(struct netlink_callback *cb); -static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb); static DEFINE_RWLOCK(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); @@ -210,7 +215,7 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) +static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) { struct nl_pid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; @@ -220,7 +225,7 @@ static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) read_lock(&nl_table_lock); head = nl_pid_hashfn(hash, pid); sk_for_each(sk, node, head) { - if (nlk_sk(sk)->pid == pid) { + if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) { sock_hold(sk); goto found; } @@ -327,7 +332,7 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, u32 pid) +static int netlink_insert(struct sock *sk, struct net *net, u32 pid) { struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; @@ -340,7 +345,7 @@ static int netlink_insert(struct sock *sk, u32 pid) head = nl_pid_hashfn(hash, pid); len = 0; sk_for_each(osk, node, head) { - if (nlk_sk(osk)->pid == pid) + if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid)) break; len++; } @@ -383,15 +388,15 @@ static struct proto netlink_proto = { .obj_size = sizeof(struct netlink_sock), }; -static int __netlink_create(struct socket *sock, struct mutex *cb_mutex, - int protocol) +static int __netlink_create(struct net *net, struct socket *sock, + struct mutex *cb_mutex, int protocol) { struct sock *sk; struct netlink_sock *nlk; sock->ops = &netlink_ops; - sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); + sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); if (!sk) return -ENOMEM; @@ -411,7 +416,7 @@ static int __netlink_create(struct socket *sock, struct mutex *cb_mutex, return 0; } -static int netlink_create(struct socket *sock, int protocol) +static int netlink_create(struct net *net, struct socket *sock, int protocol) { struct module *module = NULL; struct mutex *cb_mutex; @@ -440,7 +445,7 @@ static int netlink_create(struct socket *sock, int protocol) cb_mutex = nl_table[protocol].cb_mutex; netlink_unlock_table(); - if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0) + if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0) goto out_module; nlk = nlk_sk(sock->sk); @@ -477,6 +482,7 @@ static int netlink_release(struct socket *sock) if (nlk->pid && !nlk->subscriptions) { struct netlink_notify n = { + .net = sk->sk_net, .protocol = sk->sk_protocol, .pid = nlk->pid, }; @@ -487,7 +493,7 @@ static int netlink_release(struct socket *sock) module_put(nlk->module); netlink_table_grab(); - if (nlk->flags & NETLINK_KERNEL_SOCKET) { + if (netlink_is_kernel(sk)) { kfree(nl_table[sk->sk_protocol].listeners); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].registered = 0; @@ -505,6 +511,7 @@ static int netlink_release(struct socket *sock) static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; @@ -518,6 +525,8 @@ retry: netlink_table_grab(); head = nl_pid_hashfn(hash, pid); sk_for_each(osk, node, head) { + if ((osk->sk_net != net)) + continue; if (nlk_sk(osk)->pid == pid) { /* Bind collision, search negative pid values. */ pid = rover--; @@ -529,7 +538,7 @@ retry: } netlink_table_ungrab(); - err = netlink_insert(sk, pid); + err = netlink_insert(sk, net, pid); if (err == -EADDRINUSE) goto retry; @@ -594,6 +603,7 @@ static int netlink_realloc_groups(struct sock *sk) static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; @@ -615,7 +625,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len return -EINVAL; } else { err = nladdr->nl_pid ? - netlink_insert(sk, nladdr->nl_pid) : + netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) return err; @@ -698,19 +708,17 @@ static void netlink_overrun(struct sock *sk) static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) { - int protocol = ssk->sk_protocol; struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(protocol, pid); + sock = netlink_lookup(ssk->sk_net, ssk->sk_protocol, pid); if (!sock) return ERR_PTR(-ECONNREFUSED); /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); - if ((nlk->pid == 0 && !nlk->data_ready) || - (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_pid != nlk_sk(ssk)->pid)) { + if (sock->sk_state == NETLINK_CONNECTED && + nlk->dst_pid != nlk_sk(ssk)->pid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -754,7 +762,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, test_bit(0, &nlk->state)) { DECLARE_WAITQUEUE(wait, current); if (!timeo) { - if (!ssk || nlk_sk(ssk)->pid == 0) + if (!ssk || netlink_is_kernel(ssk)) netlink_overrun(sk); sock_put(sk); kfree_skb(skb); @@ -783,7 +791,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, return 0; } -int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol) +int netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; @@ -824,7 +832,34 @@ static inline struct sk_buff *netlink_trim(struct sk_buff *skb, return skb; } -int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock) +static inline void netlink_rcv_wake(struct sock *sk) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (skb_queue_empty(&sk->sk_receive_queue)) + clear_bit(0, &nlk->state); + if (!test_bit(0, &nlk->state)) + wake_up_interruptible(&nlk->wait); +} + +static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) +{ + int ret; + struct netlink_sock *nlk = nlk_sk(sk); + + ret = -ECONNREFUSED; + if (nlk->netlink_rcv != NULL) { + ret = skb->len; + skb_set_owner_r(skb, sk); + nlk->netlink_rcv(skb); + } + kfree_skb(skb); + sock_put(sk); + return ret; +} + +int netlink_unicast(struct sock *ssk, struct sk_buff *skb, + u32 pid, int nonblock) { struct sock *sk; int err; @@ -839,13 +874,16 @@ retry: kfree_skb(skb); return PTR_ERR(sk); } + if (netlink_is_kernel(sk)) + return netlink_unicast_kernel(sk, skb); + err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); if (err == 1) goto retry; if (err) return err; - return netlink_sendskb(sk, skb, ssk->sk_protocol); + return netlink_sendskb(sk, skb); } int netlink_has_listeners(struct sock *sk, unsigned int group) @@ -853,7 +891,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group) int res = 0; unsigned long *listeners; - BUG_ON(!(nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET)); + BUG_ON(!netlink_is_kernel(sk)); rcu_read_lock(); listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); @@ -883,6 +921,7 @@ static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff struct netlink_broadcast_data { struct sock *exclude_sk; + struct net *net; u32 pid; u32 group; int failure; @@ -905,6 +944,9 @@ static inline int do_one_broadcast(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) goto out; + if ((sk->sk_net != p->net)) + goto out; + if (p->failure) { netlink_overrun(sk); goto out; @@ -943,6 +985,7 @@ out: int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, u32 group, gfp_t allocation) { + struct net *net = ssk->sk_net; struct netlink_broadcast_data info; struct hlist_node *node; struct sock *sk; @@ -950,6 +993,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, skb = netlink_trim(skb, allocation); info.exclude_sk = ssk; + info.net = net; info.pid = pid; info.group = group; info.failure = 0; @@ -998,6 +1042,9 @@ static inline int do_one_set_err(struct sock *sk, if (sk == p->exclude_sk) goto out; + if (sk->sk_net != p->exclude_sk->sk_net) + goto out; + if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1129,16 +1176,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } -static inline void netlink_rcv_wake(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (skb_queue_empty(&sk->sk_receive_queue)) - clear_bit(0, &nlk->state); - if (!test_bit(0, &nlk->state)) - wake_up_interruptible(&nlk->wait); -} - static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { @@ -1286,11 +1323,7 @@ out: static void netlink_data_ready(struct sock *sk, int len) { - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->data_ready) - nlk->data_ready(sk, len); - netlink_rcv_wake(sk); + BUG(); } /* @@ -1300,8 +1333,8 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(int unit, unsigned int groups, - void (*input)(struct sock *sk, int len), +netlink_kernel_create(struct net *net, int unit, unsigned int groups, + void (*input)(struct sk_buff *skb), struct mutex *cb_mutex, struct module *module) { struct socket *sock; @@ -1317,7 +1350,7 @@ netlink_kernel_create(int unit, unsigned int groups, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(sock, cb_mutex, unit) < 0) + if (__netlink_create(net, sock, cb_mutex, unit) < 0) goto out_sock_release; if (groups < 32) @@ -1330,20 +1363,22 @@ netlink_kernel_create(int unit, unsigned int groups, sk = sock->sk; sk->sk_data_ready = netlink_data_ready; if (input) - nlk_sk(sk)->data_ready = input; + nlk_sk(sk)->netlink_rcv = input; - if (netlink_insert(sk, 0)) + if (netlink_insert(sk, net, 0)) goto out_sock_release; nlk = nlk_sk(sk); nlk->flags |= NETLINK_KERNEL_SOCKET; netlink_table_grab(); - nl_table[unit].groups = groups; - nl_table[unit].listeners = listeners; - nl_table[unit].cb_mutex = cb_mutex; - nl_table[unit].module = module; - nl_table[unit].registered = 1; + if (!nl_table[unit].registered) { + nl_table[unit].groups = groups; + nl_table[unit].listeners = listeners; + nl_table[unit].cb_mutex = cb_mutex; + nl_table[unit].module = module; + nl_table[unit].registered = 1; + } netlink_table_ungrab(); return sk; @@ -1509,7 +1544,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1528,12 +1563,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, netlink_dump(sk); sock_put(sk); - - /* We successfully started a dump, by returning -EINTR we - * signal the queue mangement to interrupt processing of - * any netlink messages so userspace gets a chance to read - * the results. */ - return -EINTR; + return 0; } void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) @@ -1551,7 +1581,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (!skb) { struct sock *sk; - sk = netlink_lookup(in_skb->sk->sk_protocol, + sk = netlink_lookup(in_skb->sk->sk_net, + in_skb->sk->sk_protocol, NETLINK_CB(in_skb).pid); if (sk) { sk->sk_err = ENOBUFS; @@ -1569,13 +1600,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); } -static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, +int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *)) { struct nlmsghdr *nlh; int err; while (skb->len >= nlmsg_total_size(0)) { + int msglen; + nlh = nlmsg_hdr(skb); err = 0; @@ -1591,84 +1624,20 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, goto skip; err = cb(skb, nlh); - if (err == -EINTR) { - /* Not an error, but we interrupt processing */ - netlink_queue_skip(nlh, skb); - return err; - } skip: if (nlh->nlmsg_flags & NLM_F_ACK || err) netlink_ack(skb, nlh, err); - netlink_queue_skip(nlh, skb); + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + skb_pull(skb, msglen); } return 0; } /** - * nelink_run_queue - Process netlink receive queue. - * @sk: Netlink socket containing the queue - * @qlen: Place to store queue length upon entry - * @cb: Callback function invoked for each netlink message found - * - * Processes as much as there was in the queue upon entry and invokes - * a callback function for each netlink message found. The callback - * function may refuse a message by returning a negative error code - * but setting the error pointer to 0 in which case this function - * returns with a qlen != 0. - * - * qlen must be initialized to 0 before the initial entry, afterwards - * the function may be called repeatedly until qlen reaches 0. - * - * The callback function may return -EINTR to signal that processing - * of netlink messages shall be interrupted. In this case the message - * currently being processed will NOT be requeued onto the receive - * queue. - */ -void netlink_run_queue(struct sock *sk, unsigned int *qlen, - int (*cb)(struct sk_buff *, struct nlmsghdr *)) -{ - struct sk_buff *skb; - - if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue)) - *qlen = skb_queue_len(&sk->sk_receive_queue); - - for (; *qlen; (*qlen)--) { - skb = skb_dequeue(&sk->sk_receive_queue); - if (netlink_rcv_skb(skb, cb)) { - if (skb->len) - skb_queue_head(&sk->sk_receive_queue, skb); - else { - kfree_skb(skb); - (*qlen)--; - } - break; - } - - kfree_skb(skb); - } -} - -/** - * netlink_queue_skip - Skip netlink message while processing queue. - * @nlh: Netlink message to be skipped - * @skb: Socket buffer containing the netlink messages. - * - * Pulls the given netlink message off the socket buffer so the next - * call to netlink_queue_run() will not reconsider the message. - */ -static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) -{ - int msglen = NLMSG_ALIGN(nlh->nlmsg_len); - - if (msglen > skb->len) - msglen = skb->len; - - skb_pull(skb, msglen); -} - -/** * nlmsg_notify - send a notification netlink message * @sk: netlink socket to use * @skb: notification message @@ -1702,6 +1671,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, #ifdef CONFIG_PROC_FS struct nl_seq_iter { + struct net *net; int link; int hash_idx; }; @@ -1719,6 +1689,8 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { + if (iter->net != s->sk_net) + continue; if (off == pos) { iter->link = i; iter->hash_idx = j; @@ -1748,11 +1720,14 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return netlink_seq_socket_idx(seq, 0); - s = sk_next(v); + iter = seq->private; + s = v; + do { + s = sk_next(s); + } while (s && (iter->net != s->sk_net)); if (s) return s; - iter = seq->private; i = iter->link; j = iter->hash_idx + 1; @@ -1761,6 +1736,8 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); + while (s && (iter->net != s->sk_net)) + s = sk_next(s); if (s) { iter->link = i; iter->hash_idx = j; @@ -1815,31 +1792,35 @@ static const struct seq_operations netlink_seq_ops = { static int netlink_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; struct nl_seq_iter *iter; - int err; - iter = kzalloc(sizeof(*iter), GFP_KERNEL); + iter = __seq_open_private(file, &netlink_seq_ops, sizeof(*iter)); if (!iter) return -ENOMEM; - err = seq_open(file, &netlink_seq_ops); - if (err) { - kfree(iter); - return err; + iter->net = get_proc_net(inode); + if (!iter->net) { + seq_release_private(inode, file); + return -ENXIO; } - seq = file->private_data; - seq->private = iter; return 0; } +static int netlink_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct nl_seq_iter *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static const struct file_operations netlink_seq_fops = { .owner = THIS_MODULE, .open = netlink_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = netlink_seq_release, }; #endif @@ -1881,11 +1862,32 @@ static struct net_proto_family netlink_family_ops = { .owner = THIS_MODULE, /* for consistency 8) */ }; +static int __net_init netlink_net_init(struct net *net) +{ +#ifdef CONFIG_PROC_FS + if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) + return -ENOMEM; +#endif + return 0; +} + +static void __net_exit netlink_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "netlink"); +#endif +} + +static struct pernet_operations __net_initdata netlink_net_ops = { + .init = netlink_net_init, + .exit = netlink_net_exit, +}; + static int __init netlink_proto_init(void) { struct sk_buff *dummy_skb; int i; - unsigned long max; + unsigned long limit; unsigned int order; int err = proto_register(&netlink_proto, 0); @@ -1899,13 +1901,13 @@ static int __init netlink_proto_init(void) goto panic; if (num_physpages >= (128 * 1024)) - max = num_physpages >> (21 - PAGE_SHIFT); + limit = num_physpages >> (21 - PAGE_SHIFT); else - max = num_physpages >> (23 - PAGE_SHIFT); + limit = num_physpages >> (23 - PAGE_SHIFT); - order = get_bitmask_order(max) - 1 + PAGE_SHIFT; - max = (1UL << order) / sizeof(struct hlist_head); - order = get_bitmask_order(max > UINT_MAX ? UINT_MAX : max) - 1; + order = get_bitmask_order(limit) - 1 + PAGE_SHIFT; + limit = (1UL << order) / sizeof(struct hlist_head); + order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; for (i = 0; i < MAX_LINKS; i++) { struct nl_pid_hash *hash = &nl_table[i].hash; @@ -1926,9 +1928,7 @@ static int __init netlink_proto_init(void) } sock_register(&netlink_family_ops); -#ifdef CONFIG_PROC_FS - proc_net_fops_create("netlink", 0, &netlink_seq_fops); -#endif + register_pernet_subsys(&netlink_net_ops); /* The netlink device handler may be needed early. */ rtnetlink_init(); out: @@ -1940,7 +1940,7 @@ panic: core_initcall(netlink_proto_init); EXPORT_SYMBOL(netlink_ack); -EXPORT_SYMBOL(netlink_run_queue); +EXPORT_SYMBOL(netlink_rcv_skb); EXPORT_SYMBOL(netlink_broadcast); EXPORT_SYMBOL(netlink_dump_start); EXPORT_SYMBOL(netlink_kernel_create); diff --git a/net/netlink/attr.c b/net/netlink/attr.c index e4d7bed99c2e..ec39d12c2423 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -27,12 +27,12 @@ static int validate_nla(struct nlattr *nla, int maxtype, const struct nla_policy *policy) { const struct nla_policy *pt; - int minlen = 0, attrlen = nla_len(nla); + int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); - if (nla->nla_type <= 0 || nla->nla_type > maxtype) + if (type <= 0 || type > maxtype) return 0; - pt = &policy[nla->nla_type]; + pt = &policy[type]; BUG_ON(pt->type > NLA_TYPE_MAX); @@ -149,7 +149,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); nla_for_each_attr(nla, head, len, rem) { - u16 type = nla->nla_type; + u16 type = nla_type(nla); if (type > 0 && type <= maxtype) { if (policy) { @@ -185,7 +185,7 @@ struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) int rem; nla_for_each_attr(nla, head, len, rem) - if (nla->nla_type == attrtype) + if (nla_type(nla) == attrtype) return nla; return NULL; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 8c11ca4a2121..150579a21469 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -22,22 +22,14 @@ struct sock *genl_sock = NULL; static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ -static void genl_lock(void) +static inline void genl_lock(void) { mutex_lock(&genl_mutex); } -static int genl_trylock(void) -{ - return !mutex_trylock(&genl_mutex); -} - -static void genl_unlock(void) +static inline void genl_unlock(void) { mutex_unlock(&genl_mutex); - - if (genl_sock && genl_sock->sk_receive_queue.qlen) - genl_sock->sk_data_ready(genl_sock, 0); } #define GENL_FAM_TAB_SIZE 16 @@ -478,16 +470,11 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return ops->doit(skb, &info); } -static void genl_rcv(struct sock *sk, int len) +static void genl_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - if (genl_trylock()) - return; - netlink_run_queue(sk, &qlen, genl_rcv_msg); - genl_unlock(); - } while (qlen && genl_sock && genl_sock->sk_receive_queue.qlen); + genl_lock(); + netlink_rcv_skb(skb, &genl_rcv_msg); + genl_unlock(); } /************************************************************************** @@ -782,8 +769,8 @@ static int __init genl_init(void) netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); /* we'll bump the group number right afterwards */ - genl_sock = netlink_kernel_create(NETLINK_GENERIC, 0, genl_rcv, - NULL, THIS_MODULE); + genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, 0, + genl_rcv, NULL, THIS_MODULE); if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index dc9273295a38..3a4d479ea64e 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -27,6 +27,7 @@ #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -105,6 +106,9 @@ static int nr_device_event(struct notifier_block *this, unsigned long event, voi { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN) return NOTIFY_DONE; @@ -408,15 +412,18 @@ static struct proto nr_proto = { .obj_size = sizeof(struct nr_sock), }; -static int nr_create(struct socket *sock, int protocol) +static int nr_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct nr_sock *nr; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) return -ENOMEM; nr = nr_sk(sk); @@ -458,7 +465,7 @@ static struct sock *nr_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) return NULL; nr = nr_sk(sk); @@ -1447,9 +1454,9 @@ static int __init nr_proto_init(void) nr_loopback_init(); - proc_net_fops_create("nr", S_IRUGO, &nr_info_fops); - proc_net_fops_create("nr_neigh", S_IRUGO, &nr_neigh_fops); - proc_net_fops_create("nr_nodes", S_IRUGO, &nr_nodes_fops); + proc_net_fops_create(&init_net, "nr", S_IRUGO, &nr_info_fops); + proc_net_fops_create(&init_net, "nr_neigh", S_IRUGO, &nr_neigh_fops); + proc_net_fops_create(&init_net, "nr_nodes", S_IRUGO, &nr_nodes_fops); out: return rc; fail: @@ -1477,9 +1484,9 @@ static void __exit nr_exit(void) { int i; - proc_net_remove("nr"); - proc_net_remove("nr_neigh"); - proc_net_remove("nr_nodes"); + proc_net_remove(&init_net, "nr"); + proc_net_remove(&init_net, "nr_neigh"); + proc_net_remove(&init_net, "nr_nodes"); nr_loopback_clear(); nr_rt_free(); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index c7b5d930e732..8c68da5ef0a1 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -95,8 +95,9 @@ static int nr_rebuild_header(struct sk_buff *skb) #endif -static int nr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) +static int nr_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { unsigned char *buff = skb_push(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); @@ -193,6 +194,12 @@ static struct net_device_stats *nr_get_stats(struct net_device *dev) return &nr->stats; } +static const struct header_ops nr_header_ops = { + .create = nr_header, + .rebuild= nr_rebuild_header, +}; + + void nr_setup(struct net_device *dev) { dev->mtu = NR_MAX_PACKET_SIZE; @@ -200,11 +207,10 @@ void nr_setup(struct net_device *dev) dev->open = nr_open; dev->stop = nr_close; - dev->hard_header = nr_header; + dev->header_ops = &nr_header_ops; dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; dev->addr_len = AX25_ADDR_LEN; dev->type = ARPHRD_NETROM; - dev->rebuild_header = nr_rebuild_header; dev->set_mac_address = nr_set_mac_address; /* New-style flags. */ diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 24fe4a66d297..e943c16552a2 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -580,7 +580,7 @@ static struct net_device *nr_ax25_dev_get(char *devname) { struct net_device *dev; - if ((dev = dev_get_by_name(devname)) == NULL) + if ((dev = dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) @@ -598,7 +598,7 @@ struct net_device *nr_dev_first(void) struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -618,7 +618,7 @@ struct net_device *nr_dev_get(ax25_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1322d62b5d97..e11000a8e950 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -61,6 +61,7 @@ #include <linux/wireless.h> #include <linux/kernel.h> #include <linux/kmod.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> #include <linux/skbuff.h> @@ -251,6 +252,9 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct struct sock *sk; struct sockaddr_pkt *spkt; + if (dev->nd_net != &init_net) + goto out; + /* * When we registered the protocol we saved the socket in the data * field for just this event. @@ -343,7 +347,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, */ saddr->spkt_device[13] = 0; - dev = dev_get_by_name(saddr->spkt_device); + dev = dev_get_by_name(&init_net, saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; @@ -385,7 +389,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, skb_reset_network_header(skb); /* Try to align data part correctly */ - if (dev->hard_header) { + if (dev->header_ops) { skb->data -= dev->hard_header_len; skb->tail -= dev->hard_header_len; if (len < dev->hard_header_len) @@ -451,6 +455,9 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet int skb_len = skb->len; unsigned int snaplen, res; + if (dev->nd_net != &init_net) + goto drop; + if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -459,7 +466,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet skb->dev = dev; - if (dev->hard_header) { + if (dev->header_ops) { /* The device has an explicit notion of ll header, exported to higher levels. @@ -512,10 +519,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; - sll->sll_halen = 0; - if (dev->hard_header_parse) - sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); + sll->sll_halen = dev_parse_header(skb, sll->sll_addr); PACKET_SKB_CB(skb)->origlen = skb->len; @@ -567,13 +572,16 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe struct sk_buff *copy_skb = NULL; struct timeval tv; + if (dev->nd_net != &init_net) + goto drop; + if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); - if (dev->hard_header) { + if (dev->header_ops) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { @@ -640,18 +648,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe h->tp_snaplen = snaplen; h->tp_mac = macoff; h->tp_net = netoff; - if (skb->tstamp.tv64 == 0) { - __net_timestamp(skb); - sock_enable_timestamp(sk); - } - tv = ktime_to_timeval(skb->tstamp); + if (skb->tstamp.tv64) + tv = ktime_to_timeval(skb->tstamp); + else + do_gettimeofday(&tv); h->tp_sec = tv.tv_sec; h->tp_usec = tv.tv_usec; sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); - sll->sll_halen = 0; - if (dev->hard_header_parse) - sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); + sll->sll_halen = dev_parse_header(skb, sll->sll_addr); sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; @@ -733,7 +738,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, } - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); err = -ENXIO; if (dev == NULL) goto out_unlock; @@ -756,16 +761,10 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, skb_reserve(skb, LL_RESERVED_SPACE(dev)); skb_reset_network_header(skb); - if (dev->hard_header) { - int res; - err = -EINVAL; - res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); - if (sock->type != SOCK_DGRAM) { - skb_reset_tail_pointer(skb); - skb->len = 0; - } else if (res < 0) - goto out_free; - } + err = -EINVAL; + if (sock->type == SOCK_DGRAM && + dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) + goto out_free; /* Returns -EFAULT on error */ err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); @@ -928,7 +927,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int add return -EINVAL; strlcpy(name,uaddr->sa_data,sizeof(name)); - dev = dev_get_by_name(name); + dev = dev_get_by_name(&init_net, name); if (dev) { err = packet_do_bind(sk, dev, pkt_sk(sk)->num); dev_put(dev); @@ -955,7 +954,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_ifindex) { err = -ENODEV; - dev = dev_get_by_index(sll->sll_ifindex); + dev = dev_get_by_index(&init_net, sll->sll_ifindex); if (dev == NULL) goto out; } @@ -977,13 +976,16 @@ static struct proto packet_proto = { * Create a packet of type SOCK_PACKET. */ -static int packet_create(struct socket *sock, int protocol) +static int packet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct packet_sock *po; __be16 proto = (__force __be16)protocol; /* weird, but documented */ int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (!capable(CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && @@ -993,7 +995,7 @@ static int packet_create(struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, 1); if (sk == NULL) goto out; @@ -1149,7 +1151,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - dev = dev_get_by_index(pkt_sk(sk)->ifindex); + dev = dev_get_by_index(&init_net, pkt_sk(sk)->ifindex); if (dev) { strlcpy(uaddr->sa_data, dev->name, 15); dev_put(dev); @@ -1174,7 +1176,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; - dev = dev_get_by_index(po->ifindex); + dev = dev_get_by_index(&init_net, po->ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@ -1226,7 +1228,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) rtnl_lock(); err = -ENODEV; - dev = __dev_get_by_index(mreq->mr_ifindex); + dev = __dev_get_by_index(&init_net, mreq->mr_ifindex); if (!dev) goto done; @@ -1280,7 +1282,7 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; - dev = dev_get_by_index(ml->ifindex); + dev = dev_get_by_index(&init_net, ml->ifindex); if (dev) { packet_dev_mc(dev, ml, -1); dev_put(dev); @@ -1308,7 +1310,7 @@ static void packet_flush_mclist(struct sock *sk) struct net_device *dev; po->mclist = ml->next; - if ((dev = dev_get_by_index(ml->ifindex)) != NULL) { + if ((dev = dev_get_by_index(&init_net, ml->ifindex)) != NULL) { packet_dev_mc(dev, ml, -1); dev_put(dev); } @@ -1465,6 +1467,9 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void struct hlist_node *node; struct net_device *dev = data; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + read_lock(&packet_sklist_lock); sk_for_each(sk, node, &packet_sklist) { struct packet_sock *po = pkt_sk(sk); @@ -1952,7 +1957,7 @@ static const struct file_operations packet_seq_fops = { static void __exit packet_exit(void) { - proc_net_remove("packet"); + proc_net_remove(&init_net, "packet"); unregister_netdevice_notifier(&packet_netdev_notifier); sock_unregister(PF_PACKET); proto_unregister(&packet_proto); @@ -1967,7 +1972,7 @@ static int __init packet_init(void) sock_register(&packet_family_ops); register_netdevice_notifier(&packet_netdev_notifier); - proc_net_fops_create("packet", 0, &packet_seq_fops); + proc_net_fops_create(&init_net, "packet", 0, &packet_seq_fops); out: return rc; } diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 8b31759ee8b0..7f807b30cfbb 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -5,7 +5,7 @@ menuconfig RFKILL tristate "RF switch subsystem support" help Say Y here if you want to have control over RF switches - found on many WiFi, Bluetooth and IRDA cards. + found on many WiFi and Bluetooth cards. To compile this driver as a module, choose M here: the module will be called rfkill. @@ -22,3 +22,10 @@ config RFKILL_INPUT To compile this driver as a module, choose M here: the module will be called rfkill-input. + +# LED trigger support +config RFKILL_LEDS + bool + depends on RFKILL && LEDS_TRIGGERS + default y + diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index 9f746be58854..eaabf087c59b 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c @@ -17,6 +17,8 @@ #include <linux/init.h> #include <linux/rfkill.h> +#include "rfkill-input.h" + MODULE_AUTHOR("Dmitry Torokhov <dtor@mail.ru>"); MODULE_DESCRIPTION("Input layer to RF switch connector"); MODULE_LICENSE("GPL"); @@ -81,6 +83,7 @@ static void rfkill_schedule_toggle(struct rfkill_task *task) static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN); static DEFINE_RFKILL_TASK(rfkill_bt, RFKILL_TYPE_BLUETOOTH); +static DEFINE_RFKILL_TASK(rfkill_uwb, RFKILL_TYPE_UWB); static void rfkill_event(struct input_handle *handle, unsigned int type, unsigned int code, int down) @@ -93,6 +96,9 @@ static void rfkill_event(struct input_handle *handle, unsigned int type, case KEY_BLUETOOTH: rfkill_schedule_toggle(&rfkill_bt); break; + case KEY_UWB: + rfkill_schedule_toggle(&rfkill_uwb); + break; default: break; } @@ -148,6 +154,11 @@ static const struct input_device_id rfkill_ids[] = { .evbit = { BIT(EV_KEY) }, .keybit = { [LONG(KEY_BLUETOOTH)] = BIT(KEY_BLUETOOTH) }, }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT(EV_KEY) }, + .keybit = { [LONG(KEY_UWB)] = BIT(KEY_UWB) }, + }, { } }; diff --git a/net/rfkill/rfkill-input.h b/net/rfkill/rfkill-input.h new file mode 100644 index 000000000000..4dae5006fc77 --- /dev/null +++ b/net/rfkill/rfkill-input.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2007 Ivo van Doorn + */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#ifndef __RFKILL_INPUT_H +#define __RFKILL_INPUT_H + +void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state); + +#endif /* __RFKILL_INPUT_H */ diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index db3395bfbcfa..51d151c0e962 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006 Ivo van Doorn + * Copyright (C) 2006 - 2007 Ivo van Doorn * Copyright (C) 2007 Dmitry Torokhov * * This program is free software; you can redistribute it and/or modify @@ -37,6 +37,22 @@ static DEFINE_MUTEX(rfkill_mutex); static enum rfkill_state rfkill_states[RFKILL_TYPE_MAX]; + +static void rfkill_led_trigger(struct rfkill *rfkill, + enum rfkill_state state) +{ +#ifdef CONFIG_RFKILL_LEDS + struct led_trigger *led = &rfkill->led_trigger; + + if (!led->name) + return; + if (state == RFKILL_STATE_OFF) + led_trigger_event(led, LED_OFF); + else + led_trigger_event(led, LED_FULL); +#endif /* CONFIG_RFKILL_LEDS */ +} + static int rfkill_toggle_radio(struct rfkill *rfkill, enum rfkill_state state) { @@ -48,8 +64,10 @@ static int rfkill_toggle_radio(struct rfkill *rfkill, if (state != rfkill->state) { retval = rfkill->toggle_radio(rfkill->data, state); - if (!retval) + if (!retval) { rfkill->state = state; + rfkill_led_trigger(rfkill, state); + } } mutex_unlock(&rfkill->mutex); @@ -106,8 +124,8 @@ static ssize_t rfkill_type_show(struct device *dev, case RFKILL_TYPE_BLUETOOTH: type = "bluetooth"; break; - case RFKILL_TYPE_IRDA: - type = "irda"; + case RFKILL_TYPE_UWB: + type = "ultrawideband"; break; default: BUG(); @@ -172,6 +190,10 @@ static ssize_t rfkill_claim_store(struct device *dev, if (error) return error; + if (rfkill->user_claim_unsupported) { + error = -EOPNOTSUPP; + goto out_unlock; + } if (rfkill->user_claim != claim) { if (!claim) rfkill_toggle_radio(rfkill, @@ -179,9 +201,10 @@ static ssize_t rfkill_claim_store(struct device *dev, rfkill->user_claim = claim; } +out_unlock: mutex_unlock(&rfkill_mutex); - return count; + return error ? error : count; } static struct device_attribute rfkill_dev_attrs[] = { @@ -281,7 +304,7 @@ static void rfkill_remove_switch(struct rfkill *rfkill) /** * rfkill_allocate - allocate memory for rfkill structure. * @parent: device that has rf switch on it - * @type: type of the switch (wlan, bluetooth, irda) + * @type: type of the switch (RFKILL_TYPE_*) * * This function should be called by the network driver when it needs * rfkill structure. Once the structure is allocated the driver shoud @@ -328,6 +351,26 @@ void rfkill_free(struct rfkill *rfkill) } EXPORT_SYMBOL(rfkill_free); +static void rfkill_led_trigger_register(struct rfkill *rfkill) +{ +#ifdef CONFIG_RFKILL_LEDS + int error; + + rfkill->led_trigger.name = rfkill->dev.bus_id; + error = led_trigger_register(&rfkill->led_trigger); + if (error) + rfkill->led_trigger.name = NULL; +#endif /* CONFIG_RFKILL_LEDS */ +} + +static void rfkill_led_trigger_unregister(struct rfkill *rfkill) +{ +#ifdef CONFIG_RFKILL_LEDS + if (rfkill->led_trigger.name) + led_trigger_unregister(&rfkill->led_trigger); +#endif +} + /** * rfkill_register - Register a rfkill structure. * @rfkill: rfkill structure to be registered @@ -357,6 +400,7 @@ int rfkill_register(struct rfkill *rfkill) rfkill_remove_switch(rfkill); return error; } + rfkill_led_trigger_register(rfkill); return 0; } @@ -372,6 +416,7 @@ EXPORT_SYMBOL(rfkill_register); */ void rfkill_unregister(struct rfkill *rfkill) { + rfkill_led_trigger_unregister(rfkill); device_del(&rfkill->dev); rfkill_remove_switch(rfkill); put_device(&rfkill->dev); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 976c3cc86a29..509defe53ee5 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -26,6 +26,7 @@ #include <linux/sockios.h> #include <linux/net.h> #include <linux/stat.h> +#include <net/net_namespace.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> @@ -196,6 +197,9 @@ static int rose_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN) return NOTIFY_DONE; @@ -498,15 +502,18 @@ static struct proto rose_proto = { .obj_size = sizeof(struct rose_sock), }; -static int rose_create(struct socket *sock, int protocol) +static int rose_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct rose_sock *rose; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) return -ENOMEM; rose = rose_sk(sk); @@ -544,7 +551,7 @@ static struct sock *rose_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) return NULL; rose = rose_sk(sk); @@ -1576,10 +1583,10 @@ static int __init rose_proto_init(void) rose_add_loopback_neigh(); - proc_net_fops_create("rose", S_IRUGO, &rose_info_fops); - proc_net_fops_create("rose_neigh", S_IRUGO, &rose_neigh_fops); - proc_net_fops_create("rose_nodes", S_IRUGO, &rose_nodes_fops); - proc_net_fops_create("rose_routes", S_IRUGO, &rose_routes_fops); + proc_net_fops_create(&init_net, "rose", S_IRUGO, &rose_info_fops); + proc_net_fops_create(&init_net, "rose_neigh", S_IRUGO, &rose_neigh_fops); + proc_net_fops_create(&init_net, "rose_nodes", S_IRUGO, &rose_nodes_fops); + proc_net_fops_create(&init_net, "rose_routes", S_IRUGO, &rose_routes_fops); out: return rc; fail: @@ -1606,10 +1613,10 @@ static void __exit rose_exit(void) { int i; - proc_net_remove("rose"); - proc_net_remove("rose_neigh"); - proc_net_remove("rose_nodes"); - proc_net_remove("rose_routes"); + proc_net_remove(&init_net, "rose"); + proc_net_remove(&init_net, "rose_neigh"); + proc_net_remove(&init_net, "rose_nodes"); + proc_net_remove(&init_net, "rose_routes"); rose_loopback_clear(); rose_rt_free(); diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 8d88795dc663..1b6741f1d746 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -35,8 +35,9 @@ #include <net/ax25.h> #include <net/rose.h> -static int rose_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) +static int rose_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { unsigned char *buff = skb_push(skb, ROSE_MIN_LEN + 2); @@ -148,6 +149,11 @@ static struct net_device_stats *rose_get_stats(struct net_device *dev) return netdev_priv(dev); } +static const struct header_ops rose_header_ops = { + .create = rose_header, + .rebuild= rose_rebuild_header, +}; + void rose_setup(struct net_device *dev) { dev->mtu = ROSE_MAX_PACKET_SIZE - 2; @@ -155,11 +161,10 @@ void rose_setup(struct net_device *dev) dev->open = rose_open; dev->stop = rose_close; - dev->hard_header = rose_header; + dev->header_ops = &rose_header_ops; dev->hard_header_len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN; dev->addr_len = ROSE_ADDR_LEN; dev->type = ARPHRD_ROSE; - dev->rebuild_header = rose_rebuild_header; dev->set_mac_address = rose_set_mac_address; /* New-style flags. */ diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 96f61a71b252..540c0f26ffee 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -583,7 +583,7 @@ static struct net_device *rose_ax25_dev_get(char *devname) { struct net_device *dev; - if ((dev = dev_get_by_name(devname)) == NULL) + if ((dev = dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) @@ -601,7 +601,7 @@ struct net_device *rose_dev_first(void) struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -619,7 +619,7 @@ struct net_device *rose_dev_get(rose_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; @@ -636,7 +636,7 @@ static int rose_dev_exists(rose_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) goto out; } diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c58fa0d1be26..0803f305ed08 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -14,6 +14,7 @@ #include <linux/skbuff.h> #include <linux/poll.h> #include <linux/proc_fs.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" @@ -605,13 +606,16 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, /* * create an RxRPC socket */ -static int rxrpc_create(struct socket *sock, int protocol) +static int rxrpc_create(struct net *net, struct socket *sock, int protocol) { struct rxrpc_sock *rx; struct sock *sk; _enter("%p,%d", sock, protocol); + if (net != &init_net) + return -EAFNOSUPPORT; + /* we support transport protocol UDP only */ if (protocol != PF_INET) return -EPROTONOSUPPORT; @@ -622,7 +626,7 @@ static int rxrpc_create(struct socket *sock, int protocol) sock->ops = &rxrpc_rpc_ops; sock->state = SS_UNCONNECTED; - sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); + sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); if (!sk) return -ENOMEM; @@ -829,8 +833,8 @@ static int __init af_rxrpc_init(void) } #ifdef CONFIG_PROC_FS - proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops); - proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops); + proc_net_fops_create(&init_net, "rxrpc_calls", 0, &rxrpc_call_seq_fops); + proc_net_fops_create(&init_net, "rxrpc_conns", 0, &rxrpc_connection_seq_fops); #endif return 0; @@ -868,8 +872,8 @@ static void __exit af_rxrpc_exit(void) _debug("flush scheduled work"); flush_workqueue(rxrpc_workqueue); - proc_net_remove("rxrpc_conns"); - proc_net_remove("rxrpc_calls"); + proc_net_remove(&init_net, "rxrpc_conns"); + proc_net_remove(&init_net, "rxrpc_calls"); destroy_workqueue(rxrpc_workqueue); kmem_cache_destroy(rxrpc_call_jar); _leave(""); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 8a74cac0be8c..92435a882fac 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -447,6 +447,17 @@ config NET_ACT_IPT To compile this code as a module, choose M here: the module will be called ipt. +config NET_ACT_NAT + tristate "Stateless NAT" + depends on NET_CLS_ACT + select NETFILTER + ---help--- + Say Y here to do stateless NAT on IPv4 packets. You should use + netfilter for NAT unless you know what you are doing. + + To compile this code as a module, choose M here: the + module will be called nat. + config NET_ACT_PEDIT tristate "Packet Editing" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index b67c36f65cf2..81ecbe8e7dce 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_NET_ACT_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o +obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 579578944ae7..fd7bca4d5c20 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -20,6 +20,7 @@ #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_mirred.h> @@ -73,7 +74,7 @@ static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]); if (parm->ifindex) { - dev = __dev_get_by_index(parm->ifindex); + dev = __dev_get_by_index(&init_net, parm->ifindex); if (dev == NULL) return -ENODEV; switch (dev->type) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c new file mode 100644 index 000000000000..c96273bcaf9c --- /dev/null +++ b/net/sched/act_nat.c @@ -0,0 +1,322 @@ +/* + * Stateless NAT actions + * + * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/tc_act/tc_nat.h> +#include <net/act_api.h> +#include <net/icmp.h> +#include <net/ip.h> +#include <net/netlink.h> +#include <net/tc_act/tc_nat.h> +#include <net/tcp.h> +#include <net/udp.h> + + +#define NAT_TAB_MASK 15 +static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1]; +static u32 nat_idx_gen; +static DEFINE_RWLOCK(nat_lock); + +static struct tcf_hashinfo nat_hash_info = { + .htab = tcf_nat_ht, + .hmask = NAT_TAB_MASK, + .lock = &nat_lock, +}; + +static int tcf_nat_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct rtattr *tb[TCA_NAT_MAX]; + struct tc_nat *parm; + int ret = 0; + struct tcf_nat *p; + struct tcf_common *pc; + + if (rta == NULL || rtattr_parse_nested(tb, TCA_NAT_MAX, rta) < 0) + return -EINVAL; + + if (tb[TCA_NAT_PARMS - 1] == NULL || + RTA_PAYLOAD(tb[TCA_NAT_PARMS - 1]) < sizeof(*parm)) + return -EINVAL; + parm = RTA_DATA(tb[TCA_NAT_PARMS - 1]); + + pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, + &nat_idx_gen, &nat_hash_info); + if (unlikely(!pc)) + return -ENOMEM; + p = to_tcf_nat(pc); + ret = ACT_P_CREATED; + } else { + p = to_tcf_nat(pc); + if (!ovr) { + tcf_hash_release(pc, bind, &nat_hash_info); + return -EEXIST; + } + } + + spin_lock_bh(&p->tcf_lock); + p->old_addr = parm->old_addr; + p->new_addr = parm->new_addr; + p->mask = parm->mask; + p->flags = parm->flags; + + p->tcf_action = parm->action; + spin_unlock_bh(&p->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &nat_hash_info); + + return ret; +} + +static int tcf_nat_cleanup(struct tc_action *a, int bind) +{ + struct tcf_nat *p = a->priv; + + return tcf_hash_release(&p->common, bind, &nat_hash_info); +} + +static int tcf_nat(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_nat *p = a->priv; + struct iphdr *iph; + __be32 old_addr; + __be32 new_addr; + __be32 mask; + __be32 addr; + int egress; + int action; + int ihl; + + spin_lock(&p->tcf_lock); + + p->tcf_tm.lastuse = jiffies; + old_addr = p->old_addr; + new_addr = p->new_addr; + mask = p->mask; + egress = p->flags & TCA_NAT_FLAG_EGRESS; + action = p->tcf_action; + + p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.packets++; + + spin_unlock(&p->tcf_lock); + + if (unlikely(action == TC_ACT_SHOT)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(*iph))) + goto drop; + + iph = ip_hdr(skb); + + if (egress) + addr = iph->saddr; + else + addr = iph->daddr; + + if (!((old_addr ^ addr) & mask)) { + if (skb_cloned(skb) && + !skb_clone_writable(skb, sizeof(*iph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto drop; + + new_addr &= mask; + new_addr |= addr & ~mask; + + /* Rewrite IP header */ + iph = ip_hdr(skb); + if (egress) + iph->saddr = new_addr; + else + iph->daddr = new_addr; + + nf_csum_replace4(&iph->check, addr, new_addr); + } + + ihl = iph->ihl * 4; + + /* It would be nice to share code with stateful NAT. */ + switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { + case IPPROTO_TCP: + { + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, ihl + sizeof(*tcph)) || + (skb_cloned(skb) && + !skb_clone_writable(skb, ihl + sizeof(*tcph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + goto drop; + + tcph = (void *)(skb_network_header(skb) + ihl); + nf_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); + break; + } + case IPPROTO_UDP: + { + struct udphdr *udph; + + if (!pskb_may_pull(skb, ihl + sizeof(*udph)) || + (skb_cloned(skb) && + !skb_clone_writable(skb, ihl + sizeof(*udph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + goto drop; + + udph = (void *)(skb_network_header(skb) + ihl); + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { + nf_proto_csum_replace4(&udph->check, skb, addr, + new_addr, 1); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + break; + } + case IPPROTO_ICMP: + { + struct icmphdr *icmph; + + if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) + goto drop; + + icmph = (void *)(skb_network_header(skb) + ihl); + + if ((icmph->type != ICMP_DEST_UNREACH) && + (icmph->type != ICMP_TIME_EXCEEDED) && + (icmph->type != ICMP_PARAMETERPROB)) + break; + + iph = (void *)(icmph + 1); + if (egress) + addr = iph->daddr; + else + addr = iph->saddr; + + if ((old_addr ^ addr) & mask) + break; + + if (skb_cloned(skb) && + !skb_clone_writable(skb, + ihl + sizeof(*icmph) + sizeof(*iph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto drop; + + icmph = (void *)(skb_network_header(skb) + ihl); + iph = (void *)(icmph + 1); + + new_addr &= mask; + new_addr |= addr & ~mask; + + /* XXX Fix up the inner checksums. */ + if (egress) + iph->daddr = new_addr; + else + iph->saddr = new_addr; + + nf_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, + 1); + break; + } + default: + break; + } + + return action; + +drop: + spin_lock(&p->tcf_lock); + p->tcf_qstats.drops++; + spin_unlock(&p->tcf_lock); + return TC_ACT_SHOT; +} + +static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_nat *p = a->priv; + struct tc_nat *opt; + struct tcf_t t; + int s; + + s = sizeof(*opt); + + /* netlink spinlocks held above us - must use ATOMIC */ + opt = kzalloc(s, GFP_ATOMIC); + if (unlikely(!opt)) + return -ENOBUFS; + + opt->old_addr = p->old_addr; + opt->new_addr = p->new_addr; + opt->mask = p->mask; + opt->flags = p->flags; + + opt->index = p->tcf_index; + opt->action = p->tcf_action; + opt->refcnt = p->tcf_refcnt - ref; + opt->bindcnt = p->tcf_bindcnt - bind; + + RTA_PUT(skb, TCA_NAT_PARMS, s, opt); + t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(p->tcf_tm.expires); + RTA_PUT(skb, TCA_NAT_TM, sizeof(t), &t); + + kfree(opt); + + return skb->len; + +rtattr_failure: + nlmsg_trim(skb, b); + kfree(opt); + return -1; +} + +static struct tc_action_ops act_nat_ops = { + .kind = "nat", + .hinfo = &nat_hash_info, + .type = TCA_ACT_NAT, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_nat, + .dump = tcf_nat_dump, + .cleanup = tcf_nat_cleanup, + .lookup = tcf_hash_search, + .init = tcf_nat_init, + .walk = tcf_generic_walker +}; + +MODULE_DESCRIPTION("Stateless NAT actions"); +MODULE_LICENSE("GPL"); + +static int __init nat_init_module(void) +{ + return tcf_register_action(&act_nat_ops); +} + +static void __exit nat_cleanup_module(void) +{ + tcf_unregister_action(&act_nat_ops); +} + +module_init(nat_init_module); +module_exit(nat_cleanup_module); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 17f6f27e28a2..a73e3e6d87ea 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -21,8 +21,8 @@ #include <net/act_api.h> #include <net/netlink.h> -#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) -#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) +#define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L) +#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L) #define POL_TAB_MASK 15 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5f0fbca7393f..03657976fd50 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -154,7 +154,7 @@ replay: /* Find head of filter chain. */ /* Find link */ - if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL) return -ENODEV; /* Find qdisc */ @@ -387,7 +387,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; if (!tcm->tcm_parent) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d4d5d2f271d2..9e98c6e567dd 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -592,7 +592,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, } else handle = gen_new_kid(ht, htid); - if (tb[TCA_U32_SEL-1] == 0 || + if (tb[TCA_U32_SEL-1] == NULL || RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel)) return -EINVAL; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 650f09c8bd6a..e9989610712c 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -291,7 +291,7 @@ META_COLLECTOR(var_sk_bound_if) } else { struct net_device *dev; - dev = dev_get_by_index(skb->sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, skb->sk->sk_bound_dev_if); *err = var_dev(dev, dst); if (dev) dev_put(dev); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index dee0d5fb39c5..8ae137e3522b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -28,6 +28,7 @@ #include <linux/list.h> #include <linux/hrtimer.h> +#include <net/net_namespace.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -606,7 +607,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *p = NULL; int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -673,7 +674,7 @@ replay: clid = tcm->tcm_parent; q = p = NULL; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -880,7 +881,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = q_idx = cb->args[1]; read_lock(&dev_base_lock); idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -931,7 +932,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 qid = TC_H_MAJ(clid); int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; /* @@ -1114,7 +1115,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return 0; s_t = cb->args[0]; @@ -1225,10 +1226,13 @@ EXPORT_SYMBOL(tcf_destroy_chain); #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { + struct timespec ts; + + hrtimer_get_res(CLOCK_MONOTONIC, &ts); seq_printf(seq, "%08x %08x %08x %08x\n", (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1), 1000000, - (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES)); + (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts))); return 0; } @@ -1251,7 +1255,7 @@ static int __init pktsched_init(void) { register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); - proc_net_fops_create("psched", 0, &psched_fops); + proc_net_fops_create(&init_net, "psched", 0, &psched_fops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index cbef3bbfc20f..4de3744e65c3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -175,7 +175,7 @@ struct cbq_sched_data }; -#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log]) +#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) static __inline__ unsigned cbq_hash(u32 h) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index c81649cf0b9e..95ae11956f35 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -134,34 +134,19 @@ static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; struct sk_buff *skb; - unsigned lockless; int ret; /* Dequeue packet */ if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) return 0; - /* - * When the driver has LLTX set, it does its own locking in - * start_xmit. These checks are worth it because even uncongested - * locks can be quite expensive. The driver can do a trylock, as - * is being done here; in case of lock contention it should return - * NETDEV_TX_LOCKED and the packet will be requeued. - */ - lockless = (dev->features & NETIF_F_LLTX); - - if (!lockless && !netif_tx_trylock(dev)) { - /* Another CPU grabbed the driver tx lock */ - return handle_dev_cpu_collision(skb, dev, q); - } /* And release queue */ spin_unlock(&dev->queue_lock); + HARD_TX_LOCK(dev, smp_processor_id()); ret = dev_hard_start_xmit(skb, dev); - - if (!lockless) - netif_tx_unlock(dev); + HARD_TX_UNLOCK(dev); spin_lock(&dev->queue_lock); q = dev->qdisc; @@ -256,6 +241,12 @@ static void dev_watchdog_down(struct net_device *dev) netif_tx_unlock_bh(dev); } +/** + * netif_carrier_on - set carrier + * @dev: network device + * + * Device has detected that carrier. + */ void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) @@ -264,6 +255,12 @@ void netif_carrier_on(struct net_device *dev) __netdev_watchdog_up(dev); } +/** + * netif_carrier_off - clear carrier + * @dev: network device + * + * Device has detected loss of carrier. + */ void netif_carrier_off(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 246a2f9765f1..5e608a64935a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -132,10 +132,8 @@ struct htb_class { static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, int size) { - int slot = size >> rate->rate.cell_log; - if (slot > 255) - return (rate->data[255]*(slot >> 8) + rate->data[slot & 0xFF]); - return rate->data[slot]; + long result = qdisc_l2t(rate, size); + return result; } struct htb_sched { diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 8c2639af4c6a..b0d81098b0ee 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -115,8 +115,8 @@ struct tbf_sched_data struct qdisc_watchdog watchdog; /* Watchdog timer */ }; -#define L2T(q,L) ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log]) -#define L2T_P(q,L) ((q)->P_tab->data[(L)>>(q)->P_tab->rate.cell_log]) +#define L2T(q,L) qdisc_l2t((q)->R_tab,L) +#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L) static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) { diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 0968184ea6be..be57cf317a7f 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -232,9 +232,12 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * } if (neigh_event_send(n, skb_res) == 0) { int err; + read_lock(&n->lock); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + n->ha, NULL, skb->len); read_unlock(&n->lock); + if (err < 0) { neigh_release(n); return -EINVAL; @@ -246,10 +249,10 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * return (skb_res == NULL) ? -EAGAIN : 1; } -static __inline__ int -teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) +static inline int teql_resolve(struct sk_buff *skb, + struct sk_buff *skb_res, struct net_device *dev) { - if (dev->hard_header == NULL || + if (dev->header_ops == NULL || skb->dst == NULL || skb->dst->neighbour == NULL) return 0; @@ -432,7 +435,6 @@ static __init void teql_master_setup(struct net_device *dev) dev->tx_queue_len = 100; dev->flags = IFF_NOARP; dev->hard_header_len = LL_MAX_HEADER; - SET_MODULE_OWNER(dev); } static LIST_HEAD(master_dev_list); diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 70c828bbe444..1da7204d9b42 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -9,7 +9,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ transport.o chunk.o sm_make_chunk.o ulpevent.o \ inqueue.o outqueue.o ulpqueue.o command.o \ tsnmap.o bind_addr.o socket.o primitive.o \ - output.o input.o debug.o ssnmap.o proc.o crc32c.o + output.o input.o debug.o ssnmap.o proc.o crc32c.o \ + auth.o sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o sctp-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 9bad8ba0feda..03158e3665da 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -74,6 +74,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a { struct sctp_sock *sp; int i; + sctp_paramhdr_t *p; + int err; /* Retrieve the SCTP per socket area. */ sp = sctp_sk((struct sock *)sk); @@ -298,6 +300,30 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->default_timetolive = sp->default_timetolive; asoc->default_rcv_context = sp->default_rcv_context; + /* AUTH related initializations */ + INIT_LIST_HEAD(&asoc->endpoint_shared_keys); + err = sctp_auth_asoc_copy_shkeys(ep, asoc, gfp); + if (err) + goto fail_init; + + asoc->active_key_id = ep->active_key_id; + asoc->asoc_shared_key = NULL; + + asoc->default_hmac_id = 0; + /* Save the hmacs and chunks list into this association */ + if (ep->auth_hmacs_list) + memcpy(asoc->c.auth_hmacs, ep->auth_hmacs_list, + ntohs(ep->auth_hmacs_list->param_hdr.length)); + if (ep->auth_chunk_list) + memcpy(asoc->c.auth_chunks, ep->auth_chunk_list, + ntohs(ep->auth_chunk_list->param_hdr.length)); + + /* Get the AUTH random number for this association */ + p = (sctp_paramhdr_t *)asoc->c.auth_random; + p->type = SCTP_PARAM_RANDOM; + p->length = htons(sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH); + get_random_bytes(p+1, SCTP_AUTH_RANDOM_LENGTH); + return asoc; fail_init: @@ -389,6 +415,9 @@ void sctp_association_free(struct sctp_association *asoc) /* Free peer's cached cookie. */ kfree(asoc->peer.cookie); + kfree(asoc->peer.peer_random); + kfree(asoc->peer.peer_chunks); + kfree(asoc->peer.peer_hmacs); /* Release the transport structures. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { @@ -407,6 +436,12 @@ void sctp_association_free(struct sctp_association *asoc) if (asoc->addip_last_asconf) sctp_chunk_free(asoc->addip_last_asconf); + /* AUTH - Free the endpoint shared keys */ + sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); + + /* AUTH - Free the association shared key */ + sctp_auth_key_put(asoc->asoc_shared_key); + sctp_association_put(asoc); } @@ -976,6 +1011,16 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) state = asoc->state; subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type); + /* SCTP-AUTH, Section 6.3: + * The receiver has a list of chunk types which it expects + * to be received only after an AUTH-chunk. This list has + * been sent to the peer during the association setup. It + * MUST silently discard these chunks if they are not placed + * after an AUTH chunk in the packet. + */ + if (sctp_auth_recv_cid(subtype.chunk, asoc) && !chunk->auth) + continue; + /* Remember where the last DATA chunk came from so we * know where to send the SACK. */ @@ -1112,6 +1157,24 @@ void sctp_assoc_update(struct sctp_association *asoc, sctp_assoc_set_id(asoc, GFP_ATOMIC); } } + + /* SCTP-AUTH: Save the peer parameters from the new assocaitions + * and also move the association shared keys over + */ + kfree(asoc->peer.peer_random); + asoc->peer.peer_random = new->peer.peer_random; + new->peer.peer_random = NULL; + + kfree(asoc->peer.peer_chunks); + asoc->peer.peer_chunks = new->peer.peer_chunks; + new->peer.peer_chunks = NULL; + + kfree(asoc->peer.peer_hmacs); + asoc->peer.peer_hmacs = new->peer.peer_hmacs; + new->peer.peer_hmacs = NULL; + + sctp_auth_key_put(asoc->asoc_shared_key); + sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC); } /* Update the retran path for sending a retransmitted packet. diff --git a/net/sctp/auth.c b/net/sctp/auth.c new file mode 100644 index 000000000000..781810724714 --- /dev/null +++ b/net/sctp/auth.c @@ -0,0 +1,938 @@ +/* SCTP kernel reference Implementation + * (C) Copyright 2007 Hewlett-Packard Development Company, L.P. + * + * This file is part of the SCTP kernel reference Implementation + * + * The SCTP reference implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * The SCTP reference implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Please send any bug reports or fixes you make to the + * email address(es): + * lksctp developers <lksctp-developers@lists.sourceforge.net> + * + * Or submit a bug report through the following website: + * http://www.sf.net/projects/lksctp + * + * Written or modified by: + * Vlad Yasevich <vladislav.yasevich@hp.com> + * + * Any bugs reported given to us we will try to fix... any fixes shared will + * be incorporated into the next SCTP release. + */ + +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/scatterlist.h> +#include <net/sctp/sctp.h> +#include <net/sctp/auth.h> + +static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { + { + /* id 0 is reserved. as all 0 */ + .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_0, + }, + { + .hmac_id = SCTP_AUTH_HMAC_ID_SHA1, + .hmac_name="hmac(sha1)", + .hmac_len = SCTP_SHA1_SIG_SIZE, + }, + { + /* id 2 is reserved as well */ + .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2, + }, + { + .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, + .hmac_name="hmac(sha256)", + .hmac_len = SCTP_SHA256_SIG_SIZE, + } +}; + + +void sctp_auth_key_put(struct sctp_auth_bytes *key) +{ + if (!key) + return; + + if (atomic_dec_and_test(&key->refcnt)) { + kfree(key); + SCTP_DBG_OBJCNT_DEC(keys); + } +} + +/* Create a new key structure of a given length */ +static struct sctp_auth_bytes *sctp_auth_create_key(__u32 key_len, gfp_t gfp) +{ + struct sctp_auth_bytes *key; + + /* Allocate the shared key */ + key = kmalloc(sizeof(struct sctp_auth_bytes) + key_len, gfp); + if (!key) + return NULL; + + key->len = key_len; + atomic_set(&key->refcnt, 1); + SCTP_DBG_OBJCNT_INC(keys); + + return key; +} + +/* Create a new shared key container with a give key id */ +struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp) +{ + struct sctp_shared_key *new; + + /* Allocate the shared key container */ + new = kzalloc(sizeof(struct sctp_shared_key), gfp); + if (!new) + return NULL; + + INIT_LIST_HEAD(&new->key_list); + new->key_id = key_id; + + return new; +} + +/* Free the shared key stucture */ +void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) +{ + BUG_ON(!list_empty(&sh_key->key_list)); + sctp_auth_key_put(sh_key->key); + sh_key->key = NULL; + kfree(sh_key); +} + +/* Destory the entire key list. This is done during the + * associon and endpoint free process. + */ +void sctp_auth_destroy_keys(struct list_head *keys) +{ + struct sctp_shared_key *ep_key; + struct sctp_shared_key *tmp; + + if (list_empty(keys)) + return; + + key_for_each_safe(ep_key, tmp, keys) { + list_del_init(&ep_key->key_list); + sctp_auth_shkey_free(ep_key); + } +} + +/* Compare two byte vectors as numbers. Return values + * are: + * 0 - vectors are equal + * < 0 - vector 1 is smaller then vector2 + * > 0 - vector 1 is greater then vector2 + * + * Algorithm is: + * This is performed by selecting the numerically smaller key vector... + * If the key vectors are equal as numbers but differ in length ... + * the shorter vector is considered smaller + * + * Examples (with small values): + * 000123456789 > 123456789 (first number is longer) + * 000123456789 < 234567891 (second number is larger numerically) + * 123456789 > 2345678 (first number is both larger & longer) + */ +static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1, + struct sctp_auth_bytes *vector2) +{ + int diff; + int i; + const __u8 *longer; + + diff = vector1->len - vector2->len; + if (diff) { + longer = (diff > 0) ? vector1->data : vector2->data; + + /* Check to see if the longer number is + * lead-zero padded. If it is not, it + * is automatically larger numerically. + */ + for (i = 0; i < abs(diff); i++ ) { + if (longer[i] != 0) + return diff; + } + } + + /* lengths are the same, compare numbers */ + return memcmp(vector1->data, vector2->data, vector1->len); +} + +/* + * Create a key vector as described in SCTP-AUTH, Section 6.1 + * The RANDOM parameter, the CHUNKS parameter and the HMAC-ALGO + * parameter sent by each endpoint are concatenated as byte vectors. + * These parameters include the parameter type, parameter length, and + * the parameter value, but padding is omitted; all padding MUST be + * removed from this concatenation before proceeding with further + * computation of keys. Parameters which were not sent are simply + * omitted from the concatenation process. The resulting two vectors + * are called the two key vectors. + */ +static struct sctp_auth_bytes *sctp_auth_make_key_vector( + sctp_random_param_t *random, + sctp_chunks_param_t *chunks, + sctp_hmac_algo_param_t *hmacs, + gfp_t gfp) +{ + struct sctp_auth_bytes *new; + __u32 len; + __u32 offset = 0; + + len = ntohs(random->param_hdr.length) + ntohs(hmacs->param_hdr.length); + if (chunks) + len += ntohs(chunks->param_hdr.length); + + new = kmalloc(sizeof(struct sctp_auth_bytes) + len, gfp); + if (!new) + return NULL; + + new->len = len; + + memcpy(new->data, random, ntohs(random->param_hdr.length)); + offset += ntohs(random->param_hdr.length); + + if (chunks) { + memcpy(new->data + offset, chunks, + ntohs(chunks->param_hdr.length)); + offset += ntohs(chunks->param_hdr.length); + } + + memcpy(new->data + offset, hmacs, ntohs(hmacs->param_hdr.length)); + + return new; +} + + +/* Make a key vector based on our local parameters */ +struct sctp_auth_bytes *sctp_auth_make_local_vector( + const struct sctp_association *asoc, + gfp_t gfp) +{ + return sctp_auth_make_key_vector( + (sctp_random_param_t*)asoc->c.auth_random, + (sctp_chunks_param_t*)asoc->c.auth_chunks, + (sctp_hmac_algo_param_t*)asoc->c.auth_hmacs, + gfp); +} + +/* Make a key vector based on peer's parameters */ +struct sctp_auth_bytes *sctp_auth_make_peer_vector( + const struct sctp_association *asoc, + gfp_t gfp) +{ + return sctp_auth_make_key_vector(asoc->peer.peer_random, + asoc->peer.peer_chunks, + asoc->peer.peer_hmacs, + gfp); +} + + +/* Set the value of the association shared key base on the parameters + * given. The algorithm is: + * From the endpoint pair shared keys and the key vectors the + * association shared keys are computed. This is performed by selecting + * the numerically smaller key vector and concatenating it to the + * endpoint pair shared key, and then concatenating the numerically + * larger key vector to that. The result of the concatenation is the + * association shared key. + */ +static struct sctp_auth_bytes *sctp_auth_asoc_set_secret( + struct sctp_shared_key *ep_key, + struct sctp_auth_bytes *first_vector, + struct sctp_auth_bytes *last_vector, + gfp_t gfp) +{ + struct sctp_auth_bytes *secret; + __u32 offset = 0; + __u32 auth_len; + + auth_len = first_vector->len + last_vector->len; + if (ep_key->key) + auth_len += ep_key->key->len; + + secret = sctp_auth_create_key(auth_len, gfp); + if (!secret) + return NULL; + + if (ep_key->key) { + memcpy(secret->data, ep_key->key->data, ep_key->key->len); + offset += ep_key->key->len; + } + + memcpy(secret->data + offset, first_vector->data, first_vector->len); + offset += first_vector->len; + + memcpy(secret->data + offset, last_vector->data, last_vector->len); + + return secret; +} + +/* Create an association shared key. Follow the algorithm + * described in SCTP-AUTH, Section 6.1 + */ +static struct sctp_auth_bytes *sctp_auth_asoc_create_secret( + const struct sctp_association *asoc, + struct sctp_shared_key *ep_key, + gfp_t gfp) +{ + struct sctp_auth_bytes *local_key_vector; + struct sctp_auth_bytes *peer_key_vector; + struct sctp_auth_bytes *first_vector, + *last_vector; + struct sctp_auth_bytes *secret = NULL; + int cmp; + + + /* Now we need to build the key vectors + * SCTP-AUTH , Section 6.1 + * The RANDOM parameter, the CHUNKS parameter and the HMAC-ALGO + * parameter sent by each endpoint are concatenated as byte vectors. + * These parameters include the parameter type, parameter length, and + * the parameter value, but padding is omitted; all padding MUST be + * removed from this concatenation before proceeding with further + * computation of keys. Parameters which were not sent are simply + * omitted from the concatenation process. The resulting two vectors + * are called the two key vectors. + */ + + local_key_vector = sctp_auth_make_local_vector(asoc, gfp); + peer_key_vector = sctp_auth_make_peer_vector(asoc, gfp); + + if (!peer_key_vector || !local_key_vector) + goto out; + + /* Figure out the order in wich the key_vectors will be + * added to the endpoint shared key. + * SCTP-AUTH, Section 6.1: + * This is performed by selecting the numerically smaller key + * vector and concatenating it to the endpoint pair shared + * key, and then concatenating the numerically larger key + * vector to that. If the key vectors are equal as numbers + * but differ in length, then the concatenation order is the + * endpoint shared key, followed by the shorter key vector, + * followed by the longer key vector. Otherwise, the key + * vectors are identical, and may be concatenated to the + * endpoint pair key in any order. + */ + cmp = sctp_auth_compare_vectors(local_key_vector, + peer_key_vector); + if (cmp < 0) { + first_vector = local_key_vector; + last_vector = peer_key_vector; + } else { + first_vector = peer_key_vector; + last_vector = local_key_vector; + } + + secret = sctp_auth_asoc_set_secret(ep_key, first_vector, last_vector, + gfp); +out: + kfree(local_key_vector); + kfree(peer_key_vector); + + return secret; +} + +/* + * Populate the association overlay list with the list + * from the endpoint. + */ +int sctp_auth_asoc_copy_shkeys(const struct sctp_endpoint *ep, + struct sctp_association *asoc, + gfp_t gfp) +{ + struct sctp_shared_key *sh_key; + struct sctp_shared_key *new; + + BUG_ON(!list_empty(&asoc->endpoint_shared_keys)); + + key_for_each(sh_key, &ep->endpoint_shared_keys) { + new = sctp_auth_shkey_create(sh_key->key_id, gfp); + if (!new) + goto nomem; + + new->key = sh_key->key; + sctp_auth_key_hold(new->key); + list_add(&new->key_list, &asoc->endpoint_shared_keys); + } + + return 0; + +nomem: + sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); + return -ENOMEM; +} + + +/* Public interface to creat the association shared key. + * See code above for the algorithm. + */ +int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) +{ + struct sctp_auth_bytes *secret; + struct sctp_shared_key *ep_key; + + /* If we don't support AUTH, or peer is not capable + * we don't need to do anything. + */ + if (!sctp_auth_enable || !asoc->peer.auth_capable) + return 0; + + /* If the key_id is non-zero and we couldn't find an + * endpoint pair shared key, we can't compute the + * secret. + * For key_id 0, endpoint pair shared key is a NULL key. + */ + ep_key = sctp_auth_get_shkey(asoc, asoc->active_key_id); + BUG_ON(!ep_key); + + secret = sctp_auth_asoc_create_secret(asoc, ep_key, gfp); + if (!secret) + return -ENOMEM; + + sctp_auth_key_put(asoc->asoc_shared_key); + asoc->asoc_shared_key = secret; + + return 0; +} + + +/* Find the endpoint pair shared key based on the key_id */ +struct sctp_shared_key *sctp_auth_get_shkey( + const struct sctp_association *asoc, + __u16 key_id) +{ + struct sctp_shared_key *key = NULL; + + /* First search associations set of endpoint pair shared keys */ + key_for_each(key, &asoc->endpoint_shared_keys) { + if (key->key_id == key_id) + break; + } + + return key; +} + +/* + * Initialize all the possible digest transforms that we can use. Right now + * now, the supported digests are SHA1 and SHA256. We do this here once + * because of the restrictiong that transforms may only be allocated in + * user context. This forces us to pre-allocated all possible transforms + * at the endpoint init time. + */ +int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) +{ + struct crypto_hash *tfm = NULL; + __u16 id; + + /* if the transforms are already allocted, we are done */ + if (!sctp_auth_enable) { + ep->auth_hmacs = NULL; + return 0; + } + + if (ep->auth_hmacs) + return 0; + + /* Allocated the array of pointers to transorms */ + ep->auth_hmacs = kzalloc( + sizeof(struct crypto_hash *) * SCTP_AUTH_NUM_HMACS, + gfp); + if (!ep->auth_hmacs) + return -ENOMEM; + + for (id = 0; id < SCTP_AUTH_NUM_HMACS; id++) { + + /* See is we support the id. Supported IDs have name and + * length fields set, so that we can allocated and use + * them. We can safely just check for name, for without the + * name, we can't allocate the TFM. + */ + if (!sctp_hmac_list[id].hmac_name) + continue; + + /* If this TFM has been allocated, we are all set */ + if (ep->auth_hmacs[id]) + continue; + + /* Allocate the ID */ + tfm = crypto_alloc_hash(sctp_hmac_list[id].hmac_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto out_err; + + ep->auth_hmacs[id] = tfm; + } + + return 0; + +out_err: + /* Clean up any successfull allocations */ + sctp_auth_destroy_hmacs(ep->auth_hmacs); + return -ENOMEM; +} + +/* Destroy the hmac tfm array */ +void sctp_auth_destroy_hmacs(struct crypto_hash *auth_hmacs[]) +{ + int i; + + if (!auth_hmacs) + return; + + for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) + { + if (auth_hmacs[i]) + crypto_free_hash(auth_hmacs[i]); + } + kfree(auth_hmacs); +} + + +struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id) +{ + return &sctp_hmac_list[hmac_id]; +} + +/* Get an hmac description information that we can use to build + * the AUTH chunk + */ +struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc) +{ + struct sctp_hmac_algo_param *hmacs; + __u16 n_elt; + __u16 id = 0; + int i; + + /* If we have a default entry, use it */ + if (asoc->default_hmac_id) + return &sctp_hmac_list[asoc->default_hmac_id]; + + /* Since we do not have a default entry, find the first entry + * we support and return that. Do not cache that id. + */ + hmacs = asoc->peer.peer_hmacs; + if (!hmacs) + return NULL; + + n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1; + for (i = 0; i < n_elt; i++) { + id = ntohs(hmacs->hmac_ids[i]); + + /* Check the id is in the supported range */ + if (id > SCTP_AUTH_HMAC_ID_MAX) + continue; + + /* See is we support the id. Supported IDs have name and + * length fields set, so that we can allocated and use + * them. We can safely just check for name, for without the + * name, we can't allocate the TFM. + */ + if (!sctp_hmac_list[id].hmac_name) + continue; + + break; + } + + if (id == 0) + return NULL; + + return &sctp_hmac_list[id]; +} + +static int __sctp_auth_find_hmacid(__u16 *hmacs, int n_elts, __u16 hmac_id) +{ + int found = 0; + int i; + + for (i = 0; i < n_elts; i++) { + if (hmac_id == hmacs[i]) { + found = 1; + break; + } + } + + return found; +} + +/* See if the HMAC_ID is one that we claim as supported */ +int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, + __u16 hmac_id) +{ + struct sctp_hmac_algo_param *hmacs; + __u16 n_elt; + + if (!asoc) + return 0; + + hmacs = (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs; + n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1; + + return __sctp_auth_find_hmacid(hmacs->hmac_ids, n_elt, hmac_id); +} + + +/* Cache the default HMAC id. This to follow this text from SCTP-AUTH: + * Section 6.1: + * The receiver of a HMAC-ALGO parameter SHOULD use the first listed + * algorithm it supports. + */ +void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, + struct sctp_hmac_algo_param *hmacs) +{ + struct sctp_endpoint *ep; + __u16 id; + int i; + int n_params; + + /* if the default id is already set, use it */ + if (asoc->default_hmac_id) + return; + + n_params = (ntohs(hmacs->param_hdr.length) + - sizeof(sctp_paramhdr_t)) >> 1; + ep = asoc->ep; + for (i = 0; i < n_params; i++) { + id = ntohs(hmacs->hmac_ids[i]); + + /* Check the id is in the supported range */ + if (id > SCTP_AUTH_HMAC_ID_MAX) + continue; + + /* If this TFM has been allocated, use this id */ + if (ep->auth_hmacs[id]) { + asoc->default_hmac_id = id; + break; + } + } +} + + +/* Check to see if the given chunk is supposed to be authenticated */ +static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) +{ + unsigned short len; + int found = 0; + int i; + + if (!param) + return 0; + + len = ntohs(param->param_hdr.length) - sizeof(sctp_paramhdr_t); + + /* SCTP-AUTH, Section 3.2 + * The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH + * chunks MUST NOT be listed in the CHUNKS parameter. However, if + * a CHUNKS parameter is received then the types for INIT, INIT-ACK, + * SHUTDOWN-COMPLETE and AUTH chunks MUST be ignored. + */ + for (i = 0; !found && i < len; i++) { + switch (param->chunks[i]) { + case SCTP_CID_INIT: + case SCTP_CID_INIT_ACK: + case SCTP_CID_SHUTDOWN_COMPLETE: + case SCTP_CID_AUTH: + break; + + default: + if (param->chunks[i] == chunk) + found = 1; + break; + } + } + + return found; +} + +/* Check if peer requested that this chunk is authenticated */ +int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) +{ + if (!sctp_auth_enable || !asoc || !asoc->peer.auth_capable) + return 0; + + return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); +} + +/* Check if we requested that peer authenticate this chunk. */ +int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) +{ + if (!sctp_auth_enable || !asoc) + return 0; + + return __sctp_auth_cid(chunk, + (struct sctp_chunks_param *)asoc->c.auth_chunks); +} + +/* SCTP-AUTH: Section 6.2: + * The sender MUST calculate the MAC as described in RFC2104 [2] using + * the hash function H as described by the MAC Identifier and the shared + * association key K based on the endpoint pair shared key described by + * the shared key identifier. The 'data' used for the computation of + * the AUTH-chunk is given by the AUTH chunk with its HMAC field set to + * zero (as shown in Figure 6) followed by all chunks that are placed + * after the AUTH chunk in the SCTP packet. + */ +void sctp_auth_calculate_hmac(const struct sctp_association *asoc, + struct sk_buff *skb, + struct sctp_auth_chunk *auth, + gfp_t gfp) +{ + struct scatterlist sg; + struct hash_desc desc; + struct sctp_auth_bytes *asoc_key; + __u16 key_id, hmac_id; + __u8 *digest; + unsigned char *end; + int free_key = 0; + + /* Extract the info we need: + * - hmac id + * - key id + */ + key_id = ntohs(auth->auth_hdr.shkey_id); + hmac_id = ntohs(auth->auth_hdr.hmac_id); + + if (key_id == asoc->active_key_id) + asoc_key = asoc->asoc_shared_key; + else { + struct sctp_shared_key *ep_key; + + ep_key = sctp_auth_get_shkey(asoc, key_id); + if (!ep_key) + return; + + asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp); + if (!asoc_key) + return; + + free_key = 1; + } + + /* set up scatter list */ + end = skb_tail_pointer(skb); + sg.page = virt_to_page(auth); + sg.offset = (unsigned long)(auth) % PAGE_SIZE; + sg.length = end - (unsigned char *)auth; + + desc.tfm = asoc->ep->auth_hmacs[hmac_id]; + desc.flags = 0; + + digest = auth->auth_hdr.hmac; + if (crypto_hash_setkey(desc.tfm, &asoc_key->data[0], asoc_key->len)) + goto free; + + crypto_hash_digest(&desc, &sg, sg.length, digest); + +free: + if (free_key) + sctp_auth_key_put(asoc_key); +} + +/* API Helpers */ + +/* Add a chunk to the endpoint authenticated chunk list */ +int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id) +{ + struct sctp_chunks_param *p = ep->auth_chunk_list; + __u16 nchunks; + __u16 param_len; + + /* If this chunk is already specified, we are done */ + if (__sctp_auth_cid(chunk_id, p)) + return 0; + + /* Check if we can add this chunk to the array */ + param_len = ntohs(p->param_hdr.length); + nchunks = param_len - sizeof(sctp_paramhdr_t); + if (nchunks == SCTP_NUM_CHUNK_TYPES) + return -EINVAL; + + p->chunks[nchunks] = chunk_id; + p->param_hdr.length = htons(param_len + 1); + return 0; +} + +/* Add hmac identifires to the endpoint list of supported hmac ids */ +int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep, + struct sctp_hmacalgo *hmacs) +{ + int has_sha1 = 0; + __u16 id; + int i; + + /* Scan the list looking for unsupported id. Also make sure that + * SHA1 is specified. + */ + for (i = 0; i < hmacs->shmac_num_idents; i++) { + id = hmacs->shmac_idents[i]; + + if (SCTP_AUTH_HMAC_ID_SHA1 == id) + has_sha1 = 1; + + if (!sctp_hmac_list[id].hmac_name) + return -EOPNOTSUPP; + } + + if (!has_sha1) + return -EINVAL; + + memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0], + hmacs->shmac_num_idents * sizeof(__u16)); + ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) + + hmacs->shmac_num_idents * sizeof(__u16)); + return 0; +} + +/* Set a new shared key on either endpoint or association. If the + * the key with a same ID already exists, replace the key (remove the + * old key and add a new one). + */ +int sctp_auth_set_key(struct sctp_endpoint *ep, + struct sctp_association *asoc, + struct sctp_authkey *auth_key) +{ + struct sctp_shared_key *cur_key = NULL; + struct sctp_auth_bytes *key; + struct list_head *sh_keys; + int replace = 0; + + /* Try to find the given key id to see if + * we are doing a replace, or adding a new key + */ + if (asoc) + sh_keys = &asoc->endpoint_shared_keys; + else + sh_keys = &ep->endpoint_shared_keys; + + key_for_each(cur_key, sh_keys) { + if (cur_key->key_id == auth_key->sca_keynumber) { + replace = 1; + break; + } + } + + /* If we are not replacing a key id, we need to allocate + * a shared key. + */ + if (!replace) { + cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, + GFP_KERNEL); + if (!cur_key) + return -ENOMEM; + } + + /* Create a new key data based on the info passed in */ + key = sctp_auth_create_key(auth_key->sca_keylen, GFP_KERNEL); + if (!key) + goto nomem; + + memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylen); + + /* If we are replacing, remove the old keys data from the + * key id. If we are adding new key id, add it to the + * list. + */ + if (replace) + sctp_auth_key_put(cur_key->key); + else + list_add(&cur_key->key_list, sh_keys); + + cur_key->key = key; + sctp_auth_key_hold(key); + + return 0; +nomem: + if (!replace) + sctp_auth_shkey_free(cur_key); + + return -ENOMEM; +} + +int sctp_auth_set_active_key(struct sctp_endpoint *ep, + struct sctp_association *asoc, + __u16 key_id) +{ + struct sctp_shared_key *key; + struct list_head *sh_keys; + int found = 0; + + /* The key identifier MUST correst to an existing key */ + if (asoc) + sh_keys = &asoc->endpoint_shared_keys; + else + sh_keys = &ep->endpoint_shared_keys; + + key_for_each(key, sh_keys) { + if (key->key_id == key_id) { + found = 1; + break; + } + } + + if (!found) + return -EINVAL; + + if (asoc) { + asoc->active_key_id = key_id; + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + } else + ep->active_key_id = key_id; + + return 0; +} + +int sctp_auth_del_key_id(struct sctp_endpoint *ep, + struct sctp_association *asoc, + __u16 key_id) +{ + struct sctp_shared_key *key; + struct list_head *sh_keys; + int found = 0; + + /* The key identifier MUST NOT be the current active key + * The key identifier MUST correst to an existing key + */ + if (asoc) { + if (asoc->active_key_id == key_id) + return -EINVAL; + + sh_keys = &asoc->endpoint_shared_keys; + } else { + if (ep->active_key_id == key_id) + return -EINVAL; + + sh_keys = &ep->endpoint_shared_keys; + } + + key_for_each(key, sh_keys) { + if (key->key_id == key_id) { + found = 1; + break; + } + } + + if (!found) + return -EINVAL; + + /* Delete the shared key */ + list_del_init(&key->key_list); + sctp_auth_shkey_free(key); + + return 0; +} diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 77fb7b06a9c4..619d0f2dee51 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -194,6 +194,18 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, max = asoc->frag_point; + /* If the the peer requested that we authenticate DATA chunks + * we need to accound for bundling of the AUTH chunks along with + * DATA. + */ + if (sctp_auth_send_cid(SCTP_CID_DATA, asoc)) { + struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); + + if (hmac_desc) + max -= WORD_ROUND(sizeof(sctp_auth_chunk_t) + + hmac_desc->hmac_len); + } + whole = 0; first_len = max; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 8f485a0d14bd..2d2d81ef4a69 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -69,12 +69,56 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, gfp_t gfp) { + struct sctp_hmac_algo_param *auth_hmacs = NULL; + struct sctp_chunks_param *auth_chunks = NULL; + struct sctp_shared_key *null_key; + int err; + memset(ep, 0, sizeof(struct sctp_endpoint)); ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); if (!ep->digest) return NULL; + if (sctp_auth_enable) { + /* Allocate space for HMACS and CHUNKS authentication + * variables. There are arrays that we encode directly + * into parameters to make the rest of the operations easier. + */ + auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) + + sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp); + if (!auth_hmacs) + goto nomem; + + auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) + + SCTP_NUM_CHUNK_TYPES, gfp); + if (!auth_chunks) + goto nomem; + + /* Initialize the HMACS parameter. + * SCTP-AUTH: Section 3.3 + * Every endpoint supporting SCTP chunk authentication MUST + * support the HMAC based on the SHA-1 algorithm. + */ + auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO; + auth_hmacs->param_hdr.length = + htons(sizeof(sctp_paramhdr_t) + 2); + auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1); + + /* Initialize the CHUNKS parameter */ + auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS; + + /* If the Add-IP functionality is enabled, we must + * authenticate, ASCONF and ASCONF-ACK chunks + */ + if (sctp_addip_enable) { + auth_chunks->chunks[0] = SCTP_CID_ASCONF; + auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; + auth_chunks->param_hdr.length = + htons(sizeof(sctp_paramhdr_t) + 2); + } + } + /* Initialize the base structure. */ /* What type of endpoint are we? */ ep->base.type = SCTP_EP_TYPE_SOCKET; @@ -102,6 +146,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Use SCTP specific send buffer space queues. */ ep->sndbuf_policy = sctp_sndbuf_policy; + sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); @@ -113,7 +158,36 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->last_key = ep->current_key = 0; ep->key_changed_at = jiffies; + /* SCTP-AUTH extensions*/ + INIT_LIST_HEAD(&ep->endpoint_shared_keys); + null_key = sctp_auth_shkey_create(0, GFP_KERNEL); + if (!null_key) + goto nomem; + + list_add(&null_key->key_list, &ep->endpoint_shared_keys); + + /* Allocate and initialize transorms arrays for suported HMACs. */ + err = sctp_auth_init_hmacs(ep, gfp); + if (err) + goto nomem_hmacs; + + /* Add the null key to the endpoint shared keys list and + * set the hmcas and chunks pointers. + */ + ep->auth_hmacs_list = auth_hmacs; + ep->auth_chunk_list = auth_chunks; + return ep; + +nomem_hmacs: + sctp_auth_destroy_keys(&ep->endpoint_shared_keys); +nomem: + /* Free all allocations */ + kfree(auth_hmacs); + kfree(auth_chunks); + kfree(ep->digest); + return NULL; + } /* Create a sctp_endpoint with all that boring stuff initialized. @@ -186,6 +260,16 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) /* Free the digest buffer */ kfree(ep->digest); + /* SCTP-AUTH: Free up AUTH releated data such as shared keys + * chunks and hmacs arrays that were allocated + */ + sctp_auth_destroy_keys(&ep->endpoint_shared_keys); + kfree(ep->auth_hmacs_list); + kfree(ep->auth_chunk_list); + + /* AUTH - Free any allocated HMAC transform containers */ + sctp_auth_destroy_hmacs(ep->auth_hmacs); + /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); sctp_bind_addr_free(&ep->base.bind_addr); @@ -316,6 +400,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) sctp_subtype_t subtype; sctp_state_t state; int error = 0; + int first_time = 1; /* is this the first time through the looop */ if (ep->base.dead) return; @@ -327,6 +412,29 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) while (NULL != (chunk = sctp_inq_pop(inqueue))) { subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type); + /* If the first chunk in the packet is AUTH, do special + * processing specified in Section 6.3 of SCTP-AUTH spec + */ + if (first_time && (subtype.chunk == SCTP_CID_AUTH)) { + struct sctp_chunkhdr *next_hdr; + + next_hdr = sctp_inq_peek(inqueue); + if (!next_hdr) + goto normal; + + /* If the next chunk is COOKIE-ECHO, skip the AUTH + * chunk while saving a pointer to it so we can do + * Authentication later (during cookie-echo + * processing). + */ + if (next_hdr->type == SCTP_CID_COOKIE_ECHO) { + chunk->auth_chunk = skb_clone(chunk->skb, + GFP_ATOMIC); + chunk->auth = 1; + continue; + } + } +normal: /* We might have grown an association since last we * looked, so try again. * @@ -342,6 +450,8 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) } state = asoc ? asoc->state : SCTP_STATE_CLOSED; + if (sctp_auth_recv_cid(subtype.chunk, asoc) && !chunk->auth) + continue; /* Remember where the last DATA chunk came from so we * know where to send the SACK. @@ -365,5 +475,8 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) */ if (!sctp_sk(sk)->ep) break; + + if (first_time) + first_time = 0; } } diff --git a/net/sctp/input.c b/net/sctp/input.c index f9a0c9276e3b..86503e7fa21e 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -911,15 +911,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, ch = (sctp_chunkhdr_t *) skb->data; - /* If this is INIT/INIT-ACK look inside the chunk too. */ - switch (ch->type) { - case SCTP_CID_INIT: - case SCTP_CID_INIT_ACK: - break; - default: - return NULL; - } - /* The code below will attempt to walk the chunk and extract * parameter information. Before we do that, we need to verify * that the chunk length doesn't cause overflow. Otherwise, we'll @@ -964,6 +955,60 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, return NULL; } +/* SCTP-AUTH, Section 6.3: +* If the receiver does not find a STCB for a packet containing an AUTH +* chunk as the first chunk and not a COOKIE-ECHO chunk as the second +* chunk, it MUST use the chunks after the AUTH chunk to look up an existing +* association. +* +* This means that any chunks that can help us identify the association need +* to be looked at to find this assocation. +* +* TODO: The only chunk currently defined that can do that is ASCONF, but we +* don't support that functionality yet. +*/ +static struct sctp_association *__sctp_rcv_auth_lookup(struct sk_buff *skb, + const union sctp_addr *paddr, + const union sctp_addr *laddr, + struct sctp_transport **transportp) +{ + /* XXX - walk through the chunks looking for something that can + * help us find the association. INIT, and INIT-ACK are not permitted. + * That leaves ASCONF, but we don't support that yet. + */ + return NULL; +} + +/* + * There are circumstances when we need to look inside the SCTP packet + * for information to help us find the association. Examples + * include looking inside of INIT/INIT-ACK chunks or after the AUTH + * chunks. + */ +static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, + const union sctp_addr *paddr, + const union sctp_addr *laddr, + struct sctp_transport **transportp) +{ + sctp_chunkhdr_t *ch; + + ch = (sctp_chunkhdr_t *) skb->data; + + /* If this is INIT/INIT-ACK look inside the chunk too. */ + switch (ch->type) { + case SCTP_CID_INIT: + case SCTP_CID_INIT_ACK: + return __sctp_rcv_init_lookup(skb, laddr, transportp); + break; + + case SCTP_CID_AUTH: + return __sctp_rcv_auth_lookup(skb, paddr, laddr, transportp); + break; + } + + return NULL; +} + /* Lookup an association for an inbound skb. */ static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb, const union sctp_addr *paddr, @@ -979,7 +1024,7 @@ static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb, * parameters within the INIT or INIT-ACK. */ if (!asoc) - asoc = __sctp_rcv_init_lookup(skb, laddr, transportp); + asoc = __sctp_rcv_lookup_harder(skb, paddr, laddr, transportp); return asoc; } diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index e4ea7fdf36ed..f10fe7fbf24c 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -100,6 +100,25 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) q->immediate.func(&q->immediate); } +/* Peek at the next chunk on the inqeue. */ +struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue) +{ + struct sctp_chunk *chunk; + sctp_chunkhdr_t *ch = NULL; + + chunk = queue->in_progress; + /* If there is no more chunks in this packet, say so */ + if (chunk->singleton || + chunk->end_of_packet || + chunk->pdiscard) + return NULL; + + ch = (sctp_chunkhdr_t *)chunk->chunk_end; + + return ch; +} + + /* Extract a chunk from an SCTP inqueue. * * WARNING: If you need to put the chunk on another queue, you need to diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 670fd2740b89..9de3ddaa2768 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -493,7 +493,7 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1, if (addr1->sa.sa_family != addr2->sa.sa_family) { if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET6 && - IPV6_ADDR_MAPPED == ipv6_addr_type(&addr2->v6.sin6_addr)) { + ipv6_addr_v4mapped(&addr2->v6.sin6_addr)) { if (addr2->v6.sin6_port == addr1->v4.sin_port && addr2->v6.sin6_addr.s6_addr32[3] == addr1->v4.sin_addr.s_addr) @@ -501,7 +501,7 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1, } if (addr2->sa.sa_family == AF_INET && addr1->sa.sa_family == AF_INET6 && - IPV6_ADDR_MAPPED == ipv6_addr_type(&addr1->v6.sin6_addr)) { + ipv6_addr_v4mapped(&addr1->v6.sin6_addr)) { if (addr1->v6.sin6_port == addr2->v4.sin_port && addr1->v6.sin6_addr.s6_addr32[3] == addr2->v4.sin_addr.s_addr) @@ -631,7 +631,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; - newsk = sk_alloc(PF_INET6, GFP_KERNEL, sk->sk_prot, 1); + newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot, 1); if (!newsk) goto out; @@ -855,7 +855,7 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(addr->v6.sin6_scope_id); + dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); if (!dev) return 0; if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { @@ -886,7 +886,7 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(addr->v6.sin6_scope_id); + dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); if (!dev) return 0; dev_put(dev); diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index fcfb9d806de1..2cf6ad6ff8ce 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -58,6 +58,7 @@ SCTP_DBG_OBJCNT(chunk); SCTP_DBG_OBJCNT(addr); SCTP_DBG_OBJCNT(ssnmap); SCTP_DBG_OBJCNT(datamsg); +SCTP_DBG_OBJCNT(keys); /* An array to make it easy to pretty print the debug information * to the proc fs. @@ -73,6 +74,7 @@ static sctp_dbg_objcnt_entry_t sctp_dbg_objcnt[] = { SCTP_DBG_OBJCNT_ENTRY(addr), SCTP_DBG_OBJCNT_ENTRY(ssnmap), SCTP_DBG_OBJCNT_ENTRY(datamsg), + SCTP_DBG_OBJCNT_ENTRY(keys), }; /* Callback from procfs to read out objcount information. diff --git a/net/sctp/output.c b/net/sctp/output.c index d85543def754..847639d542c0 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -79,7 +79,10 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, packet->vtag = vtag; packet->has_cookie_echo = 0; packet->has_sack = 0; + packet->has_auth = 0; + packet->has_data = 0; packet->ipfragok = 0; + packet->auth = NULL; if (ecn_capable && sctp_packet_empty(packet)) { chunk = sctp_get_ecne_prepend(packet->transport->asoc); @@ -121,8 +124,11 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, packet->vtag = 0; packet->has_cookie_echo = 0; packet->has_sack = 0; + packet->has_auth = 0; + packet->has_data = 0; packet->ipfragok = 0; packet->malloced = 0; + packet->auth = NULL; return packet; } @@ -181,6 +187,39 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, return retval; } +/* Try to bundle an auth chunk into the packet. */ +static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt, + struct sctp_chunk *chunk) +{ + struct sctp_association *asoc = pkt->transport->asoc; + struct sctp_chunk *auth; + sctp_xmit_t retval = SCTP_XMIT_OK; + + /* if we don't have an association, we can't do authentication */ + if (!asoc) + return retval; + + /* See if this is an auth chunk we are bundling or if + * auth is already bundled. + */ + if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->auth) + return retval; + + /* if the peer did not request this chunk to be authenticated, + * don't do it + */ + if (!chunk->auth) + return retval; + + auth = sctp_make_auth(asoc); + if (!auth) + return retval; + + retval = sctp_packet_append_chunk(pkt, auth); + + return retval; +} + /* Try to bundle a SACK with the packet. */ static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, struct sctp_chunk *chunk) @@ -227,12 +266,17 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __FUNCTION__, packet, chunk); - retval = sctp_packet_bundle_sack(packet, chunk); - psize = packet->size; + /* Try to bundle AUTH chunk */ + retval = sctp_packet_bundle_auth(packet, chunk); + if (retval != SCTP_XMIT_OK) + goto finish; + /* Try to bundle SACK chunk */ + retval = sctp_packet_bundle_sack(packet, chunk); if (retval != SCTP_XMIT_OK) goto finish; + psize = packet->size; pmtu = ((packet->transport->asoc) ? (packet->transport->asoc->pathmtu) : (packet->transport->pathmtu)); @@ -241,10 +285,16 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, /* Decide if we need to fragment or resubmit later. */ if (too_big) { - /* Both control chunks and data chunks with TSNs are - * non-fragmentable. + /* It's OK to fragmet at IP level if any one of the following + * is true: + * 1. The packet is empty (meaning this chunk is greater + * the MTU) + * 2. The chunk we are adding is a control chunk + * 3. The packet doesn't have any data in it yet and data + * requires authentication. */ - if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk)) { + if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) || + (!packet->has_data && chunk->auth)) { /* We no longer do re-fragmentation. * Just fragment at the IP layer, if we * actually hit this condition @@ -266,16 +316,31 @@ append: /* DATA is a special case since we must examine both rwnd and cwnd * before we send DATA. */ - if (sctp_chunk_is_data(chunk)) { + switch (chunk->chunk_hdr->type) { + case SCTP_CID_DATA: retval = sctp_packet_append_data(packet, chunk); /* Disallow SACK bundling after DATA. */ packet->has_sack = 1; + /* Disallow AUTH bundling after DATA */ + packet->has_auth = 1; + /* Let it be knows that packet has DATA in it */ + packet->has_data = 1; if (SCTP_XMIT_OK != retval) goto finish; - } else if (SCTP_CID_COOKIE_ECHO == chunk->chunk_hdr->type) + break; + case SCTP_CID_COOKIE_ECHO: packet->has_cookie_echo = 1; - else if (SCTP_CID_SACK == chunk->chunk_hdr->type) + break; + + case SCTP_CID_SACK: packet->has_sack = 1; + break; + + case SCTP_CID_AUTH: + packet->has_auth = 1; + packet->auth = chunk; + break; + } /* It is OK to send this chunk. */ list_add_tail(&chunk->list, &packet->chunk_list); @@ -303,6 +368,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) int padding; /* How much padding do we need? */ __u8 has_data = 0; struct dst_entry *dst = tp->dst; + unsigned char *auth = NULL; /* pointer to auth in skb data */ + __u32 cksum_buf_len = sizeof(struct sctphdr); SCTP_DEBUG_PRINTK("%s: packet:%p\n", __FUNCTION__, packet); @@ -356,16 +423,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) sh->vtag = htonl(packet->vtag); sh->checksum = 0; - /* 2) Calculate the Adler-32 checksum of the whole packet, - * including the SCTP common header and all the - * chunks. - * - * Note: Adler-32 is no longer applicable, as has been replaced - * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. - */ - if (!(dst->dev->features & NETIF_F_NO_CSUM)) - crc32 = sctp_start_cksum((__u8 *)sh, sizeof(struct sctphdr)); - /** * 6.10 Bundling * @@ -416,14 +473,16 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (padding) memset(skb_put(chunk->skb, padding), 0, padding); - if (dst->dev->features & NETIF_F_NO_CSUM) - memcpy(skb_put(nskb, chunk->skb->len), + /* if this is the auth chunk that we are adding, + * store pointer where it will be added and put + * the auth into the packet. + */ + if (chunk == packet->auth) + auth = skb_tail_pointer(nskb); + + cksum_buf_len += chunk->skb->len; + memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, chunk->skb->len); - else - crc32 = sctp_update_copy_cksum(skb_put(nskb, - chunk->skb->len), - chunk->skb->data, - chunk->skb->len, crc32); SCTP_DEBUG_PRINTK("%s %p[%s] %s 0x%x, %s %d, %s %d, %s %d\n", "*** Chunk", chunk, @@ -445,9 +504,31 @@ int sctp_packet_transmit(struct sctp_packet *packet) sctp_chunk_free(chunk); } - /* Perform final transformation on checksum. */ - if (!(dst->dev->features & NETIF_F_NO_CSUM)) + /* SCTP-AUTH, Section 6.2 + * The sender MUST calculate the MAC as described in RFC2104 [2] + * using the hash function H as described by the MAC Identifier and + * the shared association key K based on the endpoint pair shared key + * described by the shared key identifier. The 'data' used for the + * computation of the AUTH-chunk is given by the AUTH chunk with its + * HMAC field set to zero (as shown in Figure 6) followed by all + * chunks that are placed after the AUTH chunk in the SCTP packet. + */ + if (auth) + sctp_auth_calculate_hmac(asoc, nskb, + (struct sctp_auth_chunk *)auth, + GFP_ATOMIC); + + /* 2) Calculate the Adler-32 checksum of the whole packet, + * including the SCTP common header and all the + * chunks. + * + * Note: Adler-32 is no longer applicable, as has been replaced + * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. + */ + if (!(dst->dev->features & NETIF_F_NO_CSUM)) { + crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); crc32 = sctp_end_cksum(crc32); + } /* 3) Put the resultant value into the checksum field in the * common header, and leave the rest of the bits unchanged. diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3d036cdfae41..81b26c5ffd4b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -51,6 +51,8 @@ #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/seq_file.h> +#include <linux/bootmem.h> +#include <net/net_namespace.h> #include <net/protocol.h> #include <net/ip.h> #include <net/ipv6.h> @@ -82,6 +84,10 @@ static struct sctp_af *sctp_af_v6_specific; struct kmem_cache *sctp_chunk_cachep __read_mostly; struct kmem_cache *sctp_bucket_cachep __read_mostly; +int sysctl_sctp_mem[3]; +int sysctl_sctp_rmem[3]; +int sysctl_sctp_wmem[3]; + /* Return the address of the control sock. */ struct sock *sctp_get_ctl_sock(void) { @@ -93,7 +99,7 @@ static __init int sctp_proc_init(void) { if (!proc_net_sctp) { struct proc_dir_entry *ent; - ent = proc_mkdir("net/sctp", NULL); + ent = proc_mkdir("sctp", init_net.proc_net); if (ent) { ent->owner = THIS_MODULE; proc_net_sctp = ent; @@ -126,7 +132,7 @@ static void sctp_proc_exit(void) if (proc_net_sctp) { proc_net_sctp = NULL; - remove_proc_entry("net/sctp", NULL); + remove_proc_entry("sctp", init_net.proc_net); } } @@ -173,7 +179,7 @@ static void sctp_get_local_addr_list(void) struct sctp_af *af; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); @@ -546,7 +552,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, { struct inet_sock *inet = inet_sk(sk); struct inet_sock *newinet; - struct sock *newsk = sk_alloc(PF_INET, GFP_KERNEL, sk->sk_prot, 1); + struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, sk->sk_prot, 1); if (!newsk) goto out; @@ -987,6 +993,8 @@ SCTP_STATIC __init int sctp_init(void) int i; int status = -EINVAL; unsigned long goal; + unsigned long limit; + int max_share; int order; /* SCTP_DEBUG sanity check. */ @@ -1077,6 +1085,31 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); + /* Set the pressure threshold to be a fraction of global memory that + * is up to 1/2 at 256 MB, decreasing toward zero with the amount of + * memory, with a floor of 128 pages. + * Note this initalizes the data in sctpv6_prot too + * Unabashedly stolen from tcp_init + */ + limit = min(num_physpages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (num_physpages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = max(limit, 128UL); + sysctl_sctp_mem[0] = limit / 4 * 3; + sysctl_sctp_mem[1] = limit; + sysctl_sctp_mem[2] = sysctl_sctp_mem[0] * 2; + + /* Set per-socket limits to no more than 1/128 the pressure threshold*/ + limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7); + max_share = min(4UL*1024*1024, limit); + + sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */ + sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); + sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); + + sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_sctp_wmem[1] = 16*1024; + sysctl_sctp_wmem[2] = max(64*1024, max_share); + /* Size and allocate the association hash table. * The methodology is similar to that of the tcp hash tables. */ @@ -1139,9 +1172,6 @@ SCTP_STATIC __init int sctp_init(void) sctp_port_hashtable[i].chain = NULL; } - spin_lock_init(&sctp_port_alloc_lock); - sctp_port_rover = sysctl_local_port_range[0] - 1; - printk(KERN_INFO "SCTP: Hash tables configured " "(established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); @@ -1152,6 +1182,9 @@ SCTP_STATIC __init int sctp_init(void) /* Enable PR-SCTP by default. */ sctp_prsctp_enable = 1; + /* Disable AUTH by default. */ + sctp_auth_enable = 0; + sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 23ae37ec8711..f983a369d4e2 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -179,6 +179,11 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_supported_addrs_param_t sat; __be16 types[2]; sctp_adaptation_ind_param_t aiparam; + sctp_supported_ext_param_t ext_param; + int num_ext = 0; + __u8 extensions[3]; + sctp_paramhdr_t *auth_chunks = NULL, + *auth_hmacs = NULL; /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -202,11 +207,52 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types); chunksize += sizeof(ecap_param); - if (sctp_prsctp_enable) + if (sctp_prsctp_enable) { chunksize += sizeof(prsctp_param); + extensions[num_ext] = SCTP_CID_FWD_TSN; + num_ext += 1; + } + /* ADDIP: Section 4.2.7: + * An implementation supporting this extension [ADDIP] MUST list + * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and + * INIT-ACK parameters. + */ + if (sctp_addip_enable) { + extensions[num_ext] = SCTP_CID_ASCONF; + extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; + num_ext += 2; + } + chunksize += sizeof(aiparam); chunksize += vparam_len; + /* Account for AUTH related parameters */ + if (sctp_auth_enable) { + /* Add random parameter length*/ + chunksize += sizeof(asoc->c.auth_random); + + /* Add HMACS parameter length if any were defined */ + auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; + if (auth_hmacs->length) + chunksize += ntohs(auth_hmacs->length); + else + auth_hmacs = NULL; + + /* Add CHUNKS parameter length */ + auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; + if (auth_chunks->length) + chunksize += ntohs(auth_chunks->length); + else + auth_hmacs = NULL; + + extensions[num_ext] = SCTP_CID_AUTH; + num_ext += 1; + } + + /* If we have any extensions to report, account for that */ + if (num_ext) + chunksize += sizeof(sctp_supported_ext_param_t) + num_ext; + /* RFC 2960 3.3.2 Initiation (INIT) (1) * * Note 3: An INIT chunk MUST NOT contain more than one Host @@ -241,12 +287,38 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_addto_chunk(retval, num_types * sizeof(__u16), &types); sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); + + /* Add the supported extensions paramter. Be nice and add this + * fist before addiding the parameters for the extensions themselves + */ + if (num_ext) { + ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; + ext_param.param_hdr.length = + htons(sizeof(sctp_supported_ext_param_t) + num_ext); + sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), + &ext_param); + sctp_addto_chunk(retval, num_ext, extensions); + } + if (sctp_prsctp_enable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); + aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; aiparam.param_hdr.length = htons(sizeof(aiparam)); aiparam.adaptation_ind = htonl(sp->adaptation_ind); sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); + + /* Add SCTP-AUTH chunks to the parameter list */ + if (sctp_auth_enable) { + sctp_addto_chunk(retval, sizeof(asoc->c.auth_random), + asoc->c.auth_random); + if (auth_hmacs) + sctp_addto_chunk(retval, ntohs(auth_hmacs->length), + auth_hmacs); + if (auth_chunks) + sctp_addto_chunk(retval, ntohs(auth_chunks->length), + auth_chunks); + } nodata: kfree(addrs.v); return retval; @@ -264,6 +336,12 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, int cookie_len; size_t chunksize; sctp_adaptation_ind_param_t aiparam; + sctp_supported_ext_param_t ext_param; + int num_ext = 0; + __u8 extensions[3]; + sctp_paramhdr_t *auth_chunks = NULL, + *auth_hmacs = NULL, + *auth_random = NULL; retval = NULL; @@ -294,11 +372,41 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, chunksize += sizeof(ecap_param); /* Tell peer that we'll do PR-SCTP only if peer advertised. */ - if (asoc->peer.prsctp_capable) + if (asoc->peer.prsctp_capable) { chunksize += sizeof(prsctp_param); + extensions[num_ext] = SCTP_CID_FWD_TSN; + num_ext += 1; + } + + if (sctp_addip_enable) { + extensions[num_ext] = SCTP_CID_ASCONF; + extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; + num_ext += 2; + } + chunksize += sizeof(ext_param) + num_ext; chunksize += sizeof(aiparam); + if (asoc->peer.auth_capable) { + auth_random = (sctp_paramhdr_t *)asoc->c.auth_random; + chunksize += ntohs(auth_random->length); + + auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; + if (auth_hmacs->length) + chunksize += ntohs(auth_hmacs->length); + else + auth_hmacs = NULL; + + auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; + if (auth_chunks->length) + chunksize += ntohs(auth_chunks->length); + else + auth_chunks = NULL; + + extensions[num_ext] = SCTP_CID_AUTH; + num_ext += 1; + } + /* Now allocate and fill out the chunk. */ retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize); if (!retval) @@ -314,6 +422,14 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_addto_chunk(retval, cookie_len, cookie); if (asoc->peer.ecn_capable) sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); + if (num_ext) { + ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; + ext_param.param_hdr.length = + htons(sizeof(sctp_supported_ext_param_t) + num_ext); + sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), + &ext_param); + sctp_addto_chunk(retval, num_ext, extensions); + } if (asoc->peer.prsctp_capable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); @@ -322,6 +438,17 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, aiparam.adaptation_ind = htonl(sctp_sk(asoc->base.sk)->adaptation_ind); sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); + if (asoc->peer.auth_capable) { + sctp_addto_chunk(retval, ntohs(auth_random->length), + auth_random); + if (auth_hmacs) + sctp_addto_chunk(retval, ntohs(auth_hmacs->length), + auth_hmacs); + if (auth_chunks) + sctp_addto_chunk(retval, ntohs(auth_chunks->length), + auth_chunks); + } + /* We need to remove the const qualifier at this point. */ retval->asoc = (struct sctp_association *) asoc; @@ -839,6 +966,26 @@ err_chunk: return retval; } +/* Append bytes to the end of a parameter. Will panic if chunk is not big + * enough. + */ +static void *sctp_addto_param(struct sctp_chunk *chunk, int len, + const void *data) +{ + void *target; + int chunklen = ntohs(chunk->chunk_hdr->length); + + target = skb_put(chunk->skb, len); + + memcpy(target, data, len); + + /* Adjust the chunk length field. */ + chunk->chunk_hdr->length = htons(chunklen + len); + chunk->chunk_end = skb_tail_pointer(chunk->skb); + + return target; +} + /* Make an ABORT chunk with a PROTOCOL VIOLATION cause code. */ struct sctp_chunk *sctp_make_abort_violation( const struct sctp_association *asoc, @@ -964,6 +1111,41 @@ nodata: return retval; } +struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) +{ + struct sctp_chunk *retval; + struct sctp_hmac *hmac_desc; + struct sctp_authhdr auth_hdr; + __u8 *hmac; + + /* Get the first hmac that the peer told us to use */ + hmac_desc = sctp_auth_asoc_get_hmac(asoc); + if (unlikely(!hmac_desc)) + return NULL; + + retval = sctp_make_chunk(asoc, SCTP_CID_AUTH, 0, + hmac_desc->hmac_len + sizeof(sctp_authhdr_t)); + if (!retval) + return NULL; + + auth_hdr.hmac_id = htons(hmac_desc->hmac_id); + auth_hdr.shkey_id = htons(asoc->active_key_id); + + retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(sctp_authhdr_t), + &auth_hdr); + + hmac = skb_put(retval->skb, hmac_desc->hmac_len); + memset(hmac, 0, hmac_desc->hmac_len); + + /* Adjust the chunk header to include the empty MAC */ + retval->chunk_hdr->length = + htons(ntohs(retval->chunk_hdr->length) + hmac_desc->hmac_len); + retval->chunk_end = skb_tail_pointer(retval->skb); + + return retval; +} + + /******************************************************************** * 2nd Level Abstractions ********************************************************************/ @@ -1078,6 +1260,10 @@ struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, retval->chunk_hdr = chunk_hdr; retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(struct sctp_chunkhdr); + /* Determine if the chunk needs to be authenticated */ + if (sctp_auth_send_cid(type, asoc)) + retval->auth = 1; + /* Set the skb to the belonging sock for accounting. */ skb->sk = sk; @@ -1146,25 +1332,6 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) return target; } -/* Append bytes to the end of a parameter. Will panic if chunk is not big - * enough. - */ -void *sctp_addto_param(struct sctp_chunk *chunk, int len, const void *data) -{ - void *target; - int chunklen = ntohs(chunk->chunk_hdr->length); - - target = skb_put(chunk->skb, len); - - memcpy(target, data, len); - - /* Adjust the chunk length field. */ - chunk->chunk_hdr->length = htons(chunklen + len); - chunk->chunk_end = skb_tail_pointer(chunk->skb); - - return target; -} - /* Append bytes from user space to the end of a chunk. Will panic if * chunk is not big enough. * Returns a kernel err value. @@ -1663,6 +1830,35 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, return 0; } +static void sctp_process_ext_param(struct sctp_association *asoc, + union sctp_params param) +{ + __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); + int i; + + for (i = 0; i < num_ext; i++) { + switch (param.ext->chunks[i]) { + case SCTP_CID_FWD_TSN: + if (sctp_prsctp_enable && + !asoc->peer.prsctp_capable) + asoc->peer.prsctp_capable = 1; + break; + case SCTP_CID_AUTH: + /* if the peer reports AUTH, assume that he + * supports AUTH. + */ + asoc->peer.auth_capable = 1; + break; + case SCTP_CID_ASCONF: + case SCTP_CID_ASCONF_ACK: + asoc->peer.addip_capable = 1; + break; + default: + break; + } + } +} + /* RFC 3.2.1 & the Implementers Guide 2.2. * * The Parameter Types are encoded such that the @@ -1779,15 +1975,52 @@ static int sctp_verify_param(const struct sctp_association *asoc, case SCTP_PARAM_UNRECOGNIZED_PARAMETERS: case SCTP_PARAM_ECN_CAPABLE: case SCTP_PARAM_ADAPTATION_LAYER_IND: + case SCTP_PARAM_SUPPORTED_EXT: break; case SCTP_PARAM_HOST_NAME_ADDRESS: /* Tell the peer, we won't support this param. */ return sctp_process_hn_param(asoc, param, chunk, err_chunk); + case SCTP_PARAM_FWD_TSN_SUPPORT: if (sctp_prsctp_enable) break; + goto fallthrough; + + case SCTP_PARAM_RANDOM: + if (!sctp_auth_enable) + goto fallthrough; + + /* SCTP-AUTH: Secion 6.1 + * If the random number is not 32 byte long the association + * MUST be aborted. The ABORT chunk SHOULD contain the error + * cause 'Protocol Violation'. + */ + if (SCTP_AUTH_RANDOM_LENGTH != + ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) + return sctp_process_inv_paramlength(asoc, param.p, + chunk, err_chunk); + break; + + case SCTP_PARAM_CHUNKS: + if (!sctp_auth_enable) + goto fallthrough; + + /* SCTP-AUTH: Section 3.2 + * The CHUNKS parameter MUST be included once in the INIT or + * INIT-ACK chunk if the sender wants to receive authenticated + * chunks. Its maximum length is 260 bytes. + */ + if (260 < ntohs(param.p->length)) + return sctp_process_inv_paramlength(asoc, param.p, + chunk, err_chunk); + break; + + case SCTP_PARAM_HMAC_ALGO: + if (!sctp_auth_enable) + break; /* Fall Through */ +fallthrough: default: SCTP_DEBUG_PRINTK("Unrecognized param: %d for chunk %d.\n", ntohs(param.p->type), cid); @@ -1892,13 +2125,29 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, } /* Process the initialization parameters. */ - sctp_walk_params(param, peer_init, init_hdr.params) { if (!sctp_process_param(asoc, param, peer_addr, gfp)) goto clean_up; } + /* AUTH: After processing the parameters, make sure that we + * have all the required info to potentially do authentications. + */ + if (asoc->peer.auth_capable && (!asoc->peer.peer_random || + !asoc->peer.peer_hmacs)) + asoc->peer.auth_capable = 0; + + + /* If the peer claims support for ADD-IP without support + * for AUTH, disable support for ADD-IP. + */ + if (asoc->peer.addip_capable && !asoc->peer.auth_capable) { + asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP | + SCTP_PARAM_DEL_IP | + SCTP_PARAM_SET_PRIMARY); + } + /* Walk list of transports, removing transports in the UNKNOWN state. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); @@ -2128,12 +2377,57 @@ static int sctp_process_param(struct sctp_association *asoc, asoc->peer.adaptation_ind = param.aind->adaptation_ind; break; + case SCTP_PARAM_SUPPORTED_EXT: + sctp_process_ext_param(asoc, param); + break; + case SCTP_PARAM_FWD_TSN_SUPPORT: if (sctp_prsctp_enable) { asoc->peer.prsctp_capable = 1; break; } /* Fall Through */ + goto fall_through; + + case SCTP_PARAM_RANDOM: + if (!sctp_auth_enable) + goto fall_through; + + /* Save peer's random parameter */ + asoc->peer.peer_random = kmemdup(param.p, + ntohs(param.p->length), gfp); + if (!asoc->peer.peer_random) { + retval = 0; + break; + } + break; + + case SCTP_PARAM_HMAC_ALGO: + if (!sctp_auth_enable) + goto fall_through; + + /* Save peer's HMAC list */ + asoc->peer.peer_hmacs = kmemdup(param.p, + ntohs(param.p->length), gfp); + if (!asoc->peer.peer_hmacs) { + retval = 0; + break; + } + + /* Set the default HMAC the peer requested*/ + sctp_auth_asoc_set_default_hmac(asoc, param.hmac_algo); + break; + + case SCTP_PARAM_CHUNKS: + if (!sctp_auth_enable) + goto fall_through; + + asoc->peer.peer_chunks = kmemdup(param.p, + ntohs(param.p->length), gfp); + if (!asoc->peer.peer_chunks) + retval = 0; + break; +fall_through: default: /* Any unrecognized parameters should have been caught * and handled by sctp_verify_param() which should be diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8d7890083493..bbdc938da86f 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1524,6 +1524,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_cmd_adaptation_ind(commands, asoc); break; + case SCTP_CMD_ASSOC_SHKEY: + error = sctp_auth_asoc_init_active_key(asoc, + GFP_ATOMIC); + break; + default: printk(KERN_WARNING "Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a583d67cab63..f01b408508ff 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -138,6 +138,11 @@ static sctp_disposition_t sctp_sf_violation_chunk( void *arg, sctp_cmd_seq_t *commands); +static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + struct sctp_chunk *chunk); + /* Small helper function that checks if the chunk length * is of the appropriate length. The 'required_length' argument * is set to be the size of a specific chunk we are testing. @@ -495,8 +500,6 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. */ @@ -521,6 +524,22 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); error = SCTP_ERROR_INV_PARAM; } + + /* SCTP-AUTH, Section 6.3: + * It should be noted that if the receiver wants to tear + * down an association in an authenticated way only, the + * handling of malformed packets should not result in + * tearing down the association. + * + * This means that if we only want to abort associations + * in an authenticated way (i.e AUTH+ABORT), then we + * can't destory this association just becuase the packet + * was malformed. + */ + if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, asoc, chunk->transport); } @@ -549,6 +568,11 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_COOKIE_ECHOED)); + /* SCTP-AUTH: genereate the assocition shared keys so that + * we can potentially signe the COOKIE-ECHO. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_SHKEY, SCTP_NULL()); + /* 5.1 C) "A" shall then send the State Cookie received in the * INIT ACK chunk in a COOKIE ECHO chunk, ... */ @@ -686,6 +710,44 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, peer_init, GFP_ATOMIC)) goto nomem_init; + /* SCTP-AUTH: Now that we've populate required fields in + * sctp_process_init, set up the assocaition shared keys as + * necessary so that we can potentially authenticate the ACK + */ + error = sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC); + if (error) + goto nomem_init; + + /* SCTP-AUTH: auth_chunk pointer is only set when the cookie-echo + * is supposed to be authenticated and we have to do delayed + * authentication. We've just recreated the association using + * the information in the cookie and now it's much easier to + * do the authentication. + */ + if (chunk->auth_chunk) { + struct sctp_chunk auth; + sctp_ierror_t ret; + + /* set-up our fake chunk so that we can process it */ + auth.skb = chunk->auth_chunk; + auth.asoc = chunk->asoc; + auth.sctp_hdr = chunk->sctp_hdr; + auth.chunk_hdr = (sctp_chunkhdr_t *)skb_push(chunk->auth_chunk, + sizeof(sctp_chunkhdr_t)); + skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t)); + auth.transport = chunk->transport; + + ret = sctp_sf_authenticate(ep, new_asoc, type, &auth); + + /* We can now safely free the auth_chunk clone */ + kfree_skb(chunk->auth_chunk); + + if (ret != SCTP_IERROR_NO_ERROR) { + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + } + } + repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem_init; @@ -1247,6 +1309,26 @@ static void sctp_tietags_populate(struct sctp_association *new_asoc, new_asoc->c.initial_tsn = asoc->c.initial_tsn; } +static void sctp_auth_params_populate(struct sctp_association *new_asoc, + const struct sctp_association *asoc) +{ + /* Only perform this if AUTH extension is enabled */ + if (!sctp_auth_enable) + return; + + /* We need to provide the same parameter information as + * was in the original INIT. This means that we need to copy + * the HMACS, CHUNKS, and RANDOM parameter from the original + * assocaition. + */ + memcpy(new_asoc->c.auth_random, asoc->c.auth_random, + sizeof(asoc->c.auth_random)); + memcpy(new_asoc->c.auth_hmacs, asoc->c.auth_hmacs, + sizeof(asoc->c.auth_hmacs)); + memcpy(new_asoc->c.auth_chunks, asoc->c.auth_chunks, + sizeof(asoc->c.auth_chunks)); +} + /* * Compare vtag/tietag values to determine unexpected COOKIE-ECHO * handling action. @@ -1404,6 +1486,8 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( sctp_tietags_populate(new_asoc, asoc); + sctp_auth_params_populate(new_asoc, asoc); + /* B) "Z" shall respond immediately with an INIT ACK chunk. */ /* If there are errors need to be reported for unknown parameters, @@ -3618,6 +3702,169 @@ gen_shutdown: } /* + * SCTP-AUTH Section 6.3 Receving authenticated chukns + * + * The receiver MUST use the HMAC algorithm indicated in the HMAC + * Identifier field. If this algorithm was not specified by the + * receiver in the HMAC-ALGO parameter in the INIT or INIT-ACK chunk + * during association setup, the AUTH chunk and all chunks after it MUST + * be discarded and an ERROR chunk SHOULD be sent with the error cause + * defined in Section 4.1. + * + * If an endpoint with no shared key receives a Shared Key Identifier + * other than 0, it MUST silently discard all authenticated chunks. If + * the endpoint has at least one endpoint pair shared key for the peer, + * it MUST use the key specified by the Shared Key Identifier if a + * key has been configured for that Shared Key Identifier. If no + * endpoint pair shared key has been configured for that Shared Key + * Identifier, all authenticated chunks MUST be silently discarded. + * + * Verification Tag: 8.5 Verification Tag [Normal verification] + * + * The return value is the disposition of the chunk. + */ +static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + struct sctp_chunk *chunk) +{ + struct sctp_authhdr *auth_hdr; + struct sctp_hmac *hmac; + unsigned int sig_len; + __u16 key_id; + __u8 *save_digest; + __u8 *digest; + + /* Pull in the auth header, so we can do some more verification */ + auth_hdr = (struct sctp_authhdr *)chunk->skb->data; + chunk->subh.auth_hdr = auth_hdr; + skb_pull(chunk->skb, sizeof(struct sctp_authhdr)); + + /* Make sure that we suport the HMAC algorithm from the auth + * chunk. + */ + if (!sctp_auth_asoc_verify_hmac_id(asoc, auth_hdr->hmac_id)) + return SCTP_IERROR_AUTH_BAD_HMAC; + + /* Make sure that the provided shared key identifier has been + * configured + */ + key_id = ntohs(auth_hdr->shkey_id); + if (key_id != asoc->active_key_id && !sctp_auth_get_shkey(asoc, key_id)) + return SCTP_IERROR_AUTH_BAD_KEYID; + + + /* Make sure that the length of the signature matches what + * we expect. + */ + sig_len = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_auth_chunk_t); + hmac = sctp_auth_get_hmac(ntohs(auth_hdr->hmac_id)); + if (sig_len != hmac->hmac_len) + return SCTP_IERROR_PROTO_VIOLATION; + + /* Now that we've done validation checks, we can compute and + * verify the hmac. The steps involved are: + * 1. Save the digest from the chunk. + * 2. Zero out the digest in the chunk. + * 3. Compute the new digest + * 4. Compare saved and new digests. + */ + digest = auth_hdr->hmac; + skb_pull(chunk->skb, sig_len); + + save_digest = kmemdup(digest, sig_len, GFP_ATOMIC); + if (!save_digest) + goto nomem; + + memset(digest, 0, sig_len); + + sctp_auth_calculate_hmac(asoc, chunk->skb, + (struct sctp_auth_chunk *)chunk->chunk_hdr, + GFP_ATOMIC); + + /* Discard the packet if the digests do not match */ + if (memcmp(save_digest, digest, sig_len)) { + kfree(save_digest); + return SCTP_IERROR_BAD_SIG; + } + + kfree(save_digest); + chunk->auth = 1; + + return SCTP_IERROR_NO_ERROR; +nomem: + return SCTP_IERROR_NOMEM; +} + +sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_authhdr *auth_hdr; + struct sctp_chunk *chunk = arg; + struct sctp_chunk *err_chunk; + sctp_ierror_t error; + + if (!sctp_vtag_verify(chunk, asoc)) { + sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, + SCTP_NULL()); + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + } + + /* Make sure that the AUTH chunk has valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_auth_chunk))) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + + auth_hdr = (struct sctp_authhdr *)chunk->skb->data; + error = sctp_sf_authenticate(ep, asoc, type, chunk); + switch (error) { + case SCTP_IERROR_AUTH_BAD_HMAC: + /* Generate the ERROR chunk and discard the rest + * of the packet + */ + err_chunk = sctp_make_op_error(asoc, chunk, + SCTP_ERROR_UNSUP_HMAC, + &auth_hdr->hmac_id, + sizeof(__u16)); + if (err_chunk) { + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, + SCTP_CHUNK(err_chunk)); + } + /* Fall Through */ + case SCTP_IERROR_AUTH_BAD_KEYID: + case SCTP_IERROR_BAD_SIG: + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + break; + case SCTP_IERROR_PROTO_VIOLATION: + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + break; + case SCTP_IERROR_NOMEM: + return SCTP_DISPOSITION_NOMEM; + default: + break; + } + + if (asoc->active_key_id != ntohs(auth_hdr->shkey_id)) { + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, ntohs(auth_hdr->shkey_id), + SCTP_AUTH_NEWKEY, GFP_ATOMIC); + + if (!ev) + return -ENOMEM; + + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); + } + + return SCTP_DISPOSITION_CONSUME; +} + +/* * Process an unknown chunk. * * Section: 3.2. Also, 2.1 in the implementor's guide. @@ -3822,6 +4069,20 @@ static sctp_disposition_t sctp_sf_abort_violation( if (!abort) goto nomem; + /* SCTP-AUTH, Section 6.3: + * It should be noted that if the receiver wants to tear + * down an association in an authenticated way only, the + * handling of malformed packets should not result in + * tearing down the association. + * + * This means that if we only want to abort associations + * in an authenticated way (i.e AUTH+ABORT), then we + * can't destory this association just becuase the packet + * was malformed. + */ + if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) + goto discard; + if (asoc) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); @@ -3859,6 +4120,7 @@ static sctp_disposition_t sctp_sf_abort_violation( SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); } +discard: sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); @@ -5428,10 +5690,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, sctp_verb_t deliver; int tmp; __u32 tsn; - int account_value; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sock *sk = asoc->base.sk; - int rcvbuf_over = 0; data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data; skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); @@ -5441,48 +5701,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, /* ASSERT: Now skb->data is really the user data. */ - /* - * If we are established, and we have used up our receive buffer - * memory, think about droping the frame. - * Note that we have an opportunity to improve performance here. - * If we accept one chunk from an skbuff, we have to keep all the - * memory of that skbuff around until the chunk is read into user - * space. Therefore, once we accept 1 chunk we may as well accept all - * remaining chunks in the skbuff. The data_accepted flag helps us do - * that. - */ - if ((asoc->state == SCTP_STATE_ESTABLISHED) && (!chunk->data_accepted)) { - /* - * If the receive buffer policy is 1, then each - * association can allocate up to sk_rcvbuf bytes - * otherwise, all the associations in aggregate - * may allocate up to sk_rcvbuf bytes - */ - if (asoc->ep->rcvbuf_policy) - account_value = atomic_read(&asoc->rmem_alloc); - else - account_value = atomic_read(&sk->sk_rmem_alloc); - if (account_value > sk->sk_rcvbuf) { - /* - * We need to make forward progress, even when we are - * under memory pressure, so we always allow the - * next tsn after the ctsn ack point to be accepted. - * This lets us avoid deadlocks in which we have to - * drop frames that would otherwise let us drain the - * receive queue. - */ - if ((sctp_tsnmap_get_ctsn(map) + 1) != tsn) - return SCTP_IERROR_IGNORE_TSN; - - /* - * We're going to accept the frame but we should renege - * to make space for it. This will send us down that - * path later in this function. - */ - rcvbuf_over = 1; - } - } - /* Process ECN based congestion. * * Since the chunk structure is reused for all chunks within @@ -5542,18 +5760,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, * seems a bit troublesome in that frag_point varies based on * PMTU. In cases, such as loopback, this might be a rather * large spill over. - * NOTE: If we have a full receive buffer here, we only renege if - * our receiver can still make progress without the tsn being - * received. We do this because in the event that the associations - * receive queue is empty we are filling a leading gap, and since - * reneging moves the gap to the end of the tsn stream, we are likely - * to stall again very shortly. Avoiding the renege when we fill a - * leading gap is a good heuristic for avoiding such steady state - * stalls. - */ - if (!asoc->rwnd || asoc->rwnd_over || - (datalen > asoc->rwnd + asoc->frag_point) || - (rcvbuf_over && (!skb_queue_len(&sk->sk_receive_queue)))) { + */ + if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over || + (datalen > asoc->rwnd + asoc->frag_point))) { /* If this is the next TSN, consider reneging to make * room. Note: Playing nice with a confused sender. A @@ -5574,6 +5783,21 @@ static int sctp_eat_data(const struct sctp_association *asoc, } /* + * Also try to renege to limit our memory usage in the event that + * we are under memory pressure + * If we can't renege, don't worry about it, the sk_stream_rmem_schedule + * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our + * memory usage too much + */ + if (*sk->sk_prot_creator->memory_pressure) { + if (sctp_tsnmap_has_gap(map) && + (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { + SCTP_DEBUG_PRINTK("Under Pressure! Reneging for tsn:%u\n", tsn); + deliver = SCTP_CMD_RENEGE; + } + } + + /* * Section 3.3.10.9 No User Data (9) * * Cause of error diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index ddb0ba3974b0..a93a4bc8f68f 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -523,6 +523,34 @@ static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUN TYPE_SCTP_FWD_TSN, }; /*state_fn_t prsctp_chunk_event_table[][] */ +#define TYPE_SCTP_AUTH { \ + /* SCTP_STATE_EMPTY */ \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ + /* SCTP_STATE_CLOSED */ \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ + /* SCTP_STATE_COOKIE_WAIT */ \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + /* SCTP_STATE_COOKIE_ECHOED */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ + /* SCTP_STATE_ESTABLISHED */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ + /* SCTP_STATE_SHUTDOWN_PENDING */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ + /* SCTP_STATE_SHUTDOWN_SENT */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ + /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ + /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ +} /* TYPE_SCTP_AUTH */ + +/* The primary index for this table is the chunk type. + * The secondary index for this table is the state. + */ +static const sctp_sm_table_entry_t auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { + TYPE_SCTP_AUTH, +}; /*state_fn_t auth_chunk_event_table[][] */ + static const sctp_sm_table_entry_t chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = { /* SCTP_STATE_EMPTY */ @@ -976,5 +1004,10 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, return &addip_chunk_event_table[1][state]; } + if (sctp_auth_enable) { + if (cid == SCTP_CID_AUTH) + return &auth_chunk_event_table[0][state]; + } + return &chunk_event_table_unknown[state]; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 772fbfb4bfda..9c6a4b5f6264 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -107,23 +107,42 @@ static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; +extern struct kmem_cache *sctp_bucket_cachep; +extern int sysctl_sctp_mem[3]; +extern int sysctl_sctp_rmem[3]; +extern int sysctl_sctp_wmem[3]; + +static int sctp_memory_pressure; +static atomic_t sctp_memory_allocated; +static atomic_t sctp_sockets_allocated; + +static void sctp_enter_memory_pressure(void) +{ + sctp_memory_pressure = 1; +} + + /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) { - struct sock *sk = asoc->base.sk; - int amt = 0; + int amt; - if (asoc->ep->sndbuf_policy) { - /* make sure that no association uses more than sk_sndbuf */ - amt = sk->sk_sndbuf - asoc->sndbuf_used; + if (asoc->ep->sndbuf_policy) + amt = asoc->sndbuf_used; + else + amt = atomic_read(&asoc->base.sk->sk_wmem_alloc); + + if (amt >= asoc->base.sk->sk_sndbuf) { + if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK) + amt = 0; + else { + amt = sk_stream_wspace(asoc->base.sk); + if (amt < 0) + amt = 0; + } } else { - /* do socket level accounting */ - amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amt = asoc->base.sk->sk_sndbuf - amt; } - - if (amt < 0) - amt = 0; - return amt; } @@ -155,6 +174,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) sizeof(struct sctp_chunk); atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + sk_charge_skb(sk, chunk->skb); } /* Verify that this is a valid address. */ @@ -2926,6 +2946,164 @@ static int sctp_setsockopt_maxburst(struct sock *sk, return 0; } +/* + * 7.1.18. Add a chunk that must be authenticated (SCTP_AUTH_CHUNK) + * + * This set option adds a chunk type that the user is requesting to be + * received only in an authenticated way. Changes to the list of chunks + * will only effect future associations on the socket. + */ +static int sctp_setsockopt_auth_chunk(struct sock *sk, + char __user *optval, + int optlen) +{ + struct sctp_authchunk val; + + if (optlen != sizeof(struct sctp_authchunk)) + return -EINVAL; + if (copy_from_user(&val, optval, optlen)) + return -EFAULT; + + switch (val.sauth_chunk) { + case SCTP_CID_INIT: + case SCTP_CID_INIT_ACK: + case SCTP_CID_SHUTDOWN_COMPLETE: + case SCTP_CID_AUTH: + return -EINVAL; + } + + /* add this chunk id to the endpoint */ + return sctp_auth_ep_add_chunkid(sctp_sk(sk)->ep, val.sauth_chunk); +} + +/* + * 7.1.19. Get or set the list of supported HMAC Identifiers (SCTP_HMAC_IDENT) + * + * This option gets or sets the list of HMAC algorithms that the local + * endpoint requires the peer to use. + */ +static int sctp_setsockopt_hmac_ident(struct sock *sk, + char __user *optval, + int optlen) +{ + struct sctp_hmacalgo *hmacs; + int err; + + if (optlen < sizeof(struct sctp_hmacalgo)) + return -EINVAL; + + hmacs = kmalloc(optlen, GFP_KERNEL); + if (!hmacs) + return -ENOMEM; + + if (copy_from_user(hmacs, optval, optlen)) { + err = -EFAULT; + goto out; + } + + if (hmacs->shmac_num_idents == 0 || + hmacs->shmac_num_idents > SCTP_AUTH_NUM_HMACS) { + err = -EINVAL; + goto out; + } + + err = sctp_auth_ep_set_hmacs(sctp_sk(sk)->ep, hmacs); +out: + kfree(hmacs); + return err; +} + +/* + * 7.1.20. Set a shared key (SCTP_AUTH_KEY) + * + * This option will set a shared secret key which is used to build an + * association shared key. + */ +static int sctp_setsockopt_auth_key(struct sock *sk, + char __user *optval, + int optlen) +{ + struct sctp_authkey *authkey; + struct sctp_association *asoc; + int ret; + + if (optlen <= sizeof(struct sctp_authkey)) + return -EINVAL; + + authkey = kmalloc(optlen, GFP_KERNEL); + if (!authkey) + return -ENOMEM; + + if (copy_from_user(authkey, optval, optlen)) { + ret = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, authkey->sca_assoc_id); + if (!asoc && authkey->sca_assoc_id && sctp_style(sk, UDP)) { + ret = -EINVAL; + goto out; + } + + ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey); +out: + kfree(authkey); + return ret; +} + +/* + * 7.1.21. Get or set the active shared key (SCTP_AUTH_ACTIVE_KEY) + * + * This option will get or set the active shared key to be used to build + * the association shared key. + */ +static int sctp_setsockopt_active_key(struct sock *sk, + char __user *optval, + int optlen) +{ + struct sctp_authkeyid val; + struct sctp_association *asoc; + + if (optlen != sizeof(struct sctp_authkeyid)) + return -EINVAL; + if (copy_from_user(&val, optval, optlen)) + return -EFAULT; + + asoc = sctp_id2assoc(sk, val.scact_assoc_id); + if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + return sctp_auth_set_active_key(sctp_sk(sk)->ep, asoc, + val.scact_keynumber); +} + +/* + * 7.1.22. Delete a shared key (SCTP_AUTH_DELETE_KEY) + * + * This set option will delete a shared secret key from use. + */ +static int sctp_setsockopt_del_key(struct sock *sk, + char __user *optval, + int optlen) +{ + struct sctp_authkeyid val; + struct sctp_association *asoc; + + if (optlen != sizeof(struct sctp_authkeyid)) + return -EINVAL; + if (copy_from_user(&val, optval, optlen)) + return -EFAULT; + + asoc = sctp_id2assoc(sk, val.scact_assoc_id); + if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + return sctp_auth_del_key_id(sctp_sk(sk)->ep, asoc, + val.scact_keynumber); + +} + + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3049,6 +3227,21 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_MAX_BURST: retval = sctp_setsockopt_maxburst(sk, optval, optlen); break; + case SCTP_AUTH_CHUNK: + retval = sctp_setsockopt_auth_chunk(sk, optval, optlen); + break; + case SCTP_HMAC_IDENT: + retval = sctp_setsockopt_hmac_ident(sk, optval, optlen); + break; + case SCTP_AUTH_KEY: + retval = sctp_setsockopt_auth_key(sk, optval, optlen); + break; + case SCTP_AUTH_ACTIVE_KEY: + retval = sctp_setsockopt_active_key(sk, optval, optlen); + break; + case SCTP_AUTH_DELETE_KEY: + retval = sctp_setsockopt_del_key(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -3293,6 +3486,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); + atomic_inc(&sctp_sockets_allocated); return 0; } @@ -3306,7 +3500,7 @@ SCTP_STATIC int sctp_destroy_sock(struct sock *sk) /* Release our hold on the endpoint. */ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - + atomic_dec(&sctp_sockets_allocated); return 0; } @@ -4819,6 +5013,118 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, return -ENOTSUPP; } +static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_hmac_algo_param *hmacs; + __u16 param_len; + + hmacs = sctp_sk(sk)->ep->auth_hmacs_list; + param_len = ntohs(hmacs->param_hdr.length); + + if (len < param_len) + return -EINVAL; + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, hmacs->hmac_ids, len)) + return -EFAULT; + + return 0; +} + +static int sctp_getsockopt_active_key(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_authkeyid val; + struct sctp_association *asoc; + + if (len < sizeof(struct sctp_authkeyid)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid))) + return -EFAULT; + + asoc = sctp_id2assoc(sk, val.scact_assoc_id); + if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) + val.scact_keynumber = asoc->active_key_id; + else + val.scact_keynumber = sctp_sk(sk)->ep->active_key_id; + + return 0; +} + +static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_authchunks val; + struct sctp_association *asoc; + struct sctp_chunks_param *ch; + char __user *to; + + if (len <= sizeof(struct sctp_authchunks)) + return -EINVAL; + + if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) + return -EFAULT; + + to = val.gauth_chunks; + asoc = sctp_id2assoc(sk, val.gauth_assoc_id); + if (!asoc) + return -EINVAL; + + ch = asoc->peer.peer_chunks; + + /* See if the user provided enough room for all the data */ + if (len < ntohs(ch->param_hdr.length)) + return -EINVAL; + + len = ntohs(ch->param_hdr.length); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(to, ch->chunks, len)) + return -EFAULT; + + return 0; +} + +static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_authchunks val; + struct sctp_association *asoc; + struct sctp_chunks_param *ch; + char __user *to; + + if (len <= sizeof(struct sctp_authchunks)) + return -EINVAL; + + if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) + return -EFAULT; + + to = val.gauth_chunks; + asoc = sctp_id2assoc(sk, val.gauth_assoc_id); + if (!asoc && val.gauth_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) + ch = (struct sctp_chunks_param*)asoc->c.auth_chunks; + else + ch = sctp_sk(sk)->ep->auth_chunk_list; + + if (len < ntohs(ch->param_hdr.length)) + return -EINVAL; + + len = ntohs(ch->param_hdr.length); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(to, ch->chunks, len)) + return -EFAULT; + + return 0; +} + SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -4942,6 +5248,25 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_MAX_BURST: retval = sctp_getsockopt_maxburst(sk, len, optval, optlen); break; + case SCTP_AUTH_KEY: + case SCTP_AUTH_CHUNK: + case SCTP_AUTH_DELETE_KEY: + retval = -EOPNOTSUPP; + break; + case SCTP_HMAC_IDENT: + retval = sctp_getsockopt_hmac_ident(sk, len, optval, optlen); + break; + case SCTP_AUTH_ACTIVE_KEY: + retval = sctp_getsockopt_active_key(sk, len, optval, optlen); + break; + case SCTP_PEER_AUTH_CHUNKS: + retval = sctp_getsockopt_peer_auth_chunks(sk, len, optval, + optlen); + break; + case SCTP_LOCAL_AUTH_CHUNKS: + retval = sctp_getsockopt_local_auth_chunks(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -4989,22 +5314,14 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) sctp_local_bh_disable(); if (snum == 0) { - /* Search for an available port. - * - * 'sctp_port_rover' was the last port assigned, so - * we start to search from 'sctp_port_rover + - * 1'. What we do is first check if port 'rover' is - * already in the hash table; if not, we use that; if - * it is, we try next. - */ - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover; - int index; - - sctp_spin_lock(&sctp_port_alloc_lock); - rover = sctp_port_rover; + /* Search for an available port. */ + int low, high, remaining, index; + unsigned int rover; + + inet_get_local_port_range(&low, &high); + remaining = (high - low) + 1; + rover = net_random() % remaining + low; + do { rover++; if ((rover < low) || (rover > high)) @@ -5019,8 +5336,6 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) next: sctp_spin_unlock(&head->lock); } while (--remaining > 0); - sctp_port_rover = rover; - sctp_spin_unlock(&sctp_port_alloc_lock); /* Exhausted local port range during search? */ ret = 1; @@ -5720,6 +6035,12 @@ static void sctp_wfree(struct sk_buff *skb) atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + /* + * This undoes what is done via sk_charge_skb + */ + sk->sk_wmem_queued -= skb->truesize; + sk->sk_forward_alloc += skb->truesize; + sock_wfree(skb); __sctp_write_space(asoc); @@ -5737,6 +6058,11 @@ void sctp_sock_rfree(struct sk_buff *skb) struct sctp_ulpevent *event = sctp_skb2event(skb); atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); + + /* + * Mimic the behavior of sk_stream_rfree + */ + sk->sk_forward_alloc += event->rmem_len; } @@ -6126,6 +6452,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_release_sock(newsk); } + /* This proto struct describes the ULP interface for SCTP. */ struct proto sctp_prot = { .name = "SCTP", @@ -6148,6 +6475,12 @@ struct proto sctp_prot = { .unhash = sctp_unhash, .get_port = sctp_get_port, .obj_size = sizeof(struct sctp_sock), + .sysctl_mem = sysctl_sctp_mem, + .sysctl_rmem = sysctl_sctp_rmem, + .sysctl_wmem = sysctl_sctp_wmem, + .memory_pressure = &sctp_memory_pressure, + .enter_memory_pressure = sctp_enter_memory_pressure, + .memory_allocated = &sctp_memory_allocated, }; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -6172,5 +6505,11 @@ struct proto sctpv6_prot = { .unhash = sctp_unhash, .get_port = sctp_get_port, .obj_size = sizeof(struct sctp6_sock), + .sysctl_mem = sysctl_sctp_mem, + .sysctl_rmem = sysctl_sctp_rmem, + .sysctl_wmem = sysctl_sctp_wmem, + .memory_pressure = &sctp_memory_pressure, + .enter_memory_pressure = sctp_enter_memory_pressure, + .memory_allocated = &sctp_memory_allocated, }; #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index e2c679baf912..0669778e4335 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -52,6 +52,10 @@ static int int_max = INT_MAX; static long sack_timer_min = 1; static long sack_timer_max = 500; +extern int sysctl_sctp_mem[3]; +extern int sysctl_sctp_rmem[3]; +extern int sysctl_sctp_wmem[3]; + static ctl_table sctp_table[] = { { .ctl_name = NET_SCTP_RTO_INITIAL, @@ -226,6 +230,39 @@ static ctl_table sctp_table[] = { .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sctp_mem", + .data = &sysctl_sctp_mem, + .maxlen = sizeof(sysctl_sctp_mem), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sctp_rmem", + .data = &sysctl_sctp_rmem, + .maxlen = sizeof(sysctl_sctp_rmem), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sctp_wmem", + .data = &sysctl_sctp_wmem, + .maxlen = sizeof(sysctl_sctp_wmem), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "auth_enable", + .data = &sctp_auth_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec + }, { .ctl_name = 0 } }; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index bfecb353ab3d..2c17c7efad46 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -685,6 +685,24 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, struct sctp_ulpevent *event = NULL; struct sk_buff *skb; size_t padding, len; + int rx_count; + + /* + * check to see if we need to make space for this + * new skb, expand the rcvbuffer if needed, or drop + * the frame + */ + if (asoc->ep->rcvbuf_policy) + rx_count = atomic_read(&asoc->rmem_alloc); + else + rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); + + if (rx_count >= asoc->base.sk->sk_rcvbuf) { + + if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || + (!sk_stream_rmem_schedule(asoc->base.sk, chunk->skb))) + goto fail; + } /* Clone the original skb, sharing the data. */ skb = skb_clone(chunk->skb, gfp); @@ -795,6 +813,43 @@ fail: return NULL; } +struct sctp_ulpevent *sctp_ulpevent_make_authkey( + const struct sctp_association *asoc, __u16 key_id, + __u32 indication, gfp_t gfp) +{ + struct sctp_ulpevent *event; + struct sctp_authkey_event *ak; + struct sk_buff *skb; + + event = sctp_ulpevent_new(sizeof(struct sctp_authkey_event), + MSG_NOTIFICATION, gfp); + if (!event) + goto fail; + + skb = sctp_event2skb(event); + ak = (struct sctp_authkey_event *) + skb_put(skb, sizeof(struct sctp_authkey_event)); + + ak->auth_type = SCTP_AUTHENTICATION_EVENT; + ak->auth_flags = 0; + ak->auth_length = sizeof(struct sctp_authkey_event); + + ak->auth_keynumber = key_id; + ak->auth_altkeynumber = 0; + ak->auth_indication = indication; + + /* + * The association id field, holds the identifier for the association. + */ + sctp_ulpevent_set_owner(event, asoc); + ak->auth_assoc_id = sctp_assoc2id(asoc); + + return event; +fail: + return NULL; +} + + /* Return the notification type, assuming this is a notification * event. */ diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index fa0ba2a5564e..b9370956b187 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1027,6 +1027,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, sctp_ulpq_partial_delivery(ulpq, chunk, gfp); } + sk_stream_mem_reclaim(asoc->base.sk); return; } diff --git a/net/socket.c b/net/socket.c index b09eb9036a17..379b3a390755 100644 --- a/net/socket.c +++ b/net/socket.c @@ -84,6 +84,7 @@ #include <linux/kmod.h> #include <linux/audit.h> #include <linux/wireless.h> +#include <linux/nsproxy.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -790,9 +791,9 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; -void brioctl_set(int (*hook) (unsigned int, void __user *)) +void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) { mutex_lock(&br_ioctl_mutex); br_ioctl_hook = hook; @@ -802,9 +803,9 @@ void brioctl_set(int (*hook) (unsigned int, void __user *)) EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); -static int (*vlan_ioctl_hook) (void __user *arg); +static int (*vlan_ioctl_hook) (struct net *, void __user *arg); -void vlan_ioctl_set(int (*hook) (void __user *)) +void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) { mutex_lock(&vlan_ioctl_mutex); vlan_ioctl_hook = hook; @@ -833,16 +834,20 @@ EXPORT_SYMBOL(dlci_ioctl_set); static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; + struct sock *sk; void __user *argp = (void __user *)arg; int pid, err; + struct net *net; sock = file->private_data; + sk = sock->sk; + net = sk->sk_net; if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); } else #ifdef CONFIG_WIRELESS_EXT if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); } else #endif /* CONFIG_WIRELESS_EXT */ switch (cmd) { @@ -868,7 +873,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) mutex_lock(&br_ioctl_mutex); if (br_ioctl_hook) - err = br_ioctl_hook(cmd, argp); + err = br_ioctl_hook(net, cmd, argp); mutex_unlock(&br_ioctl_mutex); break; case SIOCGIFVLAN: @@ -879,7 +884,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) mutex_lock(&vlan_ioctl_mutex); if (vlan_ioctl_hook) - err = vlan_ioctl_hook(argp); + err = vlan_ioctl_hook(net, argp); mutex_unlock(&vlan_ioctl_mutex); break; case SIOCADDDLCI: @@ -902,7 +907,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) * to the NIC driver. */ if (err == -ENOIOCTLCMD) - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); break; } return err; @@ -1071,7 +1076,7 @@ call_kill: return 0; } -static int __sock_create(int family, int type, int protocol, +static int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { int err; @@ -1147,7 +1152,7 @@ static int __sock_create(int family, int type, int protocol, /* Now protected by module ref count */ rcu_read_unlock(); - err = pf->create(sock, protocol); + err = pf->create(net, sock, protocol); if (err < 0) goto out_module_put; @@ -1186,12 +1191,12 @@ out_release: int sock_create(int family, int type, int protocol, struct socket **res) { - return __sock_create(family, type, protocol, res, 0); + return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } int sock_create_kern(int family, int type, int protocol, struct socket **res) { - return __sock_create(family, type, protocol, res, 1); + return __sock_create(&init_net, family, type, protocol, res, 1); } asmlinkage long sys_socket(int family, int type, int protocol) @@ -1924,7 +1929,7 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, * kernel msghdr to use the kernel address space) */ - uaddr = (void __user *)msg_sys.msg_name; + uaddr = (__force void __user *)msg_sys.msg_name; uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) { err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); @@ -2230,6 +2235,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) err = sock->ops->accept(sock, *newsock, flags); if (err < 0) { sock_release(*newsock); + *newsock = NULL; goto done; } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ebe344f34d1a..8e05557414ce 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1218,23 +1218,15 @@ static const struct seq_operations cache_content_op = { static int content_open(struct inode *inode, struct file *file) { - int res; struct handle *han; struct cache_detail *cd = PDE(inode)->data; - han = kmalloc(sizeof(*han), GFP_KERNEL); + han = __seq_open_private(file, &cache_content_op, sizeof(*han)); if (han == NULL) return -ENOMEM; han->cd = cd; - - res = seq_open(file, &cache_content_op); - if (res) - kfree(han); - else - ((struct seq_file *)file->private_data)->private = han; - - return res; + return 0; } static const struct file_operations content_file_operations = { diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 74ba7d443dfc..4d4f3738b688 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -21,6 +21,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/metrics.h> +#include <net/net_namespace.h> #define RPCDBG_FACILITY RPCDBG_MISC @@ -265,7 +266,7 @@ rpc_proc_init(void) dprintk("RPC: registering /proc/net/rpc\n"); if (!proc_net_rpc) { struct proc_dir_entry *ent; - ent = proc_mkdir("rpc", proc_net); + ent = proc_mkdir("rpc", init_net.proc_net); if (ent) { ent->owner = THIS_MODULE; proc_net_rpc = ent; @@ -279,7 +280,7 @@ rpc_proc_exit(void) dprintk("RPC: unregistering /proc/net/rpc\n"); if (proc_net_rpc) { proc_net_rpc = NULL; - remove_proc_entry("net/rpc", NULL); + remove_proc_entry("rpc", init_net.proc_net); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 036ab520df21..c75bffeb89eb 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -19,6 +19,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ +#include <linux/kernel.h> #include <linux/sched.h> #include <linux/errno.h> #include <linux/fcntl.h> @@ -103,7 +104,7 @@ static struct lock_class_key svc_slock_key[2]; static inline void svc_reclassify_socket(struct socket *sock) { struct sock *sk = sock->sk; - BUG_ON(sk->sk_lock.owner != NULL); + BUG_ON(sock_owned_by_user(sk)); switch (sk->sk_family) { case AF_INET: sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", @@ -877,7 +878,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) } else { rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; rqstp->rq_respages = rqstp->rq_pages + 1 + - (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE; + DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE); } if (serv->sv_stats) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4ae7eed7f617..282efd447a61 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1186,7 +1186,7 @@ static struct lock_class_key xs_slock_key[2]; static inline void xs_reclassify_socket(struct socket *sock) { struct sock *sk = sock->sk; - BUG_ON(sk->sk_lock.owner != NULL); + BUG_ON(sock_owned_by_user(sk)); switch (sk->sk_family) { case AF_INET: sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS", diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 711ca4b1f051..3bbef2ab22ae 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -38,6 +38,7 @@ #include <net/tipc/tipc_bearer.h> #include <net/tipc/tipc_msg.h> #include <linux/netdevice.h> +#include <net/net_namespace.h> #define MAX_ETH_BEARERS 2 #define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI @@ -76,7 +77,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, skb_reset_network_header(clone); dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev; clone->dev = dev; - dev->hard_header(clone, dev, ETH_P_TIPC, + dev_hard_header(clone, dev, ETH_P_TIPC, &dest->dev_addr.eth_addr, dev->dev_addr, clone->len); dev_queue_xmit(clone); @@ -100,6 +101,11 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; u32 size; + if (dev->nd_net != &init_net) { + kfree_skb(buf); + return 0; + } + if (likely(eb_ptr->bearer)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { size = msg_size((struct tipc_msg *)buf->data); @@ -129,7 +135,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) /* Find device with specified name */ - for_each_netdev(pdev){ + for_each_netdev(&init_net, pdev){ if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { dev = pdev; break; @@ -192,6 +198,9 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + while ((eb_ptr->dev != dev)) { if (++eb_ptr == stop) return NOTIFY_DONE; /* couldn't find device */ @@ -234,12 +243,12 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, static char *eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) { unchar *addr = (unchar *)&a->dev_addr; + DECLARE_MAC_BUF(mac); if (str_size < 18) *str_buf = '\0'; else - sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x", - addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + sprintf(str_buf, "%s", print_mac(mac, addr)); return str_buf; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 84110172031e..e36b4b5a5222 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -162,13 +162,16 @@ static void advance_queue(struct tipc_sock *tsock) * * Returns 0 on success, errno otherwise */ -static int tipc_create(struct socket *sock, int protocol) +static int tipc_create(struct net *net, struct socket *sock, int protocol) { struct tipc_sock *tsock; struct tipc_port *port; struct sock *sk; u32 ref; + if (net != &init_net) + return -EAFNOSUPPORT; + if (unlikely(protocol != 0)) return -EPROTONOSUPPORT; @@ -198,7 +201,7 @@ static int tipc_create(struct socket *sock, int protocol) return -EPROTOTYPE; } - sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1); + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, 1); if (!sk) { tipc_deleteport(ref); return -ENOMEM; @@ -1372,7 +1375,7 @@ static int accept(struct socket *sock, struct socket *newsock, int flags) } buf = skb_peek(&sock->sk->sk_receive_queue); - res = tipc_create(newsock, 0); + res = tipc_create(sock->sk->sk_net, newsock, 0); if (!res) { struct tipc_sock *new_tsock = tipc_sk(newsock->sk); struct tipc_portid id; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a05c34260e70..2b57eaf66abc 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -103,6 +103,7 @@ #include <asm/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/af_unix.h> @@ -593,7 +594,7 @@ static struct proto unix_proto = { */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock * unix_create1(struct socket *sock) +static struct sock * unix_create1(struct net *net, struct socket *sock) { struct sock *sk = NULL; struct unix_sock *u; @@ -601,7 +602,7 @@ static struct sock * unix_create1(struct socket *sock) if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) goto out; - sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, 1); if (!sk) goto out; @@ -627,8 +628,11 @@ out: return sk; } -static int unix_create(struct socket *sock, int protocol) +static int unix_create(struct net *net, struct socket *sock, int protocol) { + if (net != &init_net) + return -EAFNOSUPPORT; + if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; @@ -654,7 +658,7 @@ static int unix_create(struct socket *sock, int protocol) return -ESOCKTNOSUPPORT; } - return unix_create1(sock) ? 0 : -ENOMEM; + return unix_create1(net, sock) ? 0 : -ENOMEM; } static int unix_release(struct socket *sock) @@ -1038,7 +1042,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(NULL); + newsk = unix_create1(sk->sk_net, NULL); if (newsk == NULL) goto out; @@ -2082,25 +2086,7 @@ static const struct seq_operations unix_seq_ops = { static int unix_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - int *iter = kmalloc(sizeof(int), GFP_KERNEL); - - if (!iter) - goto out; - - rc = seq_open(file, &unix_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = iter; - *iter = 0; -out: - return rc; -out_kfree: - kfree(iter); - goto out; + return seq_open_private(file, &unix_seq_ops, sizeof(int)); } static const struct file_operations unix_seq_fops = { @@ -2135,7 +2121,7 @@ static int __init af_unix_init(void) sock_register(&unix_family_ops); #ifdef CONFIG_PROC_FS - proc_net_fops_create("unix", 0, &unix_seq_fops); + proc_net_fops_create(&init_net, "unix", 0, &unix_seq_fops); #endif unix_sysctl_register(); out: @@ -2146,7 +2132,7 @@ static void __exit af_unix_exit(void) { sock_unregister(PF_UNIX); unix_sysctl_unregister(); - proc_net_remove("unix"); + proc_net_remove(&init_net, "unix"); proto_unregister(&unix_proto); } diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 236e7eaf1b7f..f2e54c3f064e 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -29,6 +29,7 @@ #include <linux/seq_file.h> #include <linux/smp_lock.h> +#include <net/net_namespace.h> #include <asm/io.h> #define PROC_STATS_FORMAT "%30s: %12lu\n" @@ -287,7 +288,7 @@ static const struct file_operations wandev_fops = { int __init wanrouter_proc_init(void) { struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, proc_net); + proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); if (!proc_router) goto fail; @@ -303,7 +304,7 @@ int __init wanrouter_proc_init(void) fail_stat: remove_proc_entry("config", proc_router); fail_config: - remove_proc_entry(ROUTER_NAME, proc_net); + remove_proc_entry(ROUTER_NAME, init_net.proc_net); fail: return -ENOMEM; } @@ -316,7 +317,7 @@ void wanrouter_proc_cleanup(void) { remove_proc_entry("config", proc_router); remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, proc_net); + remove_proc_entry(ROUTER_NAME, init_net.proc_net); } /* diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index a228d56a91b8..6426055a8be0 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -1,6 +1,19 @@ config CFG80211 tristate "Improved wireless configuration API" +config NL80211 + bool "nl80211 new netlink interface support" + depends on CFG80211 + default y + ---help--- + This option turns on the new netlink interface + (nl80211) support in cfg80211. + + If =n, drivers using mac80211 will be configured via + wireless extension support provided by that subsystem. + + If unsure, say Y. + config WIRELESS_EXT bool "Wireless extensions" default n @@ -10,7 +23,9 @@ config WIRELESS_EXT Wireless extensions will be replaced by cfg80211 and will be required only by legacy drivers that implement - wireless extension handlers. + wireless extension handlers. This option does not + affect the wireless-extension backward compatibility + code in cfg80211. Say N (if you can) unless you know you need wireless extensions for external modules. diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 092116e390b6..65710a42e5a7 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_WIRELESS_EXT) += wext.o obj-$(CONFIG_CFG80211) += cfg80211.o cfg80211-y += core.o sysfs.o radiotap.o +cfg80211-$(CONFIG_NL80211) += nl80211.o diff --git a/net/wireless/core.c b/net/wireless/core.c index 9771451eae21..febc33bc9c09 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -16,6 +16,7 @@ #include <net/genetlink.h> #include <net/cfg80211.h> #include <net/wireless.h> +#include "nl80211.h" #include "core.h" #include "sysfs.h" @@ -36,6 +37,146 @@ static int wiphy_counter; /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; +/* requires cfg80211_drv_mutex to be held! */ +static struct cfg80211_registered_device *cfg80211_drv_by_wiphy(int wiphy) +{ + struct cfg80211_registered_device *result = NULL, *drv; + + list_for_each_entry(drv, &cfg80211_drv_list, list) { + if (drv->idx == wiphy) { + result = drv; + break; + } + } + + return result; +} + +/* requires cfg80211_drv_mutex to be held! */ +static struct cfg80211_registered_device * +__cfg80211_drv_from_info(struct genl_info *info) +{ + int ifindex; + struct cfg80211_registered_device *bywiphy = NULL, *byifidx = NULL; + struct net_device *dev; + int err = -EINVAL; + + if (info->attrs[NL80211_ATTR_WIPHY]) { + bywiphy = cfg80211_drv_by_wiphy( + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY])); + err = -ENODEV; + } + + if (info->attrs[NL80211_ATTR_IFINDEX]) { + ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); + dev = dev_get_by_index(&init_net, ifindex); + if (dev) { + if (dev->ieee80211_ptr) + byifidx = + wiphy_to_dev(dev->ieee80211_ptr->wiphy); + dev_put(dev); + } + err = -ENODEV; + } + + if (bywiphy && byifidx) { + if (bywiphy != byifidx) + return ERR_PTR(-EINVAL); + else + return bywiphy; /* == byifidx */ + } + if (bywiphy) + return bywiphy; + + if (byifidx) + return byifidx; + + return ERR_PTR(err); +} + +struct cfg80211_registered_device * +cfg80211_get_dev_from_info(struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + + mutex_lock(&cfg80211_drv_mutex); + drv = __cfg80211_drv_from_info(info); + + /* if it is not an error we grab the lock on + * it to assure it won't be going away while + * we operate on it */ + if (!IS_ERR(drv)) + mutex_lock(&drv->mtx); + + mutex_unlock(&cfg80211_drv_mutex); + + return drv; +} + +struct cfg80211_registered_device * +cfg80211_get_dev_from_ifindex(int ifindex) +{ + struct cfg80211_registered_device *drv = ERR_PTR(-ENODEV); + struct net_device *dev; + + mutex_lock(&cfg80211_drv_mutex); + dev = dev_get_by_index(&init_net, ifindex); + if (!dev) + goto out; + if (dev->ieee80211_ptr) { + drv = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + mutex_lock(&drv->mtx); + } else + drv = ERR_PTR(-ENODEV); + dev_put(dev); + out: + mutex_unlock(&cfg80211_drv_mutex); + return drv; +} + +void cfg80211_put_dev(struct cfg80211_registered_device *drv) +{ + BUG_ON(IS_ERR(drv)); + mutex_unlock(&drv->mtx); +} + +int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, + char *newname) +{ + int idx, taken = -1, result, digits; + + /* prohibit calling the thing phy%d when %d is not its number */ + sscanf(newname, PHY_NAME "%d%n", &idx, &taken); + if (taken == strlen(newname) && idx != rdev->idx) { + /* count number of places needed to print idx */ + digits = 1; + while (idx /= 10) + digits++; + /* + * deny the name if it is phy<idx> where <idx> is printed + * without leading zeroes. taken == strlen(newname) here + */ + if (taken == strlen(PHY_NAME) + digits) + return -EINVAL; + } + + /* this will check for collisions */ + result = device_rename(&rdev->wiphy.dev, newname); + if (result) + return result; + + if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent, + rdev->wiphy.debugfsdir, + rdev->wiphy.debugfsdir->d_parent, + newname)) + printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n", + newname); + + nl80211_notify_dev_rename(rdev); + + return 0; +} + /* exported functions */ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) @@ -204,10 +345,16 @@ static int cfg80211_init(void) if (err) goto out_fail_notifier; + err = nl80211_init(); + if (err) + goto out_fail_nl80211; + ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL); return 0; +out_fail_nl80211: + unregister_netdevice_notifier(&cfg80211_netdev_notifier); out_fail_notifier: wiphy_sysfs_exit(); out_fail_sysfs: @@ -218,6 +365,7 @@ subsys_initcall(cfg80211_init); static void cfg80211_exit(void) { debugfs_remove(ieee80211_debugfs_dir); + nl80211_exit(); unregister_netdevice_notifier(&cfg80211_netdev_notifier); wiphy_sysfs_exit(); } diff --git a/net/wireless/core.h b/net/wireless/core.h index 158db1edb92a..eb0f846b40df 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -43,7 +43,39 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) extern struct mutex cfg80211_drv_mutex; extern struct list_head cfg80211_drv_list; +/* + * This function returns a pointer to the driver + * that the genl_info item that is passed refers to. + * If successful, it returns non-NULL and also locks + * the driver's mutex! + * + * This means that you need to call cfg80211_put_dev() + * before being allowed to acquire &cfg80211_drv_mutex! + * + * This is necessary because we need to lock the global + * mutex to get an item off the list safely, and then + * we lock the drv mutex so it doesn't go away under us. + * + * We don't want to keep cfg80211_drv_mutex locked + * for all the time in order to allow requests on + * other interfaces to go through at the same time. + * + * The result of this can be a PTR_ERR and hence must + * be checked with IS_ERR() for errors. + */ +extern struct cfg80211_registered_device * +cfg80211_get_dev_from_info(struct genl_info *info); + +/* identical to cfg80211_get_dev_from_info but only operate on ifindex */ +extern struct cfg80211_registered_device * +cfg80211_get_dev_from_ifindex(int ifindex); + +extern void cfg80211_put_dev(struct cfg80211_registered_device *drv); + /* free object */ extern void cfg80211_dev_free(struct cfg80211_registered_device *drv); +extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv, + char *newname); + #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c new file mode 100644 index 000000000000..48b0d453e4e1 --- /dev/null +++ b/net/wireless/nl80211.c @@ -0,0 +1,431 @@ +/* + * This is the new netlink-based wireless configuration interface. + * + * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net> + */ + +#include <linux/if.h> +#include <linux/module.h> +#include <linux/err.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/if_ether.h> +#include <linux/ieee80211.h> +#include <linux/nl80211.h> +#include <linux/rtnetlink.h> +#include <linux/netlink.h> +#include <net/genetlink.h> +#include <net/cfg80211.h> +#include "core.h" +#include "nl80211.h" + +/* the netlink family */ +static struct genl_family nl80211_fam = { + .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ + .name = "nl80211", /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ + .maxattr = NL80211_ATTR_MAX, +}; + +/* internal helper: get drv and dev */ +static int get_drv_dev_by_info_ifindex(struct genl_info *info, + struct cfg80211_registered_device **drv, + struct net_device **dev) +{ + int ifindex; + + if (!info->attrs[NL80211_ATTR_IFINDEX]) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); + *dev = dev_get_by_index(&init_net, ifindex); + if (!*dev) + return -ENODEV; + + *drv = cfg80211_get_dev_from_ifindex(ifindex); + if (IS_ERR(*drv)) { + dev_put(*dev); + return PTR_ERR(*drv); + } + + return 0; +} + +/* policy for the attributes */ +static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { + [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, + .len = BUS_ID_SIZE-1 }, + + [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, + [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, + [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, +}; + +/* message building helper */ +static inline void *nl80211hdr_put(struct sk_buff *skb, u32 pid, u32 seq, + int flags, u8 cmd) +{ + /* since there is no private header just add the generic one */ + return genlmsg_put(skb, pid, seq, &nl80211_fam, flags, cmd); +} + +/* netlink command implementations */ + +static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, + struct cfg80211_registered_device *dev) +{ + void *hdr; + + hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); + if (!hdr) + return -1; + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx); + NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); + return genlmsg_end(msg, hdr); + + nla_put_failure: + return genlmsg_cancel(msg, hdr); +} + +static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx = 0; + int start = cb->args[0]; + struct cfg80211_registered_device *dev; + + mutex_lock(&cfg80211_drv_mutex); + list_for_each_entry(dev, &cfg80211_drv_list, list) { + if (++idx < start) + continue; + if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + dev) < 0) + break; + } + mutex_unlock(&cfg80211_drv_mutex); + + cb->args[0] = idx; + + return skb->len; +} + +static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + struct cfg80211_registered_device *dev; + + dev = cfg80211_get_dev_from_info(info); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + goto out_err; + + if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0) + goto out_free; + + cfg80211_put_dev(dev); + + return genlmsg_unicast(msg, info->snd_pid); + + out_free: + nlmsg_free(msg); + out_err: + cfg80211_put_dev(dev); + return -ENOBUFS; +} + +static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + int result; + + if (!info->attrs[NL80211_ATTR_WIPHY_NAME]) + return -EINVAL; + + rdev = cfg80211_get_dev_from_info(info); + if (IS_ERR(rdev)) + return PTR_ERR(rdev); + + result = cfg80211_dev_rename(rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); + + cfg80211_put_dev(rdev); + return result; +} + + +static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, + struct net_device *dev) +{ + void *hdr; + + hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE); + if (!hdr) + return -1; + + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, dev->name); + /* TODO: interface type */ + return genlmsg_end(msg, hdr); + + nla_put_failure: + return genlmsg_cancel(msg, hdr); +} + +static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb) +{ + int wp_idx = 0; + int if_idx = 0; + int wp_start = cb->args[0]; + int if_start = cb->args[1]; + struct cfg80211_registered_device *dev; + struct wireless_dev *wdev; + + mutex_lock(&cfg80211_drv_mutex); + list_for_each_entry(dev, &cfg80211_drv_list, list) { + if (++wp_idx < wp_start) + continue; + if_idx = 0; + + mutex_lock(&dev->devlist_mtx); + list_for_each_entry(wdev, &dev->netdev_list, list) { + if (++if_idx < if_start) + continue; + if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + wdev->netdev) < 0) + break; + } + mutex_unlock(&dev->devlist_mtx); + } + mutex_unlock(&cfg80211_drv_mutex); + + cb->args[0] = wp_idx; + cb->args[1] = if_idx; + + return skb->len; +} + +static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + struct cfg80211_registered_device *dev; + struct net_device *netdev; + int err; + + err = get_drv_dev_by_info_ifindex(info, &dev, &netdev); + if (err) + return err; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + goto out_err; + + if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, netdev) < 0) + goto out_free; + + dev_put(netdev); + cfg80211_put_dev(dev); + + return genlmsg_unicast(msg, info->snd_pid); + + out_free: + nlmsg_free(msg); + out_err: + dev_put(netdev); + cfg80211_put_dev(dev); + return -ENOBUFS; +} + +static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err, ifindex; + enum nl80211_iftype type; + struct net_device *dev; + + if (info->attrs[NL80211_ATTR_IFTYPE]) { + type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); + if (type > NL80211_IFTYPE_MAX) + return -EINVAL; + } else + return -EINVAL; + + err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + if (err) + return err; + ifindex = dev->ifindex; + dev_put(dev); + + if (!drv->ops->change_virtual_intf) { + err = -EOPNOTSUPP; + goto unlock; + } + + rtnl_lock(); + err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, type); + rtnl_unlock(); + + unlock: + cfg80211_put_dev(drv); + return err; +} + +static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err; + enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; + + if (!info->attrs[NL80211_ATTR_IFNAME]) + return -EINVAL; + + if (info->attrs[NL80211_ATTR_IFTYPE]) { + type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); + if (type > NL80211_IFTYPE_MAX) + return -EINVAL; + } + + drv = cfg80211_get_dev_from_info(info); + if (IS_ERR(drv)) + return PTR_ERR(drv); + + if (!drv->ops->add_virtual_intf) { + err = -EOPNOTSUPP; + goto unlock; + } + + rtnl_lock(); + err = drv->ops->add_virtual_intf(&drv->wiphy, + nla_data(info->attrs[NL80211_ATTR_IFNAME]), type); + rtnl_unlock(); + + unlock: + cfg80211_put_dev(drv); + return err; +} + +static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int ifindex, err; + struct net_device *dev; + + err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + if (err) + return err; + ifindex = dev->ifindex; + dev_put(dev); + + if (!drv->ops->del_virtual_intf) { + err = -EOPNOTSUPP; + goto out; + } + + rtnl_lock(); + err = drv->ops->del_virtual_intf(&drv->wiphy, ifindex); + rtnl_unlock(); + + out: + cfg80211_put_dev(drv); + return err; +} + +static struct genl_ops nl80211_ops[] = { + { + .cmd = NL80211_CMD_GET_WIPHY, + .doit = nl80211_get_wiphy, + .dumpit = nl80211_dump_wiphy, + .policy = nl80211_policy, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = NL80211_CMD_SET_WIPHY, + .doit = nl80211_set_wiphy, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_GET_INTERFACE, + .doit = nl80211_get_interface, + .dumpit = nl80211_dump_interface, + .policy = nl80211_policy, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = NL80211_CMD_SET_INTERFACE, + .doit = nl80211_set_interface, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_NEW_INTERFACE, + .doit = nl80211_new_interface, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_DEL_INTERFACE, + .doit = nl80211_del_interface, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + +/* multicast groups */ +static struct genl_multicast_group nl80211_config_mcgrp = { + .name = "config", +}; + +/* notification functions */ + +void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) +{ + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_wiphy(msg, 0, 0, 0, rdev) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_config_mcgrp.id, GFP_KERNEL); +} + +/* initialisation/exit functions */ + +int nl80211_init(void) +{ + int err, i; + + err = genl_register_family(&nl80211_fam); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(nl80211_ops); i++) { + err = genl_register_ops(&nl80211_fam, &nl80211_ops[i]); + if (err) + goto err_out; + } + + err = genl_register_mc_group(&nl80211_fam, &nl80211_config_mcgrp); + if (err) + goto err_out; + + return 0; + err_out: + genl_unregister_family(&nl80211_fam); + return err; +} + +void nl80211_exit(void) +{ + genl_unregister_family(&nl80211_fam); +} diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h new file mode 100644 index 000000000000..f3ea5c029aee --- /dev/null +++ b/net/wireless/nl80211.h @@ -0,0 +1,24 @@ +#ifndef __NET_WIRELESS_NL80211_H +#define __NET_WIRELESS_NL80211_H + +#include "core.h" + +#ifdef CONFIG_NL80211 +extern int nl80211_init(void); +extern void nl80211_exit(void); +extern void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); +#else +static inline int nl80211_init(void) +{ + return 0; +} +static inline void nl80211_exit(void) +{ +} +static inline void nl80211_notify_dev_rename( + struct cfg80211_registered_device *rdev) +{ +} +#endif /* CONFIG_NL80211 */ + +#endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 68c11d099917..28fbd0b0b568 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -161,7 +161,11 @@ int ieee80211_radiotap_iterator_next( [IEEE80211_RADIOTAP_DBM_TX_POWER] = 0x11, [IEEE80211_RADIOTAP_ANTENNA] = 0x11, [IEEE80211_RADIOTAP_DB_ANTSIGNAL] = 0x11, - [IEEE80211_RADIOTAP_DB_ANTNOISE] = 0x11 + [IEEE80211_RADIOTAP_DB_ANTNOISE] = 0x11, + [IEEE80211_RADIOTAP_RX_FLAGS] = 0x22, + [IEEE80211_RADIOTAP_TX_FLAGS] = 0x22, + [IEEE80211_RADIOTAP_RTS_RETRIES] = 0x11, + [IEEE80211_RADIOTAP_DATA_RETRIES] = 0x11, /* * add more here as they are defined in * include/net/ieee80211_radiotap.h diff --git a/net/wireless/wext.c b/net/wireless/wext.c index d6aaf65192e9..85e5f9dd0d8e 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -93,6 +93,7 @@ #include <linux/if_arp.h> /* ARPHRD_ETHER */ #include <linux/etherdevice.h> /* compare_ether_addr */ #include <linux/interrupt.h> +#include <net/net_namespace.h> #include <linux/wireless.h> /* Pretty obvious */ #include <net/iw_handler.h> /* New driver API */ @@ -672,7 +673,26 @@ static const struct seq_operations wireless_seq_ops = { static int wireless_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &wireless_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &wireless_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_proc_net(inode); + if (!seq->private) { + seq_release(inode, file); + res = -ENXIO; + } + } + return res; +} + +static int wireless_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations wireless_seq_fops = { @@ -680,17 +700,22 @@ static const struct file_operations wireless_seq_fops = { .open = wireless_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = wireless_seq_release, }; -int __init wext_proc_init(void) +int wext_proc_init(struct net *net) { /* Create /proc/net/wireless entry */ - if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)) + if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) return -ENOMEM; return 0; } + +void wext_proc_exit(struct net *net) +{ + proc_net_remove(net, "wireless"); +} #endif /* CONFIG_PROC_FS */ /************************** IOCTL SUPPORT **************************/ @@ -1010,7 +1035,7 @@ static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr, * Main IOCTl dispatcher. * Check the type of IOCTL and call the appropriate wrapper... */ -static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) +static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd) { struct net_device *dev; iw_handler handler; @@ -1019,7 +1044,7 @@ static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) * The copy_to/from_user() of ifr is also dealt with in there */ /* Make sure the device exist */ - if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) + if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL) return -ENODEV; /* A bunch of special cases, then the generic case... @@ -1053,7 +1078,7 @@ static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) } /* entry point from dev ioctl */ -int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, +int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, void __user *arg) { int ret; @@ -1065,9 +1090,9 @@ int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, && !capable(CAP_NET_ADMIN)) return -EPERM; - dev_load(ifr->ifr_name); + dev_load(net, ifr->ifr_name); rtnl_lock(); - ret = wireless_process_ioctl(ifr, cmd); + ret = wireless_process_ioctl(net, ifr, cmd); rtnl_unlock(); if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq))) return -EFAULT; @@ -1129,10 +1154,12 @@ static int rtnetlink_fill_iwinfo(struct sk_buff *skb, struct net_device *dev, { struct ifinfomsg *r; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r)); - r = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, 0, 0, type, sizeof(*r), 0); + if (nlh == NULL) + return -EMSGSIZE; + + r = nlmsg_data(nlh); r->ifi_family = AF_UNSPEC; r->__ifi_pad = 0; r->ifi_type = dev->type; @@ -1141,15 +1168,13 @@ static int rtnetlink_fill_iwinfo(struct sk_buff *skb, struct net_device *dev, r->ifi_change = 0; /* Wireless changes don't affect those flags */ /* Add the wireless events in the netlink packet */ - RTA_PUT(skb, IFLA_WIRELESS, event_len, event); + NLA_PUT(skb, IFLA_WIRELESS, event_len, event); - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); -nlmsg_failure: -rtattr_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } /* ---------------------------------------------------------------- */ @@ -1162,17 +1187,19 @@ rtattr_failure: static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len) { struct sk_buff *skb; - int size = NLMSG_GOODSIZE; + int err; - skb = alloc_skb(size, GFP_ATOMIC); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return; - if (rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK, - event, event_len) < 0) { + err = rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK, event, event_len); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); kfree_skb(skb); return; } + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; skb_queue_tail(&wireless_nlevent_queue, skb); tasklet_schedule(&wireless_nlevent_tasklet); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 479927cb45ca..fc416f9606a9 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -191,6 +191,9 @@ static int x25_device_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct x25_neigh *nb; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (dev->type == ARPHRD_X25 #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) || dev->type == ARPHRD_ETHER @@ -466,10 +469,10 @@ static struct proto x25_proto = { .obj_size = sizeof(struct x25_sock), }; -static struct sock *x25_alloc_socket(void) +static struct sock *x25_alloc_socket(struct net *net) { struct x25_sock *x25; - struct sock *sk = sk_alloc(AF_X25, GFP_ATOMIC, &x25_proto, 1); + struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, 1); if (!sk) goto out; @@ -485,17 +488,20 @@ out: return sk; } -static int x25_create(struct socket *sock, int protocol) +static int x25_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct x25_sock *x25; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol) goto out; rc = -ENOMEM; - if ((sk = x25_alloc_socket()) == NULL) + if ((sk = x25_alloc_socket(net)) == NULL) goto out; x25 = x25_sk(sk); @@ -543,7 +549,7 @@ static struct sock *x25_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) goto out; - if ((sk = x25_alloc_socket()) == NULL) + if ((sk = x25_alloc_socket(osk->sk_net)) == NULL) goto out; x25 = x25_sk(sk); diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 848a6b6f90a6..f0679d283110 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -95,6 +95,9 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, struct sk_buff *nskb; struct x25_neigh *nb; + if (dev->nd_net != &init_net) + goto drop; + nskb = skb_copy(skb, GFP_ATOMIC); if (!nskb) goto drop; diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 7405b9c5b7f2..7d55e50c936f 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/x25.h> @@ -301,7 +302,7 @@ int __init x25_proc_init(void) struct proc_dir_entry *p; int rc = -ENOMEM; - x25_proc_dir = proc_mkdir("x25", proc_net); + x25_proc_dir = proc_mkdir("x25", init_net.proc_net); if (!x25_proc_dir) goto out; @@ -328,7 +329,7 @@ out_forward: out_socket: remove_proc_entry("route", x25_proc_dir); out_route: - remove_proc_entry("x25", proc_net); + remove_proc_entry("x25", init_net.proc_net); goto out; } @@ -337,7 +338,7 @@ void __exit x25_proc_exit(void) remove_proc_entry("forward", x25_proc_dir); remove_proc_entry("route", x25_proc_dir); remove_proc_entry("socket", x25_proc_dir); - remove_proc_entry("x25", proc_net); + remove_proc_entry("x25", init_net.proc_net); } #else /* CONFIG_PROC_FS */ diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index 060fcfaa2f47..86b5b4da097c 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -129,7 +129,7 @@ void x25_route_device_down(struct net_device *dev) */ struct net_device *x25_dev_get(char *devname) { - struct net_device *dev = dev_get_by_name(devname); + struct net_device *dev = dev_get_by_name(&init_net, devname); if (dev && (!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25 diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index de3c1a625a46..45744a3d3a51 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -3,6 +3,6 @@ # obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ - xfrm_input.o xfrm_algo.o + xfrm_input.o xfrm_output.o xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c new file mode 100644 index 000000000000..0eb3377602e9 --- /dev/null +++ b/net/xfrm/xfrm_output.c @@ -0,0 +1,95 @@ +/* + * xfrm_output.c - Common IPsec encapsulation code. + * + * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <net/dst.h> +#include <net/xfrm.h> + +static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) +{ + int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev) + - skb_headroom(skb); + + if (nhead > 0) + return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + + /* Check tail too... */ + return 0; +} + +static int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = xfrm_state_check_expire(x); + if (err < 0) + goto err; + err = xfrm_state_check_space(x, skb); +err: + return err; +} + +int xfrm_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct xfrm_state *x = dst->xfrm; + int err; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); + if (err) + goto error_nolock; + } + + do { + spin_lock_bh(&x->lock); + err = xfrm_state_check(x, skb); + if (err) + goto error; + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq = ++x->replay.oseq; + if (xfrm_aevent_is_on()) + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + } + + err = x->mode->output(x, skb); + if (err) + goto error; + + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock_bh(&x->lock); + + err = x->type->output(x, skb); + if (err) + goto error_nolock; + + if (!(skb->dst = dst_pop(dst))) { + err = -EHOSTUNREACH; + goto error_nolock; + } + dst = skb->dst; + x = dst->xfrm; + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); + + err = 0; + +error_nolock: + return err; +error: + spin_unlock_bh(&x->lock); + goto error_nolock; +} +EXPORT_SYMBOL_GPL(xfrm_output); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7012891d39f2..af27c193697c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -23,7 +23,6 @@ #include <linux/netfilter.h> #include <linux/module.h> #include <linux/cache.h> -#include <linux/audit.h> #include <net/xfrm.h> #include <net/ip.h> @@ -850,10 +849,9 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) continue; err = security_xfrm_policy_delete(pol); if (err) { - xfrm_audit_log(audit_info->loginuid, - audit_info->secid, - AUDIT_MAC_IPSEC_DELSPD, 0, - pol, NULL); + xfrm_audit_policy_delete(pol, 0, + audit_info->loginuid, + audit_info->secid); return err; } } @@ -865,10 +863,9 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) continue; err = security_xfrm_policy_delete(pol); if (err) { - xfrm_audit_log(audit_info->loginuid, - audit_info->secid, - AUDIT_MAC_IPSEC_DELSPD, - 0, pol, NULL); + xfrm_audit_policy_delete(pol, 0, + audit_info->loginuid, + audit_info->secid); return err; } } @@ -909,8 +906,8 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); - xfrm_audit_log(audit_info->loginuid, audit_info->secid, - AUDIT_MAC_IPSEC_DELSPD, 1, pol, NULL); + xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, + audit_info->secid); xfrm_policy_kill(pol); killed++; @@ -930,11 +927,9 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); - xfrm_audit_log(audit_info->loginuid, - audit_info->secid, - AUDIT_MAC_IPSEC_DELSPD, 1, - pol, NULL); - + xfrm_audit_policy_delete(pol, 1, + audit_info->loginuid, + audit_info->secid); xfrm_policy_kill(pol); killed++; @@ -1477,7 +1472,7 @@ restart: pol_dead = 0; xfrm_nr = 0; - if (sk && sk->sk_policy[1]) { + if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (IS_ERR(policy)) return PTR_ERR(policy); @@ -1687,17 +1682,13 @@ static inline int xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) { struct xfrm_state *x; - int err; if (!skb->sp || idx < 0 || idx >= skb->sp->len) return 0; x = skb->sp->xvec[idx]; if (!x->type->reject) return 0; - xfrm_state_hold(x); - err = x->type->reject(x, skb, fl); - xfrm_state_put(x); - return err; + return x->type->reject(x, skb, fl); } /* When skb is transformed back to its "native" form, we have to @@ -1954,8 +1945,8 @@ static int stale_bundle(struct dst_entry *dst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = init_net.loopback_dev; + dev_hold(dst->dev); dev_put(dev); } } @@ -2150,123 +2141,6 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, EXPORT_SYMBOL(xfrm_bundle_ok); -#ifdef CONFIG_AUDITSYSCALL -/* Audit addition and deletion of SAs and ipsec policy */ - -void xfrm_audit_log(uid_t auid, u32 sid, int type, int result, - struct xfrm_policy *xp, struct xfrm_state *x) -{ - - char *secctx; - u32 secctx_len; - struct xfrm_sec_ctx *sctx = NULL; - struct audit_buffer *audit_buf; - int family; - extern int audit_enabled; - - if (audit_enabled == 0) - return; - - BUG_ON((type == AUDIT_MAC_IPSEC_ADDSA || - type == AUDIT_MAC_IPSEC_DELSA) && !x); - BUG_ON((type == AUDIT_MAC_IPSEC_ADDSPD || - type == AUDIT_MAC_IPSEC_DELSPD) && !xp); - - audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type); - if (audit_buf == NULL) - return; - - switch(type) { - case AUDIT_MAC_IPSEC_ADDSA: - audit_log_format(audit_buf, "SAD add: auid=%u", auid); - break; - case AUDIT_MAC_IPSEC_DELSA: - audit_log_format(audit_buf, "SAD delete: auid=%u", auid); - break; - case AUDIT_MAC_IPSEC_ADDSPD: - audit_log_format(audit_buf, "SPD add: auid=%u", auid); - break; - case AUDIT_MAC_IPSEC_DELSPD: - audit_log_format(audit_buf, "SPD delete: auid=%u", auid); - break; - default: - return; - } - - if (sid != 0 && - security_secid_to_secctx(sid, &secctx, &secctx_len) == 0) { - audit_log_format(audit_buf, " subj=%s", secctx); - security_release_secctx(secctx, secctx_len); - } else - audit_log_task_context(audit_buf); - - if (xp) { - family = xp->selector.family; - if (xp->security) - sctx = xp->security; - } else { - family = x->props.family; - if (x->security) - sctx = x->security; - } - - if (sctx) - audit_log_format(audit_buf, - " sec_alg=%u sec_doi=%u sec_obj=%s", - sctx->ctx_alg, sctx->ctx_doi, sctx->ctx_str); - - switch(family) { - case AF_INET: - { - struct in_addr saddr, daddr; - if (xp) { - saddr.s_addr = xp->selector.saddr.a4; - daddr.s_addr = xp->selector.daddr.a4; - } else { - saddr.s_addr = x->props.saddr.a4; - daddr.s_addr = x->id.daddr.a4; - } - audit_log_format(audit_buf, - " src=%u.%u.%u.%u dst=%u.%u.%u.%u", - NIPQUAD(saddr), NIPQUAD(daddr)); - } - break; - case AF_INET6: - { - struct in6_addr saddr6, daddr6; - if (xp) { - memcpy(&saddr6, xp->selector.saddr.a6, - sizeof(struct in6_addr)); - memcpy(&daddr6, xp->selector.daddr.a6, - sizeof(struct in6_addr)); - } else { - memcpy(&saddr6, x->props.saddr.a6, - sizeof(struct in6_addr)); - memcpy(&daddr6, x->id.daddr.a6, - sizeof(struct in6_addr)); - } - audit_log_format(audit_buf, - " src=" NIP6_FMT " dst=" NIP6_FMT, - NIP6(saddr6), NIP6(daddr6)); - } - break; - } - - if (x) - audit_log_format(audit_buf, " spi=%lu(0x%lx) protocol=%s", - (unsigned long)ntohl(x->id.spi), - (unsigned long)ntohl(x->id.spi), - x->id.proto == IPPROTO_AH ? "AH" : - (x->id.proto == IPPROTO_ESP ? - "ESP" : "IPCOMP")); - - audit_log_format(audit_buf, " res=%u", result); - audit_log_end(audit_buf); -} - -EXPORT_SYMBOL(xfrm_audit_log); -#endif /* CONFIG_AUDITSYSCALL */ - int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { int err = 0; @@ -2358,6 +2232,11 @@ static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo) static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_DOWN: xfrm_flush_bundles(); @@ -2412,6 +2291,72 @@ void __init xfrm_init(void) xfrm_input_init(); } +#ifdef CONFIG_AUDITSYSCALL +static inline void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, + struct audit_buffer *audit_buf) +{ + if (xp->security) + audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", + xp->security->ctx_alg, xp->security->ctx_doi, + xp->security->ctx_str); + + switch(xp->selector.family) { + case AF_INET: + audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u", + NIPQUAD(xp->selector.saddr.a4), + NIPQUAD(xp->selector.daddr.a4)); + break; + case AF_INET6: + { + struct in6_addr saddr6, daddr6; + + memcpy(&saddr6, xp->selector.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, xp->selector.daddr.a6, + sizeof(struct in6_addr)); + audit_log_format(audit_buf, + " src=" NIP6_FMT " dst=" NIP6_FMT, + NIP6(saddr6), NIP6(daddr6)); + } + break; + } +} + +void +xfrm_audit_policy_add(struct xfrm_policy *xp, int result, u32 auid, u32 sid) +{ + struct audit_buffer *audit_buf; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + audit_buf = xfrm_audit_start(sid, auid); + if (audit_buf == NULL) + return; + audit_log_format(audit_buf, " op=SPD-add res=%u", result); + xfrm_audit_common_policyinfo(xp, audit_buf); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); + +void +xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, u32 auid, u32 sid) +{ + struct audit_buffer *audit_buf; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + audit_buf = xfrm_audit_start(sid, auid); + if (audit_buf == NULL) + return; + audit_log_format(audit_buf, " op=SPD-delete res=%u", result); + xfrm_audit_common_policyinfo(xp, audit_buf); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); +#endif + #ifdef CONFIG_XFRM_MIGRATE static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, struct xfrm_selector *sel_tgt) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d4356e6f7f9b..344f0a6abec5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -19,7 +19,6 @@ #include <linux/ipsec.h> #include <linux/module.h> #include <linux/cache.h> -#include <linux/audit.h> #include <asm/uaccess.h> #include "xfrm_hash.h" @@ -301,8 +300,8 @@ expired: if (!err && x->id.spi) km_state_expired(x, 1, 0); - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + xfrm_audit_state_delete(x, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); out: spin_unlock(&x->lock); @@ -403,11 +402,9 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { - xfrm_audit_log(audit_info->loginuid, - audit_info->secid, - AUDIT_MAC_IPSEC_DELSA, - 0, NULL, x); - + xfrm_audit_state_delete(x, 0, + audit_info->loginuid, + audit_info->secid); return err; } } @@ -443,10 +440,9 @@ restart: spin_unlock_bh(&xfrm_state_lock); err = xfrm_state_delete(x); - xfrm_audit_log(audit_info->loginuid, - audit_info->secid, - AUDIT_MAC_IPSEC_DELSA, - err ? 0 : 1, NULL, x); + xfrm_audit_state_delete(x, err ? 0 : 1, + audit_info->loginuid, + audit_info->secid); xfrm_state_put(x); spin_lock_bh(&xfrm_state_lock); @@ -852,7 +848,6 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); - wake_up(&km_waitq); xfrm_state_num++; @@ -1156,29 +1151,6 @@ int xfrm_state_check_expire(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_check_expire); -static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) -{ - int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev) - - skb_headroom(skb); - - if (nhead > 0) - return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); - - /* Check tail too... */ - return 0; -} - -int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb) -{ - int err = xfrm_state_check_expire(x); - if (err < 0) - goto err; - err = xfrm_state_check_space(x, skb); -err: - return err; -} -EXPORT_SYMBOL(xfrm_state_check); - struct xfrm_state * xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) @@ -1303,26 +1275,33 @@ u32 xfrm_get_acqseq(void) } EXPORT_SYMBOL(xfrm_get_acqseq); -void -xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi) +int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) { unsigned int h; struct xfrm_state *x0; + int err = -ENOENT; + __be32 minspi = htonl(low); + __be32 maxspi = htonl(high); + + spin_lock_bh(&x->lock); + if (x->km.state == XFRM_STATE_DEAD) + goto unlock; + err = 0; if (x->id.spi) - return; + goto unlock; + + err = -ENOENT; if (minspi == maxspi) { x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { xfrm_state_put(x0); - return; + goto unlock; } x->id.spi = minspi; } else { u32 spi = 0; - u32 low = ntohl(minspi); - u32 high = ntohl(maxspi); for (h=0; h<high-low+1; h++) { spi = low + net_random()%(high-low+1); x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family); @@ -1338,8 +1317,14 @@ xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi) h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); spin_unlock_bh(&xfrm_state_lock); - wake_up(&km_waitq); + + err = 0; } + +unlock: + spin_unlock_bh(&x->lock); + + return err; } EXPORT_SYMBOL(xfrm_alloc_spi); @@ -1424,7 +1409,6 @@ void xfrm_replay_notify(struct xfrm_state *x, int event) !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) x->xflags &= ~XFRM_TIME_DEFER; } -EXPORT_SYMBOL(xfrm_replay_notify); static void xfrm_replay_timer_handler(unsigned long data) { @@ -1821,3 +1805,72 @@ void __init xfrm_state_init(void) INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); } +#ifdef CONFIG_AUDITSYSCALL +static inline void xfrm_audit_common_stateinfo(struct xfrm_state *x, + struct audit_buffer *audit_buf) +{ + if (x->security) + audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", + x->security->ctx_alg, x->security->ctx_doi, + x->security->ctx_str); + + switch(x->props.family) { + case AF_INET: + audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u", + NIPQUAD(x->props.saddr.a4), + NIPQUAD(x->id.daddr.a4)); + break; + case AF_INET6: + { + struct in6_addr saddr6, daddr6; + + memcpy(&saddr6, x->props.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, x->id.daddr.a6, + sizeof(struct in6_addr)); + audit_log_format(audit_buf, + " src=" NIP6_FMT " dst=" NIP6_FMT, + NIP6(saddr6), NIP6(daddr6)); + } + break; + } +} + +void +xfrm_audit_state_add(struct xfrm_state *x, int result, u32 auid, u32 sid) +{ + struct audit_buffer *audit_buf; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + audit_buf = xfrm_audit_start(sid, auid); + if (audit_buf == NULL) + return; + audit_log_format(audit_buf, " op=SAD-add res=%u",result); + xfrm_audit_common_stateinfo(x, audit_buf); + audit_log_format(audit_buf, " spi=%lu(0x%lx)", + (unsigned long)x->id.spi, (unsigned long)x->id.spi); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_state_add); + +void +xfrm_audit_state_delete(struct xfrm_state *x, int result, u32 auid, u32 sid) +{ + struct audit_buffer *audit_buf; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + audit_buf = xfrm_audit_start(sid, auid); + if (audit_buf == NULL) + return; + audit_log_format(audit_buf, " op=SAD-delete res=%u",result); + xfrm_audit_common_stateinfo(x, audit_buf); + audit_log_format(audit_buf, " spi=%lu(0x%lx)", + (unsigned long)x->id.spi, (unsigned long)x->id.spi); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_state_delete); +#endif /* CONFIG_AUDITSYSCALL */ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 61339e17a0f5..d41588d101d0 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -19,7 +19,6 @@ #include <linux/string.h> #include <linux/net.h> #include <linux/skbuff.h> -#include <linux/rtnetlink.h> #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <linux/init.h> @@ -31,25 +30,22 @@ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include <linux/in6.h> #endif -#include <linux/audit.h> -static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) +static inline int alg_len(struct xfrm_algo *alg) { - struct rtattr *rt = xfrma[type - 1]; + return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); +} + +static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) +{ + struct nlattr *rt = attrs[type]; struct xfrm_algo *algp; - int len; if (!rt) return 0; - len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp); - if (len < 0) - return -EINVAL; - - algp = RTA_DATA(rt); - - len -= (algp->alg_key_len + 7U) / 8; - if (len < 0) + algp = nla_data(rt); + if (nla_len(rt) < alg_len(algp)) return -EINVAL; switch (type) { @@ -77,55 +73,25 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) return 0; } -static int verify_encap_tmpl(struct rtattr **xfrma) -{ - struct rtattr *rt = xfrma[XFRMA_ENCAP - 1]; - struct xfrm_encap_tmpl *encap; - - if (!rt) - return 0; - - if ((rt->rta_len - sizeof(*rt)) < sizeof(*encap)) - return -EINVAL; - - return 0; -} - -static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type, +static void verify_one_addr(struct nlattr **attrs, enum xfrm_attr_type_t type, xfrm_address_t **addrp) { - struct rtattr *rt = xfrma[type - 1]; - - if (!rt) - return 0; - - if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp)) - return -EINVAL; - - if (addrp) - *addrp = RTA_DATA(rt); + struct nlattr *rt = attrs[type]; - return 0; + if (rt && addrp) + *addrp = nla_data(rt); } -static inline int verify_sec_ctx_len(struct rtattr **xfrma) +static inline int verify_sec_ctx_len(struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX - 1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_user_sec_ctx *uctx; - int len = 0; if (!rt) return 0; - if (rt->rta_len < sizeof(*uctx)) - return -EINVAL; - - uctx = RTA_DATA(rt); - - len += sizeof(struct xfrm_user_sec_ctx); - len += uctx->ctx_len; - - if (uctx->len != len) + uctx = nla_data(rt); + if (uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) return -EINVAL; return 0; @@ -133,7 +99,7 @@ static inline int verify_sec_ctx_len(struct rtattr **xfrma) static int verify_newsa_info(struct xfrm_usersa_info *p, - struct rtattr **xfrma) + struct nlattr **attrs) { int err; @@ -157,35 +123,35 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->id.proto) { case IPPROTO_AH: - if (!xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1] || - xfrma[XFRMA_ALG_COMP-1]) + if (!attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_ALG_COMP]) goto out; break; case IPPROTO_ESP: - if ((!xfrma[XFRMA_ALG_AUTH-1] && - !xfrma[XFRMA_ALG_CRYPT-1]) || - xfrma[XFRMA_ALG_COMP-1]) + if ((!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_CRYPT]) || + attrs[XFRMA_ALG_COMP]) goto out; break; case IPPROTO_COMP: - if (!xfrma[XFRMA_ALG_COMP-1] || - xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1]) + if (!attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_CRYPT]) goto out; break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: - if (xfrma[XFRMA_ALG_COMP-1] || - xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1] || - xfrma[XFRMA_ENCAP-1] || - xfrma[XFRMA_SEC_CTX-1] || - !xfrma[XFRMA_COADDR-1]) + if (attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_ENCAP] || + attrs[XFRMA_SEC_CTX] || + !attrs[XFRMA_COADDR]) goto out; break; #endif @@ -194,17 +160,13 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; } - if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH))) - goto out; - if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT))) - goto out; - if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH))) goto out; - if ((err = verify_encap_tmpl(xfrma))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT))) goto out; - if ((err = verify_sec_ctx_len(xfrma))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_COMP))) goto out; - if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL))) + if ((err = verify_sec_ctx_len(attrs))) goto out; err = -EINVAL; @@ -227,25 +189,22 @@ out: static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, struct xfrm_algo_desc *(*get_byname)(char *, int), - struct rtattr *u_arg) + struct nlattr *rta) { - struct rtattr *rta = u_arg; struct xfrm_algo *p, *ualg; struct xfrm_algo_desc *algo; - int len; if (!rta) return 0; - ualg = RTA_DATA(rta); + ualg = nla_data(rta); algo = get_byname(ualg->alg_name, 1); if (!algo) return -ENOSYS; *props = algo->desc.sadb_alg_id; - len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8; - p = kmemdup(ualg, len, GFP_KERNEL); + p = kmemdup(ualg, alg_len(ualg), GFP_KERNEL); if (!p) return -ENOMEM; @@ -254,24 +213,6 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return 0; } -static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_arg) -{ - struct rtattr *rta = u_arg; - struct xfrm_encap_tmpl *p, *uencap; - - if (!rta) - return 0; - - uencap = RTA_DATA(rta); - p = kmemdup(uencap, sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - *encapp = p; - return 0; -} - - static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { int len = 0; @@ -283,34 +224,6 @@ static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) return len; } -static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) -{ - struct xfrm_user_sec_ctx *uctx; - - if (!u_arg) - return 0; - - uctx = RTA_DATA(u_arg); - return security_xfrm_state_alloc(x, uctx); -} - -static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg) -{ - struct rtattr *rta = u_arg; - xfrm_address_t *p, *uaddrp; - - if (!rta) - return 0; - - uaddrp = RTA_DATA(rta); - p = kmemdup(uaddrp, sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - *addrpp = p; - return 0; -} - static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); @@ -336,53 +249,38 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * * somehow made shareable and move it to xfrm_state.c - JHS * */ -static int xfrm_update_ae_params(struct xfrm_state *x, struct rtattr **xfrma) +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) { - int err = - EINVAL; - struct rtattr *rp = xfrma[XFRMA_REPLAY_VAL-1]; - struct rtattr *lt = xfrma[XFRMA_LTIME_VAL-1]; - struct rtattr *et = xfrma[XFRMA_ETIMER_THRESH-1]; - struct rtattr *rt = xfrma[XFRMA_REPLAY_THRESH-1]; + struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; + struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; + struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; if (rp) { struct xfrm_replay_state *replay; - if (RTA_PAYLOAD(rp) < sizeof(*replay)) - goto error; - replay = RTA_DATA(rp); + replay = nla_data(rp); memcpy(&x->replay, replay, sizeof(*replay)); memcpy(&x->preplay, replay, sizeof(*replay)); } if (lt) { struct xfrm_lifetime_cur *ltime; - if (RTA_PAYLOAD(lt) < sizeof(*ltime)) - goto error; - ltime = RTA_DATA(lt); + ltime = nla_data(lt); x->curlft.bytes = ltime->bytes; x->curlft.packets = ltime->packets; x->curlft.add_time = ltime->add_time; x->curlft.use_time = ltime->use_time; } - if (et) { - if (RTA_PAYLOAD(et) < sizeof(u32)) - goto error; - x->replay_maxage = *(u32*)RTA_DATA(et); - } - - if (rt) { - if (RTA_PAYLOAD(rt) < sizeof(u32)) - goto error; - x->replay_maxdiff = *(u32*)RTA_DATA(rt); - } + if (et) + x->replay_maxage = nla_get_u32(et); - return 0; -error: - return err; + if (rt) + x->replay_maxdiff = nla_get_u32(rt); } static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, - struct rtattr **xfrma, + struct nlattr **attrs, int *errp) { struct xfrm_state *x = xfrm_state_alloc(); @@ -395,25 +293,37 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, if ((err = attach_one_algo(&x->aalg, &x->props.aalgo, xfrm_aalg_get_byname, - xfrma[XFRMA_ALG_AUTH-1]))) + attrs[XFRMA_ALG_AUTH]))) goto error; if ((err = attach_one_algo(&x->ealg, &x->props.ealgo, xfrm_ealg_get_byname, - xfrma[XFRMA_ALG_CRYPT-1]))) + attrs[XFRMA_ALG_CRYPT]))) goto error; if ((err = attach_one_algo(&x->calg, &x->props.calgo, xfrm_calg_get_byname, - xfrma[XFRMA_ALG_COMP-1]))) - goto error; - if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) - goto error; - if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1]))) + attrs[XFRMA_ALG_COMP]))) goto error; + + if (attrs[XFRMA_ENCAP]) { + x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), + sizeof(*x->encap), GFP_KERNEL); + if (x->encap == NULL) + goto error; + } + + if (attrs[XFRMA_COADDR]) { + x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), + sizeof(*x->coaddr), GFP_KERNEL); + if (x->coaddr == NULL) + goto error; + } + err = xfrm_init_state(x); if (err) goto error; - if ((err = attach_sec_ctx(x, xfrma[XFRMA_SEC_CTX-1]))) + if (attrs[XFRMA_SEC_CTX] && + security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) goto error; x->km.seq = p->seq; @@ -426,9 +336,7 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, /* override default values from above */ - err = xfrm_update_ae_params(x, (struct rtattr **)xfrma); - if (err < 0) - goto error; + xfrm_update_ae_params(x, attrs); return x; @@ -441,18 +349,18 @@ error_no_put: } static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_usersa_info *p = NLMSG_DATA(nlh); + struct xfrm_usersa_info *p = nlmsg_data(nlh); struct xfrm_state *x; int err; struct km_event c; - err = verify_newsa_info(p, xfrma); + err = verify_newsa_info(p, attrs); if (err) return err; - x = xfrm_state_construct(p, xfrma, &err); + x = xfrm_state_construct(p, attrs, &err); if (!x) return err; @@ -462,8 +370,8 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, else err = xfrm_state_update(x); - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x); + xfrm_audit_state_add(x, err ? 0 : 1, NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -482,7 +390,7 @@ out: } static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, - struct rtattr **xfrma, + struct nlattr **attrs, int *errp) { struct xfrm_state *x = NULL; @@ -494,10 +402,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, } else { xfrm_address_t *saddr = NULL; - err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr); - if (err) - goto out; - + verify_one_addr(attrs, XFRMA_SRCADDR, &saddr); if (!saddr) { err = -EINVAL; goto out; @@ -515,14 +420,14 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, } static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; int err = -ESRCH; struct km_event c; - struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + struct xfrm_usersa_id *p = nlmsg_data(nlh); - x = xfrm_user_state_lookup(p, xfrma, &err); + x = xfrm_user_state_lookup(p, attrs, &err); if (x == NULL) return err; @@ -545,8 +450,8 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_notify(x, &c); out: - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + xfrm_audit_state_delete(x, err ? 0 : 1, NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); xfrm_state_put(x); return err; } @@ -576,67 +481,92 @@ struct xfrm_dump_info { int this_idx; }; -static int dump_one_state(struct xfrm_state *x, int count, void *ptr) +static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) { - struct xfrm_dump_info *sp = ptr; - struct sk_buff *in_skb = sp->in_skb; - struct sk_buff *skb = sp->out_skb; - struct xfrm_usersa_info *p; - struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); + struct xfrm_user_sec_ctx *uctx; + struct nlattr *attr; + int ctx_size = sizeof(*uctx) + s->ctx_len; - if (sp->this_idx < sp->start_idx) - goto out; + attr = nla_reserve(skb, XFRMA_SEC_CTX, ctx_size); + if (attr == NULL) + return -EMSGSIZE; + + uctx = nla_data(attr); + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = ctx_size; + uctx->ctx_doi = s->ctx_doi; + uctx->ctx_alg = s->ctx_alg; + uctx->ctx_len = s->ctx_len; + memcpy(uctx + 1, s->ctx_str, s->ctx_len); - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, - sp->nlmsg_seq, - XFRM_MSG_NEWSA, sizeof(*p)); - nlh->nlmsg_flags = sp->nlmsg_flags; + return 0; +} - p = NLMSG_DATA(nlh); +/* Don't change this without updating xfrm_sa_len! */ +static int copy_to_user_state_extra(struct xfrm_state *x, + struct xfrm_usersa_info *p, + struct sk_buff *skb) +{ + spin_lock_bh(&x->lock); copy_to_user_state(x, p); + if (x->coaddr) + NLA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + + if (x->lastused) + NLA_PUT_U64(skb, XFRMA_LASTUSED, x->lastused); + spin_unlock_bh(&x->lock); + if (x->aalg) - RTA_PUT(skb, XFRMA_ALG_AUTH, - sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg); + NLA_PUT(skb, XFRMA_ALG_AUTH, alg_len(x->aalg), x->aalg); if (x->ealg) - RTA_PUT(skb, XFRMA_ALG_CRYPT, - sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg); + NLA_PUT(skb, XFRMA_ALG_CRYPT, alg_len(x->ealg), x->ealg); if (x->calg) - RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + NLA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); if (x->encap) - RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); - if (x->security) { - int ctx_size = sizeof(struct xfrm_sec_ctx) + - x->security->ctx_len; - struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); - - uctx->exttype = XFRMA_SEC_CTX; - uctx->len = ctx_size; - uctx->ctx_doi = x->security->ctx_doi; - uctx->ctx_alg = x->security->ctx_alg; - uctx->ctx_len = x->security->ctx_len; - memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); - } + if (x->security && copy_sec_ctx(x->security, skb) < 0) + goto nla_put_failure; - if (x->coaddr) - RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + return 0; - if (x->lastused) - RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused); +nla_put_failure: + return -EMSGSIZE; +} - nlh->nlmsg_len = skb_tail_pointer(skb) - b; +static int dump_one_state(struct xfrm_state *x, int count, void *ptr) +{ + struct xfrm_dump_info *sp = ptr; + struct sk_buff *in_skb = sp->in_skb; + struct sk_buff *skb = sp->out_skb; + struct xfrm_usersa_info *p; + struct nlmsghdr *nlh; + int err; + + if (sp->this_idx < sp->start_idx) + goto out; + + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); + if (nlh == NULL) + return -EMSGSIZE; + + p = nlmsg_data(nlh); + + err = copy_to_user_state_extra(x, p, skb); + if (err) + goto nla_put_failure; + + nlmsg_end(skb, nlh); out: sp->this_idx++; return 0; -nlmsg_failure: -rtattr_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return err; } static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) @@ -661,7 +591,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, struct xfrm_dump_info info; struct sk_buff *skb; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); @@ -679,6 +609,13 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, return skb; } +static inline size_t xfrm_spdinfo_msgsize(void) +{ + return NLMSG_ALIGN(4) + + nla_total_size(sizeof(struct xfrmu_spdinfo)) + + nla_total_size(sizeof(struct xfrmu_spdhinfo)); +} + static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; @@ -714,18 +651,14 @@ nla_put_failure: } static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct sk_buff *r_skb; - u32 *flags = NLMSG_DATA(nlh); + u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; u32 seq = nlh->nlmsg_seq; - int len = NLMSG_LENGTH(sizeof(u32)); - len += RTA_SPACE(sizeof(struct xfrmu_spdinfo)); - len += RTA_SPACE(sizeof(struct xfrmu_spdhinfo)); - - r_skb = alloc_skb(len, GFP_ATOMIC); + r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; @@ -735,6 +668,13 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, return nlmsg_unicast(xfrm_nl, r_skb, spid); } +static inline size_t xfrm_sadinfo_msgsize(void) +{ + return NLMSG_ALIGN(4) + + nla_total_size(sizeof(struct xfrmu_sadhinfo)) + + nla_total_size(4); /* XFRMA_SAD_CNT */ +} + static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; @@ -764,19 +704,14 @@ nla_put_failure: } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct sk_buff *r_skb; - u32 *flags = NLMSG_DATA(nlh); + u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; u32 seq = nlh->nlmsg_seq; - int len = NLMSG_LENGTH(sizeof(u32)); - - len += RTA_SPACE(sizeof(struct xfrmu_sadhinfo)); - len += RTA_SPACE(sizeof(u32)); - - r_skb = alloc_skb(len, GFP_ATOMIC); + r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; @@ -787,14 +722,14 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + struct xfrm_usersa_id *p = nlmsg_data(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; int err = -ESRCH; - x = xfrm_user_state_lookup(p, xfrma, &err); + x = xfrm_user_state_lookup(p, attrs, &err); if (x == NULL) goto out_noput; @@ -802,8 +737,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); } xfrm_state_put(x); out_noput: @@ -834,7 +768,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p) } static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; struct xfrm_userspi_info *p; @@ -843,7 +777,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, int family; int err; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); err = verify_userspi_info(p); if (err) goto out_noput; @@ -869,23 +803,17 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, if (x == NULL) goto out_noput; - resp_skb = ERR_PTR(-ENOENT); - - spin_lock_bh(&x->lock); - if (x->km.state != XFRM_STATE_DEAD) { - xfrm_alloc_spi(x, htonl(p->min), htonl(p->max)); - if (x->id.spi) - resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); - } - spin_unlock_bh(&x->lock); + err = xfrm_alloc_spi(x, p->min, p->max); + if (err) + goto out; + resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); goto out; } - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); out: xfrm_state_put(x); @@ -964,15 +892,15 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) return verify_policy_dir(p->dir); } -static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct rtattr **xfrma) +static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_user_sec_ctx *uctx; if (!rt) return 0; - uctx = RTA_DATA(rt); + uctx = nla_data(rt); return security_xfrm_policy_alloc(pol, uctx); } @@ -1032,38 +960,35 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) return 0; } -static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) +static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_TMPL-1]; + struct nlattr *rt = attrs[XFRMA_TMPL]; if (!rt) { pol->xfrm_nr = 0; } else { - struct xfrm_user_tmpl *utmpl = RTA_DATA(rt); - int nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl); + struct xfrm_user_tmpl *utmpl = nla_data(rt); + int nr = nla_len(rt) / sizeof(*utmpl); int err; err = validate_tmpl(nr, utmpl, pol->family); if (err) return err; - copy_templates(pol, RTA_DATA(rt), nr); + copy_templates(pol, utmpl, nr); } return 0; } -static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma) +static int copy_from_user_policy_type(u8 *tp, struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1]; + struct nlattr *rt = attrs[XFRMA_POLICY_TYPE]; struct xfrm_userpolicy_type *upt; u8 type = XFRM_POLICY_TYPE_MAIN; int err; if (rt) { - if (rt->rta_len < sizeof(*upt)) - return -EINVAL; - - upt = RTA_DATA(rt); + upt = nla_data(rt); type = upt->type; } @@ -1101,7 +1026,7 @@ static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_i p->share = XFRM_SHARE_ANY; /* XXX xp->share */ } -static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp) +static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) { struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL); int err; @@ -1113,12 +1038,12 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, copy_from_user_policy(xp, p); - err = copy_from_user_policy_type(&xp->type, xfrma); + err = copy_from_user_policy_type(&xp->type, attrs); if (err) goto error; - if (!(err = copy_from_user_tmpl(xp, xfrma))) - err = copy_from_user_sec_ctx(xp, xfrma); + if (!(err = copy_from_user_tmpl(xp, attrs))) + err = copy_from_user_sec_ctx(xp, attrs); if (err) goto error; @@ -1130,9 +1055,9 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, } static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); + struct xfrm_userpolicy_info *p = nlmsg_data(nlh); struct xfrm_policy *xp; struct km_event c; int err; @@ -1141,11 +1066,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, err = verify_newpolicy_info(p); if (err) return err; - err = verify_sec_ctx_len(xfrma); + err = verify_sec_ctx_len(attrs); if (err) return err; - xp = xfrm_policy_construct(p, xfrma, &err); + xp = xfrm_policy_construct(p, attrs, &err); if (!xp) return err; @@ -1155,8 +1080,8 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL); + xfrm_audit_policy_add(xp, err ? 0 : 1, NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); if (err) { security_xfrm_policy_free(xp); @@ -1197,32 +1122,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) up->ealgos = kp->ealgos; up->calgos = kp->calgos; } - RTA_PUT(skb, XFRMA_TMPL, - (sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr), - vec); - return 0; - -rtattr_failure: - return -1; -} - -static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) -{ - int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len; - struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); - - uctx->exttype = XFRMA_SEC_CTX; - uctx->len = ctx_size; - uctx->ctx_doi = s->ctx_doi; - uctx->ctx_alg = s->ctx_alg; - uctx->ctx_len = s->ctx_len; - memcpy(uctx + 1, s->ctx_str, s->ctx_len); - return 0; - - rtattr_failure: - return -1; + return nla_put(skb, XFRMA_TMPL, + sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr, vec); } static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb) @@ -1240,21 +1142,23 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s } return 0; } +static inline size_t userpolicy_type_attrsize(void) +{ +#ifdef CONFIG_XFRM_SUB_POLICY + return nla_total_size(sizeof(struct xfrm_userpolicy_type)); +#else + return 0; +#endif +} #ifdef CONFIG_XFRM_SUB_POLICY static int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { - struct xfrm_userpolicy_type upt; - - memset(&upt, 0, sizeof(upt)); - upt.type = type; - - RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); + struct xfrm_userpolicy_type upt = { + .type = type, + }; - return 0; - -rtattr_failure: - return -1; + return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); } #else @@ -1271,17 +1175,16 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); if (sp->this_idx < sp->start_idx) goto out; - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, - sp->nlmsg_seq, - XFRM_MSG_NEWPOLICY, sizeof(*p)); - p = NLMSG_DATA(nlh); - nlh->nlmsg_flags = sp->nlmsg_flags; + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); + if (nlh == NULL) + return -EMSGSIZE; + p = nlmsg_data(nlh); copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; @@ -1290,14 +1193,14 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; + nlmsg_end(skb, nlh); out: sp->this_idx++; return 0; nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) @@ -1326,7 +1229,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, struct xfrm_dump_info info; struct sk_buff *skb; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -1345,7 +1248,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, } static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; @@ -1354,10 +1257,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int delete; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; - err = copy_from_user_policy_type(&type, xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; @@ -1368,16 +1271,16 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (p->index) xp = xfrm_policy_byid(type, p->dir, p->index, delete, &err); else { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_policy tmp; - err = verify_sec_ctx_len(xfrma); + err = verify_sec_ctx_len(attrs); if (err) return err; memset(&tmp, 0, sizeof(struct xfrm_policy)); if (rt) { - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + struct xfrm_user_sec_ctx *uctx = nla_data(rt); if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; @@ -1396,13 +1299,13 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, - MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, resp_skb, + NETLINK_CB(skb).pid); } } else { - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL); + xfrm_audit_policy_delete(xp, err ? 0 : 1, + NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); if (err != 0) goto out; @@ -1420,10 +1323,10 @@ out: } static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct km_event c; - struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); + struct xfrm_usersa_flush *p = nlmsg_data(nlh); struct xfrm_audit audit_info; int err; @@ -1441,18 +1344,25 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static inline size_t xfrm_aevent_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) + + nla_total_size(sizeof(struct xfrm_replay_state)) + + nla_total_size(sizeof(struct xfrm_lifetime_cur)) + + nla_total_size(4) /* XFRM_AE_RTHR */ + + nla_total_size(4); /* XFRM_AE_ETHR */ +} static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) { struct xfrm_aevent_id *id; struct nlmsghdr *nlh; - struct xfrm_lifetime_cur ltime; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id)); - id = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); + if (nlh == NULL) + return -EMSGSIZE; + id = nlmsg_data(nlh); memcpy(&id->sa_id.daddr, &x->id.daddr,sizeof(x->id.daddr)); id->sa_id.spi = x->id.spi; id->sa_id.family = x->props.family; @@ -1461,54 +1371,34 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve id->reqid = x->props.reqid; id->flags = c->data.aevent; - RTA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); + NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); + NLA_PUT(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); - ltime.bytes = x->curlft.bytes; - ltime.packets = x->curlft.packets; - ltime.add_time = x->curlft.add_time; - ltime.use_time = x->curlft.use_time; + if (id->flags & XFRM_AE_RTHR) + NLA_PUT_U32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff); - RTA_PUT(skb, XFRMA_LTIME_VAL, sizeof(struct xfrm_lifetime_cur), <ime); + if (id->flags & XFRM_AE_ETHR) + NLA_PUT_U32(skb, XFRMA_ETIMER_THRESH, + x->replay_maxage * 10 / HZ); - if (id->flags&XFRM_AE_RTHR) { - RTA_PUT(skb,XFRMA_REPLAY_THRESH,sizeof(u32),&x->replay_maxdiff); - } - - if (id->flags&XFRM_AE_ETHR) { - u32 etimer = x->replay_maxage*10/HZ; - RTA_PUT(skb,XFRMA_ETIMER_THRESH,sizeof(u32),&etimer); - } - - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); -rtattr_failure: -nlmsg_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; struct sk_buff *r_skb; int err; struct km_event c; - struct xfrm_aevent_id *p = NLMSG_DATA(nlh); - int len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id)); + struct xfrm_aevent_id *p = nlmsg_data(nlh); struct xfrm_usersa_id *id = &p->sa_id; - len += RTA_SPACE(sizeof(struct xfrm_replay_state)); - len += RTA_SPACE(sizeof(struct xfrm_lifetime_cur)); - - if (p->flags&XFRM_AE_RTHR) - len+=RTA_SPACE(sizeof(u32)); - - if (p->flags&XFRM_AE_ETHR) - len+=RTA_SPACE(sizeof(u32)); - - r_skb = alloc_skb(len, GFP_ATOMIC); + r_skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; @@ -1530,22 +1420,21 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = netlink_unicast(xfrm_nl, r_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, r_skb, NETLINK_CB(skb).pid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; } static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; struct km_event c; int err = - EINVAL; - struct xfrm_aevent_id *p = NLMSG_DATA(nlh); - struct rtattr *rp = xfrma[XFRMA_REPLAY_VAL-1]; - struct rtattr *lt = xfrma[XFRMA_LTIME_VAL-1]; + struct xfrm_aevent_id *p = nlmsg_data(nlh); + struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; if (!lt && !rp) return err; @@ -1562,10 +1451,8 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; spin_lock_bh(&x->lock); - err = xfrm_update_ae_params(x, xfrma); + xfrm_update_ae_params(x, attrs); spin_unlock_bh(&x->lock); - if (err < 0) - goto out; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1579,14 +1466,14 @@ out: } static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct xfrm_audit audit_info; - err = copy_from_user_policy_type(&type, xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; @@ -1604,31 +1491,31 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, } static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_policy *xp; - struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); + struct xfrm_user_polexpire *up = nlmsg_data(nlh); struct xfrm_userpolicy_info *p = &up->pol; u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; - err = copy_from_user_policy_type(&type, xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; if (p->index) xp = xfrm_policy_byid(type, p->dir, p->index, 0, &err); else { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_policy tmp; - err = verify_sec_ctx_len(xfrma); + err = verify_sec_ctx_len(attrs); if (err) return err; memset(&tmp, 0, sizeof(struct xfrm_policy)); if (rt) { - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + struct xfrm_user_sec_ctx *uctx = nla_data(rt); if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; @@ -1650,8 +1537,8 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = 0; if (up->hard) { xfrm_policy_delete(xp, p->dir); - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_DELSPD, 1, xp, NULL); + xfrm_audit_policy_delete(xp, 1, NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); } else { // reset the timers here? @@ -1665,11 +1552,11 @@ out: } static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; int err; - struct xfrm_user_expire *ue = NLMSG_DATA(nlh); + struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; x = xfrm_state_lookup(&p->id.daddr, p->id.spi, p->id.proto, p->family); @@ -1686,8 +1573,8 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (ue->hard) { __xfrm_state_delete(x); - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_DELSA, 1, NULL, x); + xfrm_audit_state_delete(x, 1, NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); } err = 0; out: @@ -1697,14 +1584,14 @@ out: } static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_policy *xp; struct xfrm_user_tmpl *ut; int i; - struct rtattr *rt = xfrma[XFRMA_TMPL-1]; + struct nlattr *rt = attrs[XFRMA_TMPL]; - struct xfrm_user_acquire *ua = NLMSG_DATA(nlh); + struct xfrm_user_acquire *ua = nlmsg_data(nlh); struct xfrm_state *x = xfrm_state_alloc(); int err = -ENOMEM; @@ -1719,7 +1606,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, } /* build an XP */ - xp = xfrm_policy_construct(&ua->policy, (struct rtattr **) xfrma, &err); + xp = xfrm_policy_construct(&ua->policy, attrs, &err); if (!xp) { kfree(x); return err; @@ -1729,7 +1616,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, memcpy(&x->props.saddr, &ua->saddr, sizeof(ua->saddr)); memcpy(&x->sel, &ua->sel, sizeof(ua->sel)); - ut = RTA_DATA(rt); + ut = nla_data(rt); /* extract the templates and for each call km_key */ for (i = 0; i < xp->xfrm_nr; i++, ut++) { struct xfrm_tmpl *t = &xp->xfrm_vec[i]; @@ -1751,29 +1638,15 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, } #ifdef CONFIG_XFRM_MIGRATE -static int verify_user_migrate(struct rtattr **xfrma) -{ - struct rtattr *rt = xfrma[XFRMA_MIGRATE-1]; - struct xfrm_user_migrate *um; - - if (!rt) - return -EINVAL; - - if ((rt->rta_len - sizeof(*rt)) < sizeof(*um)) - return -EINVAL; - - return 0; -} - static int copy_from_user_migrate(struct xfrm_migrate *ma, - struct rtattr **xfrma, int *num) + struct nlattr **attrs, int *num) { - struct rtattr *rt = xfrma[XFRMA_MIGRATE-1]; + struct nlattr *rt = attrs[XFRMA_MIGRATE]; struct xfrm_user_migrate *um; int i, num_migrate; - um = RTA_DATA(rt); - num_migrate = (rt->rta_len - sizeof(*rt)) / sizeof(*um); + um = nla_data(rt); + num_migrate = nla_len(rt) / sizeof(*um); if (num_migrate <= 0 || num_migrate > XFRM_MAX_DEPTH) return -EINVAL; @@ -1797,24 +1670,23 @@ static int copy_from_user_migrate(struct xfrm_migrate *ma, } static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_userpolicy_id *pi = NLMSG_DATA(nlh); + struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); struct xfrm_migrate m[XFRM_MAX_DEPTH]; u8 type; int err; int n = 0; - err = verify_user_migrate((struct rtattr **)xfrma); - if (err) - return err; + if (attrs[XFRMA_MIGRATE] == NULL) + return -EINVAL; - err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; err = copy_from_user_migrate((struct xfrm_migrate *)m, - (struct rtattr **)xfrma, &n); + attrs, &n); if (err) return err; @@ -1827,7 +1699,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, } #else static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { return -ENOPROTOOPT; } @@ -1849,11 +1721,14 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) memcpy(&um.new_daddr, &m->new_daddr, sizeof(um.new_daddr)); memcpy(&um.new_saddr, &m->new_saddr, sizeof(um.new_saddr)); - RTA_PUT(skb, XFRMA_MIGRATE, sizeof(um), &um); - return 0; + return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); +} -rtattr_failure: - return -1; +static inline size_t xfrm_migrate_msgsize(int num_migrate) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) + + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + + userpolicy_type_attrsize(); } static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, @@ -1863,13 +1738,13 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); int i; - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id)); - pol_id = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id), 0); + if (nlh == NULL) + return -EMSGSIZE; + pol_id = nlmsg_data(nlh); /* copy data from selector, dir, and type to the pol_id */ memset(pol_id, 0, sizeof(*pol_id)); memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); @@ -1883,25 +1758,18 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, goto nlmsg_failure; } - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate) { struct sk_buff *skb; - size_t len; - len = RTA_SPACE(sizeof(struct xfrm_user_migrate) * num_migrate); - len += NLMSG_SPACE(sizeof(struct xfrm_userpolicy_id)); -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -1909,9 +1777,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_MIGRATE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, - GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, @@ -1921,7 +1787,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } #endif -#define XMSGSIZE(type) NLMSG_LENGTH(sizeof(struct type)) +#define XMSGSIZE(type) sizeof(struct type) static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), @@ -1937,19 +1803,36 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_polexpire), [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush), - [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), + [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0, [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), - [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)), - [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)), + [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), }; #undef XMSGSIZE +static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { + [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) }, + [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) }, + [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) }, + [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) }, + [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) }, + [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 }, + [XFRMA_ETIMER_THRESH] = { .type = NLA_U32 }, + [XFRMA_SRCADDR] = { .len = sizeof(xfrm_address_t) }, + [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, + [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, + [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, +}; + static struct xfrm_link { - int (*doit)(struct sk_buff *, struct nlmsghdr *, struct rtattr **); + int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); } xfrm_dispatch[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, @@ -1977,9 +1860,9 @@ static struct xfrm_link { static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct rtattr *xfrma[XFRMA_MAX]; + struct nlattr *attrs[XFRMA_MAX+1]; struct xfrm_link *link; - int type, min_len; + int type, err; type = nlh->nlmsg_type; if (type > XFRM_MSG_MAX) @@ -2001,98 +1884,71 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL); } - memset(xfrma, 0, sizeof(xfrma)); - - if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type])) - return -EINVAL; - - if (nlh->nlmsg_len > min_len) { - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len); - - while (RTA_OK(attr, attrlen)) { - unsigned short flavor = attr->rta_type; - if (flavor) { - if (flavor > XFRMA_MAX) - return -EINVAL; - xfrma[flavor - 1] = attr; - } - attr = RTA_NEXT(attr, attrlen); - } - } + err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, + xfrma_policy); + if (err < 0) + return err; if (link->doit == NULL) return -EINVAL; - return link->doit(skb, nlh, xfrma); + return link->doit(skb, nlh, attrs); } -static void xfrm_netlink_rcv(struct sock *sk, int len) +static void xfrm_netlink_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - mutex_lock(&xfrm_cfg_mutex); - netlink_run_queue(sk, &qlen, &xfrm_user_rcv_msg); - mutex_unlock(&xfrm_cfg_mutex); + mutex_lock(&xfrm_cfg_mutex); + netlink_rcv_skb(skb, &xfrm_user_rcv_msg); + mutex_unlock(&xfrm_cfg_mutex); +} - } while (qlen); +static inline size_t xfrm_expire_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)); } static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) { struct xfrm_user_expire *ue; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_EXPIRE, - sizeof(*ue)); - ue = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); + if (nlh == NULL) + return -EMSGSIZE; + ue = nlmsg_data(nlh); copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 1 : 0; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; - -nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + return nlmsg_end(skb, nlh); } static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) { struct sk_buff *skb; - int len = NLMSG_LENGTH(sizeof(struct xfrm_user_expire)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_expire_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_expire(skb, x, c) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) { struct sk_buff *skb; - int len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id)); - len += RTA_SPACE(sizeof(struct xfrm_replay_state)); - len += RTA_SPACE(sizeof(struct xfrm_lifetime_cur)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_aevent(skb, x, c) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_AEVENTS; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); } static int xfrm_notify_sa_flush(struct km_event *c) @@ -2100,42 +1956,45 @@ static int xfrm_notify_sa_flush(struct km_event *c) struct xfrm_usersa_flush *p; struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; - int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)); + int len = NLMSG_ALIGN(sizeof(struct xfrm_usersa_flush)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, - XFRM_MSG_FLUSHSA, sizeof(*p)); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); + if (nlh == NULL) { + kfree_skb(skb); + return -EMSGSIZE; + } - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); p->proto = c->data.proto; - nlh->nlmsg_len = skb->tail - b; - - NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + nlmsg_end(skb, nlh); -nlmsg_failure: - kfree_skb(skb); - return -1; + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); } -static inline int xfrm_sa_len(struct xfrm_state *x) +static inline size_t xfrm_sa_len(struct xfrm_state *x) { - int l = 0; + size_t l = 0; if (x->aalg) - l += RTA_SPACE(sizeof(*x->aalg) + (x->aalg->alg_key_len+7)/8); + l += nla_total_size(alg_len(x->aalg)); if (x->ealg) - l += RTA_SPACE(sizeof(*x->ealg) + (x->ealg->alg_key_len+7)/8); + l += nla_total_size(alg_len(x->ealg)); if (x->calg) - l += RTA_SPACE(sizeof(*x->calg)); + l += nla_total_size(sizeof(*x->calg)); if (x->encap) - l += RTA_SPACE(sizeof(*x->encap)); + l += nla_total_size(sizeof(*x->encap)); + if (x->security) + l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) + + x->security->ctx_len); + if (x->coaddr) + l += nla_total_size(sizeof(*x->coaddr)); + + /* Must count this as this may become non-zero behind our back. */ + l += nla_total_size(sizeof(x->lastused)); return l; } @@ -2146,57 +2005,51 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) struct xfrm_usersa_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; int len = xfrm_sa_len(x); int headlen; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELSA) { - len += RTA_SPACE(headlen); + len += nla_total_size(headlen); headlen = sizeof(*id); } - len += NLMSG_SPACE(headlen); + len += NLMSG_ALIGN(headlen); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + if (nlh == NULL) + goto nla_put_failure; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELSA) { - id = NLMSG_DATA(nlh); + struct nlattr *attr; + + id = nlmsg_data(nlh); memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr)); id->spi = x->id.spi; id->family = x->props.family; id->proto = x->id.proto; - p = RTA_DATA(__RTA_PUT(skb, XFRMA_SA, sizeof(*p))); - } - - copy_to_user_state(x, p); + attr = nla_reserve(skb, XFRMA_SA, sizeof(*p)); + if (attr == NULL) + goto nla_put_failure; - if (x->aalg) - RTA_PUT(skb, XFRMA_ALG_AUTH, - sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg); - if (x->ealg) - RTA_PUT(skb, XFRMA_ALG_CRYPT, - sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg); - if (x->calg) - RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + p = nla_data(attr); + } - if (x->encap) - RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (copy_to_user_state_extra(x, p, skb)) + goto nla_put_failure; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); -nlmsg_failure: -rtattr_failure: +nla_put_failure: + /* Somebody screwed up with xfrm_sa_len! */ + WARN_ON(1); kfree_skb(skb); return -1; } @@ -2224,20 +2077,28 @@ static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c) } +static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, + struct xfrm_policy *xp) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) + + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + + nla_total_size(xfrm_user_sec_ctx_size(x->security)) + + userpolicy_type_attrsize(); +} + static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp, int dir) { struct xfrm_user_acquire *ua; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); __u32 seq = xfrm_get_acqseq(); - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE, - sizeof(*ua)); - ua = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_ACQUIRE, sizeof(*ua), 0); + if (nlh == NULL) + return -EMSGSIZE; + ua = nlmsg_data(nlh); memcpy(&ua->id, &x->id, sizeof(ua->id)); memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr)); memcpy(&ua->sel, &x->sel, sizeof(ua->sel)); @@ -2254,35 +2115,26 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp, int dir) { struct sk_buff *skb; - size_t len; - len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire)); - len += RTA_SPACE(xfrm_user_sec_ctx_size(x->security)); -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_acquire(skb, x, xt, xp, dir) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_ACQUIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -2344,18 +2196,26 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, return xp; } +static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) + + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + + nla_total_size(xfrm_user_sec_ctx_size(xp->security)) + + userpolicy_type_attrsize(); +} + static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, int dir, struct km_event *c) { struct xfrm_user_polexpire *upe; struct nlmsghdr *nlh; int hard = c->data.hard; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe)); - upe = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); + if (nlh == NULL) + return -EMSGSIZE; + upe = nlmsg_data(nlh); copy_to_user_policy(xp, &upe->pol, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; @@ -2365,34 +2225,25 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, goto nlmsg_failure; upe->hard = !!hard; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) { struct sk_buff *skb; - size_t len; - len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)); - len += RTA_SPACE(xfrm_user_sec_ctx_size(xp->security)); -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_polexpire(skb, xp, dir, c) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) @@ -2401,30 +2252,30 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; - int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); + int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); int headlen; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELPOLICY) { - len += RTA_SPACE(headlen); + len += nla_total_size(headlen); headlen = sizeof(*id); } -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - len += NLMSG_SPACE(headlen); + len += userpolicy_type_attrsize(); + len += NLMSG_ALIGN(headlen); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen); + nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + if (nlh == NULL) + goto nlmsg_failure; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELPOLICY) { - id = NLMSG_DATA(nlh); + struct nlattr *attr; + + id = nlmsg_data(nlh); memset(id, 0, sizeof(*id)); id->dir = dir; if (c->data.byid) @@ -2432,10 +2283,12 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * else memcpy(&id->sel, &xp->selector, sizeof(id->sel)); - p = RTA_DATA(__RTA_PUT(skb, XFRMA_POLICY, sizeof(*p))); - } + attr = nla_reserve(skb, XFRMA_POLICY, sizeof(*p)); + if (attr == NULL) + goto nlmsg_failure; - nlh->nlmsg_flags = 0; + p = nla_data(attr); + } copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) @@ -2443,13 +2296,11 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: -rtattr_failure: kfree_skb(skb); return -1; } @@ -2458,28 +2309,20 @@ static int xfrm_notify_policy_flush(struct km_event *c) { struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; - int len = 0; -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - len += NLMSG_LENGTH(0); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(userpolicy_type_attrsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - - nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + if (nlh == NULL) + goto nlmsg_failure; if (copy_to_user_policy_type(c->data.type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -2506,48 +2349,48 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev } +static inline size_t xfrm_report_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_report)); +} + static int build_report(struct sk_buff *skb, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { struct xfrm_user_report *ur; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); - ur = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur), 0); + if (nlh == NULL) + return -EMSGSIZE; + ur = nlmsg_data(nlh); ur->proto = proto; memcpy(&ur->sel, sel, sizeof(ur->sel)); if (addr) - RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); + NLA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); -nlmsg_failure: -rtattr_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { struct sk_buff *skb; - size_t len; - len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report))); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_report(skb, proto, sel, addr) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); } static struct xfrm_mgr netlink_mgr = { @@ -2566,7 +2409,7 @@ static int __init xfrm_user_init(void) printk(KERN_INFO "Initializing XFRM netlink socket\n"); - nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, + nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, NULL, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; |